From e7fe561c24f7213f752506f47a7a5be5c2b15d5b Mon Sep 17 00:00:00 2001 From: Serguei Sokol Date: Fri, 24 Mar 2023 18:53:38 +0100 Subject: [PATCH] v6.5.0-3 updated sundials to 6.5.0 Signed-off-by: Serguei Sokol --- .Rbuildignore | 10 +- DESCRIPTION | 10 +- NEWS | 12 +- R/RcppExports.R | 21 +- inst/CITATION | 4 +- inst/COPYRIGHTS | 4 +- inst/include/cvodes/cvodes.h | 122 +- inst/include/cvodes/cvodes_bandpre.h | 2 +- inst/include/cvodes/cvodes_bbdpre.h | 2 +- inst/include/cvodes/cvodes_diag.h | 2 +- inst/include/cvodes/cvodes_direct.h | 12 +- inst/include/cvodes/cvodes_ls.h | 28 +- inst/include/cvodes/cvodes_proj.h | 58 + inst/include/cvodes/cvodes_spils.h | 22 +- inst/include/nvector/nvector_cuda.h | 207 + inst/include/nvector/nvector_hip.h | 207 + inst/include/nvector/nvector_kokkos.hpp | 656 ++ inst/include/nvector/nvector_manyvector.h | 179 + inst/include/nvector/nvector_mpimanyvector.h | 200 + inst/include/nvector/nvector_mpiplusx.h | 57 + inst/include/nvector/nvector_openmp.h | 217 + inst/include/nvector/nvector_openmpdev.h | 218 + inst/include/nvector/nvector_parallel.h | 248 + inst/include/nvector/nvector_parhyp.h | 238 + inst/include/nvector/nvector_petsc.h | 217 + inst/include/nvector/nvector_pthreads.h | 268 + inst/include/nvector/nvector_raja.h | 197 + inst/include/nvector/nvector_serial.h | 43 +- inst/include/nvector/nvector_sycl.h | 225 + inst/include/nvector/nvector_trilinos.h | 141 + .../SundialsTpetraVectorInterface.hpp | 70 + .../trilinos/SundialsTpetraVectorKernels.hpp | 678 ++ inst/include/sundials/sundials_band.h | 191 +- inst/include/sundials/sundials_base.hpp | 159 + inst/include/sundials/sundials_config.h | 251 +- inst/include/sundials/sundials_config.in | 226 +- inst/include/sundials/sundials_context.h | 47 + inst/include/sundials/sundials_context.hpp | 71 + inst/include/sundials/sundials_context_impl.h | 39 + .../sundials/sundials_convertibleto.hpp | 39 + inst/include/sundials/sundials_cuda.h | 116 + .../sundials/sundials_cuda_policies.hpp | 234 + inst/include/sundials/sundials_debug.h | 51 + inst/include/sundials/sundials_dense.h | 318 +- inst/include/sundials/sundials_direct.h | 307 +- inst/include/sundials/sundials_export.h | 42 + inst/include/sundials/sundials_futils.h | 38 + inst/include/sundials/sundials_hashmap.h | 440 ++ inst/include/sundials/sundials_hip.h | 73 + .../sundials/sundials_hip_policies.hpp | 238 + inst/include/sundials/sundials_iterative.h | 353 +- .../sundials/sundials_iterative_impl.h | 35 + inst/include/sundials/sundials_lapack.h | 181 +- inst/include/sundials/sundials_lapack_defs.h | 103 + inst/include/sundials/sundials_linearsolver.h | 148 +- .../sundials/sundials_linearsolver.hpp | 41 + inst/include/sundials/sundials_logger.h | 59 + inst/include/sundials/sundials_logger_impl.h | 61 + inst/include/sundials/sundials_math.h | 144 +- inst/include/sundials/sundials_matrix.h | 78 +- inst/include/sundials/sundials_matrix.hpp | 41 + inst/include/sundials/sundials_memory.h | 162 + inst/include/sundials/sundials_mpi_types.h | 6 +- .../sundials/sundials_nonlinearsolver.h | 111 +- .../sundials/sundials_nonlinearsolver.hpp | 41 + inst/include/sundials/sundials_nvector.h | 210 +- inst/include/sundials/sundials_nvector.hpp | 41 + .../sundials/sundials_nvector_senswrapper.h | 4 +- inst/include/sundials/sundials_profiler.h | 118 + inst/include/sundials/sundials_sycl.h | 73 + .../sundials/sundials_sycl_policies.hpp | 163 + inst/include/sundials/sundials_types.h | 90 +- inst/include/sundials/sundials_utils.h | 88 + inst/include/sundials/sundials_version.h | 2 +- inst/include/sundials/sundials_xbraid.h | 134 + inst/include/sunlinsol/sunlinsol_band.h | 9 +- .../sunlinsol/sunlinsol_cusolversp_batchqr.h | 30 +- inst/include/sunlinsol/sunlinsol_dense.h | 9 +- inst/include/sunlinsol/sunlinsol_ginkgo.hpp | 348 + inst/include/sunlinsol/sunlinsol_klu.h | 13 +- .../sunlinsol/sunlinsol_kokkosdense.hpp | 236 + inst/include/sunlinsol/sunlinsol_lapackband.h | 27 +- .../include/sunlinsol/sunlinsol_lapackdense.h | 27 +- inst/include/sunlinsol/sunlinsol_magmadense.h | 79 + .../include/sunlinsol/sunlinsol_onemkldense.h | 85 + inst/include/sunlinsol/sunlinsol_pcg.h | 35 +- inst/include/sunlinsol/sunlinsol_spbcgs.h | 40 +- inst/include/sunlinsol/sunlinsol_spfgmr.h | 41 +- inst/include/sunlinsol/sunlinsol_spgmr.h | 43 +- inst/include/sunlinsol/sunlinsol_sptfqmr.h | 36 +- .../include/sunlinsol/sunlinsol_superludist.h | 36 +- inst/include/sunlinsol/sunlinsol_superlumt.h | 13 +- inst/include/sunlinsol_rmumps.h | 3 +- inst/include/sunmatrix/sunmatrix_band.h | 22 +- inst/include/sunmatrix/sunmatrix_cusparse.h | 135 + inst/include/sunmatrix/sunmatrix_dense.h | 14 +- inst/include/sunmatrix/sunmatrix_ginkgo.hpp | 361 + .../sunmatrix/sunmatrix_kokkosdense.hpp | 403 ++ inst/include/sunmatrix/sunmatrix_magmadense.h | 122 + .../include/sunmatrix/sunmatrix_onemkldense.h | 175 + inst/include/sunmatrix/sunmatrix_slunrloc.h | 4 +- inst/include/sunmatrix/sunmatrix_sparse.h | 20 +- .../sunnonlinsol/sunnonlinsol_fixedpoint.h | 75 +- .../sunnonlinsol/sunnonlinsol_newton.h | 24 +- .../sunnonlinsol/sunnonlinsol_petscsnes.h | 6 +- inst/unitTests/runit.r2cvodes.R | 23 +- man/r2cvodes.Rd | 21 +- src/Makevars | 7 +- src/RcppExports.cpp | 5 + src/lib/cvodes/LICENSE | 2 +- src/lib/cvodes/README.md | 24 +- src/lib/cvodes/cvodea.c | 120 +- src/lib/cvodes/cvodea_io.c | 2 +- src/lib/cvodes/cvodes.c | 6416 +++++++++-------- src/lib/cvodes/cvodes_bandpre.c | 8 +- src/lib/cvodes/cvodes_bandpre_impl.h | 2 +- src/lib/cvodes/cvodes_bbdpre.c | 12 +- src/lib/cvodes/cvodes_bbdpre_impl.h | 2 +- src/lib/cvodes/cvodes_diag.c | 2 +- src/lib/cvodes/cvodes_diag_impl.h | 2 +- src/lib/cvodes/cvodes_direct.c | 2 +- src/lib/cvodes/cvodes_impl.h | 423 +- src/lib/cvodes/cvodes_io.c | 919 ++- src/lib/cvodes/cvodes_ls.c | 464 +- src/lib/cvodes/cvodes_ls_impl.h | 27 +- src/lib/cvodes/cvodes_nls.c | 86 +- src/lib/cvodes/cvodes_nls_sim.c | 58 +- src/lib/cvodes/cvodes_nls_stg.c | 8 +- src/lib/cvodes/cvodes_nls_stg1.c | 2 +- src/lib/cvodes/cvodes_proj.c | 477 ++ src/lib/cvodes/cvodes_proj_impl.h | 75 + src/lib/cvodes/cvodes_spils.c | 2 +- .../nvector/hip/VectorArrayKernels.hip.hpp | 226 + src/lib/nvector/hip/VectorKernels.hip.hpp | 368 + src/lib/nvector/hip/nvector_hip.hip.cpp | 2559 +++++++ src/lib/nvector/serial/fnvector_serial.c | 154 - src/lib/nvector/serial/fnvector_serial.h | 92 - src/lib/nvector/serial/nvector_serial.c | 153 +- src/lib/readme.txt | 27 +- src/lib/sundials/sundials_band.c | 148 +- src/lib/sundials/sundials_context.c | 205 + src/lib/sundials/sundials_context_impl.h | 39 + src/lib/sundials/sundials_cuda.h | 116 + src/lib/sundials/sundials_cuda_kernels.cuh | 493 ++ src/lib/sundials/sundials_debug.h | 51 + src/lib/sundials/sundials_dense.c | 169 +- src/lib/sundials/sundials_direct.c | 150 +- src/lib/sundials/sundials_futils.c | 29 + src/lib/sundials/sundials_hashmap.h | 440 ++ src/lib/sundials/sundials_hip.h | 73 + src/lib/sundials/sundials_hip_kernels.hip.hpp | 506 ++ src/lib/sundials/sundials_iterative.c | 405 +- src/lib/sundials/sundials_iterative_impl.h | 35 + src/lib/sundials/sundials_lapack_defs.h | 103 + src/lib/sundials/sundials_linearsolver.c | 97 +- src/lib/sundials/sundials_logger.c | 590 ++ src/lib/sundials/sundials_logger_impl.h | 61 + src/lib/sundials/sundials_math.c | 148 +- src/lib/sundials/sundials_matrix.c | 83 +- src/lib/sundials/sundials_memory.c | 266 + src/lib/sundials/sundials_nonlinearsolver.c | 40 +- src/lib/sundials/sundials_nvector.c | 564 +- .../sundials/sundials_nvector_senswrapper.c | 88 +- src/lib/sundials/sundials_profiler.c | 517 ++ src/lib/sundials/sundials_reductions.hpp | 84 + src/lib/sundials/sundials_sycl.h | 73 + src/lib/sundials/sundials_utils.h | 88 + src/lib/sundials/sundials_version.c | 34 +- src/lib/sunlinsol/band/sunlinsol_band.c | 241 + .../sunlinsol_cusolversp_batchqr.cu | 356 + src/lib/sunlinsol/dense/fsunlinsol_dense.c | 96 - src/lib/sunlinsol/dense/fsunlinsol_dense.h | 62 - src/lib/sunlinsol/dense/sunlinsol_dense.c | 22 +- .../lapackband/sunlinsol_lapackband.c | 249 + .../lapackdense/sunlinsol_lapackdense.c | 240 + .../magmadense/sunlinsol_magmadense.cpp | 424 ++ .../onemkldense/sunlinsol_onemkldense.cpp | 617 ++ src/lib/sunlinsol/pcg/sunlinsol_pcg.c | 609 ++ src/lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c | 811 +++ src/lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c | 851 +++ src/lib/sunlinsol/spgmr/sunlinsol_spgmr.c | 899 +++ src/lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c | 938 +++ src/lib/sunmatrix/band/fsunmatrix_band.c | 90 - src/lib/sunmatrix/band/fsunmatrix_band.h | 62 - src/lib/sunmatrix/band/sunmatrix_band.c | 58 +- .../sunmatrix/cusparse/cusparse_kernels.cuh | 172 + .../sunmatrix/cusparse/sunmatrix_cusparse.cu | 1265 ++++ src/lib/sunmatrix/dense/fsunmatrix_dense.c | 83 - src/lib/sunmatrix/dense/fsunmatrix_dense.h | 62 - src/lib/sunmatrix/dense/sunmatrix_dense.c | 195 +- .../magmadense/dense_cuda_kernels.cuh | 113 + .../magmadense/dense_hip_kernels.hip.hpp | 112 + .../magmadense/sunmatrix_magmadense.cpp | 653 ++ .../onemkldense/sunmatrix_onemkldense.cpp | 771 ++ src/lib/sunmatrix/sparse/fsunmatrix_sparse.c | 94 - src/lib/sunmatrix/sparse/fsunmatrix_sparse.h | 65 - src/lib/sunmatrix/sparse/sunmatrix_sparse.c | 126 +- .../fixedpoint/sunnonlinsol_fixedpoint.c | 769 ++ .../newton/fsunnonlinsol_newton.c | 95 - .../newton/fsunnonlinsol_newton.h | 56 - .../sunnonlinsol/newton/sunnonlinsol_newton.c | 105 +- src/r2sundials.cpp | 43 +- src/sunlinsol_rmumps.cpp | 61 +- 203 files changed, 36007 insertions(+), 6336 deletions(-) create mode 100644 inst/include/cvodes/cvodes_proj.h create mode 100644 inst/include/nvector/nvector_cuda.h create mode 100644 inst/include/nvector/nvector_hip.h create mode 100644 inst/include/nvector/nvector_kokkos.hpp create mode 100644 inst/include/nvector/nvector_manyvector.h create mode 100644 inst/include/nvector/nvector_mpimanyvector.h create mode 100644 inst/include/nvector/nvector_mpiplusx.h create mode 100644 inst/include/nvector/nvector_openmp.h create mode 100644 inst/include/nvector/nvector_openmpdev.h create mode 100644 inst/include/nvector/nvector_parallel.h create mode 100644 inst/include/nvector/nvector_parhyp.h create mode 100644 inst/include/nvector/nvector_petsc.h create mode 100644 inst/include/nvector/nvector_pthreads.h create mode 100644 inst/include/nvector/nvector_raja.h create mode 100644 inst/include/nvector/nvector_sycl.h create mode 100644 inst/include/nvector/nvector_trilinos.h create mode 100644 inst/include/nvector/trilinos/SundialsTpetraVectorInterface.hpp create mode 100644 inst/include/nvector/trilinos/SundialsTpetraVectorKernels.hpp create mode 100644 inst/include/sundials/sundials_base.hpp create mode 100644 inst/include/sundials/sundials_context.h create mode 100644 inst/include/sundials/sundials_context.hpp create mode 100644 inst/include/sundials/sundials_context_impl.h create mode 100644 inst/include/sundials/sundials_convertibleto.hpp create mode 100644 inst/include/sundials/sundials_cuda.h create mode 100644 inst/include/sundials/sundials_cuda_policies.hpp create mode 100644 inst/include/sundials/sundials_debug.h create mode 100644 inst/include/sundials/sundials_export.h create mode 100644 inst/include/sundials/sundials_futils.h create mode 100644 inst/include/sundials/sundials_hashmap.h create mode 100644 inst/include/sundials/sundials_hip.h create mode 100644 inst/include/sundials/sundials_hip_policies.hpp create mode 100644 inst/include/sundials/sundials_iterative_impl.h create mode 100644 inst/include/sundials/sundials_lapack_defs.h create mode 100644 inst/include/sundials/sundials_linearsolver.hpp create mode 100644 inst/include/sundials/sundials_logger.h create mode 100644 inst/include/sundials/sundials_logger_impl.h create mode 100644 inst/include/sundials/sundials_matrix.hpp create mode 100644 inst/include/sundials/sundials_memory.h create mode 100644 inst/include/sundials/sundials_nonlinearsolver.hpp create mode 100644 inst/include/sundials/sundials_nvector.hpp create mode 100644 inst/include/sundials/sundials_profiler.h create mode 100644 inst/include/sundials/sundials_sycl.h create mode 100644 inst/include/sundials/sundials_sycl_policies.hpp create mode 100644 inst/include/sundials/sundials_utils.h create mode 100644 inst/include/sundials/sundials_xbraid.h create mode 100644 inst/include/sunlinsol/sunlinsol_ginkgo.hpp create mode 100644 inst/include/sunlinsol/sunlinsol_kokkosdense.hpp create mode 100644 inst/include/sunlinsol/sunlinsol_magmadense.h create mode 100644 inst/include/sunlinsol/sunlinsol_onemkldense.h create mode 100644 inst/include/sunmatrix/sunmatrix_cusparse.h create mode 100644 inst/include/sunmatrix/sunmatrix_ginkgo.hpp create mode 100644 inst/include/sunmatrix/sunmatrix_kokkosdense.hpp create mode 100644 inst/include/sunmatrix/sunmatrix_magmadense.h create mode 100644 inst/include/sunmatrix/sunmatrix_onemkldense.h create mode 100644 src/lib/cvodes/cvodes_proj.c create mode 100644 src/lib/cvodes/cvodes_proj_impl.h create mode 100644 src/lib/nvector/hip/VectorArrayKernels.hip.hpp create mode 100644 src/lib/nvector/hip/VectorKernels.hip.hpp create mode 100644 src/lib/nvector/hip/nvector_hip.hip.cpp delete mode 100644 src/lib/nvector/serial/fnvector_serial.c delete mode 100644 src/lib/nvector/serial/fnvector_serial.h create mode 100644 src/lib/sundials/sundials_context.c create mode 100644 src/lib/sundials/sundials_context_impl.h create mode 100644 src/lib/sundials/sundials_cuda.h create mode 100644 src/lib/sundials/sundials_cuda_kernels.cuh create mode 100644 src/lib/sundials/sundials_debug.h create mode 100644 src/lib/sundials/sundials_futils.c create mode 100644 src/lib/sundials/sundials_hashmap.h create mode 100644 src/lib/sundials/sundials_hip.h create mode 100644 src/lib/sundials/sundials_hip_kernels.hip.hpp create mode 100644 src/lib/sundials/sundials_iterative_impl.h create mode 100644 src/lib/sundials/sundials_lapack_defs.h create mode 100644 src/lib/sundials/sundials_logger.c create mode 100644 src/lib/sundials/sundials_logger_impl.h create mode 100644 src/lib/sundials/sundials_memory.c create mode 100644 src/lib/sundials/sundials_profiler.c create mode 100644 src/lib/sundials/sundials_reductions.hpp create mode 100644 src/lib/sundials/sundials_sycl.h create mode 100644 src/lib/sundials/sundials_utils.h create mode 100644 src/lib/sunlinsol/band/sunlinsol_band.c create mode 100644 src/lib/sunlinsol/cusolversp/sunlinsol_cusolversp_batchqr.cu delete mode 100644 src/lib/sunlinsol/dense/fsunlinsol_dense.c delete mode 100644 src/lib/sunlinsol/dense/fsunlinsol_dense.h create mode 100644 src/lib/sunlinsol/lapackband/sunlinsol_lapackband.c create mode 100644 src/lib/sunlinsol/lapackdense/sunlinsol_lapackdense.c create mode 100644 src/lib/sunlinsol/magmadense/sunlinsol_magmadense.cpp create mode 100644 src/lib/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp create mode 100644 src/lib/sunlinsol/pcg/sunlinsol_pcg.c create mode 100644 src/lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c create mode 100644 src/lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c create mode 100644 src/lib/sunlinsol/spgmr/sunlinsol_spgmr.c create mode 100644 src/lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c delete mode 100644 src/lib/sunmatrix/band/fsunmatrix_band.c delete mode 100644 src/lib/sunmatrix/band/fsunmatrix_band.h create mode 100644 src/lib/sunmatrix/cusparse/cusparse_kernels.cuh create mode 100644 src/lib/sunmatrix/cusparse/sunmatrix_cusparse.cu delete mode 100644 src/lib/sunmatrix/dense/fsunmatrix_dense.c delete mode 100644 src/lib/sunmatrix/dense/fsunmatrix_dense.h create mode 100644 src/lib/sunmatrix/magmadense/dense_cuda_kernels.cuh create mode 100644 src/lib/sunmatrix/magmadense/dense_hip_kernels.hip.hpp create mode 100644 src/lib/sunmatrix/magmadense/sunmatrix_magmadense.cpp create mode 100644 src/lib/sunmatrix/onemkldense/sunmatrix_onemkldense.cpp delete mode 100644 src/lib/sunmatrix/sparse/fsunmatrix_sparse.c delete mode 100644 src/lib/sunmatrix/sparse/fsunmatrix_sparse.h create mode 100644 src/lib/sunnonlinsol/fixedpoint/sunnonlinsol_fixedpoint.c delete mode 100644 src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.c delete mode 100644 src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.h diff --git a/.Rbuildignore b/.Rbuildignore index b9c4dae..ab7db4a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,12 @@ +todo.txt +gdb.txt +NAMESPACE.empty +NAMESPACE.save +^.*\.save$ +^\.git.*$ +^autom.*$ Readme.Rmd Readme.md -gdb.txt ^rm$ +^.*\.toremove$ +lib-5.0.0 diff --git a/DESCRIPTION b/DESCRIPTION index bb355df..2ff58e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: r2sundials Type: Package Title: Wrapper for 'SUNDIALS' Solving ODE and Sensitivity Problem -Version: 5.0.0-10 -Date: 2021-05-17 +Version: 6.5.0-3 +Date: 2023-03-21 Authors@R: c( person("Serguei", "Sokol", role=c("cre", "aut"), email="sokol@insa-toulouse.fr"), person("Carol S.", "Woodward", role="ctb"), @@ -33,7 +33,9 @@ Authors@R: c( person("Chris", "White", role="ctb"), person("Lawrence Livermore National Security", role="cph"), person("Southern Methodist University", role="cph"), - person("INRAE", role="cph") + person("INSA", role="cph"), + person("INRAE", role="cph"), + person("CNRS", role="cph") ) Maintainer: Serguei Sokol Description: Wrapper for widely used 'SUNDIALS' software (SUite of Nonlinear and DIfferential/ALgebraic Equation Solvers) and more precisely to its 'CVODES' solver. It is aiming to solve ordinary differential equations (ODE) and optionally pending forward sensitivity problem. The wrapper is made 'R' friendly by allowing to pass custom parameters to user's callback functions. Such functions can be both written in 'R' and in 'C++' ('RcppArmadillo' flavor). In case of 'C++', performance is greatly improved so this option is highly advisable when performance matters. If provided, Jacobian matrix can be calculated either in dense or sparse format. In the latter case 'rmumps' package is used to solve corresponding linear systems. Root finding and pending event management are optional and can be specified as 'R' or 'C++' functions too. This makes them a very flexible tool for controlling the ODE system during the time course simulation. 'SUNDIALS' library was published in Hindmarsh et al. (2005) . @@ -43,7 +45,7 @@ License: GPL (>=2) Imports: Rcpp (>= 1.0.0) LinkingTo: Rcpp, RcppArmadillo, rmumps (>= 5.2.1-6) Suggests: RcppXPtrUtils, slam, RUnit, deSolve, RcppArmadillo -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.2 Encoding: UTF-8 NeedsCompilation: yes Biarch: FALSE diff --git a/NEWS b/NEWS index 9d8d518..7858e84 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,16 @@ -Version 5.0.0-10 +Version 6.5.0-3 =============== +* 2023-03-21 + - passed to v6.5.0 of SUNDIALS library + - fixed warnings from "-Wstrict-prototypes" (reported by R-core) + - updated CIATION format + +Version 5.0.0-10 +================ + * 2021-05-17 - - fixed errors form "-fno-common" compile option relative to global variables + - fixed errors from "-fno-common" compile option relative to global variables (reported by R-core) Version 5.0.0-9 =============== diff --git a/R/RcppExports.R b/R/RcppExports.R index b2f72e5..93443e7 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -123,7 +123,8 @@ #' return(CV_SUCCESS); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' # For ease of use in C++, we convert param to a numeric vector instead of a list. #' pv=c(a=p$a) #' # new call to r2cvodes() with XPtr pointer ptr_exp. @@ -155,7 +156,8 @@ #' return(CV_SUCCESS); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' #' # root function #' ptr_ball_root=cppXPtr(code=' @@ -166,7 +168,8 @@ #' return(0); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' #' # event handler function #' ptr_ball_event=cppXPtr(code=' @@ -192,7 +195,8 @@ #' } #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' #' # ODE solving and plotting #' res_ball <- r2sundials::r2cvodes(yv, ti, ptr_ball, param=pv, nroot=2L, @@ -233,7 +237,8 @@ #' return(CV_SUCCESS); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' # pointer to sparse jacobian function #' ptr_rob_jacsp=cppXPtr(code=' #' int spjac_rob(double t, const vec &y, const vec &ydot, uvec &ir, uvec &pj, vec &v, int n, int nz, @@ -268,7 +273,8 @@ #' return(0); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' # pointer to sensitivity rhs function #' ptr_rob_sens1=cppXPtr(code=' #' int sens_rob1(int Ns, double t, const vec &y, const vec &ydot, int iS, const vec &yS, vec &ySdot, @@ -296,7 +302,8 @@ #' return(CV_SUCCESS); #' } #' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -#' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +#' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +#' cacheDir="lib", verbose=FALSE) #' # Note that we don't use psens param for sensitivity calculations as we provide our own fsens1. #' res_rob <- r2sundials::r2cvodes(yv, ti, ptr_rob, param=pv, nz=8, fjac=ptr_rob_jacsp, Ns=3, #' fsens1=ptr_rob_sens1) diff --git a/inst/CITATION b/inst/CITATION index 29053c0..7f109de 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,9 +1,9 @@ citHeader("To cite SUNDIALS in publications use:") c( -citEntry(entry = "Article", +bibentry(bibtype = "Article", title = "SUNDIALS: Suite of nonlinear and differential/algebraic equation solvers", - author = personList(as.person("A. C. Hindmarsh"), + author = c(as.person("A. C. Hindmarsh"), as.person("P. N. Brown"), as.person("K. E. Grant"), as.person("S. L. Lee"), as.person("R. Serban"), as.person("D. E. Shumaker"), as.person("C. S. Woodward")), journal = "ACM Transactions on Mathematical Software (TOMS)", year = "2005", diff --git a/inst/COPYRIGHTS b/inst/COPYRIGHTS index 52f0e6d..d5aafa6 100644 --- a/inst/COPYRIGHTS +++ b/inst/COPYRIGHTS @@ -1,7 +1,7 @@ For SUNDIALS code: -Copyright (c) 2002-2019, Lawrence Livermore National Security and Southern Methodist University. +Copyright (c) 2002-2022, Lawrence Livermore National Security and Southern Methodist University. All rights reserved. For r2sundials wrapper: -Copyright (c) 2020, INRAE/INSA/CNRS +Copyright (c) 2023, INRAE/INSA/CNRS All rights reserved. diff --git a/inst/include/cvodes/cvodes.h b/inst/include/cvodes/cvodes.h index d0e4977..8e3e98b 100644 --- a/inst/include/cvodes/cvodes.h +++ b/inst/include/cvodes/cvodes.h @@ -2,7 +2,7 @@ * Programmer(s): Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -18,9 +18,11 @@ #define _CVODES_H #include +#include #include #include #include +#include #ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { @@ -107,6 +109,11 @@ extern "C" { #define CV_REPTD_QSRHSFUNC_ERR -53 #define CV_UNREC_QSRHSFUNC_ERR -54 +#define CV_CONTEXT_ERR -55 + +#define CV_PROJ_MEM_NULL -56 +#define CV_PROJFUNC_FAIL -57 +#define CV_REPTD_PROJFUNC_ERR -58 #define CV_UNRECOGNIZED_ERR -99 @@ -136,6 +143,8 @@ typedef void (*CVErrHandlerFn)(int error_code, const char *module, const char *function, char *msg, void *user_data); +typedef int (*CVMonitorFn)(void *cvode_mem, void *user_data); + typedef int (*CVQuadRhsFn)(realtype t, N_Vector y, N_Vector yQdot, void *user_data); @@ -176,7 +185,7 @@ typedef int (*CVQuadRhsFnBS)(realtype t, N_Vector y, N_Vector *yS, * --------------------------------------- */ /* Initialization functions */ -SUNDIALS_EXPORT void *CVodeCreate(int lmm); +SUNDIALS_EXPORT void *CVodeCreate(int lmm, SUNContext sunctx); SUNDIALS_EXPORT int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0); @@ -190,26 +199,53 @@ SUNDIALS_EXPORT int CVodeSVtolerances(void *cvode_mem, realtype reltol, SUNDIALS_EXPORT int CVodeWFtolerances(void *cvode_mem, CVEwtFn efun); /* Optional input functions */ -SUNDIALS_EXPORT int CVodeSetErrHandlerFn(void *cvode_mem, CVErrHandlerFn ehfun, - void *eh_data); + +SUNDIALS_EXPORT int CVodeSetConstraints(void *cvode_mem, N_Vector constraints); +SUNDIALS_EXPORT int CVodeSetDeltaGammaMaxLSetup(void *cvode_mem, + realtype dgmax_lsetup); SUNDIALS_EXPORT int CVodeSetErrFile(void *cvode_mem, FILE *errfp); -SUNDIALS_EXPORT int CVodeSetUserData(void *cvode_mem, void *user_data); -SUNDIALS_EXPORT int CVodeSetMaxOrd(void *cvode_mem, int maxord); -SUNDIALS_EXPORT int CVodeSetMaxNumSteps(void *cvode_mem, long int mxsteps); -SUNDIALS_EXPORT int CVodeSetMaxHnilWarns(void *cvode_mem, int mxhnil); -SUNDIALS_EXPORT int CVodeSetStabLimDet(void *cvode_mem, booleantype stldet); +SUNDIALS_EXPORT int CVodeSetErrHandlerFn(void *cvode_mem, CVErrHandlerFn ehfun, void *eh_data); SUNDIALS_EXPORT int CVodeSetInitStep(void *cvode_mem, realtype hin); -SUNDIALS_EXPORT int CVodeSetMinStep(void *cvode_mem, realtype hmin); -SUNDIALS_EXPORT int CVodeSetMaxStep(void *cvode_mem, realtype hmax); -SUNDIALS_EXPORT int CVodeSetStopTime(void *cvode_mem, realtype tstop); +SUNDIALS_EXPORT int CVodeSetLSetupFrequency(void *cvode_mem, long int msbp); +SUNDIALS_EXPORT int CVodeSetMaxConvFails(void *cvode_mem, int maxncf); SUNDIALS_EXPORT int CVodeSetMaxErrTestFails(void *cvode_mem, int maxnef); +SUNDIALS_EXPORT int CVodeSetMaxHnilWarns(void *cvode_mem, int mxhnil); SUNDIALS_EXPORT int CVodeSetMaxNonlinIters(void *cvode_mem, int maxcor); -SUNDIALS_EXPORT int CVodeSetMaxConvFails(void *cvode_mem, int maxncf); +SUNDIALS_EXPORT int CVodeSetMaxNumSteps(void *cvode_mem, long int mxsteps); +SUNDIALS_EXPORT int CVodeSetMaxOrd(void *cvode_mem, int maxord); +SUNDIALS_EXPORT int CVodeSetMaxStep(void *cvode_mem, realtype hmax); +SUNDIALS_EXPORT int CVodeSetMinStep(void *cvode_mem, realtype hmin); +SUNDIALS_EXPORT int CVodeSetMonitorFn(void *cvode_mem, CVMonitorFn fn); +SUNDIALS_EXPORT int CVodeSetMonitorFrequency(void *cvode_mem, long int nst); +SUNDIALS_EXPORT int CVodeSetNlsRhsFn(void *cvode_mem, CVRhsFn f); SUNDIALS_EXPORT int CVodeSetNonlinConvCoef(void *cvode_mem, realtype nlscoef); -SUNDIALS_EXPORT int CVodeSetConstraints(void *cvode_mem, N_Vector constraints); +SUNDIALS_EXPORT int CVodeSetNonlinearSolver(void *cvode_mem, SUNNonlinearSolver NLS); +SUNDIALS_EXPORT int CVodeSetStabLimDet(void *cvode_mem, booleantype stldet); +SUNDIALS_EXPORT int CVodeSetStopTime(void *cvode_mem, realtype tstop); +SUNDIALS_EXPORT int CVodeSetUserData(void *cvode_mem, void *user_data); -SUNDIALS_EXPORT int CVodeSetNonlinearSolver(void *cvode_mem, - SUNNonlinearSolver NLS); +/* Optional step adaptivity input functions */ +SUNDIALS_EXPORT +int CVodeSetEtaFixedStepBounds(void* cvode_mem, realtype eta_min_fx, + realtype eta_max_fx); +SUNDIALS_EXPORT +int CVodeSetEtaMaxFirstStep(void* cvode_mem, realtype eta_max_fs); +SUNDIALS_EXPORT +int CVodeSetEtaMaxEarlyStep(void* cvode_mem, realtype eta_max_es); +SUNDIALS_EXPORT +int CVodeSetNumStepsEtaMaxEarlyStep(void* cvode_mem, long int small_nst); +SUNDIALS_EXPORT +int CVodeSetEtaMax(void* cvode_mem, realtype eta_max_gs); +SUNDIALS_EXPORT +int CVodeSetEtaMin(void* cvode_mem, realtype eta_min); +SUNDIALS_EXPORT +int CVodeSetEtaMinErrFail(void *cvode_mem, realtype eta_min_ef); +SUNDIALS_EXPORT +int CVodeSetEtaMaxErrFail(void* cvode_mem, realtype eta_max_ef); +SUNDIALS_EXPORT +int CVodeSetNumFailsEtaMaxErrFail(void *cvode_mem, int small_nef); +SUNDIALS_EXPORT +int CVodeSetEtaConvFail(void* cvode_mem, realtype eta_cf); /* Rootfinding initialization function */ SUNDIALS_EXPORT int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g); @@ -222,6 +258,14 @@ SUNDIALS_EXPORT int CVodeSetNoInactiveRootWarn(void *cvode_mem); SUNDIALS_EXPORT int CVode(void *cvode_mem, realtype tout, N_Vector yout, realtype *tret, int itask); +/* Utility functions to update/compute y based on ycor */ +SUNDIALS_EXPORT int CVodeComputeState(void *cvode_mem, N_Vector ycor, + N_Vector y); +SUNDIALS_EXPORT int CVodeComputeStateSens(void *cvode_mem, N_Vector *yScor, + N_Vector *yS); +SUNDIALS_EXPORT int CVodeComputeStateSens1(void *cvode_mem, int idx, + N_Vector yScor1, N_Vector yS1); + /* Dense output function */ SUNDIALS_EXPORT int CVodeGetDky(void *cvode_mem, realtype t, int k, N_Vector dky); @@ -259,17 +303,39 @@ SUNDIALS_EXPORT int CVodeGetIntegratorStats(void *cvode_mem, long int *nsteps, int *qlast, int *qcur, realtype *hinused, realtype *hlast, realtype *hcur, realtype *tcur); +SUNDIALS_EXPORT int CVodeGetNonlinearSystemData(void *cvode_mem, realtype *tcur, + N_Vector *ypred, N_Vector *yn, + N_Vector *fn, realtype *gamma, + realtype *rl1, N_Vector *zn1, + void **user_data); +SUNDIALS_EXPORT int CVodeGetNonlinearSystemDataSens(void *cvode_mem, + realtype *tcur, + N_Vector **ySpred, + N_Vector **ySn, + realtype *gamma, + realtype *rl1, + N_Vector **zn1, + void **user_data); SUNDIALS_EXPORT int CVodeGetNumNonlinSolvIters(void *cvode_mem, long int *nniters); SUNDIALS_EXPORT int CVodeGetNumNonlinSolvConvFails(void *cvode_mem, - long int *nncfails); + long int *nnfails); SUNDIALS_EXPORT int CVodeGetNonlinSolvStats(void *cvode_mem, long int *nniters, - long int *nncfails); + long int *nnfails); +SUNDIALS_EXPORT int CVodeGetNumStepSolveFails(void *cvode_mem, + long int *nncfails); +SUNDIALS_EXPORT int CVodeGetUserData(void *cvode_mem, void **user_data); +SUNDIALS_EXPORT int CVodePrintAllStats(void *cvode_mem, FILE *outfile, + SUNOutputFormat fmt); SUNDIALS_EXPORT char *CVodeGetReturnFlagName(long int flag); /* Free function */ SUNDIALS_EXPORT void CVodeFree(void **cvode_mem); +/* CVLS interface function that depends on CVRhsFn */ +SUNDIALS_EXPORT int CVodeSetJacTimesRhsFn(void *cvode_mem, + CVRhsFn jtimesRhsFn); + /* --------------------------------- * Exported Functions -- Quadrature @@ -372,14 +438,21 @@ SUNDIALS_EXPORT int CVodeGetSensStats(void *cvode_mem, long int *nfSevals, SUNDIALS_EXPORT int CVodeGetSensNumNonlinSolvIters(void *cvode_mem, long int *nSniters); SUNDIALS_EXPORT int CVodeGetSensNumNonlinSolvConvFails(void *cvode_mem, - long int *nSncfails); + long int *nSnfails); +SUNDIALS_EXPORT int CVodeGetSensNonlinSolvStats(void *cvode_mem, + long int *nSniters, + long int *nSnfails); +SUNDIALS_EXPORT int CVodeGetNumStepSensSolveFails(void *cvode_mem, + long int *nSncfails); SUNDIALS_EXPORT int CVodeGetStgrSensNumNonlinSolvIters(void *cvode_mem, long int *nSTGR1niters); SUNDIALS_EXPORT int CVodeGetStgrSensNumNonlinSolvConvFails(void *cvode_mem, - long int *nSTGR1ncfails); -SUNDIALS_EXPORT int CVodeGetSensNonlinSolvStats(void *cvode_mem, - long int *nSniters, - long int *nSncfails); + long int *nSTGR1nfails); +SUNDIALS_EXPORT int CVodeGetStgrSensNonlinSolvStats(void *cvode_mem, + long int *nSTGR1niters, + long int *nSTGR1nfails); +SUNDIALS_EXPORT int CVodeGetNumStepStgrSensSolveFails(void *cvode_mem, + long int *nSTGR1ncfails); /* Free function */ SUNDIALS_EXPORT void CVodeSensFree(void *cvode_mem); @@ -537,6 +610,9 @@ typedef struct { SUNDIALS_EXPORT int CVodeGetAdjCheckPointsInfo(void *cvode_mem, CVadjCheckPointRec *ckpnt); +/* CVLS interface function that depends on CVRhsFn */ +int CVodeSetJacTimesRhsFnB(void *cvode_mem, int which, CVRhsFn jtimesRhsFn); + /* Undocumented Optional Output Functions For Backward Problems */ diff --git a/inst/include/cvodes/cvodes_bandpre.h b/inst/include/cvodes/cvodes_bandpre.h index aec8709..32c8519 100644 --- a/inst/include/cvodes/cvodes_bandpre.h +++ b/inst/include/cvodes/cvodes_bandpre.h @@ -3,7 +3,7 @@ * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/inst/include/cvodes/cvodes_bbdpre.h b/inst/include/cvodes/cvodes_bbdpre.h index 13dea8d..194a65b 100644 --- a/inst/include/cvodes/cvodes_bbdpre.h +++ b/inst/include/cvodes/cvodes_bbdpre.h @@ -3,7 +3,7 @@ * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/inst/include/cvodes/cvodes_diag.h b/inst/include/cvodes/cvodes_diag.h index b98e341..f7963d0 100644 --- a/inst/include/cvodes/cvodes_diag.h +++ b/inst/include/cvodes/cvodes_diag.h @@ -2,7 +2,7 @@ * Programmer(s): Radu Serban @ LLNL * --------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/inst/include/cvodes/cvodes_direct.h b/inst/include/cvodes/cvodes_direct.h index cef57b0..378a049 100644 --- a/inst/include/cvodes/cvodes_direct.h +++ b/inst/include/cvodes/cvodes_direct.h @@ -2,7 +2,7 @@ * Programmer(s): Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -38,27 +38,37 @@ typedef CVLsJacFnBS CVDlsJacFnBS; Exported Functions (wrappers for equivalent routines in cvodes_ls.h) ====================================================================*/ +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetLinearSolver instead") int CVDlsSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, SUNMatrix A); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacFn instead") int CVDlsSetJacFn(void *cvode_mem, CVDlsJacFn jac); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLinWorkSpace instead") int CVDlsGetWorkSpace(void *cvode_mem, long int *lenrwLS, long int *leniwLS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumJacEvals instead") int CVDlsGetNumJacEvals(void *cvode_mem, long int *njevals); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumLinRhsEvals instead") int CVDlsGetNumRhsEvals(void *cvode_mem, long int *nfevalsLS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLastLinFlag instead") int CVDlsGetLastFlag(void *cvode_mem, long int *flag); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLinReturnFlagName instead") char *CVDlsGetReturnFlagName(long int flag); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetLinearSolverB instead") int CVDlsSetLinearSolverB(void *cvode_mem, int which, SUNLinearSolver LS, SUNMatrix A); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacFnB instead") int CVDlsSetJacFnB(void *cvode_mem, int which, CVDlsJacFnB jacB); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacFnBS instead") int CVDlsSetJacFnBS(void *cvode_mem, int which, CVDlsJacFnBS jacBS); diff --git a/inst/include/cvodes/cvodes_ls.h b/inst/include/cvodes/cvodes_ls.h index 56300d7..e017c2a 100644 --- a/inst/include/cvodes/cvodes_ls.h +++ b/inst/include/cvodes/cvodes_ls.h @@ -3,7 +3,7 @@ * Radu Serban @ LLNL * ---------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -96,9 +96,15 @@ SUNDIALS_EXPORT int CVodeSetLinearSolver(void *cvode_mem, -----------------------------------------------------------------*/ SUNDIALS_EXPORT int CVodeSetJacFn(void *cvode_mem, CVLsJacFn jac); -SUNDIALS_EXPORT int CVodeSetMaxStepsBetweenJac(void *cvode_mem, - long int msbj); +SUNDIALS_EXPORT int CVodeSetJacEvalFrequency(void *cvode_mem, + long int msbj); +SUNDIALS_EXPORT int CVodeSetLinearSolutionScaling(void *cvode_mem, + booleantype onoff); +SUNDIALS_EXPORT int CVodeSetDeltaGammaMaxBadJac(void *cvode_mem, + realtype dgmax_jbad); SUNDIALS_EXPORT int CVodeSetEpsLin(void *cvode_mem, realtype eplifac); +SUNDIALS_EXPORT int CVodeSetLSNormFactor(void *arkode_mem, + realtype nrmfac); SUNDIALS_EXPORT int CVodeSetPreconditioner(void *cvode_mem, CVLsPrecSetupFn pset, CVLsPrecSolveFn psolve); @@ -111,6 +117,9 @@ SUNDIALS_EXPORT int CVodeSetLinSysFn(void *cvode_mem, CVLsLinSysFn linsys); Optional outputs from the CVLS linear solver interface -----------------------------------------------------------------*/ +SUNDIALS_EXPORT int CVodeGetJac(void *cvode_mem, SUNMatrix *J); +SUNDIALS_EXPORT int CVodeGetJacTime(void *cvode_mem, sunrealtype *t_J); +SUNDIALS_EXPORT int CVodeGetJacNumSteps(void *cvode_mem, long int *nst_J); SUNDIALS_EXPORT int CVodeGetLinWorkSpace(void *cvode_mem, long int *lenrwLS, long int *leniwLS); @@ -130,11 +139,15 @@ SUNDIALS_EXPORT int CVodeGetNumJtimesEvals(void *cvode_mem, long int *njvevals); SUNDIALS_EXPORT int CVodeGetNumLinRhsEvals(void *cvode_mem, long int *nfevalsLS); +SUNDIALS_EXPORT int CVodeGetLinSolveStats(void* cvode_mem, + long int* njevals, long int* nfevalsLS, + long int* nliters, long int* nlcfails, + long int* npevals, long int* npsolves, + long int* njtsetups, long int* njtimes); SUNDIALS_EXPORT int CVodeGetLastLinFlag(void *cvode_mem, long int *flag); SUNDIALS_EXPORT char *CVodeGetLinReturnFlagName(long int flag); - /*================================================================= Backward problems =================================================================*/ @@ -227,6 +240,12 @@ SUNDIALS_EXPORT int CVodeSetJacFnBS(void *cvode_mem, int which, SUNDIALS_EXPORT int CVodeSetEpsLinB(void *cvode_mem, int which, realtype eplifacB); +SUNDIALS_EXPORT int CVodeSetLSNormFactorB(void *arkode_mem, int which, + realtype nrmfacB); + +SUNDIALS_EXPORT int CVodeSetLinearSolutionScalingB(void *cvode_mem, int which, + booleantype onoffB); + SUNDIALS_EXPORT int CVodeSetPreconditionerB(void *cvode_mem, int which, CVLsPrecSetupFnB psetB, CVLsPrecSolveFnB psolveB); @@ -246,7 +265,6 @@ SUNDIALS_EXPORT int CVodeSetLinSysFnB(void *cvode_mem, int which, SUNDIALS_EXPORT int CVodeSetLinSysFnBS(void *cvode_mem, int which, CVLsLinSysFnBS linsys); - #ifdef __cplusplus } #endif diff --git a/inst/include/cvodes/cvodes_proj.h b/inst/include/cvodes/cvodes_proj.h new file mode 100644 index 0000000..fb61de3 --- /dev/null +++ b/inst/include/cvodes/cvodes_proj.h @@ -0,0 +1,58 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * Based on CPODES by Radu Serban @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the header file for CVODE's projection interface. + * ---------------------------------------------------------------------------*/ + +#ifndef _CVPROJ_H +#define _CVPROJ_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ----------------------------------------------------------------------------- + * CVProj user-supplied function prototypes + * ---------------------------------------------------------------------------*/ + +typedef int (*CVProjFn)(realtype t, N_Vector ycur, N_Vector corr, + realtype epsProj, N_Vector err, void *user_data); + + +/* ----------------------------------------------------------------------------- + * CVProj Exported functions + * ---------------------------------------------------------------------------*/ + +/* Projection initialization functions */ +SUNDIALS_EXPORT int CVodeSetProjFn(void *cvode_mem, CVProjFn pfun); + +/* Optional input functions */ +SUNDIALS_EXPORT int CVodeSetProjErrEst(void *cvode_mem, booleantype onoff); +SUNDIALS_EXPORT int CVodeSetProjFrequency(void *cvode_mem, long int proj_freq); +SUNDIALS_EXPORT int CVodeSetMaxNumProjFails(void *cvode_mem, int max_fails); +SUNDIALS_EXPORT int CVodeSetEpsProj(void *cvode_mem, realtype eps); +SUNDIALS_EXPORT int CVodeSetProjFailEta(void *cvode_mem, realtype eta); + +/* Optional output functions */ +SUNDIALS_EXPORT int CVodeGetNumProjEvals(void *cvode_mem, long int *nproj); +SUNDIALS_EXPORT int CVodeGetNumProjFails(void *cvode_mem, long int *nprf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/cvodes/cvodes_spils.h b/inst/include/cvodes/cvodes_spils.h index da3bc71..78f2179 100644 --- a/inst/include/cvodes/cvodes_spils.h +++ b/inst/include/cvodes/cvodes_spils.h @@ -2,7 +2,7 @@ * Programmer(s): Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -47,54 +47,74 @@ typedef CVLsJacTimesVecFnBS CVSpilsJacTimesVecFnBS; Exported Functions (wrappers for equivalent routines in cvodes_ls.h) ====================================================================*/ +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetLinearSolver instead") int CVSpilsSetLinearSolver(void *cvode_mem, SUNLinearSolver LS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetEpsLin instead") int CVSpilsSetEpsLin(void *cvode_mem, realtype eplifac); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetPreconditioner instead") int CVSpilsSetPreconditioner(void *cvode_mem, CVSpilsPrecSetupFn pset, CVSpilsPrecSolveFn psolve); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacTimes instead") int CVSpilsSetJacTimes(void *cvode_mem, CVSpilsJacTimesSetupFn jtsetup, CVSpilsJacTimesVecFn jtimes); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLinWorkSpace instead") int CVSpilsGetWorkSpace(void *cvode_mem, long int *lenrwLS, long int *leniwLS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumPrecEvals instead") int CVSpilsGetNumPrecEvals(void *cvode_mem, long int *npevals); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumPrecSolves instead") int CVSpilsGetNumPrecSolves(void *cvode_mem, long int *npsolves); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumLinIters instead") int CVSpilsGetNumLinIters(void *cvode_mem, long int *nliters); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumConvFails instead") int CVSpilsGetNumConvFails(void *cvode_mem, long int *nlcfails); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumJTSetupEvals instead") int CVSpilsGetNumJTSetupEvals(void *cvode_mem, long int *njtsetups); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumJtimesEvals instead") int CVSpilsGetNumJtimesEvals(void *cvode_mem, long int *njvevals); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetNumLinRhsEvals instead") int CVSpilsGetNumRhsEvals(void *cvode_mem, long int *nfevalsLS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLastLinFlag instead") int CVSpilsGetLastFlag(void *cvode_mem, long int *flag); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeGetLinReturnFlagName instead") char *CVSpilsGetReturnFlagName(long int flag); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetLinearSolverB instead") int CVSpilsSetLinearSolverB(void *cvode_mem, int which, SUNLinearSolver LS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetEpsLinB instead") int CVSpilsSetEpsLinB(void *cvode_mem, int which, realtype eplifacB); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetPreconditionerB instead") int CVSpilsSetPreconditionerB(void *cvode_mem, int which, CVSpilsPrecSetupFnB psetB, CVSpilsPrecSolveFnB psolveB); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetPreconditionerBS instead") int CVSpilsSetPreconditionerBS(void *cvode_mem, int which, CVSpilsPrecSetupFnBS psetBS, CVSpilsPrecSolveFnBS psolveBS); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacTimesB instead") int CVSpilsSetJacTimesB(void *cvode_mem, int which, CVSpilsJacTimesSetupFnB jtsetupB, CVSpilsJacTimesVecFnB jtimesB); +SUNDIALS_DEPRECATED_EXPORT_MSG("use CVodeSetJacTimesBS instead") int CVSpilsSetJacTimesBS(void *cvode_mem, int which, CVSpilsJacTimesSetupFnBS jtsetupBS, CVSpilsJacTimesVecFnBS jtimesBS); diff --git a/inst/include/nvector/nvector_cuda.h b/inst/include/nvector/nvector_cuda.h new file mode 100644 index 0000000..a24d0f4 --- /dev/null +++ b/inst/include/nvector/nvector_cuda.h @@ -0,0 +1,207 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles and Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the CUDA implementation of the + * NVECTOR module. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_CUDA_H +#define _NVECTOR_CUDA_H + +#include +#include + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * CUDA implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_Cuda +{ + sunindextype length; + booleantype own_helper; + SUNMemory host_data; + SUNMemory device_data; + SUNCudaExecPolicy* stream_exec_policy; + SUNCudaExecPolicy* reduce_exec_policy; + SUNMemoryHelper mem_helper; + void* priv; /* 'private' data */ +}; + +typedef struct _N_VectorContent_Cuda *N_VectorContent_Cuda; + +/* + * ----------------------------------------------------------------- + * NVECTOR_CUDA implementation specific functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Cuda(SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNew_Cuda(sunindextype length, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewManaged_Cuda(sunindextype length, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Cuda(sunindextype length, + booleantype use_managed_mem, + SUNMemoryHelper helper, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMake_Cuda(sunindextype length, + realtype *h_vdata, + realtype *d_vdata, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMakeManaged_Cuda(sunindextype length, + realtype *vdata, + SUNContext sunctx); +SUNDIALS_EXPORT void N_VSetHostArrayPointer_Cuda(realtype* h_vdata, N_Vector v); +SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Cuda(realtype* d_vdata, N_Vector v); +SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Cuda(N_Vector x); +SUNDIALS_EXPORT int N_VSetKernelExecPolicy_Cuda(N_Vector x, + SUNCudaExecPolicy* stream_exec_policy, + SUNCudaExecPolicy* reduce_exec_policy); +SUNDIALS_EXPORT void N_VCopyToDevice_Cuda(N_Vector v); +SUNDIALS_EXPORT void N_VCopyFromDevice_Cuda(N_Vector v); + +SUNDIALS_STATIC_INLINE +sunindextype N_VGetLength_Cuda(N_Vector x) +{ + N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content; + return content->length; +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetHostArrayPointer_Cuda(N_Vector x) +{ + N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content; + return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr); +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetDeviceArrayPointer_Cuda(N_Vector x) +{ + N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content; + return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr); +} + +/* + * ----------------------------------------------------------------- + * NVECTOR API functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_STATIC_INLINE +N_Vector_ID N_VGetVectorID_Cuda(N_Vector /*v*/) +{ + return SUNDIALS_NVEC_CUDA; +} + +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Cuda(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Cuda(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Cuda(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Cuda(N_Vector v, sunindextype *lrw, sunindextype *liw); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Cuda(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Cuda(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Cuda(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Cuda(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Cuda(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Cuda(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Cuda(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Cuda(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Cuda(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Cuda(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Cuda(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Cuda(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Cuda(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Cuda(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Cuda(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Cuda(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Cuda(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Cuda(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Cuda(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Cuda(int nvec, realtype* c, N_Vector* X, + N_Vector Z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Cuda(int nvec, realtype* c, N_Vector X, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_Cuda(int nvec, N_Vector x, N_Vector* Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Cuda(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Cuda(int nvec, realtype* c, N_Vector* X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Cuda(int nvec, realtype c, N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Cuda(int nvec, int nsum, + realtype* a, N_Vector* X, + N_Vector** Y, N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Cuda(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Cuda(int nvec, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Cuda(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Cuda(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Cuda(N_Vector x, N_Vector w, N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Cuda(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Cuda(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Cuda(N_Vector x, void *buf); + +/* OPTIONAL operations for debugging */ +SUNDIALS_EXPORT void N_VPrint_Cuda(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_Cuda(N_Vector v, FILE *outfile); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Cuda(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Cuda(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Cuda(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Cuda(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_hip.h b/inst/include/nvector/nvector_hip.h new file mode 100644 index 0000000..2a38c06 --- /dev/null +++ b/inst/include/nvector/nvector_hip.h @@ -0,0 +1,207 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel McGreer and Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the HIP implementation of the + * NVECTOR module. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_HIP_H +#define _NVECTOR_HIP_H + +#include +#include + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * HIP implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_Hip +{ + sunindextype length; + booleantype own_helper; + SUNMemory host_data; + SUNMemory device_data; + SUNHipExecPolicy* stream_exec_policy; + SUNHipExecPolicy* reduce_exec_policy; + SUNMemoryHelper mem_helper; + void* priv; /* 'private' data */ +}; + +typedef struct _N_VectorContent_Hip *N_VectorContent_Hip; + +/* + * ----------------------------------------------------------------- + * NVECTOR_HIP implementation specific functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Hip(SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNew_Hip(sunindextype length, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewManaged_Hip(sunindextype length, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Hip(sunindextype length, + booleantype use_managed_mem, + SUNMemoryHelper helper, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMake_Hip(sunindextype length, + realtype *h_vdata, + realtype *d_vdata, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMakeManaged_Hip(sunindextype length, + realtype *vdata, + SUNContext sunctx); +SUNDIALS_EXPORT void N_VSetHostArrayPointer_Hip(realtype* h_vdata, N_Vector v); +SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Hip(realtype* d_vdata, N_Vector v); +SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Hip(N_Vector x); +SUNDIALS_EXPORT int N_VSetKernelExecPolicy_Hip(N_Vector x, + SUNHipExecPolicy* stream_exec_policy, + SUNHipExecPolicy* reduce_exec_policy); +SUNDIALS_EXPORT void N_VCopyToDevice_Hip(N_Vector v); +SUNDIALS_EXPORT void N_VCopyFromDevice_Hip(N_Vector v); + +SUNDIALS_STATIC_INLINE +sunindextype N_VGetLength_Hip(N_Vector x) +{ + N_VectorContent_Hip content = (N_VectorContent_Hip)x->content; + return content->length; +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetHostArrayPointer_Hip(N_Vector x) +{ + N_VectorContent_Hip content = (N_VectorContent_Hip)x->content; + return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr); +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetDeviceArrayPointer_Hip(N_Vector x) +{ + N_VectorContent_Hip content = (N_VectorContent_Hip)x->content; + return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr); +} + +/* + * ----------------------------------------------------------------- + * NVECTOR API functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_STATIC_INLINE +N_Vector_ID N_VGetVectorID_Hip(N_Vector /*v*/) +{ + return SUNDIALS_NVEC_HIP; +} + +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Hip(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Hip(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Hip(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Hip(N_Vector v, sunindextype *lrw, sunindextype *liw); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Hip(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Hip(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Hip(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Hip(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Hip(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Hip(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Hip(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Hip(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Hip(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Hip(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Hip(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Hip(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Hip(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Hip(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Hip(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Hip(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Hip(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Hip(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Hip(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Hip(int nvec, realtype* c, N_Vector* X, + N_Vector Z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Hip(int nvec, realtype* c, N_Vector X, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_Hip(int nvec, N_Vector x, N_Vector* Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Hip(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Hip(int nvec, realtype* c, N_Vector* X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Hip(int nvec, realtype c, N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Hip(int nvec, int nsum, + realtype* a, N_Vector* X, + N_Vector** Y, N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Hip(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Hip(int nvec, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Hip(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Hip(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Hip(N_Vector x, N_Vector w, N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Hip(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Hip(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Hip(N_Vector x, void *buf); + +/* OPTIONAL operations for debugging */ +SUNDIALS_EXPORT void N_VPrint_Hip(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_Hip(N_Vector v, FILE *outfile); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Hip(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Hip(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Hip(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Hip(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_kokkos.hpp b/inst/include/nvector/nvector_kokkos.hpp new file mode 100644 index 0000000..5235b01 --- /dev/null +++ b/inst/include/nvector/nvector_kokkos.hpp @@ -0,0 +1,656 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel McGreer, Cody Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2020, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the header file an NVector implementation using Kokkos. + * ---------------------------------------------------------------------------*/ + +#ifndef _NVECTOR_KOKKOS_HPP +#define _NVECTOR_KOKKOS_HPP + +#include +#include +#include + +namespace sundials { +namespace kokkos { + +// Forward declaration +template +class Vector; + +// Get the Kokkos vector wrapped by an N_Vector +template +inline VectorType* GetVec(N_Vector v) +{ + return static_cast(v->content); +} + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +/* N_Vector API ops */ + +inline N_Vector_ID N_VGetVectorID_Kokkos(N_Vector v) +{ + return SUNDIALS_NVEC_KOKKOS; +} + +template +sunindextype N_VGetLength_Kokkos(N_Vector v) +{ + auto vec{GetVec(v)}; + return static_cast(vec->Length()); +} + +template +sunrealtype* N_VGetArrayPointer_Kokkos(N_Vector v) +{ + auto vec{GetVec(v)}; + return vec->HostView().data(); +} + +template +sunrealtype* N_VGetDeviceArrayPointer_Kokkos(N_Vector v) +{ + auto vec{GetVec(v)}; + return vec->View().data(); +} + +template +N_Vector N_VClone_Kokkos(N_Vector w) +{ + auto vec{GetVec(w)}; + auto new_vec{new VectorType(*vec)}; + return new_vec->Convert(); +} + +template +void N_VDestroy_Kokkos(N_Vector v) +{ + auto vec{GetVec(v)}; + delete vec; + return; +} + +template +void N_VPrint_Kokkos(N_Vector v) +{ + auto vec{GetVec(v)}; +} + +template +void N_VPrintFile_Kokkos(N_Vector v, FILE* outfile) +{ + auto vec{GetVec(v)}; +} + +/* OPTIONAL local reduction kernels (no parallel communication) */ + +template +sunrealtype N_VWSqrSumLocal_Kokkos(N_Vector x, N_Vector w) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto wvec{GetVec(w)}; + auto wdata{wvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{0.0}; + Kokkos::parallel_reduce( + "N_VWSqrSumLocal", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + update += (xdata(i) * wdata(i) * xdata(i) * wdata(i)); + }, + gpu_result); + + return gpu_result; +} + +template +sunrealtype N_VWSqrSumMaskLocal_Kokkos(N_Vector x, N_Vector w, N_Vector id) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto wvec{GetVec(w)}; + auto wdata{wvec->View()}; + auto idvec{GetVec(id)}; + auto iddata{idvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{0.0}; + Kokkos::parallel_reduce( + "N_VWSqrSumMaskLocal", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + if (iddata(i) > sunrealtype{0.0}) + update += (xdata(i) * wdata(i) * xdata(i) * wdata(i)); + }, + gpu_result); + + return gpu_result; +} + +/* standard vector operations */ + +template +void N_VAbs_Kokkos(N_Vector x, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VAbs", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = std::abs(xdata(i)); }); +} + +template +void N_VAddConst_Kokkos(N_Vector x, sunrealtype b, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VAddConst", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) + b; }); +} + +template +void N_VCompare_Kokkos(sunrealtype c, N_Vector x, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VCompare", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { + zdata(i) = std::abs(xdata(i)) >= c ? sunrealtype{1.0} : sunrealtype{0.0}; + }); +} + +template +void N_VConst_Kokkos(sunrealtype c, N_Vector z) +{ + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VConst", typename VectorType::range_policy(0, zvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = c; }); +} + +template +booleantype N_VConstrMask_Kokkos(N_Vector c, N_Vector x, N_Vector m) +{ + auto cvec{GetVec(c)}; + auto cdata{cvec->View()}; + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto mvec{GetVec(m)}; + auto mdata{mvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype sum{0.0}; + Kokkos::parallel_reduce( + "N_VConstrMask", typename VectorType::range_policy(0, mvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + bool test = (std::abs(cdata(i)) > sunrealtype{1.5} && + cdata(i) * xdata(i) <= sunrealtype{0.0}) || + (std::abs(cdata(i)) > sunrealtype{0.5} && + cdata(i) * xdata(i) < sunrealtype{0.0}); + mdata(i) = test ? sunrealtype{1.0} : sunrealtype{0.0}; + update += mdata(i); + }, + sum); + + return (sum < sunrealtype{0.5}); +} + +template +void N_VDiv_Kokkos(N_Vector x, N_Vector y, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto yvec{GetVec(y)}; + auto zvec{GetVec(z)}; + auto xdata{xvec->View()}; + auto ydata{yvec->View()}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VDiv", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) / ydata(i); }); +} + +template +sunrealtype N_VDotProd_Kokkos(N_Vector x, N_Vector y) +{ + auto xvec{GetVec(x)}; + auto yvec{GetVec(y)}; + auto xdata{xvec->View()}; + auto ydata{yvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{0.0}; + Kokkos::parallel_reduce( + "N_VDotProd", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + update += xdata(i) * ydata(i); + }, + gpu_result); + + return gpu_result; +} + +template +void N_VInv_Kokkos(N_Vector x, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VInv", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = sunrealtype{1.0} / xdata(i); }); +} + +template +booleantype N_VInvTest_Kokkos(N_Vector x, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype minimum{0.0}; + Kokkos::parallel_reduce( + "N_VInvTest", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + if (xdata(i) == sunrealtype{0.0}) { update += sunrealtype{1.0}; } + else { zdata(i) = sunrealtype{1.0} / xdata(i); } + }, + minimum); + + return (minimum < sunrealtype{0.5}); +} + +template +sunrealtype N_VL1Norm_Kokkos(N_Vector x) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{0.0}; + Kokkos::parallel_reduce( + "N_VL1Norm", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + update += (std::abs(xdata(i))); + }, + gpu_result); + + return gpu_result; +} + +template +void N_VLinearSum_Kokkos(sunrealtype a, N_Vector x, sunrealtype b, N_Vector y, + N_Vector z) +{ + auto xvec{GetVec(x)}; + auto yvec{GetVec(y)}; + auto zvec{GetVec(z)}; + auto xdata{xvec->View()}; + auto ydata{yvec->View()}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VLinearSum", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = a * xdata(i) + b * ydata(i); }); +} + +template +sunrealtype N_VMaxNorm_Kokkos(N_Vector x) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{0.0}; + Kokkos::parallel_reduce( + "N_VMaxNorm", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + if (std::abs(xdata(i)) > update) update = std::abs(xdata(i)); + }, + Kokkos::Max(gpu_result)); + + return gpu_result; +} + +template +sunrealtype N_VMin_Kokkos(N_Vector x) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{std::numeric_limits::max()}; + Kokkos::parallel_reduce( + "N_VMin", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + if (xdata(i) < update) update = xdata(i); + }, + Kokkos::Min(gpu_result)); + + return gpu_result; +} + +template +sunrealtype N_VMinQuotient_Kokkos(N_Vector num, N_Vector denom) +{ + auto nvec{GetVec(num)}; + auto ndata{nvec->View()}; + auto dvec{GetVec(denom)}; + auto ddata{dvec->View()}; + + using size_type = typename VectorType::size_type; + + sunrealtype gpu_result{std::numeric_limits::max()}; + Kokkos::parallel_reduce( + "N_VMinQuotient", typename VectorType::range_policy(0, nvec->Length()), + KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { + if (ddata(i) != sunrealtype{0.0}) + { + if ((ndata(i) / ddata(i)) < update) update = ndata(i) / ddata(i); + } + }, + Kokkos::Min(gpu_result)); + + return gpu_result; +} + +template +void N_VProd_Kokkos(N_Vector x, N_Vector y, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto yvec{GetVec(y)}; + auto zvec{GetVec(z)}; + auto xdata{xvec->View()}; + auto ydata{yvec->View()}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VProd", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) * ydata(i); }); +} + +template +void N_VScale_Kokkos(sunrealtype c, N_Vector x, N_Vector z) +{ + auto xvec{GetVec(x)}; + auto xdata{xvec->View()}; + auto zvec{GetVec(z)}; + auto zdata{zvec->View()}; + + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "N_VScale", typename VectorType::range_policy(0, xvec->Length()), + KOKKOS_LAMBDA(const size_type i) { zdata(i) = c * xdata(i); }); +} + +template +sunrealtype N_VWL2Norm_Kokkos(N_Vector x, N_Vector w) +{ + return std::sqrt(impl::N_VWSqrSumLocal_Kokkos(x, w)); +} + +template +sunrealtype N_VWrmsNorm_Kokkos(N_Vector x, N_Vector w) +{ + auto xvec{GetVec(x)}; + return std::sqrt(impl::N_VWSqrSumLocal_Kokkos(x, w) / + static_cast(xvec->Length())); +} + +template +sunrealtype N_VWrmsNormMask_Kokkos(N_Vector x, N_Vector w, N_Vector id) +{ + auto xvec{GetVec(x)}; + return std::sqrt(impl::N_VWSqrSumMaskLocal_Kokkos(x, w, id) / + static_cast(xvec->Length())); +} + +} // namespace impl + +// ============================================================================= +// Public namespace +// ============================================================================= + +template +class Vector : public sundials::impl::BaseNVector, + public sundials::ConvertibleTo +{ +public: + using view_type = Kokkos::View; + using size_type = typename view_type::size_type; + using host_view_type = typename view_type::HostMirror; + using memory_space = MemorySpace; + using exec_space = typename MemorySpace::execution_space; + using range_policy = Kokkos::RangePolicy; + + // Default constructor + Vector() = default; + + Vector(size_type length, SUNContext sunctx) + : view_("Vector device view", length), + host_view_(Kokkos::create_mirror_view(view_)), + sundials::impl::BaseNVector(sunctx) + { + initNvector(); + } + + Vector(view_type view, SUNContext sunctx) + : view_(view), + host_view_(Kokkos::create_mirror_view(view_)), + sundials::impl::BaseNVector(sunctx) + { + initNvector(); + } + + Vector(view_type view, host_view_type host_view, SUNContext sunctx) + : view_(view), host_view_(host_view), sundials::impl::BaseNVector(sunctx) + { + initNvector(); + } + + // Move constructor + Vector(Vector&& that_vector) noexcept + : view_(std::move(that_vector.view_)), + host_view_(std::move(that_vector.host_view_)), + sundials::impl::BaseNVector(std::move(that_vector)) + { + initNvector(); + } + + // Copy constructor + Vector(const Vector& that_vector) + : view_("Vector device view", that_vector.Length()), + host_view_(Kokkos::create_mirror_view(view_)), + sundials::impl::BaseNVector(that_vector) + { + initNvector(); + } + + // Move assignment + Vector& operator=(Vector&& rhs) noexcept + { + view_ = std::move(rhs.view_); + host_view_ = std::move(rhs.host_view_); + + sundials::impl::BaseNVector::operator=(std::move(rhs)); + + return *this; + } + + // Copy assignment + Vector& operator=(const Vector& rhs) + { + view_ = Kokkos::View("Vector device view", rhs.Length()); + host_view_ = Kokkos::create_mirror_view(view_); + + sundials::impl::BaseNVector::operator=(rhs); + + return *this; + } + + // Default destructor + virtual ~Vector() = default; + + // Accessors + + size_type Length() const { return static_cast(view_.extent(0)); } + + view_type View() { return view_; } + + host_view_type HostView() { return host_view_; } + + // Override ConvertibleTo operations + + operator N_Vector() override { return object_.get(); } + + operator N_Vector() const override { return object_.get(); } + + N_Vector Convert() override { return object_.get(); } + + N_Vector Convert() const override { return object_.get(); } + +private: + view_type view_; + host_view_type host_view_; + + void initNvector() + { + using this_type = Vector; + + this->object_->content = this; + + /* constructors, destructors, and utility operations */ + this->object_->ops->nvclone = impl::N_VClone_Kokkos; + this->object_->ops->nvdestroy = impl::N_VDestroy_Kokkos; + this->object_->ops->nvgetarraypointer = + impl::N_VGetArrayPointer_Kokkos; + this->object_->ops->nvgetdevicearraypointer = + impl::N_VGetDeviceArrayPointer_Kokkos; + this->object_->ops->nvgetlength = impl::N_VGetLength_Kokkos; + this->object_->ops->nvgetvectorid = impl::N_VGetVectorID_Kokkos; + + /* standard vector operations */ + this->object_->ops->nvabs = impl::N_VAbs_Kokkos; + this->object_->ops->nvaddconst = impl::N_VAddConst_Kokkos; + this->object_->ops->nvcompare = impl::N_VCompare_Kokkos; + this->object_->ops->nvconst = impl::N_VConst_Kokkos; + this->object_->ops->nvconstrmask = impl::N_VConstrMask_Kokkos; + this->object_->ops->nvdiv = impl::N_VDiv_Kokkos; + this->object_->ops->nvdotprod = impl::N_VDotProd_Kokkos; + this->object_->ops->nvinv = impl::N_VInv_Kokkos; + this->object_->ops->nvinvtest = impl::N_VInvTest_Kokkos; + this->object_->ops->nvl1norm = impl::N_VL1Norm_Kokkos; + this->object_->ops->nvlinearsum = impl::N_VLinearSum_Kokkos; + this->object_->ops->nvmaxnorm = impl::N_VMaxNorm_Kokkos; + this->object_->ops->nvmin = impl::N_VMin_Kokkos; + this->object_->ops->nvminquotient = impl::N_VMinQuotient_Kokkos; + this->object_->ops->nvprod = impl::N_VProd_Kokkos; + this->object_->ops->nvscale = impl::N_VScale_Kokkos; + this->object_->ops->nvwl2norm = impl::N_VWL2Norm_Kokkos; + this->object_->ops->nvwrmsnorm = impl::N_VWrmsNorm_Kokkos; + this->object_->ops->nvwrmsnormmask = impl::N_VWrmsNormMask_Kokkos; + + /* local reduction operations */ + this->object_->ops->nvconstrmasklocal = impl::N_VConstrMask_Kokkos; + this->object_->ops->nvdotprodlocal = impl::N_VDotProd_Kokkos; + this->object_->ops->nvinvtestlocal = impl::N_VInvTest_Kokkos; + this->object_->ops->nvl1normlocal = impl::N_VL1Norm_Kokkos; + this->object_->ops->nvmaxnormlocal = impl::N_VMaxNorm_Kokkos; + this->object_->ops->nvminlocal = impl::N_VMin_Kokkos; + this->object_->ops->nvminquotientlocal = + impl::N_VMinQuotient_Kokkos; + this->object_->ops->nvwsqrsumlocal = impl::N_VWSqrSumLocal_Kokkos; + this->object_->ops->nvwsqrsummasklocal = + impl::N_VWSqrSumMaskLocal_Kokkos; + } +}; + +template +void CopyToDevice(N_Vector v) +{ + auto vec{GetVec(v)}; + CopyToDevice(*vec); +} + +template +void CopyFromDevice(N_Vector v) +{ + auto vec{GetVec(v)}; + CopyFromDevice(*vec); +} + +template +void CopyToDevice(VectorType& v) +{ + Kokkos::deep_copy(v.View(), v.HostView()); +} + +template +void CopyFromDevice(VectorType& v) +{ + Kokkos::deep_copy(v.HostView(), v.View()); +} + +} // namespace kokkos +} // namespace sundials + +#endif diff --git a/inst/include/nvector/nvector_manyvector.h b/inst/include/nvector/nvector_manyvector.h new file mode 100644 index 0000000..2fe2a0d --- /dev/null +++ b/inst/include/nvector/nvector_manyvector.h @@ -0,0 +1,179 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the "ManyVector" implementation + * of the NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be found + * in the header file sundials_nvector.h. + * + * - The definitions of the types 'realtype' and 'sunindextype' can + * be found in the header file sundials_types.h, and it may be + * changed (at the configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type 'booleantype'. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_ManyVector(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_MANY_VECTOR_H +#define _NVECTOR_MANY_VECTOR_H + +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ----------------------------------------------------------------- + ManyVector implementation of N_Vector + ----------------------------------------------------------------- */ + +struct _N_VectorContent_ManyVector { + sunindextype num_subvectors; /* number of vectors attached */ + sunindextype global_length; /* overall global manyvector length */ + N_Vector* subvec_array; /* pointer to N_Vector array */ + booleantype own_data; /* flag indicating data ownership */ +}; + +typedef struct _N_VectorContent_ManyVector *N_VectorContent_ManyVector; + +/* ----------------------------------------------------------------- + functions exported by ManyVector + ----------------------------------------------------------------- */ + +SUNDIALS_EXPORT N_Vector N_VNew_ManyVector(sunindextype num_subvectors, + N_Vector *vec_array, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VGetSubvector_ManyVector(N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT realtype *N_VGetSubvectorArrayPointer_ManyVector(N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT int N_VSetSubvectorArrayPointer_ManyVector(realtype *v_data, N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT sunindextype N_VGetNumSubvectors_ManyVector(N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_ManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VPrint_ManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_ManyVector(N_Vector v, FILE *outfile); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_ManyVector(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_ManyVector(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_ManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_ManyVector(N_Vector v, sunindextype *lrw, + sunindextype *liw); +SUNDIALS_EXPORT sunindextype N_VGetLength_ManyVector(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetSubvectorLocalLength_ManyVector(N_Vector v, sunindextype vec_num); +SUNDIALS_EXPORT void N_VLinearSum_ManyVector(realtype a, N_Vector x, + realtype b, N_Vector y, + N_Vector z); +SUNDIALS_EXPORT void N_VConst_ManyVector(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_ManyVector(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_ManyVector(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_ManyVector(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_ManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_ManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_ManyVector(N_Vector x, realtype b, + N_Vector z); +SUNDIALS_EXPORT realtype N_VWrmsNorm_ManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_ManyVector(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT realtype N_VWL2Norm_ManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT void N_VCompare_ManyVector(realtype c, N_Vector x, N_Vector z); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_ManyVector(int nvec, realtype* c, + N_Vector* V, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_ManyVector(int nvec, realtype* a, + N_Vector x, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_ManyVector(int nvec, N_Vector x, + N_Vector *Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_ManyVector(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_ManyVector(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_ManyVector(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_ManyVector(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_ManyVector(int nvec, + N_Vector* X, + N_Vector* W, + N_Vector id, + realtype* nrm); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_ManyVector(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_ManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_ManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_ManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_ManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_ManyVector(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_ManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_ManyVector(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_ManyVector(N_Vector num, + N_Vector denom); + +/* OPTIONAL single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal_ManyVector(int nvec, N_Vector x, + N_Vector *Y, + realtype* dotprods); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_ManyVector(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_ManyVector(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_ManyVector(N_Vector x, void *buf); + +/* ----------------------------------------------------------------- + Enable / disable fused vector operations + ----------------------------------------------------------------- */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_ManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_ManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_ManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_ManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableDotProdMultiLocal_ManyVector(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/inst/include/nvector/nvector_mpimanyvector.h b/inst/include/nvector/nvector_mpimanyvector.h new file mode 100644 index 0000000..fbfd7fe --- /dev/null +++ b/inst/include/nvector/nvector_mpimanyvector.h @@ -0,0 +1,200 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the "MPIManyVector" implementation + * of the NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be found + * in the header file sundials_nvector.h. + * + * - The definitions of the types 'realtype' and 'sunindextype' can + * be found in the header file sundials_types.h, and it may be + * changed (at the configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type 'booleantype'. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_MPIManyVector(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_MPI_MANY_VECTOR_H +#define _NVECTOR_MPI_MANY_VECTOR_H + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ----------------------------------------------------------------- + ManyVector implementation of N_Vector + ----------------------------------------------------------------- */ + +struct _N_VectorContent_MPIManyVector { + MPI_Comm comm; /* overall MPI communicator */ + sunindextype num_subvectors; /* number of vectors attached */ + sunindextype global_length; /* overall global manyvector length */ + N_Vector* subvec_array; /* pointer to N_Vector array */ + booleantype own_data; /* flag indicating data ownership */ +}; + +typedef struct _N_VectorContent_MPIManyVector *N_VectorContent_MPIManyVector; + +/* ----------------------------------------------------------------- + functions exported by ManyVector + ----------------------------------------------------------------- */ + +SUNDIALS_EXPORT N_Vector N_VMake_MPIManyVector(MPI_Comm comm, + sunindextype num_subvectors, + N_Vector *vec_array, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VNew_MPIManyVector(sunindextype num_subvectors, + N_Vector *vec_array, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VGetSubvector_MPIManyVector(N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT realtype *N_VGetSubvectorArrayPointer_MPIManyVector(N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT int N_VSetSubvectorArrayPointer_MPIManyVector(realtype *v_data, N_Vector v, + sunindextype vec_num); + +SUNDIALS_EXPORT sunindextype N_VGetNumSubvectors_MPIManyVector(N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_MPIManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VPrint_MPIManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_MPIManyVector(N_Vector v, FILE *outfile); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_MPIManyVector(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_MPIManyVector(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_MPIManyVector(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_MPIManyVector(N_Vector v, sunindextype *lrw, + sunindextype *liw); +SUNDIALS_EXPORT void *N_VGetCommunicator_MPIManyVector(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetLength_MPIManyVector(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetSubvectorLocalLength_MPIManyVector(N_Vector v, sunindextype vec_num); +SUNDIALS_EXPORT void N_VLinearSum_MPIManyVector(realtype a, N_Vector x, + realtype b, N_Vector y, + N_Vector z); +SUNDIALS_EXPORT void N_VConst_MPIManyVector(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_MPIManyVector(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_MPIManyVector(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_MPIManyVector(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_MPIManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_MPIManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_MPIManyVector(N_Vector x, realtype b, + N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_MPIManyVector(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_MPIManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_MPIManyVector(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_MPIManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_MPIManyVector(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_MPIManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_MPIManyVector(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_MPIManyVector(N_Vector num, + N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_MPIManyVector(int nvec, realtype* c, + N_Vector* V, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_MPIManyVector(int nvec, realtype* a, + N_Vector x, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_MPIManyVector(int nvec, N_Vector x, + N_Vector *Y, + realtype* dotprods); + +/* single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal_MPIManyVector(int nvec, N_Vector x, + N_Vector *Y, + realtype* dotprods); +SUNDIALS_EXPORT int N_VDotProdMultiAllReduce_MPIManyVector(int nvec_total, + N_Vector x, + realtype* sum); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_MPIManyVector(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_MPIManyVector(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_MPIManyVector(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_MPIManyVector(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_MPIManyVector(int nvec, + N_Vector* X, + N_Vector* W, + N_Vector id, + realtype* nrm); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_MPIManyVector(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_MPIManyVector(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_MPIManyVector(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_MPIManyVector(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_MPIManyVector(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_MPIManyVector(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_MPIManyVector(N_Vector num, + N_Vector denom); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_MPIManyVector(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_MPIManyVector(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_MPIManyVector(N_Vector x, void *buf); + +/* ----------------------------------------------------------------- + Enable / disable fused vector operations + ----------------------------------------------------------------- */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_MPIManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_MPIManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_MPIManyVector(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_MPIManyVector(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableDotProdMultiLocal_MPIManyVector(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/inst/include/nvector/nvector_mpiplusx.h b/inst/include/nvector/nvector_mpiplusx.h new file mode 100644 index 0000000..b7c99bc --- /dev/null +++ b/inst/include/nvector/nvector_mpiplusx.h @@ -0,0 +1,57 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the MPI+X implementation of the + * NVECTOR module. The MPIPlusX NVECTOR is really just an extension + * of the ManyVector. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_MPIPLUSX_H +#define _NVECTOR_MPIPLUSX_H + +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +typedef N_VectorContent_MPIManyVector N_VectorContent_MPIPlusX; + + +SUNDIALS_EXPORT N_Vector N_VMake_MPIPlusX(MPI_Comm comm, N_Vector X, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_MPIPlusX(N_Vector v); + +SUNDIALS_EXPORT realtype* N_VGetArrayPointer_MPIPlusX(N_Vector v); + +SUNDIALS_EXPORT void N_VSetArrayPointer_MPIPlusX(realtype *vdata, N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_MPIPlusX(N_Vector x); + +SUNDIALS_EXPORT void N_VPrintFile_MPIPlusX(N_Vector x, FILE *outfile); + +SUNDIALS_EXPORT N_Vector N_VGetLocalVector_MPIPlusX(N_Vector v); + +SUNDIALS_EXPORT sunindextype N_VGetLocalLength_MPIPlusX(N_Vector v); + +SUNDIALS_STATIC_INLINE +int N_VEnableFusedOps_MPIPlusX(N_Vector v, booleantype tf) +{ return N_VEnableFusedOps_MPIManyVector(v, tf); } + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_openmp.h b/inst/include/nvector/nvector_openmp.h new file mode 100644 index 0000000..e5b0138 --- /dev/null +++ b/inst/include/nvector/nvector_openmp.h @@ -0,0 +1,217 @@ +/* ----------------------------------------------------------------- + * Programmer(s): David J. Gardner and Carol S. Woodward @ LLNL + * ----------------------------------------------------------------- + * Acknowledgements: This NVECTOR module is based on the NVECTOR + * Serial module by Scott D. Cohen, Alan C. + * Hindmarsh, Radu Serban, and Aaron Collier + * @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the OpenMP implementation of the + * NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be found + * in the header file sundials_nvector.h. + * + * - The definition of the type 'realtype' can be found in the + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type 'booleantype'. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_OpenMP(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_OPENMP_H +#define _NVECTOR_OPENMP_H + +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * OpenMP implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_OpenMP { + sunindextype length; /* vector length */ + booleantype own_data; /* data ownership flag */ + realtype *data; /* data array */ + int num_threads; /* number of OpenMP threads */ +}; + +typedef struct _N_VectorContent_OpenMP *N_VectorContent_OpenMP; + +/* + * ----------------------------------------------------------------- + * Macros NV_CONTENT_OMP, NV_DATA_OMP, NV_OWN_DATA_OMP, + * NV_LENGTH_OMP, and NV_Ith_OMP + * ----------------------------------------------------------------- + */ + +#define NV_CONTENT_OMP(v) ( (N_VectorContent_OpenMP)(v->content) ) + +#define NV_LENGTH_OMP(v) ( NV_CONTENT_OMP(v)->length ) + +#define NV_NUM_THREADS_OMP(v) ( NV_CONTENT_OMP(v)->num_threads ) + +#define NV_OWN_DATA_OMP(v) ( NV_CONTENT_OMP(v)->own_data ) + +#define NV_DATA_OMP(v) ( NV_CONTENT_OMP(v)->data ) + +#define NV_Ith_OMP(v,i) ( NV_DATA_OMP(v)[i] ) + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_openmp + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNew_OpenMP(sunindextype vec_length, int num_threads, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_OpenMP(sunindextype vec_length, int num_threads, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_OpenMP(sunindextype vec_length, realtype *v_data, + int num_threads, SUNContext sunctx); + +SUNDIALS_EXPORT sunindextype N_VGetLength_OpenMP(N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_OpenMP(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_OpenMP(N_Vector v, FILE *outfile); + + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_OpenMP(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_OpenMP(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_OpenMP(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_OpenMP(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_OpenMP(N_Vector v, sunindextype *lrw, sunindextype *liw); +SUNDIALS_EXPORT realtype *N_VGetArrayPointer_OpenMP(N_Vector v); +SUNDIALS_EXPORT void N_VSetArrayPointer_OpenMP(realtype *v_data, N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_OpenMP(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_OpenMP(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_OpenMP(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_OpenMP(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_OpenMP(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_OpenMP(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_OpenMP(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_OpenMP(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_OpenMP(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_OpenMP(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_OpenMP(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_OpenMP(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_OpenMP(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_OpenMP(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_OpenMP(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_OpenMP(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_OpenMP(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_OpenMP(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_OpenMP(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_OpenMP(int nvec, realtype* c, + N_Vector* V, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_OpenMP(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_OpenMP(int nvec, N_Vector x, + N_Vector* Y, realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_OpenMP(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_OpenMP(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_OpenMP(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_OpenMP(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_OpenMP(int nvecs, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_OpenMP(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_OpenMP(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_OpenMP(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_OpenMP(N_Vector x, N_Vector w, + N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_OpenMP(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_OpenMP(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_OpenMP(N_Vector x, void *buf); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_OpenMP(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_OpenMP(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_OpenMP(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_OpenMP(N_Vector v, booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArray_OpenMP(int count, N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArrayEmpty_OpenMP(int count, N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_OpenMP(N_Vector* vs, int count); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_openmpdev.h b/inst/include/nvector/nvector_openmpdev.h new file mode 100644 index 0000000..1ea6efe --- /dev/null +++ b/inst/include/nvector/nvector_openmpdev.h @@ -0,0 +1,218 @@ +/* ------------------------------------------------------------------- + * Programmer(s): David J. Gardner and Shelby Lockhart @ LLNL + * ------------------------------------------------------------------- + * Acknowledgements: This NVECTOR module is based on the NVECTOR + * Serial module by Scott D. Cohen, Alan C. + * Hindmarsh, Radu Serban, and Aaron Collier + * @ LLNL + * ------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the OpenMP 4.5+ implementation of the + * NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be found + * in the header file sundials_nvector.h. + * + * - The definition of the type 'realtype' can be found in the + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type 'booleantype'. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_OpenMPDEV(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_OPENMPDEV_H +#define _NVECTOR_OPENMPDEV_H + +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * OpenMPDEV implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_OpenMPDEV { + sunindextype length; /* vector length */ + booleantype own_data; /* data ownership flag */ + realtype *host_data; /* host data array */ + realtype *dev_data; /* device data array */ +}; + +typedef struct _N_VectorContent_OpenMPDEV *N_VectorContent_OpenMPDEV; + +/* + * ----------------------------------------------------------------- + * Macros NV_CONTENT_OMPDEV, NV_DATA_HOST_OMPDEV, NV_OWN_DATA_OMPDEV, + * NV_LENGTH_OMPDEV, and NV_Ith_OMPDEV + * ----------------------------------------------------------------- + */ + +#define NV_CONTENT_OMPDEV(v) ( (N_VectorContent_OpenMPDEV)(v->content) ) + +#define NV_LENGTH_OMPDEV(v) ( NV_CONTENT_OMPDEV(v)->length ) + +#define NV_OWN_DATA_OMPDEV(v) ( NV_CONTENT_OMPDEV(v)->own_data ) + +#define NV_DATA_HOST_OMPDEV(v) ( NV_CONTENT_OMPDEV(v)->host_data ) + +#define NV_DATA_DEV_OMPDEV(v) ( NV_CONTENT_OMPDEV(v)->dev_data ) + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_openmpdev + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNew_OpenMPDEV(sunindextype vec_length, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_OpenMPDEV(sunindextype vec_length, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_OpenMPDEV(sunindextype vec_length, + realtype *h_data, + realtype *v_data, + SUNContext sunctx); + +SUNDIALS_EXPORT sunindextype N_VGetLength_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT realtype *N_VGetHostArrayPointer_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT realtype *N_VGetDeviceArrayPointer_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_OpenMPDEV(N_Vector v, FILE *outfile); + +SUNDIALS_EXPORT void N_VCopyToDevice_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT void N_VCopyFromDevice_OpenMPDEV(N_Vector v); + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_OpenMPDEV(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_OpenMPDEV(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_OpenMPDEV(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_OpenMPDEV(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_OpenMPDEV(N_Vector v, sunindextype *lrw, sunindextype *liw); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_OpenMPDEV(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_OpenMPDEV(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_OpenMPDEV(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_OpenMPDEV(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_OpenMPDEV(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_OpenMPDEV(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_OpenMPDEV(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_OpenMPDEV(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_OpenMPDEV(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_OpenMPDEV(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_OpenMPDEV(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_OpenMPDEV(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_OpenMPDEV(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_OpenMPDEV(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_OpenMPDEV(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_OpenMPDEV(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_OpenMPDEV(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_OpenMPDEV(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_OpenMPDEV(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_OpenMPDEV(int nvec, realtype* c, + N_Vector* V, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_OpenMPDEV(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_OpenMPDEV(int nvec, N_Vector x, + N_Vector *Y, realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_OpenMPDEV(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_OpenMPDEV(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_OpenMPDEV(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_OpenMPDEV(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_OpenMPDEV(int nvecs, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_OpenMPDEV(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_OpenMPDEV(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_OpenMPDEV(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_OpenMPDEV(N_Vector x, N_Vector w, + N_Vector id); + + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_OpenMPDEV(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_OpenMPDEV(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_OpenMPDEV(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_OpenMPDEV(N_Vector v, booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArray_OpenMPDEV(int count, N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArrayEmpty_OpenMPDEV(int count, N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_OpenMPDEV(N_Vector *vs, int count); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_parallel.h b/inst/include/nvector/nvector_parallel.h new file mode 100644 index 0000000..da28e32 --- /dev/null +++ b/inst/include/nvector/nvector_parallel.h @@ -0,0 +1,248 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Scott D. Cohen, Alan C. Hindmarsh, Radu Serban, + * and Aaron Collier @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the MPI-enabled implementation + * of the NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be + * found in the header file sundials_nvector.h. + * + * - The definition of the type realtype can be found in the + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type booleantype. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_Parallel(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_PARALLEL_H +#define _NVECTOR_PARALLEL_H + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * Parallel implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_Parallel { + sunindextype local_length; /* local vector length */ + sunindextype global_length; /* global vector length */ + booleantype own_data; /* ownership of data */ + realtype *data; /* local data array */ + MPI_Comm comm; /* pointer to MPI communicator */ +}; + +typedef struct _N_VectorContent_Parallel *N_VectorContent_Parallel; + +/* + * ----------------------------------------------------------------- + * Macros NV_CONTENT_P, NV_DATA_P, NV_OWN_DATA_P, + * NV_LOCLENGTH_P, NV_GLOBLENGTH_P,NV_COMM_P, and NV_Ith_P + * ----------------------------------------------------------------- + */ + +#define NV_CONTENT_P(v) ( (N_VectorContent_Parallel)(v->content) ) + +#define NV_LOCLENGTH_P(v) ( NV_CONTENT_P(v)->local_length ) + +#define NV_GLOBLENGTH_P(v) ( NV_CONTENT_P(v)->global_length ) + +#define NV_OWN_DATA_P(v) ( NV_CONTENT_P(v)->own_data ) + +#define NV_DATA_P(v) ( NV_CONTENT_P(v)->data ) + +#define NV_COMM_P(v) ( NV_CONTENT_P(v)->comm ) + +#define NV_Ith_P(v,i) ( NV_DATA_P(v)[i] ) + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_parallel + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNew_Parallel(MPI_Comm comm, + sunindextype local_length, + sunindextype global_length, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Parallel(MPI_Comm comm, + sunindextype local_length, + sunindextype global_length, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_Parallel(MPI_Comm comm, + sunindextype local_length, + sunindextype global_length, + realtype *v_data, + SUNContext sunctx); + +SUNDIALS_EXPORT sunindextype N_VGetLength_Parallel(N_Vector v); + +SUNDIALS_EXPORT sunindextype N_VGetLocalLength_Parallel(N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_Parallel(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_Parallel(N_Vector v, FILE *outfile); + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_Parallel(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Parallel(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Parallel(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Parallel(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Parallel(N_Vector v, sunindextype *lrw, + sunindextype *liw); +SUNDIALS_EXPORT realtype *N_VGetArrayPointer_Parallel(N_Vector v); +SUNDIALS_EXPORT void N_VSetArrayPointer_Parallel(realtype *v_data, N_Vector v); +SUNDIALS_EXPORT void *N_VGetCommunicator_Parallel(N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Parallel(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Parallel(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Parallel(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Parallel(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Parallel(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Parallel(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Parallel(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Parallel(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Parallel(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Parallel(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Parallel(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Parallel(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Parallel(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Parallel(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Parallel(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Parallel(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Parallel(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Parallel(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Parallel(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Parallel(int nvec, realtype* c, N_Vector* V, + N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Parallel(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_Parallel(int nvec, N_Vector x, + N_Vector* Y, realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Parallel(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Parallel(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Parallel(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Parallel(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Parallel(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Parallel(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Parallel(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_Parallel(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_Parallel(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_Parallel(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_Parallel(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Parallel(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Parallel(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_Parallel(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_Parallel(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_Parallel(N_Vector num, + N_Vector denom); + +/* OPTIONAL single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal_Parallel(int nvec, N_Vector x, + N_Vector* Y, + realtype* dotprods); +SUNDIALS_EXPORT int N_VDotProdMultiAllReduce_Parallel(int nvec_total, N_Vector x, + realtype* dotprods); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Parallel(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Parallel(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Parallel(N_Vector x, void *buf); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Parallel(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Parallel(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Parallel(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Parallel(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableDotProdMultiLocal_Parallel(N_Vector v, booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArray_Parallel(int count, N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArrayEmpty_Parallel(int count, N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_Parallel(N_Vector* vs, int count); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_parhyp.h b/inst/include/nvector/nvector_parhyp.h new file mode 100644 index 0000000..8efcc67 --- /dev/null +++ b/inst/include/nvector/nvector_parhyp.h @@ -0,0 +1,238 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Jean M. Sexton @ SMU + * Slaven Peles @ LLNL + * ----------------------------------------------------------------- + * Based on work by: Scott D. Cohen, Alan C. Hindmarsh, Radu Serban, + * and Aaron Collier @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the ParHyp implementation + * of the NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be + * found in the header file sundials_nvector.h. + * + * - The definition of the type realtype can be found in the + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type booleantype. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_ParHyp(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_PARHYP_H +#define _NVECTOR_PARHYP_H + +#include +#include +#include +#include + +/* hypre header files */ +#include <_hypre_parcsr_mv.h> + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * ParHyp implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_ParHyp { + sunindextype local_length; /* local vector length */ + sunindextype global_length; /* global vector length */ + booleantype own_parvector; /* ownership of HYPRE vector */ + MPI_Comm comm; /* pointer to MPI communicator */ + + HYPRE_ParVector x; /* the actual HYPRE_ParVector object */ +}; + +typedef struct _N_VectorContent_ParHyp *N_VectorContent_ParHyp; + + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_parhyp + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_ParHyp(MPI_Comm comm, + sunindextype local_length, + sunindextype global_length, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_ParHyp(HYPRE_ParVector x, SUNContext sunctx); + +SUNDIALS_EXPORT HYPRE_ParVector N_VGetVector_ParHyp(N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_ParHyp(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_ParHyp(N_Vector v, FILE *outfile); + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_ParHyp(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_ParHyp(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_ParHyp(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_ParHyp(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_ParHyp(N_Vector v, sunindextype *lrw, + sunindextype *liw); +SUNDIALS_EXPORT realtype *N_VGetArrayPointer_ParHyp(N_Vector v); +SUNDIALS_EXPORT void N_VSetArrayPointer_ParHyp(realtype *v_data, N_Vector v); +SUNDIALS_EXPORT void *N_VGetCommunicator_ParHyp(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetLength_ParHyp(N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_ParHyp(realtype a, N_Vector x, realtype b, + N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_ParHyp(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_ParHyp(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_ParHyp(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_ParHyp(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_ParHyp(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_ParHyp(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_ParHyp(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_ParHyp(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_ParHyp(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_ParHyp(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_ParHyp(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_ParHyp(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_ParHyp(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_ParHyp(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_ParHyp(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_ParHyp(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_ParHyp(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_ParHyp(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_ParHyp(int nvec, realtype* c, + N_Vector* X, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_ParHyp(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_ParHyp(int nvec, N_Vector x, N_Vector* Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_ParHyp(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_ParHyp(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_ParHyp(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_ParHyp(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_ParHyp(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_ParHyp(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_ParHyp(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_ParHyp(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_ParHyp(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_ParHyp(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_ParHyp(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_ParHyp(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_ParHyp(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_ParHyp(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_ParHyp(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_ParHyp(N_Vector num, + N_Vector denom); + +/* OPTIONAL single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal_ParHyp(int nvec, N_Vector x, + N_Vector* Y, + realtype* dotprods); +SUNDIALS_EXPORT int N_VDotProdMultiAllReduce_ParHyp(int nvec, N_Vector x, + realtype* sum); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_ParHyp(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_ParHyp(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_ParHyp(N_Vector x, void *buf); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_ParHyp(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_ParHyp(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_ParHyp(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_ParHyp(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_ParHyp(N_Vector v, + booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArray_ParHyp(int count, + N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArrayEmpty_ParHyp(int count, + N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_ParHyp(N_Vector *vs, + int count); + +SUNDIALS_EXPORT int N_VEnableDotProdMultiLocal_ParHyp(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_petsc.h b/inst/include/nvector/nvector_petsc.h new file mode 100644 index 0000000..65a36bf --- /dev/null +++ b/inst/include/nvector/nvector_petsc.h @@ -0,0 +1,217 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the PETSc vector wrapper + * for NVECTOR module. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be + * found in the header file sundials_nvector.h. + * + * - The definition of the type realtype can be found in the + * header file sundials_types.h, and it may be changed (at the + * build configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type booleantype. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_Petsc(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_PETSC_H +#define _NVECTOR_PETSC_H + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * PETSc implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_Petsc { + sunindextype local_length; /* copy of local vector length */ + sunindextype global_length; /* copy of global vector length */ + booleantype own_data; /* ownership of data */ + Vec pvec; /* the PETSc Vec object */ + MPI_Comm comm; /* copy of MPI communicator */ +}; + +typedef struct _N_VectorContent_Petsc *N_VectorContent_Petsc; + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_petsc + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Petsc(MPI_Comm comm, + sunindextype local_length, + sunindextype global_length, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_Petsc(Vec v, SUNContext sunctx); + +SUNDIALS_EXPORT realtype *N_VGetArrayPointer_Petsc(N_Vector v); + +SUNDIALS_EXPORT Vec N_VGetVector_Petsc(N_Vector v); + +SUNDIALS_EXPORT void N_VSetVector_Petsc(N_Vector v, Vec p); + +SUNDIALS_EXPORT void N_VPrint_Petsc(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_Petsc(N_Vector v, const char fname[]); + +/* nvector API functions */ +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_Petsc(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Petsc(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Petsc(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Petsc(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Petsc(N_Vector v, sunindextype *lrw, sunindextype *liw); +SUNDIALS_EXPORT void N_VSetArrayPointer_Petsc(realtype *v_data, N_Vector v); +SUNDIALS_EXPORT void *N_VGetCommunicator_Petsc(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetLength_Petsc(N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Petsc(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Petsc(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Petsc(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Petsc(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Petsc(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Petsc(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Petsc(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Petsc(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Petsc(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Petsc(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Petsc(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Petsc(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Petsc(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Petsc(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Petsc(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Petsc(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Petsc(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Petsc(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Petsc(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Petsc(int nvec, realtype* c, + N_Vector* X, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Petsc(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_Petsc(int nvec, N_Vector x, N_Vector* Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Petsc(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Petsc(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Petsc(int nvecs, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Petsc(int nvecs, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Petsc(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Petsc(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Petsc(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_Petsc(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_Petsc(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_Petsc(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_Petsc(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Petsc(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Petsc(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_Petsc(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_Petsc(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_Petsc(N_Vector num, + N_Vector denom); + +/* OPTIONAL single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal_Petsc(int nvec, N_Vector x, + N_Vector* Y, realtype* dotprods); +SUNDIALS_EXPORT int N_VDotProdMultiAllReduce_Petsc(int nvec, N_Vector x, + realtype* sum); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Petsc(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Petsc(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Petsc(N_Vector x, void *buf); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Petsc(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Petsc(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Petsc(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Petsc(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableDotProdMultiLocal_Petsc(N_Vector v, booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArray_Petsc(int count, N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector *N_VCloneVectorArrayEmpty_Petsc(int count, N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_Petsc(N_Vector *vs, int count); + +#ifdef __cplusplus +} +#endif + +#endif /* _NVECTOR_PETSC_H */ diff --git a/inst/include/nvector/nvector_pthreads.h b/inst/include/nvector/nvector_pthreads.h new file mode 100644 index 0000000..bd7c366 --- /dev/null +++ b/inst/include/nvector/nvector_pthreads.h @@ -0,0 +1,268 @@ +/* ----------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------- + * Acknowledgements: This NVECTOR module is based on the NVECTOR + * Serial module by Scott D. Cohen, Alan C. + * Hindmarsh, Radu Serban, and Aaron Collier + * @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the POSIX Threads (Pthreads) + * implementation of the NVECTOR module using LOCAL data structs + * to share data between threads. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be found + * in the header file sundials_nvector.h. + * + * - The definition of the type 'realtype' can be found in the + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type 'booleantype'. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_Pthreads(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_PTHREADS_H +#define _NVECTOR_PTHREADS_H + +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * Pthreads implementation of N_Vector + * ----------------------------------------------------------------- + */ + +struct _N_VectorContent_Pthreads { + sunindextype length; /* vector length */ + booleantype own_data; /* data ownership flag */ + realtype *data; /* data array */ + int num_threads; /* number of POSIX threads */ +}; + +typedef struct _N_VectorContent_Pthreads *N_VectorContent_Pthreads; + +/* Structure to hold parallelization information for each thread when + calling "companion" functions to compute vector operations. The + start and end vector (loop) indices are unique to each thread, the + realtype variables are the same for each thread, and the mutex + variable is used to lock variables in reductions. */ + +struct _Pthreads_Data{ + sunindextype start; /* starting index for loop */ + sunindextype end; /* ending index for loop */ + realtype c1, c2; /* scalar values */ + realtype *v1, *v2, *v3; /* vector data */ + realtype *global_val; /* shared global variable */ + pthread_mutex_t *global_mutex; /* lock for shared variable */ + + int nvec; /* number of vectors in fused op */ + int nsum; /* number of sums in fused op */ + + realtype* cvals; /* scalar values in fused op */ + + N_Vector x1; /* vector array in fused op */ + N_Vector x2; /* vector array in fused op */ + N_Vector x3; /* vector array in fused op */ + + N_Vector* Y1; /* vector array in fused op */ + N_Vector* Y2; /* vector array in fused op */ + N_Vector* Y3; /* vector array in fused op */ + + N_Vector** ZZ1; /* array of vector arrays in fused op */ + N_Vector** ZZ2; /* array of vector arrays in fused op */ +}; + +typedef struct _Pthreads_Data Pthreads_Data; + +/* + * ----------------------------------------------------------------- + * Macros NV_CONTENT_PT, NV_DATA_PT, NV_OWN_DATA_PT, + * NV_LENGTH_PT, and NV_Ith_PT + * ----------------------------------------------------------------- + */ + +#define NV_CONTENT_PT(v) ( (N_VectorContent_Pthreads)(v->content) ) + +#define NV_LENGTH_PT(v) ( NV_CONTENT_PT(v)->length ) + +#define NV_NUM_THREADS_PT(v) ( NV_CONTENT_PT(v)->num_threads ) + +#define NV_OWN_DATA_PT(v) ( NV_CONTENT_PT(v)->own_data ) + +#define NV_DATA_PT(v) ( NV_CONTENT_PT(v)->data ) + +#define NV_Ith_PT(v,i) ( NV_DATA_PT(v)[i] ) + +/* + * ----------------------------------------------------------------- + * Functions exported by nvector_Pthreads + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNew_Pthreads(sunindextype vec_length, int n_threads, + SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Pthreads(sunindextype vec_length, + int n_threads, SUNContext sunctx); + +SUNDIALS_EXPORT N_Vector N_VMake_Pthreads(sunindextype vec_length, + int n_threads, realtype *v_data, + SUNContext sunctx); + +SUNDIALS_EXPORT sunindextype N_VGetLength_Pthreads(N_Vector v); + +SUNDIALS_EXPORT void N_VPrint_Pthreads(N_Vector v); + +SUNDIALS_EXPORT void N_VPrintFile_Pthreads(N_Vector v, FILE *outfile); + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_Pthreads(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Pthreads(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Pthreads(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Pthreads(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Pthreads(N_Vector v, sunindextype *lrw, + sunindextype *liw); +SUNDIALS_EXPORT realtype *N_VGetArrayPointer_Pthreads(N_Vector v); +SUNDIALS_EXPORT void N_VSetArrayPointer_Pthreads(realtype *v_data, N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Pthreads(realtype a, N_Vector x, realtype b, + N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Pthreads(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Pthreads(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Pthreads(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Pthreads(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Pthreads(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Pthreads(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Pthreads(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Pthreads(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Pthreads(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Pthreads(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Pthreads(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Pthreads(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Pthreads(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Pthreads(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Pthreads(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Pthreads(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Pthreads(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Pthreads(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Pthreads(int nvec, realtype* c, + N_Vector* X, N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Pthreads(int nvec, realtype* a, N_Vector x, + N_Vector* Y, N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti_Pthreads(int nvec, N_Vector x, N_Vector* Y, + realtype* dotprods); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Pthreads(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Pthreads(int nvec, realtype* c, + N_Vector* X, N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Pthreads(int nvec, realtype c, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Pthreads(int nvec, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Pthreads(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Pthreads(int nvec, int nsum, + realtype* a, + N_Vector* X, + N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Pthreads(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Pthreads(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Pthreads(N_Vector x, N_Vector w, + N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Pthreads(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Pthreads(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Pthreads(N_Vector x, void *buf); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Pthreads(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Pthreads(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Pthreads(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v, + booleantype tf); + +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArray_Pthreads(int count, + N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArrayEmpty_Pthreads(int count, + N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_Pthreads(N_Vector* vs, + int count); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_raja.h b/inst/include/nvector/nvector_raja.h new file mode 100644 index 0000000..be44c52 --- /dev/null +++ b/inst/include/nvector/nvector_raja.h @@ -0,0 +1,197 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles, Cody J. Balos, Daniel McGreer @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the RAJA implementation of the + * NVECTOR module. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_RAJA_H +#define _NVECTOR_RAJA_H + +#include + +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ----------------------------------------------------------------- + * RAJA implementation of N_Vector + * ----------------------------------------------------------------- + */ + +/* RAJA implementation of the N_Vector 'content' structure + contains the length of the vector, pointers to host and device + arrays of 'realtype' components, a flag indicating ownership of + the data, and a private data pointer */ + +struct _N_VectorContent_Raja { + sunindextype length; + booleantype own_helper; + SUNMemory host_data; + SUNMemory device_data; + SUNMemoryHelper mem_helper; + void* priv; /* 'private' data */ +}; + +typedef struct _N_VectorContent_Raja *N_VectorContent_Raja; + +/* + * ----------------------------------------------------------------- + * NVECTOR_RAJA implementation specific functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Raja(SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNew_Raja(sunindextype length, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewManaged_Raja(sunindextype length, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Raja(sunindextype length, + booleantype use_managed_mem, + SUNMemoryHelper helper, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMake_Raja(sunindextype length, realtype *h_vdata, + realtype *d_vdata, SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMakeManaged_Raja(sunindextype length, + realtype *vdata, + SUNContext sunctx); +SUNDIALS_EXPORT void N_VSetHostArrayPointer_Raja(realtype* h_vdata, N_Vector v); +SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Raja(realtype* d_vdata, + N_Vector v); +SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Raja(N_Vector x); +SUNDIALS_EXPORT void N_VCopyToDevice_Raja(N_Vector v); +SUNDIALS_EXPORT void N_VCopyFromDevice_Raja(N_Vector v); + +SUNDIALS_STATIC_INLINE +sunindextype N_VGetLength_Raja(N_Vector x) +{ + N_VectorContent_Raja content = (N_VectorContent_Raja)x->content; + return content->length; +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetHostArrayPointer_Raja(N_Vector x) +{ + N_VectorContent_Raja content = (N_VectorContent_Raja)x->content; + return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr); +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetDeviceArrayPointer_Raja(N_Vector x) +{ + N_VectorContent_Raja content = (N_VectorContent_Raja)x->content; + return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr); +} + + +/* + * ----------------------------------------------------------------- + * NVECTOR API functions + * ----------------------------------------------------------------- + */ + +SUNDIALS_STATIC_INLINE +N_Vector_ID N_VGetVectorID_Raja(N_Vector v) +{ + return SUNDIALS_NVEC_RAJA; +} + +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Raja(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Raja(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Raja(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Raja(N_Vector v, sunindextype *lrw, sunindextype *liw); +SUNDIALS_EXPORT void N_VSetArrayPointer_Raja(realtype *v_data, N_Vector v); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Raja(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Raja(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Raja(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Raja(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Raja(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Raja(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Raja(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Raja(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Raja(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Raja(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Raja(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Raja(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Raja(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Raja(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Raja(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Raja(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Raja(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Raja(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Raja(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Raja(int nvec, realtype* c, N_Vector* X, + N_Vector z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Raja(int nvec, realtype* c, N_Vector x, + N_Vector* Y, N_Vector* Z); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Raja(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Raja(int nvec, realtype* c, N_Vector* X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Raja(int nvec, realtype c, N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Raja(int nvec, int nsum, + realtype* a, + N_Vector* X, N_Vector** Y, + N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Raja(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Raja(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Raja(N_Vector x, N_Vector w, N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Raja(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Raja(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Raja(N_Vector x, void *buf); + +/* OPTIONAL operations for debugging */ +SUNDIALS_EXPORT void N_VPrint_Raja(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_Raja(N_Vector v, FILE *outfile); + +/* + * ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT int N_VEnableFusedOps_Raja(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Raja(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Raja(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Raja(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Raja(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Raja(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Raja(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Raja(N_Vector v, booleantype tf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_serial.h b/inst/include/nvector/nvector_serial.h index f7445b9..95c6c3b 100644 --- a/inst/include/nvector/nvector_serial.h +++ b/inst/include/nvector/nvector_serial.h @@ -3,7 +3,7 @@ * and Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -21,8 +21,8 @@ * in the header file sundials_nvector.h. * * - The definition of the type 'realtype' can be found in the - * header file sundials_types.h, and it may be changed (at the - * configuration stage) according to the user's needs. + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. * The sundials_types.h file also contains the definition * for the type 'booleantype'. * @@ -82,17 +82,11 @@ typedef struct _N_VectorContent_Serial *N_VectorContent_Serial; * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT N_Vector N_VNew_Serial(sunindextype vec_length); +SUNDIALS_EXPORT N_Vector N_VNew_Serial(sunindextype vec_length, SUNContext sunctx); -SUNDIALS_EXPORT N_Vector N_VNewEmpty_Serial(sunindextype vec_length); +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Serial(sunindextype vec_length, SUNContext sunctx); -SUNDIALS_EXPORT N_Vector N_VMake_Serial(sunindextype vec_length, realtype *v_data); - -SUNDIALS_EXPORT N_Vector* N_VCloneVectorArray_Serial(int count, N_Vector w); - -SUNDIALS_EXPORT N_Vector* N_VCloneVectorArrayEmpty_Serial(int count, N_Vector w); - -SUNDIALS_EXPORT void N_VDestroyVectorArray_Serial(N_Vector* vs, int count); +SUNDIALS_EXPORT N_Vector N_VMake_Serial(sunindextype vec_length, realtype *v_data, SUNContext sunctx); SUNDIALS_EXPORT sunindextype N_VGetLength_Serial(N_Vector v); @@ -138,7 +132,7 @@ SUNDIALS_EXPORT int N_VDotProdMulti_Serial(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods); /* vector array operations */ -SUNDIALS_EXPORT int N_VLinearSumVectorArray_Serial(int nvec, +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Serial(int nvec, realtype a, N_Vector* X, realtype b, N_Vector* Y, N_Vector* Z); @@ -164,7 +158,12 @@ SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Serial(int nvec, int nsum, /* OPTIONAL local reduction kernels (no parallel communication) */ SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Serial(N_Vector x, N_Vector w); SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Serial(N_Vector x, N_Vector w, N_Vector id); - + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Serial(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Serial(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Serial(N_Vector x, void *buf); + /* * ----------------------------------------------------------------- * Enable / disable fused vector operations @@ -185,6 +184,22 @@ SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Serial(N_Vector v, booleant SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Serial(N_Vector v, booleantype tf); SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Serial(N_Vector v, booleantype tf); +/* + * ----------------------------------------------------------------- + * Deprecated functions + * ----------------------------------------------------------------- + */ + +/* use N_VCloneVectorArray */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArray_Serial(int count, N_Vector w); + +/* use N_VCloneVectorArrayEmpty */ +SUNDIALS_DEPRECATED_EXPORT N_Vector* N_VCloneVectorArrayEmpty_Serial(int count, N_Vector w); + +/* use N_VDestroyVectorArray */ +SUNDIALS_DEPRECATED_EXPORT void N_VDestroyVectorArray_Serial(N_Vector* vs, int count); + + #ifdef __cplusplus } #endif diff --git a/inst/include/nvector/nvector_sycl.h b/inst/include/nvector/nvector_sycl.h new file mode 100644 index 0000000..81838e5 --- /dev/null +++ b/inst/include/nvector/nvector_sycl.h @@ -0,0 +1,225 @@ +/* ----------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the SYCL implementation of the + * NVECTOR module. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_SYCL_H +#define _NVECTOR_SYCL_H + +#include +#include + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + + +/* ----------------------------------------------------------------- + * SYCL implementation of N_Vector + * ----------------------------------------------------------------- */ + + +struct _N_VectorContent_Sycl +{ + sunindextype length; + booleantype own_helper; + SUNMemory host_data; + SUNMemory device_data; + SUNSyclExecPolicy* stream_exec_policy; + SUNSyclExecPolicy* reduce_exec_policy; + SUNMemoryHelper mem_helper; + ::sycl::queue* queue; + void* priv; /* 'private' data */ +}; + +typedef struct _N_VectorContent_Sycl *N_VectorContent_Sycl; + + +/* ----------------------------------------------------------------- + * NVECTOR_SYCL implementation specific functions + * ----------------------------------------------------------------- */ + + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Sycl(SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNew_Sycl(sunindextype length, + ::sycl::queue *Q, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewManaged_Sycl(sunindextype length, + ::sycl::queue *Q, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Sycl(sunindextype length, + booleantype use_managed_mem, + SUNMemoryHelper helper, + ::sycl::queue *Q, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMake_Sycl(sunindextype length, + realtype *h_vdata, + realtype *d_vdata, + ::sycl::queue *Q, + SUNContext sunctx); +SUNDIALS_EXPORT N_Vector N_VMakeManaged_Sycl(sunindextype length, + realtype *vdata, + ::sycl::queue *Q, + SUNContext sunctx); + +SUNDIALS_EXPORT void N_VSetHostArrayPointer_Sycl(realtype* h_vdata, N_Vector v); +SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Sycl(realtype* d_vdata, + N_Vector v); +SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Sycl(N_Vector x); +SUNDIALS_EXPORT int N_VSetKernelExecPolicy_Sycl(N_Vector x, + SUNSyclExecPolicy* stream_exec_policy, + SUNSyclExecPolicy* reduce_exec_policy); +SUNDIALS_EXPORT void N_VCopyToDevice_Sycl(N_Vector v); +SUNDIALS_EXPORT void N_VCopyFromDevice_Sycl(N_Vector v); + +SUNDIALS_STATIC_INLINE +sunindextype N_VGetLength_Sycl(N_Vector x) +{ + N_VectorContent_Sycl content = (N_VectorContent_Sycl)x->content; + return content->length; +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetHostArrayPointer_Sycl(N_Vector x) +{ + N_VectorContent_Sycl content = (N_VectorContent_Sycl)x->content; + return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr); +} + +SUNDIALS_STATIC_INLINE +realtype *N_VGetDeviceArrayPointer_Sycl(N_Vector x) +{ + N_VectorContent_Sycl content = (N_VectorContent_Sycl)x->content; + return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr); +} + + +/* ----------------------------------------------------------------- + * NVECTOR API functions + * ----------------------------------------------------------------- */ + + +SUNDIALS_STATIC_INLINE +N_Vector_ID N_VGetVectorID_Sycl(N_Vector v) +{ + return SUNDIALS_NVEC_SYCL; +} + +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Sycl(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Sycl(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Sycl(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Sycl(N_Vector v, sunindextype *lrw, + sunindextype *liw); + +/* standard vector operations */ +SUNDIALS_EXPORT void N_VLinearSum_Sycl(realtype a, N_Vector x, + realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Sycl(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Sycl(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Sycl(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Sycl(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Sycl(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Sycl(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Sycl(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Sycl(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Sycl(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Sycl(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Sycl(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Sycl(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Sycl(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Sycl(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Sycl(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Sycl(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Sycl(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Sycl(N_Vector num, N_Vector denom); + +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination_Sycl(int nvec, realtype* c, N_Vector* X, + N_Vector Z); +SUNDIALS_EXPORT int N_VScaleAddMulti_Sycl(int nvec, realtype* c, N_Vector X, + N_Vector* Y, N_Vector* Z); + +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray_Sycl(int nvec, + realtype a, N_Vector* X, + realtype b, N_Vector* Y, + N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleVectorArray_Sycl(int nvec, realtype* c, N_Vector* X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VConstVectorArray_Sycl(int nvec, realtype c, N_Vector* Z); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Sycl(int nvec, int nsum, + realtype* a, N_Vector* X, + N_Vector** Y, N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Sycl(int nvec, int nsum, + realtype* c, + N_Vector** X, + N_Vector* Z); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Sycl(int nvec, N_Vector* X, + N_Vector* W, realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Sycl(int nvec, N_Vector* X, + N_Vector* W, N_Vector id, + realtype* nrm); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Sycl(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Sycl(N_Vector x, N_Vector w, + N_Vector id); + +/* OPTIONAL XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize_Sycl(N_Vector x, sunindextype *size); +SUNDIALS_EXPORT int N_VBufPack_Sycl(N_Vector x, void *buf); +SUNDIALS_EXPORT int N_VBufUnpack_Sycl(N_Vector x, void *buf); + +/* OPTIONAL operations for debugging */ +SUNDIALS_EXPORT void N_VPrint_Sycl(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile_Sycl(N_Vector v, FILE *outfile); + + +/* ----------------------------------------------------------------- + * Enable / disable fused vector operations + * ----------------------------------------------------------------- */ + + +SUNDIALS_EXPORT int N_VEnableFusedOps_Sycl(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearCombination_Sycl(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Sycl(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableDotProdMulti_Sycl(N_Vector v, booleantype tf); + +SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Sycl(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Sycl(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableConstVectorArray_Sycl(N_Vector v, booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Sycl(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Sycl(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Sycl(N_Vector v, + booleantype tf); +SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Sycl(N_Vector v, + booleantype tf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/nvector/nvector_trilinos.h b/inst/include/nvector/nvector_trilinos.h new file mode 100644 index 0000000..e95c3d0 --- /dev/null +++ b/inst/include/nvector/nvector_trilinos.h @@ -0,0 +1,141 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the main header file for the Trilinos vector wrapper + * for NVECTOR module. + * + * Part I contains declarations specific to the Trilinos vector wrapper + * implementation. + * + * Part II contains the prototype for the constructor + * N_VMake_Trilinos as well as Trilinos-specific prototypes + * for various useful vector operations. + * + * Notes: + * + * - The definition of the generic N_Vector structure can be + * found in the header file sundials_nvector.h. + * + * - The definition of the type realtype can be found in the + * header file sundials_types.h, and it may be changed (at the + * build configuration stage) according to the user's needs. + * The sundials_types.h file also contains the definition + * for the type booleantype. + * + * - N_Vector arguments to arithmetic vector operations need not + * be distinct. For example, the following call: + * + * N_VLinearSum_Trilinos(a,x,b,y,y); + * + * (which stores the result of the operation a*x+b*y in y) + * is legal. + * -----------------------------------------------------------------*/ + +#ifndef _NVECTOR_TRILINOS_H +#define _NVECTOR_TRILINOS_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + + +/* + * ----------------------------------------------------------------- + * PART I: N_Vector interface to Trilinos vector + * ----------------------------------------------------------------- + */ + +/* + * Dummy _N_VectorContent_Trilinos structure is used for + * interfacing C with C++ code + */ + +struct _N_VectorContent_Trilinos {}; + +typedef struct _N_VectorContent_Trilinos *N_VectorContent_Trilinos; + +/* + * ----------------------------------------------------------------- + * PART II: functions exported by nvector_Trilinos + * + * CONSTRUCTORS: + * N_VNewEmpty_Trilinos + * ----------------------------------------------------------------- + */ + + +/* + * ----------------------------------------------------------------- + * Function : N_VNewEmpty_Trilinos + * ----------------------------------------------------------------- + * This function creates a new N_Vector wrapper for a Trilinos + * vector. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector N_VNewEmpty_Trilinos(SUNContext sunctx); + +/* + * ----------------------------------------------------------------- + * Trilinos implementations of the vector operations + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID_Trilinos(N_Vector v); +SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Trilinos(N_Vector w); +SUNDIALS_EXPORT N_Vector N_VClone_Trilinos(N_Vector w); +SUNDIALS_EXPORT void N_VDestroy_Trilinos(N_Vector v); +SUNDIALS_EXPORT void N_VSpace_Trilinos(N_Vector v, sunindextype *lrw, sunindextype *liw); +SUNDIALS_EXPORT void *N_VGetCommunicator_Trilinos(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetLength_Trilinos(N_Vector v); +SUNDIALS_EXPORT void N_VLinearSum_Trilinos(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VConst_Trilinos(realtype c, N_Vector z); +SUNDIALS_EXPORT void N_VProd_Trilinos(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VDiv_Trilinos(N_Vector x, N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VScale_Trilinos(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAbs_Trilinos(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VInv_Trilinos(N_Vector x, N_Vector z); +SUNDIALS_EXPORT void N_VAddConst_Trilinos(N_Vector x, realtype b, N_Vector z); +SUNDIALS_EXPORT realtype N_VDotProd_Trilinos(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNorm_Trilinos(N_Vector x); +SUNDIALS_EXPORT realtype N_VWrmsNorm_Trilinos(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWrmsNormMask_Trilinos(N_Vector x, N_Vector w, N_Vector id); +SUNDIALS_EXPORT realtype N_VMin_Trilinos(N_Vector x); +SUNDIALS_EXPORT realtype N_VWL2Norm_Trilinos(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VL1Norm_Trilinos(N_Vector x); +SUNDIALS_EXPORT void N_VCompare_Trilinos(realtype c, N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VInvTest_Trilinos(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMask_Trilinos(N_Vector c, N_Vector x, N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotient_Trilinos(N_Vector num, N_Vector denom); + +/* OPTIONAL local reduction kernels (no parallel communication) */ +SUNDIALS_EXPORT realtype N_VDotProdLocal_Trilinos(N_Vector x, N_Vector y); +SUNDIALS_EXPORT realtype N_VMaxNormLocal_Trilinos(N_Vector x); +SUNDIALS_EXPORT realtype N_VMinLocal_Trilinos(N_Vector x); +SUNDIALS_EXPORT realtype N_VL1NormLocal_Trilinos(N_Vector x); +SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Trilinos(N_Vector x, N_Vector w); +SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Trilinos(N_Vector x, N_Vector w, + N_Vector id); +SUNDIALS_EXPORT booleantype N_VInvTestLocal_Trilinos(N_Vector x, N_Vector z); +SUNDIALS_EXPORT booleantype N_VConstrMaskLocal_Trilinos(N_Vector c, N_Vector x, + N_Vector m); +SUNDIALS_EXPORT realtype N_VMinQuotientLocal_Trilinos(N_Vector num, + N_Vector denom); + +#ifdef __cplusplus +} +#endif + +#endif /* _NVECTOR_TRILINOS_H */ diff --git a/inst/include/nvector/trilinos/SundialsTpetraVectorInterface.hpp b/inst/include/nvector/trilinos/SundialsTpetraVectorInterface.hpp new file mode 100644 index 0000000..7bfb2e5 --- /dev/null +++ b/inst/include/nvector/trilinos/SundialsTpetraVectorInterface.hpp @@ -0,0 +1,70 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_TPETRA_INTERFACE_HPP_ +#define _SUNDIALS_TPETRA_INTERFACE_HPP_ + +#include +#include + +namespace sundials +{ +namespace trilinos +{ +namespace nvector_tpetra +{ + + struct TpetraVectorInterface : public _N_VectorContent_Trilinos + { + // Typedef of Tpetra vector class to be used with SUNDIALS + typedef Tpetra::Vector vector_type; + + TpetraVectorInterface(Teuchos::RCP rcpvec) + { + rcpvec_ = rcpvec; + } + + ~TpetraVectorInterface() = default; + + Teuchos::RCP rcpvec_; + }; + + +} // namespace nvector_tpetra +} // namespace trilinos +} // namespace sundials + +inline Teuchos::RCP N_VGetVector_Trilinos(N_Vector v) +{ + sundials::trilinos::nvector_tpetra::TpetraVectorInterface* iface = + reinterpret_cast(v->content); + + return iface->rcpvec_; +} + +/* + * ----------------------------------------------------------------- + * Function : N_VMake_Trilinos + * ----------------------------------------------------------------- + * This function attaches N_Vector functions to a Tpetra vector. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT N_Vector +N_VMake_Trilinos(Teuchos::RCP v, + SUNContext sunctx); + + + +#endif // _TPETRA_SUNDIALS_INTERFACE_HPP_ diff --git a/inst/include/nvector/trilinos/SundialsTpetraVectorKernels.hpp b/inst/include/nvector/trilinos/SundialsTpetraVectorKernels.hpp new file mode 100644 index 0000000..df5a092 --- /dev/null +++ b/inst/include/nvector/trilinos/SundialsTpetraVectorKernels.hpp @@ -0,0 +1,678 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Slaven Peles @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_TPETRA_VECTOR_KERNELS_HPP_ +#define _SUNDIALS_TPETRA_VECTOR_KERNELS_HPP_ + +#include +#include +#include + +namespace sundials +{ +namespace trilinos +{ + + /** + * The namespace contains custom Kokkos-based kernels needed by SUNDIALS + * + * Kernels are inlined in case this file is included in more than one + * translation unit. + */ + namespace nvector_tpetra + { + using Teuchos::outArg; + using Teuchos::REDUCE_SUM; + using Teuchos::REDUCE_MIN; + using Teuchos::REDUCE_MAX; + using Teuchos::reduceAll; + + typedef sundials::trilinos::nvector_tpetra::TpetraVectorInterface::vector_type vector_type; + typedef vector_type::scalar_type scalar_type; + typedef vector_type::mag_type mag_type; + typedef vector_type::global_ordinal_type global_ordinal_type; + typedef vector_type::local_ordinal_type local_ordinal_type; + typedef vector_type::node_type::memory_space memory_space; + typedef vector_type::execution_space execution_space; + + static constexpr scalar_type zero = 0; + static constexpr scalar_type half = 0.5; + static constexpr scalar_type one = 1.0; + static constexpr scalar_type onept5 = 1.5; + + /*---------------------------------------------------------------- + * Streaming vector kernels + *---------------------------------------------------------------*/ + + /// Divide: z(i) = x(i)/y(i) forall i + inline void elementWiseDivide(const vector_type& x, + const vector_type& y, + vector_type& z) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (y.need_sync()) + const_cast(y).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto y_2d = y.getLocalView(); + auto y_1d = Kokkos::subview (y_2d, Kokkos::ALL(), 0); + auto z_2d = z.getLocalView(); + auto z_1d = Kokkos::subview (z_2d, Kokkos::ALL(), 0); + + z.modify(); + + Kokkos::parallel_for ("elementWiseDivide", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i) + { + z_1d(i) = x_1d(i)/y_1d(i); + } + ); + } + + + /// Add constant to all vector elements: z(i) = x(i) + b + inline void addConst(const vector_type& x, + scalar_type b, + vector_type& z) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (z.need_sync()) + const_cast(z).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto z_2d = z.getLocalView(); + auto z_1d = Kokkos::subview (z_2d, Kokkos::ALL(), 0); + + z.modify(); + + Kokkos::parallel_for ("addConst", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i) + { + z_1d(i) = x_1d(i) + b; + } + ); + } + + + /// Compare vector elements to c: z(i) = |x(i)| >= c ? 1 : 0 + inline void compare(scalar_type c, + const vector_type& x, + vector_type& z) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (z.need_sync()) + const_cast(z).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto z_2d = z.getLocalView(); + auto z_1d = Kokkos::subview (z_2d, Kokkos::ALL(), 0); + + z.modify(); + + Kokkos::parallel_for ("compare", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i) + { + z_1d(i) = std::abs(x_1d(i)) >= c ? one : zero; + } + ); + } + + + /*---------------------------------------------------------------- + * Reduction vector kernels + *---------------------------------------------------------------*/ + + /// Weighted root-mean-square norm + inline mag_type normWrms(const vector_type& x, + const vector_type& w) + { + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + const global_ordinal_type Nglob = + static_cast(x.getGlobalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (w.need_sync()) + const_cast(w).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto w_2d = w.getLocalView(); + auto w_1d = Kokkos::subview (w_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("normWrms", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + local_sum += x_1d(i)*w_1d(i)*(x_1d(i)*w_1d(i)); + }, sum); + + mag_type globalSum = zero; + reduceAll(*comm, REDUCE_SUM, sum, outArg(globalSum)); + return std::sqrt(globalSum/static_cast(Nglob)); + } + + + /// Weighted root-mean-square norm with mask + inline mag_type normWrmsMask(const vector_type& x, + const vector_type& w, + const vector_type& id) + { + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + const global_ordinal_type Nglob = + static_cast(x.getGlobalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (w.need_sync()) + const_cast(w).sync(); + if (id.need_sync()) + const_cast(id).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto w_2d = w.getLocalView(); + auto w_1d = Kokkos::subview (w_2d, Kokkos::ALL(), 0); + auto id_2d = id.getLocalView(); + auto id_1d = Kokkos::subview (id_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("normWrmsMask", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + if (id_1d(i) > zero) + local_sum += x_1d(i)*w_1d(i)*(x_1d(i)*w_1d(i)); + }, sum); + + mag_type globalSum = zero; + reduceAll(*comm, REDUCE_SUM, sum, outArg(globalSum)); + return std::sqrt(globalSum/static_cast(Nglob)); + } + + + /// Find minimum element value in the vector + inline scalar_type minElement(const vector_type& x) + { + using namespace Kokkos; + + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + Kokkos::parallel_reduce ("minElement", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + min_reducer.join(local_min, x_1d(i)); + }, min_reducer); + + scalar_type globalMin; + reduceAll(*comm, REDUCE_MIN, minimum, outArg(globalMin)); + return globalMin; + } + + + /// Weighted L2 norm + inline mag_type normWL2(const vector_type& x, + const vector_type& w) + { + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + if (w.need_sync()) + const_cast(w).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto w_2d = w.getLocalView(); + auto w_1d = Kokkos::subview (w_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("normWL2", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + local_sum += x_1d(i)*w_1d(i)*(x_1d(i)*w_1d(i)); + }, sum); + + mag_type globalSum = zero; + reduceAll(*comm, REDUCE_SUM, sum, outArg(globalSum)); + return std::sqrt(globalSum); + } + + + /// Elementwise inverse, return false if any denominator is zero. + inline bool invTest(const vector_type& x, + vector_type& z) + { + using namespace Kokkos; + + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto z_2d = z.getLocalView(); + auto z_1d = Kokkos::subview (z_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + z.modify(); + + Kokkos::parallel_reduce ("invTest", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + static constexpr scalar_type zero = 0; + static constexpr scalar_type one = 1.0; + if (x_1d(i) == zero) + { + min_reducer.join(local_min, zero); + } + else + { + z_1d(i) = one/x_1d(i); + } + }, min_reducer); + + scalar_type globalMin; + reduceAll(*comm, REDUCE_MIN, minimum, outArg(globalMin)); + return (globalMin > half); + } + + + /// Find constraint violations + inline bool constraintMask(const vector_type& c, + const vector_type& x, + vector_type& m) + { + const Teuchos::RCP >& comm = x.getMap()->getComm(); + const local_ordinal_type N = + static_cast(x.getLocalLength()); + + if (c.need_sync()) + const_cast(c).sync(); + if (x.need_sync()) + const_cast(x).sync(); + + auto c_2d = c.getLocalView(); + auto c_1d = Kokkos::subview (c_2d, Kokkos::ALL(), 0); + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto m_2d = m.getLocalView(); + auto m_1d = Kokkos::subview (m_2d, Kokkos::ALL(), 0); + + m.modify(); + + mag_type sum = zero; + Kokkos::parallel_reduce ("constraintMask", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + const bool test = (std::abs(c_1d(i)) > onept5 && c_1d(i)*x_1d(i) <= zero) || + (std::abs(c_1d(i)) > half && c_1d(i)*x_1d(i) < zero); + m_1d(i) = test ? one : zero; + local_sum += m_1d(i); + }, sum); + + mag_type globalSum = zero; + reduceAll(*comm, REDUCE_SUM, sum, outArg(globalSum)); + return (globalSum < half); + } + + + /// Minimum quotient: min_i(num(i)/den(i)) + inline scalar_type minQuotient(const vector_type& num, + const vector_type& den) + { + using namespace Kokkos; + + const Teuchos::RCP >& comm = num.getMap()->getComm(); + const local_ordinal_type N = + static_cast(num.getLocalLength()); + + if (num.need_sync()) + const_cast(num).sync(); + if (den.need_sync()) + const_cast(den).sync(); + + auto num_2d = num.getLocalView(); + auto num_1d = Kokkos::subview (num_2d, Kokkos::ALL(), 0); + auto den_2d = den.getLocalView(); + auto den_1d = Kokkos::subview (den_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + Kokkos::parallel_reduce ("minQuotient", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + if (den_1d(i) != zero) + min_reducer.join(local_min, num_1d(i)/den_1d(i)); + }, min_reducer); + + scalar_type globalMin; + reduceAll(*comm, REDUCE_MIN, minimum, outArg(globalMin)); + return globalMin; + } + + + /// MPI task-local dot-product + inline scalar_type dotProdLocal(const vector_type& x, + const vector_type& y) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + if (y.need_sync()) + const_cast(y).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto y_2d = y.getLocalView(); + auto y_1d = Kokkos::subview (y_2d, Kokkos::ALL(), 0); + + scalar_type sum = zero; + Kokkos::parallel_reduce ("dotProdLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_sum) + { + local_sum += x_1d(i)*y_1d(i); + }, sum); + + return sum; + } + + + /// MPI task-local maximum norm of a vector + inline mag_type maxNormLocal(const vector_type& x) + { + using namespace Kokkos; + + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + + mag_type maximum; + Max max_reducer(maximum); + + Kokkos::parallel_reduce ("maxNormLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_max) + { + max_reducer.join(local_max, std::abs(x_1d(i))); + }, max_reducer); + + return maximum; + } + + + /// MPI task-local minimum element in the vector + inline scalar_type minLocal(const vector_type& x) + { + using namespace Kokkos; + + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + Kokkos::parallel_reduce ("minElement", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + min_reducer.join(local_min, x_1d(i)); + }, min_reducer); + + return minimum; + } + + + /// MPI task-local L1 norm of a vector + inline mag_type L1NormLocal(const vector_type& x) + { + using namespace Kokkos; + + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("L1NormLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + local_sum += std::abs(x_1d(i)); + }, sum); + + return sum; + } + + + /// MPI task-local weighted squared sum + inline mag_type WSqrSumLocal(const vector_type& x, + const vector_type& w) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + if (w.need_sync()) + const_cast(w).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto w_2d = w.getLocalView(); + auto w_1d = Kokkos::subview (w_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("WSqrSumLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + local_sum += x_1d(i)*w_1d(i)*(x_1d(i)*w_1d(i)); + }, sum); + + return sum; + } + + + /// MPI task-local weighted squared masked sum + inline mag_type WSqrSumMaskLocal(const vector_type& x, + const vector_type& w, + const vector_type& id) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + if (w.need_sync()) + const_cast(w).sync(); + if (id.need_sync()) + const_cast(id).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto w_2d = w.getLocalView(); + auto w_1d = Kokkos::subview (w_2d, Kokkos::ALL(), 0); + auto id_2d = id.getLocalView(); + auto id_1d = Kokkos::subview (id_2d, Kokkos::ALL(), 0); + + mag_type sum = zero; + Kokkos::parallel_reduce ("WSqrSumMaskLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + if (id_1d(i) > zero) + local_sum += x_1d(i)*w_1d(i)*(x_1d(i)*w_1d(i)); + }, sum); + + return sum; + } + + + /// MPI task-local elementwise inverse, return false if any denominator is zero. + inline bool invTestLocal(const vector_type& x, + vector_type& z) + { + using namespace Kokkos; + + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (x.need_sync()) + const_cast(x).sync(); + + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto z_2d = z.getLocalView(); + auto z_1d = Kokkos::subview (z_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + z.modify(); + + Kokkos::parallel_reduce ("invTestLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + static constexpr scalar_type zero = 0; + static constexpr scalar_type one = 1.0; + if (x_1d(i) == zero) + { + min_reducer.join(local_min, zero); + } + else + { + z_1d(i) = one/x_1d(i); + } + }, min_reducer); + + return (minimum > half); + } + + + /// MPI task-local constraint violation check + inline bool constraintMaskLocal(const vector_type& c, + const vector_type& x, + vector_type& m) + { + const local_ordinal_type N = + static_cast(x.getLocalLength()); + if (c.need_sync()) + const_cast(c).sync(); + if (x.need_sync()) + const_cast(x).sync(); + + auto c_2d = c.getLocalView(); + auto c_1d = Kokkos::subview (c_2d, Kokkos::ALL(), 0); + auto x_2d = x.getLocalView(); + auto x_1d = Kokkos::subview (x_2d, Kokkos::ALL(), 0); + auto m_2d = m.getLocalView(); + auto m_1d = Kokkos::subview (m_2d, Kokkos::ALL(), 0); + + m.modify(); + + mag_type sum = zero; + Kokkos::parallel_reduce ("constraintMaskLocal", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, mag_type &local_sum) + { + const bool test = (std::abs(c_1d(i)) > onept5 && c_1d(i)*x_1d(i) <= zero) || + (std::abs(c_1d(i)) > half && c_1d(i)*x_1d(i) < zero); + m_1d(i) = test ? one : zero; + local_sum += m_1d(i); + }, sum); + + return (sum < half); + } + + + /// MPI task-local minimum quotient: min_i(num(i)/den(i)) + inline scalar_type minQuotientLocal(const vector_type& num, + const vector_type& den) + { + using namespace Kokkos; + + const local_ordinal_type N = + static_cast(num.getLocalLength()); + if (num.need_sync()) + const_cast(num).sync(); + if (den.need_sync()) + const_cast(den).sync(); + + auto num_2d = num.getLocalView(); + auto num_1d = Kokkos::subview (num_2d, Kokkos::ALL(), 0); + auto den_2d = den.getLocalView(); + auto den_1d = Kokkos::subview (den_2d, Kokkos::ALL(), 0); + + scalar_type minimum; + Min min_reducer(minimum); + + Kokkos::parallel_reduce ("minQuotient", Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA (const local_ordinal_type &i, scalar_type &local_min) + { + if (den_1d(i) != zero) + min_reducer.join(local_min, num_1d(i)/den_1d(i)); + }, min_reducer); + + return minimum; + } + + + } // namespace nvector_tpetra + +} // namespace trilinos +} // namespace sundials + +#endif // _TPETRA_SUNDIALS_VECTOR_KERNELS_HPP_ diff --git a/inst/include/sundials/sundials_band.h b/inst/include/sundials/sundials_band.h index c549d29..2ba469a 100644 --- a/inst/include/sundials/sundials_band.h +++ b/inst/include/sundials/sundials_band.h @@ -2,7 +2,7 @@ * Programmer(s): Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -12,12 +12,12 @@ * SUNDIALS Copyright End * ----------------------------------------------------------------- * This is the header file for a generic BAND linear solver - * package, based on the DlsMat type defined in sundials_direct.h. + * package, based on the SUNDlsMat type defined in sundials_direct.h. * * There are two sets of band solver routines listed in - * this file: one set uses type DlsMat defined below and the + * this file: one set uses type SUNDlsMat defined below and the * other set uses the type realtype ** for band matrix arguments. - * Routines that work with the type DlsMat begin with "Band". + * Routines that work with the type SUNDlsMat begin with "Band". * Routines that work with realtype ** begin with "band". * -----------------------------------------------------------------*/ @@ -32,12 +32,12 @@ extern "C" { /* * ----------------------------------------------------------------- - * Function : BandGBTRF + * Function: SUNDlsMat_BandGBTRF * ----------------------------------------------------------------- - * Usage : ier = BandGBTRF(A, p); + * Usage : ier = SUNDlsMat_BandGBTRF(A, p); * if (ier != 0) ... A is singular * ----------------------------------------------------------------- - * BandGBTRF performs the LU factorization of the N by N band + * SUNDlsMat_BandGBTRF performs the LU factorization of the N by N band * matrix A. This is done using standard Gaussian elimination * with partial pivoting. * @@ -45,7 +45,7 @@ extern "C" { * pivot array p with the following information: * * (1) p[k] contains the row number of the pivot element chosen - * at the beginning of elimination step k, k=0, 1, ..., N-1. + * at the beginning of elimination step k, k = 0, 1, ..., N-1. * * (2) If the unique LU factorization of A is given by PA = LU, * where P is a permutation matrix, L is a lower triangular @@ -54,7 +54,7 @@ extern "C" { * (including its diagonal) contains U and the strictly lower * triangular part of A contains the multipliers, I-L. * - * BandGBTRF returns 0 if successful. Otherwise it encountered + * SUNDlsMat_BandGBTRF returns 0 if successful. Otherwise it encountered * a zero diagonal element during the factorization. In this case * it returns the column index (numbered from one) at which * it encountered the zero. @@ -68,111 +68,168 @@ extern "C" { * call A = BandAllocMat(N,mu,ml,smu), where mu, ml, and smu are * as defined above. The user does not have to zero the "extra" * storage allocated for the purpose of factorization. This will - * handled by the BandGBTRF routine. + * handled by the SUNDlsMat_BandGBTRF routine. * - * BandGBTRF is only a wrapper around bandGBTRF. All work is done - * in bandGBTRF, which works directly on the data in the DlsMat A - * (i.e. in the field A->cols). + * SUNDlsMat_BandGBTRF is only a wrapper around SUNDlsMat_bandGBTRF. + * All work is done in SUNDlsMat_bandGBTRF, which works directly on the + * data in the SUNDlsMat A (i.e. in the field A->cols). * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT sunindextype BandGBTRF(DlsMat A, sunindextype *p); -SUNDIALS_EXPORT sunindextype bandGBTRF(realtype **a, sunindextype n, - sunindextype mu, sunindextype ml, - sunindextype smu, sunindextype *p); +SUNDIALS_EXPORT +sunindextype SUNDlsMat_BandGBTRF(SUNDlsMat A, sunindextype* p); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_BandGBTRF instead") +sunindextype BandGBTRF(DlsMat A, sunindextype *p); + +SUNDIALS_EXPORT +sunindextype SUNDlsMat_bandGBTRF(realtype **a, sunindextype n, + sunindextype mu, sunindextype ml, + sunindextype smu, sunindextype *p); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandGBTRF instead") +sunindextype bandGBTRF(realtype **a, sunindextype n, + sunindextype mu, sunindextype ml, + sunindextype smu, sunindextype *p); /* * ----------------------------------------------------------------- - * Function : BandGBTRS + * Function: SUNDlsMat_BandGBTRS * ----------------------------------------------------------------- - * Usage : BandGBTRS(A, p, b); + * Usage: SUNDlsMat_BandGBTRS(A, p, b); * ----------------------------------------------------------------- - * BandGBTRS solves the N-dimensional system A x = b using - * the LU factorization in A and the pivot information in p - * computed in BandGBTRF. The solution x is returned in b. This - * routine cannot fail if the corresponding call to BandGBTRF - * did not fail. + * SUNDlsMat_BandGBTRS solves the N-dimensional system A x = b using + * the LU factorization in A and the pivot information in p computed + * in SUNDlsMat_BandGBTRF. The solution x is returned in b. This + * routine cannot fail if the corresponding call to + * SUNDlsMat_BandGBTRF did not fail. * - * BandGBTRS is only a wrapper around bandGBTRS which does all the - * work directly on the data in the DlsMat A (i.e. in A->cols). + * SUNDlsMat_BandGBTRS is only a wrapper around SUNDlsMat_bandGBTRS + * which does all the work directly on the data in the DlsMat A (i.e. + * in A->cols). * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void BandGBTRS(DlsMat A, sunindextype *p, realtype *b); -SUNDIALS_EXPORT void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, - sunindextype ml, sunindextype *p, realtype *b); +SUNDIALS_EXPORT +void SUNDlsMat_BandGBTRS(SUNDlsMat A, sunindextype *p, realtype *b); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_BandGBTRS instead") +void BandGBTRS(DlsMat A, sunindextype *p, realtype *b); + +SUNDIALS_EXPORT +void SUNDlsMat_bandGBTRS(realtype **a, sunindextype n, sunindextype smu, + sunindextype ml, sunindextype *p, realtype *b); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandGBTRS instead") +void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, + sunindextype ml, sunindextype *p, realtype *b); /* * ----------------------------------------------------------------- - * Function : BandCopy + * Function: SUNDlsMat_BandCopy * ----------------------------------------------------------------- - * Usage : BandCopy(A, B, copymu, copyml); + * Usage: SUNDlsMat_BandCopy(A, B, copymu, copyml); * ----------------------------------------------------------------- - * BandCopy copies the submatrix with upper and lower bandwidths - * copymu, copyml of the N by N band matrix A into the N by N - * band matrix B. + * SUNDlsMat_BandCopy copies the submatrix with upper and lower + * bandwidths copymu, copyml of the N by N band matrix A into the N by + * N band matrix B. * - * BandCopy is a wrapper around bandCopy which accesses the data - * in the DlsMat A and DlsMat B (i.e. the fields cols). + * SUNDlsMat_BandCopy is a wrapper around SUNDlsMat_bandCopy which + * accesses the data in the DlsMat A and DlsMat B (i.e. the fields + * cols). * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void BandCopy(DlsMat A, DlsMat B, sunindextype copymu, - sunindextype copyml); -SUNDIALS_EXPORT void bandCopy(realtype **a, realtype **b, sunindextype n, - sunindextype a_smu, sunindextype b_smu, - sunindextype copymu, sunindextype copyml); +SUNDIALS_EXPORT +void SUNDlsMat_BandCopy(SUNDlsMat A, SUNDlsMat B, sunindextype copymu, + sunindextype copyml); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_BandCopy instead") +void BandCopy(DlsMat A, DlsMat B, sunindextype copymu, + sunindextype copyml); + +SUNDIALS_EXPORT +void SUNDlsMat_bandCopy(realtype **a, realtype **b, sunindextype n, + sunindextype a_smu, sunindextype b_smu, + sunindextype copymu, sunindextype copyml); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandCopy instead") +void bandCopy(realtype **a, realtype **b, sunindextype n, + sunindextype a_smu, sunindextype b_smu, + sunindextype copymu, sunindextype copyml); /* * ----------------------------------------------------------------- - * Function: BandScale + * Function: SUNDlsMat_BandScale * ----------------------------------------------------------------- - * Usage : BandScale(c, A); + * Usage: SUNDlsMat_BandScale(c, A); * ----------------------------------------------------------------- - * A(i,j) <- c*A(i,j), j-(A->mu) <= i <= j+(A->ml). + * A(i,j) <- c*A(i,j), j-(A->mu) < = i < = j+(A->ml). * - * BandScale is a wrapper around bandScale which performs the actual - * scaling by accessing the data in the DlsMat A (i.e. the field - * A->cols). + * SUNDlsMat_BandScale is a wrapper around SUNDlsMat_bandScale which + * performs the actual scaling by accessing the data in the + * SUNDlsMat A (i.e. the field A->cols). * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void BandScale(realtype c, DlsMat A); -SUNDIALS_EXPORT void bandScale(realtype c, realtype **a, sunindextype n, - sunindextype mu, sunindextype ml, - sunindextype smu); +void SUNDlsMat_BandScale(realtype c, SUNDlsMat A); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_BandScale instead") +void BandScale(realtype c, DlsMat A); + +SUNDIALS_EXPORT +void SUNDlsMat_bandScale(realtype c, realtype **a, sunindextype n, + sunindextype mu, sunindextype ml, sunindextype smu); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandScale instead") +void bandScale(realtype c, realtype **a, sunindextype n, + sunindextype mu, sunindextype ml, sunindextype smu); /* * ----------------------------------------------------------------- - * Function: bandAddIdentity + * Function: SUNDlsMat_bandAddIdentity * ----------------------------------------------------------------- - * bandAddIdentity adds the identity matrix to the n-by-n matrix - * stored in the realtype** arrays. + * SUNDlsMat_bandAddIdentity adds the identity matrix to the n-by-n + * matrix stored in the realtype** arrays. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void bandAddIdentity(realtype **a, sunindextype n, - sunindextype smu); +void SUNDlsMat_bandAddIdentity(realtype **a, sunindextype n, sunindextype smu); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandAddIdentity instead") +void bandAddIdentity(realtype **a, sunindextype n, sunindextype smu); /* * ----------------------------------------------------------------- - * Function: BandMatvec + * Function: SUNDlsMat_BandMatvec * ----------------------------------------------------------------- - * BandMatvec computes the matrix-vector product y = A*x, where A - * is an M-by-N band matrix, x is a vector of length N, and y is a - * vector of length M. No error checking is performed on the length - * of the arrays x and y. Only y is modified in this routine. + * SUNDlsMat_BandMatvec computes the matrix-vector product y = A*x, + * where A is an M-by-N band matrix, x is a vector of length N, and y + * is a vector of length M. No error checking is performed on the + * length of the arrays x and y. Only y is modified in this routine. * - * BandMatvec is a wrapper around bandMatvec which performs the - * actual product by accessing the data in the DlsMat A. + * SUNDlsMat_BandMatvec is a wrapper around SUNDlsMat_bandMatvec which + * performs the actual product by accessing the data in the SUNDlsMat + * A. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void BandMatvec(DlsMat A, realtype *x, realtype *y); -SUNDIALS_EXPORT void bandMatvec(realtype **a, realtype *x, realtype *y, - sunindextype n, sunindextype mu, - sunindextype ml, sunindextype smu); +SUNDIALS_EXPORT +void SUNDlsMat_BandMatvec(SUNDlsMat A, realtype *x, realtype *y); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_BandMatvec instead") +void BandMatvec(DlsMat A, realtype *x, realtype *y); + +SUNDIALS_EXPORT +void SUNDlsMat_bandMatvec(realtype **a, realtype *x, realtype *y, + sunindextype n, sunindextype mu, + sunindextype ml, sunindextype smu); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_bandMatvec instead") +void bandMatvec(realtype **a, realtype *x, realtype *y, + sunindextype n, sunindextype mu, + sunindextype ml, sunindextype smu); #ifdef __cplusplus } diff --git a/inst/include/sundials/sundials_base.hpp b/inst/include/sundials/sundials_base.hpp new file mode 100644 index 0000000..753cfef --- /dev/null +++ b/inst/include/sundials/sundials_base.hpp @@ -0,0 +1,159 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Base classes for C++ implementations of SUNDIALS objects and wrappers (views) + * of SUNDIALS objects + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_BASE_HPP +#define _SUNDIALS_BASE_HPP + +#include +#include +#include + +namespace sundials { +namespace impl { + +// +// Common base class for C++ implementations of SUNDIALS data structures. +// +template +class BaseObject +{ +public: + BaseObject() = default; + + BaseObject(SUNContext sunctx) + : sunctx_(sunctx), object_(std::make_unique()), object_ops_(std::make_unique()) + { + object_->content = this; + object_->sunctx = sunctx_; + object_->ops = object_ops_.get(); + } + + // Move constructor + BaseObject(BaseObject&& other) noexcept + : sunctx_(std::move(other.sunctx_)), object_(std::move(other.object_)), object_ops_(std::move(other.object_ops_)) + { + object_->content = this; + object_->sunctx = sunctx_; + object_->ops = object_ops_.get(); + } + + // Copy constructor + BaseObject(const BaseObject& other) + : sunctx_(other.sunctx_), object_(std::make_unique()), + object_ops_(std::make_unique(*other.object_ops_)) + { + object_->content = this; + object_->sunctx = other.sunctx_; + object_->ops = object_ops_.get(); + } + + // Move assignment + BaseObject& operator=(BaseObject&& rhs) noexcept + { + sunctx_ = std::move(rhs.sunctx_); + object_ops_ = std::move(rhs.object_ops_); + object_ = std::move(rhs.object_); + object_->content = this; + object_->sunctx = sunctx_; + object_->ops = object_ops_.get(); + return *this; + } + + // Copy assignment + BaseObject& operator=(const BaseObject& rhs) + { + sunctx_ = rhs.sunctx_; + object_ops_ = std::make_unique(*rhs.object_ops_); + object_ = std::make_unique(); + object_->content = this; + object_->sunctx = sunctx_; + object_->ops = object_ops_.get(); + return *this; + } + + // We have a pure virtual destructor to make this an asbtract class + virtual ~BaseObject() = 0; + + // Getters + SUNContext sunctx() const + { + return this->object_->sunctx; + } + +protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + SUNContext sunctx_{}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr object_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr object_ops_; +}; + +// Pure virtual destructor requires implementation +template +BaseObject::~BaseObject() = default; + +} // namespace impl + +namespace experimental { + +template +class ClassView : public sundials::ConvertibleTo +{ +public: + ClassView() : object_(nullptr) {} + ClassView(T&& object) : object_(std::make_unique(object)) {} + + ClassView(const ClassView&) = delete; + ClassView(ClassView&& other) = default; + + ClassView& operator=(const ClassView&) = delete; + ClassView& operator=(ClassView&& rhs) = default; + + ~ClassView() + { + if (object_) { + Deleter{}(this->Convert()); + } + }; + + // Override ConvertibleTo functions + T Convert() override + { + return *object_.get(); + } + T Convert() const override + { + return *object_.get(); + } + operator T() override + { + return *object_.get(); + } + operator T() const override + { + return *object_.get(); + } + +private: + std::unique_ptr object_; +}; + +} // namespace experimental +} // namespace sundials + +#endif diff --git a/inst/include/sundials/sundials_config.h b/inst/include/sundials/sundials_config.h index 890ac5e..f054943 100644 --- a/inst/include/sundials/sundials_config.h +++ b/inst/include/sundials/sundials_config.h @@ -1,49 +1,59 @@ -/* ----------------------------------------------------------------- - * Programmer(s): Aaron Collier and Radu Serban @ LLNL +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos, Aaron Collier and Radu Serban @ LLNL * ----------------------------------------------------------------- - * LLNS/SMU Copyright Start - * Copyright (c) 2002-2018, Southern Methodist University and - * Lawrence Livermore National Security + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. * - * This work was performed under the auspices of the U.S. Department - * of Energy by Southern Methodist University and Lawrence Livermore - * National Laboratory under Contract DE-AC52-07NA27344. - * Produced at Southern Methodist University and the Lawrence - * Livermore National Laboratory. + * See the top-level LICENSE and NOTICE files for details. * - * All rights reserved. - * For details, see the LICENSE file. - * LLNS/SMU Copyright End + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End * ----------------------------------------------------------------- - * SUNDIALS configuration header file + * SUNDIALS configuration header file. * -----------------------------------------------------------------*/ -/* Define SUNDIALS version numbers */ -#define SUNDIALS_VERSION "5.0.0-dev.2" -#define SUNDIALS_VERSION_MAJOR 5 -#define SUNDIALS_VERSION_MINOR 0 -#define SUNDIALS_VERSION_PATCH 0 -#define SUNDIALS_VERSION_LABEL "dev.2" +#ifndef _SUNDIALS_CONFIG_H +#define _SUNDIALS_CONFIG_H -/* FCMIX: Define Fortran name-mangling macro for C identifiers. - * Depending on the inferred scheme, one of the following six - * macros will be defined: - * #define SUNDIALS_F77_FUNC(name,NAME) name - * #define SUNDIALS_F77_FUNC(name,NAME) name ## _ - * #define SUNDIALS_F77_FUNC(name,NAME) name ## __ - * #define SUNDIALS_F77_FUNC(name,NAME) NAME - * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## _ - * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## __ - */ +#include "sundials/sundials_export.h" +#ifndef SUNDIALS_DEPRECATED_MSG +# define SUNDIALS_DEPRECATED_MSG(msg) __attribute__ ((__deprecated__(msg))) +#endif -/* FCMIX: Define Fortran name-mangling macro for C identifiers - * which contain underscores. - */ +#ifndef SUNDIALS_DEPRECATED_EXPORT_MSG +# define SUNDIALS_DEPRECATED_EXPORT_MSG(msg) SUNDIALS_EXPORT SUNDIALS_DEPRECATED_MSG(msg) +#endif + +#ifndef SUNDIALS_DEPRECATED_NO_EXPORT_MSG +# define SUNDIALS_DEPRECATED_NO_EXPORT_MSG(msg) SUNDIALS_NO_EXPORT SUNDIALS_DEPRECATED_MSG(msg) +#endif + +/* ------------------------------------------------------------------ + * Define SUNDIALS version numbers + * -----------------------------------------------------------------*/ -/* Define precision of SUNDIALS data type 'realtype' - * Depending on the precision level, one of the following +#define SUNDIALS_VERSION "6.5.0" +#define SUNDIALS_VERSION_MAJOR 6 +#define SUNDIALS_VERSION_MINOR 5 +#define SUNDIALS_VERSION_PATCH 0 +#define SUNDIALS_VERSION_LABEL "" +#define SUNDIALS_GIT_VERSION "" + + +/* ------------------------------------------------------------------ + * SUNDIALS build information + * -----------------------------------------------------------------*/ + +#define SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS +#define SUNDIALS_C_COMPILER_HAS_ISINF_ISNAN +#define SUNDIALS_C_COMPILER_HAS_INLINE + +/* Define precision of SUNDIALS data type 'realtype' + * Depending on the precision level, one of the following * three macros will be defined: * #define SUNDIALS_SINGLE_PRECISION 1 * #define SUNDIALS_DOUBLE_PRECISION 1 @@ -51,8 +61,8 @@ */ #define SUNDIALS_DOUBLE_PRECISION 1 -/* Define type of vector indices in SUNDIALS 'sunindextype'. - * Depending on user choice of index type, one of the following +/* Define type of vector indices in SUNDIALS 'sunindextype'. + * Depending on user choice of index type, one of the following * two macros will be defined: * #define SUNDIALS_INT64_T 1 * #define SUNDIALS_INT32_T 1 @@ -64,72 +74,155 @@ */ #define SUNDIALS_INDEX_TYPE int32_t -/* Use generic math functions - * If it was decided that generic math functions can be used, then - * #define SUNDIALS_USE_GENERIC_MATH +/* Use std-c math functions + * DEPRECATED SUNDIALS_USE_GENERIC_MATH */ -#define SUNDIALS_USE_GENERIC_MATH +/* #undef SUNDIALS_USE_GENERIC_MATH */ /* Use POSIX timers if available. * #define SUNDIALS_HAVE_POSIX_TIMERS */ #define SUNDIALS_HAVE_POSIX_TIMERS -/* Blas/Lapack available - * If working libraries for Blas/lapack support were found, then - * #define SUNDIALS_BLAS_LAPACK - */ -/* #undef SUNDIALS_BLAS_LAPACK */ +/* BUILD CVODE with fused kernel functionality */ +/* #undef SUNDIALS_BUILD_PACKAGE_FUSED_KERNELS */ -/* SUPERLUMT available - * If working libraries for SUPERLUMT support were found, then - * #define SUNDIALS_SUPERLUMT - */ -/* #undef SUNDIALS_SUPERLUMT */ -/* #undef SUNDIALS_SUPERLUMT_THREAD_TYPE */ +/* BUILD SUNDIALS with monitoring functionalities */ +/* #undef SUNDIALS_BUILD_WITH_MONITORING */ -/* SUPERLUDIST available - * If working libraries for SUPERLUDIST support were found, then - * #define SUNDIALS_SUPERLUDIST - */ -/* #undef SUNDIALS_SUPERLUDIST */ +/* BUILD SUNDIALS with profiling functionalities */ +/* #undef SUNDIALS_BUILD_WITH_PROFILING */ -/* KLU available - * If working libraries for KLU support were found, then - * #define SUNDIALS_KLU - */ -/* #undef SUNDIALS_KLU */ +/* BUILD SUNDIALS with logging functionalities */ +#define SUNDIALS_LOGGING_LEVEL 0 + +/* BUILD SUNDIALS with MPI-enabled logging */ +/* #undef SUNDIALS_LOGGING_ENABLE_MPI */ + +/* Is snprintf available? */ +#define SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY +#ifndef SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY +#define SUNDIALS_MAX_SPRINTF_SIZE +#endif + +/* ------------------------------------------------------------------ + * SUNDIALS TPL macros + * -----------------------------------------------------------------*/ -/* Trilinos available - * If working libraries for Trilinos support were found, then - * #define SUNDIALS_TRILINOS +/* Caliper */ +/* #undef SUNDIALS_CALIPER_ENABLED */ + +/* Ginkgo backends */ +/* #undef SUNDIALS_GINKGO_BACKENDS_CUDA */ +/* #undef SUNDIALS_GINKGO_BACKENDS_HIP */ +/* #undef SUNDIALS_GINKGO_BACKENDS_OMP */ +/* #undef SUNDIALS_GINKGO_BACKENDS_REF */ +/* #undef SUNDIALS_GINKGO_BACKENDS_DPCPP */ + +/* MAGMA backends */ +/* #undef SUNDIALS_MAGMA_BACKENDS_CUDA */ +/* #undef SUNDIALS_MAGMA_BACKENDS_HIP */ + +/* Set if SUNDIALS is built with MPI support, then + * #define SUNDIALS_MPI_ENABLED 1 + * otherwise + * #define SUNDIALS_MPI_ENABLED 0 */ -/* #undef SUNDIALS_TRILINOS */ +#define SUNDIALS_MPI_ENABLED 0 + + /* SUPERLUMT threading type */ +/* #undef SUNDIALS_SUPERLUMT_THREAD_TYPE */ /* Trilinos with MPI is available, then * #define SUNDIALS_TRILINOS_HAVE_MPI */ /* #undef SUNDIALS_TRILINOS_HAVE_MPI */ -/* Set if SUNDIALS is built with MPI support. - * +/* RAJA backends */ +/* #undef SUNDIALS_RAJA_BACKENDS_CUDA */ +/* #undef SUNDIALS_RAJA_BACKENDS_HIP */ +/* #undef SUNDIALS_RAJA_BACKENDS_SYCL */ + +/* ------------------------------------------------------------------ + * SUNDIALS modules enabled + * -----------------------------------------------------------------*/ + +#define SUNDIALS_CVODES 1 +#define SUNDIALS_NVECTOR_SERIAL 1 +#define SUNDIALS_NVECTOR_MANYVECTOR 1 +#define SUNDIALS_SUNMATRIX_BAND 1 +#define SUNDIALS_SUNMATRIX_DENSE 1 +#define SUNDIALS_SUNMATRIX_SPARSE 1 +#define SUNDIALS_SUNLINSOL_BAND 1 +#define SUNDIALS_SUNLINSOL_DENSE 1 +#define SUNDIALS_SUNLINSOL_PCG 1 +#define SUNDIALS_SUNLINSOL_SPBCGS 1 +#define SUNDIALS_SUNLINSOL_SPFGMR 1 +#define SUNDIALS_SUNLINSOL_SPGMR 1 +#define SUNDIALS_SUNLINSOL_SPTFQMR 1 +#define SUNDIALS_SUNNONLINSOL_NEWTON 1 +#define SUNDIALS_SUNNONLINSOL_FIXEDPOINT 1 + + + +/* ------------------------------------------------------------------ + * SUNDIALS fortran configuration + * -----------------------------------------------------------------*/ + + +/* Define Fortran name-mangling macro for C identifiers. + * Depending on the inferred scheme, one of the following six + * macros will be defined: + * #define SUNDIALS_F77_FUNC(name,NAME) name + * #define SUNDIALS_F77_FUNC(name,NAME) name ## _ + * #define SUNDIALS_F77_FUNC(name,NAME) name ## __ + * #define SUNDIALS_F77_FUNC(name,NAME) NAME + * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## _ + * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## __ */ -/* FNVECTOR: Allow user to specify different MPI communicator +/* Define Fortran name-mangling macro for C identifiers + * which contain underscores. + */ + + +/* Allow user to specify different MPI communicator * If it was found that the MPI implementation supports MPI_Comm_f2c, then * #define SUNDIALS_MPI_COMM_F2C 1 * otherwise * #define SUNDIALS_MPI_COMM_F2C 0 */ -#define SUNDIALS_MPI_COMM_F2C 0 - -/* Mark SUNDIALS API functions for export/import - * When building shared SUNDIALS libraries under Windows, use - * #define SUNDIALS_EXPORT __declspec(dllexport) - * When linking to shared SUNDIALS libraries under Windows, use - * #define SUNDIALS_EXPORT __declspec(dllimport) - * In all other cases (other platforms or static libraries under - * Windows), the SUNDIALS_EXPORT macro is empty + + + +/* ------------------------------------------------------------------ + * SUNDIALS inline macros. + * -----------------------------------------------------------------*/ + + +/* Mark SUNDIALS function as inline. + */ +#ifndef SUNDIALS_CXX_INLINE +#define SUNDIALS_CXX_INLINE inline +#endif + +#ifndef SUNDIALS_C_INLINE +#ifdef SUNDIALS_C_COMPILER_HAS_INLINE +#define SUNDIALS_C_INLINE inline +#else +#define SUNDIALS_C_INLINE +#endif +#endif + +#ifdef __cplusplus +#define SUNDIALS_INLINE SUNDIALS_CXX_INLINE +#else +#define SUNDIALS_INLINE SUNDIALS_C_INLINE +#endif + +/* Mark SUNDIALS function as static inline. */ -#define SUNDIALS_EXPORT +#define SUNDIALS_STATIC_INLINE static SUNDIALS_INLINE + +#endif /* _SUNDIALS_CONFIG_H */ diff --git a/inst/include/sundials/sundials_config.in b/inst/include/sundials/sundials_config.in index 604518e..c21737f 100644 --- a/inst/include/sundials/sundials_config.in +++ b/inst/include/sundials/sundials_config.in @@ -1,49 +1,59 @@ -/* ----------------------------------------------------------------- - * Programmer(s): Aaron Collier and Radu Serban @ LLNL +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos, Aaron Collier and Radu Serban @ LLNL * ----------------------------------------------------------------- - * LLNS/SMU Copyright Start - * Copyright (c) 2002-2018, Southern Methodist University and - * Lawrence Livermore National Security + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. * - * This work was performed under the auspices of the U.S. Department - * of Energy by Southern Methodist University and Lawrence Livermore - * National Laboratory under Contract DE-AC52-07NA27344. - * Produced at Southern Methodist University and the Lawrence - * Livermore National Laboratory. + * See the top-level LICENSE and NOTICE files for details. * - * All rights reserved. - * For details, see the LICENSE file. - * LLNS/SMU Copyright End + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End * ----------------------------------------------------------------- - * SUNDIALS configuration header file + * SUNDIALS configuration header file. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONFIG_H +#define _SUNDIALS_CONFIG_H + +#include "sundials/sundials_export.h" + +#ifndef SUNDIALS_DEPRECATED_MSG +# define SUNDIALS_DEPRECATED_MSG(msg) @SUNDIALS_DEPRECATED_MSG_MACRO@ +#endif + +#ifndef SUNDIALS_DEPRECATED_EXPORT_MSG +# define SUNDIALS_DEPRECATED_EXPORT_MSG(msg) SUNDIALS_EXPORT SUNDIALS_DEPRECATED_MSG(msg) +#endif + +#ifndef SUNDIALS_DEPRECATED_NO_EXPORT_MSG +# define SUNDIALS_DEPRECATED_NO_EXPORT_MSG(msg) SUNDIALS_NO_EXPORT SUNDIALS_DEPRECATED_MSG(msg) +#endif + +/* ------------------------------------------------------------------ + * Define SUNDIALS version numbers * -----------------------------------------------------------------*/ -/* Define SUNDIALS version numbers */ + #define SUNDIALS_VERSION "@PACKAGE_VERSION@" #define SUNDIALS_VERSION_MAJOR @PACKAGE_VERSION_MAJOR@ #define SUNDIALS_VERSION_MINOR @PACKAGE_VERSION_MINOR@ #define SUNDIALS_VERSION_PATCH @PACKAGE_VERSION_PATCH@ #define SUNDIALS_VERSION_LABEL "@PACKAGE_VERSION_LABEL@" +#define SUNDIALS_GIT_VERSION "@SUNDIALS_GIT_VERSION@" -/* FCMIX: Define Fortran name-mangling macro for C identifiers. - * Depending on the inferred scheme, one of the following six - * macros will be defined: - * #define SUNDIALS_F77_FUNC(name,NAME) name - * #define SUNDIALS_F77_FUNC(name,NAME) name ## _ - * #define SUNDIALS_F77_FUNC(name,NAME) name ## __ - * #define SUNDIALS_F77_FUNC(name,NAME) NAME - * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## _ - * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## __ - */ -@F77_MANGLE_MACRO1@ -/* FCMIX: Define Fortran name-mangling macro for C identifiers - * which contain underscores. - */ -@F77_MANGLE_MACRO2@ +/* ------------------------------------------------------------------ + * SUNDIALS build information + * -----------------------------------------------------------------*/ -/* Define precision of SUNDIALS data type 'realtype' - * Depending on the precision level, one of the following +#cmakedefine SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS +#cmakedefine SUNDIALS_C_COMPILER_HAS_ISINF_ISNAN +#cmakedefine SUNDIALS_C_COMPILER_HAS_INLINE + +/* Define precision of SUNDIALS data type 'realtype' + * Depending on the precision level, one of the following * three macros will be defined: * #define SUNDIALS_SINGLE_PRECISION 1 * #define SUNDIALS_DOUBLE_PRECISION 1 @@ -51,8 +61,8 @@ */ @PRECISION_LEVEL@ -/* Define type of vector indices in SUNDIALS 'sunindextype'. - * Depending on user choice of index type, one of the following +/* Define type of vector indices in SUNDIALS 'sunindextype'. + * Depending on user choice of index type, one of the following * two macros will be defined: * #define SUNDIALS_INT64_T 1 * #define SUNDIALS_INT32_T 1 @@ -64,9 +74,8 @@ */ #define SUNDIALS_INDEX_TYPE @SUNDIALS_CINDEX_TYPE@ -/* Use generic math functions - * If it was decided that generic math functions can be used, then - * #define SUNDIALS_USE_GENERIC_MATH +/* Use std-c math functions + * DEPRECATED SUNDIALS_USE_GENERIC_MATH */ #cmakedefine SUNDIALS_USE_GENERIC_MATH @@ -75,48 +84,95 @@ */ #cmakedefine SUNDIALS_HAVE_POSIX_TIMERS -/* Blas/Lapack available - * If working libraries for Blas/lapack support were found, then - * #define SUNDIALS_BLAS_LAPACK - */ -#cmakedefine SUNDIALS_BLAS_LAPACK +/* BUILD CVODE with fused kernel functionality */ +#cmakedefine SUNDIALS_BUILD_PACKAGE_FUSED_KERNELS -/* SUPERLUMT available - * If working libraries for SUPERLUMT support were found, then - * #define SUNDIALS_SUPERLUMT - */ -#cmakedefine SUNDIALS_SUPERLUMT -#cmakedefine SUNDIALS_SUPERLUMT_THREAD_TYPE "@SUPERLUMT_THREAD_TYPE@" +/* BUILD SUNDIALS with monitoring functionalities */ +#cmakedefine SUNDIALS_BUILD_WITH_MONITORING -/* SUPERLUDIST available - * If working libraries for SUPERLUDIST support were found, then - * #define SUNDIALS_SUPERLUDIST - */ -#cmakedefine SUNDIALS_SUPERLUDIST +/* BUILD SUNDIALS with profiling functionalities */ +#cmakedefine SUNDIALS_BUILD_WITH_PROFILING -/* KLU available - * If working libraries for KLU support were found, then - * #define SUNDIALS_KLU - */ -#cmakedefine SUNDIALS_KLU +/* BUILD SUNDIALS with logging functionalities */ +#define SUNDIALS_LOGGING_LEVEL @SUNDIALS_LOGGING_LEVEL@ + +/* BUILD SUNDIALS with MPI-enabled logging */ +#cmakedefine SUNDIALS_LOGGING_ENABLE_MPI + +/* Is snprintf available? */ +#cmakedefine SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY +#ifndef SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY +#define SUNDIALS_MAX_SPRINTF_SIZE @SUNDIALS_MAX_SPRINTF_SIZE@ +#endif + +/* ------------------------------------------------------------------ + * SUNDIALS TPL macros + * -----------------------------------------------------------------*/ -/* Trilinos available - * If working libraries for Trilinos support were found, then - * #define SUNDIALS_TRILINOS +/* Caliper */ +#cmakedefine SUNDIALS_CALIPER_ENABLED + +/* Ginkgo backends */ +#cmakedefine SUNDIALS_GINKGO_BACKENDS_CUDA +#cmakedefine SUNDIALS_GINKGO_BACKENDS_HIP +#cmakedefine SUNDIALS_GINKGO_BACKENDS_OMP +#cmakedefine SUNDIALS_GINKGO_BACKENDS_REF +#cmakedefine SUNDIALS_GINKGO_BACKENDS_DPCPP + +/* MAGMA backends */ +#cmakedefine SUNDIALS_MAGMA_BACKENDS_CUDA +#cmakedefine SUNDIALS_MAGMA_BACKENDS_HIP + +/* Set if SUNDIALS is built with MPI support, then + * #define SUNDIALS_MPI_ENABLED 1 + * otherwise + * #define SUNDIALS_MPI_ENABLED 0 */ -#cmakedefine SUNDIALS_TRILINOS +#cmakedefine01 SUNDIALS_MPI_ENABLED + + /* SUPERLUMT threading type */ +#cmakedefine SUNDIALS_SUPERLUMT_THREAD_TYPE "@SUPERLUMT_THREAD_TYPE@" /* Trilinos with MPI is available, then * #define SUNDIALS_TRILINOS_HAVE_MPI */ #cmakedefine SUNDIALS_TRILINOS_HAVE_MPI -/* Set if SUNDIALS is built with MPI support. - * +/* RAJA backends */ +#cmakedefine SUNDIALS_RAJA_BACKENDS_CUDA +#cmakedefine SUNDIALS_RAJA_BACKENDS_HIP +#cmakedefine SUNDIALS_RAJA_BACKENDS_SYCL + +/* ------------------------------------------------------------------ + * SUNDIALS modules enabled + * -----------------------------------------------------------------*/ + +@SUNDIALS_CONFIGH_BUILDS@ + + +/* ------------------------------------------------------------------ + * SUNDIALS fortran configuration + * -----------------------------------------------------------------*/ + + +/* Define Fortran name-mangling macro for C identifiers. + * Depending on the inferred scheme, one of the following six + * macros will be defined: + * #define SUNDIALS_F77_FUNC(name,NAME) name + * #define SUNDIALS_F77_FUNC(name,NAME) name ## _ + * #define SUNDIALS_F77_FUNC(name,NAME) name ## __ + * #define SUNDIALS_F77_FUNC(name,NAME) NAME + * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## _ + * #define SUNDIALS_F77_FUNC(name,NAME) NAME ## __ + */ +@F77_MANGLE_MACRO1@ + +/* Define Fortran name-mangling macro for C identifiers + * which contain underscores. */ -@IS_MPI_ENABLED@ +@F77_MANGLE_MACRO2@ -/* FNVECTOR: Allow user to specify different MPI communicator +/* Allow user to specify different MPI communicator * If it was found that the MPI implementation supports MPI_Comm_f2c, then * #define SUNDIALS_MPI_COMM_F2C 1 * otherwise @@ -124,12 +180,34 @@ */ @F77_MPI_COMM_F2C@ -/* Mark SUNDIALS API functions for export/import - * When building shared SUNDIALS libraries under Windows, use - * #define SUNDIALS_EXPORT __declspec(dllexport) - * When linking to shared SUNDIALS libraries under Windows, use - * #define SUNDIALS_EXPORT __declspec(dllimport) - * In all other cases (other platforms or static libraries under - * Windows), the SUNDIALS_EXPORT macro is empty + +/* ------------------------------------------------------------------ + * SUNDIALS inline macros. + * -----------------------------------------------------------------*/ + + +/* Mark SUNDIALS function as inline. */ -@SUNDIALS_EXPORT@ +#ifndef SUNDIALS_CXX_INLINE +#define SUNDIALS_CXX_INLINE inline +#endif + +#ifndef SUNDIALS_C_INLINE +#ifdef SUNDIALS_C_COMPILER_HAS_INLINE +#define SUNDIALS_C_INLINE inline +#else +#define SUNDIALS_C_INLINE +#endif +#endif + +#ifdef __cplusplus +#define SUNDIALS_INLINE SUNDIALS_CXX_INLINE +#else +#define SUNDIALS_INLINE SUNDIALS_C_INLINE +#endif + +/* Mark SUNDIALS function as static inline. + */ +#define SUNDIALS_STATIC_INLINE static SUNDIALS_INLINE + +#endif /* _SUNDIALS_CONFIG_H */ diff --git a/inst/include/sundials/sundials_context.h b/inst/include/sundials/sundials_context.h new file mode 100644 index 0000000..807f1a3 --- /dev/null +++ b/inst/include/sundials/sundials_context.h @@ -0,0 +1,47 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS context class. A context object holds data that all + * SUNDIALS objects in a simulation share. It is thread-safe provided + * that each thread has its own context object. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONTEXT_H +#define _SUNDIALS_CONTEXT_H + +#include "sundials/sundials_logger.h" +#include "sundials/sundials_profiler.h" +#include "sundials/sundials_types.h" + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +typedef struct _SUNContext* SUNContext; + +SUNDIALS_EXPORT int SUNContext_Create(void* comm, SUNContext* ctx); +SUNDIALS_EXPORT int SUNContext_GetProfiler(SUNContext sunctx, SUNProfiler* profiler); +SUNDIALS_EXPORT int SUNContext_SetProfiler(SUNContext sunctx, SUNProfiler profiler); +SUNDIALS_EXPORT int SUNContext_GetLogger(SUNContext sunctx, SUNLogger* logger); +SUNDIALS_EXPORT int SUNContext_SetLogger(SUNContext sunctx, SUNLogger logger); +SUNDIALS_EXPORT int SUNContext_Free(SUNContext* ctx); + +#ifdef __cplusplus +} + +/* We include this here for backwards compatibility + (the contents used to be defined here directly) */ +#include + +#endif +#endif diff --git a/inst/include/sundials/sundials_context.hpp b/inst/include/sundials/sundials_context.hpp new file mode 100644 index 0000000..f4cba8e --- /dev/null +++ b/inst/include/sundials/sundials_context.hpp @@ -0,0 +1,71 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * C++ interface to the SUNDIALS context object + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONTEXT_HPP +#define _SUNDIALS_CONTEXT_HPP + +#include +#include +#include + +namespace sundials { + +class Context : public sundials::ConvertibleTo +{ +public: + explicit Context(void* comm = nullptr) + { + sunctx_ = std::make_unique(); + SUNContext_Create(comm, sunctx_.get()); + } + + /* disallow copy, but allow move construction */ + Context(const Context&) = delete; + Context(Context&&) = default; + + /* disallow copy, but allow move operators */ + Context& operator=(const Context&) = delete; + Context& operator=(Context&&) = default; + + SUNContext Convert() override + { + return *sunctx_.get(); + } + SUNContext Convert() const override + { + return *sunctx_.get(); + } + operator SUNContext() override + { + return *sunctx_.get(); + } + operator SUNContext() const override + { + return *sunctx_.get(); + } + + ~Context() + { + if (sunctx_) SUNContext_Free(sunctx_.get()); + } + +private: + std::unique_ptr sunctx_; +}; + +} // namespace sundials + +#endif // _SUNDIALS_CONTEXT_HPP diff --git a/inst/include/sundials/sundials_context_impl.h b/inst/include/sundials/sundials_context_impl.h new file mode 100644 index 0000000..28083aa --- /dev/null +++ b/inst/include/sundials/sundials_context_impl.h @@ -0,0 +1,39 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS context class implementation. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONTEXT_IMPL_H +#define _SUNDIALS_CONTEXT_IMPL_H + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +struct _SUNContext { + SUNProfiler profiler; + booleantype own_profiler; + SUNLogger logger; + booleantype own_logger; +}; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/inst/include/sundials/sundials_convertibleto.hpp b/inst/include/sundials/sundials_convertibleto.hpp new file mode 100644 index 0000000..ad50a11 --- /dev/null +++ b/inst/include/sundials/sundials_convertibleto.hpp @@ -0,0 +1,39 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Base class for converting C++ wappers (views) to SUNDIALS objects + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONVERTIBLETO_HPP +#define _SUNDIALS_CONVERTIBLETO_HPP + +namespace sundials { + +template +class ConvertibleTo +{ +public: + // Explicit conversion to the underlying type + virtual T Convert() = 0; + virtual T Convert() const = 0; + + // Implicit conversion to the underlying type + virtual operator T() = 0; + virtual operator T() const = 0; + + virtual ~ConvertibleTo() = default; +}; + +} // namespace sundials + +#endif // _SUNDIALS_CONVERTIBLETO_HPP diff --git a/inst/include/sundials/sundials_cuda.h b/inst/include/sundials/sundials_cuda.h new file mode 100644 index 0000000..207d020 --- /dev/null +++ b/inst/include/sundials/sundials_cuda.h @@ -0,0 +1,116 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for working with CUDA. + * ----------------------------------------------------------------- + */ + +#include +#include + +#include +#include +#include + +#include + +#ifndef _SUNDIALS_CUDA_H +#define _SUNDIALS_CUDA_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* --------------------------------------------------------------------------- + * Utility macros + * ---------------------------------------------------------------------------*/ + +#define SUNDIALS_CUDA_VERIFY(cuerr) SUNDIALS_CUDA_Assert(cuerr, __FILE__, __LINE__) +#define SUNDIALS_CUSPARSE_VERIFY(cuerr) SUNDIALS_CUSPARSE_Assert(cuerr, __FILE__, __LINE__) +#define SUNDIALS_CUSOLVER_VERIFY(cuerr) SUNDIALS_CUSOLVER_Assert(cuerr, __FILE__, __LINE__) + +#define SUNDIALS_KERNEL_NAME(...) __VA_ARGS__ +#ifndef SUNDIALS_DEBUG_CUDA_LASTERROR +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ kernel<<>>(__VA_ARGS__); } +#else +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ \ + kernel<<>>(__VA_ARGS__); \ + cudaDeviceSynchronize(); \ + SUNDIALS_CUDA_VERIFY(cudaGetLastError()); \ +} +#endif + +/* --------------------------------------------------------------------------- + * Utility functions + * ---------------------------------------------------------------------------*/ + +inline booleantype SUNDIALS_CUDA_Assert(cudaError_t cuerr, const char *file, int line) +{ + if (cuerr != cudaSuccess) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in CUDA runtime operation: %s %s:%d\n", + cudaGetErrorString(cuerr), file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +inline booleantype SUNDIALS_CUSPARSE_Assert(cusparseStatus_t status, const char *file, int line) +{ + if (status != CUSPARSE_STATUS_SUCCESS) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in cuSPARSE runtime operation: cusparseStatus_t = %d %s:%d\n", + status, file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +inline booleantype SUNDIALS_CUSOLVER_Assert(cusolverStatus_t status, const char *file, int line) +{ + if (status != CUSOLVER_STATUS_SUCCESS) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in cuSOLVER runtime operation: cusolverStatus_t = %d %s:%d\n", + status, file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_CUDA_H */ \ No newline at end of file diff --git a/inst/include/sundials/sundials_cuda_policies.hpp b/inst/include/sundials/sundials_cuda_policies.hpp new file mode 100644 index 0000000..160fd42 --- /dev/null +++ b/inst/include/sundials/sundials_cuda_policies.hpp @@ -0,0 +1,234 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines the ExecPolicy classes which + * are utilized to determine CUDA kernel launch parameters. + * ----------------------------------------------------------------- + */ + +#ifndef _SUNDIALS_CUDAEXECPOLICIES_HPP +#define _SUNDIALS_CUDAEXECPOLICIES_HPP + +#include +#include + +#include + +namespace sundials +{ +namespace cuda +{ + +constexpr const sunindextype WARP_SIZE = 32; +constexpr const sunindextype MAX_BLOCK_SIZE = 1024; +constexpr const sunindextype MAX_WARPS = MAX_BLOCK_SIZE / WARP_SIZE; + +class ExecPolicy +{ +public: + ExecPolicy(cudaStream_t stream = 0) : stream_(stream) { } + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const = 0; + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const = 0; + virtual const cudaStream_t* stream() const { return (&stream_); } + virtual ExecPolicy* clone() const = 0; + ExecPolicy* clone_new_stream(cudaStream_t stream) const { + ExecPolicy* ex = clone(); + ex->stream_ = stream; + return ex; + } + virtual bool atomic() const { return false; } + virtual ~ExecPolicy() {} +protected: + cudaStream_t stream_; +}; + + +/* + * A kernel execution policy that maps each thread to a work unit. + * The number of threads per block (blockSize) can be set to anything. + * The grid size will be chosen so that there are enough threads for one + * thread per element. If a stream is provided, it will be used to + * execute the kernel. + */ +class ThreadDirectExecPolicy : public ExecPolicy +{ +public: + ThreadDirectExecPolicy(const size_t blockDim, cudaStream_t stream = 0) + : blockDim_(blockDim), ExecPolicy(stream) + {} + + ThreadDirectExecPolicy(const ThreadDirectExecPolicy& ex) + : blockDim_(ex.blockDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + /* ceil(n/m) = floor((n + m - 1) / m) */ + return (numWorkUnits + blockSize() - 1) / blockSize(); + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new ThreadDirectExecPolicy(*this)); + } + +private: + const size_t blockDim_; +}; + +/* + * A kernel execution policy for kernels that use grid stride loops. + * The number of threads per block (blockSize) can be set to anything. + * The number of blocks (gridSize) can be set to anything. If a stream + * is provided, it will be used to execute the kernel. + */ +class GridStrideExecPolicy : public ExecPolicy +{ +public: + GridStrideExecPolicy(const size_t blockDim, const size_t gridDim, cudaStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + {} + + GridStrideExecPolicy(const GridStrideExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t /*numWorkUnits*/ = 0, size_t /*blockDim*/ = 0) const + { + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new GridStrideExecPolicy(*this)); + } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +/* + * A kernel execution policy for performing a reduction across indvidual thread + * blocks. The number of threads per block (blockSize) can be set to any valid + * multiple of the CUDA warp size. The number of blocks (gridSize) can be set to + * any value greater or equal to 0. If it is set to 0, then the grid size will + * be chosen so that there are at most two work units per thread. If a stream is + * provided, it will be used to execute the kernel. + */ + +class BlockReduceAtomicExecPolicy : public ExecPolicy +{ +public: + BlockReduceAtomicExecPolicy(const size_t blockDim, const size_t gridDim = 0, cudaStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + { + if (blockDim < 1 || blockDim % WARP_SIZE) + { + throw std::invalid_argument("the block size must be a multiple of the CUDA warp size"); + } + } + + BlockReduceAtomicExecPolicy(const BlockReduceAtomicExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + if (gridDim_ == 0) + { + return (numWorkUnits + (blockSize() * 2 - 1)) / (blockSize() * 2); + } + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new BlockReduceAtomicExecPolicy(*this)); + } + + virtual bool atomic() const { return true; } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +class BlockReduceExecPolicy : public ExecPolicy +{ +public: + BlockReduceExecPolicy(const size_t blockDim, const size_t gridDim = 0, cudaStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + { + if (blockDim < 1 || blockDim % WARP_SIZE) + { + throw std::invalid_argument("the block size must be a multiple of the CUDA warp size"); + } + } + + BlockReduceExecPolicy(const BlockReduceExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + if (gridDim_ == 0) + { + return (numWorkUnits + (blockSize() * 2 - 1)) / (blockSize() * 2); + } + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new BlockReduceExecPolicy(*this)); + } + + bool atomic() const { return false; } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +} // namespace cuda +} // namespace sundials + +typedef sundials::cuda::ExecPolicy SUNCudaExecPolicy; +typedef sundials::cuda::ThreadDirectExecPolicy SUNCudaThreadDirectExecPolicy; +typedef sundials::cuda::GridStrideExecPolicy SUNCudaGridStrideExecPolicy; +typedef sundials::cuda::BlockReduceExecPolicy SUNCudaBlockReduceExecPolicy; +typedef sundials::cuda::BlockReduceAtomicExecPolicy SUNCudaBlockReduceAtomicExecPolicy; + +#endif diff --git a/inst/include/sundials/sundials_debug.h b/inst/include/sundials/sundials_debug.h new file mode 100644 index 0000000..2c44c1b --- /dev/null +++ b/inst/include/sundials/sundials_debug.h @@ -0,0 +1,51 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for SUNDIALS debugging. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_DEBUG_H +#define _SUNDIALS_DEBUG_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * Macro which prints to stderr when in debug mode + */ +#ifdef SUNDIALS_DEBUG +#define SUNDIALS_DEBUG_PRINT(str) fprintf(stderr, str) +#else +#define SUNDIALS_DEBUG_PRINT(str) +#endif + +/* + * Macro which prints error messages in debug mode + */ +#ifdef SUNDIALS_DEBUG +#define SUNDIALS_DEBUG_ERROR(msg) \ + fprintf(stderr, "ERROR in %s (%s line %d): %s", \ + __func__, __FILE__, __LINE__, msg); +#else +#define SUNDIALS_DEBUG_ERROR(msg) +#endif + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_DEBUG_H */ diff --git a/inst/include/sundials/sundials_dense.h b/inst/include/sundials/sundials_dense.h index 7dee165..7296346 100644 --- a/inst/include/sundials/sundials_dense.h +++ b/inst/include/sundials/sundials_dense.h @@ -2,7 +2,7 @@ * Programmer: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -31,178 +31,252 @@ extern "C" { #endif /* - * ----------------------------------------------------------------- - * Functions: DenseGETRF and DenseGETRS - * ----------------------------------------------------------------- - * DenseGETRF performs the LU factorization of the M by N dense - * matrix A. This is done using standard Gaussian elimination - * with partial (row) pivoting. Note that this applies only - * to matrices with M >= N and full column rank. - * - * A successful LU factorization leaves the matrix A and the - * pivot array p with the following information: - * - * (1) p[k] contains the row number of the pivot element chosen - * at the beginning of elimination step k, k=0, 1, ..., N-1. - * - * (2) If the unique LU factorization of A is given by PA = LU, - * where P is a permutation matrix, L is a lower trapezoidal - * matrix with all 1's on the diagonal, and U is an upper - * triangular matrix, then the upper triangular part of A - * (including its diagonal) contains U and the strictly lower + * ---------------------------------------------------------------------------- + * Functions: SUNDlsMat_DenseGETRF and SUNDlsMat_DenseGETRS + * ---------------------------------------------------------------------------- + * SUNDlsMat_DenseGETRF performs the LU factorization of the M by N dense matrix A. + * This is done using standard Gaussian elimination with partial (row) pivoting. + * Note that this applies only to matrices with M >= N and full column rank. + * + * A successful LU factorization leaves the matrix A and the pivot array p with + * the following information: + * + * (1) p[k] contains the row number of the pivot element chosen at the beginning + * of elimination step k, k=0, 1, ..., N-1. + * + * (2) If the unique LU factorization of A is given by PA = LU, where P is a + * permutation matrix, L is a lower trapezoidal matrix with all 1's on the + * diagonal, and U is an upper triangular matrix, then the upper triangular + * part of A (including its diagonal) contains U and the strictly lower * trapezoidal part of A contains the multipliers, I-L. * * For square matrices (M = N), L is unit lower triangular. * - * DenseGETRF returns 0 if successful. Otherwise it encountered - * a zero diagonal element during the factorization. In this case - * it returns the column index (numbered from one) at which - * it encountered the zero. + * SUNDlsMat_DenseGETRF returns 0 if successful. Otherwise it encountered a zero + * diagonal element during the factorization. In this case it returns the column + * index (numbered from one) at which it encountered the zero. * - * DenseGETRS solves the N-dimensional system A x = b using - * the LU factorization in A and the pivot information in p - * computed in DenseGETRF. The solution x is returned in b. This - * routine cannot fail if the corresponding call to DenseGETRF - * did not fail. - * DenseGETRS does NOT check for a square matrix! + * SUNDlsMat_DenseGETRS solves the N-dimensional system A x = b using the LU + * factorization in A and the pivot information in p computed in + * SUNDlsMat_DenseGETRF. The solution x is returned in b. This routine cannot fail + * if the corresponding call to SUNDlsMat_DenseGETRF did not fail. + * SUNDlsMat_DenseGETRS does NOT check for a square matrix! * - * ----------------------------------------------------------------- - * DenseGETRF and DenseGETRS are simply wrappers around denseGETRF - * and denseGETRS, respectively, which perform all the work by - * directly accessing the data in the DlsMat A (i.e. in A->cols). - * ----------------------------------------------------------------- + * ---------------------------------------------------------------------------- + * SUNDlsMat_DenseGETRF and SUNDlsMat_DenseGETRS are simply wrappers around + * SUNDlsMat_denseGETRF and SUNDlsMat_denseGETRS, respectively, which perform all the + * work by directly accessing the data in the SUNDlsMat A (i.e. in A->cols). + * ---------------------------------------------------------------------------- */ -SUNDIALS_EXPORT sunindextype DenseGETRF(DlsMat A, sunindextype *p); -SUNDIALS_EXPORT void DenseGETRS(DlsMat A, sunindextype *p, realtype *b); +SUNDIALS_EXPORT +sunindextype SUNDlsMat_DenseGETRF(SUNDlsMat A, sunindextype *p); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseGETRF instead") +sunindextype DenseGETRF(DlsMat A, sunindextype *p); + +SUNDIALS_EXPORT +void SUNDlsMat_DenseGETRS(SUNDlsMat A, sunindextype *p, realtype *b); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseGETRS instead") +void DenseGETRS(DlsMat A, sunindextype *p, realtype *b); -SUNDIALS_EXPORT sunindextype denseGETRF(realtype **a, sunindextype m, - sunindextype n, sunindextype *p); -SUNDIALS_EXPORT void denseGETRS(realtype **a, sunindextype n, sunindextype *p, - realtype *b); +SUNDIALS_EXPORT +sunindextype SUNDlsMat_denseGETRF(realtype **a, sunindextype m, + sunindextype n, sunindextype *p); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseGETRF instead") +sunindextype denseGETRF(realtype **a, sunindextype m, + sunindextype n, sunindextype *p); + +SUNDIALS_EXPORT +void SUNDlsMat_denseGETRS(realtype **a, sunindextype n, sunindextype *p, + realtype *b); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseGETRS instead") +void denseGETRS(realtype **a, sunindextype n, sunindextype *p, + realtype *b); /* - * ----------------------------------------------------------------- - * Functions : DensePOTRF and DensePOTRS - * ----------------------------------------------------------------- - * DensePOTRF computes the Cholesky factorization of a real symmetric + * ---------------------------------------------------------------------------- + * Functions : SUNDlsMat_DensePOTRF and SUNDlsMat_DensePOTRS + * ---------------------------------------------------------------------------- + * SUNDlsMat_DensePOTRF computes the Cholesky factorization of a real symmetric * positive definite matrix A. - * ----------------------------------------------------------------- - * DensePOTRS solves a system of linear equations A*X = B with a - * symmetric positive definite matrix A using the Cholesky factorization - * A = L*L**T computed by DensePOTRF. + * ---------------------------------------------------------------------------- + * SUNDlsMat_DensePOTRS solves a system of linear equations A*X = B with a + * symmetric positive definite matrix A using the Cholesky factorization A = + * L*L**T computed by SUNDlsMat_DensePOTRF. * - * ----------------------------------------------------------------- - * DensePOTRF and DensePOTRS are simply wrappers around densePOTRF - * and densePOTRS, respectively, which perform all the work by - * directly accessing the data in the DlsMat A (i.e. the field cols) - * ----------------------------------------------------------------- + * ---------------------------------------------------------------------------- + * SUNDlsMat_DensePOTRF and SUNDlsMat_DensePOTRS are simply wrappers around + * SUNDlsMat_densePOTRF and SUNDlsMat_densePOTRS, respectively, which perform all the + * work by directly accessing the data in the DlsMat A (i.e. the field cols) + * ---------------------------------------------------------------------------- */ -SUNDIALS_EXPORT sunindextype DensePOTRF(DlsMat A); -SUNDIALS_EXPORT void DensePOTRS(DlsMat A, realtype *b); -SUNDIALS_EXPORT sunindextype densePOTRF(realtype **a, sunindextype m); -SUNDIALS_EXPORT void densePOTRS(realtype **a, sunindextype m, realtype *b); +SUNDIALS_EXPORT +sunindextype SUNDlsMat_DensePOTRF(SUNDlsMat A); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DensePOTRF instead") +sunindextype DensePOTRF(DlsMat A); + +SUNDIALS_EXPORT +void SUNDlsMat_DensePOTRS(SUNDlsMat A, realtype *b); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DensePOTRS instead") +void DensePOTRS(DlsMat A, realtype *b); + +SUNDIALS_EXPORT +sunindextype SUNDlsMat_densePOTRF(realtype **a, sunindextype m); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_densePOTRF instead") +sunindextype densePOTRF(realtype **a, sunindextype m); + +SUNDIALS_EXPORT +void SUNDlsMat_densePOTRS(realtype **a, sunindextype m, realtype *b); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_densePOTRS instead") +void densePOTRS(realtype **a, sunindextype m, realtype *b); /* - * ----------------------------------------------------------------- - * Functions : DenseGEQRF and DenseORMQR - * ----------------------------------------------------------------- - * DenseGEQRF computes a QR factorization of a real M-by-N matrix A: - * A = Q * R (with M>= N). + * ----------------------------------------------------------------------------- + * Functions : SUNDlsMat_DenseGEQRF and SUNDlsMat_DenseORMQR + * ----------------------------------------------------------------------------- + * SUNDlsMat_DenseGEQRF computes a QR factorization of a real M-by-N matrix A: A = + * Q * R (with M>= N). * - * DenseGEQRF requires a temporary work vector wrk of length M. - * ----------------------------------------------------------------- - * DenseORMQR computes the product w = Q * v where Q is a real - * orthogonal matrix defined as the product of k elementary reflectors + * SUNDlsMat_DenseGEQRF requires a temporary work vector wrk of length M. + * ----------------------------------------------------------------------------- + * SUNDlsMat_DenseORMQR computes the product w = Q * v where Q is a real orthogonal + * matrix defined as the product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * - * as returned by DenseGEQRF. Q is an M-by-N matrix, v is a vector - * of length N and w is a vector of length M (with M >= N). + * as returned by SUNDlsMat_DenseGEQRF. Q is an M-by-N matrix, v is a vector of + * length N and w is a vector of length M (with M >= N). * - * DenseORMQR requires a temporary work vector wrk of length M. + * SUNDlsMat_DenseORMQR requires a temporary work vector wrk of length M. * - * ----------------------------------------------------------------- - * DenseGEQRF and DenseORMQR are simply wrappers around denseGEQRF - * and denseORMQR, respectively, which perform all the work by - * directly accessing the data in the DlsMat A (i.e. the field cols) - * ----------------------------------------------------------------- + * ----------------------------------------------------------------------------- + * SUNDlsMat_DenseGEQRF and SUNDlsMat_DenseORMQR are simply wrappers around + * SUNDlsMat_denseGEQRF and SUNDlsMat_denseORMQR, respectively, which perform all the + * work by directly accessing the data in the DlsMat A (i.e. the field cols) + * ----------------------------------------------------------------------------- */ -SUNDIALS_EXPORT int DenseGEQRF(DlsMat A, realtype *beta, realtype *wrk); -SUNDIALS_EXPORT int DenseORMQR(DlsMat A, realtype *beta, realtype *vn, - realtype *vm, realtype *wrk); +SUNDIALS_EXPORT +int SUNDlsMat_DenseGEQRF(SUNDlsMat A, realtype *beta, realtype *wrk); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseGEQRF instead") +int DenseGEQRF(DlsMat A, realtype *beta, realtype *wrk); + +SUNDIALS_EXPORT +int SUNDlsMat_DenseORMQR(SUNDlsMat A, realtype *beta, realtype *vn, + realtype *vm, realtype *wrk); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseORMQR instead") +int DenseORMQR(DlsMat A, realtype *beta, realtype *vn, + realtype *vm, realtype *wrk); + + +SUNDIALS_EXPORT +int SUNDlsMat_denseGEQRF(realtype **a, sunindextype m, sunindextype n, + realtype *beta, realtype *wrk); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseGEQRF instead") +int denseGEQRF(realtype **a, sunindextype m, sunindextype n, + realtype *beta, realtype *wrk); -SUNDIALS_EXPORT int denseGEQRF(realtype **a, sunindextype m, sunindextype n, - realtype *beta, realtype *wrk); -SUNDIALS_EXPORT int denseORMQR(realtype **a, sunindextype m, sunindextype n, - realtype *beta, realtype *v, realtype *w, - realtype *wrk); +SUNDIALS_EXPORT +int SUNDlsMat_denseORMQR(realtype **a, sunindextype m, sunindextype n, + realtype *beta, realtype *v, realtype *w, + realtype *wrk); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseORMQR instead") +int denseORMQR(realtype **a, sunindextype m, sunindextype n, + realtype *beta, realtype *v, realtype *w, + realtype *wrk); /* - * ----------------------------------------------------------------- - * Function : DenseCopy - * ----------------------------------------------------------------- - * DenseCopy copies the contents of the M-by-N matrix A into the + * ---------------------------------------------------------------------------- + * Function : SUNDlsMat_DenseCopy + * ---------------------------------------------------------------------------- + * SUNDlsMat_DenseCopy copies the contents of the M-by-N matrix A into the * M-by-N matrix B. * - * DenseCopy is a wrapper around denseCopy which accesses the data - * in the DlsMat A and DlsMat B (i.e. the fields cols) - * ----------------------------------------------------------------- + * SUNDlsMat_DenseCopy is a wrapper around SUNDlsMat_denseCopy which accesses + * the data in the SUNDlsMat A and SUNDlsMat B (i.e. the fields cols) + * ----------------------------------------------------------------------------- */ -SUNDIALS_EXPORT void DenseCopy(DlsMat A, DlsMat B); -SUNDIALS_EXPORT void denseCopy(realtype **a, realtype **b, sunindextype m, - sunindextype n); +SUNDIALS_EXPORT +void SUNDlsMat_DenseCopy(SUNDlsMat A, SUNDlsMat B); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseCopy instead") +void DenseCopy(DlsMat A, DlsMat B); + +SUNDIALS_EXPORT +void SUNDlsMat_denseCopy(realtype **a, realtype **b, sunindextype m, + sunindextype n); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseCopy instead") +void denseCopy(realtype **a, realtype **b, sunindextype m, + sunindextype n); /* - * ----------------------------------------------------------------- - * Function: DenseScale - * ----------------------------------------------------------------- - * DenseScale scales the elements of the M-by-N matrix A by the + * ----------------------------------------------------------------------------- + * Function: SUNDlsMat_DenseScale + * ----------------------------------------------------------------------------- + * SUNDlsMat_DenseScale scales the elements of the M-by-N matrix A by the * constant c and stores the result back in A. * - * DenseScale is a wrapper around denseScale which performs the actual - * scaling by accessing the data in the DlsMat A (i.e. in A->cols). - * ----------------------------------------------------------------- + * SUNDlsMat_DenseScale is a wrapper around SUNDlsMat_denseScale which performs + * the actual scaling by accessing the data in the SUNDlsMat A (i.e. in + * A->cols). + * ----------------------------------------------------------------------------- */ -SUNDIALS_EXPORT void DenseScale(realtype c, DlsMat A); -SUNDIALS_EXPORT void denseScale(realtype c, realtype **a, sunindextype m, - sunindextype n); +SUNDIALS_EXPORT +void SUNDlsMat_DenseScale(realtype c, SUNDlsMat A); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsSUNDlsMat_DenseScale_denseCopy instead") +void DenseScale(realtype c, DlsMat A); + +SUNDIALS_EXPORT +void SUNDlsMat_denseScale(realtype c, realtype **a, sunindextype m, + sunindextype n); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseScale instead") +void denseScale(realtype c, realtype **a, sunindextype m, + sunindextype n); /* - * ----------------------------------------------------------------- - * Function: denseAddIdentity - * ----------------------------------------------------------------- - * denseAddIdentity adds the identity matrix to the n-by-n matrix + * ----------------------------------------------------------------------------- + * Function: SUNDlsMat_denseAddIdentity + * ----------------------------------------------------------------------------- + * SUNDlsMat_denseAddIdentity adds the identity matrix to the n-by-n matrix * stored in a realtype** array. - * ----------------------------------------------------------------- + * ----------------------------------------------------------------------------- */ -SUNDIALS_EXPORT void denseAddIdentity(realtype **a, sunindextype n); +SUNDIALS_EXPORT +void SUNDlsMat_denseAddIdentity(realtype **a, sunindextype n); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseAddIdentity instead") +void denseAddIdentity(realtype **a, sunindextype n); /* - * ----------------------------------------------------------------- - * Function: DenseMatvec - * ----------------------------------------------------------------- - * DenseMatvec computes the matrix-vector product y = A*x, where A - * is an M-by-N matrix, x is a vector of length N, and y is a vector - * of length M. No error checking is performed on the length of the - * arrays x and y. Only y is modified in this routine. + * ----------------------------------------------------------------------------- + * Function: SUNDlsMat_DenseMatvec + * ----------------------------------------------------------------------------- + * SUNDlsMat_DenseMatvec computes the matrix-vector product y = A*x, where A is + * an M-by-N matrix, x is a vector of length N, and y is a vector of length M. + * No error checking is performed on the length of the arrays x and y. Only y + * is modified in this routine. * - * DenseMatvec is a wrapper around denseMatvec which performs the - * actual product by accessing the data in the DlsMat A. - * ----------------------------------------------------------------- + * SUNDlsMat_DenseMatvec is a wrapper around SUNDlsMat_denseMatvec which + * performs the actual product by accessing the data in the SUNDlsMat A. + * ----------------------------------------------------------------------------- */ -SUNDIALS_EXPORT void DenseMatvec(DlsMat A, realtype *x, realtype *y); -SUNDIALS_EXPORT void denseMatvec(realtype **a, realtype *x, realtype *y, - sunindextype m, sunindextype n); +SUNDIALS_EXPORT +void SUNDlsMat_DenseMatvec(SUNDlsMat A, realtype *x, realtype *y); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DenseMatvec instead") +void DenseMatvec(DlsMat A, realtype *x, realtype *y); + +SUNDIALS_EXPORT +void SUNDlsMat_denseMatvec(realtype **a, realtype *x, realtype *y, + sunindextype m, sunindextype n); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_denseMatvec instead") +void denseMatvec(realtype **a, realtype *x, realtype *y, + sunindextype m, sunindextype n); #ifdef __cplusplus diff --git a/inst/include/sundials/sundials_direct.h b/inst/include/sundials/sundials_direct.h index da4be77..d532d56 100644 --- a/inst/include/sundials/sundials_direct.h +++ b/inst/include/sundials/sundials_direct.h @@ -2,7 +2,7 @@ * Programmer: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -48,9 +48,9 @@ extern "C" { /* * ----------------------------------------------------------------- - * Type : DlsMat + * Type : SUNDlsMat * ----------------------------------------------------------------- - * The type DlsMat is defined to be a pointer to a structure + * The type SUNDlsMat is defined to be a pointer to a structure * with various sizes, a data field, and an array of pointers to * the columns which defines a dense or band matrix for use in * direct linear solvers. The M and N fields indicates the number @@ -58,7 +58,7 @@ extern "C" { * dimensional array used for component storage. The cols field * stores the pointers in data for the beginning of each column. * ----------------------------------------------------------------- - * For DENSE matrices, the relevant fields in DlsMat are: + * For DENSE matrices, the relevant fields in SUNDlsMat are: * type = SUNDIALS_DENSE * M - number of rows * N - number of columns @@ -70,17 +70,18 @@ extern "C" { * * The elements of a dense matrix are stored columnwise (i.e. columns * are stored one on top of the other in memory). - * If A is of type DlsMat, then the (i,j)th element of A (with + * If A is of type SUNDlsMat, then the (i,j)th element of A (with * 0 <= i < M and 0 <= j < N) is given by (A->data)[j*n+i]. * - * The DENSE_COL and DENSE_ELEM macros below allow a user to access - * efficiently individual matrix elements without writing out explicit - * data structure references and without knowing too much about the - * underlying element storage. The only storage assumption needed is - * that elements are stored columnwise and that a pointer to the - * jth column of elements can be obtained via the DENSE_COL macro. - * ----------------------------------------------------------------- - * For BAND matrices, the relevant fields in DlsMat are: + * The SUNDLS_DENSE_COL and SUNDLS_DENSE_ELEM macros below allow a + * user to access efficiently individual matrix elements without + * writing out explicit data structure references and without knowing + * too much about the underlying element storage. The only storage + * assumption needed is that elements are stored columnwise and that a + * pointer to the jth column of elements can be obtained via the + * SUNDLS_DENSE_COL macro. + * ----------------------------------------------------------------- + * For BAND matrices, the relevant fields in SUNDlsMat are: * type = SUNDIALS_BAND * M - number of rows * N - number of columns @@ -98,17 +99,18 @@ extern "C" { * cols - array of pointers. cols[j] points to the first element * of the j-th column of the matrix in the array data. * - * The BAND_COL, BAND_COL_ELEM, and BAND_ELEM macros below allow a - * user to access individual matrix elements without writing out - * explicit data structure references and without knowing too much - * about the underlying element storage. The only storage assumption - * needed is that elements are stored columnwise and that a pointer - * into the jth column of elements can be obtained via the BAND_COL - * macro. The BAND_COL_ELEM macro selects an element from a column - * which has already been isolated via BAND_COL. The macro - * BAND_COL_ELEM allows the user to avoid the translation - * from the matrix location (i,j) to the index in the array returned - * by BAND_COL at which the (i,j)th element is stored. + * The SUNDLS_BAND_COL, SUNDLS_BAND_COL_ELEM, and SUNDLS_BAND_ELEM + * macros below allow a user to access individual matrix elements + * without writing out explicit data structure references and without + * knowing too much about the underlying element storage. The only + * storage assumption needed is that elements are stored columnwise + * and that a pointer into the jth column of elements can be obtained + * via the SUNDLS_BAND_COL macro. The SUNDLS_BAND_COL_ELEM macro + * selects an element from a column which has already been isolated + * via SUNDLS_BAND_COL. The macro SUNDLS_BAND_COL_ELEM allows the user + * to avoid the translation from the matrix location (i,j) to the + * index in the array returned by SUNDLS_BAND_COL at which the (i,j)th + * element is stored. * ----------------------------------------------------------------- */ @@ -123,7 +125,9 @@ typedef struct _DlsMat { realtype *data; sunindextype ldata; realtype **cols; -} *DlsMat; +} *SUNDlsMat; /* DEPRECATED DlsMat: use SUNDlsMat instead */ + +typedef SUNDlsMat DlsMat; /* * ================================================================== @@ -133,177 +137,230 @@ typedef struct _DlsMat { /* * ----------------------------------------------------------------- - * DENSE_COL and DENSE_ELEM + * SUNDLS_DENSE_COL and SUNDLS_DENSE_ELEM * ----------------------------------------------------------------- * - * DENSE_COL(A,j) references the jth column of the M-by-N dense - * matrix A, 0 <= j < N. The type of the expression DENSE_COL(A,j) - * is (realtype *). After the assignment col_j = DENSE_COL(A,j), - * col_j may be treated as an array indexed from 0 to M-1. - * The (i,j)-th element of A is thus referenced by col_j[i]. + * SUNDLS_DENSE_COL(A,j) references the jth column of the M-by-N dense + * matrix A, 0 <= j < N. The type of the expression SUNDLS_DENSE_COL(A,j) + * is (realtype *). After the assignment col_j = SUNDLS_DENSE_COL(A,j), + * col_j may be treated as an array indexed from 0 to M-1. The (i,j)-th + * element of A is thus referenced by * col_j[i]. * - * DENSE_ELEM(A,i,j) references the (i,j)th element of the dense + * SUNDLS_DENSE_ELEM(A,i,j) references the (i,j)th element of the dense * M-by-N matrix A, 0 <= i < M ; 0 <= j < N. * * ----------------------------------------------------------------- */ -#define DENSE_COL(A,j) ((A->cols)[j]) -#define DENSE_ELEM(A,i,j) ((A->cols)[j][i]) +#define SUNDLS_DENSE_COL(A,j) ((A->cols)[j]) +#define SUNDLS_DENSE_ELEM(A,i,j) ((A->cols)[j][i]) + +/* DEPRECATED DENSE_COL: use SUNDLS_DENSE_COL instead */ +#define DENSE_COL(A,j) SUNDLS_DENSE_COL(A,j) +/* DEPRECATED DENSE_ELEM: use SUNDLS_DENSE_ELEM instead */ +#define DENSE_ELEM(A,i,j) SUNDLS_DENSE_ELEM(A,i,j) /* * ----------------------------------------------------------------- - * BAND_COL, BAND_COL_ELEM, and BAND_ELEM + * SUNDLS_BAND_COL, SUNDLS_BAND_COL_ELEM, and SUNDLS_BAND_ELEM * ----------------------------------------------------------------- * - * BAND_COL(A,j) references the diagonal element of the jth column - * of the N by N band matrix A, 0 <= j <= N-1. The type of the - * expression BAND_COL(A,j) is realtype *. The pointer returned by - * the call BAND_COL(A,j) can be treated as an array which is - * indexed from -(A->mu) to (A->ml). + * SUNDLS_BAND_COL(A,j) references the diagonal element of the jth + * column of the N by N band matrix A, 0 <= j <= N-1. The type of the + * expression SUNDLS_BAND_COL(A,j) is realtype *. The pointer returned + * by the call SUNDLS_BAND_COL(A,j) can be treated as an array which + * is indexed from -(A->mu) to (A->ml). * - * BAND_COL_ELEM references the (i,j)th entry of the band matrix A - * when used in conjunction with BAND_COL. The index (i,j) should - * satisfy j-(A->mu) <= i <= j+(A->ml). + * SUNDLS_BAND_COL_ELEM references the (i,j)th entry of the band + * matrix A when used in conjunction with SUNDLS_BAND_COL. The index + * (i,j) should satisfy j-(A->mu) <= i <= j+(A->ml). * - * BAND_ELEM(A,i,j) references the (i,j)th element of the M-by-N - * band matrix A, where 0 <= i,j <= N-1. The location (i,j) should - * further satisfy j-(A->mu) <= i <= j+(A->ml). + * SUNDLS_BAND_ELEM(A,i,j) references the (i,j)th element of the + * M-by-N band matrix A, where 0 <= i,j <= N-1. The location (i,j) + * should further satisfy j-(A->mu) <= i <= j+(A->ml). * * ----------------------------------------------------------------- */ -#define BAND_COL(A,j) (((A->cols)[j])+(A->s_mu)) -#define BAND_COL_ELEM(col_j,i,j) (col_j[(i)-(j)]) -#define BAND_ELEM(A,i,j) ((A->cols)[j][(i)-(j)+(A->s_mu)]) +#define SUNDLS_BAND_COL(A,j) (((A->cols)[j])+(A->s_mu)) +#define SUNDLS_BAND_COL_ELEM(col_j,i,j) (col_j[(i)-(j)]) +#define SUNDLS_BAND_ELEM(A,i,j) ((A->cols)[j][(i)-(j)+(A->s_mu)]) + +/* DEPRECATED BAND_COL: use SUNDLS_BAND_COL */ +#define BAND_COL(A,j) SUNDLS_BAND_COL(A,j) +/* DEPRECATED BAND_COL_ELEM: use SUNDLS_BAND_COL_ELEM */ +#define BAND_COL_ELEM(col_j,i,j) SUNDLS_BAND_COL_ELEM(col_j,i,j) +/* DEPRECATED BAND_ELEM: use SUNDLS_BAND_ELEM */ +#define BAND_ELEM(A,i,j) SUNDLS_BAND_ELEM(A,i,j) /* * ================================================================== - * Exported function prototypes (functions working on dlsMat) + * Exported function prototypes (functions working on SUNDlsMat) * ================================================================== */ /* * ----------------------------------------------------------------- - * Function: NewDenseMat + * Function: SUNDlsMat_NewDenseMat * ----------------------------------------------------------------- - * NewDenseMat allocates memory for an M-by-N dense matrix and - * returns the storage allocated (type DlsMat). NewDenseMat - * returns NULL if the request for matrix storage cannot be - * satisfied. See the above documentation for the type DlsMat - * for matrix storage details. + * SUNDlsMat_NewDenseMat allocates memory for an M-by-N dense matrix + * and returns the storage allocated (type SUNDlsMat). + * SUNDlsMat_NewDenseMat returns NULL if the request for matrix + * storage cannot be satisfied. See the above documentation for the + * type SUNDlsMat for matrix storage details. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT DlsMat NewDenseMat(sunindextype M, sunindextype N); +SUNDIALS_EXPORT +SUNDlsMat SUNDlsMat_NewDenseMat(sunindextype M, sunindextype N); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_NewDenseMat instead") +DlsMat NewDenseMat(sunindextype M, sunindextype N); /* * ----------------------------------------------------------------- - * Function: NewBandMat + * Function: SUNDlsMat_NewBandMat * ----------------------------------------------------------------- - * NewBandMat allocates memory for an M-by-N band matrix + * SUNDlsMat_NewBandMat allocates memory for an M-by-N band matrix * with upper bandwidth mu, lower bandwidth ml, and storage upper - * bandwidth smu. Pass smu as follows depending on whether A will - * be LU factored: + * bandwidth smu. Pass smu as follows depending on whether A will be + * LU factored: * * (1) Pass smu = mu if A will not be factored. * * (2) Pass smu = MIN(N-1,mu+ml) if A will be factored. * - * NewBandMat returns the storage allocated (type DlsMat) or - * NULL if the request for matrix storage cannot be satisfied. - * See the documentation for the type DlsMat for matrix storage + * SUNDlsMat_NewBandMat returns the storage allocated (type SUNDlsMat) + * or NULL if the request for matrix storage cannot be satisfied. See + * the documentation for the type SUNDlsMat for matrix storage * details. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT DlsMat NewBandMat(sunindextype N, sunindextype mu, - sunindextype ml, sunindextype smu); +SUNDIALS_EXPORT +SUNDlsMat SUNDlsMat_NewBandMat(sunindextype N, sunindextype mu, + sunindextype ml, sunindextype smu); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_NewBandMat instead") +DlsMat NewBandMat(sunindextype N, sunindextype mu, + sunindextype ml, sunindextype smu); /* * ----------------------------------------------------------------- - * Functions: DestroyMat + * Functions: SUNDlsMat_DestroyMat * ----------------------------------------------------------------- - * DestroyMat frees the memory allocated by NewDenseMat or NewBandMat + * SUNDlsMat_DestroyMat frees the memory allocated by + * SUNDlsMat_NewDenseMat or SUNDlsMat_NewBandMat * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void DestroyMat(DlsMat A); +SUNDIALS_EXPORT +void SUNDlsMat_DestroyMat(DlsMat A); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DestroyMat instead") +void DestroyMat(DlsMat A); /* * ----------------------------------------------------------------- - * Function: NewIntArray + * Function: SUNDlsMat_NewIntArray * ----------------------------------------------------------------- - * NewIntArray allocates memory an array of N int's and returns - * the pointer to the memory it allocates. If the request for + * SUNDlsMat_NewIntArray allocates memory an array of N int's and + * returns the pointer to the memory it allocates. If the request for * memory storage cannot be satisfied, it returns NULL. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT int *NewIntArray(int N); +SUNDIALS_EXPORT +int* SUNDlsMat_NewIntArray(int N); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_NewIntArray instead") +int* NewIntArray(int N); /* * ----------------------------------------------------------------- - * Function: NewIndexArray + * Function: SUNDlsMat_NewIndexArray * ----------------------------------------------------------------- - * NewIndexArray allocates memory an array of N sunindextype's and - * returns the pointer to the memory it allocates. If the request - * for memory storage cannot be satisfied, it returns NULL. + * SUNDlsMat_NewIndexArray allocates memory an array of N + * sunindextype's and returns the pointer to the memory it + * allocates. If the request for memory storage cannot be satisfied, + * it returns NULL. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT sunindextype *NewIndexArray(sunindextype N); +SUNDIALS_EXPORT +sunindextype* SUNDlsMat_NewIndexArray(sunindextype N); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_NewIndexArray instead") +sunindextype* NewIndexArray(sunindextype N); /* * ----------------------------------------------------------------- - * Function: NewRealArray + * Function: SUNDlsMat_NewRealArray * ----------------------------------------------------------------- - * NewRealArray allocates memory an array of N realtype and returns - * the pointer to the memory it allocates. If the request for + * SUNDlsMat_NewRealArray allocates memory an array of N realtype and + * returns the pointer to the memory it allocates. If the request for * memory storage cannot be satisfied, it returns NULL. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT realtype *NewRealArray(sunindextype N); +SUNDIALS_EXPORT +realtype* SUNDlsMat_NewRealArray(sunindextype N); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_NewRealArray instead") +realtype* NewRealArray(sunindextype N); /* * ----------------------------------------------------------------- - * Function: DestroyArray + * Function: SUNDlsMat_DestroyArray * ----------------------------------------------------------------- - * DestroyArray frees memory allocated by NewIntArray, NewIndexArray, - * or NewRealArray. + * SUNDlsMat_DestroyArray frees memory allocated by + * SUNDlsMat_NewIntArray, SUNDlsMat_NewIndexArray, or + * SUNDlsMat_NewRealArray. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void DestroyArray(void *p); +SUNDIALS_EXPORT +void SUNDlsMat_DestroyArray(void *p); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_DestroyArray instead") +void DestroyArray(void *p); /* * ----------------------------------------------------------------- - * Function : AddIdentity + * Function : SUNDlsMat_AddIdentity * ----------------------------------------------------------------- - * AddIdentity adds 1.0 to the main diagonal (A_ii, i=0,1,...,N-1) of - * the M-by-N matrix A (M>= N) and stores the result back in A. - * AddIdentity is typically used with square matrices. - * AddIdentity does not check for M >= N and therefore a segmentation - * fault will occur if M < N! + * SUNDlsMat_AddIdentity adds 1.0 to the main diagonal (A_ii, + * i=0,1,...,N-1) of the M-by-N matrix A (M>= N) and stores the result + * back in A. SUNDlsMat_AddIdentity is typically used with square + * matrices. SUNDlsMat_AddIdentity does not check for M >= N and + * therefore a segmentation fault will occur if M < N! * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void AddIdentity(DlsMat A); +SUNDIALS_EXPORT +void SUNDlsMat_AddIdentity(SUNDlsMat A); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_AddIdentity instead") +void AddIdentity(DlsMat A); /* * ----------------------------------------------------------------- - * Function : SetToZero + * Function : SUNDlsMat_SetToZero * ----------------------------------------------------------------- - * SetToZero sets all the elements of the M-by-N matrix A to 0.0. + * SUNDlsMat_SetToZero sets all the elements of the M-by-N matrix A + * to 0.0. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void SetToZero(DlsMat A); +SUNDIALS_EXPORT +void SUNDlsMat_SetToZero(SUNDlsMat A); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_SetToZero instead") +void SetToZero(DlsMat A); /* * ----------------------------------------------------------------- - * Functions: PrintMat + * Functions: SUNDlsMat_PrintMat * ----------------------------------------------------------------- * This function prints the M-by-N (dense or band) matrix A to * outfile as it would normally appear on paper. @@ -313,8 +370,11 @@ SUNDIALS_EXPORT void SetToZero(DlsMat A); * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT void PrintMat(DlsMat A, FILE *outfile); +SUNDIALS_EXPORT +void SUNDlsMat_PrintMat(SUNDlsMat A, FILE *outfile); +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_PrintMat") +void PrintMat(DlsMat A, FILE *outfile); /* * ================================================================== @@ -322,14 +382,49 @@ SUNDIALS_EXPORT void PrintMat(DlsMat A, FILE *outfile); * ================================================================== */ -SUNDIALS_EXPORT realtype **newDenseMat(sunindextype m, sunindextype n); -SUNDIALS_EXPORT realtype **newBandMat(sunindextype n, sunindextype smu, - sunindextype ml); -SUNDIALS_EXPORT void destroyMat(realtype **a); -SUNDIALS_EXPORT int *newIntArray(int n); -SUNDIALS_EXPORT sunindextype *newIndexArray(sunindextype n); -SUNDIALS_EXPORT realtype *newRealArray(sunindextype m); -SUNDIALS_EXPORT void destroyArray(void *v); +SUNDIALS_EXPORT +realtype** SUNDlsMat_newDenseMat(sunindextype m, sunindextype n); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_newDenseMat instead") +realtype** newDenseMat(sunindextype m, sunindextype n); + +SUNDIALS_EXPORT +realtype** SUNDlsMat_newBandMat(sunindextype n, sunindextype smu, + sunindextype ml); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_newBandMat instead") +realtype** newBandMat(sunindextype n, sunindextype smu, + sunindextype ml); + +SUNDIALS_EXPORT +void SUNDlsMat_destroyMat(realtype** a); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_destroyMat instead") +void destroyMat(realtype** a); + +SUNDIALS_EXPORT +int* SUNDlsMat_newIntArray(int n); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_newIntArray instead") +int* newIntArray(int n); + +SUNDIALS_EXPORT +sunindextype* SUNDlsMat_newIndexArray(sunindextype n); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_newIndexArray instead") +sunindextype* newIndexArray(sunindextype n); + +SUNDIALS_EXPORT +realtype* SUNDlsMat_newRealArray(sunindextype m); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_newRealArray instead") + realtype* newRealArray(sunindextype m); + +SUNDIALS_EXPORT +void SUNDlsMat_destroyArray(void* v); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNDlsMat_destroyArray instead") +void destroyArray(void* v); #ifdef __cplusplus diff --git a/inst/include/sundials/sundials_export.h b/inst/include/sundials/sundials_export.h new file mode 100644 index 0000000..c84af70 --- /dev/null +++ b/inst/include/sundials/sundials_export.h @@ -0,0 +1,42 @@ + +#ifndef SUNDIALS_EXPORT_H +#define SUNDIALS_EXPORT_H + +#ifdef SUNDIALS_STATIC_DEFINE +# define SUNDIALS_EXPORT +# define SUNDIALS_NO_EXPORT +#else +# ifndef SUNDIALS_EXPORT +# ifdef sundials_generic_EXPORTS + /* We are building this library */ +# define SUNDIALS_EXPORT __attribute__((visibility("default"))) +# else + /* We are using this library */ +# define SUNDIALS_EXPORT __attribute__((visibility("default"))) +# endif +# endif + +# ifndef SUNDIALS_NO_EXPORT +# define SUNDIALS_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif + +#ifndef SUNDIALS_DEPRECATED +# define SUNDIALS_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef SUNDIALS_DEPRECATED_EXPORT +# define SUNDIALS_DEPRECATED_EXPORT SUNDIALS_EXPORT SUNDIALS_DEPRECATED +#endif + +#ifndef SUNDIALS_DEPRECATED_NO_EXPORT +# define SUNDIALS_DEPRECATED_NO_EXPORT SUNDIALS_NO_EXPORT SUNDIALS_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef SUNDIALS_NO_DEPRECATED +# define SUNDIALS_NO_DEPRECATED +# endif +#endif + +#endif /* SUNDIALS_EXPORT_H */ diff --git a/inst/include/sundials/sundials_futils.h b/inst/include/sundials/sundials_futils.h new file mode 100644 index 0000000..e37ec7b --- /dev/null +++ b/inst/include/sundials/sundials_futils.h @@ -0,0 +1,38 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS Fortran 2003 interface utility definitions. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_FUTILS_H +#define _SUNDIALS_FUTILS_H + +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* Create a file pointer with the given file name and mode. */ +SUNDIALS_EXPORT FILE* SUNDIALSFileOpen(const char* filename, const char* modes); + +/* Close a file pointer with the given file name. */ +SUNDIALS_EXPORT void SUNDIALSFileClose(FILE* fp); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sundials/sundials_hashmap.h b/inst/include/sundials/sundials_hashmap.h new file mode 100644 index 0000000..32da1e9 --- /dev/null +++ b/inst/include/sundials/sundials_hashmap.h @@ -0,0 +1,440 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * A simple header-only hashmap implementation for char* keys and + * void* values. Uses linear probing to resolve collisions. + * The values can be anything, but will be freed by + * the hash map upon its destruction. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_HASHMAP_H +#define _SUNDIALS_HASHMAP_H + +#include +#include + +static const unsigned long HASH_PRIME = 2216829733; /* 14695981039346656037U; */ +static const unsigned long HASH_OFFSET_BASIS = 435; /* 1099511628211U; */ + +/* + For a nice discussion on popular hashing algorithms see: + https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed/145633#145633 + + This is a 64-bit implementation of the 'a' modification of the + Fowler–Noll–Vo hash (i.e., FNV1-a). + */ +static unsigned long fnv1a_hash(const char* str) +{ + unsigned long hash = HASH_OFFSET_BASIS; + char c; + while ((c = *str++)) + { + hash = (hash ^ c) * HASH_PRIME; + } + return hash; +} + +typedef struct _SUNHashMapKeyValue* SUNHashMapKeyValue; + +struct _SUNHashMapKeyValue { + const char* key; + void* value; +}; + +typedef struct _SUNHashMap* SUNHashMap; + +struct _SUNHashMap { + int size; /* current number of entries */ + int max_size; /* max number of entries */ + SUNHashMapKeyValue* buckets; +}; + +/* + This function creates a new SUNHashMap object allocated to hold + up to 'max_size' entries. + + **Arguments:** + * ``max_size`` -- the max number of entries in the hashmap + * ``map`` -- on input, a SUNHasMap pointer, on output the SUNHashMap will be + allocated + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_New(int max_size, SUNHashMap* map) +{ + int i; + + if (max_size <= 0) + { + return (-1); + } + + *map = NULL; + *map = (SUNHashMap)malloc(sizeof(struct _SUNHashMap)); + + if (map == NULL) + { + return (-1); + } + + (*map)->size = 0; + (*map)->max_size = max_size; + + (*map)->buckets = NULL; + (*map)->buckets = + (SUNHashMapKeyValue*)malloc(max_size * sizeof(SUNHashMapKeyValue)); + + if ((*map)->buckets == NULL) + { + free(*map); + return (-1); + } + + /* Initialize all buckets to NULL */ + for (i = 0; i < max_size; i++) + { + (*map)->buckets[i] = NULL; + } + + return (0); +} + +/* + This function frees the SUNHashMap object. + + **Arguments:** + * ``map`` -- on input, a SUNHasMap pointer, on output the SUNHashMap will be + deallocated and set to ``NULL`` + * ``freevalue`` -- callback function that should free the value object + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_Destroy(SUNHashMap* map, void (*freevalue)(void* ptr)) +{ + int i; + + if (map == NULL || freevalue == NULL) + { + return (-1); + } + + for (i = 0; i < (*map)->max_size; i++) + { + if ((*map)->buckets[i] && (*map)->buckets[i]->value) + { + freevalue((*map)->buckets[i]->value); + } + + if ((*map)->buckets[i]) + { + free((*map)->buckets[i]); + } + } + if ((*map)->buckets) + { + free((*map)->buckets); + } + if (*map) + { + free(*map); + } + *map = NULL; + + return (0); +} + +/* + This function iterates the map over the range [start, N]. N is either the + index at which ``yieldfn`` indicates the iteration should stop, or the max + entries in the map. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``start`` -- the start of the iteration range + * ``yieldfn`` -- the callback function to call every iteration + this should return -1 to continue the iteration, or >= 0 to + stop; the first argument is the current index, the second + argument is the current key-value pair, and the final + argument is the same pointer ``ctx`` as the final argument + to SUNHashMapIterate. + * ``ctx`` -- a pointer to pass on to ``yieldfn`` + + **Returns:** + * ``max_size`` -- iterated the whole map + * ``>=0`` -- the index at which the iteration stopped + * ``<-1`` -- an error occurred + */ +static int SUNHashMap_Iterate(SUNHashMap map, int start, + int (*yieldfn)(int, SUNHashMapKeyValue, void*), + void* ctx) +{ + int i; + + if (map == NULL || yieldfn == NULL) + { + return (-2); + } + + for (i = start; i < map->max_size; i++) + { + int retval = yieldfn(i, map->buckets[i], ctx); + if (retval >= 0) + { + return (retval); /* yieldfn indicates the loop should break */ + } + if (retval < -1) + { + return (retval); /* error occurred */ + } + } + + return (map->max_size); +} + +static int sunHashMapLinearProbeInsert(int idx, SUNHashMapKeyValue kv, void* ctx) +{ + /* find the next open spot */ + if (kv == NULL) + { + return (idx); /* open spot found at idx */ + } + return (-1); /* keep looking */ +} + +/* + This function creates a key-value pair and attempts to insert it into the map. + Will use linear probing if there is a collision. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``key`` -- the key to store + * ``value`` -- the value associated with the key + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + * ``-2`` -- the map is full + */ +static int SUNHashMap_Insert(SUNHashMap map, const char* key, void* value) +{ + int idx; + int retval; + SUNHashMapKeyValue kvp; + + if (map == NULL || key == NULL || value == NULL) + { + return (-1); + } + + /* We want the index to be in (0, map->max_size) */ + idx = (int)(fnv1a_hash(key) % map->max_size); + + /* Check if the bucket is already filled */ + if (map->buckets[idx] != NULL) + { + /* Find the next open spot */ + retval = SUNHashMap_Iterate(map, idx, sunHashMapLinearProbeInsert, NULL); + if (retval < 0) + { + return (-1); /* error occurred */ + } + if (retval == map->max_size) + { + return (-2); /* no open entry */ + } + + idx = retval; + } + + /* Create the key-value pair */ + kvp = (SUNHashMapKeyValue)malloc(sizeof(struct _SUNHashMapKeyValue)); + if (kvp == NULL) + { + return (-1); + } + + kvp->key = key; + kvp->value = value; + + /* Insert the key-value pair */ + map->buckets[idx] = kvp; + map->size++; + + return (0); +} + +static int sunHashMapLinearProbeGet(int idx, SUNHashMapKeyValue kv, void* key) +{ + /* target key cannot be NULL */ + if (key == NULL) + { + return (-2); + } + + /* find the matching entry */ + if (kv == NULL) + { + return (-1); /* keep looking since this bucket is empty */ + } + if (!strcmp(kv->key, (const char*)key)) + { + return (idx); /* found it at idx */ + } + return (-1); /* keep looking */ +} + +/* + This function gets the value for the given key. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``key`` -- the key to look up + * ``value`` -- the value associated with the key + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + * ``-2`` -- key not found + */ +static int SUNHashMap_GetValue(SUNHashMap map, const char* key, void** value) +{ + int idx; + int retval; + + if (map == NULL || key == NULL || value == NULL) + { + return (-1); + } + + /* We want the index to be in (0, map->max_size) */ + idx = (int)(fnv1a_hash(key) % map->max_size); + + /* Check if the key exists */ + if (map->buckets[idx] == NULL) + { + return (-2); + } + + /* Check to see if this is a collision */ + if (strcmp(map->buckets[idx]->key, key)) + { + /* Keys did not match, so we have a collision and need to probe */ + retval = + SUNHashMap_Iterate(map, idx + 1, sunHashMapLinearProbeGet, (void*)key); + if (retval < 0) + { + return (-1); /* error occurred */ + } + if (retval == map->max_size) + { + return (-2); /* not found */ + } + } + + /* Return a reference to the value only */ + *value = map->buckets[idx]->value; + + return (0); +} + +/* + This function allocates a new array the same max_size as the map, + then it sorts map into a new array of key-value pairs leaving + the map unchanged. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``sorted`` -- pointer to the sorted array of key-value pairs, this + function will allocate the array + * ``compar`` -- comparator function that is passed to the C standard qsort + function + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_Sort(SUNHashMap map, SUNHashMapKeyValue** sorted, + int (*compar)(const void*, const void*)) +{ + int i; + + if (map == NULL || compar == NULL) + { + return (-1); + } + + *sorted = + (SUNHashMapKeyValue*)malloc(map->max_size * sizeof(SUNHashMapKeyValue)); + if (*sorted == NULL) + { + return (-1); + } + + /* Copy the buckets into a new array */ + for (i = 0; i < map->max_size; i++) + { + (*sorted)[i] = map->buckets[i]; + } + + qsort(*sorted, map->max_size, sizeof(SUNHashMapKeyValue), compar); + + return (0); +} + +/* + This function allocates a new array with just they values of the map. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``values`` -- pointer to the array of keys + * ``value_size`` -- the size of the values in bytes + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +#if SUNDIALS_MPI_ENABLED +static int SUNHashMap_Values(SUNHashMap map, void*** values, size_t value_size) +{ + int i; + int count = 0; + + if (map == NULL) + { + return (-1); + } + + *values = (void**)malloc(map->size * sizeof(value_size)); + if (values == NULL) + { + return (-1); + } + + /* Copy the values into a new array */ + for (i = 0; i < map->max_size; i++) + { + if (map->buckets[i]) + { + (*values)[count++] = map->buckets[i]->value; + } + } + + return (0); +} +#endif + +#endif diff --git a/inst/include/sundials/sundials_hip.h b/inst/include/sundials/sundials_hip.h new file mode 100644 index 0000000..c2ba37a --- /dev/null +++ b/inst/include/sundials/sundials_hip.h @@ -0,0 +1,73 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos, and Daniel McGreer @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for working with HIP. + * ----------------------------------------------------------------- + */ + +#include + +#include + +#include + +#ifndef _SUNDIALS_HIP_H +#define _SUNDIALS_HIP_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* --------------------------------------------------------------------------- + * Utility macros + * ---------------------------------------------------------------------------*/ + +#define SUNDIALS_HIP_VERIFY(hiperr) SUNDIALS_HIP_Assert(hiperr, __FILE__, __LINE__) + +#define SUNDIALS_KERNEL_NAME(...) __VA_ARGS__ +#ifndef SUNDIALS_DEBUG_HIP_LASTERROR +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ kernel<<>>(__VA_ARGS__); } +#else +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ \ + kernel<<>>(__VA_ARGS__); \ + hipDeviceSynchronize(); \ + SUNDIALS_HIP_VERIFY(hipGetLastError()); \ +} +#endif + +/* --------------------------------------------------------------------------- + * Utility functions + * ---------------------------------------------------------------------------*/ +inline booleantype SUNDIALS_HIP_Assert(hipError_t hiperr, const char *file, int line) +{ + if (hiperr != hipSuccess) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in HIP runtime operation: %s %s:%d\n", + hipGetErrorString(hiperr), file, line); +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_HIP_H */ diff --git a/inst/include/sundials/sundials_hip_policies.hpp b/inst/include/sundials/sundials_hip_policies.hpp new file mode 100644 index 0000000..05579ba --- /dev/null +++ b/inst/include/sundials/sundials_hip_policies.hpp @@ -0,0 +1,238 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines the ExecPolicy classes which + * are utilized to determine HIP kernel launch parameters. + * ----------------------------------------------------------------- + */ + +#ifndef _SUNDIALS_HIPEXECPOLICIES_HPP +#define _SUNDIALS_HIPEXECPOLICIES_HPP + +#include +#include + +#include + +namespace sundials +{ +namespace hip +{ + +#if defined(__HIP_PLATFORM_HCC__) +constexpr const sunindextype WARP_SIZE = 64; +#elif defined(__HIP_PLATFORM_NVCC__) +constexpr const sunindextype WARP_SIZE = 32; +#endif +constexpr const sunindextype MAX_BLOCK_SIZE = 1024; +constexpr const sunindextype MAX_WARPS = MAX_BLOCK_SIZE / WARP_SIZE; + +class ExecPolicy +{ +public: + ExecPolicy(hipStream_t stream = 0) : stream_(stream) { } + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const = 0; + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const = 0; + virtual const hipStream_t* stream() const { return (&stream_); } + virtual ExecPolicy* clone() const = 0; + ExecPolicy* clone_new_stream(hipStream_t stream) const { + ExecPolicy* ex = clone(); + ex->stream_ = stream; + return ex; + } + virtual bool atomic() const { return false; } + virtual ~ExecPolicy() {} +protected: + hipStream_t stream_; +}; + + +/* + * A kernel execution policy that maps each thread to a work unit. + * The number of threads per block (blockSize) can be set to anything. + * The grid size will be chosen so that there are enough threads for one + * thread per element. If a stream is provided, it will be used to + * execute the kernel. + */ +class ThreadDirectExecPolicy : public ExecPolicy +{ +public: + ThreadDirectExecPolicy(const size_t blockDim, hipStream_t stream = 0) + : blockDim_(blockDim), ExecPolicy(stream) + {} + + ThreadDirectExecPolicy(const ThreadDirectExecPolicy& ex) + : blockDim_(ex.blockDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + /* ceil(n/m) = floor((n + m - 1) / m) */ + return (numWorkUnits + blockSize() - 1) / blockSize(); + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new ThreadDirectExecPolicy(*this)); + } + +private: + const size_t blockDim_; +}; + +/* + * A kernel execution policy for kernels that use grid stride loops. + * The number of threads per block (blockSize) can be set to anything. + * The number of blocks (gridSize) can be set to anything. If a stream + * is provided, it will be used to execute the kernel. + */ +class GridStrideExecPolicy : public ExecPolicy +{ +public: + GridStrideExecPolicy(const size_t blockDim, const size_t gridDim, hipStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + {} + + GridStrideExecPolicy(const GridStrideExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t /*numWorkUnits*/ = 0, size_t /*blockDim*/ = 0) const + { + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new GridStrideExecPolicy(*this)); + } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +/* + * A kernel execution policy for performing a reduction across indvidual thread + * blocks. The number of threads per block (blockSize) can be set to any valid + * multiple of the HIP warp size. The number of blocks (gridSize) can be set to + * any value greater or equal to 0. If it is set to 0, then the grid size will + * be chosen so that there are at most two work units per thread. If a stream is + * provided, it will be used to execute the kernel. + */ + +class BlockReduceAtomicExecPolicy : public ExecPolicy +{ +public: + BlockReduceAtomicExecPolicy(const size_t blockDim, const size_t gridDim = 0, hipStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + { + if (blockDim < 1 || blockDim % WARP_SIZE) + { + throw std::invalid_argument("the block size must be a multiple of the HIP warp size"); + } + } + + BlockReduceAtomicExecPolicy(const BlockReduceAtomicExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + if (gridDim_ == 0) + { + return (numWorkUnits + (blockSize() * 2 - 1)) / (blockSize() * 2); + } + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new BlockReduceAtomicExecPolicy(*this)); + } + + virtual bool atomic() const { return true; } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +class BlockReduceExecPolicy : public ExecPolicy +{ +public: + BlockReduceExecPolicy(const size_t blockDim, const size_t gridDim = 0, hipStream_t stream = 0) + : blockDim_(blockDim), gridDim_(gridDim), ExecPolicy(stream) + { + if (blockDim < 1 || blockDim % WARP_SIZE) + { + throw std::invalid_argument("the block size must be a multiple of the HIP warp size"); + } + } + + BlockReduceExecPolicy(const BlockReduceExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_), ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t /*blockDim*/ = 0) const + { + if (gridDim_ == 0) + { + return (numWorkUnits + (blockSize() * 2 - 1)) / (blockSize() * 2); + } + return gridDim_; + } + + virtual size_t blockSize(size_t /*numWorkUnits*/ = 0, size_t /*gridDim*/ = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new BlockReduceExecPolicy(*this)); + } + + bool atomic() const { return false; } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +} // namespace hip +} // namespace sundials + +typedef sundials::hip::ExecPolicy SUNHipExecPolicy; +typedef sundials::hip::ThreadDirectExecPolicy SUNHipThreadDirectExecPolicy; +typedef sundials::hip::GridStrideExecPolicy SUNHipGridStrideExecPolicy; +typedef sundials::hip::BlockReduceExecPolicy SUNHipBlockReduceExecPolicy; +typedef sundials::hip::BlockReduceAtomicExecPolicy SUNHipBlockReduceAtomicExecPolicy; + +#endif diff --git a/inst/include/sundials/sundials_iterative.h b/inst/include/sundials/sundials_iterative.h index 87e9493..071eb5a 100644 --- a/inst/include/sundials/sundials_iterative.h +++ b/inst/include/sundials/sundials_iterative.h @@ -1,8 +1,9 @@ /* ----------------------------------------------------------------- * Programmer(s): Scott D. Cohen and Alan C. Hindmarsh @ LLNL + * Shelby Lockhart @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -17,13 +18,13 @@ * The function type declarations give the prototypes for the * functions to be called within an iterative linear solver, that * are responsible for - * multiplying A by a given vector v (ATimesFn), - * setting up a preconditioner P (PSetupFn), and - * solving the preconditioner equation Pz = r (PSolveFn). + * multiplying A by a given vector v (SUNATimesFn), + * setting up a preconditioner P (SUNPSetupFn), and + * solving the preconditioner equation Pz = r (SUNPSolveFn). * -----------------------------------------------------------------*/ -#ifndef _ITERATIVE_H -#define _ITERATIVE_H +#ifndef _SUNDIALS_ITERATIVE_H +#define _SUNDIALS_ITERATIVE_H #include @@ -36,71 +37,82 @@ extern "C" { * ----------------------------------------------------------------- * enum : types of preconditioning * ----------------------------------------------------------------- - * PREC_NONE : The iterative linear solver should not use - * preconditioning. + * SUN_PREC_NONE : The iterative linear solver should not use + * preconditioning. * - * PREC_LEFT : The iterative linear solver uses preconditioning on - * the left only. + * SUN_PREC_LEFT : The iterative linear solver uses preconditioning + * on the left only. * - * PREC_RIGHT : The iterative linear solver uses preconditioning on - * the right only. + * SUN_PREC_RIGHT : The iterative linear solver uses preconditioning + * on the right only. * - * PREC_BOTH : The iterative linear solver uses preconditioning on - * both the left and the right. + * SUN_PREC_BOTH : The iterative linear solver uses preconditioning + * on both the left and the right. * ----------------------------------------------------------------- */ +/* DEPRECATED PREC_NONE: use SUN_PREC_NONE */ +/* DEPRECATED PREC_LEFT: use SUN_PREC_LEFT */ +/* DEPRECATED PREC_RIGHT: use SUN_PREC_RIGHT */ +/* DEPRECATED PREC_BOTH: use SUN_PREC_BOTH */ enum { PREC_NONE, PREC_LEFT, PREC_RIGHT, PREC_BOTH }; +enum { SUN_PREC_NONE, SUN_PREC_LEFT, SUN_PREC_RIGHT, SUN_PREC_BOTH }; /* * ----------------------------------------------------------------- * enum : types of Gram-Schmidt routines * ----------------------------------------------------------------- - * MODIFIED_GS : The iterative solver uses the modified - * Gram-Schmidt routine ModifiedGS listed in this - * file. + * SUN_MODIFIED_GS : The iterative solver uses the modified + * Gram-Schmidt routine SUNModifiedGS listed in + * this file. * - * CLASSICAL_GS : The iterative solver uses the classical - * Gram-Schmidt routine ClassicalGS listed in this - * file. + * SUN_CLASSICAL_GS : The iterative solver uses the classical + * Gram-Schmidt routine SUNClassicalGS listed in + * this file. * ----------------------------------------------------------------- */ +/* DEPRECATED MODIFIED_GS: use SUN_MODIFIED_GS */ +/* DEPRECATED CLASSICAL_GS: use SUN_CLASSICAL_GS */ enum { MODIFIED_GS = 1, CLASSICAL_GS = 2 }; +enum { SUN_MODIFIED_GS = 1, SUN_CLASSICAL_GS = 2 }; /* * ----------------------------------------------------------------- - * Type: ATimesFn + * Type: SUNATimesFn * ----------------------------------------------------------------- - * An ATimesFn multiplies Av and stores the result in z. The + * An SUNATimesFn multiplies Av and stores the result in z. The * caller is responsible for allocating memory for the z vector. * The parameter A_data is a pointer to any information about A * which the function needs in order to do its job. The vector v - * is unchanged. An ATimesFn returns 0 if successful and a + * is unchanged. An SUNATimesFn returns 0 if successful and a * non-zero value if unsuccessful. * ----------------------------------------------------------------- */ +/* DEPRECATED ATimesFn: use SUNATimesFn */ typedef int (*ATimesFn)(void *A_data, N_Vector v, N_Vector z); +typedef int (*SUNATimesFn)(void *A_data, N_Vector v, N_Vector z); /* * ----------------------------------------------------------------- - * Type: PSetupFn + * Type: SUNPSetupFn * ----------------------------------------------------------------- - * A PSetupFn is an integrator-supplied routine that accesses data - * stored in the integrator memory structure (P_data), and calls - * the user-supplied, integrator-specific preconditioner setup - * routine. + * A SUNPSetupFn is an integrator-supplied routine that accesses data + * stored in the integrator memory structure (P_data), and calls the + * user-supplied, integrator-specific preconditioner setup routine. * ----------------------------------------------------------------- */ +/* DEPRECATED PSetupFn: use SUNPSetupFn */ typedef int (*PSetupFn)(void *P_data); +typedef int (*SUNPSetupFn)(void *P_data); /* * ----------------------------------------------------------------- - * Type: PSolveFn + * Type: SUNPSolveFn * ----------------------------------------------------------------- - * A PSolveFn solves the preconditioner equation Pz = r for the + * A SUNPSolveFn solves the preconditioner equation Pz = r for the * vector z. The caller is responsible for allocating memory for * the z vector. The parameter P_data is a pointer to any * information about P which the function needs in order to do @@ -113,8 +125,8 @@ typedef int (*PSetupFn)(void *P_data); * || Pz - r ||_wrms < tol * where the weight vector for the WRMS norm may be accessed from * the main integrator memory structure. - * The vector r should not be modified by the PSolveFn. - * A PSolveFn returns 0 if successful and a non-zero value if + * The vector r should not be modified by the SUNPSolveFn. + * A SUNPSolveFn returns 0 if successful and a non-zero value if * unsuccessful. On a failure, a negative return value indicates * an unrecoverable condition, while a positive value indicates * a recoverable one, in which the calling routine may reattempt @@ -122,14 +134,44 @@ typedef int (*PSetupFn)(void *P_data); * ----------------------------------------------------------------- */ +/* DEPRECATED PSolveFn: use SUNPSolveFn */ typedef int (*PSolveFn)(void *P_data, N_Vector r, N_Vector z, realtype tol, int lr); +typedef int (*SUNPSolveFn)(void *P_data, N_Vector r, N_Vector z, + realtype tol, int lr); /* * ----------------------------------------------------------------- - * Function: ModifiedGS + * Type: SUNQRAddFn * ----------------------------------------------------------------- - * ModifiedGS performs a modified Gram-Schmidt orthogonalization + * A QRAddFn updates a given QR factorization defined by the input + * parameters: + * Q : N_Vector * + * R : realtype * + * with the input vector + * f : N_Vector + * + * Additional input parameters include: + * + * m : (int) the number of vectors already in the QR factorization + * + * mMax : (int) the maximum number of vectors to be in the QR + * factorization (the number of N_Vectors allocated to be in Q) + * + * SUNQR_data : (void *) a structure containing any additional inputs + * required for the execution of QRAddFn + * + * ----------------------------------------------------------------- +*/ + +typedef int (*SUNQRAddFn)(N_Vector *Q, realtype *R, N_Vector f, + int m, int mMax, void *QR_data); + +/* + * ----------------------------------------------------------------- + * Function: SUNModifiedGS + * ----------------------------------------------------------------- + * SUNModifiedGS performs a modified Gram-Schmidt orthogonalization * of the N_Vector v[k] against the p unit N_Vectors at * v[k-1], v[k-2], ..., v[k-p]. * @@ -152,47 +194,58 @@ typedef int (*PSolveFn)(void *P_data, N_Vector r, N_Vector z, * new_vk_norm is a pointer to memory allocated by the caller to * hold the Euclidean norm of the orthogonalized vector v[k]. * - * If (k-p) < 0, then ModifiedGS uses p=k. The orthogonalized + * If (k-p) < 0, then SUNModifiedGS uses p=k. The orthogonalized * v[k] is NOT normalized and is stored over the old v[k]. Once * the orthogonalization has been performed, the Euclidean norm * of v[k] is stored in (*new_vk_norm). * - * ModifiedGS returns 0 to indicate success. It cannot fail. + * SUNModifiedGS returns 0 to indicate success. It cannot fail. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT int ModifiedGS(N_Vector* v, realtype **h, int k, int p, - realtype *new_vk_norm); +SUNDIALS_EXPORT +int SUNModifiedGS(N_Vector* v, realtype **h, int k, int p, + realtype *new_vk_norm); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNModifiedGS instead") +int ModifiedGS(N_Vector* v, realtype **h, int k, int p, + realtype *new_vk_norm); /* * ----------------------------------------------------------------- - * Function: ClassicalGS + * Function: SUNClassicalGS * ----------------------------------------------------------------- - * ClassicalGS performs a classical Gram-Schmidt + * SUNClassicalGS performs a classical Gram-Schmidt * orthogonalization of the N_Vector v[k] against the p unit * N_Vectors at v[k-1], v[k-2], ..., v[k-p]. The parameters v, h, * k, p, and new_vk_norm are as described in the documentation - * for ModifiedGS. + * for SUNModifiedGS. * * stemp is a length k+1 array of realtype which can be used as - * workspace by the ClassicalGS routine. + * workspace by the SUNClassicalGS routine. * * vtemp is an N_Vector array of k+1 vectors which can be used as - * workspace by the ClassicalGS routine. + * workspace by the SUNClassicalGS routine. * - * ClassicalGS returns 0 to indicate success. + * SUNClassicalGS returns 0 to indicate success. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT int ClassicalGS(N_Vector* v, realtype **h, int k, int p, - realtype *new_vk_norm, realtype *stemp, - N_Vector* vtemp); +SUNDIALS_EXPORT +int SUNClassicalGS(N_Vector* v, realtype **h, int k, int p, + realtype *new_vk_norm, realtype *stemp, + N_Vector* vtemp); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNClassicalGS instead") +int ClassicalGS(N_Vector* v, realtype **h, int k, int p, + realtype *new_vk_norm, realtype *stemp, + N_Vector* vtemp); /* * ----------------------------------------------------------------- - * Function: QRfact + * Function: SUNQRfact * ----------------------------------------------------------------- - * QRfact performs a QR factorization of the Hessenberg matrix H. + * SUNQRfact performs a QR factorization of the Hessenberg matrix H. * * n is the problem size; the matrix H is (n+1) by n. * @@ -211,50 +264,226 @@ SUNDIALS_EXPORT int ClassicalGS(N_Vector* v, realtype **h, int k, int p, * n-1 columns of h have already been factored and only the last * column needs to be updated. * - * QRfact returns 0 if successful. If a zero is encountered on - * the diagonal of the triangular factor R, then QRfact returns + * SUNQRfact returns 0 if successful. If a zero is encountered on + * the diagonal of the triangular factor R, then SUNQRfact returns * the equation number of the zero entry, where the equations are - * numbered from 1, not 0. If QRsol is subsequently called in + * numbered from 1, not 0. If SUNQRsol is subsequently called in * this situation, it will return an error because it could not * divide by the zero diagonal entry. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT int QRfact(int n, realtype **h, realtype *q, int job); +SUNDIALS_EXPORT +int SUNQRfact(int n, realtype **h, realtype *q, int job); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNQRFact instead") +int QRfact(int n, realtype **h, realtype *q, int job); /* * ----------------------------------------------------------------- - * Function: QRsol + * Function: SUNQRsol * ----------------------------------------------------------------- - * QRsol solves the linear least squares problem + * SUNQRsol solves the linear least squares problem * * min (b - H*x, b - H*x), x in R^n, * * where H is a Hessenberg matrix, and b is in R^(n+1). - * It uses the QR factors of H computed by QRfact. + * It uses the QR factors of H computed by SUNQRfact. * * n is the problem size; the matrix H is (n+1) by n. * - * h is a matrix (computed by QRfact) containing the upper + * h is a matrix (computed by SUNQRfact) containing the upper * triangular factor R of the original Hessenberg matrix H. * - * q is an array of length 2*n (computed by QRfact) containing + * q is an array of length 2*n (computed by SUNQRfact) containing * the Givens rotations used to factor H. * * b is the (n+1)-vector appearing in the least squares problem * above. * * On return, b contains the solution x of the least squares - * problem, if QRsol was successful. + * problem, if SUNQRsol was successful. * - * QRsol returns a 0 if successful. Otherwise, a zero was + * SUNQRsol returns a 0 if successful. Otherwise, a zero was * encountered on the diagonal of the triangular factor R. - * In this case, QRsol returns the equation number (numbered + * In this case, SUNQRsol returns the equation number (numbered * from 1, not 0) of the zero entry. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT int QRsol(int n, realtype **h, realtype *q, realtype *b); +SUNDIALS_EXPORT +int SUNQRsol(int n, realtype **h, realtype *q, realtype *b); + +SUNDIALS_DEPRECATED_EXPORT_MSG("use SUNQRsol instead") +int QRsol(int n, realtype **h, realtype *q, realtype *b); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_MGS + * ----------------------------------------------------------------- + * SUNQRAdd_MGS uses Modified Gram Schmidt to update the QR factorization + * stored in user inputs + * - N_Vector *Q + * - realtype *R + * to include the orthonormalized vector input by + * - N_Vector df. + * + * Additional input parameters include: + * + * m : (int) current number of vectors in QR factorization + * + * mMax : (int) maximum number of vectors that will be in the QR + * factorization (the allocated number of N_Vectors in Q) + * + * QRdata : (void *) a struct containing any additional temporary + * vectors or arrays required for the QRAdd routine + * + * On return, Q and R contain the updated Q R factors, if + * SUNQRAdd_MGS was successful. + * + * SUNQRAdd_MGS returns a 0 if successful. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_MGS(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_ICWY + * ----------------------------------------------------------------- + * SUNQRAdd_ICWY uses the Inverse Compact WY Modified Gram Schmidt + * method to update the QR factorization stored in user inputs + * - N_Vector *Q + * - realtype *R + * - realtype *T (held within (void *) QRdata) + * to include the orthonormalized vector input by + * - N_Vector df. + * where the factorization to be updated is of the form + * Q * T * R + * + * Additional input parameters include: + * + * m : (int) current number of vectors in QR factorization + * + * mMax : (int) maximum number of vectors that will be in the QR + * factorization (the allocated number of N_Vectors in Q) + * + * QRdata : (void *) a struct containing any additional temporary + * vectors or arrays required for the QRAdd routine + * + * QRdata should contain : + * N_Vector vtemp, realtype *temp_array (this will be used for T) + * + * On return, Q, R, and T contain the updated Q T R factors, if + * SUNQRAdd_ICWY was successful. + * + * SUNQRAdd_ICWY returns a 0 if successful. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_ICWY(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_ICWY_SB + * ----------------------------------------------------------------- + * The same function as SUNQRAdd_ICWY but using a single buffer + * for global reductions. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_ICWY_SB(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_CGS2 + * ----------------------------------------------------------------- + * SUNQRAdd_CGS2 uses a Classical Gram Schmidt with Reorthogonalization + * formulation to update the QR factorization stored in user inputs + * - N_Vector *Q + * - realtype *R + * to include the orthonormalized vector input by + * - N_Vector df. + * + * Additional input parameters include: + * + * m : (int) current number of vectors in QR factorization + * + * mMax : (int) maximum number of vectors that will be in the QR + * factorization (the allocated number of N_Vectors in Q) + * + * QRdata : (void *) a struct containing any additional temporary + * vectors or arrays required for the QRAdd routine + * + * QRdata should contain : + * N_Vector vtemp, N_Vector vtemp2, realtype *temp_array + * + * On return, Q and R contain the updated Q R factors, if + * SUNQRAdd_CGS2 was successful. + * + * SUNQRAdd_CGS2 returns a 0 if successful. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_CGS2(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_DCGS2 + * ----------------------------------------------------------------- + * SUNQRAdd_DCGS2 uses a Classical Gram Schmidt with Reorthogonalization + * formulation that delays reorthogonlization (for the purpose of + * reducing number of inner products) to update the QR factorization + * stored in user inputs + * - N_Vector *Q + * - realtype *R + * to include the orthonormalized vector input by + * - N_Vector df. + * + * Additional input parameters include: + * + * m : (int) current number of vectors in QR factorization + * + * mMax : (int) maximum number of vectors that will be in the QR + * factorization (the allocated number of N_Vectors in Q) + * + * QRdata : (void *) a struct containing any additional temporary + * vectors or arrays required for the QRAdd routine + * + * QRdata should contain : + * N_Vector vtemp, N_Vector vtemp2, realtype *temp_array + * + * On return, Q and R contain the updated Q R factors, if + * SUNQRAdd_DCGS2 was successful. + * + * SUNQRAdd_DCGS2 returns a 0 if successful. Otherwise,.... + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_DCGS2(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); + +/* + * ----------------------------------------------------------------- + * Function: SUNQRAdd_DCGS2_SB + * ----------------------------------------------------------------- + * The same function as SUNQRAdd_DCGS2 but using a single buffer + * for global reductions. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT +int SUNQRAdd_DCGS2_SB(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata); #ifdef __cplusplus } diff --git a/inst/include/sundials/sundials_iterative_impl.h b/inst/include/sundials/sundials_iterative_impl.h new file mode 100644 index 0000000..fed1c72 --- /dev/null +++ b/inst/include/sundials/sundials_iterative_impl.h @@ -0,0 +1,35 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner and Shelby Lockhart @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the implementation header file for SUNDIALS functions used by + * different iterative solvers. + * ---------------------------------------------------------------------------*/ + +#include + +/* ----------------------------------------------------------------------------- + * Type: SUNQRData + * ----------------------------------------------------------------------------- + * A SUNQRData struct holds temporary workspace vectors and realtype arrays for + * a SUNQRAddFn. The N_Vectors and realtype arrays it contains are created by + * the routine calling a SUNQRAdd function. + * ---------------------------------------------------------------------------*/ + +typedef struct _SUNQRData *SUNQRData; + +struct _SUNQRData +{ + N_Vector vtemp; + N_Vector vtemp2; + realtype *temp_array; +}; diff --git a/inst/include/sundials/sundials_lapack.h b/inst/include/sundials/sundials_lapack.h index 886fecb..8d1b6da 100644 --- a/inst/include/sundials/sundials_lapack.h +++ b/inst/include/sundials/sundials_lapack.h @@ -1,13 +1,9 @@ -/* - * ----------------------------------------------------------------- - * $Revision$ - * $Date$ - * ----------------------------------------------------------------- +/* ----------------------------------------------------------------- * Programmer: Radu Serban @ LLNL * Daniel Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -18,12 +14,15 @@ * ----------------------------------------------------------------- * This is the header file for a generic package of direct matrix * operations for use with BLAS/LAPACK. - * ----------------------------------------------------------------- - */ + * -----------------------------------------------------------------*/ #ifndef _SUNDIALS_LAPACK_H #define _SUNDIALS_LAPACK_H +#include + +#warning "This header file is deprecated and will be removed in SUNDIALS v7.0.0" + #ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif @@ -106,100 +105,150 @@ extern "C" { /* Level-1 BLAS */ -extern void dcopy_f77(int *n, const double *x, const int *inc_x, double *y, const int *inc_y); -extern void dscal_f77(int *n, const double *alpha, double *x, const int *inc_x); +extern void dcopy_f77(sunindextype *n, const double *x, + const sunindextype *inc_x, double *y, + const sunindextype *inc_y); + +extern void dscal_f77(sunindextype *n, const double *alpha, double *x, + const sunindextype *inc_x); + +extern void scopy_f77(sunindextype *n, const float *x, + const sunindextype *inc_x, float *y, + const sunindextype *inc_y); -extern void scopy_f77(int *n, const float *x, const int *inc_x, float *y, const int *inc_y); -extern void sscal_f77(int *n, const float *alpha, float *x, const int *inc_x); +extern void sscal_f77(sunindextype *n, const float *alpha, float *x, + const sunindextype *inc_x); /* Level-2 BLAS */ -extern void dgemv_f77(const char *trans, int *m, int *n, const double *alpha, const double *a, - int *lda, const double *x, int *inc_x, const double *beta, double *y, int *inc_y, - int len_trans); +extern void dgemv_f77(const char *trans, sunindextype *m, sunindextype *n, + const double *alpha, const double *a, sunindextype *lda, + const double *x, sunindextype *inc_x, const double *beta, + double *y, sunindextype *inc_y); -extern void dtrsv_f77(const char *uplo, const char *trans, const char *diag, const int *n, - const double *a, const int *lda, double *x, const int *inc_x, - int len_uplo, int len_trans, int len_diag); +extern void dtrsv_f77(const char *uplo, const char *trans, const char *diag, + const sunindextype *n, const double *a, + const sunindextype *lda, double *x, + const sunindextype *inc_x); -extern void sgemv_f77(const char *trans, int *m, int *n, const float *alpha, const float *a, - int *lda, const float *x, int *inc_x, const float *beta, float *y, int *inc_y, - int len_trans); +extern void sgemv_f77(const char *trans, sunindextype *m, sunindextype *n, + const float *alpha, const float *a, sunindextype *lda, + const float *x, sunindextype *inc_x, const float *beta, + float *y, sunindextype *inc_y); -extern void strsv_f77(const char *uplo, const char *trans, const char *diag, const int *n, - const float *a, const int *lda, float *x, const int *inc_x, - int len_uplo, int len_trans, int len_diag); +extern void strsv_f77(const char *uplo, const char *trans, const char *diag, + const sunindextype *n, const float *a, + const sunindextype *lda, float *x, + const sunindextype *inc_x); /* Level-3 BLAS */ -extern void dsyrk_f77(const char *uplo, const char *trans, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *beta, - const double *c, const int *ldc, int len_uplo, int len_trans); +extern void dsyrk_f77(const char *uplo, const char *trans, + const sunindextype *n, const sunindextype *k, + const double *alpha, const double *a, + const sunindextype *lda, const double *beta, + const double *c, const sunindextype *ldc); -extern void ssyrk_f77(const char *uplo, const char *trans, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *beta, - const float *c, const int *ldc, int len_uplo, int len_trans); +extern void ssyrk_f77(const char *uplo, const char *trans, + const sunindextype *n, const sunindextype *k, + const float *alpha, const float *a, + const sunindextype *lda, const float *beta, + const float *c, const sunindextype *ldc); /* LAPACK */ -extern void dgbtrf_f77(const int *m, const int *n, const int *kl, const int *ku, - double *ab, int *ldab, int *ipiv, int *info); +extern void dgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + double *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); -extern void dgbtrs_f77(const char *trans, const int *n, const int *kl, const int *ku, const int *nrhs, - double *ab, const int *ldab, int *ipiv, double *b, const int *ldb, - int *info, int len_trans); +extern void dgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, double *ab, + const sunindextype *ldab, sunindextype *ipiv, + double *b, const sunindextype *ldb, sunindextype *info); -extern void dgeqp3_f77(const int *m, const int *n, double *a, const int *lda, int *jpvt, double *tau, - double *work, const int *lwork, int *info); +extern void dgeqp3_f77(const sunindextype *m, const sunindextype *n, double *a, + const sunindextype *lda, sunindextype *jpvt, double *tau, + double *work, const sunindextype *lwork, + sunindextype *info); -extern void dgeqrf_f77(const int *m, const int *n, double *a, const int *lda, double *tau, double *work, - const int *lwork, int *info); +extern void dgeqrf_f77(const sunindextype *m, const sunindextype *n, double *a, + const sunindextype *lda, double *tau, double *work, + const sunindextype *lwork, sunindextype *info); -extern void dgetrf_f77(const int *m, const int *n, double *a, int *lda, int *ipiv, int *info); +extern void dgetrf_f77(const sunindextype *m, const sunindextype *n, double *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); -extern void dgetrs_f77(const char *trans, const int *n, const int *nrhs, double *a, const int *lda, - int *ipiv, double *b, const int *ldb, int *info, int len_trans); +extern void dgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, double *a, + const sunindextype *lda, sunindextype *ipiv, double *b, + const sunindextype *ldb, sunindextype *info); -extern void dormqr_f77(const char *side, const char *trans, const int *m, const int *n, const int *k, - double *a, const int *lda, double *tau, double *c, const int *ldc, - double *work, const int *lwork, int *info, int len_side, int len_trans); +extern void dormqr_f77(const char *side, const char *trans, + const sunindextype *m, const sunindextype *n, + const sunindextype *k, double *a, + const sunindextype *lda, double *tau, double *c, + const sunindextype *ldc, double *work, + const sunindextype *lwork, sunindextype *info); -extern void dpotrf_f77(const char *uplo, const int *n, double *a, int *lda, int *info, int len_uplo); +extern void dpotrf_f77(const char *uplo, const sunindextype *n, double *a, + sunindextype *lda, sunindextype *info); -extern void dpotrs_f77(const char *uplo, const int *n, const int *nrhs, double *a, const int *lda, - double *b, const int *ldb, int * info, int len_uplo); +extern void dpotrs_f77(const char *uplo, const sunindextype *n, + const sunindextype *nrhs, double *a, + const sunindextype *lda, double *b, + const sunindextype *ldb, sunindextype *info); -extern void sgbtrf_f77(const int *m, const int *n, const int *kl, const int *ku, - float *ab, int *ldab, int *ipiv, int *info); +extern void sgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + float *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); -extern void sgbtrs_f77(const char *trans, const int *n, const int *kl, const int *ku, const int *nrhs, - float *ab, const int *ldab, int *ipiv, float *b, const int *ldb, - int *info, int len_trans); +extern void sgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, float *ab, + const sunindextype *ldab, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); -extern void sgeqp3_f77(const int *m, const int *n, float *a, const int *lda, int *jpvt, float *tau, - float *work, const int *lwork, int *info); +extern void sgeqp3_f77(const sunindextype *m, const sunindextype *n, float *a, + const sunindextype *lda, sunindextype *jpvt, float *tau, + float *work, const sunindextype *lwork, + sunindextype *info); -extern void sgeqrf_f77(const int *m, const int *n, float *a, const int *lda, float *tau, float *work, - const int *lwork, int *info); +extern void sgeqrf_f77(const sunindextype *m, const sunindextype *n, float *a, + const sunindextype *lda, float *tau, float *work, + const sunindextype *lwork, sunindextype *info); -extern void sgetrf_f77(const int *m, const int *n, float *a, int *lda, int *ipiv, int *info); +extern void sgetrf_f77(const sunindextype *m, const sunindextype *n, float *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); -extern void sgetrs_f77(const char *trans, const int *n, const int *nrhs, float *a, const int *lda, - int *ipiv, float *b, const int *ldb, int *info, int len_trans); +extern void sgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, float *a, + const sunindextype *lda, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); -extern void sormqr_f77(const char *side, const char *trans, const int *m, const int *n, const int *k, - float *a, const int *lda, float *tau, float *c, const int *ldc, - float *work, const int *lwork, int *info, int len_side, int len_trans); +extern void sormqr_f77(const char *side, const char *trans, + const sunindextype *m, const sunindextype *n, + const sunindextype *k, float *a, const sunindextype *lda, + float *tau, float *c, const sunindextype *ldc, + float *work, const sunindextype *lwork, + sunindextype *info); -extern void spotrf_f77(const char *uplo, const int *n, float *a, int *lda, int *info, int len_uplo); +extern void spotrf_f77(const char *uplo, const sunindextype *n, float *a, + sunindextype *lda, sunindextype *info); -extern void spotrs_f77(const char *uplo, const int *n, const int *nrhs, float *a, const int *lda, - float *b, const int *ldb, int * info, int len_uplo); +extern void spotrs_f77(const char *uplo, const sunindextype *n, + const sunindextype *nrhs, float *a, + const sunindextype *lda, float *b, + const sunindextype *ldb, sunindextype *info); #ifdef __cplusplus diff --git a/inst/include/sundials/sundials_lapack_defs.h b/inst/include/sundials/sundials_lapack_defs.h new file mode 100644 index 0000000..c3696fd --- /dev/null +++ b/inst/include/sundials/sundials_lapack_defs.h @@ -0,0 +1,103 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LAPACK_H +#define _SUNDIALS_LAPACK_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ================================================================== + * Blas and Lapack functions + * ================================================================== + */ + +#if defined(SUNDIALS_F77_FUNC) + +#define dgbtrf_f77 SUNDIALS_F77_FUNC(dgbtrf, DGBTRF) +#define dgbtrs_f77 SUNDIALS_F77_FUNC(dgbtrs, DGBTRS) +#define dgetrf_f77 SUNDIALS_F77_FUNC(dgetrf, DGETRF) +#define dgetrs_f77 SUNDIALS_F77_FUNC(dgetrs, DGETRS) + +#define sgbtrf_f77 SUNDIALS_F77_FUNC(sgbtrf, SGBTRF) +#define sgbtrs_f77 SUNDIALS_F77_FUNC(sgbtrs, SGBTRS) +#define sgetrf_f77 SUNDIALS_F77_FUNC(sgetrf, SGETRF) +#define sgetrs_f77 SUNDIALS_F77_FUNC(sgetrs, SGETRS) + +#else + +#define dgbtrf_f77 dgbtrf_ +#define dgbtrs_f77 dgbtrs_ +#define dgetrf_f77 dgetrf_ +#define dgetrs_f77 dgetrs_ + +#define sgbtrf_f77 sgbtrf_ +#define sgbtrs_f77 sgbtrs_ +#define sgetrf_f77 sgetrf_ +#define sgetrs_f77 sgetrs_ + +#endif + +/* LAPACK */ + +extern void dgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + double *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); + +extern void dgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, double *ab, + const sunindextype *ldab, sunindextype *ipiv, + double *b, const sunindextype *ldb, sunindextype *info); + + +extern void dgetrf_f77(const sunindextype *m, const sunindextype *n, double *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); + +extern void dgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, double *a, + const sunindextype *lda, sunindextype *ipiv, double *b, + const sunindextype *ldb, sunindextype *info); + +extern void sgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + float *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); + +extern void sgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, float *ab, + const sunindextype *ldab, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); + +extern void sgetrf_f77(const sunindextype *m, const sunindextype *n, float *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); + +extern void sgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, float *a, + const sunindextype *lda, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sundials/sundials_linearsolver.h b/inst/include/sundials/sundials_linearsolver.h index 8b539fc..a22548f 100644 --- a/inst/include/sundials/sundials_linearsolver.h +++ b/inst/include/sundials/sundials_linearsolver.h @@ -1,9 +1,10 @@ /* ----------------------------------------------------------------- * Programmer(s): Daniel Reynolds @ SMU - * David Gardner, Carol Woodward, Slaven Peles @ LLNL + * David Gardner, Carol Woodward, + * Slaven Peles, Cody Balos @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -52,27 +53,29 @@ #ifndef _SUNLINEARSOLVER_H #define _SUNLINEARSOLVER_H -#include #include #include #include +#include -#ifdef __cplusplus /* wrapper to enable C++ usage */ +#ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif - /* ----------------------------------------------------------------- * Implemented SUNLinearSolver types and IDs: * ----------------------------------------------------------------- */ -typedef enum { +typedef enum +{ SUNLINEARSOLVER_DIRECT, SUNLINEARSOLVER_ITERATIVE, - SUNLINEARSOLVER_MATRIX_ITERATIVE + SUNLINEARSOLVER_MATRIX_ITERATIVE, + SUNLINEARSOLVER_MATRIX_EMBEDDED } SUNLinearSolver_Type; -typedef enum { +typedef enum +{ SUNLINEARSOLVER_BAND, SUNLINEARSOLVER_DENSE, SUNLINEARSOLVER_KLU, @@ -86,55 +89,64 @@ typedef enum { SUNLINEARSOLVER_SUPERLUDIST, SUNLINEARSOLVER_SUPERLUMT, SUNLINEARSOLVER_CUSOLVERSP_BATCHQR, + SUNLINEARSOLVER_MAGMADENSE, + SUNLINEARSOLVER_ONEMKLDENSE, + SUNLINEARSOLVER_GINKGO, + SUNLINEARSOLVER_KOKKOSDENSE, SUNLINEARSOLVER_CUSTOM } SUNLinearSolver_ID; - /* ----------------------------------------------------------------- * Generic definition of SUNLinearSolver * ----------------------------------------------------------------- */ /* Forward reference for pointer to SUNLinearSolver_Ops object */ -typedef _SUNDIALS_STRUCT_ _generic_SUNLinearSolver_Ops *SUNLinearSolver_Ops; +typedef _SUNDIALS_STRUCT_ _generic_SUNLinearSolver_Ops* SUNLinearSolver_Ops; /* Forward reference for pointer to SUNLinearSolver object */ typedef _SUNDIALS_STRUCT_ _generic_SUNLinearSolver *SUNLinearSolver; /* Structure containing function pointers to linear solver operations */ -struct _generic_SUNLinearSolver_Ops { +struct _generic_SUNLinearSolver_Ops +{ SUNLinearSolver_Type (*gettype)(SUNLinearSolver); - SUNLinearSolver_ID (*getid)(SUNLinearSolver); - int (*setatimes)(SUNLinearSolver, void*, ATimesFn); - int (*setpreconditioner)(SUNLinearSolver, void*, - PSetupFn, PSolveFn); - int (*setscalingvectors)(SUNLinearSolver, - N_Vector, N_Vector); - int (*initialize)(SUNLinearSolver); - int (*setup)(SUNLinearSolver, SUNMatrix); - int (*solve)(SUNLinearSolver, SUNMatrix, N_Vector, - N_Vector, realtype); - int (*numiters)(SUNLinearSolver); - realtype (*resnorm)(SUNLinearSolver); - sunindextype (*lastflag)(SUNLinearSolver); - int (*space)(SUNLinearSolver, long int*, long int*); - N_Vector (*resid)(SUNLinearSolver); - int (*free)(SUNLinearSolver); + SUNLinearSolver_ID (*getid)(SUNLinearSolver); + int (*setatimes)(SUNLinearSolver, void*, SUNATimesFn); + int (*setpreconditioner)(SUNLinearSolver, void*, SUNPSetupFn, SUNPSolveFn); + int (*setscalingvectors)(SUNLinearSolver, N_Vector, N_Vector); + int (*setzeroguess)(SUNLinearSolver, booleantype); + int (*initialize)(SUNLinearSolver); + int (*setup)(SUNLinearSolver, SUNMatrix); + int (*solve)(SUNLinearSolver, SUNMatrix, N_Vector, N_Vector, realtype); + int (*numiters)(SUNLinearSolver); + realtype (*resnorm)(SUNLinearSolver); + sunindextype (*lastflag)(SUNLinearSolver); + int (*space)(SUNLinearSolver, long int*, long int*); + N_Vector (*resid)(SUNLinearSolver); + int (*free)(SUNLinearSolver); +#ifdef __cplusplus + _generic_SUNLinearSolver_Ops() = default; +#endif }; /* A linear solver is a structure with an implementation-dependent 'content' field, and a pointer to a structure of linear solver operations corresponding to that implementation. */ -struct _generic_SUNLinearSolver { - void *content; +struct _generic_SUNLinearSolver +{ + void* content; SUNLinearSolver_Ops ops; + SUNContext sunctx; +#ifdef __cplusplus + _generic_SUNLinearSolver() = default; +#endif }; - /* ----------------------------------------------------------------- * Functions exported by SUNLinearSolver module * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT SUNLinearSolver SUNLinSolNewEmpty(); +SUNDIALS_EXPORT SUNLinearSolver SUNLinSolNewEmpty(SUNContext sunctx); SUNDIALS_EXPORT void SUNLinSolFreeEmpty(SUNLinearSolver S); @@ -142,21 +154,19 @@ SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID(SUNLinearSolver S); -SUNDIALS_EXPORT int SUNLinSolSetATimes(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); +SUNDIALS_EXPORT int SUNLinSolSetATimes(SUNLinearSolver S, void* A_data, SUNATimesFn ATimes); -SUNDIALS_EXPORT int SUNLinSolSetPreconditioner(SUNLinearSolver S, void* P_data, - PSetupFn Pset, PSolveFn Psol); +SUNDIALS_EXPORT int SUNLinSolSetPreconditioner(SUNLinearSolver S, void* P_data, SUNPSetupFn Pset, SUNPSolveFn Psol); -SUNDIALS_EXPORT int SUNLinSolSetScalingVectors(SUNLinearSolver S, N_Vector s1, - N_Vector s2); +SUNDIALS_EXPORT int SUNLinSolSetScalingVectors(SUNLinearSolver S, N_Vector s1, N_Vector s2); + +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess(SUNLinearSolver S, booleantype onoff); SUNDIALS_EXPORT int SUNLinSolInitialize(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetup(SUNLinearSolver S, SUNMatrix A); -SUNDIALS_EXPORT int SUNLinSolSolve(SUNLinearSolver S, SUNMatrix A, N_Vector x, - N_Vector b, realtype tol); +SUNDIALS_EXPORT int SUNLinSolSolve(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol); SUNDIALS_EXPORT int SUNLinSolNumIters(SUNLinearSolver S); @@ -166,37 +176,49 @@ SUNDIALS_EXPORT N_Vector SUNLinSolResid(SUNLinearSolver S); SUNDIALS_EXPORT sunindextype SUNLinSolLastFlag(SUNLinearSolver S); -SUNDIALS_EXPORT int SUNLinSolSpace(SUNLinearSolver S, long int *lenrwLS, - long int *leniwLS); +SUNDIALS_EXPORT int SUNLinSolSpace(SUNLinearSolver S, long int* lenrwLS, long int* leniwLS); SUNDIALS_EXPORT int SUNLinSolFree(SUNLinearSolver S); - /* ----------------------------------------------------------------- * SUNLinearSolver return values * ----------------------------------------------------------------- */ -#define SUNLS_SUCCESS 0 /* successful/converged */ - -#define SUNLS_MEM_NULL -801 /* mem argument is NULL */ -#define SUNLS_ILL_INPUT -802 /* illegal function input */ -#define SUNLS_MEM_FAIL -803 /* failed memory access */ -#define SUNLS_ATIMES_FAIL_UNREC -804 /* atimes unrecoverable failure */ -#define SUNLS_PSET_FAIL_UNREC -805 /* pset unrecoverable failure */ -#define SUNLS_PSOLVE_FAIL_UNREC -806 /* psolve unrecoverable failure */ -#define SUNLS_PACKAGE_FAIL_UNREC -807 /* external package unrec. fail */ -#define SUNLS_GS_FAIL -808 /* Gram-Schmidt failure */ -#define SUNLS_QRSOL_FAIL -809 /* QRsol found singular R */ -#define SUNLS_VECTOROP_ERR -810 /* vector operation error */ - -#define SUNLS_RES_REDUCED 801 /* nonconv. solve, resid reduced */ -#define SUNLS_CONV_FAIL 802 /* nonconvergent solve */ -#define SUNLS_ATIMES_FAIL_REC 803 /* atimes failed recoverably */ -#define SUNLS_PSET_FAIL_REC 804 /* pset failed recoverably */ -#define SUNLS_PSOLVE_FAIL_REC 805 /* psolve failed recoverably */ -#define SUNLS_PACKAGE_FAIL_REC 806 /* external package recov. fail */ -#define SUNLS_QRFACT_FAIL 807 /* QRfact found singular matrix */ -#define SUNLS_LUFACT_FAIL 808 /* LUfact found singular matrix */ +#define SUNLS_SUCCESS 0 /* successful/converged */ + +#define SUNLS_MEM_NULL -801 /* mem argument is NULL */ +#define SUNLS_ILL_INPUT -802 /* illegal function input */ +#define SUNLS_MEM_FAIL -803 /* failed memory access */ +#define SUNLS_ATIMES_NULL -804 /* atimes function is NULL */ +#define SUNLS_ATIMES_FAIL_UNREC -805 /* atimes unrecoverable failure */ +#define SUNLS_PSET_FAIL_UNREC -806 /* pset unrecoverable failure */ +#define SUNLS_PSOLVE_NULL -807 /* psolve function is NULL */ +#define SUNLS_PSOLVE_FAIL_UNREC -808 /* psolve unrecoverable failure */ +#define SUNLS_PACKAGE_FAIL_UNREC -809 /* external package unrec. fail */ +#define SUNLS_GS_FAIL -810 /* Gram-Schmidt failure */ +#define SUNLS_QRSOL_FAIL -811 /* QRsol found singular R */ +#define SUNLS_VECTOROP_ERR -812 /* vector operation error */ + +#define SUNLS_RES_REDUCED 801 /* nonconv. solve, resid reduced */ +#define SUNLS_CONV_FAIL 802 /* nonconvergent solve */ +#define SUNLS_ATIMES_FAIL_REC 803 /* atimes failed recoverably */ +#define SUNLS_PSET_FAIL_REC 804 /* pset failed recoverably */ +#define SUNLS_PSOLVE_FAIL_REC 805 /* psolve failed recoverably */ +#define SUNLS_PACKAGE_FAIL_REC 806 /* external package recov. fail */ +#define SUNLS_QRFACT_FAIL 807 /* QRfact found singular matrix */ +#define SUNLS_LUFACT_FAIL 808 /* LUfact found singular matrix */ + +/* ----------------------------------------------------------------------------- + * SUNLinearSolver messages + * ---------------------------------------------------------------------------*/ + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define SUNLS_MSG_RESIDUAL "\t\tlin. iteration %ld, lin. residual: %Lg\n" +#elif defined(SUNDIALS_DOUBLE_PRECISION) +#define SUNLS_MSG_RESIDUAL "\t\tlin. iteration %ld, lin. residual: %g\n" +#else +#define SUNLS_MSG_RESIDUAL "\t\tlin. iteration %ld, lin. residual: %g\n" +#endif #ifdef __cplusplus } diff --git a/inst/include/sundials/sundials_linearsolver.hpp b/inst/include/sundials/sundials_linearsolver.hpp new file mode 100644 index 0000000..1037d4a --- /dev/null +++ b/inst/include/sundials/sundials_linearsolver.hpp @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * C++ view of SUNDIALS SUNLinaerSolver + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LINEARSOLVER_HPP +#define _SUNDIALS_LINEARSOLVER_HPP + +#include +#include +#include + +namespace sundials { +namespace impl { +using BaseLinearSolver = BaseObject<_generic_SUNLinearSolver, _generic_SUNLinearSolver_Ops>; +} // namespace impl + +namespace experimental { +struct SUNLinearSolverDeleter +{ + void operator()(SUNLinearSolver LS) + { + if (LS) SUNLinSolFree(LS); + } +}; +using SUNLinearSolverView = ClassView; +} // namespace experimental +} // namespace sundials + +#endif diff --git a/inst/include/sundials/sundials_logger.h b/inst/include/sundials/sundials_logger.h new file mode 100644 index 0000000..b941e3e --- /dev/null +++ b/inst/include/sundials/sundials_logger.h @@ -0,0 +1,59 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LOGGER_H +#define _SUNDIALS_LOGGER_H + +#include + +#include "sundials/sundials_config.h" +#include "sundials/sundials_types.h" + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +typedef struct SUNLogger_* SUNLogger; + +typedef enum { + SUN_LOGLEVEL_ALL = -1, + SUN_LOGLEVEL_NONE = 0, + SUN_LOGLEVEL_ERROR = 1, + SUN_LOGLEVEL_WARNING = 2, + SUN_LOGLEVEL_INFO = 3, + SUN_LOGLEVEL_DEBUG = 4 +} SUNLogLevel; + +SUNDIALS_EXPORT int SUNLogger_Create(void* comm, int output_rank, + SUNLogger* logger); +SUNDIALS_EXPORT int SUNLogger_CreateFromEnv(void* comm, SUNLogger* logger); +SUNDIALS_EXPORT int SUNLogger_SetErrorFilename(SUNLogger logger, + const char* error_filename); +SUNDIALS_EXPORT int SUNLogger_SetWarningFilename(SUNLogger logger, + const char* warning_filename); +SUNDIALS_EXPORT int SUNLogger_SetDebugFilename(SUNLogger logger, + const char* debug_filename); +SUNDIALS_EXPORT int SUNLogger_SetInfoFilename(SUNLogger logger, + const char* info_filename); +SUNDIALS_EXPORT int SUNLogger_QueueMsg(SUNLogger logger, SUNLogLevel lvl, + const char* scope, const char* label, + const char* msg_txt, ...); +SUNDIALS_EXPORT int SUNLogger_Flush(SUNLogger logger, SUNLogLevel lvl); +SUNDIALS_EXPORT int SUNLogger_GetOutputRank(SUNLogger logger, int* output_rank); +SUNDIALS_EXPORT int SUNLogger_Destroy(SUNLogger* logger); + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif +#endif /* SUNDIALS_LOGGER_H_ */ diff --git a/inst/include/sundials/sundials_logger_impl.h b/inst/include/sundials/sundials_logger_impl.h new file mode 100644 index 0000000..f28e86b --- /dev/null +++ b/inst/include/sundials/sundials_logger_impl.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS logging class implementation. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LOGGER_IMPL_H +#define _SUNDIALS_LOGGER_IMPL_H + +#include +#include +#include + +#include "sundials_hashmap.h" + +#define SUNDIALS_LOGGING_ERROR 1 +#define SUNDIALS_LOGGING_WARNING 2 +#define SUNDIALS_LOGGING_INFO 3 +#define SUNDIALS_LOGGING_DEBUG 4 +#if SUNDIALS_LOGGING_LEVEL > SUNDIALS_LOGGING_DEBUG +#define SUNDIALS_LOGGING_EXTRA_DEBUG +#endif + +struct SUNLogger_ { + /* MPI information */ + void* commptr; + int output_rank; + + /* Ouput files */ + FILE* debug_fp; + FILE* warning_fp; + FILE* info_fp; + FILE* error_fp; + + /* Hashmap used to store filename, FILE* pairs */ + SUNHashMap filenames; + + /* Slic-style format string */ + const char* format; + + /* Content for custom implementations */ + void* content; + + /* Overridable operations */ + int (*queuemsg)(SUNLogger logger, SUNLogLevel lvl, const char* scope, + const char* label, const char* msg_txt, va_list args); + int (*flush)(SUNLogger logger, SUNLogLevel lvl); + int (*destroy)(SUNLogger* logger); +}; + +#endif /* _SUNDIALS_LOGGER_IMPL_H */ diff --git a/inst/include/sundials/sundials_math.h b/inst/include/sundials/sundials_math.h index 5b7e667..c6f6d9f 100644 --- a/inst/include/sundials/sundials_math.h +++ b/inst/include/sundials/sundials_math.h @@ -4,7 +4,7 @@ * Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -14,7 +14,7 @@ * SUNDIALS Copyright End * ----------------------------------------------------------------- * This is the header file for a simple C-language math library. The - * routines listed here work with the type realtype as defined in + * routines listed here work with the type sunrealtype as defined in * the header file sundials_types.h. * ----------------------------------------------------------------- */ @@ -66,7 +66,7 @@ extern "C" { * ----------------------------------------------------------------- * Function : SUNRsqrt * ----------------------------------------------------------------- - * Usage : realtype sqrt_x; + * Usage : sunrealtype sqrt_x; * sqrt_x = SUNRsqrt(x); * ----------------------------------------------------------------- * SUNRsqrt(x) returns the square root of x. If x < ZERO, then @@ -75,14 +75,16 @@ extern "C" { */ #ifndef SUNRsqrt -#if defined(SUNDIALS_USE_GENERIC_MATH) -#define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : ((realtype) sqrt((double) (x)))) -#elif defined(SUNDIALS_DOUBLE_PRECISION) -#define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrt((x)))) -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrtf((x)))) -#elif defined(SUNDIALS_EXTENDED_PRECISION) -#define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrtl((x)))) +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +# if defined(SUNDIALS_DOUBLE_PRECISION) +# define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrt((x)))) +# elif defined(SUNDIALS_SINGLE_PRECISION) +# define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrtf((x)))) +# elif defined(SUNDIALS_EXTENDED_PRECISION) +# define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : (sqrtl((x)))) +# endif +#else +# define SUNRsqrt(x) ((x) <= RCONST(0.0) ? (RCONST(0.0)) : ((sunrealtype) sqrt((double) (x)))) #endif #endif @@ -90,7 +92,7 @@ extern "C" { * ----------------------------------------------------------------- * Function : SUNRabs * ----------------------------------------------------------------- - * Usage : realtype abs_x; + * Usage : sunrealtype abs_x; * abs_x = SUNRabs(x); * ----------------------------------------------------------------- * SUNRabs(x) returns the absolute value of x. @@ -98,14 +100,16 @@ extern "C" { */ #ifndef SUNRabs -#if defined(SUNDIALS_USE_GENERIC_MATH) -#define SUNRabs(x) ((realtype) fabs((double) (x))) -#elif defined(SUNDIALS_DOUBLE_PRECISION) -#define SUNRabs(x) (fabs((x))) -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define SUNRabs(x) (fabsf((x))) -#elif defined(SUNDIALS_EXTENDED_PRECISION) -#define SUNRabs(x) (fabsl((x))) +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +# if defined(SUNDIALS_DOUBLE_PRECISION) +# define SUNRabs(x) (fabs((x))) +# elif defined(SUNDIALS_SINGLE_PRECISION) +# define SUNRabs(x) (fabsf((x))) +# elif defined(SUNDIALS_EXTENDED_PRECISION) +# define SUNRabs(x) (fabsl((x))) +# endif +#else +# define SUNRabs(x) ((sunrealtype) fabs((double) (x))) #endif #endif @@ -113,7 +117,7 @@ extern "C" { * ----------------------------------------------------------------- * Function : SUNRexp * ----------------------------------------------------------------- - * Usage : realtype exp_x; + * Usage : sunrealtype exp_x; * exp_x = SUNRexp(x); * ----------------------------------------------------------------- * SUNRexp(x) returns e^x (base-e exponential function). @@ -121,14 +125,16 @@ extern "C" { */ #ifndef SUNRexp -#if defined(SUNDIALS_USE_GENERIC_MATH) -#define SUNRexp(x) ((realtype) exp((double) (x))) -#elif defined(SUNDIALS_DOUBLE_PRECISION) -#define SUNRexp(x) (exp((x))) -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define SUNRexp(x) (expf((x))) -#elif defined(SUNDIALS_EXTENDED_PRECISION) -#define SUNRexp(x) (expl((x))) +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +# if defined(SUNDIALS_DOUBLE_PRECISION) +# define SUNRexp(x) (exp((x))) +# elif defined(SUNDIALS_SINGLE_PRECISION) +# define SUNRexp(x) (expf((x))) +# elif defined(SUNDIALS_EXTENDED_PRECISION) +# define SUNRexp(x) (expl((x))) +# endif +#else +# define SUNRexp(x) ((sunrealtype) exp((double) (x))) #endif #endif @@ -136,7 +142,7 @@ extern "C" { * ----------------------------------------------------------------- * Function : SUNRceil * ----------------------------------------------------------------- - * Usage : realtype ceil_x; + * Usage : sunrealtype ceil_x; * ceil_x = SUNRceil(x); * ----------------------------------------------------------------- * SUNRceil(x) returns the smallest integer value not less than x. @@ -144,14 +150,16 @@ extern "C" { */ #ifndef SUNRceil -#if defined(SUNDIALS_USE_GENERIC_MATH) -#define SUNRceil(x) ((realtype) ceil((double) (x))) -#elif defined(SUNDIALS_DOUBLE_PRECISION) -#define SUNRceil(x) (ceil((x))) -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define SUNRceil(x) (ceilf((x))) -#elif defined(SUNDIALS_EXTENDED_PRECISION) -#define SUNRceil(x) (ceill((x))) +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +# if defined(SUNDIALS_DOUBLE_PRECISION) +# define SUNRceil(x) (ceil((x))) +# elif defined(SUNDIALS_SINGLE_PRECISION) +# define SUNRceil(x) (ceilf((x))) +# elif defined(SUNDIALS_EXTENDED_PRECISION) +# define SUNRceil(x) (ceill((x))) +# endif +#else +# define SUNRceil(x) ((sunrealtype) ceil((double) (x))) #endif #endif @@ -160,31 +168,79 @@ extern "C" { * Function : SUNRpowerI * ----------------------------------------------------------------- * Usage : int exponent; - * realtype base, ans; + * sunrealtype base, ans; * ans = SUNRpowerI(base,exponent); * ----------------------------------------------------------------- * SUNRpowerI returns the value of base^exponent, where base is of type - * realtype and exponent is of type int. + * sunrealtype and exponent is of type int. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT realtype SUNRpowerI(realtype base, int exponent); +SUNDIALS_EXPORT sunrealtype SUNRpowerI(sunrealtype base, int exponent); /* * ----------------------------------------------------------------- * Function : SUNRpowerR * ----------------------------------------------------------------- - * Usage : realtype base, exponent, ans; + * Usage : sunrealtype base, exponent, ans; * ans = SUNRpowerR(base,exponent); * ----------------------------------------------------------------- * SUNRpowerR returns the value of base^exponent, where both base and - * exponent are of type realtype. If base < ZERO, then SUNRpowerR + * exponent are of type sunrealtype. If base < ZERO, then SUNRpowerR * returns ZERO. * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT realtype SUNRpowerR(realtype base, realtype exponent); +SUNDIALS_EXPORT sunrealtype SUNRpowerR(sunrealtype base, sunrealtype exponent); +/* + * ----------------------------------------------------------------- + * Function : SUNRCompare + * ----------------------------------------------------------------- + * Usage : int isNotEqual; + * sunrealtype a, b; + * isNotEqual = SUNRCompare(a, b); + * ----------------------------------------------------------------- + * SUNRCompareTol returns 0 if the relative difference of a and b is + * less than or equal to 10*machine epsilon. If the relative + * difference is greater than 10*machine epsilon, it returns 1. The + * function handles the case where a or b are near zero as well as + * the case where a or b are inf/nan. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT booleantype SUNRCompare(sunrealtype a, sunrealtype b); + +/* + * ----------------------------------------------------------------- + * Function : SUNRCompareTol + * ----------------------------------------------------------------- + * Usage : int isNotEqual; + * sunrealtype a, b, tol; + * isNotEqual = SUNRCompareTol(a, b, tol); + * ----------------------------------------------------------------- + * SUNRCompareTol returns 0 if the relative difference of a and b is + * less than or equal to the provided tolerance. If the relative + * difference is greater than the tolerance, it returns 1. The + * function handles the case where a or b are near zero as well as + * the case where a or b are inf/nan. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT booleantype SUNRCompareTol(sunrealtype a, sunrealtype b, sunrealtype tol); + +/* + * ----------------------------------------------------------------- + * Function : SUNStrToReal + * ----------------------------------------------------------------- + * Usage : realtype a = SUNStrToReal(const char* str) + * ----------------------------------------------------------------- + * SUNStrToReal parses str into the realtype variable. Uses standard + * strtod variants when they are available. + * ----------------------------------------------------------------- + */ + +SUNDIALS_EXPORT sunrealtype SUNStrToReal(const char* str); #ifdef __cplusplus } diff --git a/inst/include/sundials/sundials_matrix.h b/inst/include/sundials/sundials_matrix.h index d5e6d1b..2d77aea 100644 --- a/inst/include/sundials/sundials_matrix.h +++ b/inst/include/sundials/sundials_matrix.h @@ -4,7 +4,7 @@ * Cody Balos @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -44,65 +44,78 @@ #ifndef _SUNMATRIX_H #define _SUNMATRIX_H -#include +#include #include +#include -#ifdef __cplusplus /* wrapper to enable C++ usage */ +#ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif - /* ----------------------------------------------------------------- * Implemented SUNMatrix types * ----------------------------------------------------------------- */ -typedef enum { - SUNMATRIX_DENSE, - SUNMATRIX_BAND, +typedef enum +{ + SUNMATRIX_DENSE, + SUNMATRIX_MAGMADENSE, + SUNMATRIX_ONEMKLDENSE, + SUNMATRIX_BAND, SUNMATRIX_SPARSE, SUNMATRIX_SLUNRLOC, + SUNMATRIX_CUSPARSE, + SUNMATRIX_GINKGO, + SUNMATRIX_KOKKOSDENSE, SUNMATRIX_CUSTOM } SUNMatrix_ID; - /* ----------------------------------------------------------------- * Generic definition of SUNMatrix * ----------------------------------------------------------------- */ /* Forward reference for pointer to SUNMatrix_Ops object */ -typedef _SUNDIALS_STRUCT_ _generic_SUNMatrix_Ops *SUNMatrix_Ops; +typedef _SUNDIALS_STRUCT_ _generic_SUNMatrix_Ops* SUNMatrix_Ops; /* Forward reference for pointer to SUNMatrix object */ -typedef _SUNDIALS_STRUCT_ _generic_SUNMatrix *SUNMatrix; +typedef _SUNDIALS_STRUCT_ _generic_SUNMatrix* SUNMatrix; /* Structure containing function pointers to matrix operations */ -struct _generic_SUNMatrix_Ops { +struct _generic_SUNMatrix_Ops +{ SUNMatrix_ID (*getid)(SUNMatrix); - SUNMatrix (*clone)(SUNMatrix); - void (*destroy)(SUNMatrix); - int (*zero)(SUNMatrix); - int (*copy)(SUNMatrix, SUNMatrix); - int (*scaleadd)(realtype, SUNMatrix, SUNMatrix); - int (*scaleaddi)(realtype, SUNMatrix); - int (*matvecsetup)(SUNMatrix); - int (*matvec)(SUNMatrix, N_Vector, N_Vector); - int (*space)(SUNMatrix, long int*, long int*); + SUNMatrix (*clone)(SUNMatrix); + void (*destroy)(SUNMatrix); + int (*zero)(SUNMatrix); + int (*copy)(SUNMatrix, SUNMatrix); + int (*scaleadd)(realtype, SUNMatrix, SUNMatrix); + int (*scaleaddi)(realtype, SUNMatrix); + int (*matvecsetup)(SUNMatrix); + int (*matvec)(SUNMatrix, N_Vector, N_Vector); + int (*space)(SUNMatrix, long int*, long int*); +#ifdef __cplusplus + _generic_SUNMatrix_Ops() = default; +#endif }; /* A matrix is a structure with an implementation-dependent 'content' field, and a pointer to a structure of matrix operations corresponding to that implementation. */ -struct _generic_SUNMatrix { - void *content; +struct _generic_SUNMatrix +{ + void* content; SUNMatrix_Ops ops; + SUNContext sunctx; +#ifdef __cplusplus + _generic_SUNMatrix() = default; +#endif }; - /* ----------------------------------------------------------------- * Functions exported by SUNMatrix module * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT SUNMatrix SUNMatNewEmpty(); +SUNDIALS_EXPORT SUNMatrix SUNMatNewEmpty(SUNContext sunctx); SUNDIALS_EXPORT void SUNMatFreeEmpty(SUNMatrix A); SUNDIALS_EXPORT int SUNMatCopyOps(SUNMatrix A, SUNMatrix B); SUNDIALS_EXPORT SUNMatrix_ID SUNMatGetID(SUNMatrix A); @@ -112,9 +125,9 @@ SUNDIALS_EXPORT int SUNMatZero(SUNMatrix A); SUNDIALS_EXPORT int SUNMatCopy(SUNMatrix A, SUNMatrix B); SUNDIALS_EXPORT int SUNMatScaleAdd(realtype c, SUNMatrix A, SUNMatrix B); SUNDIALS_EXPORT int SUNMatScaleAddI(realtype c, SUNMatrix A); -SUNDIALS_EXPORT int SUNMatMatvecSetup(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatMatvecSetup(SUNMatrix A); SUNDIALS_EXPORT int SUNMatMatvec(SUNMatrix A, N_Vector x, N_Vector y); -SUNDIALS_EXPORT int SUNMatSpace(SUNMatrix A, long int *lenrw, long int *leniw); +SUNDIALS_EXPORT int SUNMatSpace(SUNMatrix A, long int* lenrw, long int* leniw); /* * ----------------------------------------------------------------- @@ -122,13 +135,14 @@ SUNDIALS_EXPORT int SUNMatSpace(SUNMatrix A, long int *lenrw, long int *leniw); * --------------------------------------------------------------- */ -#define SUNMAT_SUCCESS 0 /* function successfull */ -#define SUNMAT_ILL_INPUT -701 /* illegal function input */ -#define SUNMAT_MEM_FAIL -702 /* failed memory access/alloc */ -#define SUNMAT_OPERATION_FAIL -703 /* a SUNMatrix operation returned nonzero */ -#define SUNMAT_MATVEC_SETUP_REQUIRED -704 /* the SUNMatMatvecSetup routine needs to be called */ +#define SUNMAT_SUCCESS 0 /* function successfull */ +#define SUNMAT_ILL_INPUT -701 /* illegal function input */ +#define SUNMAT_MEM_FAIL -702 /* failed memory access/alloc */ +#define SUNMAT_OPERATION_FAIL -703 /* a SUNMatrix operation returned nonzero */ +#define SUNMAT_MATVEC_SETUP_REQUIRED -704 /* the SUNMatMatvecSetup routine needs to be called */ #ifdef __cplusplus } #endif -#endif + +#endif /* _SUNMATRIX_H */ diff --git a/inst/include/sundials/sundials_matrix.hpp b/inst/include/sundials/sundials_matrix.hpp new file mode 100644 index 0000000..9a80617 --- /dev/null +++ b/inst/include/sundials/sundials_matrix.hpp @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * C++ view of SUNDIALS SUNMatrix + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_MATRIX_HPP +#define _SUNDIALS_MATRIX_HPP + +#include +#include +#include + +namespace sundials { +namespace impl { +using BaseMatrix = BaseObject<_generic_SUNMatrix, _generic_SUNMatrix_Ops>; +} // namespace impl + +namespace experimental { +struct SUNMatrixDeleter +{ + void operator()(SUNMatrix A) + { + if (A) SUNMatDestroy(A); + } +}; +using SUNMatrixView = ClassView; +} // namespace experimental +} // namespace sundials + +#endif diff --git a/inst/include/sundials/sundials_memory.h b/inst/include/sundials/sundials_memory.h new file mode 100644 index 0000000..d31dd66 --- /dev/null +++ b/inst/include/sundials/sundials_memory.h @@ -0,0 +1,162 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS memory helpers and types. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_MEMORY_H +#define _SUNDIALS_MEMORY_H + +#include + +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +typedef enum +{ + SUNMEMTYPE_HOST, /* pageable memory accessible on the host */ + SUNMEMTYPE_PINNED, /* page-locked memory accesible on the host */ + SUNMEMTYPE_DEVICE, /* memory accessible from the device */ + SUNMEMTYPE_UVM /* memory accessible from the host or device */ +} SUNMemoryType; + + +/* + * SUNMemory is a simple abstraction of a pointer to some + * contiguos memory, so that we can keep track of its type + * and its ownership. + */ + +typedef struct _SUNMemory *SUNMemory; + +struct _SUNMemory +{ + void* ptr; + SUNMemoryType type; + booleantype own; + size_t bytes; +}; + +/* Creates a new SUNMemory object with a NULL ptr */ +SUNDIALS_EXPORT SUNMemory SUNMemoryNewEmpty(void); + +/* + * SUNMemoryHelper holds ops which can allocate, deallocate, + * and copy SUNMemory. + */ + +typedef struct _SUNMemoryHelper_Ops *SUNMemoryHelper_Ops; +typedef struct _SUNMemoryHelper *SUNMemoryHelper; + +struct _SUNMemoryHelper +{ + void* content; + SUNMemoryHelper_Ops ops; + SUNContext sunctx; +}; + +struct _SUNMemoryHelper_Ops +{ + /* operations that implementations are required to provide */ + int (*alloc)(SUNMemoryHelper, SUNMemory* memptr, size_t mem_size, + SUNMemoryType mem_type, void* queue); + int (*dealloc)(SUNMemoryHelper, SUNMemory mem, void* queue); + int (*copy)(SUNMemoryHelper, SUNMemory dst, SUNMemory src, + size_t mem_size, void* queue); + + /* operations that provide default implementations */ + int (*copyasync)(SUNMemoryHelper, SUNMemory dst, SUNMemory src, + size_t mem_size, void* queue); + int (*getallocstats)(SUNMemoryHelper, SUNMemoryType mem_type, unsigned long* num_allocations, + unsigned long* num_deallocations, size_t* bytes_allocated, + size_t* bytes_high_watermark); + SUNMemoryHelper (*clone)(SUNMemoryHelper); + int (*destroy)(SUNMemoryHelper); +}; + + +/* + * Generic SUNMemoryHelper functions that work without a SUNMemoryHelper object. + */ + +/* Creates a new SUNMemory object which points to the same data as another + * SUNMemory object. + * The SUNMemory returned will not own the ptr, therefore, it will not free + * the ptr in Dealloc. */ +SUNDIALS_EXPORT SUNMemory SUNMemoryHelper_Alias(SUNMemory mem); + +/* Creates a new SUNMemory object with ptr set to the user provided pointer + * The SUNMemory returned will not own the ptr, therefore, it will not free + * the ptr in Dealloc. */ +SUNDIALS_EXPORT SUNMemory SUNMemoryHelper_Wrap(void* ptr, SUNMemoryType mem_type); + +/* + * Required SUNMemoryHelper operations. + */ + + +SUNDIALS_EXPORT +int SUNMemoryHelper_Alloc(SUNMemoryHelper, SUNMemory* memptr, size_t mem_size, + SUNMemoryType mem_type, void* queue); + +SUNDIALS_EXPORT +int SUNMemoryHelper_Dealloc(SUNMemoryHelper, SUNMemory mem, void* queue); + +SUNDIALS_EXPORT +int SUNMemoryHelper_Copy(SUNMemoryHelper, SUNMemory dst, SUNMemory src, + size_t mem_size, void* queue); + +/* + * Optional SUNMemoryHelper operations. + */ + +SUNDIALS_EXPORT +int SUNMemoryHelper_CopyAsync(SUNMemoryHelper, SUNMemory dst, SUNMemory src, + size_t mem_size, void* queue); + +SUNDIALS_EXPORT +int SUNMemoryHelper_GetAllocStats(SUNMemoryHelper, SUNMemoryType mem_type, unsigned long* num_allocations, + unsigned long* num_deallocations, size_t* bytes_allocated, + size_t* bytes_high_watermark); + +/* Clones the SUNMemoryHelper */ +SUNDIALS_EXPORT SUNMemoryHelper SUNMemoryHelper_Clone(SUNMemoryHelper); + +/* Frees the SUNMemoryHelper */ +SUNDIALS_EXPORT int SUNMemoryHelper_Destroy(SUNMemoryHelper); + +/* + * Utility SUNMemoryHelper functions. + */ + +/* Creates an empty SUNMemoryHelper object */ +SUNDIALS_EXPORT SUNMemoryHelper SUNMemoryHelper_NewEmpty(SUNContext sunctx); + +/* Copyies the SUNMemoryHelper ops structure from src->ops to dst->ops. */ +SUNDIALS_EXPORT int SUNMemoryHelper_CopyOps(SUNMemoryHelper src, + SUNMemoryHelper dst); + +/* Checks that all required SUNMemoryHelper ops are provided */ +SUNDIALS_EXPORT +booleantype SUNMemoryHelper_ImplementsRequiredOps(SUNMemoryHelper); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sundials/sundials_mpi_types.h b/inst/include/sundials/sundials_mpi_types.h index 48c896f..ac60e6c 100644 --- a/inst/include/sundials/sundials_mpi_types.h +++ b/inst/include/sundials/sundials_mpi_types.h @@ -3,7 +3,7 @@ * Aaron Collier, and Slaven Peles @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -33,7 +33,3 @@ #elif defined(SUNDIALS_INT32_T) #define MPI_SUNINDEXTYPE MPI_INT32_T #endif - -/* define legacy SUNDIALS MPI data types */ -#define PVEC_REAL_MPI_TYPE MPI_SUNREALTYPE -#define PVEC_INTEGER_MPI_TYPE MPI_SUNINDEXTYPE diff --git a/inst/include/sundials/sundials_nonlinearsolver.h b/inst/include/sundials/sundials_nonlinearsolver.h index d93dd4b..f554bf1 100644 --- a/inst/include/sundials/sundials_nonlinearsolver.h +++ b/inst/include/sundials/sundials_nonlinearsolver.h @@ -1,8 +1,8 @@ /* ----------------------------------------------------------------------------- - * Programmer(s): David J. Gardner @ LLNL + * Programmer(s): David J. Gardner, and Cody J. Balos @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -47,24 +47,23 @@ #ifndef _SUNNONLINEARSOLVER_H #define _SUNNONLINEARSOLVER_H -#include +#include #include +#include -#ifdef __cplusplus /* wrapper to enable C++ usage */ +#ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif - /* ----------------------------------------------------------------------------- * Forward references for SUNNonlinearSolver types defined below * ---------------------------------------------------------------------------*/ /* Forward reference for pointer to SUNNonlinearSolver_Ops object */ -typedef _SUNDIALS_STRUCT_ _generic_SUNNonlinearSolver_Ops *SUNNonlinearSolver_Ops; +typedef _SUNDIALS_STRUCT_ _generic_SUNNonlinearSolver_Ops* SUNNonlinearSolver_Ops; /* Forward reference for pointer to SUNNonlinearSolver object */ -typedef _SUNDIALS_STRUCT_ _generic_SUNNonlinearSolver *SUNNonlinearSolver; - +typedef _SUNDIALS_STRUCT_ _generic_SUNNonlinearSolver* SUNNonlinearSolver; /* ----------------------------------------------------------------------------- * Integrator supplied function types @@ -72,37 +71,34 @@ typedef _SUNDIALS_STRUCT_ _generic_SUNNonlinearSolver *SUNNonlinearSolver; typedef int (*SUNNonlinSolSysFn)(N_Vector y, N_Vector F, void* mem); -typedef int (*SUNNonlinSolLSetupFn)(booleantype jbad, booleantype* jcur, - void* mem); +typedef int (*SUNNonlinSolLSetupFn)(booleantype jbad, booleantype* jcur, void* mem); typedef int (*SUNNonlinSolLSolveFn)(N_Vector b, void* mem); -typedef int (*SUNNonlinSolConvTestFn)(SUNNonlinearSolver NLS, N_Vector y, - N_Vector del, realtype tol, N_Vector ewt, +typedef int (*SUNNonlinSolConvTestFn)(SUNNonlinearSolver NLS, N_Vector y, N_Vector del, realtype tol, N_Vector ewt, void* mem); - /* ----------------------------------------------------------------------------- * SUNNonlinearSolver types * ---------------------------------------------------------------------------*/ -typedef enum { +typedef enum +{ SUNNONLINEARSOLVER_ROOTFIND, SUNNONLINEARSOLVER_FIXEDPOINT } SUNNonlinearSolver_Type; - /* ----------------------------------------------------------------------------- * Generic definition of SUNNonlinearSolver * ---------------------------------------------------------------------------*/ /* Structure containing function pointers to nonlinear solver operations */ -struct _generic_SUNNonlinearSolver_Ops { +struct _generic_SUNNonlinearSolver_Ops +{ SUNNonlinearSolver_Type (*gettype)(SUNNonlinearSolver); int (*initialize)(SUNNonlinearSolver); int (*setup)(SUNNonlinearSolver, N_Vector, void*); - int (*solve)(SUNNonlinearSolver, N_Vector, N_Vector, N_Vector, realtype, - booleantype, void*); + int (*solve)(SUNNonlinearSolver, N_Vector, N_Vector, N_Vector, realtype, booleantype, void*); int (*free)(SUNNonlinearSolver); int (*setsysfn)(SUNNonlinearSolver, SUNNonlinSolSysFn); int (*setlsetupfn)(SUNNonlinearSolver, SUNNonlinSolLSetupFn); @@ -112,23 +108,30 @@ struct _generic_SUNNonlinearSolver_Ops { int (*getnumiters)(SUNNonlinearSolver, long int*); int (*getcuriter)(SUNNonlinearSolver, int*); int (*getnumconvfails)(SUNNonlinearSolver, long int*); +#ifdef __cplusplus + _generic_SUNNonlinearSolver_Ops() = default; +#endif }; /* A nonlinear solver is a structure with an implementation-dependent 'content' field, and a pointer to a structure of solver nonlinear solver operations corresponding to that implementation. */ -struct _generic_SUNNonlinearSolver { - void *content; +struct _generic_SUNNonlinearSolver +{ + void* content; SUNNonlinearSolver_Ops ops; + SUNContext sunctx; +#ifdef __cplusplus + _generic_SUNNonlinearSolver() = default; +#endif }; - /* ----------------------------------------------------------------------------- * Functions exported by SUNNonlinearSolver module * ---------------------------------------------------------------------------*/ /* empty constructor/destructor */ -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSolNewEmpty(); +SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSolNewEmpty(SUNContext sunctx); SUNDIALS_EXPORT void SUNNonlinSolFreeEmpty(SUNNonlinearSolver NLS); /* core functions */ @@ -136,59 +139,59 @@ SUNDIALS_EXPORT SUNNonlinearSolver_Type SUNNonlinSolGetType(SUNNonlinearSolver N SUNDIALS_EXPORT int SUNNonlinSolInitialize(SUNNonlinearSolver NLS); -SUNDIALS_EXPORT int SUNNonlinSolSetup(SUNNonlinearSolver NLS, - N_Vector y, void* mem); +SUNDIALS_EXPORT int SUNNonlinSolSetup(SUNNonlinearSolver NLS, N_Vector y, void* mem); -SUNDIALS_EXPORT int SUNNonlinSolSolve(SUNNonlinearSolver NLS, - N_Vector y0, N_Vector y, - N_Vector w, realtype tol, - booleantype callLSetup, void *mem); +SUNDIALS_EXPORT int SUNNonlinSolSolve(SUNNonlinearSolver NLS, N_Vector y0, N_Vector y, N_Vector w, realtype tol, + booleantype callLSetup, void* mem); SUNDIALS_EXPORT int SUNNonlinSolFree(SUNNonlinearSolver NLS); /* set functions */ -SUNDIALS_EXPORT int SUNNonlinSolSetSysFn(SUNNonlinearSolver NLS, - SUNNonlinSolSysFn SysFn); +SUNDIALS_EXPORT int SUNNonlinSolSetSysFn(SUNNonlinearSolver NLS, SUNNonlinSolSysFn SysFn); -SUNDIALS_EXPORT int SUNNonlinSolSetLSetupFn(SUNNonlinearSolver NLS, - SUNNonlinSolLSetupFn SetupFn); +SUNDIALS_EXPORT int SUNNonlinSolSetLSetupFn(SUNNonlinearSolver NLS, SUNNonlinSolLSetupFn SetupFn); -SUNDIALS_EXPORT int SUNNonlinSolSetLSolveFn(SUNNonlinearSolver NLS, - SUNNonlinSolLSolveFn SolveFn); +SUNDIALS_EXPORT int SUNNonlinSolSetLSolveFn(SUNNonlinearSolver NLS, SUNNonlinSolLSolveFn SolveFn); -SUNDIALS_EXPORT int SUNNonlinSolSetConvTestFn(SUNNonlinearSolver NLS, - SUNNonlinSolConvTestFn CTestFn, - void* ctest_data); +SUNDIALS_EXPORT int SUNNonlinSolSetConvTestFn(SUNNonlinearSolver NLS, SUNNonlinSolConvTestFn CTestFn, void* ctest_data); -SUNDIALS_EXPORT int SUNNonlinSolSetMaxIters(SUNNonlinearSolver NLS, - int maxiters); -/* get functions */ -SUNDIALS_EXPORT int SUNNonlinSolGetNumIters(SUNNonlinearSolver NLS, - long int *niters); +SUNDIALS_EXPORT int SUNNonlinSolSetMaxIters(SUNNonlinearSolver NLS, int maxiters); -SUNDIALS_EXPORT int SUNNonlinSolGetCurIter(SUNNonlinearSolver NLS, - int *iter); +/* get functions */ +SUNDIALS_EXPORT int SUNNonlinSolGetNumIters(SUNNonlinearSolver NLS, long int* niters); -SUNDIALS_EXPORT int SUNNonlinSolGetNumConvFails(SUNNonlinearSolver NLS, - long int *nconvfails); +SUNDIALS_EXPORT int SUNNonlinSolGetCurIter(SUNNonlinearSolver NLS, int* iter); +SUNDIALS_EXPORT int SUNNonlinSolGetNumConvFails(SUNNonlinearSolver NLS, long int* nconvfails); /* ----------------------------------------------------------------------------- * SUNNonlinearSolver return values * ---------------------------------------------------------------------------*/ -#define SUN_NLS_SUCCESS 0 /* successful / converged */ +#define SUN_NLS_SUCCESS 0 /* successful / converged */ /* Recoverable */ -#define SUN_NLS_CONTINUE +901 /* not converged, keep iterating */ -#define SUN_NLS_CONV_RECVR +902 /* convergece failure, try to recover */ +#define SUN_NLS_CONTINUE +901 /* not converged, keep iterating */ +#define SUN_NLS_CONV_RECVR +902 /* convergece failure, try to recover */ /* Unrecoverable */ -#define SUN_NLS_MEM_NULL -901 /* memory argument is NULL */ -#define SUN_NLS_MEM_FAIL -902 /* failed memory access / allocation */ -#define SUN_NLS_ILL_INPUT -903 /* illegal function input */ -#define SUN_NLS_VECTOROP_ERR -904 /* failed NVector operation */ -#define SUN_NLS_EXT_FAIL -905 /* failed in external library call */ +#define SUN_NLS_MEM_NULL -901 /* memory argument is NULL */ +#define SUN_NLS_MEM_FAIL -902 /* failed memory access / allocation */ +#define SUN_NLS_ILL_INPUT -903 /* illegal function input */ +#define SUN_NLS_VECTOROP_ERR -904 /* failed NVector operation */ +#define SUN_NLS_EXT_FAIL -905 /* failed in external library call */ + +/* ----------------------------------------------------------------------------- + * SUNNonlinearSolver messages + * ---------------------------------------------------------------------------*/ + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define SUN_NLS_MSG_RESIDUAL "\tnonlin. iteration %ld, nonlin. residual: %Lg\n" +#elif defined(SUNDIALS_DOUBLE_PRECISION) +#define SUN_NLS_MSG_RESIDUAL "\tnonlin. iteration %ld, nonlin. residual: %g\n" +#else +#define SUN_NLS_MSG_RESIDUAL "\tnonlin. iteration %ld, nonlin. residual: %g\n" +#endif #ifdef __cplusplus } diff --git a/inst/include/sundials/sundials_nonlinearsolver.hpp b/inst/include/sundials/sundials_nonlinearsolver.hpp new file mode 100644 index 0000000..7fe048c --- /dev/null +++ b/inst/include/sundials/sundials_nonlinearsolver.hpp @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * C++ view of SUNDIALS SUNNonlinearSolver + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_NONLINEARSOLVER_HPP +#define _SUNDIALS_NONLINEARSOLVER_HPP + +#include +#include +#include + +namespace sundials { +namespace impl { +using BaseNonlinearSolver = BaseObject<_generic_SUNNonlinearSolver, _generic_SUNNonlinearSolver_Ops>; +} // namespace impl + +namespace experimental { +struct SUNNonlinearSolverDeleter +{ + void operator()(SUNNonlinearSolver NLS) + { + if (NLS) SUNNonlinSolFree(NLS); + } +}; +using SUNNonlinearSolverView = ClassView; +} // namespace experimental +} // namespace sundials + +#endif diff --git a/inst/include/sundials/sundials_nvector.h b/inst/include/sundials/sundials_nvector.h index ae65e3c..d20ce0f 100644 --- a/inst/include/sundials/sundials_nvector.h +++ b/inst/include/sundials/sundials_nvector.h @@ -2,7 +2,7 @@ * Programmer(s): Radu Serban and Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -45,18 +45,21 @@ #ifndef _NVECTOR_H #define _NVECTOR_H +#include +#include +#include #include -#ifdef __cplusplus /* wrapper to enable C++ usage */ +#ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif - /* ----------------------------------------------------------------- * Implemented N_Vector types * ----------------------------------------------------------------- */ -typedef enum { +typedef enum +{ SUNDIALS_NVEC_SERIAL, SUNDIALS_NVEC_PARALLEL, SUNDIALS_NVEC_OPENMP, @@ -64,7 +67,10 @@ typedef enum { SUNDIALS_NVEC_PARHYP, SUNDIALS_NVEC_PETSC, SUNDIALS_NVEC_CUDA, + SUNDIALS_NVEC_HIP, + SUNDIALS_NVEC_SYCL, SUNDIALS_NVEC_RAJA, + SUNDIALS_NVEC_KOKKOS, SUNDIALS_NVEC_OPENMPDEV, SUNDIALS_NVEC_TRILINOS, SUNDIALS_NVEC_MANYVECTOR, @@ -73,61 +79,75 @@ typedef enum { SUNDIALS_NVEC_CUSTOM } N_Vector_ID; - /* ----------------------------------------------------------------- * Generic definition of N_Vector * ----------------------------------------------------------------- */ /* Forward reference for pointer to N_Vector_Ops object */ -typedef _SUNDIALS_STRUCT_ _generic_N_Vector_Ops *N_Vector_Ops; +typedef _SUNDIALS_STRUCT_ _generic_N_Vector_Ops* N_Vector_Ops; /* Forward reference for pointer to N_Vector object */ -typedef _SUNDIALS_STRUCT_ _generic_N_Vector *N_Vector; +typedef _SUNDIALS_STRUCT_ _generic_N_Vector* N_Vector; /* Define array of N_Vectors */ -typedef N_Vector *N_Vector_S; +typedef N_Vector* N_Vector_S; /* Structure containing function pointers to vector operations */ -struct _generic_N_Vector_Ops { - N_Vector_ID (*nvgetvectorid)(N_Vector); - N_Vector (*nvclone)(N_Vector); - N_Vector (*nvcloneempty)(N_Vector); - void (*nvdestroy)(N_Vector); - void (*nvspace)(N_Vector, sunindextype *, sunindextype *); - realtype* (*nvgetarraypointer)(N_Vector); - void (*nvsetarraypointer)(realtype *, N_Vector); - void* (*nvgetcommunicator)(N_Vector); +struct _generic_N_Vector_Ops +{ + /* + * REQUIRED operations. + * + * These must be implemented by derivations of the generic N_Vector. + */ + + /* constructors, destructors, and utility operations */ + N_Vector_ID (*nvgetvectorid)(N_Vector); + N_Vector (*nvclone)(N_Vector); + N_Vector (*nvcloneempty)(N_Vector); + void (*nvdestroy)(N_Vector); + void (*nvspace)(N_Vector, sunindextype*, sunindextype*); + realtype* (*nvgetarraypointer)(N_Vector); + realtype* (*nvgetdevicearraypointer)(N_Vector); + void (*nvsetarraypointer)(realtype*, N_Vector); + void* (*nvgetcommunicator)(N_Vector); sunindextype (*nvgetlength)(N_Vector); + sunindextype (*nvgetlocallength)(N_Vector); /* standard vector operations */ - void (*nvlinearsum)(realtype, N_Vector, realtype, N_Vector, N_Vector); - void (*nvconst)(realtype, N_Vector); - void (*nvprod)(N_Vector, N_Vector, N_Vector); - void (*nvdiv)(N_Vector, N_Vector, N_Vector); - void (*nvscale)(realtype, N_Vector, N_Vector); - void (*nvabs)(N_Vector, N_Vector); - void (*nvinv)(N_Vector, N_Vector); - void (*nvaddconst)(N_Vector, realtype, N_Vector); - realtype (*nvdotprod)(N_Vector, N_Vector); - realtype (*nvmaxnorm)(N_Vector); - realtype (*nvwrmsnorm)(N_Vector, N_Vector); - realtype (*nvwrmsnormmask)(N_Vector, N_Vector, N_Vector); - realtype (*nvmin)(N_Vector); - realtype (*nvwl2norm)(N_Vector, N_Vector); - realtype (*nvl1norm)(N_Vector); - void (*nvcompare)(realtype, N_Vector, N_Vector); + void (*nvlinearsum)(realtype, N_Vector, realtype, N_Vector, N_Vector); + void (*nvconst)(realtype, N_Vector); + void (*nvprod)(N_Vector, N_Vector, N_Vector); + void (*nvdiv)(N_Vector, N_Vector, N_Vector); + void (*nvscale)(realtype, N_Vector, N_Vector); + void (*nvabs)(N_Vector, N_Vector); + void (*nvinv)(N_Vector, N_Vector); + void (*nvaddconst)(N_Vector, realtype, N_Vector); + realtype (*nvdotprod)(N_Vector, N_Vector); + realtype (*nvmaxnorm)(N_Vector); + realtype (*nvwrmsnorm)(N_Vector, N_Vector); + realtype (*nvwrmsnormmask)(N_Vector, N_Vector, N_Vector); + realtype (*nvmin)(N_Vector); + realtype (*nvwl2norm)(N_Vector, N_Vector); + realtype (*nvl1norm)(N_Vector); + void (*nvcompare)(realtype, N_Vector, N_Vector); booleantype (*nvinvtest)(N_Vector, N_Vector); booleantype (*nvconstrmask)(N_Vector, N_Vector, N_Vector); - realtype (*nvminquotient)(N_Vector, N_Vector); + realtype (*nvminquotient)(N_Vector, N_Vector); + + /* + * OPTIONAL operations. + * + * These operations provide default implementations that may be overriden. + */ - /* fused vector operations */ + /* OPTIONAL fused vector operations */ int (*nvlinearcombination)(int, realtype*, N_Vector*, N_Vector); int (*nvscaleaddmulti)(int, realtype*, N_Vector, N_Vector*, N_Vector*); int (*nvdotprodmulti)(int, N_Vector, N_Vector*, realtype*); - /* vector array operations */ - int (*nvlinearsumvectorarray)(int, realtype, N_Vector*, realtype, N_Vector*, - N_Vector*); + /* OPTIONAL vector array operations */ + int (*nvlinearsumvectorarray)(int, realtype, N_Vector*, realtype, N_Vector*, N_Vector*); int (*nvscalevectorarray)(int, realtype*, N_Vector*, N_Vector*); int (*nvconstvectorarray)(int, realtype, N_Vector*); int (*nvwrmsnormvectorarray)(int, N_Vector*, N_Vector*, realtype*); @@ -135,7 +155,11 @@ struct _generic_N_Vector_Ops { int (*nvscaleaddmultivectorarray)(int, int, realtype*, N_Vector*, N_Vector**, N_Vector**); int (*nvlinearcombinationvectorarray)(int, int, realtype*, N_Vector**, N_Vector*); - /* OPTIONAL local reduction kernels (no parallel communication) */ + /* + * OPTIONAL operations with no default implementation. + */ + + /* Local reduction kernels (no parallel communication) */ realtype (*nvdotprodlocal)(N_Vector, N_Vector); realtype (*nvmaxnormlocal)(N_Vector); realtype (*nvminlocal)(N_Vector); @@ -145,38 +169,64 @@ struct _generic_N_Vector_Ops { realtype (*nvminquotientlocal)(N_Vector, N_Vector); realtype (*nvwsqrsumlocal)(N_Vector, N_Vector); realtype (*nvwsqrsummasklocal)(N_Vector, N_Vector, N_Vector); + + /* Single buffer reduction operations */ + int (*nvdotprodmultilocal)(int, N_Vector, N_Vector*, realtype*); + int (*nvdotprodmultiallreduce)(int, N_Vector, realtype*); + + /* XBraid interface operations */ + int (*nvbufsize)(N_Vector, sunindextype*); + int (*nvbufpack)(N_Vector, void*); + int (*nvbufunpack)(N_Vector, void*); + + /* Debugging functions (called when SUNDIALS_DEBUG_PRINTVEC is defined). */ + void (*nvprint)(N_Vector); + void (*nvprintfile)(N_Vector, FILE*); + +#ifdef __cplusplus + _generic_N_Vector_Ops() = default; +#endif }; /* A vector is a structure with an implementation-dependent 'content' field, and a pointer to a structure of vector operations corresponding to that implementation. */ -struct _generic_N_Vector { - void *content; +struct _generic_N_Vector +{ + void* content; N_Vector_Ops ops; + SUNContext sunctx; +#ifdef __cplusplus + _generic_N_Vector() = default; +#endif }; - /* ----------------------------------------------------------------- * Functions exported by NVECTOR module * ----------------------------------------------------------------- */ -SUNDIALS_EXPORT N_Vector N_VNewEmpty(); +SUNDIALS_EXPORT N_Vector N_VNewEmpty(SUNContext sunctx); SUNDIALS_EXPORT void N_VFreeEmpty(N_Vector v); SUNDIALS_EXPORT int N_VCopyOps(N_Vector w, N_Vector v); +/* + * Required operations. + */ + SUNDIALS_EXPORT N_Vector_ID N_VGetVectorID(N_Vector w); SUNDIALS_EXPORT N_Vector N_VClone(N_Vector w); SUNDIALS_EXPORT N_Vector N_VCloneEmpty(N_Vector w); SUNDIALS_EXPORT void N_VDestroy(N_Vector v); -SUNDIALS_EXPORT void N_VSpace(N_Vector v, sunindextype *lrw, sunindextype *liw); -SUNDIALS_EXPORT realtype *N_VGetArrayPointer(N_Vector v); -SUNDIALS_EXPORT void N_VSetArrayPointer(realtype *v_data, N_Vector v); -SUNDIALS_EXPORT void *N_VGetCommunicator(N_Vector v); +SUNDIALS_EXPORT void N_VSpace(N_Vector v, sunindextype* lrw, sunindextype* liw); +SUNDIALS_EXPORT realtype* N_VGetArrayPointer(N_Vector v); +SUNDIALS_EXPORT realtype* N_VGetDeviceArrayPointer(N_Vector v); +SUNDIALS_EXPORT void N_VSetArrayPointer(realtype* v_data, N_Vector v); +SUNDIALS_EXPORT void* N_VGetCommunicator(N_Vector v); SUNDIALS_EXPORT sunindextype N_VGetLength(N_Vector v); +SUNDIALS_EXPORT sunindextype N_VGetLocalLength(N_Vector v); /* standard vector operations */ -SUNDIALS_EXPORT void N_VLinearSum(realtype a, N_Vector x, realtype b, - N_Vector y, N_Vector z); +SUNDIALS_EXPORT void N_VLinearSum(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z); SUNDIALS_EXPORT void N_VConst(realtype c, N_Vector z); SUNDIALS_EXPORT void N_VProd(N_Vector x, N_Vector y, N_Vector z); SUNDIALS_EXPORT void N_VDiv(N_Vector x, N_Vector y, N_Vector z); @@ -196,43 +246,38 @@ SUNDIALS_EXPORT booleantype N_VInvTest(N_Vector x, N_Vector z); SUNDIALS_EXPORT booleantype N_VConstrMask(N_Vector c, N_Vector x, N_Vector m); SUNDIALS_EXPORT realtype N_VMinQuotient(N_Vector num, N_Vector denom); -/* OPTIONAL fused vector operations */ -SUNDIALS_EXPORT int N_VLinearCombination(int nvec, realtype* c, N_Vector* X, - N_Vector z); +/* + * OPTIONAL operations with default implementations. + */ -SUNDIALS_EXPORT int N_VScaleAddMulti(int nvec, realtype* a, N_Vector x, - N_Vector* Y, N_Vector* Z); +/* fused vector operations */ +SUNDIALS_EXPORT int N_VLinearCombination(int nvec, realtype* c, N_Vector* X, N_Vector z); -SUNDIALS_EXPORT int N_VDotProdMulti(int nvec, N_Vector x, N_Vector* Y, - realtype* dotprods); +SUNDIALS_EXPORT int N_VScaleAddMulti(int nvec, realtype* a, N_Vector x, N_Vector* Y, N_Vector* Z); -/* OPTIONAL vector array operations */ -SUNDIALS_EXPORT int N_VLinearSumVectorArray(int nvec, - realtype a, N_Vector* X, - realtype b, N_Vector* Y, - N_Vector* Z); +SUNDIALS_EXPORT int N_VDotProdMulti(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods); -SUNDIALS_EXPORT int N_VScaleVectorArray(int nvec, realtype* c, N_Vector* X, - N_Vector* Z); +/* vector array operations */ +SUNDIALS_EXPORT int N_VLinearSumVectorArray(int nvec, realtype a, N_Vector* X, realtype b, N_Vector* Y, N_Vector* Z); + +SUNDIALS_EXPORT int N_VScaleVectorArray(int nvec, realtype* c, N_Vector* X, N_Vector* Z); SUNDIALS_EXPORT int N_VConstVectorArray(int nvec, realtype c, N_Vector* Z); -SUNDIALS_EXPORT int N_VWrmsNormVectorArray(int nvec, N_Vector* X, N_Vector* W, - realtype* nrm); +SUNDIALS_EXPORT int N_VWrmsNormVectorArray(int nvec, N_Vector* X, N_Vector* W, realtype* nrm); + +SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray(int nvec, N_Vector* X, N_Vector* W, N_Vector id, realtype* nrm); -SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray(int nvec, N_Vector* X, - N_Vector* W, N_Vector id, - realtype* nrm); +SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray(int nvec, int nsum, realtype* a, N_Vector* X, N_Vector** Y, + N_Vector** Z); -SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray(int nvec, int nsum, - realtype* a, N_Vector* X, - N_Vector** Y, N_Vector** Z); +SUNDIALS_EXPORT int N_VLinearCombinationVectorArray(int nvec, int nsum, realtype* c, N_Vector** X, N_Vector* Z); -SUNDIALS_EXPORT int N_VLinearCombinationVectorArray(int nvec, int nsum, - realtype* c, N_Vector** X, - N_Vector* Z); +/* + * OPTIONAL operations with no default implementation. + */ -/* OPTIONAL local reduction kernels (no parallel communication) */ +/* local reduction kernels (no parallel communication) */ SUNDIALS_EXPORT realtype N_VDotProdLocal(N_Vector x, N_Vector y); SUNDIALS_EXPORT realtype N_VMaxNormLocal(N_Vector x); SUNDIALS_EXPORT realtype N_VMinLocal(N_Vector x); @@ -243,6 +288,14 @@ SUNDIALS_EXPORT booleantype N_VInvTestLocal(N_Vector x, N_Vector z); SUNDIALS_EXPORT booleantype N_VConstrMaskLocal(N_Vector c, N_Vector x, N_Vector m); SUNDIALS_EXPORT realtype N_VMinQuotientLocal(N_Vector num, N_Vector denom); +/* single buffer reduction operations */ +SUNDIALS_EXPORT int N_VDotProdMultiLocal(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods); +SUNDIALS_EXPORT int N_VDotProdMultiAllReduce(int nvec_total, N_Vector x, realtype* sum); + +/* XBraid interface operations */ +SUNDIALS_EXPORT int N_VBufSize(N_Vector x, sunindextype* size); +SUNDIALS_EXPORT int N_VBufPack(N_Vector x, void* buf); +SUNDIALS_EXPORT int N_VBufUnpack(N_Vector x, void* buf); /* ----------------------------------------------------------------- * Additional functions exported by NVECTOR module @@ -257,6 +310,13 @@ SUNDIALS_EXPORT void N_VDestroyVectorArray(N_Vector* vs, int count); SUNDIALS_EXPORT N_Vector N_VGetVecAtIndexVectorArray(N_Vector* vs, int index); SUNDIALS_EXPORT void N_VSetVecAtIndexVectorArray(N_Vector* vs, int index, N_Vector w); +/* ----------------------------------------------------------------- + * Debugging functions + * ----------------------------------------------------------------- */ + +SUNDIALS_EXPORT void N_VPrint(N_Vector v); +SUNDIALS_EXPORT void N_VPrintFile(N_Vector v, FILE* outfile); + #ifdef __cplusplus } #endif diff --git a/inst/include/sundials/sundials_nvector.hpp b/inst/include/sundials/sundials_nvector.hpp new file mode 100644 index 0000000..3cfd049 --- /dev/null +++ b/inst/include/sundials/sundials_nvector.hpp @@ -0,0 +1,41 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * C++ view of SUNDIALS NVector + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_NVECTOR_HPP +#define _SUNDIALS_NVECTOR_HPP + +#include +#include +#include + +namespace sundials { +namespace impl { +using BaseNVector = BaseObject<_generic_N_Vector, _generic_N_Vector_Ops>; +} // namespace impl + +namespace experimental { +struct NVectorDeleter +{ + void operator()(N_Vector v) + { + if (v) N_VDestroy(v); + } +}; +using NVectorView = ClassView; +} // namespace experimental +} // namespace sundials + +#endif diff --git a/inst/include/sundials/sundials_nvector_senswrapper.h b/inst/include/sundials/sundials_nvector_senswrapper.h index 728ded6..7cd7898 100644 --- a/inst/include/sundials/sundials_nvector_senswrapper.h +++ b/inst/include/sundials/sundials_nvector_senswrapper.h @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -60,7 +60,7 @@ typedef struct _N_VectorContent_SensWrapper *N_VectorContent_SensWrapper; ============================================================================*/ /* constructor creates an empty vector wrapper */ -SUNDIALS_EXPORT N_Vector N_VNewEmpty_SensWrapper(int nvecs); +SUNDIALS_EXPORT N_Vector N_VNewEmpty_SensWrapper(int nvecs, SUNContext sunctx); SUNDIALS_EXPORT N_Vector N_VNew_SensWrapper(int count, N_Vector w); /* clone operations */ diff --git a/inst/include/sundials/sundials_profiler.h b/inst/include/sundials/sundials_profiler.h new file mode 100644 index 0000000..64922b9 --- /dev/null +++ b/inst/include/sundials/sundials_profiler.h @@ -0,0 +1,118 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_PROFILER_H +#define _SUNDIALS_PROFILER_H + +#include + +#include "sundials/sundials_config.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && defined(SUNDIALS_CALIPER_ENABLED) +#include "caliper/cali.h" +#endif + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +typedef struct _SUNProfiler *SUNProfiler; + +SUNDIALS_EXPORT int SUNProfiler_Create(void* comm, const char* title, SUNProfiler* p); +SUNDIALS_EXPORT int SUNProfiler_Free(SUNProfiler* p); +SUNDIALS_EXPORT int SUNProfiler_Begin(SUNProfiler p, const char* name); +SUNDIALS_EXPORT int SUNProfiler_End(SUNProfiler p, const char* name); +SUNDIALS_EXPORT int SUNProfiler_Print(SUNProfiler p, FILE* fp); +SUNDIALS_EXPORT int SUNProfiler_Reset(SUNProfiler p); + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && defined(SUNDIALS_CALIPER_ENABLED) + +#define SUNDIALS_MARK_FUNCTION_BEGIN(profobj) CALI_MARK_FUNCTION_BEGIN + +#define SUNDIALS_MARK_FUNCTION_END(profobj) CALI_MARK_FUNCTION_END + +#define SUNDIALS_WRAP_STATEMENT(profobj, name, stmt) CALI_WRAP_STATEMENT(name, stmt) + +#define SUNDIALS_MARK_BEGIN(profobj, name) CALI_MARK_BEGIN(name) + +#define SUNDIALS_MARK_END(profobj, name) CALI_MARK_END(name) + +#ifdef __cplusplus +#define SUNDIALS_CXX_MARK_FUNCTION(projobj) CALI_CXX_MARK_FUNCTION +#endif + +#elif defined(SUNDIALS_BUILD_WITH_PROFILING) + +#define SUNDIALS_MARK_FUNCTION_BEGIN(profobj) SUNProfiler_Begin(profobj, __func__) + +#define SUNDIALS_MARK_FUNCTION_END(profobj) SUNProfiler_End(profobj, __func__) + +#define SUNDIALS_WRAP_STATEMENT(profobj, name, stmt) \ + SUNProfiler_Begin(profobj, (name)); \ + stmt; \ + SUNProfiler_End(profobj, (name)); + +#define SUNDIALS_MARK_BEGIN(profobj, name) SUNProfiler_Begin(profobj, (name)) + +#define SUNDIALS_MARK_END(profobj, name) SUNProfiler_End(profobj, (name)) + +#ifdef __cplusplus +#define SUNDIALS_CXX_MARK_FUNCTION(profobj) sundials::ProfilerMarkScope __ProfilerMarkScope(profobj, __func__) +#endif + +#else + +#define SUNDIALS_MARK_FUNCTION_BEGIN(profobj) + +#define SUNDIALS_MARK_FUNCTION_END(profobj) + +#define SUNDIALS_WRAP_STATEMENT(profobj, name, stmt) + +#define SUNDIALS_MARK_BEGIN(profobj, name) + +#define SUNDIALS_MARK_END(profobj, name) + +#ifdef __cplusplus +#define SUNDIALS_CXX_MARK_FUNCTION(profobj) +#endif + +#endif + +#ifdef __cplusplus +} + +namespace sundials +{ +/* Convenience class for C++ codes. + Allows for simpler profiler statements using C++ scoping rules. */ +class ProfilerMarkScope +{ +public: + ProfilerMarkScope(SUNProfiler prof, const char* name) { + prof_ = prof; + name_ = name; + SUNProfiler_Begin(prof_, name_); + } + + ~ProfilerMarkScope() { + SUNProfiler_End(prof_, name_); + } +private: + SUNProfiler prof_; + const char* name_; +}; +} + +#endif +#endif /* SUNDIALS_PROFILER_H_ */ diff --git a/inst/include/sundials/sundials_sycl.h b/inst/include/sundials/sundials_sycl.h new file mode 100644 index 0000000..1b7f982 --- /dev/null +++ b/inst/include/sundials/sundials_sycl.h @@ -0,0 +1,73 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This header files defines internal utility functions and macros for working + * with SYCL. + * ---------------------------------------------------------------------------*/ + +#include +#include + +#ifndef _SUNDIALS_SYCL_H +#define _SUNDIALS_SYCL_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* Get the maximum work group size (block size) for a queue */ +#define SYCL_BLOCKDIM(q) (q->get_device().get_info<::sycl::info::device::max_work_group_size>()) + +/* Grid (work group) stride loop */ +#define GRID_STRIDE_XLOOP(item, iter, max) \ + for (sunindextype iter = item.get_global_id(0); \ + iter < max; \ + iter += item.get_global_range(0)) + +/* Sycl parallel for loop */ +#define SYCL_FOR(q, total, block, item, loop) \ + q->submit([&](::sycl::handler& h) { \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + [=](::sycl::nd_item<1> item) \ + { loop }); }); + +/* Sycl parallel for loop with stream for ouput */ +#define SYCL_FOR_DEBUG(q, total, block, item, loop) \ + q->submit([&](::sycl::handler& h) { \ + ::sycl::stream out(1024, 256, h); \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + [=](::sycl::nd_item<1> item) \ + { loop }); }); + +/* Sycl parallel for loop with reduction */ +#define SYCL_FOR_REDUCE(q, total, block, item, rvar, rop, loop) \ + q->submit([&](::sycl::handler& h) { \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + ::sycl::reduction(rvar, rop), \ + [=](::sycl::nd_item<1> item, auto& rvar) \ + { loop }); }); + +/* Sycl parallel for loop with reduction and stream for ouput */ +#define SYCL_FOR_REDUCE_DEBUG(q, total, block, item, rvar, rop, loop) \ + q->submit([&](::sycl::handler& h) { \ + ::sycl::stream out(1024, 256, h); \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + ::sycl::reduction(rvar, rop), \ + [=](::sycl::nd_item<1> item, auto& rvar) \ + { loop }); }); + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_SYCL_H */ diff --git a/inst/include/sundials/sundials_sycl_policies.hpp b/inst/include/sundials/sundials_sycl_policies.hpp new file mode 100644 index 0000000..1a86b4c --- /dev/null +++ b/inst/include/sundials/sundials_sycl_policies.hpp @@ -0,0 +1,163 @@ +/* ----------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines the ExecPolicy classes which + * are utilized to determine SYCL kernel launch parameters. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_SYCLEXECPOLICIES_HPP +#define _SUNDIALS_SYCLEXECPOLICIES_HPP + +#include +#include +#include + +namespace sundials +{ +namespace sycl +{ + +class ExecPolicy +{ +public: + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const = 0; + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const = 0; + virtual ExecPolicy* clone() const = 0; + virtual ~ExecPolicy() {} +}; + + +/* + * A kernel execution policy that maps each thread to a work unit. + * The number of threads per block (blockSize) can be set to anything. + * The grid size will be chosen so that there are enough threads for one + * thread per element. + */ +class ThreadDirectExecPolicy : public ExecPolicy +{ +public: + ThreadDirectExecPolicy(const size_t blockDim) + : blockDim_(blockDim) + {} + + ThreadDirectExecPolicy(const ThreadDirectExecPolicy& ex) + : blockDim_(ex.blockDim_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const + { + /* ceil(n/m) = floor((n + m - 1) / m) */ + return (numWorkUnits + blockSize() - 1) / blockSize(); + } + + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new ThreadDirectExecPolicy(*this)); + } + +private: + const size_t blockDim_; +}; + +/* + * A kernel execution policy for kernels that use grid stride loops. + * The number of threads per block (blockSize) can be set to anything. + * The number of blocks (gridSize) can be set to anything. + */ +class GridStrideExecPolicy : public ExecPolicy +{ +public: + GridStrideExecPolicy(const size_t blockDim, const size_t gridDim) + : blockDim_(blockDim), gridDim_(gridDim) + {} + + GridStrideExecPolicy(const GridStrideExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const + { + return gridDim_; + } + + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new GridStrideExecPolicy(*this)); + } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +/* + * A kernel execution policy for performing a reduction across indvidual thread + * blocks. The number of threads per block (blockSize) can be set to anything. + * The number of blocks (gridSize) can be set to any value greater than or equal + * to 0. If it is set to 0, then the grid size will be chosen so that there are + * at most two work units per thread. + */ +class BlockReduceExecPolicy : public ExecPolicy +{ +public: + BlockReduceExecPolicy(const size_t blockDim, const size_t gridDim = 0) + : blockDim_(blockDim), gridDim_(gridDim) + {} + + BlockReduceExecPolicy(const BlockReduceExecPolicy& ex) + : blockDim_(ex.blockDim_), gridDim_(ex.gridDim_) + {} + + virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const + { + if (gridDim_ == 0) + { + return (numWorkUnits + (blockSize() * 2 - 1)) / (blockSize() * 2); + } + return gridDim_; + } + + virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const + { + return blockDim_; + } + + virtual ExecPolicy* clone() const + { + return static_cast(new BlockReduceExecPolicy(*this)); + } + +private: + const size_t blockDim_; + const size_t gridDim_; +}; + +} // namespace sycl +} // namespace sundials + +typedef sundials::sycl::ExecPolicy SUNSyclExecPolicy; +typedef sundials::sycl::ThreadDirectExecPolicy SUNSyclThreadDirectExecPolicy; +typedef sundials::sycl::GridStrideExecPolicy SUNSyclGridStrideExecPolicy; +typedef sundials::sycl::BlockReduceExecPolicy SUNSyclBlockReduceExecPolicy; + +#endif diff --git a/inst/include/sundials/sundials_types.h b/inst/include/sundials/sundials_types.h index 24737de..8bc4467 100644 --- a/inst/include/sundials/sundials_types.h +++ b/inst/include/sundials/sundials_types.h @@ -3,7 +3,7 @@ * Aaron Collier, and Slaven Peles @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -12,18 +12,18 @@ * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------- - * This header file exports three types: realtype, sunindextype and - * booleantype, as well as the constants SUNTRUE and SUNFALSE. + * This header file exports three types: sunrealtype, sunindextype, + * and sunbooleantype, as well as the constants SUNTRUE and SUNFALSE. * * Users should include the header file sundials_types.h in every - * program file and use the exported name realtype instead of + * program file and use the exported name sunrealtype instead of * float, double or long double. * * The constants SUNDIALS_SINGLE_PRECISION, SUNDIALS_DOUBLE_PRECISION * and SUNDIALS_LONG_DOUBLE_PRECISION indicate the underlying data - * type of realtype. + * type of sunrealtype. * - * The legal types for realtype are float, double and long double. + * The legal types for sunrealtype are float, double and long double. * * The constants SUNDIALS_INT64_T and SUNDIALS_INT32_T indicate * the underlying data type of sunindextype -- the integer data type @@ -31,27 +31,24 @@ * * Data types are set at the configuration stage. * - * The macro RCONST gives the user a convenient way to define + * The macro SUN_RCONST gives the user a convenient way to define * real-valued literal constants. To use the constant 1.0, for example, * the user should write the following: * - * #define ONE RCONST(1.0) + * #define ONE SUN_RCONST(1.0) * - * If realtype is defined as a double, then RCONST(1.0) expands - * to 1.0. If realtype is defined as a float, then RCONST(1.0) - * expands to 1.0F. If realtype is defined as a long double, - * then RCONST(1.0) expands to 1.0L. There is never a need to - * explicitly cast 1.0 to (realtype). The macro can be used for + * If sunrealtype is defined as a double, then SUN_RCONST(1.0) expands + * to 1.0. If sunrealtype is defined as a float, then SUN_RCONST(1.0) + * expands to 1.0F. If sunrealtype is defined as a long double, + * then SUN_RCONST(1.0) expands to 1.0L. There is never a need to + * explicitly cast 1.0 to (sunrealtype). The macro can be used for * literal constants only. It cannot be used for expressions. * -----------------------------------------------------------------*/ -#ifndef _SUNDIALSTYPES_H -#define _SUNDIALSTYPES_H +#ifndef _SUNDIALS_TYPES_H +#define _SUNDIALS_TYPES_H -#ifndef _SUNDIALS_CONFIG_H -#define _SUNDIALS_CONFIG_H #include -#endif #include #include @@ -67,7 +64,7 @@ extern "C" { * The _SUNDIALS_STRUCT_ macro is defined as a `struct` unless * generating the SWIG interfaces - in that case it is defined as * nothing. This is needed to work around a bug in SWIG which prevents - * it from properly parsing our generic module structures. + * it from properly parsing our generic module structures. *------------------------------------------------------------------ */ #ifdef SWIG @@ -78,36 +75,57 @@ extern "C" { /* *------------------------------------------------------------------ - * Type realtype - * Macro RCONST - * Constants BIG_REAL, SMALL_REAL, and UNIT_ROUNDOFF + * Type sunrealtype + * Macro SUN_RCONST + * Constants SUN_SMALL_REAL, SUN_BIG_REAL, and SUN_UNIT_ROUNDOFF *------------------------------------------------------------------ */ #if defined(SUNDIALS_SINGLE_PRECISION) +/* deprecated */ typedef float realtype; # define RCONST(x) x##F # define BIG_REAL FLT_MAX # define SMALL_REAL FLT_MIN # define UNIT_ROUNDOFF FLT_EPSILON +typedef float sunrealtype; +# define SUN_RCONST(x) x##F +# define SUN_BIG_REAL FLT_MAX +# define SUN_SMALL_REAL FLT_MIN +# define SUN_UNIT_ROUNDOFF FLT_EPSILON + #elif defined(SUNDIALS_DOUBLE_PRECISION) +/* deprecated */ typedef double realtype; # define RCONST(x) x # define BIG_REAL DBL_MAX # define SMALL_REAL DBL_MIN # define UNIT_ROUNDOFF DBL_EPSILON +typedef double sunrealtype; +# define SUN_RCONST(x) x +# define SUN_BIG_REAL DBL_MAX +# define SUN_SMALL_REAL DBL_MIN +# define SUN_UNIT_ROUNDOFF DBL_EPSILON + #elif defined(SUNDIALS_EXTENDED_PRECISION) +/* deprecated */ typedef long double realtype; # define RCONST(x) x##L # define BIG_REAL LDBL_MAX # define SMALL_REAL LDBL_MIN # define UNIT_ROUNDOFF LDBL_EPSILON +typedef long double sunrealtype; +# define SUN_RCONST(x) x##L +# define SUN_BIG_REAL LDBL_MAX +# define SUN_SMALL_REAL LDBL_MIN +# define SUN_UNIT_ROUNDOFF LDBL_EPSILON + #endif @@ -126,25 +144,30 @@ typedef SUNDIALS_INDEX_TYPE sunindextype; /* *------------------------------------------------------------------ - * Type : booleantype + * Type : sunbooleantype *------------------------------------------------------------------ * Constants : SUNFALSE and SUNTRUE *------------------------------------------------------------------ * ANSI C does not have a built-in boolean data type. Below is the - * definition for a new type called booleantype. The advantage of - * using the name booleantype (instead of int) is an increase in + * definition for a new type called sunbooleantype. The advantage of + * using the name sunbooleantype (instead of int) is an increase in * code readability. It also allows the programmer to make a * distinction between int and boolean data. Variables of type - * booleantype are intended to have only the two values SUNFALSE and + * sunbooleantype are intended to have only the two values SUNFALSE and * SUNTRUE which are defined below to be equal to 0 and 1, * respectively. *------------------------------------------------------------------ */ +/* deprecated */ #ifndef booleantype #define booleantype int #endif +#ifndef sunbooleantype +#define sunbooleantype int +#endif + #ifndef SUNFALSE #define SUNFALSE 0 #endif @@ -153,8 +176,21 @@ typedef SUNDIALS_INDEX_TYPE sunindextype; #define SUNTRUE 1 #endif +/* + *------------------------------------------------------------------ + * Type : sunoutputformat + *------------------------------------------------------------------ + * Constants for different output formats + *------------------------------------------------------------------ + */ + +typedef enum { + SUN_OUTPUTFORMAT_TABLE, + SUN_OUTPUTFORMAT_CSV +} SUNOutputFormat; + #ifdef __cplusplus } #endif -#endif /* _SUNDIALSTYPES_H */ +#endif /* _SUNDIALS_TYPES_H */ diff --git a/inst/include/sundials/sundials_utils.h b/inst/include/sundials/sundials_utils.h new file mode 100644 index 0000000..2b1a217 --- /dev/null +++ b/inst/include/sundials/sundials_utils.h @@ -0,0 +1,88 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header file contains common utility functions. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_UTILS_H +#define _SUNDIALS_UTILS_H + +#include +#include +#include + +static int sunvsnprintf(char* buffer, size_t bufsz, const char* format, va_list vlist) +{ + int size = 0; +#ifdef SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY + va_list tmp; + va_copy(tmp, vlist); + size = vsnprintf(buffer, bufsz, format, tmp); + va_end(tmp); +#else + size = SUNDIALS_MAX_SPRINTF_SIZE; + if ((int) strlen(format) > size) + { + /* buffer is definitely not big enough */ + size = -1; + } + else if (buffer != NULL) + { + vsprintf(buffer, format, vlist); + } +#endif +return size; +} + + +static int sunsnprintf(char* buffer, size_t bufsz, const char* format, ...) +{ + int size = 0; + va_list args; + va_start(args, format); + size = sunvsnprintf(buffer, bufsz, format, args); + va_end(args); + return size; +} + +/* + * Implementation of the GNU extension function vasprintf which + * is itself an analog for vsprintf, except it allocates a string + * large enough to hold the output byte ('\0'). + */ +static int sunvasnprintf(char** str, const char* fmt, va_list args) +{ + int size = 0; + + /* compute string length */ + size = sunvsnprintf(NULL, 0, fmt, args); + + if (size < 0) + { + return -1; + } + + /* add one to size for the null terminator*/ + *str = (char*) malloc(size + 1); + if (NULL == *str) + { + return -1; + } + + size = vsprintf(*str, fmt, args); + + return size; +} + + +#endif /* _SUNDIALS_UTILS_H */ diff --git a/inst/include/sundials/sundials_version.h b/inst/include/sundials/sundials_version.h index e557405..4805db3 100644 --- a/inst/include/sundials/sundials_version.h +++ b/inst/include/sundials/sundials_version.h @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/inst/include/sundials/sundials_xbraid.h b/inst/include/sundials/sundials_xbraid.h new file mode 100644 index 0000000..1639fac --- /dev/null +++ b/inst/include/sundials/sundials_xbraid.h @@ -0,0 +1,134 @@ +/* -------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * -------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -------------------------------------------------------------------------- + * This is the header file for SUNDIALS + XBraid interface base class and + * NVector interface. + * -------------------------------------------------------------------------- */ + +#ifndef _SUNDIALS_XBRAID_H +#define _SUNDIALS_XBRAID_H + +#include "sundials/sundials_types.h" +#include "sundials/sundials_nvector.h" +#include "braid.h" + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + + +/* ----------------------- + * XBraid vector structure + * ----------------------- */ + + +struct _braid_Vector_struct +{ + N_Vector y; +}; + +/* Poiner to vector wrapper (same as braid_Vector) */ +typedef struct _braid_Vector_struct *SUNBraidVector; + + +/* ----------------------------- + * XBraid ops and app structures + * ----------------------------- */ + + +/* Structure containing function pointers to operations */ +struct _SUNBraidOps +{ + int (*getvectmpl)(braid_App app, N_Vector *tmpl); +}; + +/* Pointer to operations structure */ +typedef struct _SUNBraidOps *SUNBraidOps; + + +/* Define XBraid App structure */ +struct _braid_App_struct +{ + void *content; + SUNBraidOps ops; +}; + +/* Pointer to the interface object (same as braid_App) */ +typedef struct _braid_App_struct *SUNBraidApp; + + +/* ----------------------- + * SUNBraid app operations + * ----------------------- */ + + +SUNDIALS_EXPORT int SUNBraidApp_NewEmpty(braid_App *app); + +SUNDIALS_EXPORT int SUNBraidApp_FreeEmpty(braid_App *app); + +SUNDIALS_EXPORT int SUNBraidApp_GetVecTmpl(braid_App app, N_Vector *tmpl); + + +/* ------------------------- + * SUNBraid vector functions + * ------------------------- */ + + +SUNDIALS_EXPORT int SUNBraidVector_New(N_Vector y, SUNBraidVector *u); + +SUNDIALS_EXPORT int SUNBraidVector_GetNVector(SUNBraidVector u, N_Vector *y); + +SUNDIALS_EXPORT int SUNBraidVector_Clone(braid_App app, braid_Vector u, + braid_Vector *v_ptr); + +SUNDIALS_EXPORT int SUNBraidVector_Free(braid_App app, braid_Vector u); + +SUNDIALS_EXPORT int SUNBraidVector_Sum(braid_App app, + braid_Real alpha, braid_Vector x, + braid_Real beta, braid_Vector y); + +SUNDIALS_EXPORT int SUNBraidVector_SpatialNorm(braid_App app, braid_Vector u, + braid_Real *norm_ptr); + +SUNDIALS_EXPORT int SUNBraidVector_BufSize(braid_App app, braid_Int *size_ptr, + braid_BufferStatus bstatus); + +SUNDIALS_EXPORT int SUNBraidVector_BufPack(braid_App app, braid_Vector u, + void *buffer, + braid_BufferStatus bstatus); + +SUNDIALS_EXPORT int SUNBraidVector_BufUnpack(braid_App app, void *buffer, + braid_Vector *u_ptr, + braid_BufferStatus bstatus); + + +/* ---------------------- + * SUNBraid return values + * ---------------------- */ + + +#define SUNBRAID_SUCCESS 0 /* call/operation was successful */ + +#define SUNBRAID_ALLOCFAIL -1 /* a memory allocation failed */ +#define SUNBRAID_MEMFAIL -2 /* a memory access fail */ +#define SUNBRAID_OPNULL -3 /* the SUNBraid operation is NULL */ +#define SUNBRAID_ILLINPUT -4 /* an invalid input was provided */ +#define SUNBRAID_BRAIDFAIL -5 /* an XBraid function failed */ +#define SUNBRAID_SUNFAIL -6 /* a SUNDIALS function failed */ + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunlinsol/sunlinsol_band.h b/inst/include/sunlinsol/sunlinsol_band.h index 45b31b5..820309b 100644 --- a/inst/include/sunlinsol/sunlinsol_band.h +++ b/inst/include/sunlinsol/sunlinsol_band.h @@ -3,7 +3,7 @@ * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -51,12 +51,7 @@ typedef struct _SUNLinearSolverContent_Band *SUNLinearSolverContent_Band; * Exported Functions for SUNLINSOL_BAND * -------------------------------------- */ -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_Band(N_Vector y, SUNMatrix A); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNBandLinearSolver(N_Vector y, - SUNMatrix A); - +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_Band(N_Vector y, SUNMatrix A, SUNContext sunctx); SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_Band(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_Band(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_Band(SUNLinearSolver S); diff --git a/inst/include/sunlinsol/sunlinsol_cusolversp_batchqr.h b/inst/include/sunlinsol/sunlinsol_cusolversp_batchqr.h index e306147..528f898 100644 --- a/inst/include/sunlinsol/sunlinsol_cusolversp_batchqr.h +++ b/inst/include/sunlinsol/sunlinsol_cusolversp_batchqr.h @@ -4,7 +4,7 @@ * Based on work by Donald Wilcox @ LBNL * ---------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -16,8 +16,8 @@ * Header file for cuSolverSp batched QR SUNLinearSolver interface. * ----------------------------------------------------------------------------*/ -#ifndef _SUNLINSOL_SLUDIST_H -#define _SUNLINSOL_SLUDIST_H +#ifndef _SUNLINSOL_CUSOLVERSP_H +#define _SUNLINSOL_CUSOLVERSP_H #include #include @@ -25,7 +25,6 @@ #include #include #include -#include #ifdef __cplusplus extern "C" { @@ -39,18 +38,11 @@ extern "C" { */ struct _SUNLinearSolverContent_cuSolverSp_batchQR { - int nsubsys; /* number of subsystems */ - int subsys_size; /* size of each subsystem */ - int subsys_nnz; /* number of nonzeros per subsystem */ int last_flag; /* last return flag */ booleantype first_factorize; /* is this the first factorization? */ size_t internal_size; /* size of cusolver internal buffer for Q and R */ size_t workspace_size; /* size of cusolver memory block for num. factorization */ cusolverSpHandle_t cusolver_handle; /* cuSolverSp context */ - cusparseMatDescr_t system_description; /* matrix description */ - realtype* d_values; /* device array of matrix A values */ - int* d_rowptr; /* device array of rowptrs for a subsystem */ - int* d_colind; /* device array of column indices for a subsystem */ csrqrInfo_t info; /* opaque cusolver data structure */ void* workspace; /* memory block used by cusolver */ const char* desc; /* description of this linear solver */ @@ -65,11 +57,9 @@ typedef struct _SUNLinearSolverContent_cuSolverSp_batchQR *SUNLinearSolverConten * ---------------------------------------------------------------------------- */ -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_cuSolverSp_batchQR(N_Vector y, - SUNMatrix A, - int nsubsys, - int subsys_size, - int subsys_nnz); +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_cuSolverSp_batchQR(N_Vector y, SUNMatrix A, + cusolverSpHandle_t cusol_handle, + SUNContext sunctx); /* @@ -95,10 +85,6 @@ SUNDIALS_EXPORT int SUNLinSolSolve_cuSolverSp_batchQR(SUNLinearSolver S, SUNDIALS_EXPORT sunindextype SUNLinSolLastFlag_cuSolverSp_batchQR(SUNLinearSolver S); -SUNDIALS_EXPORT int SUNLinSolSpace_cuSolverSp_batchQR(SUNLinearSolver S, - long int* lenrwLS, - long int* leniwLS); - SUNDIALS_EXPORT int SUNLinSolFree_cuSolverSp_batchQR(SUNLinearSolver S); @@ -114,6 +100,10 @@ SUNDIALS_EXPORT void SUNLinSol_cuSolverSp_batchQR_GetDescription(SUNLinearSolver SUNDIALS_EXPORT void SUNLinSol_cuSolverSp_batchQR_SetDescription(SUNLinearSolver S, const char* desc); +SUNDIALS_EXPORT void SUNLinSol_cuSolverSp_batchQR_GetDeviceSpace(SUNLinearSolver S, + size_t* cuSolverInternal, + size_t* cuSolverWorkspace); + #ifdef __cplusplus } diff --git a/inst/include/sunlinsol/sunlinsol_dense.h b/inst/include/sunlinsol/sunlinsol_dense.h index fc1b6e3..17ba16b 100644 --- a/inst/include/sunlinsol/sunlinsol_dense.h +++ b/inst/include/sunlinsol/sunlinsol_dense.h @@ -3,7 +3,7 @@ * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -55,12 +55,7 @@ typedef struct _SUNLinearSolverContent_Dense *SUNLinearSolverContent_Dense; * Exported Functions for SUNLINSOL_DENSE * ---------------------------------------- */ -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_Dense(N_Vector y, SUNMatrix A); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNDenseLinearSolver(N_Vector y, - SUNMatrix A); - +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_Dense(N_Vector y, SUNMatrix A, SUNContext sunctx); SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_Dense(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_Dense(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_Dense(SUNLinearSolver S); diff --git a/inst/include/sunlinsol/sunlinsol_ginkgo.hpp b/inst/include/sunlinsol/sunlinsol_ginkgo.hpp new file mode 100644 index 0000000..5aa4a70 --- /dev/null +++ b/inst/include/sunlinsol/sunlinsol_ginkgo.hpp @@ -0,0 +1,348 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * SUNLinearSolver interface to Ginkgo linear solvers + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNLINSOL_GINKGO_HPP +#define _SUNLINSOL_GINKGO_HPP + +#include +#include +#include +#include +#include +#include + +namespace sundials { +namespace ginkgo { + +template +class LinearSolver; + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +inline SUNLinearSolver_Type SUNLinSolGetType_Ginkgo(SUNLinearSolver S) +{ + return SUNLINEARSOLVER_MATRIX_ITERATIVE; +} + +inline SUNLinearSolver_ID SUNLinSolGetID_Ginkgo(SUNLinearSolver S) +{ + return SUNLINEARSOLVER_GINKGO; +} + +template +int SUNLinSolSetup_Ginkgo(SUNLinearSolver S, SUNMatrix A) +{ + auto solver{static_cast*>(S->content)}; + solver->Setup(static_cast*>(A->content)); + return SUNLS_SUCCESS; +} + +template +int SUNLinSolSolve_Ginkgo(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, sunrealtype tol) +{ + auto solver{static_cast*>(S->content)}; + solver->Solve(b, x, tol); + return SUNLS_SUCCESS; +} + +template +int SUNLinSolFree_Ginkgo(SUNLinearSolver S) +{ + auto solver{static_cast*>(S->content)}; + delete solver; // NOLINT + return SUNLS_SUCCESS; +} + +template +int SUNLinSolNumIters_Ginkgo(SUNLinearSolver S) +{ + auto solver{static_cast*>(S->content)}; + return solver->NumIters(); +} + +template +sunrealtype SUNLinSolResNorm_Ginkgo(SUNLinearSolver S) +{ + auto solver{static_cast*>(S->content)}; + return solver->ResNorm(); +} + +} // namespace impl + +/// Custom gko::stop::Criterion that does the normal SUNDIALS stopping checks. +/// This checks if: +/// 1. Was the absolute residual tolerance met? +/// 2. Was the max iteration count reached? +class DefaultStop : public gko::EnablePolymorphicObject +{ + friend class gko::EnablePolymorphicObject; + +public: + GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) + { + sunrealtype GKO_FACTORY_PARAMETER_SCALAR(reduction_factor, + SUN_UNIT_ROUNDOFF); // NOLINT(cppcoreguidelines-avoid-magic-numbers) + gko::uint64 GKO_FACTORY_PARAMETER_SCALAR(max_iters, 5); + }; + GKO_ENABLE_CRITERION_FACTORY(DefaultStop, parameters, Factory); + GKO_ENABLE_BUILD_METHOD(Factory); + + gko::uint64 get_max_iters() const + { + return parameters_.max_iters; + } + + sunrealtype get_reduction_factor() const + { + return parameters_.reduction_factor; + } + +protected: + bool check_impl(gko::uint8 stoppingId, bool setFinalized, gko::array* stop_status, + bool* one_changed, const Updater&) override; + + explicit DefaultStop(std::shared_ptr exec) + : EnablePolymorphicObject(std::move(exec)) + {} + + explicit DefaultStop(const Factory* factory, const gko::stop::CriterionArgs& args) + : EnablePolymorphicObject(factory->get_executor()), parameters_{ + factory->get_parameters()} + { + criteria_.push_back(gko::stop::ResidualNorm::build() + .with_reduction_factor(parameters_.reduction_factor) + .with_baseline(gko::stop::mode::absolute) + .on(factory->get_executor()) + ->generate(args)); + criteria_.push_back( + gko::stop::Iteration::build().with_max_iters(parameters_.max_iters).on(factory->get_executor())->generate(args)); + } + +private: + std::vector> criteria_{}; +}; + +inline bool DefaultStop::check_impl(gko::uint8 stoppingId, bool setFinalized, + gko::array* stop_status, bool* one_changed, + const Updater& updater) +{ + bool one_converged = false; + gko::uint8 ids{1}; + *one_changed = false; + for (auto& c : criteria_) { + bool local_one_changed = false; + one_converged |= c->check(ids, setFinalized, stop_status, &local_one_changed, updater); + *one_changed |= local_one_changed; + if (one_converged) { + break; + } + ids++; + } + return one_converged; +} + +/// Class that wraps a Ginkgo solver (factory) and is convertible to a fully +/// functioning ``SUNLinearSolver``. +template +class LinearSolver : public ConvertibleTo +{ +public: + /// Default constructor - means the solver must be moved to + LinearSolver() = default; + + /// Constructs a new LinearSolver from a Ginkgo solver factory + /// \param gko_solver_factory The Ginkgo solver factory (typically + /// `gko::matrix::::Factory`) \param sunctx The SUNDIALS simulation + /// context (:c:type:`SUNContext`) + LinearSolver(std::shared_ptr gko_solver_factory, SUNContext sunctx) + : gko_solver_factory_(gko_solver_factory), gko_solver_(nullptr), + sunlinsol_(std::make_unique<_generic_SUNLinearSolver>()), + sunlinsol_ops_(std::make_unique<_generic_SUNLinearSolver_Ops>()), res_norm_(sunrealtype{0.0}) + { + sunlinsol_->content = this; + sunlinsol_->ops = sunlinsol_ops_.get(); + sunlinsol_->sunctx = sunctx; + + sunlinsol_->ops->gettype = impl::SUNLinSolGetType_Ginkgo; + sunlinsol_->ops->getid = impl::SUNLinSolGetID_Ginkgo; + sunlinsol_->ops->setup = impl::SUNLinSolSetup_Ginkgo; + sunlinsol_->ops->solve = impl::SUNLinSolSolve_Ginkgo; + sunlinsol_->ops->numiters = impl::SUNLinSolNumIters_Ginkgo; + sunlinsol_->ops->resnorm = impl::SUNLinSolResNorm_Ginkgo; + sunlinsol_->ops->free = impl::SUNLinSolFree_Ginkgo; + } + + // Copy constructor is deleted + LinearSolver(const LinearSolver& that_solver) = delete; + + /// Move constructor + LinearSolver(LinearSolver&& that_solver) noexcept + : gko_solver_factory_(std::move(that_solver.gko_solver_factory_)), gko_solver_(std::move(that_solver.gko_solver_)), + sunlinsol_(std::move(that_solver.sunlinsol_)), sunlinsol_ops_(std::move(that_solver.sunlinsol_ops_)), + iter_count_(that_solver.iter_count_), res_norm_(that_solver.res_norm_) + { + sunlinsol_->content = this; + sunlinsol_->ops = sunlinsol_ops_.get(); + } + + // Don't allow copy assignment + LinearSolver& operator=(const LinearSolver& rhs) = delete; + + /// Move assignment + LinearSolver& operator=(LinearSolver&& rhs) + { + gko_solver_factory_ = std::move(rhs.gko_solver_factory_); + gko_solver_ = std::move(rhs.gko_solver_); + sunlinsol_ = std::move(rhs.sunlinsol_); + sunlinsol_ops_ = std::move(rhs.sunlinsol_ops_); + iter_count_ = rhs.iter_count_; + res_norm_ = rhs.res_norm_; + sunlinsol_->content = this; + sunlinsol_->ops = sunlinsol_ops_.get(); + return *this; + } + + /// Default destructor + ~LinearSolver() override = default; + + /// Implicit conversion to a :c:type:`SUNLinearSolver` + operator SUNLinearSolver() override + { + return sunlinsol_.get(); + } + + /// Implicit conversion to a :c:type:`SUNLinearSolver` + operator SUNLinearSolver() const override + { + return sunlinsol_.get(); + } + + /// Explicit conversion to a :c:type:`SUNLinearSolver` + SUNLinearSolver Convert() override + { + return sunlinsol_.get(); + } + + /// Explicit conversion to a :c:type:`SUNLinearSolver` + SUNLinearSolver Convert() const override + { + return sunlinsol_.get(); + } + + /// Get the ``gko::Executor`` associated with the Ginkgo solver + std::shared_ptr GkoExec() const + { + return gko_solver_factory_->get_executor(); + } + + /// Get the underlying Ginkgo solver factory + std::shared_ptr GkoFactory() + { + return gko_solver_factory_; + } + + /// Get the underlying Ginkgo solver + /// \note This will be `nullptr` until the linear solver setup phase. + GkoSolverType* GkoSolver() + { + return gko_solver_.get(); + } + + /// Get the number of linear solver iterations in the most recent solve. + int NumIters() const + { + return iter_count_; + } + + /// Get the residual norm of the solution at the end of the last solve. + /// The type of residual norm depends on the Ginkgo stopping criteria + /// used with the solver. With the \ref DefaultStop "DefaultStop" criteria + /// this would be the absolute residual 2-norm. + sunrealtype ResNorm() const + { + return res_norm_; + } + + /// Setup the linear system + /// \param A the linear system matrix + GkoSolverType* Setup(Matrix* A) + { + gko_solver_ = gko_solver_factory_->generate(A->GkoMtx()); + return gko_solver_.get(); + } + + /// Solve the linear system Ax = b to the specificed tolerance. + /// \param b the right-hand side vector + /// \param x the solution vector + /// \param tol the tolerance to solve the system to + gko::LinOp* Solve(N_Vector b, N_Vector x, sunrealtype tol) + { + auto logger{gko::share(gko::log::Convergence::create())}; + + // Ginkgo provides a lot of options for stopping criterion, + // so we make it possible to use it, but default to using + // our normal iterative linear solver criterion. + // If the criterion on the solver is of type DefaultStop, + // then we will override the reduction_factor (tolerance). + auto crit{dynamic_cast(GkoSolver()->get_stop_criterion_factory().get())}; + if (crit != nullptr) { + auto new_crit = + DefaultStop::build().with_reduction_factor(tol).with_max_iters(crit->get_parameters().max_iters).on(GkoExec()); + new_crit->add_logger(logger); + GkoSolver()->set_stop_criterion_factory(std::move(new_crit)); + } + + gko::LinOp* result{nullptr}; + if (x != b) { + auto x_vec = impl::WrapVector(GkoExec(), x); + auto b_vec = impl::WrapVector(GkoExec(), b); + + // x = A^{-1} b + result = GkoSolver()->apply(b_vec.get(), x_vec.get()); + } + else { + auto x_vec = impl::WrapVector(GkoExec(), x); + + // x = A^{-1} x + result = GkoSolver()->apply(x_vec.get(), x_vec.get()); + } + + iter_count_ = static_cast(logger->get_num_iterations()); + GkoExec()->get_master()->copy_from(gko::lend(GkoExec()), 1, + gko::as(logger->get_residual_norm())->get_const_values(), + &res_norm_); + + return result; + } + +private: + std::shared_ptr gko_solver_factory_; + std::unique_ptr gko_solver_; + std::unique_ptr<_generic_SUNLinearSolver> sunlinsol_; + std::unique_ptr<_generic_SUNLinearSolver_Ops> sunlinsol_ops_; + int iter_count_{}; + sunrealtype res_norm_{}; +}; + +} // namespace ginkgo +} // namespace sundials + +#endif // SUNLINSOL_GINKGO_HPP diff --git a/inst/include/sunlinsol/sunlinsol_klu.h b/inst/include/sunlinsol/sunlinsol_klu.h index e739e7d..b2d70e8 100644 --- a/inst/include/sunlinsol/sunlinsol_klu.h +++ b/inst/include/sunlinsol/sunlinsol_klu.h @@ -5,7 +5,7 @@ * code, written by Carol S. Woodward @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -104,21 +104,12 @@ typedef struct _SUNLinearSolverContent_KLU *SUNLinearSolverContent_KLU; * Exported Functions for SUNLINSOL_KLU * ------------------------------------- */ -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_KLU(N_Vector y, SUNMatrix A); +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_KLU(N_Vector y, SUNMatrix A, SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_KLUReInit(SUNLinearSolver S, SUNMatrix A, sunindextype nnz, int reinit_type); SUNDIALS_EXPORT int SUNLinSol_KLUSetOrdering(SUNLinearSolver S, int ordering_choice); -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNKLU(N_Vector y, SUNMatrix A); -/* deprecated */ -SUNDIALS_EXPORT int SUNKLUReInit(SUNLinearSolver S, SUNMatrix A, - sunindextype nnz, int reinit_type); -/* deprecated */ -SUNDIALS_EXPORT int SUNKLUSetOrdering(SUNLinearSolver S, - int ordering_choice); - /* -------------------- * Accessor functions * -------------------- */ diff --git a/inst/include/sunlinsol/sunlinsol_kokkosdense.hpp b/inst/include/sunlinsol/sunlinsol_kokkosdense.hpp new file mode 100644 index 0000000..8115ae7 --- /dev/null +++ b/inst/include/sunlinsol/sunlinsol_kokkosdense.hpp @@ -0,0 +1,236 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the header file for a SUNLinearSolver using Kokkoks Kernels + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNLINSOL_KOKKOSDENSE_HPP +#define _SUNLINSOL_KOKKOSDENSE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace sundials { +namespace kokkos { + +// Forward declaration of DenseLinearSolver class +template +class DenseLinearSolver; + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +SUNLinearSolver_Type SUNLinSolGetType_KokkosDense(SUNLinearSolver S) +{ + return SUNLINEARSOLVER_DIRECT; +} + +SUNLinearSolver_ID SUNLinSolGetID_KokkosDense(SUNLinearSolver S) +{ + return SUNLINEARSOLVER_KOKKOSDENSE; +} + +template +int SUNLinSolSetup_KokkosDense(SUNLinearSolver S, SUNMatrix A) +{ + // Access matrix data + auto A_mat{sundials::kokkos::GetDenseMat(A)}; + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + + const auto blocks = A_mat->Blocks(); + + // Compute LU factorization of A (no pivoting) + using team_policy = typename LinearSolverType::team_policy; + using member_type = typename LinearSolverType::member_type; + + Kokkos::parallel_for( + "sunlinsol_lu", + team_policy(A_exec, static_cast(blocks), Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type& team_member) { + const auto idx = team_member.league_rank(); + auto A_subdata = Kokkos::subview(A_data, idx, Kokkos::ALL(), Kokkos::ALL()); + KokkosBatched::TeamLU< + member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team_member, + A_subdata); + }); + + return SUNLS_SUCCESS; +} + +template +int SUNLinSolSolve_KokkosDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, sunrealtype tol) +{ + // Copy b into x + N_VScale(SUN_RCONST(1.0), b, x); + + // Access matrix and vector data + auto A_mat{sundials::kokkos::GetDenseMat(A)}; + auto x_vec{sundials::kokkos::GetVec(x)}; + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + auto x_data = x_vec->View(); + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + + // Solve the linear system + using team_policy = typename LinearSolverType::team_policy; + using member_type = typename LinearSolverType::member_type; + using size_type = typename VectorType::size_type; + + Kokkos::parallel_for( + "sunlinsol_trsv", + team_policy(A_exec, static_cast(blocks), Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type& team_member) { + const auto idx = team_member.league_rank(); + auto A_subdata = Kokkos::subview(A_data, idx, Kokkos::ALL(), Kokkos::ALL()); + auto x_subdata = + Kokkos::subview(x_data, + Kokkos::pair(idx * rows, + (idx + 1) * rows)); + // Lower triangular solve + KokkosBatched::TeamVectorTrsv< + member_type, KokkosBatched::Uplo::Lower, + KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::Unit, + KokkosBatched::Algo::Trsv::Unblocked>::invoke(team_member, + SUN_RCONST(1.0), + A_subdata, x_subdata); + // Upper triangular solve + KokkosBatched::TeamVectorTrsv< + member_type, KokkosBatched::Uplo::Upper, + KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, + KokkosBatched::Algo::Trsv::Unblocked>::invoke(team_member, + SUN_RCONST(1.0), + A_subdata, x_subdata); + }); + + return SUNLS_SUCCESS; +} + +template +int SUNLinSolFree_KokkosDense(SUNLinearSolver S) +{ + auto S_ls{static_cast(S->content)}; + delete S_ls; // NOLINT + return SUNLS_SUCCESS; +} + +} // namespace impl + +// ============================================================================= +// Public namespace +// ============================================================================= + +// ----------------------------------------------------------------------------- +// Kokkos dense linear solver class, convertible to a SUNLinearSolver +// ----------------------------------------------------------------------------- + +template +class DenseLinearSolver : public sundials::impl::BaseLinearSolver, + public sundials::ConvertibleTo +{ +public: + using exec_space = ExecutionSpace; + using memory_space = MemorySpace; + using team_policy = typename Kokkos::TeamPolicy; + using member_type = typename Kokkos::TeamPolicy::member_type; + + // Default constructor - means the linear solver must be copied or moved to + DenseLinearSolver() = default; + + DenseLinearSolver(SUNContext sunctx) + : sundials::impl::BaseLinearSolver(sunctx) + { + initSUNLinearSolver(); + } + + // Move constructor + DenseLinearSolver(DenseLinearSolver&& that_solver) noexcept + : sundials::impl::BaseLinearSolver( + std::forward(that_solver)) + {} + + // Copy constructor + DenseLinearSolver(const DenseLinearSolver& that_solver) + : sundials::impl::BaseLinearSolver(that_solver) + {} + + // Move assignment + DenseLinearSolver& operator=(DenseLinearSolver&& rhs) noexcept + { + sundials::impl::BaseLinearSolver::operator=( + std::forward(rhs)); + return *this; + } + + // Copy assignment + DenseLinearSolver& operator=(const DenseLinearSolver& rhs) + { + sundials::impl::BaseLinearSolver::operator=(rhs); + return *this; + } + + // Default destructor since all members are RAII + virtual ~DenseLinearSolver() = default; + + // Override the ConvertibleTo methods + + // Implicit conversion to a SUNLinearSolver + operator SUNLinearSolver() override { return object_.get(); } + + // Implicit conversion to SUNLinearSolver + operator SUNLinearSolver() const override { return object_.get(); } + + // Explicit conversion to a SUNLinearSolver + SUNLinearSolver Convert() override { return object_.get(); } + + // Explicit conversion to a SUNLinearSolver + SUNLinearSolver Convert() const override { return object_.get(); } + +private: + void initSUNLinearSolver() + { + using vec_type = Vector; + using mat_type = DenseMatrix; + using ls_type = DenseLinearSolver; + + this->object_->content = this; + + this->object_->ops->gettype = impl::SUNLinSolGetType_KokkosDense; + this->object_->ops->getid = impl::SUNLinSolGetID_KokkosDense; + this->object_->ops->setup = + impl::SUNLinSolSetup_KokkosDense; + this->object_->ops->solve = + impl::SUNLinSolSolve_KokkosDense; + this->object_->ops->free = impl::SUNLinSolFree_KokkosDense; + } +}; + +} // namespace kokkos +} // namespace sundials + +#endif diff --git a/inst/include/sunlinsol/sunlinsol_lapackband.h b/inst/include/sunlinsol/sunlinsol_lapackband.h index 5bce527..e4d5317 100644 --- a/inst/include/sunlinsol/sunlinsol_lapackband.h +++ b/inst/include/sunlinsol/sunlinsol_lapackband.h @@ -3,7 +3,7 @@ * Programmer(s): Daniel Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -25,7 +25,6 @@ #define _SUNLINSOL_LAPBAND_H #include -#include #include #include #include @@ -34,23 +33,6 @@ extern "C" { #endif -/* Interfaces to match 'realtype' with the correct LAPACK functions */ -#if defined(SUNDIALS_DOUBLE_PRECISION) -#define xgbtrf_f77 dgbtrf_f77 -#define xgbtrs_f77 dgbtrs_f77 -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define xgbtrf_f77 sgbtrf_f77 -#define xgbtrs_f77 sgbtrs_f77 -#else -#error Incompatible realtype for LAPACK; disable LAPACK and rebuild -#endif - -/* Catch to disable LAPACK linear solvers with incompatible sunindextype */ -#if defined(SUNDIALS_INT32_T) -#else /* incompatible sunindextype for LAPACK */ -#error Incompatible sunindextype for LAPACK; disable LAPACK and rebuild -#endif - /* ---------------------------------------------- * LAPACK band implementation of SUNLinearSolver * ---------------------------------------------- */ @@ -69,11 +51,8 @@ typedef struct _SUNLinearSolverContent_LapackBand *SUNLinearSolverContent_Lapack * -------------------------------------------- */ SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_LapackBand(N_Vector y, - SUNMatrix A); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNLapackBand(N_Vector y, SUNMatrix A); - + SUNMatrix A, + SUNContext sunctx); SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_LapackBand(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_LapackBand(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_LapackBand(SUNLinearSolver S); diff --git a/inst/include/sunlinsol/sunlinsol_lapackdense.h b/inst/include/sunlinsol/sunlinsol_lapackdense.h index e1b66cd..e456fe4 100644 --- a/inst/include/sunlinsol/sunlinsol_lapackdense.h +++ b/inst/include/sunlinsol/sunlinsol_lapackdense.h @@ -3,7 +3,7 @@ * Programmer(s): Daniel Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -25,7 +25,6 @@ #define _SUNLINSOL_LAPDENSE_H #include -#include #include #include #include @@ -34,23 +33,6 @@ extern "C" { #endif -/* Interfaces to match 'realtype' with the correct LAPACK functions */ -#if defined(SUNDIALS_DOUBLE_PRECISION) -#define xgetrf_f77 dgetrf_f77 -#define xgetrs_f77 dgetrs_f77 -#elif defined(SUNDIALS_SINGLE_PRECISION) -#define xgetrf_f77 sgetrf_f77 -#define xgetrs_f77 sgetrs_f77 -#else -#error Incompatible realtype for LAPACK; disable LAPACK and rebuild -#endif - -/* Catch to disable LAPACK linear solvers with incompatible sunindextype */ -#if defined(SUNDIALS_INT32_T) -#else /* incompatible sunindextype for LAPACK */ -#error Incompatible sunindextype for LAPACK; disable LAPACK and rebuild -#endif - /* ----------------------------------------------- * LAPACK dense implementation of SUNLinearSolver * ----------------------------------------------- */ @@ -69,11 +51,8 @@ typedef struct _SUNLinearSolverContent_LapackDense *SUNLinearSolverContent_Lapac * --------------------------------------------- */ SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_LapackDense(N_Vector y, - SUNMatrix A); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNLapackDense(N_Vector y, SUNMatrix A); - + SUNMatrix A, + SUNContext sunctx); SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_LapackDense(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_LapackDense(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_LapackDense(SUNLinearSolver S); diff --git a/inst/include/sunlinsol/sunlinsol_magmadense.h b/inst/include/sunlinsol/sunlinsol_magmadense.h new file mode 100644 index 0000000..ff298f8 --- /dev/null +++ b/inst/include/sunlinsol/sunlinsol_magmadense.h @@ -0,0 +1,79 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the MAGMA dense implementation of the + * SUNLINSOL module, SUNLINSOL_MAGMADENSE. + * ----------------------------------------------------------------- + */ + +#ifndef _SUNLINSOL_MAGMADENSE_H +#define _SUNLINSOL_MAGMADENSE_H + +#include +#include +#include +#include + +#if defined(SUNDIALS_MAGMA_BACKENDS_CUDA) +#define HAVE_CUBLAS +#elif defined(SUNDIALS_MAGMA_BACKENDS_HIP) +#define HAVE_HIP +#endif +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ----------------------------------------------- + * MAGMA dense implementation of SUNLinearSolver + * ----------------------------------------------- */ + +struct _SUNLinearSolverContent_MagmaDense { + int last_flag; + booleantype async; + sunindextype N; + SUNMemory pivots; + SUNMemory pivotsarr; + SUNMemory dpivotsarr; + SUNMemory infoarr; + SUNMemory rhsarr; + SUNMemoryHelper memhelp; + magma_queue_t q; +}; + +typedef struct _SUNLinearSolverContent_MagmaDense *SUNLinearSolverContent_MagmaDense; + + +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_MagmaDense(N_Vector y, SUNMatrix A, SUNContext sunctx); + +SUNDIALS_EXPORT int SUNLinSol_MagmaDense_SetAsync(SUNLinearSolver S, booleantype onoff); + +SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_MagmaDense(SUNLinearSolver S); +SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_MagmaDense(SUNLinearSolver S); +SUNDIALS_EXPORT int SUNLinSolInitialize_MagmaDense(SUNLinearSolver S); +SUNDIALS_EXPORT int SUNLinSolSetup_MagmaDense(SUNLinearSolver S, SUNMatrix A); +SUNDIALS_EXPORT int SUNLinSolSolve_MagmaDense(SUNLinearSolver S, SUNMatrix A, + N_Vector x, N_Vector b, realtype tol); +SUNDIALS_EXPORT sunindextype SUNLinSolLastFlag_MagmaDense(SUNLinearSolver S); +SUNDIALS_EXPORT int SUNLinSolSpace_MagmaDense(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS); +SUNDIALS_EXPORT int SUNLinSolFree_MagmaDense(SUNLinearSolver S); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunlinsol/sunlinsol_onemkldense.h b/inst/include/sunlinsol/sunlinsol_onemkldense.h new file mode 100644 index 0000000..0bef357 --- /dev/null +++ b/inst/include/sunlinsol/sunlinsol_onemkldense.h @@ -0,0 +1,85 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This is the header file for the SUNLINEARSOLVER class implementation using + * the Intel oneAPI Math Kernel Library (oneMKL). + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNLINSOL_ONEMKLDENSE_H +#define _SUNLINSOL_ONEMKLDENSE_H + +#include + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +struct _SUNLinearSolverContent_OneMklDense +{ + int last_flag; /* last error code returned */ + sunindextype rows; /* number of rows in A */ + SUNMemory pivots; /* pivots array */ + sunindextype f_scratch_size; /* num scratchpad elements */ + SUNMemory f_scratchpad; /* scratchpad memory */ + sunindextype s_scratch_size; /* num scratchpad elements */ + SUNMemory s_scratchpad; /* scratchpad memory */ + SUNMemoryType mem_type; /* memory type */ + SUNMemoryHelper mem_helper; /* memory helper */ + ::sycl::queue* queue; /* operation queue */ +}; + +typedef struct _SUNLinearSolverContent_OneMklDense *SUNLinearSolverContent_OneMklDense; + +/* --------------------------------------------------------------------------- + * Implementation specific functions + * ---------------------------------------------------------------------------*/ + +SUNDIALS_EXPORT +SUNLinearSolver SUNLinSol_OneMklDense(N_Vector y, SUNMatrix A, SUNContext sunctx); + +SUNDIALS_STATIC_INLINE +SUNLinearSolver_Type SUNLinSolGetType_OneMklDense(SUNLinearSolver S) { return SUNLINEARSOLVER_DIRECT; }; + +SUNDIALS_STATIC_INLINE +SUNLinearSolver_ID SUNLinSolGetID_OneMklDense(SUNLinearSolver S) { return SUNLINEARSOLVER_ONEMKLDENSE; }; + +SUNDIALS_EXPORT +int SUNLinSolInitialize_OneMklDense(SUNLinearSolver S); + +SUNDIALS_EXPORT +int SUNLinSolSetup_OneMklDense(SUNLinearSolver S, SUNMatrix A); + +SUNDIALS_EXPORT +int SUNLinSolSolve_OneMklDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol); + +SUNDIALS_EXPORT +sunindextype SUNLinSolLastFlag_OneMklDense(SUNLinearSolver S); + +SUNDIALS_EXPORT +int SUNLinSolSpace_OneMklDense(SUNLinearSolver S, long int *lenrwLS, + long int *leniwLS); + +SUNDIALS_EXPORT +int SUNLinSolFree_OneMklDense(SUNLinearSolver S); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunlinsol/sunlinsol_pcg.h b/inst/include/sunlinsol/sunlinsol_pcg.h index 221a85a..444cdd0 100644 --- a/inst/include/sunlinsol/sunlinsol_pcg.h +++ b/inst/include/sunlinsol/sunlinsol_pcg.h @@ -3,7 +3,7 @@ * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -43,14 +43,15 @@ extern "C" { struct _SUNLinearSolverContent_PCG { int maxl; int pretype; + booleantype zeroguess; int numiters; realtype resnorm; int last_flag; - ATimesFn ATimes; + SUNATimesFn ATimes; void* ATData; - PSetupFn Psetup; - PSolveFn Psolve; + SUNPSetupFn Psetup; + SUNPSolveFn Psolve; void* PData; N_Vector s; @@ -58,6 +59,9 @@ struct _SUNLinearSolverContent_PCG { N_Vector p; N_Vector z; N_Vector Ap; + + int print_level; + FILE* info_file; }; typedef struct _SUNLinearSolverContent_PCG *SUNLinearSolverContent_PCG; @@ -69,31 +73,27 @@ typedef struct _SUNLinearSolverContent_PCG *SUNLinearSolverContent_PCG; SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_PCG(N_Vector y, int pretype, - int maxl); + int maxl, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_PCGSetPrecType(SUNLinearSolver S, int pretype); SUNDIALS_EXPORT int SUNLinSol_PCGSetMaxl(SUNLinearSolver S, int maxl); -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNPCG(N_Vector y, int pretype, int maxl); -/* deprecated */ -SUNDIALS_EXPORT int SUNPCGSetPrecType(SUNLinearSolver S, int pretype); -/* deprecated */ -SUNDIALS_EXPORT int SUNPCGSetMaxl(SUNLinearSolver S, int maxl); - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_PCG(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_PCG(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_PCG(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetATimes_PCG(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); + SUNATimesFn ATimes); SUNDIALS_EXPORT int SUNLinSolSetPreconditioner_PCG(SUNLinearSolver S, void* P_data, - PSetupFn Pset, - PSolveFn Psol); + SUNPSetupFn Pset, + SUNPSolveFn Psol); SUNDIALS_EXPORT int SUNLinSolSetScalingVectors_PCG(SUNLinearSolver S, N_Vector s, N_Vector nul); +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess_PCG(SUNLinearSolver S, + booleantype onoff); SUNDIALS_EXPORT int SUNLinSolSetup_PCG(SUNLinearSolver S, SUNMatrix nul); SUNDIALS_EXPORT int SUNLinSolSolve_PCG(SUNLinearSolver S, SUNMatrix nul, N_Vector x, N_Vector b, realtype tol); @@ -105,6 +105,11 @@ SUNDIALS_EXPORT int SUNLinSolSpace_PCG(SUNLinearSolver S, long int *lenrwLS, long int *leniwLS); SUNDIALS_EXPORT int SUNLinSolFree_PCG(SUNLinearSolver S); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNLinSolSetInfoFile_PCG(SUNLinearSolver LS, + FILE* info_file); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +int SUNLinSolSetPrintLevel_PCG(SUNLinearSolver LS, int print_level); #ifdef __cplusplus } diff --git a/inst/include/sunlinsol/sunlinsol_spbcgs.h b/inst/include/sunlinsol/sunlinsol_spbcgs.h index 38984df..a8aa7fd 100644 --- a/inst/include/sunlinsol/sunlinsol_spbcgs.h +++ b/inst/include/sunlinsol/sunlinsol_spbcgs.h @@ -5,7 +5,7 @@ * Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -27,6 +27,8 @@ #ifndef _SUNLINSOL_SPBCGS_H #define _SUNLINSOL_SPBCGS_H +#include + #include #include #include @@ -45,14 +47,15 @@ extern "C" { struct _SUNLinearSolverContent_SPBCGS { int maxl; int pretype; + booleantype zeroguess; int numiters; realtype resnorm; int last_flag; - ATimesFn ATimes; + SUNATimesFn ATimes; void* ATData; - PSetupFn Psetup; - PSolveFn Psolve; + SUNPSetupFn Psetup; + SUNPSolveFn Psolve; void* PData; N_Vector s1; @@ -64,6 +67,9 @@ struct _SUNLinearSolverContent_SPBCGS { N_Vector u; N_Vector Ap; N_Vector vtemp; + + int print_level; + FILE* info_file; }; typedef struct _SUNLinearSolverContent_SPBCGS *SUNLinearSolverContent_SPBCGS; @@ -75,31 +81,26 @@ typedef struct _SUNLinearSolverContent_SPBCGS *SUNLinearSolverContent_SPBCGS; SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SPBCGS(N_Vector y, int pretype, - int maxl); + int maxl, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_SPBCGSSetPrecType(SUNLinearSolver S, int pretype); SUNDIALS_EXPORT int SUNLinSol_SPBCGSSetMaxl(SUNLinearSolver S, int maxl); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNSPBCGS(N_Vector y, int pretype, int maxl); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPBCGSSetPrecType(SUNLinearSolver S, int pretype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPBCGSSetMaxl(SUNLinearSolver S, int maxl); - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_SPBCGS(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_SPBCGS(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_SPBCGS(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetATimes_SPBCGS(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); + SUNATimesFn ATimes); SUNDIALS_EXPORT int SUNLinSolSetPreconditioner_SPBCGS(SUNLinearSolver S, void* P_data, - PSetupFn Pset, - PSolveFn Psol); + SUNPSetupFn Pset, + SUNPSolveFn Psol); SUNDIALS_EXPORT int SUNLinSolSetScalingVectors_SPBCGS(SUNLinearSolver S, N_Vector s1, N_Vector s2); +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess_SPBCGS(SUNLinearSolver S, + booleantype onoff); SUNDIALS_EXPORT int SUNLinSolSetup_SPBCGS(SUNLinearSolver S, SUNMatrix A); SUNDIALS_EXPORT int SUNLinSolSolve_SPBCGS(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol); @@ -111,6 +112,13 @@ SUNDIALS_EXPORT int SUNLinSolSpace_SPBCGS(SUNLinearSolver S, long int *lenrwLS, long int *leniwLS); SUNDIALS_EXPORT int SUNLinSolFree_SPBCGS(SUNLinearSolver S); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNLinSolSetInfoFile_SPBCGS(SUNLinearSolver LS, + FILE* info_file); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +int SUNLinSolSetPrintLevel_SPBCGS(SUNLinearSolver LS, int print_level); + + #ifdef __cplusplus diff --git a/inst/include/sunlinsol/sunlinsol_spfgmr.h b/inst/include/sunlinsol/sunlinsol_spfgmr.h index df376c8..e7d38ca 100644 --- a/inst/include/sunlinsol/sunlinsol_spfgmr.h +++ b/inst/include/sunlinsol/sunlinsol_spfgmr.h @@ -5,7 +5,7 @@ * Hilari C. Tiedeman @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -39,7 +39,7 @@ extern "C" { /* Default SPFGMR solver parameters */ #define SUNSPFGMR_MAXL_DEFAULT 5 #define SUNSPFGMR_MAXRS_DEFAULT 0 -#define SUNSPFGMR_GSTYPE_DEFAULT MODIFIED_GS +#define SUNSPFGMR_GSTYPE_DEFAULT SUN_MODIFIED_GS /* ----------------------------------------- * SPFGMR Implementation of SUNLinearSolver @@ -50,14 +50,15 @@ struct _SUNLinearSolverContent_SPFGMR { int pretype; int gstype; int max_restarts; + booleantype zeroguess; int numiters; realtype resnorm; int last_flag; - ATimesFn ATimes; + SUNATimesFn ATimes; void* ATData; - PSetupFn Psetup; - PSolveFn Psolve; + SUNPSetupFn Psetup; + SUNPSolveFn Psolve; void* PData; N_Vector s1; @@ -72,6 +73,9 @@ struct _SUNLinearSolverContent_SPFGMR { realtype *cv; N_Vector *Xv; + + int print_level; + FILE* info_file; }; typedef struct _SUNLinearSolverContent_SPFGMR *SUNLinearSolverContent_SPFGMR; @@ -82,36 +86,28 @@ typedef struct _SUNLinearSolverContent_SPFGMR *SUNLinearSolverContent_SPFGMR; SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SPFGMR(N_Vector y, int pretype, - int maxl); + int maxl, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_SPFGMRSetPrecType(SUNLinearSolver S, int pretype); SUNDIALS_EXPORT int SUNLinSol_SPFGMRSetGSType(SUNLinearSolver S, int gstype); SUNDIALS_EXPORT int SUNLinSol_SPFGMRSetMaxRestarts(SUNLinearSolver S, int maxrs); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNSPFGMR(N_Vector y, int pretype, int maxl); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPFGMRSetPrecType(SUNLinearSolver S, int pretype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPFGMRSetGSType(SUNLinearSolver S, int gstype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPFGMRSetMaxRestarts(SUNLinearSolver S, int maxrs); - - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_SPFGMR(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_SPFGMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_SPFGMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetATimes_SPFGMR(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); + SUNATimesFn ATimes); SUNDIALS_EXPORT int SUNLinSolSetPreconditioner_SPFGMR(SUNLinearSolver S, void* P_data, - PSetupFn Pset, - PSolveFn Psol); + SUNPSetupFn Pset, + SUNPSolveFn Psol); SUNDIALS_EXPORT int SUNLinSolSetScalingVectors_SPFGMR(SUNLinearSolver S, N_Vector s1, N_Vector s2); +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess_SPFGMR(SUNLinearSolver S, + booleantype onoff); SUNDIALS_EXPORT int SUNLinSolSetup_SPFGMR(SUNLinearSolver S, SUNMatrix A); SUNDIALS_EXPORT int SUNLinSolSolve_SPFGMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol); @@ -123,6 +119,11 @@ SUNDIALS_EXPORT int SUNLinSolSpace_SPFGMR(SUNLinearSolver S, long int *lenrwLS, long int *leniwLS); SUNDIALS_EXPORT int SUNLinSolFree_SPFGMR(SUNLinearSolver S); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNLinSolSetInfoFile_SPFGMR(SUNLinearSolver LS, + FILE* info_file); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +int SUNLinSolSetPrintLevel_SPFGMR(SUNLinearSolver LS, int print_level); #ifdef __cplusplus diff --git a/inst/include/sunlinsol/sunlinsol_spgmr.h b/inst/include/sunlinsol/sunlinsol_spgmr.h index 7b54d47..9ba6368 100644 --- a/inst/include/sunlinsol/sunlinsol_spgmr.h +++ b/inst/include/sunlinsol/sunlinsol_spgmr.h @@ -5,7 +5,7 @@ * Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -28,6 +28,8 @@ #ifndef _SUNLINSOL_SPGMR_H #define _SUNLINSOL_SPGMR_H +#include + #include #include #include @@ -39,7 +41,7 @@ extern "C" { /* Default SPGMR solver parameters */ #define SUNSPGMR_MAXL_DEFAULT 5 #define SUNSPGMR_MAXRS_DEFAULT 0 -#define SUNSPGMR_GSTYPE_DEFAULT MODIFIED_GS +#define SUNSPGMR_GSTYPE_DEFAULT SUN_MODIFIED_GS /* ---------------------------------------- * SPGMR Implementation of SUNLinearSolver @@ -50,14 +52,15 @@ struct _SUNLinearSolverContent_SPGMR { int pretype; int gstype; int max_restarts; + booleantype zeroguess; int numiters; realtype resnorm; int last_flag; - ATimesFn ATimes; + SUNATimesFn ATimes; void* ATData; - PSetupFn Psetup; - PSolveFn Psolve; + SUNPSetupFn Psetup; + SUNPSolveFn Psolve; void* PData; N_Vector s1; @@ -71,6 +74,9 @@ struct _SUNLinearSolverContent_SPGMR { realtype *cv; N_Vector *Xv; + + int print_level; + FILE* info_file; }; typedef struct _SUNLinearSolverContent_SPGMR *SUNLinearSolverContent_SPGMR; @@ -82,35 +88,28 @@ typedef struct _SUNLinearSolverContent_SPGMR *SUNLinearSolverContent_SPGMR; SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SPGMR(N_Vector y, int pretype, - int maxl); + int maxl, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_SPGMRSetPrecType(SUNLinearSolver S, int pretype); SUNDIALS_EXPORT int SUNLinSol_SPGMRSetGSType(SUNLinearSolver S, int gstype); SUNDIALS_EXPORT int SUNLinSol_SPGMRSetMaxRestarts(SUNLinearSolver S, int maxrs); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNSPGMR(N_Vector y, int pretype, int maxl); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPGMRSetPrecType(SUNLinearSolver S, int pretype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPGMRSetGSType(SUNLinearSolver S, int gstype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPGMRSetMaxRestarts(SUNLinearSolver S, int maxrs); - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_SPGMR(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_SPGMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_SPGMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetATimes_SPGMR(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); + SUNATimesFn ATimes); SUNDIALS_EXPORT int SUNLinSolSetPreconditioner_SPGMR(SUNLinearSolver S, void* P_data, - PSetupFn Pset, - PSolveFn Psol); + SUNPSetupFn Pset, + SUNPSolveFn Psol); SUNDIALS_EXPORT int SUNLinSolSetScalingVectors_SPGMR(SUNLinearSolver S, N_Vector s1, N_Vector s2); +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess_SPGMR(SUNLinearSolver S, + booleantype onff); SUNDIALS_EXPORT int SUNLinSolSetup_SPGMR(SUNLinearSolver S, SUNMatrix A); SUNDIALS_EXPORT int SUNLinSolSolve_SPGMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol); @@ -122,6 +121,12 @@ SUNDIALS_EXPORT int SUNLinSolSpace_SPGMR(SUNLinearSolver S, long int *lenrwLS, long int *leniwLS); SUNDIALS_EXPORT int SUNLinSolFree_SPGMR(SUNLinearSolver S); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNLinSolSetInfoFile_SPGMR(SUNLinearSolver LS, + FILE* info_file); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +int SUNLinSolSetPrintLevel_SPGMR(SUNLinearSolver LS, int print_level); + #ifdef __cplusplus diff --git a/inst/include/sunlinsol/sunlinsol_sptfqmr.h b/inst/include/sunlinsol/sunlinsol_sptfqmr.h index 4627d9d..c75b696 100644 --- a/inst/include/sunlinsol/sunlinsol_sptfqmr.h +++ b/inst/include/sunlinsol/sunlinsol_sptfqmr.h @@ -4,7 +4,7 @@ * Based on code sundials_sptfqmr.h by: Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -45,14 +45,15 @@ extern "C" { struct _SUNLinearSolverContent_SPTFQMR { int maxl; int pretype; + booleantype zeroguess; int numiters; realtype resnorm; int last_flag; - ATimesFn ATimes; + SUNATimesFn ATimes; void* ATData; - PSetupFn Psetup; - PSolveFn Psolve; + SUNPSetupFn Psetup; + SUNPSolveFn Psolve; void* PData; N_Vector s1; @@ -67,6 +68,9 @@ struct _SUNLinearSolverContent_SPTFQMR { N_Vector vtemp1; N_Vector vtemp2; N_Vector vtemp3; + + int print_level; + FILE* info_file; }; typedef struct _SUNLinearSolverContent_SPTFQMR *SUNLinearSolverContent_SPTFQMR; @@ -77,31 +81,26 @@ typedef struct _SUNLinearSolverContent_SPTFQMR *SUNLinearSolverContent_SPTFQMR; SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SPTFQMR(N_Vector y, int pretype, - int maxl); + int maxl, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_SPTFQMRSetPrecType(SUNLinearSolver S, int pretype); SUNDIALS_EXPORT int SUNLinSol_SPTFQMRSetMaxl(SUNLinearSolver S, int maxl); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNSPTFQMR(N_Vector y, int pretype, int maxl); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPTFQMRSetPrecType(SUNLinearSolver S, int pretype); -/* deprecated */ -SUNDIALS_EXPORT int SUNSPTFQMRSetMaxl(SUNLinearSolver S, int maxl); - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_SPTFQMR(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_SPTFQMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_SPTFQMR(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolSetATimes_SPTFQMR(SUNLinearSolver S, void* A_data, - ATimesFn ATimes); + SUNATimesFn ATimes); SUNDIALS_EXPORT int SUNLinSolSetPreconditioner_SPTFQMR(SUNLinearSolver S, void* P_data, - PSetupFn Pset, - PSolveFn Psol); + SUNPSetupFn Pset, + SUNPSolveFn Psol); SUNDIALS_EXPORT int SUNLinSolSetScalingVectors_SPTFQMR(SUNLinearSolver S, N_Vector s1, N_Vector s2); +SUNDIALS_EXPORT int SUNLinSolSetZeroGuess_SPTFQMR(SUNLinearSolver S, + booleantype onoff); SUNDIALS_EXPORT int SUNLinSolSetup_SPTFQMR(SUNLinearSolver S, SUNMatrix A); SUNDIALS_EXPORT int SUNLinSolSolve_SPTFQMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol); @@ -113,6 +112,11 @@ SUNDIALS_EXPORT int SUNLinSolSpace_SPTFQMR(SUNLinearSolver S, long int *lenrwLS, long int *leniwLS); SUNDIALS_EXPORT int SUNLinSolFree_SPTFQMR(SUNLinearSolver S); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNLinSolSetInfoFile_SPTFQMR(SUNLinearSolver LS, + FILE* info_file); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +int SUNLinSolSetPrintLevel_SPTFQMR(SUNLinearSolver LS, int print_level); #ifdef __cplusplus diff --git a/inst/include/sunlinsol/sunlinsol_superludist.h b/inst/include/sunlinsol/sunlinsol_superludist.h index 3d3ac84..1d1d501 100644 --- a/inst/include/sunlinsol/sunlinsol_superludist.h +++ b/inst/include/sunlinsol/sunlinsol_superludist.h @@ -3,7 +3,7 @@ * Programmer(s): Cody J. Balos @ LLNL * ---------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -32,11 +32,22 @@ #define _SUNLINSOL_SLUDIST_H #include + #include + #include #include #include -#include +#include + +#define xLUstructInit dLUstructInit +#define xScalePermstructInit dScalePermstructInit +#define xScalePermstructFree dScalePermstructFree +#define xLUstructFree dLUstructFree +#define xDestroy_LU dDestroy_LU +#define xScalePermstruct_t dScalePermstruct_t +#define xLUstruct_t dLUstruct_t +#define xSOLVEstruct_t dSOLVEstruct_t #ifdef __cplusplus extern "C" { @@ -54,10 +65,10 @@ struct _SUNLinearSolverContent_SuperLUDIST { int last_flag; realtype berr; gridinfo_t *grid; - LUstruct_t *lu; + xLUstruct_t *lu; superlu_dist_options_t *options; - ScalePermstruct_t *scaleperm; - SOLVEstruct_t *solve; + xScalePermstruct_t *scaleperm; + xSOLVEstruct_t *solve; SuperLUStat_t *stat; sunindextype N; }; @@ -73,11 +84,12 @@ typedef struct _SUNLinearSolverContent_SuperLUDIST *SUNLinearSolverContent_Super SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SuperLUDIST(N_Vector y, SUNMatrix A, gridinfo_t *grid, - LUstruct_t *lu, - ScalePermstruct_t *scaleperm, - SOLVEstruct_t *solve, + xLUstruct_t *lu, + xScalePermstruct_t *scaleperm, + xSOLVEstruct_t *solve, SuperLUStat_t *stat, - superlu_dist_options_t *options); + superlu_dist_options_t *options, + SUNContext sunctx); /* * ---------------------------------------------------------------------------- @@ -87,10 +99,10 @@ SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SuperLUDIST(N_Vector y, SUNMatrix A, SUNDIALS_EXPORT realtype SUNLinSol_SuperLUDIST_GetBerr(SUNLinearSolver LS); SUNDIALS_EXPORT gridinfo_t* SUNLinSol_SuperLUDIST_GetGridinfo(SUNLinearSolver LS); -SUNDIALS_EXPORT LUstruct_t* SUNLinSol_SuperLUDIST_GetLUstruct(SUNLinearSolver LS); +SUNDIALS_EXPORT xLUstruct_t* SUNLinSol_SuperLUDIST_GetLUstruct(SUNLinearSolver LS); SUNDIALS_EXPORT superlu_dist_options_t* SUNLinSol_SuperLUDIST_GetSuperLUOptions(SUNLinearSolver LS); -SUNDIALS_EXPORT ScalePermstruct_t* SUNLinSol_SuperLUDIST_GetScalePermstruct(SUNLinearSolver LS); -SUNDIALS_EXPORT SOLVEstruct_t* SUNLinSol_SuperLUDIST_GetSOLVEstruct(SUNLinearSolver LS); +SUNDIALS_EXPORT xScalePermstruct_t* SUNLinSol_SuperLUDIST_GetScalePermstruct(SUNLinearSolver LS); +SUNDIALS_EXPORT xSOLVEstruct_t* SUNLinSol_SuperLUDIST_GetSOLVEstruct(SUNLinearSolver LS); SUNDIALS_EXPORT SuperLUStat_t* SUNLinSol_SuperLUDIST_GetSuperLUStat(SUNLinearSolver LS); /* diff --git a/inst/include/sunlinsol/sunlinsol_superlumt.h b/inst/include/sunlinsol/sunlinsol_superlumt.h index c7dc563..b54c2d5 100644 --- a/inst/include/sunlinsol/sunlinsol_superlumt.h +++ b/inst/include/sunlinsol/sunlinsol_superlumt.h @@ -5,7 +5,7 @@ * written by Carol S. Woodward @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -95,17 +95,10 @@ typedef struct _SUNLinearSolverContent_SuperLUMT *SUNLinearSolverContent_SuperLU SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_SuperLUMT(N_Vector y, SUNMatrix A, - int num_threads); + int num_threads, + SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_SuperLUMTSetOrdering(SUNLinearSolver S, int ordering_choice); - -/* deprecated */ -SUNDIALS_EXPORT SUNLinearSolver SUNSuperLUMT(N_Vector y, SUNMatrix A, - int num_threads); -/* deprecated */ -SUNDIALS_EXPORT int SUNSuperLUMTSetOrdering(SUNLinearSolver S, - int ordering_choice); - SUNDIALS_EXPORT SUNLinearSolver_Type SUNLinSolGetType_SuperLUMT(SUNLinearSolver S); SUNDIALS_EXPORT SUNLinearSolver_ID SUNLinSolGetID_SuperLUMT(SUNLinearSolver S); SUNDIALS_EXPORT int SUNLinSolInitialize_SuperLUMT(SUNLinearSolver S); diff --git a/inst/include/sunlinsol_rmumps.h b/inst/include/sunlinsol_rmumps.h index 2c949bd..8f079d4 100644 --- a/inst/include/sunlinsol_rmumps.h +++ b/inst/include/sunlinsol_rmumps.h @@ -8,6 +8,7 @@ using namespace Rcpp; using namespace arma; +#include #include #include #include @@ -50,7 +51,7 @@ struct _SUNLinearSolverContent_RMUMPS { #undef SUNDIALS_EXPORT #define SUNDIALS_EXPORT typedef struct _SUNLinearSolverContent_RMUMPS *SUNLinearSolverContent_RMUMPS; -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_RMUMPS(N_Vector y, SUNMatrix A, int permutation); +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_RMUMPS(N_Vector y, SUNMatrix A, int permutation, SUNContext sunctx); SUNDIALS_EXPORT int SUNLinSol_RMUMPSSetOrdering(SUNLinearSolver S, std::string ordering_choice); /* diff --git a/inst/include/sunmatrix/sunmatrix_band.h b/inst/include/sunmatrix/sunmatrix_band.h index ea0727d..67e3402 100644 --- a/inst/include/sunmatrix/sunmatrix_band.h +++ b/inst/include/sunmatrix/sunmatrix_band.h @@ -5,7 +5,7 @@ * Based on code sundials_direct.h by: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -14,15 +14,15 @@ * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------- - * This is the header file for the band implementation of the + * This is the header file for the band implementation of the * SUNMATRIX module, SUNMATRIX_BAND. * * Notes: * - The definition of the generic SUNMatrix structure can be found * in the header file sundials_matrix.h. * - The definition of the type 'realtype' can be found in the - * header file sundials_types.h, and it may be changed (at the - * configuration stage) according to the user's needs. + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. * The sundials_types.h file also contains the definition * for the type 'booleantype' and 'indextype'. * ----------------------------------------------------------------- @@ -41,7 +41,7 @@ extern "C" { /* --------------------------------- * Band implementation of SUNMatrix * --------------------------------- */ - + struct _SUNMatrixContent_Band { sunindextype M; sunindextype N; @@ -56,7 +56,7 @@ struct _SUNMatrixContent_Band { typedef struct _SUNMatrixContent_Band *SUNMatrixContent_Band; - + /* ------------------------------------ * Macros for access to SUNMATRIX_BAND * ------------------------------------ */ @@ -89,16 +89,17 @@ typedef struct _SUNMatrixContent_Band *SUNMatrixContent_Band; /* ---------------------------------------- - * Exported Functions for SUNMATRIX_BAND + * Exported Functions for SUNMATRIX_BAND * ---------------------------------------- */ SUNDIALS_EXPORT SUNMatrix SUNBandMatrix(sunindextype N, sunindextype mu, - sunindextype ml); + sunindextype ml, SUNContext sunctx); SUNDIALS_EXPORT SUNMatrix SUNBandMatrixStorage(sunindextype N, sunindextype mu, sunindextype ml, - sunindextype smu); + sunindextype smu, + SUNContext sunctx); SUNDIALS_EXPORT void SUNBandMatrix_Print(SUNMatrix A, FILE* outfile); @@ -108,6 +109,7 @@ SUNDIALS_EXPORT sunindextype SUNBandMatrix_LowerBandwidth(SUNMatrix A); SUNDIALS_EXPORT sunindextype SUNBandMatrix_UpperBandwidth(SUNMatrix A); SUNDIALS_EXPORT sunindextype SUNBandMatrix_StoredUpperBandwidth(SUNMatrix A); SUNDIALS_EXPORT sunindextype SUNBandMatrix_LDim(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNBandMatrix_LData(SUNMatrix A); SUNDIALS_EXPORT realtype* SUNBandMatrix_Data(SUNMatrix A); SUNDIALS_EXPORT realtype** SUNBandMatrix_Cols(SUNMatrix A); SUNDIALS_EXPORT realtype* SUNBandMatrix_Column(SUNMatrix A, sunindextype j); @@ -121,7 +123,7 @@ SUNDIALS_EXPORT int SUNMatScaleAdd_Band(realtype c, SUNMatrix A, SUNMatrix B); SUNDIALS_EXPORT int SUNMatScaleAddI_Band(realtype c, SUNMatrix A); SUNDIALS_EXPORT int SUNMatMatvec_Band(SUNMatrix A, N_Vector x, N_Vector y); SUNDIALS_EXPORT int SUNMatSpace_Band(SUNMatrix A, long int *lenrw, long int *leniw); - + #ifdef __cplusplus } #endif diff --git a/inst/include/sunmatrix/sunmatrix_cusparse.h b/inst/include/sunmatrix/sunmatrix_cusparse.h new file mode 100644 index 0000000..a18483d --- /dev/null +++ b/inst/include/sunmatrix/sunmatrix_cusparse.h @@ -0,0 +1,135 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file is for the cuSPARSE implementation of the + * SUNMATRIX module. + * ----------------------------------------------------------------- + */ + +#ifndef _SUNMATRIX_CUSPARSE_H +#define _SUNMATRIX_CUSPARSE_H + +#include + +#include +#include + +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ------------------------------------------ + * Implementation of SUNMATRIX_CUSPARSE + * ------------------------------------------ */ + +/* storage formats */ +#define SUNMAT_CUSPARSE_CSR 0 +#define SUNMAT_CUSPARSE_BCSR 1 + +struct _SUNMatrix_Content_cuSparse { + int M; + int N; + int NNZ; + int nblocks; + int blockrows; + int blockcols; + int blocknnz; + int sparse_type; + booleantype own_matd; + booleantype fixed_pattern; + booleantype matvec_issetup; + SUNMemory colind; + SUNMemory rowptrs; + SUNMemory data; + SUNMemoryHelper mem_helper; + cusparseMatDescr_t mat_descr; +#if CUDART_VERSION >= 11000 + SUNMemory dBufferMem; + size_t bufferSize; + cusparseDnVecDescr_t vecX, vecY; + cusparseSpMatDescr_t spmat_descr; +#endif + cusparseHandle_t cusp_handle; + SUNCudaExecPolicy* exec_policy; +}; + +typedef struct _SUNMatrix_Content_cuSparse *SUNMatrix_Content_cuSparse; + +/* ------------------------------------------------------------------ + * Constructors. + * ------------------------------------------------------------------ */ + +SUNDIALS_EXPORT SUNMatrix SUNMatrix_cuSparse_NewCSR(int M, int N, int NNZ, cusparseHandle_t cusp, + SUNContext sunctx); +SUNDIALS_EXPORT SUNMatrix SUNMatrix_cuSparse_MakeCSR(cusparseMatDescr_t mat_descr, int M, int N, int NNZ, + int *rowptrs , int *colind , realtype *data, + cusparseHandle_t cusp, SUNContext sunctx); + +/* Creates a CSR block-diagonal matrix where each block shares the same sparsity structure. + Reduces memory usage by only storing the row pointers and column indices for one block. */ +SUNDIALS_EXPORT SUNMatrix SUNMatrix_cuSparse_NewBlockCSR(int nblocks, int blockrows, int blockcols, + int blocknnz, cusparseHandle_t cusp, + SUNContext sunctx); + + +/* ------------------------------------------------------------------ + * Implementation specific routines. + * ------------------------------------------------------------------ */ + +SUNDIALS_EXPORT int SUNMatrix_cuSparse_SparseType(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_Rows(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_Columns(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_NNZ(SUNMatrix A); +SUNDIALS_EXPORT int* SUNMatrix_cuSparse_IndexPointers(SUNMatrix A); +SUNDIALS_EXPORT int* SUNMatrix_cuSparse_IndexValues(SUNMatrix A); +SUNDIALS_EXPORT realtype* SUNMatrix_cuSparse_Data(SUNMatrix A); + +SUNDIALS_EXPORT int SUNMatrix_cuSparse_SetFixedPattern(SUNMatrix A, booleantype yesno); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_SetKernelExecPolicy(SUNMatrix A, SUNCudaExecPolicy* exec_policy); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_NumBlocks(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_BlockRows(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_BlockColumns(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_BlockNNZ(SUNMatrix A); +SUNDIALS_EXPORT realtype* SUNMatrix_cuSparse_BlockData(SUNMatrix A, int blockidx); +SUNDIALS_EXPORT cusparseMatDescr_t SUNMatrix_cuSparse_MatDescr(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_CopyToDevice(SUNMatrix device, realtype* h_data, + int* h_idxptrs, int* h_idxvals); +SUNDIALS_EXPORT int SUNMatrix_cuSparse_CopyFromDevice(SUNMatrix device, realtype* h_data, + int* h_idxptrs, int* h_idxvals); + + +/* ------------------------------------------------------------------ + * SUNMatrix API routines. + * ------------------------------------------------------------------ */ + +SUNDIALS_EXPORT SUNMatrix_ID SUNMatGetID_cuSparse(SUNMatrix A); +SUNDIALS_EXPORT SUNMatrix SUNMatClone_cuSparse(SUNMatrix A); +SUNDIALS_EXPORT void SUNMatDestroy_cuSparse(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatZero_cuSparse(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatCopy_cuSparse(SUNMatrix A, SUNMatrix B); +SUNDIALS_EXPORT int SUNMatScaleAdd_cuSparse(realtype c, SUNMatrix A, SUNMatrix B); +SUNDIALS_EXPORT int SUNMatScaleAddI_cuSparse(realtype c, SUNMatrix A); +SUNDIALS_EXPORT int SUNMatMatvecSetup_cuSparse(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatMatvec_cuSparse(SUNMatrix A, N_Vector x, N_Vector y); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunmatrix/sunmatrix_dense.h b/inst/include/sunmatrix/sunmatrix_dense.h index de40e56..418d16d 100644 --- a/inst/include/sunmatrix/sunmatrix_dense.h +++ b/inst/include/sunmatrix/sunmatrix_dense.h @@ -5,7 +5,7 @@ * Based on code sundials_direct.h by: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -14,15 +14,15 @@ * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------- - * This is the header file for the dense implementation of the + * This is the header file for the dense implementation of the * SUNMATRIX module, SUNMATRIX_DENSE. * * Notes: * - The definition of the generic SUNMatrix structure can be found * in the header file sundials_matrix.h. * - The definition of the type 'realtype' can be found in the - * header file sundials_types.h, and it may be changed (at the - * configuration stage) according to the user's needs. + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. * The sundials_types.h file also contains the definition * for the type 'booleantype' and 'indextype'. * ----------------------------------------------------------------- @@ -41,7 +41,7 @@ extern "C" { /* ---------------------------------- * Dense implementation of SUNMatrix * ---------------------------------- */ - + struct _SUNMatrixContent_Dense { sunindextype M; sunindextype N; @@ -76,7 +76,7 @@ typedef struct _SUNMatrixContent_Dense *SUNMatrixContent_Dense; * Exported Functions for SUNMATRIX_DENSE * --------------------------------------- */ -SUNDIALS_EXPORT SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N); +SUNDIALS_EXPORT SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N, SUNContext sunctx); SUNDIALS_EXPORT void SUNDenseMatrix_Print(SUNMatrix A, FILE* outfile); @@ -97,7 +97,7 @@ SUNDIALS_EXPORT int SUNMatScaleAddI_Dense(realtype c, SUNMatrix A); SUNDIALS_EXPORT int SUNMatMatvec_Dense(SUNMatrix A, N_Vector x, N_Vector y); SUNDIALS_EXPORT int SUNMatSpace_Dense(SUNMatrix A, long int *lenrw, long int *leniw); - + #ifdef __cplusplus } #endif diff --git a/inst/include/sunmatrix/sunmatrix_ginkgo.hpp b/inst/include/sunmatrix/sunmatrix_ginkgo.hpp new file mode 100644 index 0000000..9d0937c --- /dev/null +++ b/inst/include/sunmatrix/sunmatrix_ginkgo.hpp @@ -0,0 +1,361 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * SUNMatrix interface to Ginkgo matrices + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNMATRIX_GINKGO_HPP +#define _SUNMATRIX_GINKGO_HPP + +#include +#include +#include +#include +#include + +namespace sundials { +namespace ginkgo { + +// Forward decalaration of regular Matrix class +template +class Matrix; + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +using GkoDenseMat = gko::matrix::Dense; +using GkoCsrMat = gko::matrix::Csr; +using GkoVecType = GkoDenseMat; + +// +// Prototypes for non-class methods that operate on Matrix +// + +inline std::unique_ptr WrapVector(std::shared_ptr gko_exec, N_Vector x); + +inline std::unique_ptr WrapConstVector(std::shared_ptr gko_exec, N_Vector x); + +template +void Print(Matrix& A, std::ostream& ost = std::cout); + +template +void Matvec(Matrix& A, GkoVecType* x, GkoVecType* y); + +template +void Matvec(Matrix& A, N_Vector x, N_Vector y); + +template +void ScaleAdd(const sunrealtype c, Matrix& A, Matrix& B); + +template +void ScaleAddI(const sunrealtype c, Matrix& A); + +template +void Zero(Matrix& A); + +template<> +inline void Zero(Matrix& A); + +template +void Copy(Matrix& A, Matrix& B); + +// +// Methods that operate on SUNMatrix +// + +template +SUNMatrix_ID SUNMatGetID_Ginkgo(SUNMatrix A) +{ + return SUNMATRIX_GINKGO; +} + +template +SUNMatrix SUNMatClone_Ginkgo(SUNMatrix A) +{ + auto A_mat{static_cast*>(A->content)}; + auto new_mat{new Matrix(*A_mat)}; // NOLINT + return new_mat->Convert(); +} + +template +void SUNMatDestroy_Ginkgo(SUNMatrix A) +{ + auto A_mat{static_cast*>(A->content)}; + delete A_mat; // NOLINT + return; +} + +template +int SUNMatZero_Ginkgo(SUNMatrix A) +{ + auto A_mat{static_cast*>(A->content)}; + impl::Zero(*A_mat); + return SUNMAT_SUCCESS; +} + +template +int SUNMatCopy_Ginkgo(SUNMatrix A, SUNMatrix B) +{ + auto A_mat{static_cast*>(A->content)}; + auto B_mat{static_cast*>(B->content)}; + impl::Copy(*A_mat, *B_mat); + return SUNMAT_SUCCESS; +} + +template +int SUNMatScaleAdd_Ginkgo(sunrealtype c, SUNMatrix A, SUNMatrix B) +{ + auto A_mat{static_cast*>(A->content)}; + auto B_mat{static_cast*>(B->content)}; + impl::ScaleAdd(c, *A_mat, *B_mat); + return SUNMAT_SUCCESS; +} + +template +int SUNMatScaleAddI_Ginkgo(sunrealtype c, SUNMatrix A) +{ + auto A_mat{static_cast*>(A->content)}; + impl::ScaleAddI(c, *A_mat); + return SUNMAT_SUCCESS; +} + +template +int SUNMatMatvec_Ginkgo(SUNMatrix A, N_Vector x, N_Vector y) +{ + auto A_mat{static_cast*>(A->content)}; + impl::Matvec(*A_mat, x, y); + return SUNMAT_SUCCESS; +} + +} // namespace impl + +// ============================================================================= +// Public namespace +// ============================================================================= + +/// Class that wraps a Ginkgo matrix and allows it to convert to a fully functioning `SUNMatrix`. +template +class Matrix : public sundials::impl::BaseMatrix, public sundials::ConvertibleTo +{ +public: + /// Default constructor - means the matrix must be copied or moved to + Matrix() = default; + + /// Constructs a Matrix from an existing Ginkgo matrix object. + /// \param gko_mat A Ginkgo matrix object + /// \param sunctx The SUNDIALS simulation context object + Matrix(std::shared_ptr gko_mat, SUNContext sunctx) : sundials::impl::BaseMatrix(sunctx), gkomtx_(gko_mat) + { + initSUNMatrix(); + } + + /// Move constructor + Matrix(Matrix&& that_matrix) noexcept + : sundials::impl::BaseMatrix(std::forward(that_matrix)), gkomtx_(std::move(that_matrix.gkomtx_)) + {} + + /// Copy constructor clones the ``gko::matrix`` and ``SUNMatrix`` + Matrix(const Matrix& that_matrix) : sundials::impl::BaseMatrix(that_matrix), gkomtx_(gko::clone(that_matrix.gkomtx_)) + {} + + /// Move assignment + Matrix& operator=(Matrix&& rhs) noexcept + { + gkomtx_ = std::move(rhs.gkomtx_); + + sundials::impl::BaseMatrix::operator=(std::forward(rhs)); + + return *this; + } + + /// Copy assignment clones the gko::matrix and SUNMatrix + Matrix& operator=(const Matrix& rhs) + { + gkomtx_ = gko::clone(rhs.gkomtx_); + + sundials::impl::BaseMatrix::operator=(rhs); + + return *this; + } + + /// Default destructor + // fine since all members are RAII + virtual ~Matrix() = default; + + /// Get the underlying Ginkgo matrix object + std::shared_ptr GkoMtx() const + { + return gkomtx_; + } + + /// Get the ``gko::Executor`` associated with the Ginkgo matrix + std::shared_ptr GkoExec() const + { + return GkoMtx()->get_executor(); + } + + /// Get the size, i.e. ``gko::dim``, for the Ginkgo matrix + const gko::dim<2>& GkoSize() const + { + return GkoMtx()->get_size(); + } + + using sundials::impl::BaseMatrix::sunctx; + + // Override the ConvertibleTo methods + + /// Implicit conversion to a :c:type:`SUNMatrix` + operator SUNMatrix() override + { + return object_.get(); + } + + /// Implicit conversion to a :c:type:`SUNMatrix` + operator SUNMatrix() const override + { + return object_.get(); + } + + /// Explicit conversion to a :c:type:`SUNMatrix` + SUNMatrix Convert() override + { + return object_.get(); + } + + /// Explicit conversion to a :c:type:`SUNMatrix` + SUNMatrix Convert() const override + { + return object_.get(); + } + +private: + std::shared_ptr gkomtx_; + + void initSUNMatrix() + { + this->object_->content = this; + + this->object_->ops->getid = impl::SUNMatGetID_Ginkgo; + this->object_->ops->clone = impl::SUNMatClone_Ginkgo; + this->object_->ops->zero = impl::SUNMatZero_Ginkgo; + this->object_->ops->copy = impl::SUNMatCopy_Ginkgo; + this->object_->ops->scaleadd = impl::SUNMatScaleAdd_Ginkgo; + this->object_->ops->scaleaddi = impl::SUNMatScaleAddI_Ginkgo; + this->object_->ops->matvec = impl::SUNMatMatvec_Ginkgo; + this->object_->ops->destroy = impl::SUNMatDestroy_Ginkgo; + } +}; + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +// +// Non-class methods that operate on Matrix +// + +inline std::unique_ptr WrapVector(std::shared_ptr gko_exec, N_Vector x) +{ + sunrealtype* x_arr{(x->ops->nvgetdevicearraypointer) ? N_VGetDeviceArrayPointer(x) : N_VGetArrayPointer(x)}; + const sunindextype x_len{N_VGetLength(x)}; + return GkoVecType::create(gko_exec, gko::dim<2>(x_len, 1), gko::array::view(gko_exec, x_len, x_arr), 1); +} + +inline std::unique_ptr WrapConstVector(std::shared_ptr gko_exec, N_Vector x) +{ + sunrealtype* x_arr{(x->ops->nvgetdevicearraypointer) ? N_VGetDeviceArrayPointer(x) : N_VGetArrayPointer(x)}; + const sunindextype x_len{N_VGetLength(x)}; + return GkoVecType::create_const(gko_exec, gko::dim<2>(x_len, 1), + gko::array::const_view(gko_exec, x_len, x_arr), 1); +} + +template +void Print(Matrix& A, std::ostream& ost) +{ + gko::write(ost, A.GkoMtx().get()); +} + +template +void Matvec(Matrix& A, GkoVecType* x, GkoVecType* y) +{ + A.GkoMtx()->apply(x, y); +} + +template +void Matvec(Matrix& A, N_Vector x, N_Vector y) +{ + if (x != y) { + auto x_vec{WrapConstVector(A.GkoExec(), x)}; + auto y_vec{WrapVector(A.GkoExec(), y)}; + + // y = Ax + A.GkoMtx()->apply(x_vec.get(), y_vec.get()); + } + else { + auto x_vec{WrapVector(A.GkoExec(), x)}; + + // x = Ax + A.GkoMtx()->apply(x_vec.get(), x_vec.get()); + } +} + +template +void ScaleAdd(const sunrealtype c, Matrix& A, Matrix& B) +{ + const auto I{gko::matrix::Identity::create(A.GkoExec(), A.GkoSize())}; + const auto one{gko::initialize({1.0}, A.GkoExec())}; + const auto cmat{gko::initialize({c}, A.GkoExec())}; + // A = B + cA + B.GkoMtx()->apply(one.get(), I.get(), cmat.get(), A.GkoMtx().get()); +} + +template +void ScaleAddI(const sunrealtype c, Matrix& A) +{ + const auto one{gko::initialize({1.0}, A.GkoExec())}; + const auto cmat{gko::initialize({c}, A.GkoExec())}; + // A = 1*I + c*A = cA + I + A.GkoMtx()->add_scaled_identity(one.get(), cmat.get()); +} + +template +void Zero(Matrix& A) +{ + A.GkoMtx()->scale(gko::initialize({0.0}, A.GkoExec()).get()); +} + +template<> +inline void Zero(Matrix& A) +{ + A.GkoMtx()->fill(0.0); +} + +template +void Copy(Matrix& A, Matrix& B) +{ + B.GkoMtx()->copy_from(A.GkoMtx().get()); +} + +} // namespace impl + +} // namespace ginkgo +} // namespace sundials + +#endif diff --git a/inst/include/sunmatrix/sunmatrix_kokkosdense.hpp b/inst/include/sunmatrix/sunmatrix_kokkosdense.hpp new file mode 100644 index 0000000..f59a266 --- /dev/null +++ b/inst/include/sunmatrix/sunmatrix_kokkosdense.hpp @@ -0,0 +1,403 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the header file for a dense SUNMarix implementation using Kokkos. + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNMATRIX_KOKKOSDENSE_HPP +#define _SUNMATRIX_KOKKOSDENSE_HPP + +#include +#include +#include +#include +#include +#include + +namespace sundials { +namespace kokkos { + +// Forward declaration of Matrix class +template +class DenseMatrix; + +// Get the Kokkos dense matrix wrapped by a SUNMatrix +template +inline MatrixType* GetDenseMat(SUNMatrix A) +{ + return static_cast(A->content); +} + +// ============================================================================= +// Everything in the implementation (impl) namespace is private and should not +// be referred to directly in user code. +// ============================================================================= + +namespace impl { + +SUNMatrix_ID SUNMatGetID_KokkosDense(SUNMatrix A) +{ + return SUNMATRIX_KOKKOSDENSE; +} + +template +SUNMatrix SUNMatClone_KokkosDense(SUNMatrix A) +{ + auto A_mat{GetDenseMat(A)}; + auto new_mat{new MatrixType(*A_mat)}; + return new_mat->Convert(); +} + +template +void SUNMatDestroy_KokkosDense(SUNMatrix A) +{ + auto A_mat{GetDenseMat(A)}; + delete A_mat; // NOLINT + return; +} + +template +int SUNMatZero_KokkosDense(SUNMatrix A) +{ + auto A_mat{GetDenseMat(A)}; + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + const auto cols = A_mat->BlockCols(); + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + + using range_policy = typename MatrixType::range_policy; + using size_type = typename MatrixType::size_type; + + // Zero out matrix + Kokkos::parallel_for( + "sunmat_zero", range_policy(A_exec, {0, 0, 0}, {blocks, rows, cols}), + KOKKOS_LAMBDA(const size_type i, const size_type j, const size_type k) { + A_data(i, j, k) = SUN_RCONST(0.0); + }); + + return SUNMAT_SUCCESS; +} + +template +int SUNMatCopy_KokkosDense(SUNMatrix A, SUNMatrix B) +{ + auto A_mat{GetDenseMat(A)}; + auto B_mat{GetDenseMat(B)}; + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + const auto cols = A_mat->BlockCols(); + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + auto B_data = B_mat->View(); + + using range_policy = typename MatrixType::range_policy; + using size_type = typename MatrixType::size_type; + + // Copy A into B + Kokkos::parallel_for( + "sunmat_copy", range_policy(A_exec, {0, 0, 0}, {blocks, rows, cols}), + KOKKOS_LAMBDA(const size_type i, const size_type j, const size_type k) { + B_data(i, j, k) = A_data(i, j, k); + }); + + return SUNMAT_SUCCESS; +} + +template +int SUNMatScaleAdd_KokkosDense(sunrealtype c, SUNMatrix A, SUNMatrix B) +{ + auto A_mat{GetDenseMat(A)}; + auto B_mat{GetDenseMat(B)}; + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + const auto cols = A_mat->BlockCols(); + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + auto B_data = B_mat->View(); + + using range_policy = typename MatrixType::range_policy; + using size_type = typename MatrixType::size_type; + + // Scale A by c and add B + Kokkos::parallel_for( + "sunmat_scale_add", range_policy(A_exec, {0, 0, 0}, {blocks, rows, cols}), + KOKKOS_LAMBDA(const size_type i, const size_type j, const size_type k) { + A_data(i, j, k) = c * A_data(i, j, k) + B_data(i, j, k); + }); + + return SUNMAT_SUCCESS; +} + +template +int SUNMatScaleAddI_KokkosDense(sunrealtype c, SUNMatrix A) +{ + auto A_mat{GetDenseMat(A)}; + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + const auto cols = A_mat->BlockCols(); + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + + using range_policy = typename MatrixType::range_policy; + using size_type = typename MatrixType::size_type; + + // Scale A by c and add I + Kokkos::parallel_for( + "sunmat_scale_add_i", range_policy(A_exec, {0, 0, 0}, {blocks, rows, cols}), + KOKKOS_LAMBDA(const size_type i, const size_type j, const size_type k) { + if (j == k) A_data(i, j, k) = c * A_data(i, j, k) + SUN_RCONST(1.0); + else A_data(i, j, k) = c * A_data(i, j, k); + }); + + return SUNMAT_SUCCESS; +} + +template +int SUNMatMatvec_KokkosDense(SUNMatrix A, N_Vector x, N_Vector y) +{ + auto A_mat{GetDenseMat(A)}; + auto x_vec{GetVec(x)}; + auto y_vec{GetVec(y)}; + + const auto blocks = A_mat->Blocks(); + const auto rows = A_mat->BlockRows(); + const auto cols = A_mat->BlockCols(); + + auto A_exec = A_mat->ExecSpace(); + auto A_data = A_mat->View(); + auto x_data = x_vec->View(); + auto y_data = y_vec->View(); + + // Use batched or single gemv to do y = alpha * A * x + beta * y + if (blocks > 1) + { + using team_policy = typename MatrixType::team_policy; + using member_type = typename MatrixType::member_type; + using size_type = typename MatrixType::size_type; + + Kokkos::parallel_for( + "sunmatvec_batch", + team_policy(A_exec, static_cast(blocks), Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type& team_member) { + const int idx = team_member.league_rank(); + auto A_subdata = Kokkos::subview(A_data, idx, Kokkos::ALL(), + Kokkos::ALL()); + auto x_subdata = + Kokkos::subview(x_data, + Kokkos::pair(idx * cols, + (idx + 1) * cols)); + auto y_subdata = + Kokkos::subview(y_data, + Kokkos::pair(idx * rows, + (idx + 1) * rows)); + KokkosBatched::TeamVectorGemv< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemv::Unblocked>::invoke(team_member, + SUN_RCONST(1.0), + A_subdata, x_subdata, + SUN_RCONST(0.0), + y_subdata); + }); + } + else + { + auto A_subdata = Kokkos::subview(A_data, 0, Kokkos::ALL(), Kokkos::ALL()); + KokkosBlas::gemv("N", SUN_RCONST(1.0), A_subdata, x_data, SUN_RCONST(0.0), + y_data); + } + + return SUNMAT_SUCCESS; +} + +} // namespace impl + +// ============================================================================= +// Public namespace +// ============================================================================= + +// ----------------------------------------------------------------------------- +// Kokkos dense matrix class, convertible to a SUNMatrix +// ----------------------------------------------------------------------------- + +template +class DenseMatrix : public sundials::impl::BaseMatrix, + public sundials::ConvertibleTo +{ +public: + using exec_space = ExecutionSpace; + using memory_space = MemorySpace; + using view_type = Kokkos::View; + using size_type = typename view_type::size_type; + using range_policy = Kokkos::MDRangePolicy>; + using team_policy = typename Kokkos::TeamPolicy; + using member_type = typename Kokkos::TeamPolicy::member_type; + + // Default constructor - means the matrix must be copied or moved to + DenseMatrix() = default; + + // Single matrix constructors + DenseMatrix(size_type rows, size_type cols, SUNContext sunctx) + : DenseMatrix(1, rows, cols, exec_space(), sunctx) + {} + + DenseMatrix(size_type rows, size_type cols, exec_space ex, SUNContext sunctx) + : DenseMatrix(1, rows, cols, ex, sunctx) + {} + + // Block-diagonal matrix constructors + DenseMatrix(size_type blocks, size_type block_rows, size_type block_cols, + SUNContext sunctx) + : DenseMatrix(blocks, block_rows, block_cols, exec_space(), sunctx) + {} + + // Block-diagonal matrix with user-supplied execution space instance + DenseMatrix(size_type blocks, size_type block_rows, size_type block_cols, + exec_space ex, SUNContext sunctx) + : sundials::impl::BaseMatrix(sunctx), + exec_space_(ex), + view_("sunmat_view", blocks, block_rows, block_cols) + { + initSUNMatrix(); + } + + // Move constructor + DenseMatrix(DenseMatrix&& that_matrix) noexcept + : sundials::impl::BaseMatrix(std::forward(that_matrix)), + exec_space_(std::move(that_matrix.exec_space_)), + view_(std::move(that_matrix.exec_space_)) + { + initSUNMatrix(); + } + + // Copy constructor + DenseMatrix(const DenseMatrix& that_matrix) + : sundials::impl::BaseMatrix(that_matrix), + exec_space_(that_matrix.exec_space_), + view_("sunmat_view", that_matrix.Blocks(), that_matrix.BlockRows(), + that_matrix.BlockCols()) + { + initSUNMatrix(); + } + + // Move assignment + DenseMatrix& operator=(DenseMatrix&& rhs) noexcept + { + exec_space_ = std::move(rhs.exec_space_); + view_ = std::move(rhs.view_); + + sundials::impl::BaseMatrix::operator=(std::forward(rhs)); + + return *this; + } + + // Copy assignment + DenseMatrix& operator=(const DenseMatrix& rhs) + { + exec_space_ = rhs.exec_space_; + view_ = view_type("sunmat_view", rhs.Blocks(), rhs.BlockRows(), + rhs.BlockCols()); + + sundials::impl::BaseMatrix::operator=(rhs); + + return *this; + } + + // Default destructor since all members are RAII + virtual ~DenseMatrix() = default; + + // Get the Kokkos execution space + exec_space ExecSpace() { return exec_space_; } + + // Get the Kokkos view + view_type View() { return view_; } + + // Get the number of blocks + size_type Blocks() const { return static_cast(view_.extent(0)); } + + // Get the number of rows in a block + size_type BlockRows() const + { + return static_cast(view_.extent(1)); + } + + // Get the number of columns in a block + size_type BlockCols() const + { + return static_cast(view_.extent(2)); + } + + // Get the number of rows + size_type Rows() const + { + return static_cast(view_.extent(0) * view_.extent(1)); + } + + // Get the number of columns + size_type Cols() const + { + return static_cast(view_.extent(0) * view_.extent(2)); + } + + using sundials::impl::BaseMatrix::sunctx; + + // Override the ConvertibleTo methods + + // Implicit conversion to a SUNMatrix + operator SUNMatrix() override { return object_.get(); } + + // Implicit conversion to SUNMatrix + operator SUNMatrix() const override { return object_.get(); } + + // Explicit conversion to a SUNMatrix + SUNMatrix Convert() override { return object_.get(); } + + // Explicit conversion to a SUNMatrix + SUNMatrix Convert() const override { return object_.get(); } + +private: + exec_space exec_space_; // Kokkos execution space + view_type view_; // Matrix data view [blocks, rows, cols] + + void initSUNMatrix() + { + using vec_type = Vector; + using mat_type = DenseMatrix; + + this->object_->content = this; + + this->object_->ops->getid = impl::SUNMatGetID_KokkosDense; + this->object_->ops->clone = impl::SUNMatClone_KokkosDense; + this->object_->ops->destroy = impl::SUNMatDestroy_KokkosDense; + this->object_->ops->zero = impl::SUNMatZero_KokkosDense; + this->object_->ops->copy = impl::SUNMatCopy_KokkosDense; + this->object_->ops->scaleadd = impl::SUNMatScaleAdd_KokkosDense; + this->object_->ops->scaleaddi = impl::SUNMatScaleAddI_KokkosDense; + this->object_->ops->matvec = + impl::SUNMatMatvec_KokkosDense; + } +}; + +} // namespace kokkos +} // namespace sundials + +#endif diff --git a/inst/include/sunmatrix/sunmatrix_magmadense.h b/inst/include/sunmatrix/sunmatrix_magmadense.h new file mode 100644 index 0000000..e8e2a3f --- /dev/null +++ b/inst/include/sunmatrix/sunmatrix_magmadense.h @@ -0,0 +1,122 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file for the dense implementation of the + * SUNMATRIX module, SUNMATRIX_MAGMADENSE. + * ----------------------------------------------------------------- + */ + + +#ifndef _SUNMATRIX_MAGMADENSE_H +#define _SUNMATRIX_MAGMADENSE_H + +#include +#include +#include + +#if defined(SUNDIALS_MAGMA_BACKENDS_CUDA) +#define HAVE_CUBLAS +#elif defined(SUNDIALS_MAGMA_BACKENDS_HIP) +#define HAVE_HIP +#endif +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +struct _SUNMatrixContent_MagmaDense { + int last_flag; /* last error code returned by magma */ + int device_id; /* device ID used by magma */ + sunindextype M; /* number of rows in block */ + sunindextype N; /* number of columns in block */ + sunindextype nblocks; /* number of blocks in matrix */ + sunindextype ldata; /* length of data array */ + SUNMemory data; /* matrix data; column-major */ + SUNMemory blocks; /* device pointers to blocks of A */ + SUNMemory xblocks; /* device pointers to blocks of x */ + SUNMemory yblocks; /* device pointers to blocks of y */ + SUNMemoryHelper memhelp; /* memory helper */ + magma_queue_t q; /* operation queue (i.e. stream) */ +}; + +typedef struct _SUNMatrixContent_MagmaDense *SUNMatrixContent_MagmaDense; + +/* --------------------------------------- + * Implementation specific functions + * ---------------------------------------*/ + +SUNDIALS_EXPORT SUNMatrix SUNMatrix_MagmaDense(sunindextype M, sunindextype N, SUNMemoryType memtype, + SUNMemoryHelper memhelper, void* queue, SUNContext sunctx); +SUNDIALS_EXPORT SUNMatrix SUNMatrix_MagmaDenseBlock(sunindextype nblocks, sunindextype M, sunindextype N, + SUNMemoryType memtype, SUNMemoryHelper memhelper, + void* queue, SUNContext sunctx); +SUNDIALS_EXPORT void SUNMatrix_MagmaDense_Print(SUNMatrix A); +SUNDIALS_EXPORT realtype* SUNMatrix_MagmaDense_Data(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_LData(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_Rows(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_Columns(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_BlockRows(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_BlockColumns(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_BlockLData(SUNMatrix A); +SUNDIALS_EXPORT sunindextype SUNMatrix_MagmaDense_NumBlocks(SUNMatrix A); +SUNDIALS_EXPORT realtype** SUNMatrix_MagmaDense_BlockData(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatrix_MagmaDense_CopyToDevice(SUNMatrix A, realtype* h_data); +SUNDIALS_EXPORT int SUNMatrix_MagmaDense_CopyFromDevice(SUNMatrix A, realtype* h_data); + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_MagmaDense_Block(SUNMatrix Amat, sunindextype k) +{ + SUNMatrixContent_MagmaDense A = (SUNMatrixContent_MagmaDense) Amat->content; + return( ((realtype*) A->data->ptr) + k*A->M*A->N ); +} + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_MagmaDense_Column(SUNMatrix Amat, sunindextype j) +{ + SUNMatrixContent_MagmaDense A = (SUNMatrixContent_MagmaDense) Amat->content; + return( ((realtype*) A->data->ptr) + j*A->M ); +} + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_MagmaDense_BlockColumn(SUNMatrix Amat, sunindextype k, sunindextype j) +{ + SUNMatrixContent_MagmaDense A = (SUNMatrixContent_MagmaDense) Amat->content; + return( ((realtype*) A->data->ptr) + k*A->M*A->N + j*A->M ); +} + + +/* --------------------------------------- + * SUNMatrix API functions + * ---------------------------------------*/ + +SUNDIALS_STATIC_INLINE +SUNMatrix_ID SUNMatGetID_MagmaDense(SUNMatrix A) { return SUNMATRIX_MAGMADENSE; } + +SUNDIALS_EXPORT SUNMatrix SUNMatClone_MagmaDense(SUNMatrix A); +SUNDIALS_EXPORT void SUNMatDestroy_MagmaDense(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatZero_MagmaDense(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatCopy_MagmaDense(SUNMatrix A, SUNMatrix B); +SUNDIALS_EXPORT int SUNMatScaleAdd_MagmaDense(realtype c, SUNMatrix A, SUNMatrix B); +SUNDIALS_EXPORT int SUNMatScaleAddI_MagmaDense(realtype c, SUNMatrix A); +SUNDIALS_EXPORT int SUNMatMatvecSetup_MagmaDense(SUNMatrix A); +SUNDIALS_EXPORT int SUNMatMatvec_MagmaDense(SUNMatrix A, N_Vector x, N_Vector y); +SUNDIALS_EXPORT int SUNMatSpace_MagmaDense(SUNMatrix A, long int *lenrw, long int *leniw); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunmatrix/sunmatrix_onemkldense.h b/inst/include/sunmatrix/sunmatrix_onemkldense.h new file mode 100644 index 0000000..c602d43 --- /dev/null +++ b/inst/include/sunmatrix/sunmatrix_onemkldense.h @@ -0,0 +1,175 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This is the header file for the dense implementation of the SUNMATRIX + * class using the Intel oneAPI Math Kernel Library (oneMKL). + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNMATRIX_ONEMKLDENSE_H +#define _SUNMATRIX_ONEMKLDENSE_H + +#include +#include + +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +struct _SUNMatrixContent_OneMklDense { + int last_flag; /* last error code returned */ + sunindextype block_rows; /* number of rows in a block */ + sunindextype block_cols; /* number of columns in a block */ + sunindextype num_blocks; /* number of blocks in the matrix */ + sunindextype rows; /* total number of rows */ + sunindextype cols; /* total number of columns */ + sunindextype ldata; /* length of data array */ + SUNMemory data; /* matrix data; column-major */ + SUNMemory blocks; /* device pointers to blocks of A */ + SUNSyclExecPolicy* exec_policy; /* execution policy */ + SUNMemoryType mem_type; /* memory type */ + SUNMemoryHelper mem_helper; /* memory helper */ + ::sycl::queue* queue; /* operation queue */ +}; + +typedef struct _SUNMatrixContent_OneMklDense *SUNMatrixContent_OneMklDense; + +/* --------------------------------------------------------------------------- + * Implementation specific functions + * ---------------------------------------------------------------------------*/ + +/* Constructors */ + +SUNDIALS_EXPORT +SUNMatrix SUNMatrix_OneMklDense(sunindextype M, sunindextype N, + SUNMemoryType mem_type, + SUNMemoryHelper mem_helper, + ::sycl::queue* queue, + SUNContext sunctx); + +SUNDIALS_EXPORT +SUNMatrix SUNMatrix_OneMklDenseBlock(sunindextype num_blocks, sunindextype M_block, + sunindextype N_block, + SUNMemoryType mem_type, + SUNMemoryHelper mem_helper, + ::sycl::queue* queue, + SUNContext sunctx); + +/* Get matrix dimensions */ + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_Rows(SUNMatrix A); + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_Columns(SUNMatrix A); + +/* Get matrix block dimensions */ + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_NumBlocks(SUNMatrix A); + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_BlockRows(SUNMatrix A); + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_BlockColumns(SUNMatrix A); + +/* Get matrix data */ + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_LData(SUNMatrix A); + +SUNDIALS_EXPORT +realtype* SUNMatrix_OneMklDense_Data(SUNMatrix A); + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_OneMklDense_Column(SUNMatrix Amat, sunindextype j) +{ + SUNMatrixContent_OneMklDense A = (SUNMatrixContent_OneMklDense) Amat->content; + return( ((realtype*) A->data->ptr) + j * A->block_rows ); +} + +/* Get matrix block data */ + +SUNDIALS_EXPORT +sunindextype SUNMatrix_OneMklDense_BlockLData(SUNMatrix A); + +SUNDIALS_EXPORT +realtype** SUNMatrix_OneMklDense_BlockData(SUNMatrix A); + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_OneMklDense_Block(SUNMatrix Amat, sunindextype k) +{ + SUNMatrixContent_OneMklDense A = (SUNMatrixContent_OneMklDense) Amat->content; + return( ((realtype*) A->data->ptr) + k * A->block_rows * A->block_cols ); +} + +SUNDIALS_STATIC_INLINE +realtype* SUNMatrix_OneMklDense_BlockColumn(SUNMatrix Amat, sunindextype k, + sunindextype j) +{ + SUNMatrixContent_OneMklDense A = (SUNMatrixContent_OneMklDense) Amat->content; + return( ((realtype*) A->data->ptr) + + k * A->block_rows * A->block_cols + j * A->block_rows ); +} + +/* Copy data */ + +SUNDIALS_EXPORT +int SUNMatrix_OneMklDense_CopyToDevice(SUNMatrix A, realtype* h_data); + +SUNDIALS_EXPORT +int SUNMatrix_OneMklDense_CopyFromDevice(SUNMatrix A, realtype* h_data); + +/* --------------------------------------------------------------------------- + * SUNMatrix API functions + * ---------------------------------------------------------------------------*/ + +SUNDIALS_STATIC_INLINE +SUNMatrix_ID SUNMatGetID_OneMklDense(SUNMatrix A) { return SUNMATRIX_ONEMKLDENSE; } + +SUNDIALS_EXPORT +SUNMatrix SUNMatClone_OneMklDense(SUNMatrix A); + +SUNDIALS_EXPORT +void SUNMatDestroy_OneMklDense(SUNMatrix A); + +SUNDIALS_EXPORT +int SUNMatZero_OneMklDense(SUNMatrix A); + +SUNDIALS_EXPORT +int SUNMatCopy_OneMklDense(SUNMatrix A, SUNMatrix B); + +SUNDIALS_EXPORT +int SUNMatScaleAdd_OneMklDense(realtype c, SUNMatrix A, SUNMatrix B); + +SUNDIALS_EXPORT +int SUNMatScaleAddI_OneMklDense(realtype c, SUNMatrix A); + +SUNDIALS_EXPORT +int SUNMatMatvecSetup_OneMklDense(SUNMatrix A); + +SUNDIALS_EXPORT +int SUNMatMatvec_OneMklDense(SUNMatrix A, N_Vector x, N_Vector y); + +SUNDIALS_EXPORT +int SUNMatSpace_OneMklDense(SUNMatrix A, long int *lenrw, long int *leniw); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inst/include/sunmatrix/sunmatrix_slunrloc.h b/inst/include/sunmatrix/sunmatrix_slunrloc.h index 809b6b6..6e18920 100644 --- a/inst/include/sunmatrix/sunmatrix_slunrloc.h +++ b/inst/include/sunmatrix/sunmatrix_slunrloc.h @@ -3,7 +3,7 @@ * Programmer(s): Cody Balos @ LLNL * ---------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -53,7 +53,7 @@ typedef struct _SUNMatrixContent_SLUNRloc *SUNMatrixContent_SLUNRloc; * --------------------------------------------------------------------------*/ -SUNDIALS_EXPORT SUNMatrix SUNMatrix_SLUNRloc(SuperMatrix *A_super, gridinfo_t *grid); +SUNDIALS_EXPORT SUNMatrix SUNMatrix_SLUNRloc(SuperMatrix *A_super, gridinfo_t *grid, SUNContext sunctx); SUNDIALS_EXPORT void SUNMatrix_SLUNRloc_Print(SUNMatrix A, FILE *fp); /* ---------------------------------------------------------------------------- diff --git a/inst/include/sunmatrix/sunmatrix_sparse.h b/inst/include/sunmatrix/sunmatrix_sparse.h index aea60aa..4a6b0a4 100644 --- a/inst/include/sunmatrix/sunmatrix_sparse.h +++ b/inst/include/sunmatrix/sunmatrix_sparse.h @@ -2,11 +2,11 @@ * ----------------------------------------------------------------- * Programmer(s): Daniel Reynolds @ SMU * David Gardner @ LLNL - * Based on code sundials_sparse.h by: Carol Woodward and + * Based on code sundials_sparse.h by: Carol Woodward and * Slaven Peles @ LLNL, and Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -15,15 +15,15 @@ * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------- - * This is the header file for the sparse implementation of the + * This is the header file for the sparse implementation of the * SUNMATRIX module, SUNMATRIX_SPARSE. * * Notes: * - The definition of the generic SUNMatrix structure can be found * in the header file sundials_matrix.h. * - The definition of the type 'realtype' can be found in the - * header file sundials_types.h, and it may be changed (at the - * configuration stage) according to the user's needs. + * header file sundials_types.h, and it may be changed (at the + * configuration stage) according to the user's needs. * The sundials_types.h file also contains the definition * for the type 'booleantype' and 'indextype'. * ----------------------------------------------------------------- @@ -48,11 +48,11 @@ extern "C" { #define CSC_MAT 0 #define CSR_MAT 1 - + /* ------------------------------------------ * Sparse Implementation of SUNMATRIX_SPARSE * ------------------------------------------ */ - + struct _SUNMatrixContent_Sparse { sunindextype M; sunindextype N; @@ -100,7 +100,8 @@ typedef struct _SUNMatrixContent_Sparse *SUNMatrixContent_Sparse; * ---------------------------------------- */ SUNDIALS_EXPORT SUNMatrix SUNSparseMatrix(sunindextype M, sunindextype N, - sunindextype NNZ, int sparsetype); + sunindextype NNZ, int sparsetype, + SUNContext sunctx); SUNDIALS_EXPORT SUNMatrix SUNSparseFromDenseMatrix(SUNMatrix A, realtype droptol, @@ -110,6 +111,9 @@ SUNDIALS_EXPORT SUNMatrix SUNSparseFromBandMatrix(SUNMatrix A, realtype droptol, int sparsetype); +SUNDIALS_EXPORT int SUNSparseMatrix_ToCSR(const SUNMatrix A, SUNMatrix* Bout); +SUNDIALS_EXPORT int SUNSparseMatrix_ToCSC(const SUNMatrix A, SUNMatrix* Bout); + SUNDIALS_EXPORT int SUNSparseMatrix_Realloc(SUNMatrix A); SUNDIALS_EXPORT int SUNSparseMatrix_Reallocate(SUNMatrix A, sunindextype NNZ); diff --git a/inst/include/sunnonlinsol/sunnonlinsol_fixedpoint.h b/inst/include/sunnonlinsol/sunnonlinsol_fixedpoint.h index 5be03e0..fbf826b 100644 --- a/inst/include/sunnonlinsol/sunnonlinsol_fixedpoint.h +++ b/inst/include/sunnonlinsol/sunnonlinsol_fixedpoint.h @@ -1,8 +1,8 @@ -/*----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- * Programmer(s): Daniel R. Reynolds @ SMU - *----------------------------------------------------------------------------- + * --------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -10,14 +10,14 @@ * * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End - *----------------------------------------------------------------------------- + * --------------------------------------------------------------------------- * This is the header file for the SUNNonlinearSolver module implementation of * the Anderson-accelerated fixed-point method. * * Part I defines the solver-specific content structure. * * Part II contains prototypes for the solver constructor and operations. - *---------------------------------------------------------------------------*/ + * ---------------------------------------------------------------------------*/ #ifndef _SUNNONLINSOL_FIXEDPOINT_H #define _SUNNONLINSOL_FIXEDPOINT_H @@ -41,25 +41,32 @@ struct _SUNNonlinearSolverContent_FixedPoint { SUNNonlinSolConvTestFn CTest; /* convergence test function */ /* nonlinear solver variables */ - int m; /* number of acceleration vectors to use */ - int *imap; /* array of length m */ - realtype *R; /* array of length m*m */ - realtype *gamma; /* array of length m */ - realtype *cvals; /* array of length m+1 for fused vector op */ - N_Vector *df; /* vector array of length m */ - N_Vector *dg; /* vector array of length m */ - N_Vector *q; /* vector array of length m */ - N_Vector *Xvecs; /* array of length m+1 for fused vector op */ - N_Vector yprev; /* temporary vectors for performing solve */ - N_Vector gy; - N_Vector fold; - N_Vector gold; - N_Vector delta; /* correction vector (change between 2 iterates) */ - int curiter; /* current iteration number in a solve attempt */ - int maxiters; /* maximum number of iterations per solve attempt */ - long int niters; /* total number of iterations across all solves */ - long int nconvfails; /* total number of convergence failures */ - void* ctest_data; /* data to pass to convergence test function */ + int m; /* number of acceleration vectors to use */ + int *imap; /* array of length m */ + booleantype damping; /* flag to apply dampling in acceleration */ + realtype beta; /* damping paramter */ + realtype *R; /* array of length m*m */ + realtype *gamma; /* array of length m */ + realtype *cvals; /* array of length m+1 for fused vector op */ + N_Vector *df; /* vector array of length m */ + N_Vector *dg; /* vector array of length m */ + N_Vector *q; /* vector array of length m */ + N_Vector *Xvecs; /* array of length m+1 for fused vector op */ + N_Vector yprev; /* temporary vectors for performing solve */ + N_Vector gy; + N_Vector fold; + N_Vector gold; + N_Vector delta; /* correction vector (change between 2 iterates) */ + int curiter; /* current iteration number in a solve attempt */ + int maxiters; /* maximum number of iterations per solve attempt */ + long int niters; /* total number of iterations across all solves */ + long int nconvfails; /* total number of convergence failures */ + void *ctest_data; /* data to pass to convergence test function */ + + /* if 0 (default) nothing is printed, if 1 the residual is printed every iteration */ + int print_level; + /* if NULL nothing is printed, if 1 the residual is printed every iteration */ + FILE* info_file; }; typedef struct _SUNNonlinearSolverContent_FixedPoint *SUNNonlinearSolverContent_FixedPoint; @@ -69,8 +76,12 @@ typedef struct _SUNNonlinearSolverContent_FixedPoint *SUNNonlinearSolverContent_ ---------------------------------------------------------------------------*/ /* Constructor to create solver and allocates memory */ -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m); -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSol_FixedPointSens(int count, N_Vector y, int m); +SUNDIALS_EXPORT +SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m, SUNContext sunctx); + +SUNDIALS_EXPORT +SUNNonlinearSolver SUNNonlinSol_FixedPointSens(int count, N_Vector y, int m, + SUNContext sunctx); /* core functions */ SUNDIALS_EXPORT SUNNonlinearSolver_Type SUNNonlinSolGetType_FixedPoint(SUNNonlinearSolver NLS); @@ -95,6 +106,9 @@ SUNDIALS_EXPORT int SUNNonlinSolSetConvTestFn_FixedPoint(SUNNonlinearSolver NLS, SUNDIALS_EXPORT int SUNNonlinSolSetMaxIters_FixedPoint(SUNNonlinearSolver NLS, int maxiters); +SUNDIALS_EXPORT int SUNNonlinSolSetDamping_FixedPoint(SUNNonlinearSolver NLS, + realtype beta); + /* get functions */ SUNDIALS_EXPORT int SUNNonlinSolGetNumIters_FixedPoint(SUNNonlinearSolver NLS, long int *niters); @@ -108,6 +122,15 @@ SUNDIALS_EXPORT int SUNNonlinSolGetNumConvFails_FixedPoint(SUNNonlinearSolver NL SUNDIALS_EXPORT int SUNNonlinSolGetSysFn_FixedPoint(SUNNonlinearSolver NLS, SUNNonlinSolSysFn *SysFn); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNNonlinSolSetInfoFile_FixedPoint(SUNNonlinearSolver NLS, + FILE* info_file); + +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +SUNDIALS_EXPORT int SUNNonlinSolSetPrintLevel_FixedPoint(SUNNonlinearSolver NLS, + int print_level); + + #ifdef __cplusplus } #endif diff --git a/inst/include/sunnonlinsol/sunnonlinsol_newton.h b/inst/include/sunnonlinsol/sunnonlinsol_newton.h index 281f79c..8ec182d 100644 --- a/inst/include/sunnonlinsol/sunnonlinsol_newton.h +++ b/inst/include/sunnonlinsol/sunnonlinsol_newton.h @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -50,6 +50,11 @@ struct _SUNNonlinearSolverContent_Newton { long int niters; /* total number of nonlinear iterations across all solves */ long int nconvfails; /* total number of convergence failures across all solves */ void* ctest_data; /* data to pass to convergence test function */ + + /* if 0 (default) nothing is printed, if 1 the residual is printed every iteration */ + int print_level; + /* if NULL nothing is printed, if 1 the residual is printed every iteration */ + FILE* info_file; }; typedef struct _SUNNonlinearSolverContent_Newton *SUNNonlinearSolverContent_Newton; @@ -59,8 +64,12 @@ typedef struct _SUNNonlinearSolverContent_Newton *SUNNonlinearSolverContent_Newt * ---------------------------------------------------------------------------*/ /* Constructor to create solver and allocates memory */ -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y); -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSol_NewtonSens(int count, N_Vector y); +SUNDIALS_EXPORT +SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y, SUNContext sunctx); + +SUNDIALS_EXPORT +SUNNonlinearSolver SUNNonlinSol_NewtonSens(int count, N_Vector y, + SUNContext sunctx); /* core functions */ SUNDIALS_EXPORT SUNNonlinearSolver_Type SUNNonlinSolGetType_Newton(SUNNonlinearSolver NLS); @@ -104,6 +113,15 @@ SUNDIALS_EXPORT int SUNNonlinSolGetNumConvFails_Newton(SUNNonlinearSolver NLS, SUNDIALS_EXPORT int SUNNonlinSolGetSysFn_Newton(SUNNonlinearSolver NLS, SUNNonlinSolSysFn *SysFn); +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger_SetInfoFilename instead") +int SUNNonlinSolSetInfoFile_Newton(SUNNonlinearSolver NLS, + FILE* info_file); + +SUNDIALS_DEPRECATED_EXPORT_MSG("Use SUNLogger interface instead") +SUNDIALS_EXPORT int SUNNonlinSolSetPrintLevel_Newton(SUNNonlinearSolver NLS, + int print_level); + + #ifdef __cplusplus } #endif diff --git a/inst/include/sunnonlinsol/sunnonlinsol_petscsnes.h b/inst/include/sunnonlinsol/sunnonlinsol_petscsnes.h index ed2c6f8..ede0731 100644 --- a/inst/include/sunnonlinsol/sunnonlinsol_petscsnes.h +++ b/inst/include/sunnonlinsol/sunnonlinsol_petscsnes.h @@ -2,7 +2,7 @@ * Programmer(s): Cody J. Balos @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -56,7 +56,9 @@ typedef struct _SUNNonlinearSolverContent_PetscSNES *SUNNonlinearSolverContent_P /* Constructor to create solver and allocates memory */ -SUNDIALS_EXPORT SUNNonlinearSolver SUNNonlinSol_PetscSNES(N_Vector y, SNES snes); +SUNDIALS_EXPORT +SUNNonlinearSolver SUNNonlinSol_PetscSNES(N_Vector y, SNES snes, + SUNContext sunctx); /* SUNNonlinearSolver API functions */ diff --git a/inst/unitTests/runit.r2cvodes.R b/inst/unitTests/runit.r2cvodes.R index 4da7fa0..60054ae 100644 --- a/inst/unitTests/runit.r2cvodes.R +++ b/inst/unitTests/runit.r2cvodes.R @@ -1,3 +1,4 @@ +VERBOSE=TRUE yini <- c(y1=1, y2=0, y3=0) neq <- length(yini) # parameters @@ -14,7 +15,7 @@ r_rober <- function(t, y, parms, psens) vRober(y, parms) times <- 10^(seq(from = -5, to = 11, by = 0.1)) # pointer to rhs function -includes <- "using namespace arma;\n#include " +includes <- "// [[Rcpp::plugins(cpp14)]]\nusing namespace arma;\n#include " pfnd <- cppXPtr(code=' int d_robertson(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &psens) { NumericVector p(param); @@ -23,7 +24,7 @@ int d_robertson(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector ydot[1] = -ydot[0] - ydot[2]; return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) # pointer to dense jacobian function pfnj <- cppXPtr(code=' int jac_robertson(double t, const vec &y, const vec &ydot, mat &J, RObject ¶m, NumericVector &psens, vec &tmp1, vec &tmp2, vec &tmp3) { @@ -41,7 +42,7 @@ int jac_robertson(double t, const vec &y, const vec &ydot, mat &J, RObject ¶ J(2, 2) = 0.; return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) # pointer to sparse jacobian function # illustrates usage of named components of param vector pfnspj <- cppXPtr(code=' @@ -75,7 +76,7 @@ int spjac_robertson(double t, const vec &y, const vec &ydot, uvec &ir, uvec &pj, pj[3] = i; return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) # pointer to sensitivity1 rhs function pfnsens1 <- cppXPtr(code=' int sens_robertson1(int Ns, double t, const vec &y, const vec &ydot, int iS, const vec &yS, vec &ySdot, RObject ¶m, NumericVector &p, vec &tmp1, vec &tmp2) { @@ -102,8 +103,8 @@ int sens_robertson1(int Ns, double t, const vec &y, const vec &ydot, int iS, con } return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) - +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) +#browser() # just rhs outr <- r2sundials::r2cvodes(yini, times, r_rober, param=parms, maxsteps=2000) out0 <- r2sundials::r2cvodes(yini, times, pfnd, param=parms, maxsteps=2000) @@ -113,6 +114,8 @@ test.r_vs_cpp <- function() { } # sparse Jacobian out1 <- r2sundials::r2cvodes(yini, times, pfnd, param=parms, fjac=pfnspj, nz=8, maxsteps=2000) +#browser() + test.sparse <- function() { checkEqualsNumeric(out0, out1, tolerance=1.e-6, msg="equivalence of solution with sparse Jacobian and internal cvodes Jacobian") } @@ -143,14 +146,14 @@ int d_ball(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &pse ydot[3] = y[1] > 0 ? -p["g"] : -y[3]; // falling till y=0 then damping return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) # pointer to root function proot <- cppXPtr(code=' int root_ball(double t, const vec &y, vec &vroot, RObject ¶m, NumericVector &psens) { vroot[0] = y[1]; // y==0 return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) # pointer to event handler function pevt <- cppXPtr(code=' int event_ball(double t, const vec &y, vec &ynew, int Ns, std::vector &ySv, const ivec &rootsfound, RObject ¶m, NumericVector &psens) { @@ -170,7 +173,7 @@ int event_ball(double t, const vec &y, vec &ynew, int Ns, std::vector &ySv, return(R2SUNDIALS_EVENT_STOP); } } -', depends=c("RcppArmadillo", "r2sundials", "rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo", "r2sundials", "rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) outb <- r2sundials::r2cvodes(yinib, timesb, pball, paramb, nroot=1, froot=proot, fevent=pevt) test.root.cpp <- function() { checkEqualsNumeric(dim(attr(outb, "roots")), c(2, 5), msg="root finding") @@ -221,7 +224,7 @@ int d_exp(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &psen ydot[0] = -psens["nu"]*(y[0]-psens["lim"]); return(CV_SUCCESS); } -', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=FALSE) +', depends=c("RcppArmadillo","r2sundials","rmumps"), includes=includes, cacheDir="lib", verbose=VERBOSE) par_exp <- c("nu"=1, "lim"=1) ti <- seq(0, 5, length.out=11) oute <- r2sundials::r2cvodes(0., ti, pexp, Ns=2, psens=par_exp) diff --git a/man/r2cvodes.Rd b/man/r2cvodes.Rd index 8bb8929..32579e3 100644 --- a/man/r2cvodes.Rd +++ b/man/r2cvodes.Rd @@ -198,7 +198,8 @@ int rhs_exp(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &ps return(CV_SUCCESS); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # For ease of use in C++, we convert param to a numeric vector instead of a list. pv=c(a=p$a) # new call to r2cvodes() with XPtr pointer ptr_exp. @@ -230,7 +231,8 @@ int rhs_ball(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &p return(CV_SUCCESS); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # root function ptr_ball_root=cppXPtr(code=' @@ -241,7 +243,8 @@ int root_ball(double t, const vec &y, vec &vroot, RObject ¶m, NumericVector return(0); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # event handler function ptr_ball_event=cppXPtr(code=' @@ -267,7 +270,8 @@ int event_ball(double t, const vec &y, vec &ynew, int Ns, std::vector &ySv, } } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # ODE solving and plotting res_ball <- r2sundials::r2cvodes(yv, ti, ptr_ball, param=pv, nroot=2L, @@ -308,7 +312,8 @@ int rhs_rob(double t, const vec &y, vec &ydot, RObject ¶m, NumericVector &ps return(CV_SUCCESS); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # pointer to sparse jacobian function ptr_rob_jacsp=cppXPtr(code=' int spjac_rob(double t, const vec &y, const vec &ydot, uvec &ir, uvec &pj, vec &v, int n, int nz, @@ -343,7 +348,8 @@ int spjac_rob(double t, const vec &y, const vec &ydot, uvec &ir, uvec &pj, vec & return(0); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # pointer to sensitivity rhs function ptr_rob_sens1=cppXPtr(code=' int sens_rob1(int Ns, double t, const vec &y, const vec &ydot, int iS, const vec &yS, vec &ySdot, @@ -371,7 +377,8 @@ int sens_rob1(int Ns, double t, const vec &y, const vec &ydot, int iS, const vec return(CV_SUCCESS); } ', depends=c("RcppArmadillo","r2sundials","rmumps"), - includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) + includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), + cacheDir="lib", verbose=FALSE) # Note that we don't use psens param for sensitivity calculations as we provide our own fsens1. res_rob <- r2sundials::r2cvodes(yv, ti, ptr_rob, param=pv, nz=8, fjac=ptr_rob_jacsp, Ns=3, fsens1=ptr_rob_sens1) diff --git a/src/Makevars b/src/Makevars index 3502ce7..f2e183a 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,12 +1,13 @@ -CXX_STD = CXX11 -PKG_CPPFLAGS = -I../inst/include -DNO_FPRINTF_OUTPUT +#CXX_STD = CXX14 +#PKG_CFLAGS = -fno-common +PKG_CPPFLAGS = -I../inst/include -I ../inst/include/sundials -DNO_FPRINTF_OUTPUT # PKG_LIBS = -L../inst -lsundials $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB): ../inst/libsundials.a # find lib -type f -name '*'.c | tr $'\n' ' ' -sunsrc = lib/cvodes/cvodea.c lib/cvodes/cvodes_spils.c lib/cvodes/cvodes_io.c lib/cvodes/cvodea_io.c lib/cvodes/cvodes_nls_sim.c lib/cvodes/cvodes.c lib/cvodes/cvodes_direct.c lib/cvodes/cvodes_nls_stg.c lib/cvodes/cvodes_nls_stg1.c lib/cvodes/cvodes_diag.c lib/cvodes/cvodes_ls.c lib/cvodes/cvodes_nls.c lib/nvector/serial/nvector_serial.c lib/nvector/serial/fnvector_serial.c lib/sunnonlinsol/newton/fsunnonlinsol_newton.c lib/sunnonlinsol/newton/sunnonlinsol_newton.c lib/sunlinsol/dense/sunlinsol_dense.c lib/sunlinsol/dense/fsunlinsol_dense.c lib/sundials/sundials_nonlinearsolver.c lib/sundials/sundials_matrix.c lib/sundials/sundials_nvector_senswrapper.c lib/sundials/sundials_direct.c lib/sundials/sundials_dense.c lib/sundials/sundials_nvector.c lib/sundials/sundials_band.c lib/sundials/sundials_math.c lib/sundials/sundials_version.c lib/sundials/sundials_linearsolver.c lib/sundials/sundials_iterative.c lib/sunmatrix/dense/fsunmatrix_dense.c lib/sunmatrix/dense/sunmatrix_dense.c lib/sunmatrix/band/fsunmatrix_band.c lib/sunmatrix/band/sunmatrix_band.c lib/sunmatrix/sparse/fsunmatrix_sparse.c lib/sunmatrix/sparse/sunmatrix_sparse.c +sunsrc = lib/cvodes/cvodes_proj.c lib/cvodes/cvodea.c lib/cvodes/cvodes_spils.c lib/cvodes/cvodes_io.c lib/cvodes/cvodea_io.c lib/cvodes/cvodes_bandpre.c lib/cvodes/cvodes_nls_sim.c lib/cvodes/cvodes.c lib/cvodes/cvodes_direct.c lib/cvodes/cvodes_bbdpre.c lib/cvodes/cvodes_nls_stg.c lib/cvodes/cvodes_nls_stg1.c lib/cvodes/cvodes_diag.c lib/cvodes/cvodes_ls.c lib/cvodes/cvodes_nls.c lib/nvector/serial/nvector_serial.c lib/sunnonlinsol/newton/sunnonlinsol_newton.c lib/sunnonlinsol/fixedpoint/sunnonlinsol_fixedpoint.c lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c lib/sunlinsol/spgmr/sunlinsol_spgmr.c lib/sunlinsol/pcg/sunlinsol_pcg.c lib/sunlinsol/dense/sunlinsol_dense.c lib/sunlinsol/band/sunlinsol_band.c lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c lib/sunlinsol/lapackdense/sunlinsol_lapackdense.c lib/sunlinsol/lapackband/sunlinsol_lapackband.c lib/sundials/sundials_futils.c lib/sundials/sundials_context.c lib/sundials/sundials_nonlinearsolver.c lib/sundials/sundials_matrix.c lib/sundials/sundials_nvector_senswrapper.c lib/sundials/sundials_logger.c lib/sundials/sundials_memory.c lib/sundials/sundials_direct.c lib/sundials/sundials_dense.c lib/sundials/sundials_profiler.c lib/sundials/sundials_nvector.c lib/sundials/sundials_band.c lib/sundials/sundials_math.c lib/sundials/sundials_version.c lib/sundials/sundials_linearsolver.c lib/sundials/sundials_iterative.c lib/sunmatrix/dense/sunmatrix_dense.c lib/sunmatrix/band/sunmatrix_band.c lib/sunmatrix/sparse/sunmatrix_sparse.c sunobj = $(sunsrc:.c=.o) ../inst/libsundials.a: $(sunobj) $(AR) -crvs ../inst/libsundials.a $(sunobj) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 0738dbc..448cc0b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -7,6 +7,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // r2cvodes NumericMatrix r2cvodes(const NumericVector& yv, const vec& times, const RObject& frhs, RObject param, const NumericVector tstop, const double abstol, const double reltol, IntegerVector integrator, const int maxord, const int maxsteps, const double hin, const double hmax, const double hmin, const vec& constraints, const RObject fjac, const int nz, IntegerVector rmumps_perm, const int nroot, const RObject froot, const RObject fevent, const int Ns, NumericVector psens, NumericVector sens_init, NumericVector psens_bar, const IntegerVector psens_list, const RObject fsens, const RObject fsens1, IntegerVector sens_method, const bool errconS); RcppExport SEXP _r2sundials_r2cvodes(SEXP yvSEXP, SEXP timesSEXP, SEXP frhsSEXP, SEXP paramSEXP, SEXP tstopSEXP, SEXP abstolSEXP, SEXP reltolSEXP, SEXP integratorSEXP, SEXP maxordSEXP, SEXP maxstepsSEXP, SEXP hinSEXP, SEXP hmaxSEXP, SEXP hminSEXP, SEXP constraintsSEXP, SEXP fjacSEXP, SEXP nzSEXP, SEXP rmumps_permSEXP, SEXP nrootSEXP, SEXP frootSEXP, SEXP feventSEXP, SEXP NsSEXP, SEXP psensSEXP, SEXP sens_initSEXP, SEXP psens_barSEXP, SEXP psens_listSEXP, SEXP fsensSEXP, SEXP fsens1SEXP, SEXP sens_methodSEXP, SEXP errconSSEXP) { diff --git a/src/lib/cvodes/LICENSE b/src/lib/cvodes/LICENSE index 2966cbb..d2280c3 100644 --- a/src/lib/cvodes/LICENSE +++ b/src/lib/cvodes/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2002-2019, Lawrence Livermore National Security and Southern Methodist University. +Copyright (c) 2002-2022, Lawrence Livermore National Security and Southern Methodist University. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/src/lib/cvodes/README.md b/src/lib/cvodes/README.md index 8e21be4..3e64b71 100644 --- a/src/lib/cvodes/README.md +++ b/src/lib/cvodes/README.md @@ -1,8 +1,11 @@ # CVODES -### Version 5.0.0 (Oct 2019) +### Version 6.5.0 (Dec 2022) + +**Alan C. Hindmarsh, Radu Serban, Cody J. Balos, David J. Gardner, + and Carol S. Woodward, Center for Applied Scientific Computing, LLNL** + +**Daniel R. Reynolds, Department of Mathematics, Southern Methodist University** -**Alan C. Hindmarsh and Radu Serban - Center for Applied Scientific Computing, LLNL** CVODES is a package for the solution of stiff and nonstiff ordinary differential equation (ODE) systems (initial value problem) given in explicit form @@ -24,13 +27,13 @@ nonlinear solver APIs used across SUNDIALS packages. ## Documentation -See the [CVODES User Guide](./doc/cvodes/cvs_guide.pdf) and -[CVODES Examples](./doc/cvodes/cvs_examples.pdf) document for more information +See the [CVODES User Guide](/doc/cvodes/cvs_guide.pdf) and +[CVODES Examples](/doc/cvodes/cvs_examples.pdf) document for more information about CVODES usage and the provided example programs respectively. ## Installation -For installation instructions see the [INSTALL_GUIDE](./INSTALL_GUIDE.pdf) +For installation instructions see the [INSTALL_GUIDE](/INSTALL_GUIDE.pdf) or the "Installation Procedure" chapter in the CVODES User Guide. ## Release History @@ -41,11 +44,12 @@ the "SUNDIALS Release History" appendix of the CVODES User Guide. ## References -* A. C. Hindmarsh and R. Serban, "User Documentation for CVODES v5.0.0," - LLNL technical report UCRL-SM-208111, Oct 2019. +* A. C. Hindmarsh, R. Serban, C. J. Balos, D. J. Gardner, D. R. Reynolds + and C. S. Woodward, "User Documentation for CVODES v6.5.0," + LLNL technical report UCRL-SM-208111, Dec 2022. -* A. C. Hindmarsh and R. Serban, "Example Programs for CVODES v5.0.0," - LLNL technical report UCRL-SM-208115, Oct 2019. +* A. C. Hindmarsh and R. Serban, "Example Programs for CVODES v6.5.0," + LLNL technical report UCRL-SM-208115, Dec 2022. * R. Serban and A. C. Hindmarsh, "CVODES: the Sensitivity-Enabled ODE solver in SUNDIALS," Proceedings of IDETC/CIE 2005, Sept. 2005, diff --git a/src/lib/cvodes/cvodea.c b/src/lib/cvodes/cvodea.c index 62a81fc..30e03e7 100644 --- a/src/lib/cvodes/cvodea.c +++ b/src/lib/cvodes/cvodea.c @@ -1,12 +1,9 @@ /* - * ----------------------------------------------------------------- - * $Revision$ - * $Date$ * ----------------------------------------------------------------- * Programmer(s): Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -45,6 +42,12 @@ #define HUNDRED RCONST(100.0) /* real 100.0 */ #define FUZZ_FACTOR RCONST(1000000.0) /* fuzz factor for IMget */ +/*=================================================================*/ +/* Shortcuts */ +/*=================================================================*/ + +#define CV_PROFILER cv_mem->cv_sunctx->profiler + /* * ================================================================= * PRIVATE FUNCTION PROTOTYPES @@ -110,13 +113,17 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) } cv_mem = (CVodeMem)cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if (steps <= 0) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeAdjInit", MSGCV_BAD_STEPS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } if ( (interp != CV_HERMITE) && (interp != CV_POLYNOMIAL) ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeAdjInit", MSGCV_BAD_INTERP); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -128,6 +135,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) ca_mem = (CVadjMem) malloc(sizeof(struct CVadjMemRec)); if (ca_mem == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeAdjInit", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -170,6 +178,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) if (ca_mem->dt_mem == NULL) { free(ca_mem); ca_mem = NULL; cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeAdjInit", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -181,6 +190,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) free(ca_mem->dt_mem); ca_mem->dt_mem = NULL; free(ca_mem); ca_mem = NULL; cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeAdjInit", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } } @@ -248,6 +258,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) cv_mem->cv_adj = SUNTRUE; cv_mem->cv_adjMallocDone = SUNTRUE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -278,9 +289,12 @@ int CVodeAdjReInit(void *cvode_mem) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeAdjReInit", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } @@ -302,6 +316,7 @@ int CVodeAdjReInit(void *cvode_mem) ca_mem->ca_tstopCVodeFcall = SUNFALSE; ca_mem->ca_firstCVodeBcall = SUNTRUE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -367,6 +382,7 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, CVodeMem cv_mem; CkpntMem tmp; DtpntMem *dt_mem; + long int nstloc; int flag, i; booleantype allocOK, earlyret; realtype ttest; @@ -378,9 +394,12 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeF", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } @@ -389,18 +408,21 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, /* Check for yout != NULL */ if (yout == NULL) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeF", MSGCV_YOUT_NULL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } /* Check for tret != NULL */ if (tret == NULL) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeF", MSGCV_TRET_NULL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } /* Check for valid itask */ if ( (itask != CV_NORMAL) && (itask != CV_ONE_STEP) ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeF", MSGCV_BAD_ITASK); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -428,6 +450,7 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, ca_mem->ck_mem = CVAckpntInit(cv_mem); if (ca_mem->ck_mem == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeF", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -440,6 +463,7 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, allocOK = ca_mem->ca_IMmalloc(cv_mem); if (!allocOK) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeF", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -491,19 +515,32 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, ca_mem->ca_IMnewData = SUNTRUE; ca_mem->ca_ckpntData = ca_mem->ck_mem; ca_mem->ca_np = cv_mem->cv_nst % ca_mem->ca_nsteps + 1; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } } /* Integrate to tout (in CV_ONE_STEP mode) while loading check points */ + nstloc = 0; for(;;) { + /* Check for too many steps */ + + if ( (cv_mem->cv_mxstep>0) && (nstloc >= cv_mem->cv_mxstep) ) { + cvProcessError(cv_mem, CV_TOO_MUCH_WORK, "CVODEA", "CVodeF", + MSGCV_MAX_STEPS, cv_mem->cv_tn); + flag = CV_TOO_MUCH_WORK; + break; + } + /* Perform one step of the integration */ flag = CVode(cv_mem, tout, yout, tret, CV_ONE_STEP); if (flag < 0) break; + nstloc++; + /* Test if a new check point is needed */ if ( cv_mem->cv_nst % ca_mem->ca_nsteps == 0 ) { @@ -580,6 +617,7 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, ca_mem->ca_ckpntData = ca_mem->ck_mem; ca_mem->ca_np = cv_mem->cv_nst % ca_mem->ca_nsteps + 1; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -624,7 +662,7 @@ int CVodeCreateB(void *cvode_mem, int lmmB, int *which) /* Create and set a new CVODES object for the backward problem */ - cvodeB_mem = CVodeCreate(lmmB); + cvodeB_mem = CVodeCreate(lmmB, cv_mem->cv_sunctx); if (cvodeB_mem == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeCreateB", MSGCV_MEM_FAIL); return(CV_MEM_FAIL); @@ -695,10 +733,13 @@ int CVodeInitB(void *cvode_mem, int which, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeInitB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -707,6 +748,7 @@ int CVodeInitB(void *cvode_mem, int which, if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeInitB", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -724,7 +766,10 @@ int CVodeInitB(void *cvode_mem, int which, flag = CVodeInit(cvodeB_mem, CVArhs, tB0, yB0); - if (flag != CV_SUCCESS) return(flag); + if (flag != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(flag); + } /* Copy fB function in cvB_mem */ @@ -737,6 +782,7 @@ int CVodeInitB(void *cvode_mem, int which, cvB_mem->cv_y = N_VClone(yB0); N_VScale(ONE, yB0, cvB_mem->cv_y); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -758,10 +804,13 @@ int CVodeInitBS(void *cvode_mem, int which, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeInitBS", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -770,6 +819,7 @@ int CVodeInitBS(void *cvode_mem, int which, if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeInitBS", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -787,7 +837,10 @@ int CVodeInitBS(void *cvode_mem, int which, flag = CVodeInit(cvodeB_mem, CVArhs, tB0, yB0); - if (flag != CV_SUCCESS) return(flag); + if (flag != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(flag); + } /* Copy fBs function in cvB_mem */ @@ -800,6 +853,7 @@ int CVodeInitBS(void *cvode_mem, int which, cvB_mem->cv_y = N_VClone(yB0); N_VScale(ONE, yB0, cvB_mem->cv_y); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -820,9 +874,12 @@ int CVodeReInitB(void *cvode_mem, int which, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeReInitB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -830,6 +887,7 @@ int CVodeReInitB(void *cvode_mem, int which, /* Check the value of which */ if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeReInitB", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -846,6 +904,7 @@ int CVodeReInitB(void *cvode_mem, int which, flag = CVodeReInit(cvodeB_mem, tB0, yB0); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -964,9 +1023,12 @@ int CVodeQuadInitB(void *cvode_mem, int which, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeQuadInitB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -974,6 +1036,7 @@ int CVodeQuadInitB(void *cvode_mem, int which, /* Check which */ if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeQuadInitB", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -987,11 +1050,15 @@ int CVodeQuadInitB(void *cvode_mem, int which, cvodeB_mem = (void *) (cvB_mem->cv_mem); flag = CVodeQuadInit(cvodeB_mem, CVArhsQ, yQB0); - if (flag != CV_SUCCESS) return(flag); + if (flag != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(flag); + } cvB_mem->cv_fQ_withSensi = SUNFALSE; cvB_mem->cv_fQ = fQB; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -1011,9 +1078,12 @@ int CVodeQuadInitBS(void *cvode_mem, int which, } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeQuadInitBS", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -1021,6 +1091,7 @@ int CVodeQuadInitBS(void *cvode_mem, int which, /* Check which */ if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeQuadInitBS", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1034,11 +1105,15 @@ int CVodeQuadInitBS(void *cvode_mem, int which, cvodeB_mem = (void *) (cvB_mem->cv_mem); flag = CVodeQuadInit(cvodeB_mem, CVArhsQ, yQB0); - if (flag != CV_SUCCESS) return(flag); + if (flag != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(flag); + } cvB_mem->cv_fQ_withSensi = SUNTRUE; cvB_mem->cv_fQs = fQBs; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -1057,9 +1132,12 @@ int CVodeQuadReInitB(void *cvode_mem, int which, N_Vector yQB0) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeQuadReInitB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -1067,6 +1145,7 @@ int CVodeQuadReInitB(void *cvode_mem, int which, N_Vector yQB0) /* Check the value of which */ if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeQuadReInitB", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1080,8 +1159,12 @@ int CVodeQuadReInitB(void *cvode_mem, int which, N_Vector yQB0) cvodeB_mem = (void *) (cvB_mem->cv_mem); flag = CVodeQuadReInit(cvodeB_mem, yQB0); - if (flag != CV_SUCCESS) return(flag); + if (flag != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(flag); + } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -1100,9 +1183,12 @@ int CVodeQuadSStolerancesB(void *cvode_mem, int which, realtype reltolQB, realty } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeQuadSStolerancesB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -1110,6 +1196,7 @@ int CVodeQuadSStolerancesB(void *cvode_mem, int which, realtype reltolQB, realty /* Check which */ if ( which >= ca_mem->ca_nbckpbs ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeQuadSStolerancesB", MSGCV_BAD_WHICH); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1124,6 +1211,7 @@ int CVodeQuadSStolerancesB(void *cvode_mem, int which, realtype reltolQB, realty flag = CVodeQuadSStolerances(cvodeB_mem, reltolQB, abstolQB); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -1206,10 +1294,13 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Was ASA initialized? */ if (cv_mem->cv_adjMallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_ADJ, "CVODEA", "CVodeB", MSGCV_NO_ADJ); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_ADJ); } ca_mem = cv_mem->cv_adj_mem; @@ -1218,6 +1309,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if ( ca_mem->ca_nbckpbs == 0 ) { cvProcessError(cv_mem, CV_NO_BCK, "CVODEA", "CVodeB", MSGCV_NO_BCK); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_BCK); } cvB_mem = ca_mem->cvB_mem; @@ -1226,6 +1318,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if ( ca_mem->ca_firstCVodeFcall ) { cvProcessError(cv_mem, CV_NO_FWD, "CVODEA", "CVodeB", MSGCV_NO_FWD); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_FWD); } sign = (ca_mem->ca_tfinal - ca_mem->ca_tinitial > ZERO) ? 1 : -1; @@ -1247,12 +1340,14 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if ( (sign*(tBn-ca_mem->ca_tinitial) < ZERO) || (sign*(ca_mem->ca_tfinal-tBn) < ZERO) ) { cvProcessError(cv_mem, CV_BAD_TB0, "CVODEA", "CVodeB", MSGCV_BAD_TB0, tmp_cvB_mem->cv_index); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_TB0); } if (sign*(tBn-tBout) <= ZERO) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeB", MSGCV_BAD_TBOUT, tmp_cvB_mem->cv_index); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1265,6 +1360,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if ( ca_mem->ca_IMinterpSensi && !ca_mem->ca_IMstoreSensi) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeB", MSGCV_BAD_SENSI); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1275,6 +1371,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if ( (itaskB != CV_NORMAL) && (itaskB != CV_ONE_STEP) ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeB", MSGCV_BAD_ITASKB); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -1286,6 +1383,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) tBout = ca_mem->ca_tinitial; } else { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODEA", "CVodeB", MSGCV_BAD_TBOUT); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } } @@ -1385,6 +1483,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) if (flag <0) { cvProcessError(cv_mem, flag, "CVODEA", "CVodeB", MSGCV_BACK_ERROR, tmp_cvB_mem->cv_index); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -1413,6 +1512,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } diff --git a/src/lib/cvodes/cvodea_io.c b/src/lib/cvodes/cvodea_io.c index 167b2eb..ebd6142 100644 --- a/src/lib/cvodes/cvodea_io.c +++ b/src/lib/cvodes/cvodea_io.c @@ -6,7 +6,7 @@ * Programmer: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes.c b/src/lib/cvodes/cvodes.c index 7ead2b6..d14c444 100644 --- a/src/lib/cvodes/cvodes.c +++ b/src/lib/cvodes/cvodes.c @@ -1,9 +1,8 @@ -/* - * ----------------------------------------------------------------- +/* ----------------------------------------------------------------- * Programmer(s): Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -182,11 +181,9 @@ * * -----------------------------------------------------------------*/ -/* - * ================================================================= - * IMPORTED HEADER FILES - * ================================================================= - */ +/*=================================================================*/ +/* Import Header Files */ +/*=================================================================*/ #include #include @@ -194,110 +191,52 @@ #include #include "cvodes_impl.h" -#include #include #include -/* - * ================================================================= - * CVODES PRIVATE CONSTANTS - * ================================================================= - */ - -#define ZERO RCONST(0.0) -#define TINY RCONST(1.0e-10) -#define PT1 RCONST(0.1) -#define POINT2 RCONST(0.2) -#define FOURTH RCONST(0.25) -#define HALF RCONST(0.5) -#define PT9 RCONST(0.9) -#define ONE RCONST(1.0) -#define ONEPT5 RCONST(1.5) -#define TWO RCONST(2.0) -#define THREE RCONST(3.0) -#define FOUR RCONST(4.0) -#define FIVE RCONST(5.0) -#define TWELVE RCONST(12.0) -#define HUNDRED RCONST(100.0) - -/* - * ================================================================= - * CVODES ROUTINE-SPECIFIC CONSTANTS - * ================================================================= - */ - -/* - * Control constants for lower-level functions used by cvStep - * ---------------------------------------------------------- - * - * cvHin return values: - * CV_SUCCESS, - * CV_RHSFUNC_FAIL, CV_RPTD_RHSFUNC_ERR, - * CV_QRHSFUNC_FAIL, CV_RPTD_QRHSFUNC_ERR, - * CV_SRHSFUNC_FAIL, CV_RPTD_SRHSFUNC_ERR, - * CV_TOO_CLOSE - * - * cvStep control constants: - * DO_ERROR_TEST - * PREDICT_AGAIN - * - * cvStep return values: - * CV_SUCCESS, - * CV_CONV_FAILURE, CV_ERR_FAILURE, - * CV_LSETUP_FAIL, CV_LSOLVE_FAIL, - * CV_RTFUNC_FAIL, - * CV_RHSFUNC_FAIL, CV_QRHSFUNC_FAIL, CV_SRHSFUNC_FAIL, CV_QSRHSFUNC_FAIL, - * CV_FIRST_RHSFUNC_ERR, CV_FIRST_QRHSFUNC_ERR, CV_FIRST_SRHSFUNC_ERR, CV_FIRST_QSRHSFUNC_ERR, - * CV_UNREC_RHSFUNC_ERR, CV_UNREC_QRHSFUNC_ERR, CV_UNREC_SRHSFUNC_ERR, CV_UNREC_QSRHSFUNC_ERR, - * CV_REPTD_RHSFUNC_ERR, CV_REPTD_QRHSFUNC_ERR, CV_REPTD_SRHSFUNC_ERR, CV_REPTD_QSRHSFUNC_ERR, - * - * cvNls input nflag values: - * FIRST_CALL - * PREV_CONV_FAIL - * PREV_ERR_FAIL - * - * cvNls return values: - * CV_SUCCESS, - * CV_LSETUP_FAIL, CV_LSOLVE_FAIL, - * CV_RHSFUNC_FAIL, CV_SRHSFUNC_FAIL, - * SUN_NLS_CONV_RECVR, - * RHSFUNC_RECVR, SRHSFUNC_RECVR - * - */ - -#define DO_ERROR_TEST +2 -#define PREDICT_AGAIN +3 - -#define CONV_FAIL +4 -#define TRY_AGAIN +5 -#define FIRST_CALL +6 -#define PREV_CONV_FAIL +7 -#define PREV_ERR_FAIL +8 - -#define CONSTR_RECVR +10 - -#define QRHSFUNC_RECVR +11 -#define QSRHSFUNC_RECVR +13 +/*=================================================================*/ +/* CVODE Private Constants */ +/*=================================================================*/ + +#define ZERO RCONST(0.0) /* real 0.0 */ +#define TINY RCONST(1.0e-10) /* small number */ +#define PT1 RCONST(0.1) /* real 0.1 */ +#define POINT2 RCONST(0.2) /* real 0.2 */ +#define FOURTH RCONST(0.25) /* real 0.25 */ +#define HALF RCONST(0.5) /* real 0.5 */ +#define PT9 RCONST(0.9) /* real 0.9 */ +#define ONE RCONST(1.0) /* real 1.0 */ +#define ONEPT5 RCONST(1.50) /* real 1.5 */ +#define TWO RCONST(2.0) /* real 2.0 */ +#define THREE RCONST(3.0) /* real 3.0 */ +#define FOUR RCONST(4.0) /* real 4.0 */ +#define FIVE RCONST(5.0) /* real 5.0 */ +#define TWELVE RCONST(12.0) /* real 12.0 */ +#define HUNDRED RCONST(100.0) /* real 100.0 */ + +/*=================================================================*/ +/* CVODE Routine-Specific Constants */ +/*=================================================================*/ /* * Control constants for lower-level rootfinding functions * ------------------------------------------------------- * * cvRcheck1 return values: - * CV_SUCCESS, - * CV_RTFUNC_FAIL, + * CV_SUCCESS + * CV_RTFUNC_FAIL * cvRcheck2 return values: - * CV_SUCCESS, - * CV_RTFUNC_FAIL, - * CLOSERT, + * CV_SUCCESS + * CV_RTFUNC_FAIL + * CLOSERT * RTFOUND * cvRcheck3 return values: - * CV_SUCCESS, - * CV_RTFUNC_FAIL, + * CV_SUCCESS + * CV_RTFUNC_FAIL * RTFOUND * cvRootfind return values: - * CV_SUCCESS, - * CV_RTFUNC_FAIL, + * CV_SUCCESS + * CV_RTFUNC_FAIL * RTFOUND */ @@ -352,38 +291,8 @@ * * CORTES constant in nonlinear iteration convergence test * - * cvStep - * - * THRESH if eta < THRESH reject a change in step size or order - * ETAMX1 -+ - * ETAMX2 | - * ETAMX3 |-> bounds for eta (step size change) - * ETAMXF | - * ETAMIN | - * ETACF -+ - * ADDON safety factor in computing eta - * BIAS1 -+ - * BIAS2 |-> bias factors in eta selection - * BIAS3 -+ - * ONEPSM (1+epsilon) used in testing if the step size is below its bound - * - * SMALL_NST nst > SMALL_NST => use ETAMX3 - * MXNCF max no. of convergence failures during one step try - * MXNEF max no. of error test failures during one step try - * MXNEF1 max no. of error test failures before forcing a reduction of order - * SMALL_NEF if an error failure occurs and SMALL_NEF <= nef <= MXNEF1, then - * reset eta = SUNMIN(eta, ETAMXF) - * LONG_WAIT number of steps to wait before considering an order change when - * q==1 and MXNEF1 error test failures have occurred - * - * cvNls - * - * DGMAX |gamma/gammap-1| > DGMAX => call lsetup - * MSBP max no. of steps between lsetup calls - * */ - #define FUZZ_FACTOR RCONST(100.0) #define HLB_FACTOR RCONST(100.0) @@ -393,37 +302,16 @@ #define CORTES RCONST(0.1) -#define THRESH RCONST(1.5) -#define ETAMX1 RCONST(10000.0) -#define ETAMX2 RCONST(10.0) -#define ETAMX3 RCONST(10.0) -#define ETAMXF RCONST(0.2) -#define ETAMIN RCONST(0.1) -#define ETACF RCONST(0.25) -#define ADDON RCONST(0.000001) -#define BIAS1 RCONST(6.0) -#define BIAS2 RCONST(6.0) -#define BIAS3 RCONST(10.0) -#define ONEPSM RCONST(1.000001) - -#define SMALL_NST 10 -#define MXNCF 10 -#define MXNEF 7 -#define MXNEF1 3 -#define SMALL_NEF 2 -#define LONG_WAIT 10 - -#define DGMAX RCONST(0.3) -#define MSBP 20 - -/* - * ================================================================= - * PRIVATE FUNCTION PROTOTYPES - * ================================================================= - */ +/*=================================================================*/ +/* Private Helper Functions Prototypes */ +/*=================================================================*/ static booleantype cvCheckNvector(N_Vector tmpl); +/* Initial setup */ + +static int cvInitialSetup(CVodeMem cv_mem); + /* Memory allocation/deallocation */ static booleantype cvAllocVectors(CVodeMem cv_mem, N_Vector tmpl); @@ -438,15 +326,6 @@ static void cvSensFreeVectors(CVodeMem cv_mem); static booleantype cvQuadSensAllocVectors(CVodeMem cv_mem, N_Vector tmpl); static void cvQuadSensFreeVectors(CVodeMem cv_mem); -/* Initial stepsize calculation */ - -static int cvHin(CVodeMem cv_mem, realtype tout); -static realtype cvUpperBoundH0(CVodeMem cv_mem, realtype tdist); -static int cvYddNorm(CVodeMem cv_mem, realtype hg, realtype *yddnrm); - -/* Initial setup */ - -static int cvInitialSetup(CVodeMem cv_mem); static int cvEwtSetSS(CVodeMem cv_mem, N_Vector ycur, N_Vector weight); static int cvEwtSetSV(CVodeMem cv_mem, N_Vector ycur, N_Vector weight); @@ -465,6 +344,13 @@ static int cvQuadSensEwtSetEE(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weigh static int cvQuadSensEwtSetSS(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS); static int cvQuadSensEwtSetSV(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS); + +/* Initial stepsize calculation */ + +static int cvHin(CVodeMem cv_mem, realtype tout); +static realtype cvUpperBoundH0(CVodeMem cv_mem, realtype tdist); +static int cvYddNorm(CVodeMem cv_mem, realtype hg, realtype *yddnrm); + /* Main cvStep function */ static int cvStep(CVodeMem cv_mem); @@ -477,7 +363,6 @@ static void cvAdjustAdams(CVodeMem cv_mem, int deltaq); static void cvAdjustBDF(CVodeMem cv_mem, int deltaq); static void cvIncreaseBDF(CVodeMem cv_mem); static void cvDecreaseBDF(CVodeMem cv_mem); -static void cvRescale(CVodeMem cv_mem); static void cvPredict(CVodeMem cv_mem); static void cvSet(CVodeMem cv_mem); static void cvSetAdams(CVodeMem cv_mem); @@ -501,8 +386,6 @@ static int cvCheckConstraints(CVodeMem cv_mem); static int cvHandleNFlag(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, int *ncfPtr, long int *ncfnPtr); -static void cvRestore(CVodeMem cv_mem, realtype saved_t); - /* Error Test */ static int cvDoErrorTest(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, @@ -558,7 +441,7 @@ static int cvQuadSensRhs1InternalDQ(CVodeMem cv_mem, int is, realtype t, /* * ================================================================= - * EXPORTED FUNCTIONS IMPLEMENTATION + * Exported Functions Implementation * ================================================================= */ @@ -579,7 +462,7 @@ static int cvQuadSensRhs1InternalDQ(CVodeMem cv_mem, int is, realtype t, * message to standard err and returns NULL. */ -void *CVodeCreate(int lmm) +void *CVodeCreate(int lmm, SUNContext sunctx) { int maxord; CVodeMem cv_mem; @@ -591,6 +474,11 @@ void *CVodeCreate(int lmm) return(NULL); } + if (sunctx == NULL) { + cvProcessError(NULL, 0, "CVODES", "CVodeCreate", MSGCV_NULL_SUNCTX); + return(NULL); + } + cv_mem = NULL; cv_mem = (CVodeMem) malloc(sizeof(struct CVodeMemRec)); if (cv_mem == NULL) { @@ -603,40 +491,56 @@ void *CVodeCreate(int lmm) maxord = (lmm == CV_ADAMS) ? ADAMS_Q_MAX : BDF_Q_MAX; - /* copy input parameter into cv_mem */ - + /* Copy input parameters into cv_mem */ + cv_mem->cv_sunctx = sunctx; cv_mem->cv_lmm = lmm; /* Set uround */ - cv_mem->cv_uround = UNIT_ROUNDOFF; /* Set default values for integrator optional inputs */ - - cv_mem->cv_f = NULL; - cv_mem->cv_user_data = NULL; - cv_mem->cv_itol = CV_NN; - cv_mem->cv_atolmin0 = SUNTRUE; - cv_mem->cv_user_efun = SUNFALSE; - cv_mem->cv_efun = NULL; - cv_mem->cv_e_data = NULL; - cv_mem->cv_ehfun = cvErrHandler; - cv_mem->cv_eh_data = cv_mem; - cv_mem->cv_errfp = stderr; - cv_mem->cv_qmax = maxord; - cv_mem->cv_mxstep = MXSTEP_DEFAULT; - cv_mem->cv_mxhnil = MXHNIL_DEFAULT; - cv_mem->cv_sldeton = SUNFALSE; - cv_mem->cv_hin = ZERO; - cv_mem->cv_hmin = HMIN_DEFAULT; - cv_mem->cv_hmax_inv = HMAX_INV_DEFAULT; - cv_mem->cv_tstopset = SUNFALSE; - cv_mem->cv_maxnef = MXNEF; - cv_mem->cv_maxncf = MXNCF; - cv_mem->cv_nlscoef = CORTES; - cv_mem->convfail = CV_NO_FAILURES; - cv_mem->cv_constraints = NULL; - cv_mem->cv_constraintsSet = SUNFALSE; + cv_mem->cv_f = NULL; + cv_mem->cv_user_data = NULL; + cv_mem->cv_itol = CV_NN; + cv_mem->cv_atolmin0 = SUNTRUE; + cv_mem->cv_user_efun = SUNFALSE; + cv_mem->cv_efun = NULL; + cv_mem->cv_e_data = NULL; + cv_mem->cv_ehfun = cvErrHandler; + cv_mem->cv_eh_data = cv_mem; + cv_mem->cv_monitorfun = NULL; + cv_mem->cv_monitor_interval = 0; + cv_mem->cv_errfp = stderr; +#if SUNDIALS_LOGGING_LEVEL > 0 + cv_mem->cv_errfp = (CV_LOGGER->error_fp) ? CV_LOGGER->error_fp : stderr; +#endif + cv_mem->cv_qmax = maxord; + cv_mem->cv_mxstep = MXSTEP_DEFAULT; + cv_mem->cv_mxhnil = MXHNIL_DEFAULT; + cv_mem->cv_sldeton = SUNFALSE; + cv_mem->cv_hin = ZERO; + cv_mem->cv_hmin = HMIN_DEFAULT; + cv_mem->cv_hmax_inv = HMAX_INV_DEFAULT; + cv_mem->cv_eta_min_fx = ETA_MIN_FX_DEFAULT; + cv_mem->cv_eta_max_fx = ETA_MAX_FX_DEFAULT; + cv_mem->cv_eta_max_fs = ETA_MAX_FS_DEFAULT; + cv_mem->cv_eta_max_es = ETA_MAX_ES_DEFAULT; + cv_mem->cv_eta_max_gs = ETA_MAX_GS_DEFAULT; + cv_mem->cv_eta_min = ETA_MIN_DEFAULT; + cv_mem->cv_eta_min_ef = ETA_MIN_EF_DEFAULT; + cv_mem->cv_eta_max_ef = ETA_MAX_EF_DEFAULT; + cv_mem->cv_eta_cf = ETA_CF_DEFAULT; + cv_mem->cv_small_nst = SMALL_NST_DEFAULT; + cv_mem->cv_small_nef = SMALL_NEF_DEFAULT; + cv_mem->cv_tstopset = SUNFALSE; + cv_mem->cv_maxnef = MXNEF; + cv_mem->cv_maxncf = MXNCF; + cv_mem->cv_nlscoef = CORTES; + cv_mem->cv_msbp = MSBP_DEFAULT; + cv_mem->cv_dgmax_lsetup = DGMAX_LSETUP_DEFAULT; + cv_mem->convfail = CV_NO_FAILURES; + cv_mem->cv_constraints = NULL; + cv_mem->cv_constraintsSet = SUNFALSE; /* Initialize root finding variables */ @@ -650,6 +554,11 @@ void *CVodeCreate(int lmm) cv_mem->cv_gactive = NULL; cv_mem->cv_mxgnull = 1; + /* Initialize projection variables */ + cv_mem->proj_mem = NULL; + cv_mem->proj_enabled = SUNFALSE; + cv_mem->proj_applied = SUNFALSE; + /* Set default values for quad. optional inputs */ cv_mem->cv_quadr = SUNFALSE; @@ -675,6 +584,7 @@ void *CVodeCreate(int lmm) cv_mem->cv_ncfS1 = NULL; cv_mem->cv_ncfnS1 = NULL; cv_mem->cv_nniS1 = NULL; + cv_mem->cv_nnfS1 = NULL; cv_mem->cv_itolS = CV_NN; cv_mem->cv_atolSmin0 = NULL; @@ -693,7 +603,7 @@ void *CVodeCreate(int lmm) cv_mem->cv_adj = SUNFALSE; cv_mem->cv_adj_mem = NULL; - /* Set the saved values for qmax_alloc */ + /* Set the saved value for qmax_alloc */ cv_mem->cv_qmax_alloc = maxord; cv_mem->cv_qmax_allocQ = maxord; @@ -774,23 +684,24 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) /* Check cvode_mem */ if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeInit", - MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeInit", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Check for legal input parameters */ if (y0==NULL) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeInit", - MSGCV_NULL_Y0); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeInit", MSGCV_NULL_Y0); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } if (f == NULL) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeInit", - MSGCV_NULL_F); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeInit", MSGCV_NULL_F); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -800,6 +711,7 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) if(!nvectorOK) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeInit", MSGCV_BAD_NVECTOR); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -818,8 +730,8 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) allocOK = cvAllocVectors(cv_mem, y0); if (!allocOK) { - cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeInit", - MSGCV_MEM_FAIL); + cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeInit", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -839,16 +751,27 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) cvFreeVectors(cv_mem); cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeInit", MSGCV_MEM_FAIL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } + /* Input checks complete at this point and history array allocated */ + + /* Copy the input parameters into CVODE state */ + cv_mem->cv_f = f; + cv_mem->cv_tn = t0; + + /* Initialize zn[0] in the history array */ + N_VScale(ONE, y0, cv_mem->cv_zn[0]); + /* create a Newton nonlinear solver object by default */ - NLS = SUNNonlinSol_Newton(y0); + NLS = SUNNonlinSol_Newton(y0, cv_mem->cv_sunctx); /* check that nonlinear solver is non-NULL */ if (NLS == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeInit", MSGCV_MEM_FAIL); cvFreeVectors(cv_mem); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -861,6 +784,7 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) "Setting the nonlinear solver failed"); cvFreeVectors(cv_mem); SUNNonlinSolFree(NLS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_MEM_FAIL); } @@ -869,21 +793,16 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) /* All error checking is complete at this point */ - /* Copy the input parameters into CVODES state */ - - cv_mem->cv_f = f; - cv_mem->cv_tn = t0; - /* Set step parameters */ cv_mem->cv_q = 1; cv_mem->cv_L = 2; cv_mem->cv_qwait = cv_mem->cv_L; - cv_mem->cv_etamax = ETAMX1; + cv_mem->cv_etamax = cv_mem->cv_eta_max_fs; - cv_mem->cv_qu = 0; - cv_mem->cv_hu = ZERO; - cv_mem->cv_tolsf = ONE; + cv_mem->cv_qu = 0; + cv_mem->cv_hu = ZERO; + cv_mem->cv_tolsf = ONE; /* Set the linear solver addresses to NULL. (We check != NULL later, in CVode) */ @@ -898,10 +817,6 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) cv_mem->cv_forceSetup = SUNFALSE; - /* Initialize zn[0] in the history array */ - - N_VScale(ONE, y0, cv_mem->cv_zn[0]); - /* Initialize all the counters */ cv_mem->cv_nst = 0; @@ -909,6 +824,7 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) cv_mem->cv_ncfn = 0; cv_mem->cv_netf = 0; cv_mem->cv_nni = 0; + cv_mem->cv_nnf = 0; cv_mem->cv_nsetups = 0; cv_mem->cv_nhnil = 0; cv_mem->cv_nstlp = 0; @@ -937,6 +853,7 @@ int CVodeInit(void *cvode_mem, CVRhsFn f, realtype t0, N_Vector y0) cv_mem->cv_MallocDone = SUNTRUE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -962,17 +879,19 @@ int CVodeReInit(void *cvode_mem, realtype t0, N_Vector y0) /* Check cvode_mem */ if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeReInit", - MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeReInit", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Check if cvode_mem was allocated */ if (cv_mem->cv_MallocDone == SUNFALSE) { cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVodeReInit", MSGCV_NO_MALLOC); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_MALLOC); } @@ -981,6 +900,7 @@ int CVodeReInit(void *cvode_mem, realtype t0, N_Vector y0) if (y0 == NULL) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeReInit", MSGCV_NULL_Y0); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -993,11 +913,11 @@ int CVodeReInit(void *cvode_mem, realtype t0, N_Vector y0) cv_mem->cv_q = 1; cv_mem->cv_L = 2; cv_mem->cv_qwait = cv_mem->cv_L; - cv_mem->cv_etamax = ETAMX1; + cv_mem->cv_etamax = cv_mem->cv_eta_max_fs; - cv_mem->cv_qu = 0; - cv_mem->cv_hu = ZERO; - cv_mem->cv_tolsf = ONE; + cv_mem->cv_qu = 0; + cv_mem->cv_hu = ZERO; + cv_mem->cv_tolsf = ONE; /* Set forceSetup to SUNFALSE */ @@ -1014,6 +934,7 @@ int CVodeReInit(void *cvode_mem, realtype t0, N_Vector y0) cv_mem->cv_ncfn = 0; cv_mem->cv_netf = 0; cv_mem->cv_nni = 0; + cv_mem->cv_nnf = 0; cv_mem->cv_nsetups = 0; cv_mem->cv_nhnil = 0; cv_mem->cv_nstlp = 0; @@ -1037,6 +958,7 @@ int CVodeReInit(void *cvode_mem, realtype t0, N_Vector y0) /* Problem has been successfully re-initialized */ + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -1063,29 +985,29 @@ int CVodeSStolerances(void *cvode_mem, realtype reltol, realtype abstol) CVodeMem cv_mem; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeSStolerances", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSStolerances", + MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; if (cv_mem->cv_MallocDone == SUNFALSE) { - cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", - "CVodeSStolerances", MSGCV_NO_MALLOC); + cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVodeSStolerances", + MSGCV_NO_MALLOC); return(CV_NO_MALLOC); } /* Check inputs */ if (reltol < ZERO) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", - "CVodeSStolerances", MSGCV_BAD_RELTOL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSStolerances", + MSGCV_BAD_RELTOL); return(CV_ILL_INPUT); } if (abstol < ZERO) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", - "CVodeSStolerances", MSGCV_BAD_ABSTOL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSStolerances", + MSGCV_BAD_ABSTOL); return(CV_ILL_INPUT); } @@ -1111,23 +1033,23 @@ int CVodeSVtolerances(void *cvode_mem, realtype reltol, N_Vector abstol) realtype atolmin; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeSVtolerances", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSVtolerances", + MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; if (cv_mem->cv_MallocDone == SUNFALSE) { - cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", - "CVodeSVtolerances", MSGCV_NO_MALLOC); + cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVodeSVtolerances", + MSGCV_NO_MALLOC); return(CV_NO_MALLOC); } /* Check inputs */ if (reltol < ZERO) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", - "CVodeSVtolerances", MSGCV_BAD_RELTOL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSVtolerances", + MSGCV_BAD_RELTOL); return(CV_ILL_INPUT); } @@ -1138,8 +1060,8 @@ int CVodeSVtolerances(void *cvode_mem, realtype reltol, N_Vector abstol) } atolmin = N_VMin(abstol); if (atolmin < ZERO) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", - "CVodeSVtolerances", MSGCV_BAD_ABSTOL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSVtolerances", + MSGCV_BAD_ABSTOL); return(CV_ILL_INPUT); } @@ -1171,15 +1093,15 @@ int CVodeWFtolerances(void *cvode_mem, CVEwtFn efun) CVodeMem cv_mem; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeWFtolerances", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeWFtolerances", + MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; if (cv_mem->cv_MallocDone == SUNFALSE) { - cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", - "CVodeWFtolerances", MSGCV_NO_MALLOC); + cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVodeWFtolerances", + MSGCV_NO_MALLOC); return(CV_NO_MALLOC); } @@ -1568,6 +1490,7 @@ int CVodeSensInit(void *cvode_mem, int Ns, int ism, CVSensRhsFn fS, N_Vector *yS cv_mem->cv_ncfnS = 0; cv_mem->cv_netfS = 0; cv_mem->cv_nniS = 0; + cv_mem->cv_nnfS = 0; cv_mem->cv_nsetupsS = 0; /* Set default values for plist and pbar */ @@ -1584,9 +1507,9 @@ int CVodeSensInit(void *cvode_mem, int Ns, int ism, CVSensRhsFn fS, N_Vector *yS /* create a Newton nonlinear solver object by default */ if (ism == CV_SIMULTANEOUS) - NLS = SUNNonlinSol_NewtonSens(Ns+1, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(Ns+1, cv_mem->cv_acor, cv_mem->cv_sunctx); else - NLS = SUNNonlinSol_NewtonSens(Ns, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(Ns, cv_mem->cv_acor, cv_mem->cv_sunctx); /* check that the nonlinear solver is non-NULL */ if (NLS == NULL) { @@ -1711,9 +1634,12 @@ int CVodeSensInit1(void *cvode_mem, int Ns, int ism, CVSensRhs1Fn fS1, N_Vector cv_mem->cv_ncfnS1 = (long int*)malloc(Ns*sizeof(long int)); cv_mem->cv_nniS1 = NULL; cv_mem->cv_nniS1 = (long int*)malloc(Ns*sizeof(long int)); - if ( (cv_mem->cv_ncfS1 == NULL) || + cv_mem->cv_nnfS1 = NULL; + cv_mem->cv_nnfS1 = (long int*)malloc(Ns*sizeof(long int)); + if ( (cv_mem->cv_ncfS1 == NULL) || (cv_mem->cv_ncfnS1 == NULL) || - (cv_mem->cv_nniS1 == NULL) ) { + (cv_mem->cv_nniS1 == NULL) || + (cv_mem->cv_nnfS1 == NULL) ) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSensInit1", MSGCV_MEM_FAIL); return(CV_MEM_FAIL); @@ -1727,9 +1653,10 @@ int CVodeSensInit1(void *cvode_mem, int Ns, int ism, CVSensRhs1Fn fS1, N_Vector allocOK = cvSensAllocVectors(cv_mem, yS0[0]); if (!allocOK) { if (cv_mem->cv_stgr1alloc) { - free(cv_mem->cv_ncfS1); cv_mem->cv_ncfS1 = NULL; + free(cv_mem->cv_ncfS1); cv_mem->cv_ncfS1 = NULL; free(cv_mem->cv_ncfnS1); cv_mem->cv_ncfnS1 = NULL; - free(cv_mem->cv_nniS1); cv_mem->cv_nniS1 = NULL; + free(cv_mem->cv_nniS1); cv_mem->cv_nniS1 = NULL; + free(cv_mem->cv_nnfS1); cv_mem->cv_nnfS1 = NULL; } cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSensInit1", MSGCV_MEM_FAIL); @@ -1750,9 +1677,10 @@ int CVodeSensInit1(void *cvode_mem, int Ns, int ism, CVSensRhs1Fn fS1, N_Vector (cv_mem->cv_Xvecs == NULL) || (cv_mem->cv_Zvecs == NULL)) { if (cv_mem->cv_stgr1alloc) { - free(cv_mem->cv_ncfS1); cv_mem->cv_ncfS1 = NULL; + free(cv_mem->cv_ncfS1); cv_mem->cv_ncfS1 = NULL; free(cv_mem->cv_ncfnS1); cv_mem->cv_ncfnS1 = NULL; - free(cv_mem->cv_nniS1); cv_mem->cv_nniS1 = NULL; + free(cv_mem->cv_nniS1); cv_mem->cv_nniS1 = NULL; + free(cv_mem->cv_nnfS1); cv_mem->cv_nnfS1 = NULL; } cvSensFreeVectors(cv_mem); cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSensInit1", @@ -1780,11 +1708,13 @@ int CVodeSensInit1(void *cvode_mem, int Ns, int ism, CVSensRhs1Fn fS1, N_Vector cv_mem->cv_ncfnS = 0; cv_mem->cv_netfS = 0; cv_mem->cv_nniS = 0; + cv_mem->cv_nnfS = 0; cv_mem->cv_nsetupsS = 0; if (ism==CV_STAGGERED1) for (is=0; iscv_ncfnS1[is] = 0; - cv_mem->cv_nniS1[is] = 0; + cv_mem->cv_nniS1[is] = 0; + cv_mem->cv_nnfS1[is] = 0; } /* Set default values for plist and pbar */ @@ -1801,11 +1731,11 @@ int CVodeSensInit1(void *cvode_mem, int Ns, int ism, CVSensRhs1Fn fS1, N_Vector /* create a Newton nonlinear solver object by default */ if (ism == CV_SIMULTANEOUS) - NLS = SUNNonlinSol_NewtonSens(Ns+1, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(Ns+1, cv_mem->cv_acor, cv_mem->cv_sunctx); else if (ism == CV_STAGGERED) - NLS = SUNNonlinSol_NewtonSens(Ns, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(Ns, cv_mem->cv_acor, cv_mem->cv_sunctx); else - NLS = SUNNonlinSol_Newton(cv_mem->cv_acor); + NLS = SUNNonlinSol_Newton(cv_mem->cv_acor, cv_mem->cv_sunctx); /* check that the nonlinear solver is non-NULL */ if (NLS == NULL) { @@ -1919,9 +1849,12 @@ int CVodeSensReInit(void *cvode_mem, int ism, N_Vector *yS0) cv_mem->cv_ncfnS1 = (long int*)malloc(cv_mem->cv_Ns*sizeof(long int)); cv_mem->cv_nniS1 = NULL; cv_mem->cv_nniS1 = (long int*)malloc(cv_mem->cv_Ns*sizeof(long int)); - if ( (cv_mem->cv_ncfS1==NULL) || + cv_mem->cv_nnfS1 = NULL; + cv_mem->cv_nnfS1 = (long int*)malloc(cv_mem->cv_Ns*sizeof(long int)); + if ( (cv_mem->cv_ncfS1==NULL) || (cv_mem->cv_ncfnS1==NULL) || - (cv_mem->cv_nniS1==NULL) ) { + (cv_mem->cv_nniS1==NULL) || + (cv_mem->cv_nnfS1==NULL) ) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSensReInit", MSGCV_MEM_FAIL); return(CV_MEM_FAIL); @@ -1948,11 +1881,13 @@ int CVodeSensReInit(void *cvode_mem, int ism, N_Vector *yS0) cv_mem->cv_ncfnS = 0; cv_mem->cv_netfS = 0; cv_mem->cv_nniS = 0; + cv_mem->cv_nnfS = 0; cv_mem->cv_nsetupsS = 0; if (ism==CV_STAGGERED1) for (is=0; iscv_Ns; is++) { cv_mem->cv_ncfnS1[is] = 0; - cv_mem->cv_nniS1[is] = 0; + cv_mem->cv_nniS1[is] = 0; + cv_mem->cv_nnfS1[is] = 0; } /* Problem has been successfully re-initialized */ @@ -1966,11 +1901,13 @@ int CVodeSensReInit(void *cvode_mem, int ism, N_Vector *yS0) /* create a Newton nonlinear solver object by default */ if (ism == CV_SIMULTANEOUS) - NLS = SUNNonlinSol_NewtonSens(cv_mem->cv_Ns+1, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(cv_mem->cv_Ns+1, cv_mem->cv_acor, + cv_mem->cv_sunctx); else if (ism == CV_STAGGERED) - NLS = SUNNonlinSol_NewtonSens(cv_mem->cv_Ns, cv_mem->cv_acor); + NLS = SUNNonlinSol_NewtonSens(cv_mem->cv_Ns, cv_mem->cv_acor, + cv_mem->cv_sunctx); else - NLS = SUNNonlinSol_Newton(cv_mem->cv_acor); + NLS = SUNNonlinSol_Newton(cv_mem->cv_acor, cv_mem->cv_sunctx); /* check that the nonlinear solver is non-NULL */ if (NLS == NULL) { @@ -2613,10 +2550,9 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) CVodeMem cv_mem; int i, nrt; - /* Check cvode_mem */ - if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeRootInit", - MSGCV_NO_MEM); + /* Check cvode_mem pointer */ + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeRootInit", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; @@ -2636,7 +2572,6 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) cv_mem->cv_lrw -= 3 * (cv_mem->cv_nrtfn); cv_mem->cv_liw -= 3 * (cv_mem->cv_nrtfn); - } /* If CVodeRootInit() was called with nrtfn == 0, then set cv_nrtfn to @@ -2655,10 +2590,10 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) if (nrt == cv_mem->cv_nrtfn) { if (g != cv_mem->cv_gfun) { if (g == NULL) { - free(cv_mem->cv_glo); cv_mem->cv_glo = NULL; - free(cv_mem->cv_ghi); cv_mem->cv_ghi = NULL; - free(cv_mem->cv_grout); cv_mem->cv_grout = NULL; - free(cv_mem->cv_iroots); cv_mem->cv_iroots = NULL; + free(cv_mem->cv_glo); cv_mem->cv_glo = NULL; + free(cv_mem->cv_ghi); cv_mem->cv_ghi = NULL; + free(cv_mem->cv_grout); cv_mem->cv_grout = NULL; + free(cv_mem->cv_iroots); cv_mem->cv_iroots = NULL; free(cv_mem->cv_rootdir); cv_mem->cv_rootdir = NULL; free(cv_mem->cv_gactive); cv_mem->cv_gactive = NULL; @@ -2670,7 +2605,7 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) return(CV_ILL_INPUT); } else { - cv_mem->cv_gfun = g; + cv_mem->cv_gfun = g; return(CV_SUCCESS); } } @@ -2737,7 +2672,6 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) return(CV_MEM_FAIL); } - cv_mem->cv_gactive = NULL; cv_mem->cv_gactive = (booleantype *) malloc(nrt*sizeof(booleantype)); if (cv_mem->cv_gactive == NULL) { @@ -2751,7 +2685,6 @@ int CVodeRootInit(void *cvode_mem, int nrtfn, CVRootFn g) return(CV_MEM_FAIL); } - /* Set default values for rootdir (both directions) */ for(i=0; icv_rootdir[i] = 0; @@ -2804,37 +2737,38 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, /* Check if cvode_mem exists */ if (cvode_mem == NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVode", - MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVode", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + /* Check if cvode_mem was allocated */ if (cv_mem->cv_MallocDone == SUNFALSE) { - cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVode", - MSGCV_NO_MALLOC); + cvProcessError(cv_mem, CV_NO_MALLOC, "CVODES", "CVode", MSGCV_NO_MALLOC); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_MALLOC); } /* Check for yout != NULL */ if ((cv_mem->cv_y = yout) == NULL) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", - MSGCV_YOUT_NULL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_YOUT_NULL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } /* Check for tret != NULL */ if (tret == NULL) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", - MSGCV_TRET_NULL); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_TRET_NULL); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } /* Check for valid itask */ if ( (itask != CV_NORMAL) && (itask != CV_ONE_STEP) ) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", - MSGCV_BAD_ITASK); + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_BAD_ITASK); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } @@ -2860,7 +2794,10 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, /* Check inputs for corectness */ ier = cvInitialSetup(cv_mem); - if (ier!= CV_SUCCESS) return(ier); + if (ier != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(ier); + } /* * Call f at (t0,y0), set zn[1] = y'(t0). @@ -2875,11 +2812,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval < 0) { cvProcessError(cv_mem, CV_RHSFUNC_FAIL, "CVODES", "CVode", MSGCV_RHSFUNC_FAILED, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_RHSFUNC_FAIL); } if (retval > 0) { cvProcessError(cv_mem, CV_FIRST_RHSFUNC_ERR, "CVODES", "CVode", MSGCV_RHSFUNC_FIRST); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_FIRST_RHSFUNC_ERR); } @@ -2890,11 +2829,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval < 0) { cvProcessError(cv_mem, CV_QRHSFUNC_FAIL, "CVODES", "CVode", MSGCV_QRHSFUNC_FAILED, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_QRHSFUNC_FAIL); } if (retval > 0) { cvProcessError(cv_mem, CV_FIRST_QRHSFUNC_ERR, "CVODES", "CVode", MSGCV_QRHSFUNC_FIRST); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_FIRST_QRHSFUNC_ERR); } } @@ -2907,11 +2848,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval < 0) { cvProcessError(cv_mem, CV_SRHSFUNC_FAIL, "CVODES", "CVode", MSGCV_SRHSFUNC_FAILED, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SRHSFUNC_FAIL); } if (retval > 0) { cvProcessError(cv_mem, CV_FIRST_SRHSFUNC_ERR, "CVODES", "CVode", MSGCV_SRHSFUNC_FIRST); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_FIRST_SRHSFUNC_ERR); } } @@ -2925,11 +2868,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval < 0) { cvProcessError(cv_mem, CV_QSRHSFUNC_FAIL, "CVODES", "CVode", MSGCV_QSRHSFUNC_FAILED, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_QSRHSFUNC_FAIL); } if (retval > 0) { cvProcessError(cv_mem, CV_FIRST_QSRHSFUNC_ERR, "CVODES", "CVode", MSGCV_QSRHSFUNC_FIRST); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_FIRST_QSRHSFUNC_ERR); } } @@ -2940,6 +2885,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if ( (cv_mem->cv_tstop - cv_mem->cv_tn)*(tout - cv_mem->cv_tn) <= ZERO ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } } @@ -2949,6 +2895,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, cv_mem->cv_h = cv_mem->cv_hin; if ( (cv_mem->cv_h != ZERO) && ((tout-cv_mem->cv_tn)*cv_mem->cv_h < ZERO) ) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_BAD_H0); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } if (cv_mem->cv_h == ZERO) { @@ -2959,9 +2906,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, hflag = cvHin(cv_mem, tout_hin); if (hflag != CV_SUCCESS) { istate = cvHandleFailure(cv_mem, hflag); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(istate); } } + + /* Enforce hmax and hmin */ + rh = SUNRabs(cv_mem->cv_h)*cv_mem->cv_hmax_inv; if (rh > ONE) cv_mem->cv_h /= rh; if (SUNRabs(cv_mem->cv_h) < cv_mem->cv_hmin) @@ -2996,7 +2947,10 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, cv_mem->cv_znS[1], cv_mem->cv_znS[1]); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (retval != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } } if (cv_mem->cv_quadr_sensi) { @@ -3005,7 +2959,10 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, cv_mem->cv_znQS[1], cv_mem->cv_znQS[1]); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (retval != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } } /* Check for zeros of root function g at and near t0. */ @@ -3017,12 +2974,13 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval == CV_RTFUNC_FAIL) { cvProcessError(cv_mem, CV_RTFUNC_FAIL, "CVODES", "cvRcheck1", MSGCV_RTFUNC_FAILED, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_RTFUNC_FAIL); } } - } /* end first call block */ + } /* end of first call block */ /* * ------------------------------------------------------ @@ -3044,7 +3002,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, troundoff = FUZZ_FACTOR * cv_mem->cv_uround * (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)); - /* First check for a root in the last step taken, other than the + /* First, check for a root in the last step taken, other than the last root found, if any. If itask = CV_ONE_STEP and y(tn) was not returned because of an intervening root, return y(tn) now. */ if (cv_mem->cv_nrtfn > 0) { @@ -3056,13 +3014,16 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (retval == CLOSERT) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvRcheck2", MSGCV_CLOSE_ROOTS, cv_mem->cv_tlo); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } else if (retval == CV_RTFUNC_FAIL) { cvProcessError(cv_mem, CV_RTFUNC_FAIL, "CVODES", "cvRcheck2", MSGCV_RTFUNC_FAILED, cv_mem->cv_tlo); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_RTFUNC_FAIL); } else if (retval == RTFOUND) { cv_mem->cv_tretlast = *tret = cv_mem->cv_tlo; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ROOT_RETURN); } @@ -3077,15 +3038,18 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if ((irfndp == 1) && (itask == CV_ONE_STEP)) { cv_mem->cv_tretlast = *tret = cv_mem->cv_tn; N_VScale(ONE, cv_mem->cv_zn[0], yout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } } else if (retval == RTFOUND) { /* a new root was found */ cv_mem->cv_irfnd = 1; cv_mem->cv_tretlast = *tret = cv_mem->cv_tlo; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ROOT_RETURN); } else if (retval == CV_RTFUNC_FAIL) { /* g failed */ cvProcessError(cv_mem, CV_RTFUNC_FAIL, "CVODES", "cvRcheck3", MSGCV_RTFUNC_FAILED, cv_mem->cv_tlo); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_RTFUNC_FAIL); } @@ -3100,8 +3064,10 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (ier != CV_SUCCESS) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_BAD_TOUT, tout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -3110,6 +3076,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, SUNRabs(cv_mem->cv_tn - cv_mem->cv_tretlast) > troundoff ) { cv_mem->cv_tretlast = *tret = cv_mem->cv_tn; N_VScale(ONE, cv_mem->cv_zn[0], yout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_SUCCESS); } @@ -3121,10 +3088,12 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if (ier != CV_SUCCESS) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_ILL_INPUT); } cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; cv_mem->cv_tstopset = SUNFALSE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_TSTOP_RETURN); } @@ -3136,7 +3105,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, } - } /* end stopping tests block at nst>0 */ + } /* end stopping tests block */ /* * -------------------------------------------------- @@ -3171,6 +3140,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, else cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", MSGCV_EWT_NOW_BAD, cv_mem->cv_tn); + istate = CV_ILL_INPUT; cv_mem->cv_tretlast = *tret = cv_mem->cv_tn; N_VScale(ONE, cv_mem->cv_zn[0], yout); @@ -3328,7 +3298,7 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, break; } - /* Check if tn is at tstop, or about to pass tstop */ + /* Check if tn is at tstop or near tstop */ if ( cv_mem->cv_tstopset ) { troundoff = FUZZ_FACTOR * cv_mem->cv_uround * @@ -3363,17 +3333,103 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, /* Load optional output */ if (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED1)) { cv_mem->cv_nniS = 0; + cv_mem->cv_nnfS = 0; cv_mem->cv_ncfnS = 0; for (is=0; iscv_Ns; is++) { cv_mem->cv_nniS += cv_mem->cv_nniS1[is]; + cv_mem->cv_nnfS += cv_mem->cv_nnfS1[is]; cv_mem->cv_ncfnS += cv_mem->cv_ncfnS1[is]; } } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(istate); +} + +/* + * CVodeComputeState + * + * Computes y based on the current prediction and given correction. + */ +int CVodeComputeState(void *cvode_mem, N_Vector ycor, N_Vector y) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeComputeState", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + + N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, ycor, y); + + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); +} + +/* + * CVodeComputeStateSens + * + * Computes yS based on the current prediction and given correction. + */ +int CVodeComputeStateSens(void *cvode_mem, N_Vector *ycorS, N_Vector *yS) +{ + int retval; + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeComputeStateSens", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + + retval = N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znS[0], + ONE, ycorS, yS); + if (retval != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } + + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); +} + +/* + * CVodeComputeStateSens1 + * + * Computes yS[idx] based on the current prediction and given correction. + */ +int CVodeComputeStateSens1(void *cvode_mem, int idx, N_Vector ycorS1, + N_Vector yS1) +{ + CVodeMem cv_mem; + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeComputeStateSens1", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + + N_VLinearSum(ONE, cv_mem->cv_znS[0][idx], ONE, ycorS1, yS1); + + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); } + /* * ----------------------------------------------------------------- * Interpolated output and extraction functions @@ -3411,13 +3467,17 @@ int CVodeGetDky(void *cvode_mem, realtype t, int k, N_Vector dky) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if (dky == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetDky", MSGCV_NULL_DKY); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } if ((k < 0) || (k > cv_mem->cv_q)) { cvProcessError(cv_mem, CV_BAD_K, "CVODES", "CVodeGetDky", MSGCV_BAD_K); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_K); } @@ -3430,6 +3490,7 @@ int CVodeGetDky(void *cvode_mem, realtype t, int k, N_Vector dky) if ((t-tp)*(t-tn1) > ZERO) { cvProcessError(cv_mem, CV_BAD_T, "CVODES", "CVodeGetDky", MSGCV_BAD_T, t, cv_mem->cv_tn-cv_mem->cv_hu, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_T); } @@ -3447,13 +3508,20 @@ int CVodeGetDky(void *cvode_mem, realtype t, int k, N_Vector dky) nvec += 1; } ier = N_VLinearCombination(nvec, cv_mem->cv_cvals, cv_mem->cv_Xvecs, dky); - if (ier != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (ier != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } - if (k == 0) return(CV_SUCCESS); + if (k == 0) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); + } r = SUNRpowerI(cv_mem->cv_h, -k); N_VScale(r, dky, dky); - return(CV_SUCCESS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); } /* @@ -3475,10 +3543,13 @@ int CVodeGetQuad(void *cvode_mem, realtype *tret, N_Vector yQout) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + *tret = cv_mem->cv_tretlast; flag = CVodeGetQuadDky(cvode_mem,cv_mem->cv_tretlast,0,yQout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -3511,18 +3582,23 @@ int CVodeGetQuadDky(void *cvode_mem, realtype t, int k, N_Vector dkyQ) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if(cv_mem->cv_quadr != SUNTRUE) { cvProcessError(cv_mem, CV_NO_QUAD, "CVODES", "CVodeGetQuadDky", MSGCV_NO_QUAD); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_QUAD); } if (dkyQ == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetQuadDky", MSGCV_NULL_DKY); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } if ((k < 0) || (k > cv_mem->cv_q)) { cvProcessError(cv_mem, CV_BAD_K, "CVODES", "CVodeGetQuadDky", MSGCV_BAD_K); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_K); } @@ -3534,6 +3610,7 @@ int CVodeGetQuadDky(void *cvode_mem, realtype t, int k, N_Vector dkyQ) tn1 = cv_mem->cv_tn + tfuzz; if ((t-tp)*(t-tn1) > ZERO) { cvProcessError(cv_mem, CV_BAD_T, "CVODES", "CVodeGetQuadDky", MSGCV_BAD_T); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_T); } @@ -3551,13 +3628,20 @@ int CVodeGetQuadDky(void *cvode_mem, realtype t, int k, N_Vector dkyQ) nvec += 1; } ier = N_VLinearCombination(nvec, cv_mem->cv_cvals, cv_mem->cv_Xvecs, dkyQ); - if (ier != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (ier != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } - if (k == 0) return(CV_SUCCESS); + if (k == 0) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); + } r = SUNRpowerI(cv_mem->cv_h, -k); N_VScale(r, dkyQ, dkyQ); - return(CV_SUCCESS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); } /* @@ -3579,10 +3663,13 @@ int CVodeGetSens(void *cvode_mem, realtype *tret, N_Vector *ySout) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + *tret = cv_mem->cv_tretlast; flag = CVodeGetSensDky(cvode_mem,cv_mem->cv_tretlast,0,ySout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -3605,10 +3692,13 @@ int CVodeGetSens1(void *cvode_mem, realtype *tret, int is, N_Vector ySout) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + *tret = cv_mem->cv_tretlast; flag = CVodeGetSensDky1(cvode_mem,cv_mem->cv_tretlast,0,is,ySout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -3633,9 +3723,12 @@ int CVodeGetSensDky(void *cvode_mem, realtype t, int k, N_Vector *dkyS) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if (dkyS == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetSensDky", MSGCV_NULL_DKYA); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } @@ -3644,6 +3737,7 @@ int CVodeGetSensDky(void *cvode_mem, realtype t, int k, N_Vector *dkyS) if (ier!=CV_SUCCESS) break; } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(ier); } @@ -3677,27 +3771,33 @@ int CVodeGetSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dkyS) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if(cv_mem->cv_sensi != SUNTRUE) { cvProcessError(cv_mem, CV_NO_SENS, "CVODES", "CVodeGetSensDky1", MSGCV_NO_SENSI); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_SENS); } if (dkyS == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetSensDky1", MSGCV_NULL_DKY); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } if ((k < 0) || (k > cv_mem->cv_q)) { cvProcessError(cv_mem, CV_BAD_K, "CVODES", "CVodeGetSensDky1", MSGCV_BAD_K); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_K); } if ((is < 0) || (is > cv_mem->cv_Ns-1)) { cvProcessError(cv_mem, CV_BAD_IS, "CVODES", "CVodeGetSensDky1", MSGCV_BAD_IS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_IS); } @@ -3710,6 +3810,7 @@ int CVodeGetSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dkyS) if ((t-tp)*(t-tn1) > ZERO) { cvProcessError(cv_mem, CV_BAD_T, "CVODES", "CVodeGetSensDky1", MSGCV_BAD_T); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_T); } @@ -3727,13 +3828,20 @@ int CVodeGetSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dkyS) nvec += 1; } ier = N_VLinearCombination(nvec, cv_mem->cv_cvals, cv_mem->cv_Xvecs, dkyS); - if (ier != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (ier != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } - if (k == 0) return(CV_SUCCESS); + if (k == 0) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); + } r = SUNRpowerI(cv_mem->cv_h, -k); N_VScale(r, dkyS, dkyS); - return(CV_SUCCESS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); } /* @@ -3755,10 +3863,13 @@ int CVodeGetQuadSens(void *cvode_mem, realtype *tret, N_Vector *yQSout) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + *tret = cv_mem->cv_tretlast; flag = CVodeGetQuadSensDky(cvode_mem,cv_mem->cv_tretlast,0,yQSout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -3774,10 +3885,13 @@ int CVodeGetQuadSens1(void *cvode_mem, realtype *tret, int is, N_Vector yQSout) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + *tret = cv_mem->cv_tretlast; flag = CVodeGetQuadSensDky1(cvode_mem,cv_mem->cv_tretlast,0,is,yQSout); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(flag); } @@ -3801,9 +3915,12 @@ int CVodeGetQuadSensDky(void *cvode_mem, realtype t, int k, N_Vector *dkyQS_all) } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if (dkyQS_all == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetSensDky", MSGCV_NULL_DKYA); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } @@ -3812,6 +3929,7 @@ int CVodeGetQuadSensDky(void *cvode_mem, realtype t, int k, N_Vector *dkyQS_all) if (ier!=CV_SUCCESS) break; } + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(ier); } @@ -3831,27 +3949,33 @@ int CVodeGetQuadSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dk } cv_mem = (CVodeMem) cvode_mem; + SUNDIALS_MARK_FUNCTION_BEGIN(CV_PROFILER); + if(cv_mem->cv_quadr_sensi != SUNTRUE) { cvProcessError(cv_mem, CV_NO_QUADSENS, "CVODES", "CVodeGetQuadSensDky1", MSGCV_NO_QUADSENSI); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_NO_QUADSENS); } if (dkyQS == NULL) { cvProcessError(cv_mem, CV_BAD_DKY, "CVODES", "CVodeGetQuadSensDky1", MSGCV_NULL_DKY); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_DKY); } if ((k < 0) || (k > cv_mem->cv_q)) { cvProcessError(cv_mem, CV_BAD_K, "CVODES", "CVodeGetQuadSensDky1", MSGCV_BAD_K); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_K); } if ((is < 0) || (is > cv_mem->cv_Ns-1)) { cvProcessError(cv_mem, CV_BAD_IS, "CVODES", "CVodeGetQuadSensDky1", MSGCV_BAD_IS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_IS); } @@ -3864,6 +3988,7 @@ int CVodeGetQuadSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dk if ((t-tp)*(t-tn1) > ZERO) { cvProcessError(cv_mem, CV_BAD_T, "CVODES", "CVodeGetQuadSensDky1", MSGCV_BAD_T); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); return(CV_BAD_T); } @@ -3881,13 +4006,20 @@ int CVodeGetQuadSensDky1(void *cvode_mem, realtype t, int k, int is, N_Vector dk nvec += 1; } ier = N_VLinearCombination(nvec, cv_mem->cv_cvals, cv_mem->cv_Xvecs, dkyQS); - if (ier != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (ier != CV_SUCCESS) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_VECTOROP_ERR); + } - if (k == 0) return(CV_SUCCESS); + if (k == 0) { + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); + } r = SUNRpowerI(cv_mem->cv_h, -k); N_VScale(r, dkyQS, dkyQS); - return(CV_SUCCESS); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_SUCCESS); } /* @@ -3916,6 +4048,7 @@ void CVodeFree(void **cvode_mem) cvFreeVectors(cv_mem); + /* if CVODE created the nonlinear solver object then free it */ if (cv_mem->ownNLS) { SUNNonlinSolFree(cv_mem->NLS); cv_mem->ownNLS = SUNFALSE; @@ -3934,8 +4067,8 @@ void CVodeFree(void **cvode_mem) if (cv_mem->cv_nrtfn > 0) { free(cv_mem->cv_glo); cv_mem->cv_glo = NULL; - free(cv_mem->cv_ghi); cv_mem->cv_ghi = NULL; - free(cv_mem->cv_grout); cv_mem->cv_grout = NULL; + free(cv_mem->cv_ghi); cv_mem->cv_ghi = NULL; + free(cv_mem->cv_grout); cv_mem->cv_grout = NULL; free(cv_mem->cv_iroots); cv_mem->cv_iroots = NULL; free(cv_mem->cv_rootdir); cv_mem->cv_rootdir = NULL; free(cv_mem->cv_gactive); cv_mem->cv_gactive = NULL; @@ -3945,6 +4078,10 @@ void CVodeFree(void **cvode_mem) free(cv_mem->cv_Xvecs); cv_mem->cv_Xvecs = NULL; free(cv_mem->cv_Zvecs); cv_mem->cv_Zvecs = NULL; + if (cv_mem->proj_mem) { + cvProjFree(&(cv_mem->proj_mem)); + } + free(*cvode_mem); *cvode_mem = NULL; } @@ -3991,6 +4128,7 @@ void CVodeSensFree(void *cvode_mem) free(cv_mem->cv_ncfS1); cv_mem->cv_ncfS1 = NULL; free(cv_mem->cv_ncfnS1); cv_mem->cv_ncfnS1 = NULL; free(cv_mem->cv_nniS1); cv_mem->cv_nniS1 = NULL; + free(cv_mem->cv_nnfS1); cv_mem->cv_nnfS1 = NULL; cv_mem->cv_stgr1alloc = SUNFALSE; } cvSensFreeVectors(cv_mem); @@ -4029,6 +4167,11 @@ void CVodeSensFree(void *cvode_mem) cv_mem->NLSstg1 = NULL; } + /* free min atol array if necessary */ + if (cv_mem->cv_atolSmin0) { + free(cv_mem->cv_atolSmin0); + cv_mem->cv_atolSmin0 = NULL; + } } /* @@ -4051,12 +4194,18 @@ void CVodeQuadSensFree(void *cvode_mem) cv_mem->cv_QuadSensMallocDone = SUNFALSE; cv_mem->cv_quadr_sensi = SUNFALSE; } + + /* free min atol array if necessary */ + if (cv_mem->cv_atolQSmin0) { + free(cv_mem->cv_atolQSmin0); + cv_mem->cv_atolQSmin0 = NULL; + } } /* * ================================================================= - * PRIVATE FUNCTIONS + * Private Functions Implementation * ================================================================= */ @@ -4193,7 +4342,7 @@ static booleantype cvAllocVectors(CVodeMem cv_mem, N_Vector tmpl) /* * cvFreeVectors * - * This routine frees the CVODES vectors allocated in cvAllocVectors. + * This routine frees the vectors allocated in cvAllocVectors. */ static void cvFreeVectors(CVodeMem cv_mem) @@ -4589,38 +4738,283 @@ static void cvQuadSensFreeVectors(CVodeMem cv_mem) /* * ----------------------------------------------------------------- - * Initial stepsize calculation + * Initial setup * ----------------------------------------------------------------- */ + /* - * cvHin - * - * This routine computes a tentative initial step size h0. - * If tout is too close to tn (= t0), then cvHin returns CV_TOO_CLOSE - * and h remains uninitialized. Note that here tout is either the value - * passed to CVode at the first call or the value of tstop (if tstop is - * enabled and it is closer to t0=tn than tout). - * If any RHS function fails unrecoverably, cvHin returns CV_*RHSFUNC_FAIL. - * If any RHS function fails recoverably too many times and recovery is - * not possible, cvHin returns CV_REPTD_*RHSFUNC_ERR. - * Otherwise, cvHin sets h to the chosen value h0 and returns CV_SUCCESS. - * - * The algorithm used seeks to find h0 as a solution of - * (WRMS norm of (h0^2 ydd / 2)) = 1, - * where ydd = estimated second derivative of y. Here, y includes - * all variables considered in the error test. - * - * We start with an initial estimate equal to the geometric mean of the - * lower and upper bounds on the step size. - * - * Loop up to MAX_ITERS times to find h0. - * Stop if new and previous values differ by a factor < 2. - * Stop if hnew/hg > 2 after one iteration, as this probably means - * that the ydd value is bad because of cancellation error. + * cvInitialSetup * - * For each new proposed hg, we allow MAX_ITERS attempts to - * resolve a possible recoverable failure from f() by reducing + * This routine performs input consistency checks at the first step. + * If needed, it also checks the linear solver module and calls the + * linear solver initialization routine. + */ + +static int cvInitialSetup(CVodeMem cv_mem) +{ + int ier; + booleantype conOK; + + /* Did the user specify tolerances? */ + if (cv_mem->cv_itol == CV_NN) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NO_TOL); + return(CV_ILL_INPUT); + } + + /* If using a built-in routine for error weights with abstol==0, + ensure that N_VMin is available */ + if ((!cv_mem->cv_user_efun) && (cv_mem->cv_atolmin0) && (!cv_mem->cv_tempv->ops->nvmin)) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + "Missing N_VMin routine from N_Vector"); + return(CV_ILL_INPUT); + } + + /* Set data for efun */ + if (cv_mem->cv_user_efun) cv_mem->cv_e_data = cv_mem->cv_user_data; + else cv_mem->cv_e_data = cv_mem; + + /* Check to see if y0 satisfies constraints */ + if (cv_mem->cv_constraintsSet) { + + if (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", MSGCV_BAD_ISM_CONSTR); + return(CV_ILL_INPUT); + } + + conOK = N_VConstrMask(cv_mem->cv_constraints, cv_mem->cv_zn[0], cv_mem->cv_tempv); + if (!conOK) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_Y0_FAIL_CONSTR); + return(CV_ILL_INPUT); + } + } + + /* Load initial error weights */ + ier = cv_mem->cv_efun(cv_mem->cv_zn[0], cv_mem->cv_ewt, cv_mem->cv_e_data); + if (ier != 0) { + if (cv_mem->cv_itol == CV_WF) + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_EWT_FAIL); + else + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_BAD_EWT); + return(CV_ILL_INPUT); + } + + /* Quadrature initial setup */ + + if (cv_mem->cv_quadr && cv_mem->cv_errconQ) { + + /* Did the user specify tolerances? */ + if (cv_mem->cv_itolQ == CV_NN) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NO_TOLQ); + return(CV_ILL_INPUT); + } + + /* Load ewtQ */ + ier = cvQuadEwtSet(cv_mem, cv_mem->cv_znQ[0], cv_mem->cv_ewtQ); + if (ier != 0) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_BAD_EWTQ); + return(CV_ILL_INPUT); + } + + } + + if (!cv_mem->cv_quadr) cv_mem->cv_errconQ = SUNFALSE; + + /* Forward sensitivity initial setup */ + + if (cv_mem->cv_sensi) { + + /* Did the user specify tolerances? */ + if (cv_mem->cv_itolS == CV_NN) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NO_TOLS); + return(CV_ILL_INPUT); + } + + /* If using the internal DQ functions, we must have access to the problem parameters */ + if(cv_mem->cv_fSDQ && (cv_mem->cv_p == NULL)) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NULL_P); + return(CV_ILL_INPUT); + } + + /* Load ewtS */ + ier = cvSensEwtSet(cv_mem, cv_mem->cv_znS[0], cv_mem->cv_ewtS); + if (ier != 0) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_BAD_EWTS); + return(CV_ILL_INPUT); + } + + } + + /* FSA of quadrature variables */ + + if (cv_mem->cv_quadr_sensi) { + + /* If using the internal DQ functions, we must have access to fQ + * (i.e. quadrature integration must be enabled) and to the problem parameters */ + + if (cv_mem->cv_fQSDQ) { + + /* Test if quadratures are defined, so we can use fQ */ + if (!cv_mem->cv_quadr) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NULL_FQ); + return(CV_ILL_INPUT); + } + + /* Test if we have the problem parameters */ + if(cv_mem->cv_p == NULL) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NULL_P); + return(CV_ILL_INPUT); + } + + } + + if (cv_mem->cv_errconQS) { + + /* Did the user specify tolerances? */ + if (cv_mem->cv_itolQS == CV_NN) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NO_TOLQS); + return(CV_ILL_INPUT); + } + + /* If needed, did the user provide quadrature tolerances? */ + if ( (cv_mem->cv_itolQS == CV_EE) && (cv_mem->cv_itolQ == CV_NN) ) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_NO_TOLQ); + return(CV_ILL_INPUT); + } + + /* Load ewtQS */ + ier = cvQuadSensEwtSet(cv_mem, cv_mem->cv_znQS[0], cv_mem->cv_ewtQS); + if (ier != 0) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", + MSGCV_BAD_EWTQS); + return(CV_ILL_INPUT); + } + + } + + } else { + + cv_mem->cv_errconQS = SUNFALSE; + + } + + /* Call linit function (if it exists) */ + if (cv_mem->cv_linit != NULL) { + ier = cv_mem->cv_linit(cv_mem); + if (ier != 0) { + cvProcessError(cv_mem, CV_LINIT_FAIL, "CVODES", "cvInitialSetup", + MSGCV_LINIT_FAIL); + return(CV_LINIT_FAIL); + } + } + + /* Initialize the nonlinear solver (must occur after linear solver is + initialized) so that lsetup and lsolve pointer have been set */ + + /* always initialize the ODE NLS in case the user disables sensitivities */ + ier = cvNlsInit(cv_mem); + if (ier != 0) { + cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", "cvInitialSetup", + MSGCV_NLS_INIT_FAIL); + return(CV_NLS_INIT_FAIL); + } + + if (cv_mem->NLSsim != NULL) { + ier = cvNlsInitSensSim(cv_mem); + if (ier != 0) { + cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", + "cvInitialSetup", MSGCV_NLS_INIT_FAIL); + return(CV_NLS_INIT_FAIL); + } + } + + if (cv_mem->NLSstg != NULL) { + ier = cvNlsInitSensStg(cv_mem); + if (ier != 0) { + cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", + "cvInitialSetup", MSGCV_NLS_INIT_FAIL); + return(CV_NLS_INIT_FAIL); + } + } + + if (cv_mem->NLSstg1 != NULL) { + ier = cvNlsInitSensStg1(cv_mem); + if (ier != 0) { + cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", + "cvInitialSetup", MSGCV_NLS_INIT_FAIL); + return(CV_NLS_INIT_FAIL); + } + } + + /* Initialize projection data */ + if (cv_mem->proj_enabled && cv_mem->proj_mem == NULL) { + cvProcessError(cv_mem, CV_PROJ_MEM_NULL, "CVODE", + "cvInitialSetup", MSG_CV_PROJ_MEM_NULL); + return(CV_PROJ_MEM_NULL); + } + + if (cv_mem->proj_mem != NULL) { + ier = cvProjInit(cv_mem->proj_mem); + if (ier != CV_SUCCESS) { + cvProcessError(cv_mem, CV_MEM_FAIL, "CVODE", "cvInitialSetup", + MSGCV_MEM_FAIL); + return(CV_MEM_FAIL); + } + cv_mem->proj_applied = SUNFALSE; + } + + /* Initial setup complete */ + return(CV_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * Initial stepsize calculation + * ----------------------------------------------------------------- + */ + +/* + * cvHin + * + * This routine computes a tentative initial step size h0. + * If tout is too close to tn (= t0), then cvHin returns CV_TOO_CLOSE + * and h remains uninitialized. Note that here tout is either the value + * passed to CVode at the first call or the value of tstop (if tstop is + * enabled and it is closer to t0=tn than tout). + * If any RHS function fails unrecoverably, cvHin returns CV_*RHSFUNC_FAIL. + * If any RHS function fails recoverably too many times and recovery is + * not possible, cvHin returns CV_REPTD_*RHSFUNC_ERR. + * Otherwise, cvHin sets h to the chosen value h0 and returns CV_SUCCESS. + * + * The algorithm used seeks to find h0 as a solution of + * (WRMS norm of (h0^2 ydd / 2)) = 1, + * where ydd = estimated second derivative of y. Here, y includes + * all variables considered in the error test. + * + * We start with an initial estimate equal to the geometric mean of the + * lower and upper bounds on the step size. + * + * Loop up to MAX_ITERS times to find h0. + * Stop if new and previous values differ by a factor < 2. + * Stop if hnew/hg > 2 after one iteration, as this probably means + * that the ydd value is bad because of cancellation error. + * + * For each new proposed hg, we allow MAX_ITERS attempts to + * resolve a possible recoverable failure from f() by reducing * the proposed stepsize by a factor of 0.2. If a legal stepsize * still cannot be found, fall back on a previous value if possible, * or else return CV_REPTD_RHSFUNC_ERR. @@ -4679,7 +5073,7 @@ static int cvHin(CVodeMem cv_mem, realtype tout) if (retval < 0) return(retval); /* If successful, we can use ydd */ if (retval == CV_SUCCESS) {hgOK = SUNTRUE; break;} - /* A RHS function failed recoverably; cut step size and test it again */ + /* A RHS function failed recoverably; cut step size and test again */ hg *= POINT2; } @@ -4849,7 +5243,7 @@ static realtype cvUpperBoundH0(CVodeMem cv_mem, realtype tdist) hub = HUB_FACTOR*tdist; - /* Use the smaler of the two */ + /* Use the smaller of the two */ if (hub*hub_inv > ONE) hub = ONE/hub_inv; @@ -4961,3390 +5355,3358 @@ static int cvYddNorm(CVodeMem cv_mem, realtype hg, realtype *yddnrm) return(CV_SUCCESS); } + /* * ----------------------------------------------------------------- - * Initial setup + * Main cvStep function * ----------------------------------------------------------------- */ /* - * cvInitialSetup + * cvStep * - * This routine performs input consistency checks at the first step. - * If needed, it also checks the linear solver module and calls the - * linear solver initialization routine. + * This routine performs one internal cvode step, from tn to tn + h. + * It calls other routines to do all the work. + * + * The main operations done here are as follows: + * - preliminary adjustments if a new step size was chosen; + * - prediction of the Nordsieck history array zn at tn + h; + * - setting of multistep method coefficients and test quantities; + * - solution of the nonlinear system; + * - testing the local error; + * - updating zn and other state data if successful; + * - resetting stepsize and order for the next step. + * - if SLDET is on, check for stability, reduce order if necessary. + * On a failure in the nonlinear system solution or error test, the + * step may be reattempted, depending on the nature of the failure. */ -static int cvInitialSetup(CVodeMem cv_mem) +static int cvStep(CVodeMem cv_mem) { - int ier; - booleantype conOK; - - /* Did the user specify tolerances? */ - if (cv_mem->cv_itol == CV_NN) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NO_TOL); - return(CV_ILL_INPUT); - } + realtype saved_t; /* time to restore to if a failure occurs */ + realtype dsm; /* local truncation error estimate */ + realtype dsmQ; /* quadrature error estimate */ + realtype dsmS; /* sensitivity error estimate */ + realtype dsmQS; /* quadrature sensitivity error estimate */ + int ncf; /* corrector failures in this step attempt */ + int ncfS; /* sensitivity corrector failures */ + int npf; /* projection failures in this step attempt */ + int nef; /* error test failures in this step attempt */ + int nefQ; /* quadrature error test fails */ + int nefS; /* sensitivity error test fails */ + int nefQS; /* quadrature sensitivity error test fails */ + int nflag, kflag; /* nonlinear solver flags */ + int pflag; /* projection return flag */ + int eflag; /* error test return flag */ + int retval, is; + booleantype doProjection; /* flag to apply projection in this step */ + booleantype do_sensi_stg; /* staggered strategy */ + booleantype do_sensi_stg1; /* staggered 1 strategy */ - /* Set data for efun */ - if (cv_mem->cv_user_efun) cv_mem->cv_e_data = cv_mem->cv_user_data; - else cv_mem->cv_e_data = cv_mem; + /* Are we computing sensitivities with a staggered approach? */ - /* Check to see if y0 satisfies constraints */ - if (cv_mem->cv_constraintsSet) { + do_sensi_stg = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED)); + do_sensi_stg1 = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED1)); - if (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", MSGCV_BAD_ISM_CONSTR); - return(CV_ILL_INPUT); - } + /* Initialize local counters for convergence and error test failures */ - conOK = N_VConstrMask(cv_mem->cv_constraints, cv_mem->cv_zn[0], cv_mem->cv_tempv); - if (!conOK) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", MSGCV_Y0_FAIL_CONSTR); - return(CV_ILL_INPUT); - } + ncf = npf = nef = 0; + nefQ = nefQS = 0; + ncfS = nefS = 0; + if (do_sensi_stg1) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_ncfS1[is] = 0; } - /* Load initial error weights */ - ier = cv_mem->cv_efun(cv_mem->cv_zn[0], cv_mem->cv_ewt, - cv_mem->cv_e_data); - if (ier != 0) { - if (cv_mem->cv_itol == CV_WF) - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_EWT_FAIL); - else - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_BAD_EWT); - return(CV_ILL_INPUT); + /* If the step size has changed, update the history array */ + if ((cv_mem->cv_nst > 0) && (cv_mem->cv_hprime != cv_mem->cv_h)) { + cvAdjustParams(cv_mem); } - /* Quadrature initial setup */ + /* Check if this step should be projected */ + doProjection = SUNFALSE; + if (cv_mem->proj_enabled) + doProjection = cv_mem->proj_mem->freq > 0 && + (cv_mem->cv_nst == 0 || (cv_mem->cv_nst >= cv_mem->proj_mem->nstlprj + + cv_mem->proj_mem->freq)); - if (cv_mem->cv_quadr && cv_mem->cv_errconQ) { + /* Looping point for attempts to take a step */ - /* Did the user specify tolerances? */ - if (cv_mem->cv_itolQ == CV_NN) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NO_TOLQ); - return(CV_ILL_INPUT); - } + saved_t = cv_mem->cv_tn; + nflag = FIRST_CALL; - /* Load ewtQ */ - ier = cvQuadEwtSet(cv_mem, cv_mem->cv_znQ[0], cv_mem->cv_ewtQ); - if (ier != 0) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_BAD_EWTQ); - return(CV_ILL_INPUT); - } + for(;;) { - } +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvStep", "enter-step-attempt-loop", + "step = %li, h = %.16g, q = %d, t_n = %.16g", + cv_mem->cv_nst, cv_mem->cv_next_h, cv_mem->cv_next_q, cv_mem->cv_tn); +#endif - if (!cv_mem->cv_quadr) cv_mem->cv_errconQ = SUNFALSE; + cvPredict(cv_mem); + cvSet(cv_mem); - /* Forward sensitivity initial setup */ + /* ------ Correct state variables ------ */ - if (cv_mem->cv_sensi) { + nflag = cvNls(cv_mem, nflag); + kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); - /* Did the user specify tolerances? */ - if (cv_mem->cv_itolS == CV_NN) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NO_TOLS); - return(CV_ILL_INPUT); - } + /* Go back in loop if we need to predict again (nflag=PREV_CONV_FAIL) */ + if (kflag == PREDICT_AGAIN) continue; - /* If using the internal DQ functions, we must have access to the problem parameters */ - if(cv_mem->cv_fSDQ && (cv_mem->cv_p == NULL)) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NULL_P); - return(CV_ILL_INPUT); - } + /* Return if nonlinear solve failed and recovery is not possible. */ + if (kflag != DO_ERROR_TEST) return(kflag); - /* Load ewtS */ - ier = cvSensEwtSet(cv_mem, cv_mem->cv_znS[0], cv_mem->cv_ewtS); - if (ier != 0) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_BAD_EWTS); - return(CV_ILL_INPUT); + /* Check if a projection needs to be performed */ + cv_mem->proj_applied = SUNFALSE; + + if (doProjection) { + + /* Perform projection (nflag=CV_SUCCESS) */ + pflag = cvDoProjection(cv_mem, &nflag, saved_t, &npf); + + /* Go back in loop if we need to predict again (nflag=PREV_PROJ_FAIL) */ + if (pflag == PREDICT_AGAIN) continue; + + /* Return if projection failed and recovery is not possible */ + if (pflag != CV_SUCCESS) return(pflag); } - } + /* Perform error test (nflag=CV_SUCCESS) */ + eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrm, + &nef, &(cv_mem->cv_netf), &dsm); - /* FSA of quadrature variables */ + /* Go back in loop if we need to predict again (nflag=PREV_ERR_FAIL) */ + if (eflag == TRY_AGAIN) continue; - if (cv_mem->cv_quadr_sensi) { + /* Return if error test failed and recovery is not possible. */ + if (eflag != CV_SUCCESS) return(eflag); - /* If using the internal DQ functions, we must have access to fQ - * (i.e. quadrature integration must be enabled) and to the problem parameters */ + /* Error test passed (eflag=CV_SUCCESS, nflag=CV_SUCCESS), go on */ - if (cv_mem->cv_fQSDQ) { + /* ------ Correct the quadrature variables ------ */ - /* Test if quadratures are defined, so we can use fQ */ - if (!cv_mem->cv_quadr) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NULL_FQ); - return(CV_ILL_INPUT); - } + if (cv_mem->cv_quadr) { - /* Test if we have the problem parameters */ - if(cv_mem->cv_p == NULL) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NULL_P); - return(CV_ILL_INPUT); + ncf = nef = 0; /* reset counters for states */ + + nflag = cvQuadNls(cv_mem); + kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); + + if (kflag == PREDICT_AGAIN) continue; + if (kflag != DO_ERROR_TEST) return(kflag); + + /* Error test on quadratures */ + if (cv_mem->cv_errconQ) { + cv_mem->cv_acnrmQ = N_VWrmsNorm(cv_mem->cv_acorQ, cv_mem->cv_ewtQ); + eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmQ, + &nefQ, &(cv_mem->cv_netfQ), &dsmQ); + + if (eflag == TRY_AGAIN) continue; + if (eflag != CV_SUCCESS) return(eflag); + + /* Set dsm = max(dsm, dsmQ) to be used in cvPrepareNextStep */ + if (dsmQ > dsm) dsm = dsmQ; } } - if (cv_mem->cv_errconQS) { + /* ------ Correct the sensitivity variables (STAGGERED or STAGGERED1) ------- */ - /* Did the user specify tolerances? */ - if (cv_mem->cv_itolQS == CV_NN) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NO_TOLQS); - return(CV_ILL_INPUT); - } + if (do_sensi_stg || do_sensi_stg1) { - /* If needed, did the user provide quadrature tolerances? */ - if ( (cv_mem->cv_itolQS == CV_EE) && (cv_mem->cv_itolQ == CV_NN) ) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_NO_TOLQ); - return(CV_ILL_INPUT); + ncf = nef = 0; /* reset counters for states */ + if (cv_mem->cv_quadr) nefQ = 0; /* reset counter for quadratures */ + + /* Evaluate f at converged y, needed for future evaluations of sens. RHS + * If f() fails recoverably, treat it as a convergence failure and + * attempt the step again */ + + retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, + cv_mem->cv_ftemp, cv_mem->cv_user_data); + cv_mem->cv_nfe++; + if (retval < 0) return(CV_RHSFUNC_FAIL); + if (retval > 0) { + nflag = PREV_CONV_FAIL; + continue; } - /* Load ewtQS */ - ier = cvQuadSensEwtSet(cv_mem, cv_mem->cv_znQS[0], cv_mem->cv_ewtQS); - if (ier != 0) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "cvInitialSetup", - MSGCV_BAD_EWTQS); - return(CV_ILL_INPUT); + if (do_sensi_stg) { + /* Nonlinear solve for sensitivities (all-at-once) */ + nflag = cvStgrNls(cv_mem); + kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncfS, + &(cv_mem->cv_ncfnS)); + } else { + /* Nonlinear solve for sensitivities (one-by-one) */ + for (is=0; iscv_Ns; is++) { + cv_mem->sens_solve_idx = is; + nflag = cvStgr1Nls(cv_mem, is); + kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, + &(cv_mem->cv_ncfS1[is]), + &(cv_mem->cv_ncfnS1[is])); + if (kflag != DO_ERROR_TEST) break; + } } - } + if (kflag == PREDICT_AGAIN) continue; + if (kflag != DO_ERROR_TEST) return(kflag); - } else { + /* Error test on sensitivities */ + if (cv_mem->cv_errconS) { - cv_mem->cv_errconQS = SUNFALSE; + if (!cv_mem->cv_acnrmScur) + cv_mem->cv_acnrmS = cvSensNorm(cv_mem, cv_mem->cv_acorS, cv_mem->cv_ewtS); - } + eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmS, + &nefS, &(cv_mem->cv_netfS), &dsmS); - /* Call linit function (if it exists) */ - if (cv_mem->cv_linit != NULL) { - ier = cv_mem->cv_linit(cv_mem); - if (ier != 0) { - cvProcessError(cv_mem, CV_LINIT_FAIL, "CVODES", "cvInitialSetup", - MSGCV_LINIT_FAIL); - return(CV_LINIT_FAIL); - } - } + if (eflag == TRY_AGAIN) continue; + if (eflag != CV_SUCCESS) return(eflag); - /* Initialize the nonlinear solver (must occur after linear solver is - initialized) so that lsetup and lsolve pointer have been set */ + /* Set dsm = max(dsm, dsmS) to be used in cvPrepareNextStep */ + if (dsmS > dsm) dsm = dsmS; - /* always initialize the ODE NLS in case the user disables sensitivities */ - ier = cvNlsInit(cv_mem); - if (ier != 0) { - cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", - "cvInitialSetup", MSGCV_NLS_INIT_FAIL); - return(CV_NLS_INIT_FAIL); - } + } - if (cv_mem->NLSsim != NULL) { - ier = cvNlsInitSensSim(cv_mem); - if (ier != 0) { - cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", - "cvInitialSetup", MSGCV_NLS_INIT_FAIL); - return(CV_NLS_INIT_FAIL); } - } - if (cv_mem->NLSstg != NULL) { - ier = cvNlsInitSensStg(cv_mem); - if (ier != 0) { - cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", - "cvInitialSetup", MSGCV_NLS_INIT_FAIL); - return(CV_NLS_INIT_FAIL); - } - } + /* ------ Correct the quadrature sensitivity variables ------ */ - if (cv_mem->NLSstg1 != NULL) { - ier = cvNlsInitSensStg1(cv_mem); - if (ier != 0) { - cvProcessError(cv_mem, CV_NLS_INIT_FAIL, "CVODES", - "cvInitialSetup", MSGCV_NLS_INIT_FAIL); - return(CV_NLS_INIT_FAIL); - } - } + if (cv_mem->cv_quadr_sensi) { - return(CV_SUCCESS); -} + /* Reset local convergence and error test failure counters */ + ncf = nef = 0; + if (cv_mem->cv_quadr) nefQ = 0; + if (do_sensi_stg) ncfS = nefS = 0; + if (do_sensi_stg1) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_ncfS1[is] = 0; + nefS = 0; + } -/* - * cvEwtSet - * - * This routine is responsible for setting the error weight vector ewt, - * according to tol_type, as follows: - * - * (1) ewt[i] = 1 / (reltol * SUNRabs(ycur[i]) + abstol), i=0,...,neq-1 - * if tol_type = CV_SS - * (2) ewt[i] = 1 / (reltol * SUNRabs(ycur[i]) + abstol[i]), i=0,...,neq-1 - * if tol_type = CV_SV - * - * cvEwtSet returns 0 if ewt is successfully set as above to a - * positive vector and -1 otherwise. In the latter case, ewt is - * considered undefined. - * - * All the real work is done in the routines cvEwtSetSS, cvEwtSetSV. - */ + /* Note that ftempQ contains yQdot evaluated at the converged y + * (stored in cvQuadNls) and can be used in evaluating fQS */ -int cvEwtSet(N_Vector ycur, N_Vector weight, void *data) -{ - CVodeMem cv_mem; - int flag = 0; + nflag = cvQuadSensNls(cv_mem); + kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); - /* data points to cv_mem here */ + if (kflag == PREDICT_AGAIN) continue; + if (kflag != DO_ERROR_TEST) return(kflag); - cv_mem = (CVodeMem) data; + /* Error test on quadrature sensitivities */ + if (cv_mem->cv_errconQS) { + cv_mem->cv_acnrmQS = cvQuadSensNorm(cv_mem, cv_mem->cv_acorQS, + cv_mem->cv_ewtQS); + eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmQS, + &nefQS, &(cv_mem->cv_netfQS), &dsmQS); - switch(cv_mem->cv_itol) { - case CV_SS: - flag = cvEwtSetSS(cv_mem, ycur, weight); - break; - case CV_SV: - flag = cvEwtSetSV(cv_mem, ycur, weight); + if (eflag == TRY_AGAIN) continue; + if (eflag != CV_SUCCESS) return(eflag); + + /* Set dsm = max(dsm, dsmQS) to be used in cvPrepareNextStep */ + if (dsmQS > dsm) dsm = dsmQS; + } + + } + + /* Error test passed (eflag=CV_SUCCESS), break from loop */ break; + } - return(flag); -} + /* Nonlinear system solve and error test were both successful. + Update data, and consider change of step and/or order. */ -/* - * cvEwtSetSS - * - * This routine sets ewt as decribed above in the case tol_type = CV_SS. - * If the absolute tolerance is zero, it tests for non-positive components - * before inverting. cvEwtSetSS returns 0 if ewt is successfully set to a - * positive vector and -1 otherwise. In the latter case, ewt is considered - * undefined. - */ + cvCompleteStep(cv_mem); -static int cvEwtSetSS(CVodeMem cv_mem, N_Vector ycur, N_Vector weight) -{ - N_VAbs(ycur, cv_mem->cv_tempv); - N_VScale(cv_mem->cv_reltol, cv_mem->cv_tempv, cv_mem->cv_tempv); - N_VAddConst(cv_mem->cv_tempv, cv_mem->cv_Sabstol, cv_mem->cv_tempv); - if (cv_mem->cv_atolmin0) { - if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); - } - N_VInv(cv_mem->cv_tempv, weight); + cvPrepareNextStep(cv_mem, dsm); - return(0); -} + /* If Stablilty Limit Detection is turned on, call stability limit + detection routine for possible order reduction. */ -/* - * cvEwtSetSV - * - * This routine sets ewt as decribed above in the case tol_type = CV_SV. - * If any absolute tolerance is zero, it tests for non-positive components - * before inverting. cvEwtSetSV returns 0 if ewt is successfully set to a - * positive vector and -1 otherwise. In the latter case, ewt is considered - * undefined. - */ + if (cv_mem->cv_sldeton) cvBDFStab(cv_mem); -static int cvEwtSetSV(CVodeMem cv_mem, N_Vector ycur, N_Vector weight) -{ - N_VAbs(ycur, cv_mem->cv_tempv); - N_VLinearSum(cv_mem->cv_reltol, cv_mem->cv_tempv, ONE, - cv_mem->cv_Vabstol, cv_mem->cv_tempv); - if (cv_mem->cv_atolmin0) { - if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); - } - N_VInv(cv_mem->cv_tempv, weight); - return(0); -} + cv_mem->cv_etamax = (cv_mem->cv_nst <= cv_mem->cv_small_nst) ? + cv_mem->cv_eta_max_es : cv_mem->cv_eta_max_gs; -/* - * cvQuadEwtSet - * - */ + /* Finally, we rescale the acor array to be the + estimated local error vector. */ -static int cvQuadEwtSet(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) -{ - int flag=0; + N_VScale(cv_mem->cv_tq[2], cv_mem->cv_acor, cv_mem->cv_acor); - switch (cv_mem->cv_itolQ) { - case CV_SS: - flag = cvQuadEwtSetSS(cv_mem, qcur, weightQ); - break; - case CV_SV: - flag = cvQuadEwtSetSV(cv_mem, qcur, weightQ); - break; - } + if (cv_mem->cv_quadr) + N_VScale(cv_mem->cv_tq[2], cv_mem->cv_acorQ, cv_mem->cv_acorQ); - return(flag); + if (cv_mem->cv_sensi) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = cv_mem->cv_tq[2]; -} + retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorS, cv_mem->cv_acorS); + if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + } -/* - * cvQuadEwtSetSS - * - */ + if (cv_mem->cv_quadr_sensi) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = cv_mem->cv_tq[2]; -static int cvQuadEwtSetSS(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) -{ - N_VAbs(qcur, cv_mem->cv_tempvQ); - N_VScale(cv_mem->cv_reltolQ, cv_mem->cv_tempvQ, cv_mem->cv_tempvQ); - N_VAddConst(cv_mem->cv_tempvQ, cv_mem->cv_SabstolQ, cv_mem->cv_tempvQ); - if (cv_mem->cv_atolQmin0) { - if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); + retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorQS, cv_mem->cv_acorQS); + if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); } - N_VInv(cv_mem->cv_tempvQ, weightQ); - return(0); + return(CV_SUCCESS); } /* - * cvQuadEwtSetSV + * ----------------------------------------------------------------- + * Function called at beginning of step + * ----------------------------------------------------------------- + */ + +/* + * cvAdjustParams * + * This routine is called when a change in step size was decided upon, + * and it handles the required adjustments to the history array zn. + * If there is to be a change in order, we call cvAdjustOrder and reset + * q, L = q+1, and qwait. Then in any case, we call cvRescale, which + * resets h and rescales the Nordsieck array. */ -static int cvQuadEwtSetSV(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) +static void cvAdjustParams(CVodeMem cv_mem) { - N_VAbs(qcur, cv_mem->cv_tempvQ); - N_VLinearSum(cv_mem->cv_reltolQ, cv_mem->cv_tempvQ, ONE, - cv_mem->cv_VabstolQ, cv_mem->cv_tempvQ); - if (cv_mem->cv_atolQmin0) { - if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); + if (cv_mem->cv_qprime != cv_mem->cv_q) { + cvAdjustOrder(cv_mem, cv_mem->cv_qprime-cv_mem->cv_q); + cv_mem->cv_q = cv_mem->cv_qprime; + cv_mem->cv_L = cv_mem->cv_q+1; + cv_mem->cv_qwait = cv_mem->cv_L; } - N_VInv(cv_mem->cv_tempvQ, weightQ); - - return(0); + cvRescale(cv_mem); } /* - * cvSensEwtSet + * cvAdjustOrder * + * This routine is a high level routine which handles an order + * change by an amount deltaq (= +1 or -1). If a decrease in order + * is requested and q==2, then the routine returns immediately. + * Otherwise cvAdjustAdams or cvAdjustBDF is called to handle the + * order change (depending on the value of lmm). */ -static int cvSensEwtSet(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) +static void cvAdjustOrder(CVodeMem cv_mem, int deltaq) { - int flag=0; + if ((cv_mem->cv_q==2) && (deltaq != 1)) return; - switch (cv_mem->cv_itolS) { - case CV_EE: - flag = cvSensEwtSetEE(cv_mem, yScur, weightS); - break; - case CV_SS: - flag = cvSensEwtSetSS(cv_mem, yScur, weightS); + switch(cv_mem->cv_lmm){ + case CV_ADAMS: + cvAdjustAdams(cv_mem, deltaq); break; - case CV_SV: - flag = cvSensEwtSetSV(cv_mem, yScur, weightS); + case CV_BDF: + cvAdjustBDF(cv_mem, deltaq); break; } - - return(flag); } /* - * cvSensEwtSetEE - * - * In this case, the error weight vector for the i-th sensitivity is set to - * - * ewtS_i = pbar_i * efun(pbar_i*yS_i) - * - * In other words, the scaled sensitivity pbar_i * yS_i has the same error - * weight vector calculation as the solution vector. + * cvAdjustAdams * + * This routine adjusts the history array on a change of order q by + * deltaq, in the case that lmm == CV_ADAMS. */ -static int cvSensEwtSetEE(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) +static void cvAdjustAdams(CVodeMem cv_mem, int deltaq) { - int is; - N_Vector pyS; - int flag; - - /* Use tempvS[0] as temporary storage for the scaled sensitivity */ - pyS = cv_mem->cv_tempvS[0]; + int i, j; + realtype xi, hsum; - for (is=0; iscv_Ns; is++) { - N_VScale(cv_mem->cv_pbar[is], yScur[is], pyS); - flag = cv_mem->cv_efun(pyS, weightS[is], cv_mem->cv_e_data); - if (flag != 0) return(-1); - N_VScale(cv_mem->cv_pbar[is], weightS[is], weightS[is]); - } + /* On an order increase, set new column of zn to zero and return */ - return(0); -} - -/* - * cvSensEwtSetSS - * - */ + if (deltaq==1) { + N_VConst(ZERO, cv_mem->cv_zn[cv_mem->cv_L]); + if (cv_mem->cv_quadr) + N_VConst(ZERO, cv_mem->cv_znQ[cv_mem->cv_L]); + if (cv_mem->cv_sensi) + (void) N_VConstVectorArray(cv_mem->cv_Ns, ZERO, + cv_mem->cv_znS[cv_mem->cv_L]); + return; + } -static int cvSensEwtSetSS(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) -{ - int is; + /* + * On an order decrease, each zn[j] is adjusted by a multiple of zn[q]. + * The coeffs. in the adjustment are the coeffs. of the polynomial: + * x + * q * INT { u * ( u + xi_1 ) * ... * ( u + xi_{q-2} ) } du + * 0 + * where xi_j = [t_n - t_(n-j)]/h => xi_0 = 0 + */ - for (is=0; iscv_Ns; is++) { - N_VAbs(yScur[is], cv_mem->cv_tempv); - N_VScale(cv_mem->cv_reltolS, cv_mem->cv_tempv, cv_mem->cv_tempv); - N_VAddConst(cv_mem->cv_tempv, cv_mem->cv_SabstolS[is], cv_mem->cv_tempv); - if (cv_mem->cv_atolSmin0[is]) { - if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); - } - N_VInv(cv_mem->cv_tempv, weightS[is]); + for (i=0; i <= cv_mem->cv_qmax; i++) cv_mem->cv_l[i] = ZERO; + cv_mem->cv_l[1] = ONE; + hsum = ZERO; + for (j=1; j <= cv_mem->cv_q-2; j++) { + hsum += cv_mem->cv_tau[j]; + xi = hsum / cv_mem->cv_hscale; + for (i=j+1; i >= 1; i--) + cv_mem->cv_l[i] = cv_mem->cv_l[i]*xi + cv_mem->cv_l[i-1]; } - return(0); -} -/* - * cvSensEwtSetSV - * - */ + for (j=1; j <= cv_mem->cv_q-2; j++) + cv_mem->cv_l[j+1] = cv_mem->cv_q * (cv_mem->cv_l[j] / (j+1)); -static int cvSensEwtSetSV(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) -{ - int is; + if (cv_mem->cv_q > 2) { - for (is=0; iscv_Ns; is++) { - N_VAbs(yScur[is], cv_mem->cv_tempv); - N_VLinearSum(cv_mem->cv_reltolS, cv_mem->cv_tempv, ONE, - cv_mem->cv_VabstolS[is], cv_mem->cv_tempv); - if (cv_mem->cv_atolSmin0[is]) { - if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); - } - N_VInv(cv_mem->cv_tempv, weightS[is]); + for (j=2; j < cv_mem->cv_q; j++) + cv_mem->cv_cvals[j-2] = -cv_mem->cv_l[j]; + + (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, + cv_mem->cv_zn[cv_mem->cv_q], + cv_mem->cv_zn+2, cv_mem->cv_zn+2); + + if (cv_mem->cv_quadr) + (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, + cv_mem->cv_znQ[cv_mem->cv_q], + cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); + + if (cv_mem->cv_sensi) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, + cv_mem->cv_cvals, + cv_mem->cv_znS[cv_mem->cv_q], + cv_mem->cv_znS+2, + cv_mem->cv_znS+2); } - return(0); } /* - * cvQuadSensEwtSet + * cvAdjustBDF * + * This is a high level routine which handles adjustments to the + * history array on a change of order by deltaq in the case that + * lmm == CV_BDF. cvAdjustBDF calls cvIncreaseBDF if deltaq = +1 and + * cvDecreaseBDF if deltaq = -1 to do the actual work. */ -static int cvQuadSensEwtSet(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +static void cvAdjustBDF(CVodeMem cv_mem, int deltaq) { - int flag=0; - - switch (cv_mem->cv_itolQS) { - case CV_EE: - flag = cvQuadSensEwtSetEE(cv_mem, yQScur, weightQS); - break; - case CV_SS: - flag = cvQuadSensEwtSetSS(cv_mem, yQScur, weightQS); - break; - case CV_SV: - flag = cvQuadSensEwtSetSV(cv_mem, yQScur, weightQS); - break; + switch(deltaq) { + case 1: + cvIncreaseBDF(cv_mem); + return; + case -1: + cvDecreaseBDF(cv_mem); + return; } - - return(flag); } /* - * cvQuadSensEwtSetEE - * - * In this case, the error weight vector for the i-th quadrature sensitivity - * is set to - * - * ewtQS_i = pbar_i * cvQuadEwtSet(pbar_i*yQS_i) - * - * In other words, the scaled sensitivity pbar_i * yQS_i has the same error - * weight vector calculation as the quadrature vector. + * cvIncreaseBDF * + * This routine adjusts the history array on an increase in the + * order q in the case that lmm == CV_BDF. + * A new column zn[q+1] is set equal to a multiple of the saved + * vector (= acor) in zn[indx_acor]. Then each zn[j] is adjusted by + * a multiple of zn[q+1]. The coefficients in the adjustment are the + * coefficients of the polynomial x*x*(x+xi_1)*...*(x+xi_j), + * where xi_j = [t_n - t_(n-j)]/h. */ -static int cvQuadSensEwtSetEE(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) -{ - int is; - N_Vector pyS; - int flag; - - /* Use tempvQS[0] as temporary storage for the scaled sensitivity */ - pyS = cv_mem->cv_tempvQS[0]; - - for (is=0; iscv_Ns; is++) { - N_VScale(cv_mem->cv_pbar[is], yQScur[is], pyS); - flag = cvQuadEwtSet(cv_mem, pyS, weightQS[is]); - if (flag != 0) return(-1); - N_VScale(cv_mem->cv_pbar[is], weightQS[is], weightQS[is]); - } - - return(0); -} -static int cvQuadSensEwtSetSS(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +static void cvIncreaseBDF(CVodeMem cv_mem) { + realtype alpha0, alpha1, prod, xi, xiold, hsum, A1; + int i, j; int is; - for (is=0; iscv_Ns; is++) { - N_VAbs(yQScur[is], cv_mem->cv_tempvQ); - N_VScale(cv_mem->cv_reltolQS, cv_mem->cv_tempvQ, cv_mem->cv_tempvQ); - N_VAddConst(cv_mem->cv_tempvQ, cv_mem->cv_SabstolQS[is], cv_mem->cv_tempvQ); - if (cv_mem->cv_atolQSmin0[is]) { - if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); + for (i=0; i <= cv_mem->cv_qmax; i++) cv_mem->cv_l[i] = ZERO; + cv_mem->cv_l[2] = alpha1 = prod = xiold = ONE; + alpha0 = -ONE; + hsum = cv_mem->cv_hscale; + if (cv_mem->cv_q > 1) { + for (j=1; j < cv_mem->cv_q; j++) { + hsum += cv_mem->cv_tau[j+1]; + xi = hsum / cv_mem->cv_hscale; + prod *= xi; + alpha0 -= ONE / (j+1); + alpha1 += ONE / xi; + for (i=j+2; i >= 2; i--) + cv_mem->cv_l[i] = cv_mem->cv_l[i]*xiold + cv_mem->cv_l[i-1]; + xiold = xi; } - N_VInv(cv_mem->cv_tempvQ, weightQS[is]); } + A1 = (-alpha0 - alpha1) / prod; - return(0); -} + /* + zn[indx_acor] contains the value Delta_n = y_n - y_n(0) + This value was stored there at the previous successful + step (in cvCompleteStep) -static int cvQuadSensEwtSetSV(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) -{ - int is; + A1 contains dbar = (1/xi* - 1/xi_q)/prod(xi_j) + */ - for (is=0; iscv_Ns; is++) { - N_VAbs(yQScur[is], cv_mem->cv_tempvQ); - N_VLinearSum(cv_mem->cv_reltolQS, cv_mem->cv_tempvQ, ONE, - cv_mem->cv_VabstolQS[is], cv_mem->cv_tempvQ); - if (cv_mem->cv_atolQSmin0[is]) { - if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); - } - N_VInv(cv_mem->cv_tempvQ, weightQS[is]); - } + N_VScale(A1, cv_mem->cv_zn[cv_mem->cv_indx_acor], + cv_mem->cv_zn[cv_mem->cv_L]); - return(0); -} + /* for (j=2; j <= cv_mem->cv_q; j++) */ + if (cv_mem->cv_q > 1) + (void) N_VScaleAddMulti(cv_mem->cv_q-1, cv_mem->cv_l+2, + cv_mem->cv_zn[cv_mem->cv_L], + cv_mem->cv_zn+2, cv_mem->cv_zn+2); + if (cv_mem->cv_quadr) { + N_VScale(A1, cv_mem->cv_znQ[cv_mem->cv_indx_acor], + cv_mem->cv_znQ[cv_mem->cv_L]); + /* for (j=2; j <= cv_mem->cv_q; j++) */ + if (cv_mem->cv_q > 1) + (void) N_VScaleAddMulti(cv_mem->cv_q-1, cv_mem->cv_l+2, + cv_mem->cv_znQ[cv_mem->cv_L], + cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); + } -/* - * ----------------------------------------------------------------- - * Main cvStep function - * ----------------------------------------------------------------- - */ + if (cv_mem->cv_sensi) { -/* - * cvStep - * - * This routine performs one internal cvode step, from tn to tn + h. - * It calls other routines to do all the work. - * - * The main operations done here are as follows: - * - preliminary adjustments if a new step size was chosen; - * - prediction of the Nordsieck history array zn at tn + h; - * - setting of multistep method coefficients and test quantities; - * - solution of the nonlinear system; - * - testing the local error; - * - updating zn and other state data if successful; - * - resetting stepsize and order for the next step. - * - if SLDET is on, check for stability, reduce order if necessary. - * On a failure in the nonlinear system solution or error test, the - * step may be reattempted, depending on the nature of the failure. - */ + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = A1; -static int cvStep(CVodeMem cv_mem) -{ - realtype saved_t, dsm, dsmQ, dsmS, dsmQS; - booleantype do_sensi_stg, do_sensi_stg1; - int ncf, ncfS; - int nef, nefQ, nefS, nefQS; - int nflag, kflag, eflag; - int retval, is; + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_znS[cv_mem->cv_indx_acor], + cv_mem->cv_znS[cv_mem->cv_L]); - /* Are we computing sensitivities with a staggered approach? */ + /* for (j=2; j <= cv_mem->cv_q; j++) */ + if (cv_mem->cv_q > 1) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-1, + cv_mem->cv_l+2, + cv_mem->cv_znS[cv_mem->cv_L], + cv_mem->cv_znS+2, + cv_mem->cv_znS+2); + } - do_sensi_stg = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED)); - do_sensi_stg1 = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED1)); + if (cv_mem->cv_quadr_sensi) { - /* Initialize local counters for convergence and error test failures */ + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = A1; - ncf = nef = 0; - nefQ = nefQS = 0; - ncfS = nefS = 0; - if (do_sensi_stg1) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_ncfS1[is] = 0; + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_znQS[cv_mem->cv_indx_acor], + cv_mem->cv_znQS[cv_mem->cv_L]); + + /* for (j=2; j <= cv_mem->cv_q; j++) */ + if (cv_mem->cv_q > 1) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-1, + cv_mem->cv_l+2, + cv_mem->cv_znQS[cv_mem->cv_L], + cv_mem->cv_znQS+2, + cv_mem->cv_znQS+2); } - /* If needed, adjust method parameters */ +} - if ((cv_mem->cv_nst > 0) && (cv_mem->cv_hprime != cv_mem->cv_h)) - cvAdjustParams(cv_mem); +/* + * cvDecreaseBDF + * + * This routine adjusts the history array on a decrease in the + * order q in the case that lmm == CV_BDF. + * Each zn[j] is adjusted by a multiple of zn[q]. The coefficients + * in the adjustment are the coefficients of the polynomial + * x*x*(x+xi_1)*...*(x+xi_j), where xi_j = [t_n - t_(n-j)]/h. + */ - /* Looping point for attempts to take a step */ +static void cvDecreaseBDF(CVodeMem cv_mem) +{ + realtype hsum, xi; + int i, j; - saved_t = cv_mem->cv_tn; - nflag = FIRST_CALL; + for (i=0; i <= cv_mem->cv_qmax; i++) cv_mem->cv_l[i] = ZERO; + cv_mem->cv_l[2] = ONE; + hsum = ZERO; + for (j=1; j <= cv_mem->cv_q-2; j++) { + hsum += cv_mem->cv_tau[j]; + xi = hsum / cv_mem->cv_hscale; + for (i=j+2; i >= 2; i--) + cv_mem->cv_l[i] = cv_mem->cv_l[i]*xi + cv_mem->cv_l[i-1]; + } - for(;;) { + if (cv_mem->cv_q > 2) { - cvPredict(cv_mem); - cvSet(cv_mem); + for (j=2; j < cv_mem->cv_q; j++) + cv_mem->cv_cvals[j-2] = -cv_mem->cv_l[j]; - /* ------ Correct state variables ------ */ + (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, + cv_mem->cv_zn[cv_mem->cv_q], + cv_mem->cv_zn+2, cv_mem->cv_zn+2); - nflag = cvNls(cv_mem, nflag); - kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); + if (cv_mem->cv_quadr) + (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, + cv_mem->cv_znQ[cv_mem->cv_q], + cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); - /* Go back in loop if we need to predict again (nflag=PREV_CONV_FAIL) */ - if (kflag == PREDICT_AGAIN) continue; + if (cv_mem->cv_sensi) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, + cv_mem->cv_cvals, + cv_mem->cv_znS[cv_mem->cv_q], + cv_mem->cv_znS+2, + cv_mem->cv_znS+2); - /* Return if nonlinear solve failed and recovery not possible. */ - if (kflag != DO_ERROR_TEST) return(kflag); + if (cv_mem->cv_quadr_sensi) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, + cv_mem->cv_cvals, + cv_mem->cv_znQS[cv_mem->cv_q], + cv_mem->cv_znQS+2, + cv_mem->cv_znQS+2); + } - /* Perform error test (nflag=CV_SUCCESS) */ - eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrm, - &nef, &(cv_mem->cv_netf), &dsm); +} - /* Go back in loop if we need to predict again (nflag=PREV_ERR_FAIL) */ - if (eflag == TRY_AGAIN) continue; +/* + * cvRescale + * + * This routine rescales the Nordsieck array by multiplying the + * jth column zn[j] by eta^j, j = 1, ..., q. Then the value of + * h is rescaled by eta, and hscale is reset to h. + */ - /* Return if error test failed and recovery not possible. */ - if (eflag != CV_SUCCESS) return(eflag); +void cvRescale(CVodeMem cv_mem) +{ + int j; + int is; - /* Error test passed (eflag=CV_SUCCESS, nflag=CV_SUCCESS), go on */ + /* compute scaling factors */ + cv_mem->cv_cvals[0] = cv_mem->cv_eta; + for (j=1; j <= cv_mem->cv_q; j++) + cv_mem->cv_cvals[j] = cv_mem->cv_eta * cv_mem->cv_cvals[j-1]; - /* ------ Correct the quadrature variables ------ */ + (void) N_VScaleVectorArray(cv_mem->cv_q, cv_mem->cv_cvals, + cv_mem->cv_zn+1, cv_mem->cv_zn+1); - if (cv_mem->cv_quadr) { + if (cv_mem->cv_quadr) + (void) N_VScaleVectorArray(cv_mem->cv_q, cv_mem->cv_cvals, + cv_mem->cv_znQ+1, cv_mem->cv_znQ+1); - ncf = nef = 0; /* reset counters for states */ + /* compute sensi scaling factors */ + if (cv_mem->cv_sensi || cv_mem->cv_quadr_sensi) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = cv_mem->cv_eta; + for (j=1; j <= cv_mem->cv_q; j++) + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[j*cv_mem->cv_Ns+is] = + cv_mem->cv_eta * cv_mem->cv_cvals[(j-1)*cv_mem->cv_Ns+is]; + } - nflag = cvQuadNls(cv_mem); - kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); + if (cv_mem->cv_sensi) { + for (j=1; j <= cv_mem->cv_q; j++) + for (is=0; iscv_Ns; is++) + cv_mem->cv_Xvecs[(j-1)*cv_mem->cv_Ns+is] = cv_mem->cv_znS[j][is]; - if (kflag == PREDICT_AGAIN) continue; - if (kflag != DO_ERROR_TEST) return(kflag); + (void) N_VScaleVectorArray(cv_mem->cv_q*cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_Xvecs, cv_mem->cv_Xvecs); + } - /* Error test on quadratures */ - if (cv_mem->cv_errconQ) { - cv_mem->cv_acnrmQ = N_VWrmsNorm(cv_mem->cv_acorQ, cv_mem->cv_ewtQ); - eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmQ, - &nefQ, &(cv_mem->cv_netfQ), &dsmQ); + if (cv_mem->cv_quadr_sensi) { + for (j=1; j <= cv_mem->cv_q; j++) + for (is=0; iscv_Ns; is++) + cv_mem->cv_Xvecs[(j-1)*cv_mem->cv_Ns+is] = cv_mem->cv_znQS[j][is]; - if (eflag == TRY_AGAIN) continue; - if (eflag != CV_SUCCESS) return(eflag); + (void) N_VScaleVectorArray(cv_mem->cv_q*cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_Xvecs, cv_mem->cv_Xvecs); + } - /* Set dsm = max(dsm, dsmQ) to be used in cvPrepareNextStep */ - if (dsmQ > dsm) dsm = dsmQ; - } + cv_mem->cv_h = cv_mem->cv_hscale * cv_mem->cv_eta; + cv_mem->cv_next_h = cv_mem->cv_h; + cv_mem->cv_hscale = cv_mem->cv_h; + cv_mem->cv_nscon = 0; +} - } +/* + * cvPredict + * + * This routine advances tn by the tentative step size h, and computes + * the predicted array z_n(0), which is overwritten on zn. The + * prediction of zn is done by repeated additions. + * If tstop is enabled, it is possible for tn + h to be past tstop by roundoff, + * and in that case, we reset tn (after incrementing by h) to tstop. + */ - /* ------ Correct the sensitivity variables (STAGGERED or STAGGERED1) ------- */ +static void cvPredict(CVodeMem cv_mem) +{ +#ifdef SUNDIALS_LOGGING_EXTRA_DEBUG + int i; +#endif + int j, k; - if (do_sensi_stg || do_sensi_stg1) { + cv_mem->cv_tn += cv_mem->cv_h; + if (cv_mem->cv_tstopset) { + if ((cv_mem->cv_tn - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO) + cv_mem->cv_tn = cv_mem->cv_tstop; + } - ncf = nef = 0; /* reset counters for states */ - if (cv_mem->cv_quadr) nefQ = 0; /* reset counter for quadratures */ + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + N_VLinearSum(ONE, cv_mem->cv_zn[j-1], ONE, + cv_mem->cv_zn[j], cv_mem->cv_zn[j-1]); - /* Evaluate f at converged y, needed for future evaluations of sens. RHS - * If f() fails recoverably, treat it as a convergence failure and - * attempt the step again */ +#ifdef SUNDIALS_LOGGING_EXTRA_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvPredict", "forward", "predictor =", ""); + N_VPrintFile(cv_mem->cv_zn[0], CV_LOGGER->debug_fp); +#endif - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, - cv_mem->cv_ftemp, cv_mem->cv_user_data); - cv_mem->cv_nfe++; - if (retval < 0) return(CV_RHSFUNC_FAIL); - if (retval > 0) { - nflag = PREV_CONV_FAIL; - continue; - } + if (cv_mem->cv_quadr) { + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + N_VLinearSum(ONE, cv_mem->cv_znQ[j-1], ONE, + cv_mem->cv_znQ[j], cv_mem->cv_znQ[j-1]); - if (do_sensi_stg) { - /* Nonlinear solve for sensitivities (all-at-once) */ - nflag = cvStgrNls(cv_mem); - kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncfS, - &(cv_mem->cv_ncfnS)); - } else { - /* Nonlinear solve for sensitivities (one-by-one) */ - for (is=0; iscv_Ns; is++) { - cv_mem->sens_solve_idx = is; - nflag = cvStgr1Nls(cv_mem, is); - kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, - &(cv_mem->cv_ncfS1[is]), - &(cv_mem->cv_ncfnS1[is])); - if (kflag != DO_ERROR_TEST) break; +#ifdef SUNDIALS_LOGGING_EXTRA_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvPredict", "quad", "predictor =", ""); + N_VPrintFile(cv_mem->cv_znQ[0], CV_LOGGER->debug_fp); +#endif + } + + if (cv_mem->cv_sensi) { + for (k = 1; k <= cv_mem->cv_q; k++) { + for (j = cv_mem->cv_q; j >= k; j--) { + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znS[j-1], + ONE, cv_mem->cv_znS[j], + cv_mem->cv_znS[j-1]); +#ifdef SUNDIALS_LOGGING_EXTRA_DEBUG + for(i = 0; i < cv_mem->cv_Ns; i++) { + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvPredict", "sensi", " i = %d, predictor_i = ", i); + N_VPrintFile(cv_mem->cv_znS[0][i], CV_LOGGER->debug_fp); } +#endif } + } + } - if (kflag == PREDICT_AGAIN) continue; - if (kflag != DO_ERROR_TEST) return(kflag); - - /* Error test on sensitivities */ - if (cv_mem->cv_errconS) { - - if (!cv_mem->cv_acnrmScur) - cv_mem->cv_acnrmS = cvSensNorm(cv_mem, cv_mem->cv_acorS, cv_mem->cv_ewtS); - - eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmS, - &nefS, &(cv_mem->cv_netfS), &dsmS); - - if (eflag == TRY_AGAIN) continue; - if (eflag != CV_SUCCESS) return(eflag); - - /* Set dsm = max(dsm, dsmS) to be used in cvPrepareNextStep */ - if (dsmS > dsm) dsm = dsmS; - + if (cv_mem->cv_quadr_sensi) { + for (k = 1; k <= cv_mem->cv_q; k++) { + for (j = cv_mem->cv_q; j >= k; j--) { + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znQS[j-1], + ONE, cv_mem->cv_znQS[j], + cv_mem->cv_znQS[j-1]); +#ifdef SUNDIALS_LOGGING_EXTRA_DEBUG + for(i = 0; i < cv_mem->cv_Ns; i++) { + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvPredict", "quad-sensi", " i = %d, predictor_i = ", i); + N_VPrintFile(cv_mem->cv_znQS[0][i], CV_LOGGER->debug_fp); + } +#endif } - - } - - /* ------ Correct the quadrature sensitivity variables ------ */ - - if (cv_mem->cv_quadr_sensi) { - - /* Reset local convergence and error test failure counters */ - ncf = nef = 0; - if (cv_mem->cv_quadr) nefQ = 0; - if (do_sensi_stg) ncfS = nefS = 0; - if (do_sensi_stg1) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_ncfS1[is] = 0; - nefS = 0; - } - - /* Note that ftempQ contains yQdot evaluated at the converged y - * (stored in cvQuadNls) and can be used in evaluating fQS */ - - nflag = cvQuadSensNls(cv_mem); - kflag = cvHandleNFlag(cv_mem, &nflag, saved_t, &ncf, &(cv_mem->cv_ncfn)); - - if (kflag == PREDICT_AGAIN) continue; - if (kflag != DO_ERROR_TEST) return(kflag); - - /* Error test on quadrature sensitivities */ - if (cv_mem->cv_errconQS) { - cv_mem->cv_acnrmQS = cvQuadSensNorm(cv_mem, cv_mem->cv_acorQS, - cv_mem->cv_ewtQS); - eflag = cvDoErrorTest(cv_mem, &nflag, saved_t, cv_mem->cv_acnrmQS, - &nefQS, &(cv_mem->cv_netfQS), &dsmQS); - - if (eflag == TRY_AGAIN) continue; - if (eflag != CV_SUCCESS) return(eflag); - - /* Set dsm = max(dsm, dsmQS) to be used in cvPrepareNextStep */ - if (dsmQS > dsm) dsm = dsmQS; - } - - } + } +} +/* + * cvSet + * + * This routine is a high level routine which calls cvSetAdams or + * cvSetBDF to set the polynomial l, the test quantity array tq, + * and the related variables rl1, gamma, and gamrat. + * + * The array tq is loaded with constants used in the control of estimated + * local errors and in the nonlinear convergence test. Specifically, while + * running at order q, the components of tq are as follows: + * tq[1] = a coefficient used to get the est. local error at order q-1 + * tq[2] = a coefficient used to get the est. local error at order q + * tq[3] = a coefficient used to get the est. local error at order q+1 + * tq[4] = constant used in nonlinear iteration convergence test + * tq[5] = coefficient used to get the order q+2 derivative vector used in + * the est. local error at order q+1 + */ - /* Everything went fine; exit loop */ +static void cvSet(CVodeMem cv_mem) +{ + switch(cv_mem->cv_lmm) { + case CV_ADAMS: + cvSetAdams(cv_mem); + break; + case CV_BDF: + cvSetBDF(cv_mem); break; - } + cv_mem->cv_rl1 = ONE / cv_mem->cv_l[1]; + cv_mem->cv_gamma = cv_mem->cv_h * cv_mem->cv_rl1; + if (cv_mem->cv_nst == 0) cv_mem->cv_gammap = cv_mem->cv_gamma; + cv_mem->cv_gamrat = (cv_mem->cv_nst > 0) ? + cv_mem->cv_gamma / cv_mem->cv_gammap : ONE; /* protect x / x != 1.0 */ +} - /* Nonlinear system solve and error test were both successful. - Update data, and consider change of step and/or order. */ - - cvCompleteStep(cv_mem); - - cvPrepareNextStep(cv_mem, dsm); - - /* If Stablilty Limit Detection is turned on, call stability limit - detection routine for possible order reduction. */ - - if (cv_mem->cv_sldeton) cvBDFStab(cv_mem); - - cv_mem->cv_etamax = (cv_mem->cv_nst <= SMALL_NST) ? ETAMX2 : ETAMX3; - - /* Finally, we rescale the acor array to be the - estimated local error vector. */ - - N_VScale(cv_mem->cv_tq[2], cv_mem->cv_acor, cv_mem->cv_acor); - - if (cv_mem->cv_quadr) - N_VScale(cv_mem->cv_tq[2], cv_mem->cv_acorQ, cv_mem->cv_acorQ); +/* + * cvSetAdams + * + * This routine handles the computation of l and tq for the + * case lmm == CV_ADAMS. + * + * The components of the array l are the coefficients of a + * polynomial Lambda(x) = l_0 + l_1 x + ... + l_q x^q, given by + * q-1 + * (d/dx) Lambda(x) = c * PRODUCT (1 + x / xi_i) , where + * i=1 + * Lambda(-1) = 0, Lambda(0) = 1, and c is a normalization factor. + * Here xi_i = [t_n - t_(n-i)] / h. + * + * The array tq is set to test quantities used in the convergence + * test, the error test, and the selection of h at a new order. + */ - if (cv_mem->cv_sensi) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = cv_mem->cv_tq[2]; +static void cvSetAdams(CVodeMem cv_mem) +{ + realtype m[L_MAX], M[3], hsum; - retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorS, cv_mem->cv_acorS); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + if (cv_mem->cv_q == 1) { + cv_mem->cv_l[0] = cv_mem->cv_l[1] = cv_mem->cv_tq[1] = cv_mem->cv_tq[5] = ONE; + cv_mem->cv_tq[2] = HALF; + cv_mem->cv_tq[3] = ONE/TWELVE; + cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; /* = 0.1 / tq[2] */ + return; } - if (cv_mem->cv_quadr_sensi) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = cv_mem->cv_tq[2]; - - retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorQS, cv_mem->cv_acorQS); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); - } + hsum = cvAdamsStart(cv_mem, m); - return(CV_SUCCESS); + M[0] = cvAltSum(cv_mem->cv_q-1, m, 1); + M[1] = cvAltSum(cv_mem->cv_q-1, m, 2); + cvAdamsFinish(cv_mem, m, M, hsum); } /* - * ----------------------------------------------------------------- - * Function called at beginning of step - * ----------------------------------------------------------------- + * cvAdamsStart + * + * This routine generates in m[] the coefficients of the product + * polynomial needed for the Adams l and tq coefficients for q > 1. */ +static realtype cvAdamsStart(CVodeMem cv_mem, realtype m[]) +{ + realtype hsum, xi_inv, sum; + int i, j; + + hsum = cv_mem->cv_h; + m[0] = ONE; + for (i=1; i <= cv_mem->cv_q; i++) m[i] = ZERO; + for (j=1; j < cv_mem->cv_q; j++) { + if ((j==cv_mem->cv_q-1) && (cv_mem->cv_qwait == 1)) { + sum = cvAltSum(cv_mem->cv_q-2, m, 2); + cv_mem->cv_tq[1] = cv_mem->cv_q * sum / m[cv_mem->cv_q-2]; + } + xi_inv = cv_mem->cv_h / hsum; + for (i=j; i >= 1; i--) m[i] += m[i-1] * xi_inv; + hsum += cv_mem->cv_tau[j]; + /* The m[i] are coefficients of product(1 to j) (1 + x/xi_i) */ + } + return(hsum); +} + /* - * cvAdjustParams + * cvAdamsFinish * - * This routine is called when a change in step size was decided upon, - * and it handles the required adjustments to the history array zn. - * If there is to be a change in order, we call cvAdjustOrder and reset - * q, L = q+1, and qwait. Then in any case, we call cvRescale, which - * resets h and rescales the Nordsieck array. + * This routine completes the calculation of the Adams l and tq. */ -static void cvAdjustParams(CVodeMem cv_mem) +static void cvAdamsFinish(CVodeMem cv_mem, realtype m[], realtype M[], realtype hsum) { - if (cv_mem->cv_qprime != cv_mem->cv_q) { - cvAdjustOrder(cv_mem, cv_mem->cv_qprime-cv_mem->cv_q); - cv_mem->cv_q = cv_mem->cv_qprime; - cv_mem->cv_L = cv_mem->cv_q+1; - cv_mem->cv_qwait = cv_mem->cv_L; + int i; + realtype M0_inv, xi, xi_inv; + + M0_inv = ONE / M[0]; + + cv_mem->cv_l[0] = ONE; + for (i=1; i <= cv_mem->cv_q; i++) + cv_mem->cv_l[i] = M0_inv * (m[i-1] / i); + xi = hsum / cv_mem->cv_h; + xi_inv = ONE / xi; + + cv_mem->cv_tq[2] = M[1] * M0_inv / xi; + cv_mem->cv_tq[5] = xi / cv_mem->cv_l[cv_mem->cv_q]; + + if (cv_mem->cv_qwait == 1) { + for (i=cv_mem->cv_q; i >= 1; i--) m[i] += m[i-1] * xi_inv; + M[2] = cvAltSum(cv_mem->cv_q, m, 2); + cv_mem->cv_tq[3] = M[2] * M0_inv / cv_mem->cv_L; } - cvRescale(cv_mem); + + cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; } /* - * cvAdjustOrder + * cvAltSum * - * This routine is a high level routine which handles an order - * change by an amount deltaq (= +1 or -1). If a decrease in order - * is requested and q==2, then the routine returns immediately. - * Otherwise cvAdjustAdams or cvAdjustBDF is called to handle the - * order change (depending on the value of lmm). + * cvAltSum returns the value of the alternating sum + * sum (i= 0 ... iend) [ (-1)^i * (a[i] / (i + k)) ]. + * If iend < 0 then cvAltSum returns 0. + * This operation is needed to compute the integral, from -1 to 0, + * of a polynomial x^(k-1) M(x) given the coefficients of M(x). */ -static void cvAdjustOrder(CVodeMem cv_mem, int deltaq) +static realtype cvAltSum(int iend, realtype a[], int k) { - if ((cv_mem->cv_q==2) && (deltaq != 1)) return; + int i, sign; + realtype sum; - switch(cv_mem->cv_lmm){ - case CV_ADAMS: - cvAdjustAdams(cv_mem, deltaq); - break; - case CV_BDF: - cvAdjustBDF(cv_mem, deltaq); - break; + if (iend < 0) return(ZERO); + + sum = ZERO; + sign = 1; + for (i=0; i <= iend; i++) { + sum += sign * (a[i] / (i+k)); + sign = -sign; } + return(sum); } /* - * cvAdjustAdams + * cvSetBDF * - * This routine adjusts the history array on a change of order q by - * deltaq, in the case that lmm == CV_ADAMS. + * This routine computes the coefficients l and tq in the case + * lmm == CV_BDF. cvSetBDF calls cvSetTqBDF to set the test + * quantity array tq. + * + * The components of the array l are the coefficients of a + * polynomial Lambda(x) = l_0 + l_1 x + ... + l_q x^q, given by + * q-1 + * Lambda(x) = (1 + x / xi*_q) * PRODUCT (1 + x / xi_i) , where + * i=1 + * + * The components of the array p (for projections) are the + * coefficients of a polynomial Phi(x) = p_0 + p_1 x + ... + p_q x^q, + * given by + * q + * Phi(x) = PRODUCT (1 + x / xi_i) + * i=1 + * + * Here xi_i = [t_n - t_(n-i)] / h. + * + * The array tq is set to test quantities used in the convergence + * test, the error test, and the selection of h at a new order. */ -static void cvAdjustAdams(CVodeMem cv_mem, int deltaq) +static void cvSetBDF(CVodeMem cv_mem) { - int i, j; - realtype xi, hsum; + realtype alpha0, alpha0_hat, xi_inv, xistar_inv, hsum; + int i,j; - /* On an order increase, set new column of zn to zero and return */ + cv_mem->cv_l[0] = cv_mem->cv_l[1] = xi_inv = xistar_inv = ONE; + for (i=2; i <= cv_mem->cv_q; i++) cv_mem->cv_l[i] = ZERO; + alpha0 = alpha0_hat = -ONE; + hsum = cv_mem->cv_h; - if (deltaq==1) { - N_VConst(ZERO, cv_mem->cv_zn[cv_mem->cv_L]); - if (cv_mem->cv_quadr) - N_VConst(ZERO, cv_mem->cv_znQ[cv_mem->cv_L]); - if (cv_mem->cv_sensi) - (void) N_VConstVectorArray(cv_mem->cv_Ns, ZERO, - cv_mem->cv_znS[cv_mem->cv_L]); - return; - } - - /* - * On an order decrease, each zn[j] is adjusted by a multiple of zn[q]. - * The coeffs. in the adjustment are the coeffs. of the polynomial: - * x - * q * INT { u * ( u + xi_1 ) * ... * ( u + xi_{q-2} ) } du - * 0 - * where xi_j = [t_n - t_(n-j)]/h => xi_0 = 0 - */ - - for (i=0; i <= cv_mem->cv_qmax; i++) cv_mem->cv_l[i] = ZERO; - cv_mem->cv_l[1] = ONE; - hsum = ZERO; - for (j=1; j <= cv_mem->cv_q-2; j++) { - hsum += cv_mem->cv_tau[j]; - xi = hsum / cv_mem->cv_hscale; - for (i=j+1; i >= 1; i--) - cv_mem->cv_l[i] = cv_mem->cv_l[i]*xi + cv_mem->cv_l[i-1]; - } - - for (j=1; j <= cv_mem->cv_q-2; j++) - cv_mem->cv_l[j+1] = cv_mem->cv_q * (cv_mem->cv_l[j] / (j+1)); + if (cv_mem->proj_enabled) + for (i=0; i <= cv_mem->cv_q; i++) + cv_mem->proj_p[i] = cv_mem->cv_l[i]; - if (cv_mem->cv_q > 2) { - - for (j=2; j < cv_mem->cv_q; j++) - cv_mem->cv_cvals[j-2] = -cv_mem->cv_l[j]; + if (cv_mem->cv_q > 1) { + for (j=2; j < cv_mem->cv_q; j++) { + hsum += cv_mem->cv_tau[j-1]; + xi_inv = cv_mem->cv_h / hsum; + alpha0 -= ONE / j; + for (i=j; i >= 1; i--) cv_mem->cv_l[i] += cv_mem->cv_l[i-1]*xi_inv; + /* The l[i] are coefficients of product(1 to j) (1 + x/xi_i) */ + } - (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, - cv_mem->cv_zn[cv_mem->cv_q], - cv_mem->cv_zn+2, cv_mem->cv_zn+2); + /* j = q */ + alpha0 -= ONE / cv_mem->cv_q; + xistar_inv = -cv_mem->cv_l[1] - alpha0; + hsum += cv_mem->cv_tau[cv_mem->cv_q-1]; + xi_inv = cv_mem->cv_h / hsum; + alpha0_hat = -cv_mem->cv_l[1] - xi_inv; - if (cv_mem->cv_quadr) - (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, - cv_mem->cv_znQ[cv_mem->cv_q], - cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); + if (cv_mem->proj_enabled) + for (i = cv_mem->cv_q; i >= 1; i--) + cv_mem->proj_p[i] = cv_mem->cv_l[i] + cv_mem->proj_p[i-1] * xi_inv; - if (cv_mem->cv_sensi) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, - cv_mem->cv_cvals, - cv_mem->cv_znS[cv_mem->cv_q], - cv_mem->cv_znS+2, - cv_mem->cv_znS+2); + for (i=cv_mem->cv_q; i >= 1; i--) + cv_mem->cv_l[i] += cv_mem->cv_l[i-1]*xistar_inv; } + cvSetTqBDF(cv_mem, hsum, alpha0, alpha0_hat, xi_inv, xistar_inv); } /* - * cvAdjustBDF + * cvSetTqBDF * - * This is a high level routine which handles adjustments to the - * history array on a change of order by deltaq in the case that - * lmm == CV_BDF. cvAdjustBDF calls cvIncreaseBDF if deltaq = +1 and - * cvDecreaseBDF if deltaq = -1 to do the actual work. + * This routine sets the test quantity array tq in the case + * lmm == CV_BDF. */ -static void cvAdjustBDF(CVodeMem cv_mem, int deltaq) +static void cvSetTqBDF(CVodeMem cv_mem, realtype hsum, realtype alpha0, + realtype alpha0_hat, realtype xi_inv, realtype xistar_inv) { - switch(deltaq) { - case 1: - cvIncreaseBDF(cv_mem); - return; - case -1: - cvDecreaseBDF(cv_mem); - return; + realtype A1, A2, A3, A4, A5, A6; + realtype C, Cpinv, Cppinv; + + A1 = ONE - alpha0_hat + alpha0; + A2 = ONE + cv_mem->cv_q * A1; + cv_mem->cv_tq[2] = SUNRabs(A1 / (alpha0 * A2)); + cv_mem->cv_tq[5] = SUNRabs(A2 * xistar_inv / (cv_mem->cv_l[cv_mem->cv_q] * xi_inv)); + if (cv_mem->cv_qwait == 1) { + if (cv_mem->cv_q > 1) { + C = xistar_inv / cv_mem->cv_l[cv_mem->cv_q]; + A3 = alpha0 + ONE / cv_mem->cv_q; + A4 = alpha0_hat + xi_inv; + Cpinv = (ONE - A4 + A3) / A3; + cv_mem->cv_tq[1] = SUNRabs(C * Cpinv); + } + else cv_mem->cv_tq[1] = ONE; + hsum += cv_mem->cv_tau[cv_mem->cv_q]; + xi_inv = cv_mem->cv_h / hsum; + A5 = alpha0 - (ONE / (cv_mem->cv_q+1)); + A6 = alpha0_hat - xi_inv; + Cppinv = (ONE - A6 + A5) / A2; + cv_mem->cv_tq[3] = SUNRabs(Cppinv / (xi_inv * (cv_mem->cv_q+2) * A5)); } + cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; } /* - * cvIncreaseBDF + * ----------------------------------------------------------------- + * Nonlinear solver functions + * ----------------------------------------------------------------- + */ + +/* + * cvNls * - * This routine adjusts the history array on an increase in the - * order q in the case that lmm == CV_BDF. - * A new column zn[q+1] is set equal to a multiple of the saved - * vector (= acor) in zn[indx_acor]. Then each zn[j] is adjusted by - * a multiple of zn[q+1]. The coefficients in the adjustment are the - * coefficients of the polynomial x*x*(x+xi_1)*...*(x+xi_j), - * where xi_j = [t_n - t_(n-j)]/h. + * This routine attempts to solve the nonlinear system associated + * with a single implicit step of the linear multistep method. */ -static void cvIncreaseBDF(CVodeMem cv_mem) +static int cvNls(CVodeMem cv_mem, int nflag) { - realtype alpha0, alpha1, prod, xi, xiold, hsum, A1; - int i, j; - int is; + int flag = CV_SUCCESS; + booleantype callSetup; + booleantype do_sensi_sim; + long int nni_inc = 0; + long int nnf_inc = 0; - for (i=0; i <= cv_mem->cv_qmax; i++) - cv_mem->cv_l[i] = ZERO; - cv_mem->cv_l[2] = alpha1 = prod = xiold = ONE; - alpha0 = -ONE; - hsum = cv_mem->cv_hscale; - if (cv_mem->cv_q > 1) { - for (j=1; j < cv_mem->cv_q; j++) { - hsum += cv_mem->cv_tau[j+1]; - xi = hsum / cv_mem->cv_hscale; - prod *= xi; - alpha0 -= ONE / (j+1); - alpha1 += ONE / xi; - for (i=j+2; i >= 2; i--) - cv_mem->cv_l[i] = cv_mem->cv_l[i]*xiold + cv_mem->cv_l[i-1]; - xiold = xi; + /* Are we computing sensitivities with the CV_SIMULTANEOUS approach? */ + do_sensi_sim = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)); + + /* Decide whether or not to call setup routine (if one exists) and */ + /* set flag convfail (input to lsetup for its evaluation decision) */ + if (cv_mem->cv_lsetup) { + cv_mem->convfail = ((nflag == FIRST_CALL) || (nflag == PREV_ERR_FAIL)) ? + CV_NO_FAILURES : CV_FAIL_OTHER; + + callSetup = (nflag == PREV_CONV_FAIL) || (nflag == PREV_ERR_FAIL) || + (cv_mem->cv_nst == 0) || + (cv_mem->cv_nst >= cv_mem->cv_nstlp + cv_mem->cv_msbp) || + (SUNRabs(cv_mem->cv_gamrat-ONE) > cv_mem->cv_dgmax_lsetup); + + /* Decide whether to force a call to setup */ + if (cv_mem->cv_forceSetup) { + callSetup = SUNTRUE; + cv_mem->convfail = CV_FAIL_OTHER; } + } else { + cv_mem->cv_crate = ONE; + cv_mem->cv_crateS = ONE; /* if NO lsetup all conv. rates are set to ONE */ + callSetup = SUNFALSE; } - A1 = (-alpha0 - alpha1) / prod; - /* - zn[indx_acor] contains the value Delta_n = y_n - y_n(0) - This value was stored there at the previous successful - step (in cvCompleteStep) + /* initial guess for the correction to the predictor */ + if (do_sensi_sim) + N_VConst(ZERO, cv_mem->ycorSim); + else + N_VConst(ZERO, cv_mem->cv_acor); - A1 contains dbar = (1/xi* - 1/xi_q)/prod(xi_j) - */ + /* call nonlinear solver setup if it exists */ + if ((cv_mem->NLS)->ops->setup) { + if (do_sensi_sim) + flag = SUNNonlinSolSetup(cv_mem->NLS, cv_mem->ycorSim, cv_mem); + else + flag = SUNNonlinSolSetup(cv_mem->NLS, cv_mem->cv_acor, cv_mem); - N_VScale(A1, cv_mem->cv_zn[cv_mem->cv_indx_acor], - cv_mem->cv_zn[cv_mem->cv_L]); + if (flag < 0) return(CV_NLS_SETUP_FAIL); + if (flag > 0) return(SUN_NLS_CONV_RECVR); + } - /* for (j=2; j <= cv_mem->cv_q; j++) */ - if (cv_mem->cv_q > 1) - (void) N_VScaleAddMulti(cv_mem->cv_q-1, cv_mem->cv_l+2, - cv_mem->cv_zn[cv_mem->cv_L], - cv_mem->cv_zn+2, cv_mem->cv_zn+2); + /* solve the nonlinear system */ + if (do_sensi_sim) { - if (cv_mem->cv_quadr) { - N_VScale(A1, cv_mem->cv_znQ[cv_mem->cv_indx_acor], - cv_mem->cv_znQ[cv_mem->cv_L]); + flag = SUNNonlinSolSolve(cv_mem->NLSsim, cv_mem->zn0Sim, cv_mem->ycorSim, + cv_mem->ewtSim, cv_mem->cv_tq[4], callSetup, cv_mem); - /* for (j=2; j <= cv_mem->cv_q; j++) */ - if (cv_mem->cv_q > 1) - (void) N_VScaleAddMulti(cv_mem->cv_q-1, cv_mem->cv_l+2, - cv_mem->cv_znQ[cv_mem->cv_L], - cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); - } + /* increment counters */ + (void) SUNNonlinSolGetNumIters(cv_mem->NLSsim, &nni_inc); + cv_mem->cv_nni += nni_inc; - if (cv_mem->cv_sensi) { + (void) SUNNonlinSolGetNumConvFails(cv_mem->NLSsim, &nnf_inc); + cv_mem->cv_nnf += nnf_inc; - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = A1; + } else { - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_znS[cv_mem->cv_indx_acor], - cv_mem->cv_znS[cv_mem->cv_L]); + flag = SUNNonlinSolSolve(cv_mem->NLS, cv_mem->cv_zn[0], cv_mem->cv_acor, + cv_mem->cv_ewt, cv_mem->cv_tq[4], callSetup, cv_mem); - /* for (j=2; j <= cv_mem->cv_q; j++) */ - if (cv_mem->cv_q > 1) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-1, - cv_mem->cv_l+2, - cv_mem->cv_znS[cv_mem->cv_L], - cv_mem->cv_znS+2, - cv_mem->cv_znS+2); + /* increment counters */ + (void) SUNNonlinSolGetNumIters(cv_mem->NLS, &nni_inc); + cv_mem->cv_nni += nni_inc; + + (void) SUNNonlinSolGetNumConvFails(cv_mem->NLS, &nnf_inc); + cv_mem->cv_nnf += nnf_inc; } - if (cv_mem->cv_quadr_sensi) { + /* if the solve failed return */ + if (flag != SUN_NLS_SUCCESS) return(flag); - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = A1; + /* solve successful */ - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_znQS[cv_mem->cv_indx_acor], - cv_mem->cv_znQS[cv_mem->cv_L]); + /* update the state based on the final correction from the nonlinear solver */ + N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, cv_mem->cv_acor, cv_mem->cv_y); - /* for (j=2; j <= cv_mem->cv_q; j++) */ - if (cv_mem->cv_q > 1) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-1, - cv_mem->cv_l+2, - cv_mem->cv_znQS[cv_mem->cv_L], - cv_mem->cv_znQS+2, - cv_mem->cv_znQS+2); + /* update the sensitivities based on the final correction from the nonlinear solver */ + if (do_sensi_sim) { + N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znS[0], + ONE, cv_mem->cv_acorS, cv_mem->cv_yS); + } + + /* compute acnrm if is was not already done by the nonlinear solver */ + if (!cv_mem->cv_acnrmcur) { + if (do_sensi_sim && cv_mem->cv_errconS) + cv_mem->cv_acnrm = N_VWrmsNorm(cv_mem->ycorSim, cv_mem->ewtSim); + else + cv_mem->cv_acnrm = N_VWrmsNorm(cv_mem->cv_acor, cv_mem->cv_ewt); } + /* update Jacobian status */ + cv_mem->cv_jcur = SUNFALSE; + + /* check inequality constraints */ + if (cv_mem->cv_constraintsSet) + flag = cvCheckConstraints(cv_mem); + + return(flag); } /* - * cvDecreaseBDF + * cvCheckConstraints * - * This routine adjusts the history array on a decrease in the - * order q in the case that lmm == CV_BDF. - * Each zn[j] is adjusted by a multiple of zn[q]. The coefficients - * in the adjustment are the coefficients of the polynomial - * x*x*(x+xi_1)*...*(x+xi_j), where xi_j = [t_n - t_(n-j)]/h. + * This routine determines if the constraints of the problem + * are satisfied by the proposed step + * + * Possible return values are: + * + * CV_SUCCESS ---> allows stepping forward + * + * CONSTR_RECVR ---> values failed to satisfy constraints + * + * CV_CONSTR_FAIL ---> values failed to satisfy constraints with hmin */ -static void cvDecreaseBDF(CVodeMem cv_mem) +static int cvCheckConstraints(CVodeMem cv_mem) { - realtype hsum, xi; - int i, j; + booleantype constraintsPassed; + realtype vnorm; + N_Vector mm = cv_mem->cv_ftemp; + N_Vector tmp = cv_mem->cv_tempv; - for (i=0; i <= cv_mem->cv_qmax; i++) - cv_mem->cv_l[i] = ZERO; - cv_mem->cv_l[2] = ONE; - hsum = ZERO; - for (j=1; j <= cv_mem->cv_q-2; j++) { - hsum += cv_mem->cv_tau[j]; - xi = hsum / cv_mem->cv_hscale; - for (i=j+2; i >= 2; i--) - cv_mem->cv_l[i] = cv_mem->cv_l[i]*xi + cv_mem->cv_l[i-1]; - } + /* Get mask vector mm, set where constraints failed */ + constraintsPassed = N_VConstrMask(cv_mem->cv_constraints, cv_mem->cv_y, mm); + if (constraintsPassed) return(CV_SUCCESS); - if (cv_mem->cv_q > 2) { + /* Constraints not met */ - for (j=2; j < cv_mem->cv_q; j++) - cv_mem->cv_cvals[j-2] = -cv_mem->cv_l[j]; + /* Compute correction to satisfy constraints */ + N_VCompare(ONEPT5, cv_mem->cv_constraints, tmp); /* a[i]=1 when |c[i]|=2 */ + N_VProd(tmp, cv_mem->cv_constraints, tmp); /* a * c */ + N_VDiv(tmp, cv_mem->cv_ewt, tmp); /* a * c * wt */ + N_VLinearSum(ONE, cv_mem->cv_y, -PT1, tmp, tmp); /* y - 0.1 * a * c * wt */ + N_VProd(tmp, mm, tmp); /* v = mm*(y-0.1*a*c*wt) */ - (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, - cv_mem->cv_zn[cv_mem->cv_q], - cv_mem->cv_zn+2, cv_mem->cv_zn+2); + vnorm = N_VWrmsNorm(tmp, cv_mem->cv_ewt); /* ||v|| */ - if (cv_mem->cv_quadr) - (void) N_VScaleAddMulti(cv_mem->cv_q-2, cv_mem->cv_cvals, - cv_mem->cv_znQ[cv_mem->cv_q], - cv_mem->cv_znQ+2, cv_mem->cv_znQ+2); + /* If vector v of constraint corrections is small in norm, correct and + accept this step */ + if (vnorm <= cv_mem->cv_tq[4]) { + N_VLinearSum(ONE, cv_mem->cv_acor, + -ONE, tmp, cv_mem->cv_acor); /* acor <- acor - v */ + return(CV_SUCCESS); + } - if (cv_mem->cv_sensi) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, - cv_mem->cv_cvals, - cv_mem->cv_znS[cv_mem->cv_q], - cv_mem->cv_znS+2, - cv_mem->cv_znS+2); + /* Return with error if |h| == hmin */ + if (SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) return(CV_CONSTR_FAIL); - if (cv_mem->cv_quadr_sensi) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q-2, - cv_mem->cv_cvals, - cv_mem->cv_znQS[cv_mem->cv_q], - cv_mem->cv_znQS+2, - cv_mem->cv_znQS+2); - } + /* Constraint correction is too large, reduce h by computing eta = h'/h */ + N_VLinearSum(ONE, cv_mem->cv_zn[0], -ONE, cv_mem->cv_y, tmp); + N_VProd(mm, tmp, tmp); + cv_mem->cv_eta = PT9*N_VMinQuotient(cv_mem->cv_zn[0], tmp); + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, PT1); + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + /* Reattempt step with new step size */ + return(CONSTR_RECVR); } - /* - * cvRescale + * cvQuadNls + * + * This routine solves for the quadrature variables at the new step. + * It does not solve a nonlinear system, but rather updates the + * quadrature variables. The name for this function is just for + * uniformity purposes. + * + * Possible return values (interpreted by cvHandleNFlag) + * + * CV_SUCCESS -> continue with error test + * CV_QRHSFUNC_FAIL -> halt the integration + * QRHSFUNC_RECVR -> predict again or stop if too many * - * This routine rescales the Nordsieck array by multiplying the - * jth column zn[j] by eta^j, j = 1, ..., q. Then the value of - * h is rescaled by eta, and hscale is reset to h. */ -static void cvRescale(CVodeMem cv_mem) +static int cvQuadNls(CVodeMem cv_mem) { - int j; - int is; - - /* compute scaling factors */ - cv_mem->cv_cvals[0] = cv_mem->cv_eta; - for (j=1; j <= cv_mem->cv_q; j++) - cv_mem->cv_cvals[j] = cv_mem->cv_eta * cv_mem->cv_cvals[j-1]; - - (void) N_VScaleVectorArray(cv_mem->cv_q, cv_mem->cv_cvals, - cv_mem->cv_zn+1, cv_mem->cv_zn+1); - - if (cv_mem->cv_quadr) - (void) N_VScaleVectorArray(cv_mem->cv_q, cv_mem->cv_cvals, - cv_mem->cv_znQ+1, cv_mem->cv_znQ+1); - - /* compute sensi scaling factors */ - if (cv_mem->cv_sensi || cv_mem->cv_quadr_sensi) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = cv_mem->cv_eta; - for (j=1; j <= cv_mem->cv_q; j++) - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[j*cv_mem->cv_Ns+is] = - cv_mem->cv_eta * cv_mem->cv_cvals[(j-1)*cv_mem->cv_Ns+is]; - } - - if (cv_mem->cv_sensi) { - for (j=1; j <= cv_mem->cv_q; j++) - for (is=0; iscv_Ns; is++) - cv_mem->cv_Xvecs[(j-1)*cv_mem->cv_Ns+is] = cv_mem->cv_znS[j][is]; + int retval; - (void) N_VScaleVectorArray(cv_mem->cv_q*cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_Xvecs, cv_mem->cv_Xvecs); - } + /* Save quadrature correction in acorQ */ + retval = cv_mem->cv_fQ(cv_mem->cv_tn, cv_mem->cv_y, + cv_mem->cv_acorQ, cv_mem->cv_user_data); + cv_mem->cv_nfQe++; + if (retval < 0) return(CV_QRHSFUNC_FAIL); + if (retval > 0) return(QRHSFUNC_RECVR); + /* If needed, save the value of yQdot = fQ into ftempQ + * for use in evaluating fQS */ if (cv_mem->cv_quadr_sensi) { - for (j=1; j <= cv_mem->cv_q; j++) - for (is=0; iscv_Ns; is++) - cv_mem->cv_Xvecs[(j-1)*cv_mem->cv_Ns+is] = cv_mem->cv_znQS[j][is]; - - (void) N_VScaleVectorArray(cv_mem->cv_q*cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_Xvecs, cv_mem->cv_Xvecs); + N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_ftempQ); } - cv_mem->cv_h = cv_mem->cv_hscale * cv_mem->cv_eta; - cv_mem->cv_next_h = cv_mem->cv_h; - cv_mem->cv_hscale = cv_mem->cv_h; - cv_mem->cv_nscon = 0; + N_VLinearSum(cv_mem->cv_h, cv_mem->cv_acorQ, -ONE, + cv_mem->cv_znQ[1], cv_mem->cv_acorQ); + N_VScale(cv_mem->cv_rl1, cv_mem->cv_acorQ, cv_mem->cv_acorQ); + + /* Apply correction to quadrature variables */ + N_VLinearSum(ONE, cv_mem->cv_znQ[0], ONE, cv_mem->cv_acorQ, cv_mem->cv_yQ); + return(CV_SUCCESS); } /* - * cvPredict + * cvQuadSensNls + * + * This routine solves for the quadrature sensitivity variables + * at the new step. It does not solve a nonlinear system, but + * rather updates the quadrature variables. The name for this + * function is just for uniformity purposes. + * + * Possible return values (interpreted by cvHandleNFlag) + * + * CV_SUCCESS -> continue with error test + * CV_QSRHSFUNC_FAIL -> halt the integration + * QSRHSFUNC_RECVR -> predict again or stop if too many * - * This routine advances tn by the tentative step size h, and computes - * the predicted array z_n(0), which is overwritten on zn. The - * prediction of zn is done by repeated additions. - * If tstop is enabled, it is possible for tn + h to be past tstop by roundoff, - * and in that case, we reset tn (after incrementing by h) to tstop. */ -static void cvPredict(CVodeMem cv_mem) +static int cvQuadSensNls(CVodeMem cv_mem) { - int j, k; - - cv_mem->cv_tn += cv_mem->cv_h; - if (cv_mem->cv_tstopset) { - if ((cv_mem->cv_tn - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO) - cv_mem->cv_tn = cv_mem->cv_tstop; - } - - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - N_VLinearSum(ONE, cv_mem->cv_zn[j-1], ONE, - cv_mem->cv_zn[j], cv_mem->cv_zn[j-1]); + int is, retval; - if (cv_mem->cv_quadr) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - N_VLinearSum(ONE, cv_mem->cv_znQ[j-1], ONE, - cv_mem->cv_znQ[j], cv_mem->cv_znQ[j-1]); - } + /* Save quadrature correction in acorQ */ + retval = cv_mem->cv_fQS(cv_mem->cv_Ns, cv_mem->cv_tn, cv_mem->cv_y, + cv_mem->cv_yS, cv_mem->cv_ftempQ, + cv_mem->cv_acorQS, cv_mem->cv_user_data, + cv_mem->cv_tempv, cv_mem->cv_tempvQ); + cv_mem->cv_nfQSe++; + if (retval < 0) return(CV_QSRHSFUNC_FAIL); + if (retval > 0) return(QSRHSFUNC_RECVR); - if (cv_mem->cv_sensi) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znS[j-1], - ONE, cv_mem->cv_znS[j], - cv_mem->cv_znS[j-1]); - } - if (cv_mem->cv_quadr_sensi) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znQS[j-1], - ONE, cv_mem->cv_znQS[j], - cv_mem->cv_znQS[j-1]); + for (is=0; iscv_Ns; is++) { + N_VLinearSum(cv_mem->cv_h, cv_mem->cv_acorQS[is], -ONE, + cv_mem->cv_znQS[1][is], cv_mem->cv_acorQS[is]); + N_VScale(cv_mem->cv_rl1, cv_mem->cv_acorQS[is], cv_mem->cv_acorQS[is]); + /* Apply correction to quadrature sensitivity variables */ + N_VLinearSum(ONE, cv_mem->cv_znQS[0][is], ONE, + cv_mem->cv_acorQS[is], cv_mem->cv_yQS[is]); } + return(CV_SUCCESS); } + /* - * cvSet - * - * This routine is a high level routine which calls cvSetAdams or - * cvSetBDF to set the polynomial l, the test quantity array tq, - * and the related variables rl1, gamma, and gamrat. + * cvStgrNls * - * The array tq is loaded with constants used in the control of estimated - * local errors and in the nonlinear convergence test. Specifically, while - * running at order q, the components of tq are as follows: - * tq[1] = a coefficient used to get the est. local error at order q-1 - * tq[2] = a coefficient used to get the est. local error at order q - * tq[3] = a coefficient used to get the est. local error at order q+1 - * tq[4] = constant used in nonlinear iteration convergence test - * tq[5] = coefficient used to get the order q+2 derivative vector used in - * the est. local error at order q+1 + * This is a high-level routine that attempts to solve the + * sensitivity linear systems using the attached nonlinear solver + * once the states y_n were obtained and passed the error test. */ -static void cvSet(CVodeMem cv_mem) +static int cvStgrNls(CVodeMem cv_mem) { - switch(cv_mem->cv_lmm) { - case CV_ADAMS: - cvSetAdams(cv_mem); - break; - case CV_BDF: - cvSetBDF(cv_mem); - break; - } - cv_mem->cv_rl1 = ONE / cv_mem->cv_l[1]; - cv_mem->cv_gamma = cv_mem->cv_h * cv_mem->cv_rl1; - if (cv_mem->cv_nst == 0) cv_mem->cv_gammap = cv_mem->cv_gamma; - cv_mem->cv_gamrat = (cv_mem->cv_nst > 0) ? - cv_mem->cv_gamma / cv_mem->cv_gammap : ONE; /* protect x / x != 1.0 */ -} + booleantype callSetup; + int flag=CV_SUCCESS; + long int nniS_inc = 0; + long int nnfS_inc = 0; -/* - * cvSetAdams - * - * This routine handles the computation of l and tq for the - * case lmm == CV_ADAMS. - * - * The components of the array l are the coefficients of a - * polynomial Lambda(x) = l_0 + l_1 x + ... + l_q x^q, given by - * q-1 - * (d/dx) Lambda(x) = c * PRODUCT (1 + x / xi_i) , where - * i=1 - * Lambda(-1) = 0, Lambda(0) = 1, and c is a normalization factor. - * Here xi_i = [t_n - t_(n-i)] / h. - * - * The array tq is set to test quantities used in the convergence - * test, the error test, and the selection of h at a new order. - */ + callSetup = SUNFALSE; + if (cv_mem->cv_lsetup == NULL) + cv_mem->cv_crateS = ONE; -static void cvSetAdams(CVodeMem cv_mem) -{ - realtype m[L_MAX], M[3], hsum; + /* initial guess for the correction to the predictor */ + N_VConst(ZERO, cv_mem->ycorStg); - if (cv_mem->cv_q == 1) { - cv_mem->cv_l[0] = cv_mem->cv_l[1] = cv_mem->cv_tq[1] = cv_mem->cv_tq[5] = ONE; - cv_mem->cv_tq[2] = HALF; - cv_mem->cv_tq[3] = ONE/TWELVE; - cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; /* = 0.1 / tq[2] */ - return; - } + /* set sens solve flag */ + cv_mem->sens_solve = SUNTRUE; - hsum = cvAdamsStart(cv_mem, m); + /* solve the nonlinear system */ + flag = SUNNonlinSolSolve(cv_mem->NLSstg, cv_mem->zn0Stg, cv_mem->ycorStg, + cv_mem->ewtStg, cv_mem->cv_tq[4], callSetup, cv_mem); - M[0] = cvAltSum(cv_mem->cv_q-1, m, 1); - M[1] = cvAltSum(cv_mem->cv_q-1, m, 2); + /* increment counters */ + (void) SUNNonlinSolGetNumIters(cv_mem->NLSstg, &nniS_inc); + cv_mem->cv_nniS += nniS_inc; - cvAdamsFinish(cv_mem, m, M, hsum); -} + (void) SUNNonlinSolGetNumConvFails(cv_mem->NLSstg, &nnfS_inc); + cv_mem->cv_nnfS += nnfS_inc; -/* - * cvAdamsStart - * - * This routine generates in m[] the coefficients of the product - * polynomial needed for the Adams l and tq coefficients for q > 1. - */ + /* reset sens solve flag */ + cv_mem->sens_solve = SUNFALSE; -static realtype cvAdamsStart(CVodeMem cv_mem, realtype m[]) -{ - realtype hsum, xi_inv, sum; - int i, j; + /* if the solve failed return */ + if (flag != SUN_NLS_SUCCESS) return(flag); - hsum = cv_mem->cv_h; - m[0] = ONE; - for (i=1; i <= cv_mem->cv_q; i++) m[i] = ZERO; - for (j=1; j < cv_mem->cv_q; j++) { - if ((j==cv_mem->cv_q-1) && (cv_mem->cv_qwait == 1)) { - sum = cvAltSum(cv_mem->cv_q-2, m, 2); - cv_mem->cv_tq[1] = cv_mem->cv_q * sum / m[cv_mem->cv_q-2]; - } - xi_inv = cv_mem->cv_h / hsum; - for (i=j; i >= 1; i--) - m[i] += m[i-1] * xi_inv; - hsum += cv_mem->cv_tau[j]; - /* The m[i] are coefficients of product(1 to j) (1 + x/xi_i) */ - } - return(hsum); + /* solve successful */ + + /* update the sensitivities based on the final correction from the nonlinear solver */ + N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znS[0], + ONE, cv_mem->cv_acorS, cv_mem->cv_yS); + + /* update Jacobian status */ + cv_mem->cv_jcur = SUNFALSE; + + return(flag); } /* - * cvAdamsFinish + * cvStgr1Nls * - * This routine completes the calculation of the Adams l and tq. + * This is a high-level routine that attempts to solve the i-th + * sensitivity linear system using the attached nonlinear solver + * once the states y_n were obtained and passed the error test. */ -static void cvAdamsFinish(CVodeMem cv_mem, realtype m[], realtype M[], realtype hsum) +static int cvStgr1Nls(CVodeMem cv_mem, int is) { - int i; - realtype M0_inv, xi, xi_inv; + booleantype callSetup; + long int nniS1_inc = 0; + long int nnfS1_inc = 0; + int flag=CV_SUCCESS; - M0_inv = ONE / M[0]; - cv_mem->cv_l[0] = ONE; - for (i=1; i <= cv_mem->cv_q; i++) - cv_mem->cv_l[i] = M0_inv * (m[i-1] / i); - xi = hsum / cv_mem->cv_h; - xi_inv = ONE / xi; + callSetup = SUNFALSE; + if (cv_mem->cv_lsetup == NULL) + cv_mem->cv_crateS = ONE; - cv_mem->cv_tq[2] = M[1] * M0_inv / xi; - cv_mem->cv_tq[5] = xi / cv_mem->cv_l[cv_mem->cv_q]; + /* initial guess for the correction to the predictor */ + N_VConst(ZERO, cv_mem->cv_acorS[is]); - if (cv_mem->cv_qwait == 1) { - for (i=cv_mem->cv_q; i >= 1; i--) - m[i] += m[i-1] * xi_inv; - M[2] = cvAltSum(cv_mem->cv_q, m, 2); - cv_mem->cv_tq[3] = M[2] * M0_inv / cv_mem->cv_L; - } + /* set sens solve flag */ + cv_mem->sens_solve = SUNTRUE; - cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; -} + /* solve the nonlinear system */ + flag = SUNNonlinSolSolve(cv_mem->NLSstg1, + cv_mem->cv_znS[0][is], cv_mem->cv_acorS[is], + cv_mem->cv_ewtS[is], cv_mem->cv_tq[4], callSetup, cv_mem); -/* - * cvAltSum - * - * cvAltSum returns the value of the alternating sum - * sum (i= 0 ... iend) [ (-1)^i * (a[i] / (i + k)) ]. - * If iend < 0 then cvAltSum returns 0. - * This operation is needed to compute the integral, from -1 to 0, - * of a polynomial x^(k-1) M(x) given the coefficients of M(x). - */ + /* increment counters */ + (void) SUNNonlinSolGetNumIters(cv_mem->NLSstg1, &nniS1_inc); + cv_mem->cv_nniS1[is] += nniS1_inc; -static realtype cvAltSum(int iend, realtype a[], int k) -{ - int i, sign; - realtype sum; + (void) SUNNonlinSolGetNumConvFails(cv_mem->NLSstg1, &nnfS1_inc); + cv_mem->cv_nnfS1[is] += nnfS1_inc; - if (iend < 0) return(ZERO); + /* reset sens solve flag */ + cv_mem->sens_solve = SUNFALSE; - sum = ZERO; - sign = 1; - for (i=0; i <= iend; i++) { - sum += sign * (a[i] / (i+k)); - sign = -sign; - } - return(sum); + /* if the solve failed return */ + if (flag != SUN_NLS_SUCCESS) return(flag); + + /* solve successful */ + + /* update the sensitivity with the final correction from the nonlinear solver */ + N_VLinearSum(ONE, cv_mem->cv_znS[0][is], + ONE, cv_mem->cv_acorS[is], cv_mem->cv_yS[is]); + + /* update Jacobian status */ + cv_mem->cv_jcur = SUNFALSE; + + return(flag); } /* - * cvSetBDF + * cvHandleNFlag * - * This routine computes the coefficients l and tq in the case - * lmm == CV_BDF. cvSetBDF calls cvSetTqBDF to set the test - * quantity array tq. + * This routine takes action on the return value nflag = *nflagPtr + * returned by cvNls, as follows: * - * The components of the array l are the coefficients of a - * polynomial Lambda(x) = l_0 + l_1 x + ... + l_q x^q, given by - * q-1 - * Lambda(x) = (1 + x / xi*_q) * PRODUCT (1 + x / xi_i) , where - * i=1 - * xi_i = [t_n - t_(n-i)] / h. + * If cvNls succeeded in solving the nonlinear system, then + * cvHandleNFlag returns the constant DO_ERROR_TEST, which tells cvStep + * to perform the error test. + * + * If the nonlinear system was not solved successfully, then ncfn and + * ncf = *ncfPtr are incremented and Nordsieck array zn is restored. + * + * If the solution of the nonlinear system failed due to an + * unrecoverable failure by setup, we return the value CV_LSETUP_FAIL. + * + * If it failed due to an unrecoverable failure in solve, then we return + * the value CV_LSOLVE_FAIL. + * + * If it failed due to an unrecoverable failure in rhs, then we return + * the value CV_RHSFUNC_FAIL. + * + * If it failed due to an unrecoverable failure in quad rhs, then we return + * the value CV_QRHSFUNC_FAIL. + * + * If it failed due to an unrecoverable failure in sensi rhs, then we return + * the value CV_SRHSFUNC_FAIL. + * + * If it failed due to an unrecoverable failure in sensi quad rhs, then we + * return the value CV_QSRHSFUNC_FAIL. + * + * Otherwise, a recoverable failure occurred when solving the nonlinear system + * (cvNls returned SUN_NLS_CONV_RECVR, RHSFUNC_RECVR, or SRHSFUNC_RECVR). + * + * If ncf is now equal to maxncf or |h| = hmin, we return the value + * CV_CONV_FAILURE (if SUN_NLS_CONV_RECVR), + * CV_REPTD_RHSFUNC_ERR (if RHSFUNC_RECVR), or + * CV_REPTD_SRHSFUNC_ERR (if SRHSFUNC_RECVR). + * Otherwise, we set *nflagPtr = PREV_CONV_FAIL and return the value + * PREDICT_AGAIN, telling cvStep to reattempt the step. * - * The array tq is set to test quantities used in the convergence - * test, the error test, and the selection of h at a new order. */ -static void cvSetBDF(CVodeMem cv_mem) +static int cvHandleNFlag(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, + int *ncfPtr, long int *ncfnPtr) { - realtype alpha0, alpha0_hat, xi_inv, xistar_inv, hsum; - int i,j; + int nflag; - cv_mem->cv_l[0] = cv_mem->cv_l[1] = xi_inv = xistar_inv = ONE; - for (i=2; i <= cv_mem->cv_q; i++) cv_mem->cv_l[i] = ZERO; - alpha0 = alpha0_hat = -ONE; - hsum = cv_mem->cv_h; - if (cv_mem->cv_q > 1) { - for (j=2; j < cv_mem->cv_q; j++) { - hsum += cv_mem->cv_tau[j-1]; - xi_inv = cv_mem->cv_h / hsum; - alpha0 -= ONE / j; - for (i=j; i >= 1; i--) - cv_mem->cv_l[i] += cv_mem->cv_l[i-1]*xi_inv; - /* The l[i] are coefficients of product(1 to j) (1 + x/xi_i) */ - } + nflag = *nflagPtr; - /* j = q */ - alpha0 -= ONE / cv_mem->cv_q; - xistar_inv = -cv_mem->cv_l[1] - alpha0; - hsum += cv_mem->cv_tau[cv_mem->cv_q-1]; - xi_inv = cv_mem->cv_h / hsum; - alpha0_hat = -cv_mem->cv_l[1] - xi_inv; - for (i=cv_mem->cv_q; i >= 1; i--) - cv_mem->cv_l[i] += cv_mem->cv_l[i-1]*xistar_inv; - } + if (nflag == CV_SUCCESS) return(DO_ERROR_TEST); - cvSetTqBDF(cv_mem, hsum, alpha0, alpha0_hat, xi_inv, xistar_inv); -} + /* The nonlinear soln. failed; increment ncfn and restore zn */ + (*ncfnPtr)++; + cvRestore(cv_mem, saved_t); -/* - * cvSetTqBDF - * - * This routine sets the test quantity array tq in the case - * lmm == CV_BDF. - */ + /* Return if failed unrecoverably */ + if (nflag < 0) { + if (nflag == CV_LSETUP_FAIL) return(CV_LSETUP_FAIL); + else if (nflag == CV_LSOLVE_FAIL) return(CV_LSOLVE_FAIL); + else if (nflag == CV_RHSFUNC_FAIL) return(CV_RHSFUNC_FAIL); + else if (nflag == CV_QRHSFUNC_FAIL) return(CV_QRHSFUNC_FAIL); + else if (nflag == CV_SRHSFUNC_FAIL) return(CV_SRHSFUNC_FAIL); + else if (nflag == CV_QSRHSFUNC_FAIL) return(CV_QSRHSFUNC_FAIL); + else return(CV_NLS_FAIL); + } -static void cvSetTqBDF(CVodeMem cv_mem, realtype hsum, realtype alpha0, - realtype alpha0_hat, realtype xi_inv, realtype xistar_inv) -{ - realtype A1, A2, A3, A4, A5, A6; - realtype C, Cpinv, Cppinv; + /* At this point, a recoverable error occurred. */ - A1 = ONE - alpha0_hat + alpha0; - A2 = ONE + cv_mem->cv_q * A1; - cv_mem->cv_tq[2] = SUNRabs(A1 / (alpha0 * A2)); - cv_mem->cv_tq[5] = SUNRabs(A2 * xistar_inv / (cv_mem->cv_l[cv_mem->cv_q] * xi_inv)); - if (cv_mem->cv_qwait == 1) { - if (cv_mem->cv_q > 1) { - C = xistar_inv / cv_mem->cv_l[cv_mem->cv_q]; - A3 = alpha0 + ONE / cv_mem->cv_q; - A4 = alpha0_hat + xi_inv; - Cpinv = (ONE - A4 + A3) / A3; - cv_mem->cv_tq[1] = SUNRabs(C * Cpinv); - } - else cv_mem->cv_tq[1] = ONE; - hsum += cv_mem->cv_tau[cv_mem->cv_q]; - xi_inv = cv_mem->cv_h / hsum; - A5 = alpha0 - (ONE / (cv_mem->cv_q+1)); - A6 = alpha0_hat - xi_inv; - Cppinv = (ONE - A6 + A5) / A2; - cv_mem->cv_tq[3] = SUNRabs(Cppinv / (xi_inv * (cv_mem->cv_q+2) * A5)); + (*ncfPtr)++; + cv_mem->cv_etamax = ONE; + + /* If we had maxncf failures or |h| = hmin, return failure. */ + + if ((SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) || + (*ncfPtr == cv_mem->cv_maxncf)) { + if (nflag == SUN_NLS_CONV_RECVR) return(CV_CONV_FAILURE); + if (nflag == CONSTR_RECVR) return(CV_CONSTR_FAIL); + if (nflag == RHSFUNC_RECVR) return(CV_REPTD_RHSFUNC_ERR); + if (nflag == QRHSFUNC_RECVR) return(CV_REPTD_QRHSFUNC_ERR); + if (nflag == SRHSFUNC_RECVR) return(CV_REPTD_SRHSFUNC_ERR); + if (nflag == QSRHSFUNC_RECVR) return(CV_REPTD_QSRHSFUNC_ERR); } - cv_mem->cv_tq[4] = cv_mem->cv_nlscoef / cv_mem->cv_tq[2]; -} -/* - * ----------------------------------------------------------------- - * Nonlinear solver functions - * ----------------------------------------------------------------- - */ + /* Reduce step size; return to reattempt the step + Note that if nflag = CONSTR_RECVR, then eta was already set in cvCheckConstraints */ + if (nflag != CONSTR_RECVR) + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta_cf, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + *nflagPtr = PREV_CONV_FAIL; + cvRescale(cv_mem); + + return(PREDICT_AGAIN); +} /* - * cvNls + * cvRestore * - * This routine attempts to solve the nonlinear system associated - * with a single implicit step of the linear multistep method. + * This routine restores the value of tn to saved_t and undoes the + * prediction. After execution of cvRestore, the Nordsieck array zn has + * the same values as before the call to cvPredict. */ -static int cvNls(CVodeMem cv_mem, int nflag) +void cvRestore(CVodeMem cv_mem, realtype saved_t) { - int flag = CV_SUCCESS; - booleantype callSetup; - booleantype do_sensi_sim; - - /* Are we computing sensitivities with the CV_SIMULTANEOUS approach? */ - do_sensi_sim = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)); - - /* Decide whether or not to call setup routine (if one exists) and */ - /* set flag convfail (input to lsetup for its evaluation decision) */ - if (cv_mem->cv_lsetup) { - cv_mem->convfail = ((nflag == FIRST_CALL) || (nflag == PREV_ERR_FAIL)) ? - CV_NO_FAILURES : CV_FAIL_OTHER; - - callSetup = (nflag == PREV_CONV_FAIL) || (nflag == PREV_ERR_FAIL) || - (cv_mem->cv_nst == 0) || - (cv_mem->cv_nst >= cv_mem->cv_nstlp + MSBP) || - (SUNRabs(cv_mem->cv_gamrat-ONE) > DGMAX); - - /* Decide whether to force a call to setup */ - if (cv_mem->cv_forceSetup) { - callSetup = SUNTRUE; - cv_mem->convfail = CV_FAIL_OTHER; - } - } else { - cv_mem->cv_crate = ONE; - cv_mem->cv_crateS = ONE; /* if NO lsetup all conv. rates are set to ONE */ - callSetup = SUNFALSE; - } - - /* initial guess for the correction to the predictor */ - if (do_sensi_sim) - N_VConst(ZERO, cv_mem->ycorSim); - else - N_VConst(ZERO, cv_mem->cv_acor); + int j, k; - /* call nonlinear solver setup if it exists */ - if ((cv_mem->NLS)->ops->setup) { - if (do_sensi_sim) - flag = SUNNonlinSolSetup(cv_mem->NLS, cv_mem->ycorSim, cv_mem); - else - flag = SUNNonlinSolSetup(cv_mem->NLS, cv_mem->cv_acor, cv_mem); + cv_mem->cv_tn = saved_t; + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + N_VLinearSum(ONE, cv_mem->cv_zn[j-1], -ONE, + cv_mem->cv_zn[j], cv_mem->cv_zn[j-1]); - if (flag < 0) return(CV_NLS_SETUP_FAIL); - if (flag > 0) return(SUN_NLS_CONV_RECVR); + if (cv_mem->cv_quadr) { + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + N_VLinearSum(ONE, cv_mem->cv_znQ[j-1], -ONE, + cv_mem->cv_znQ[j], cv_mem->cv_znQ[j-1]); } - /* solve the nonlinear system */ - if (do_sensi_sim) - flag = SUNNonlinSolSolve(cv_mem->NLSsim, cv_mem->zn0Sim, cv_mem->ycorSim, - cv_mem->ewtSim, cv_mem->cv_tq[4], callSetup, cv_mem); - else - flag = SUNNonlinSolSolve(cv_mem->NLS, cv_mem->cv_zn[0], cv_mem->cv_acor, - cv_mem->cv_ewt, cv_mem->cv_tq[4], callSetup, cv_mem); - - /* if the solve failed return */ - if (flag != CV_SUCCESS) return(flag); - - /* solve successful */ - - /* update the state based on the final correction from the nonlinear solver */ - N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, cv_mem->cv_acor, cv_mem->cv_y); - - /* update the sensitivities based on the final correction from the nonlinear solver */ - if (do_sensi_sim) { - N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znS[0], - ONE, cv_mem->cv_acorS, cv_mem->cv_yS); + if (cv_mem->cv_sensi) { + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znS[j-1], + -ONE, cv_mem->cv_znS[j], + cv_mem->cv_znS[j-1]); } - /* compute acnrm if is was not already done by the nonlinear solver */ - if (!cv_mem->cv_acnrmcur) { - if (do_sensi_sim && cv_mem->cv_errconS) - cv_mem->cv_acnrm = N_VWrmsNorm(cv_mem->ycorSim, cv_mem->ewtSim); - else - cv_mem->cv_acnrm = N_VWrmsNorm(cv_mem->cv_acor, cv_mem->cv_ewt); + if (cv_mem->cv_quadr_sensi) { + for (k = 1; k <= cv_mem->cv_q; k++) + for (j = cv_mem->cv_q; j >= k; j--) + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + ONE, cv_mem->cv_znQS[j-1], + -ONE, cv_mem->cv_znQS[j], + cv_mem->cv_znQS[j-1]); } - - /* update Jacobian status */ - cv_mem->cv_jcur = SUNFALSE; - - /* check inequality constraints */ - if (cv_mem->cv_constraintsSet) - flag = cvCheckConstraints(cv_mem); - - return(flag); - } /* - * cvCheckConstraints + * ----------------------------------------------------------------- + * Error Test + * ----------------------------------------------------------------- + */ + +/* + * cvDoErrorTest * - * This routine determines if the constraints of the problem - * are satisfied by the proposed step + * This routine performs the local error test, for the state, quadrature, + * or sensitivity variables. Its last three arguments change depending + * on which variables the error test is to be performed on. * - * Possible return values are: + * The weighted local error norm dsm is loaded into *dsmPtr, and + * the test dsm ?<= 1 is made. * - * CV_SUCCESS ---> allows stepping forward + * If the test passes, cvDoErrorTest returns CV_SUCCESS. * - * CONSTR_RECVR ---> values failed to satisfy constraints + * If the test fails, we undo the step just taken (call cvRestore) and + * + * - if maxnef error test failures have occurred or if SUNRabs(h) = hmin, + * we return CV_ERR_FAILURE. + * + * - if more than MXNEF1 error test failures have occurred, an order + * reduction is forced. If already at order 1, restart by reloading + * zn from scratch (also znQ and znS if appropriate). + * If f() fails, we return CV_RHSFUNC_FAIL or CV_UNREC_RHSFUNC_ERR; + * if fQ() fails, we return CV_QRHSFUNC_FAIL or CV_UNREC_QRHSFUNC_ERR; + * if cvSensRhsWrapper() fails, we return CV_SRHSFUNC_FAIL or CV_UNREC_SRHSFUNC_ERR; + * (no recovery is possible at this stage). + * + * - otherwise, set *nflagPtr to PREV_ERR_FAIL, and return TRY_AGAIN. * - * CV_CONSTR_FAIL ---> values failed to satisfy constraints with hmin */ -static int cvCheckConstraints(CVodeMem cv_mem) +static int cvDoErrorTest(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, + realtype acor_nrm, + int *nefPtr, long int *netfPtr, realtype *dsmPtr) { - booleantype constraintsPassed; - realtype vnorm; - N_Vector mm = cv_mem->cv_ftemp; - N_Vector tmp = cv_mem->cv_tempv; + realtype dsm; + int retval, is; + N_Vector wrk1, wrk2; - /* Get mask vector mm, set where constraints failed */ - constraintsPassed = N_VConstrMask(cv_mem->cv_constraints, cv_mem->cv_y, mm); - if (constraintsPassed) return(CV_SUCCESS); + dsm = acor_nrm * cv_mem->cv_tq[2]; - /* Constraints not met */ +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvDoErrorTest", "error-test", "step = %li, h = %.16g, dsm = %.16g", + cv_mem->cv_nst, cv_mem->cv_h, dsm); +#endif - /* Compute correction to satisfy constraints */ - N_VCompare(ONEPT5, cv_mem->cv_constraints, tmp); /* a[i]=1 when |c[i]|=2 */ - N_VProd(tmp, cv_mem->cv_constraints, tmp); /* a * c */ - N_VDiv(tmp, cv_mem->cv_ewt, tmp); /* a * c * wt */ - N_VLinearSum(ONE, cv_mem->cv_y, -PT1, tmp, tmp); /* y - 0.1 * a * c * wt */ - N_VProd(tmp, mm, tmp); /* v = mm*(y-0.1*a*c*wt) */ + /* If est. local error norm dsm passes test, return CV_SUCCESS */ + *dsmPtr = dsm; + if (dsm <= ONE) return(CV_SUCCESS); - vnorm = N_VWrmsNorm(tmp, cv_mem->cv_ewt); /* ||v|| */ + /* Test failed; increment counters, set nflag, and restore zn array */ + (*nefPtr)++; + (*netfPtr)++; + *nflagPtr = PREV_ERR_FAIL; + cvRestore(cv_mem, saved_t); - /* If vector v of constraint corrections is small in norm, correct and - accept this step */ - if (vnorm <= cv_mem->cv_tq[4]) { - N_VLinearSum(ONE, cv_mem->cv_acor, - -ONE, tmp, cv_mem->cv_acor); /* acor <- acor - v */ - return(CV_SUCCESS); - } + /* At maxnef failures or |h| = hmin, return CV_ERR_FAILURE */ + if ((SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) || + (*nefPtr == cv_mem->cv_maxnef)) + return(CV_ERR_FAILURE); - /* Return with error if |h| == hmin */ - if (SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) return(CV_CONSTR_FAIL); + /* Set etamax = 1 to prevent step size increase at end of this step */ + cv_mem->cv_etamax = ONE; - /* Constraint correction is too large, reduce h by computing eta = h'/h */ - N_VLinearSum(ONE, cv_mem->cv_zn[0], -ONE, cv_mem->cv_y, tmp); - N_VProd(mm, tmp, tmp); - cv_mem->cv_eta = PT9*N_VMinQuotient(cv_mem->cv_zn[0], tmp); - cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, PT1); - cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, - cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + /* Set h ratio eta from dsm, rescale, and return for retry of step */ + if (*nefPtr <= MXNEF1) { + cv_mem->cv_eta = ONE / (SUNRpowerR(BIAS2*dsm,ONE/cv_mem->cv_L) + ADDON); + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta_min_ef, + SUNMAX(cv_mem->cv_eta, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h))); + if (*nefPtr >= cv_mem->cv_small_nef) + cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta, cv_mem->cv_eta_max_ef); - /* Reattempt step with new step size */ - return(CONSTR_RECVR); -} + cvRescale(cv_mem); -/* - * cvQuadNls - * - * This routine solves for the quadrature variables at the new step. - * It does not solve a nonlinear system, but rather updates the - * quadrature variables. The name for this function is just for - * uniformity purposes. - * - * Possible return values (interpreted by cvHandleNFlag) - * - * CV_SUCCESS -> continue with error test - * CV_QRHSFUNC_FAIL -> halt the integration - * QRHSFUNC_RECVR -> predict again or stop if too many - * - */ - -static int cvQuadNls(CVodeMem cv_mem) -{ - int retval; - - /* Save quadrature correction in acorQ */ - retval = cv_mem->cv_fQ(cv_mem->cv_tn, cv_mem->cv_y, - cv_mem->cv_acorQ, cv_mem->cv_user_data); - cv_mem->cv_nfQe++; - if (retval < 0) return(CV_QRHSFUNC_FAIL); - if (retval > 0) return(QRHSFUNC_RECVR); +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvDoErrorTest", "new-step-eta", + "eta = %.16g", cv_mem->cv_eta); +#endif - /* If needed, save the value of yQdot = fQ into ftempQ - * for use in evaluating fQS */ - if (cv_mem->cv_quadr_sensi) { - N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_ftempQ); + return(TRY_AGAIN); } - N_VLinearSum(cv_mem->cv_h, cv_mem->cv_acorQ, -ONE, - cv_mem->cv_znQ[1], cv_mem->cv_acorQ); - N_VScale(cv_mem->cv_rl1, cv_mem->cv_acorQ, cv_mem->cv_acorQ); - - /* Apply correction to quadrature variables */ - N_VLinearSum(ONE, cv_mem->cv_znQ[0], ONE, cv_mem->cv_acorQ, cv_mem->cv_yQ); + /* After MXNEF1 failures, force an order reduction and retry step */ + if (cv_mem->cv_q > 1) { + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta_min_ef, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + cvAdjustOrder(cv_mem,-1); + cv_mem->cv_L = cv_mem->cv_q; + cv_mem->cv_q--; + cv_mem->cv_qwait = cv_mem->cv_L; + cvRescale(cv_mem); +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvDoErrorTest", "new-step-eta-mxnef1", + "eta = %.16g", cv_mem->cv_eta); +#endif + return(TRY_AGAIN); + } - return(CV_SUCCESS); -} + /* If already at order 1, restart: reload zn, znQ, znS, znQS from scratch */ -/* - * cvQuadSensNls - * - * This routine solves for the quadrature sensitivity variables - * at the new step. It does not solve a nonlinear system, but - * rather updates the quadrature variables. The name for this - * function is just for uniformity purposes. - * - * Possible return values (interpreted by cvHandleNFlag) - * - * CV_SUCCESS -> continue with error test - * CV_QSRHSFUNC_FAIL -> halt the integration - * QSRHSFUNC_RECVR -> predict again or stop if too many - * - */ + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta_min_ef, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + cv_mem->cv_h *= cv_mem->cv_eta; + cv_mem->cv_next_h = cv_mem->cv_h; + cv_mem->cv_hscale = cv_mem->cv_h; + cv_mem->cv_qwait = LONG_WAIT; + cv_mem->cv_nscon = 0; -static int cvQuadSensNls(CVodeMem cv_mem) -{ - int is, retval; + retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_zn[0], + cv_mem->cv_tempv, cv_mem->cv_user_data); + cv_mem->cv_nfe++; + if (retval < 0) return(CV_RHSFUNC_FAIL); + if (retval > 0) return(CV_UNREC_RHSFUNC_ERR); - /* Save quadrature correction in acorQ */ - retval = cv_mem->cv_fQS(cv_mem->cv_Ns, cv_mem->cv_tn, cv_mem->cv_y, - cv_mem->cv_yS, cv_mem->cv_ftempQ, - cv_mem->cv_acorQS, cv_mem->cv_user_data, - cv_mem->cv_tempv, cv_mem->cv_tempvQ); - cv_mem->cv_nfQSe++; - if (retval < 0) return(CV_QSRHSFUNC_FAIL); - if (retval > 0) return(QSRHSFUNC_RECVR); + N_VScale(cv_mem->cv_h, cv_mem->cv_tempv, cv_mem->cv_zn[1]); +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvDoErrorTest", "new-step-eta-mxnef1-q1", + "eta = %.16g", cv_mem->cv_eta); +#endif - for (is=0; iscv_Ns; is++) { - N_VLinearSum(cv_mem->cv_h, cv_mem->cv_acorQS[is], -ONE, - cv_mem->cv_znQS[1][is], cv_mem->cv_acorQS[is]); - N_VScale(cv_mem->cv_rl1, cv_mem->cv_acorQS[is], cv_mem->cv_acorQS[is]); - /* Apply correction to quadrature sensitivity variables */ - N_VLinearSum(ONE, cv_mem->cv_znQS[0][is], ONE, - cv_mem->cv_acorQS[is], cv_mem->cv_yQS[is]); - } + if (cv_mem->cv_quadr) { - return(CV_SUCCESS); -} + retval = cv_mem->cv_fQ(cv_mem->cv_tn, cv_mem->cv_zn[0], + cv_mem->cv_tempvQ, cv_mem->cv_user_data); + cv_mem->cv_nfQe++; + if (retval < 0) return(CV_QRHSFUNC_FAIL); + if (retval > 0) return(CV_UNREC_QRHSFUNC_ERR); + N_VScale(cv_mem->cv_h, cv_mem->cv_tempvQ, cv_mem->cv_znQ[1]); -/* - * cvStgrNls - * - * This is a high-level routine that attempts to solve the - * sensitivity linear systems using the attached nonlinear solver - * once the states y_n were obtained and passed the error test. - */ + } -static int cvStgrNls(CVodeMem cv_mem) -{ - booleantype callSetup; - int flag=CV_SUCCESS; + if (cv_mem->cv_sensi) { - callSetup = SUNFALSE; - if (cv_mem->cv_lsetup == NULL) - cv_mem->cv_crateS = ONE; + wrk1 = cv_mem->cv_ftemp; + wrk2 = cv_mem->cv_ftempS[0]; - /* initial guess for the correction to the predictor */ - N_VConst(ZERO, cv_mem->ycorStg); + retval = cvSensRhsWrapper(cv_mem, cv_mem->cv_tn, cv_mem->cv_zn[0], + cv_mem->cv_tempv, cv_mem->cv_znS[0], + cv_mem->cv_tempvS, wrk1, wrk2); + if (retval < 0) return(CV_SRHSFUNC_FAIL); + if (retval > 0) return(CV_UNREC_SRHSFUNC_ERR); - /* set sens solve flag */ - cv_mem->sens_solve = SUNTRUE; + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = cv_mem->cv_h; - /* solve the nonlinear system */ - flag = SUNNonlinSolSolve(cv_mem->NLSstg, cv_mem->zn0Stg, cv_mem->ycorStg, - cv_mem->ewtStg, cv_mem->cv_tq[4], callSetup, cv_mem); + retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_tempvS, cv_mem->cv_znS[1]); + if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + } - /* reset sens solve flag */ - cv_mem->sens_solve = SUNFALSE; + if (cv_mem->cv_quadr_sensi) { - /* if the solve failed return */ - if (flag != CV_SUCCESS) return(flag); + wrk1 = cv_mem->cv_ftemp; + wrk2 = cv_mem->cv_ftempQ; - /* solve successful */ + retval = cv_mem->cv_fQS(cv_mem->cv_Ns, cv_mem->cv_tn, + cv_mem->cv_zn[0], cv_mem->cv_znS[0], + cv_mem->cv_tempvQ, cv_mem->cv_tempvQS, + cv_mem->cv_fQS_data, wrk1, wrk2); + cv_mem->cv_nfQSe++; + if (retval < 0) return(CV_QSRHSFUNC_FAIL); + if (retval > 0) return(CV_UNREC_QSRHSFUNC_ERR); - /* update the sensitivities based on the final correction from the nonlinear solver */ - N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znS[0], - ONE, cv_mem->cv_acorS, cv_mem->cv_yS); + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = cv_mem->cv_h; - /* update Jacobian status */ - cv_mem->cv_jcur = SUNFALSE; + retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_tempvQS, cv_mem->cv_znQS[1]); + if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + } - return(flag); + return(TRY_AGAIN); } /* - * cvStgr1Nls + * ----------------------------------------------------------------- + * Functions called after a successful step + * ----------------------------------------------------------------- + */ + +/* + * cvCompleteStep * - * This is a high-level routine that attempts to solve the i-th - * sensitivity linear system using the attached nonlinear solver - * once the states y_n were obtained and passed the error test. + * This routine performs various update operations when the solution + * to the nonlinear system has passed the local error test. + * We increment the step counter nst, record the values hu and qu, + * update the tau array, and apply the corrections to the zn array. + * The tau[i] are the last q values of h, with tau[1] the most recent. + * The counter qwait is decremented, and if qwait == 1 (and q < qmax) + * we save acor and tq[5] for a possible order increase. */ -static int cvStgr1Nls(CVodeMem cv_mem, int is) +static void cvCompleteStep(CVodeMem cv_mem) { - booleantype callSetup; - long int nni; - int flag=CV_SUCCESS; + int i; + int is; - callSetup = SUNFALSE; - if (cv_mem->cv_lsetup == NULL) - cv_mem->cv_crateS = ONE; + cv_mem->cv_nst++; + cv_mem->cv_nscon++; + cv_mem->cv_hu = cv_mem->cv_h; + cv_mem->cv_qu = cv_mem->cv_q; - /* initial guess for the correction to the predictor */ - N_VConst(ZERO, cv_mem->cv_acorS[is]); + for (i=cv_mem->cv_q; i >= 2; i--) cv_mem->cv_tau[i] = cv_mem->cv_tau[i-1]; + if ((cv_mem->cv_q==1) && (cv_mem->cv_nst > 1)) + cv_mem->cv_tau[2] = cv_mem->cv_tau[1]; + cv_mem->cv_tau[1] = cv_mem->cv_h; - /* set sens solve flag */ - cv_mem->sens_solve = SUNTRUE; + /* Apply correction to column j of zn: l_j * Delta_n */ + (void) N_VScaleAddMulti(cv_mem->cv_q+1, cv_mem->cv_l, cv_mem->cv_acor, + cv_mem->cv_zn, cv_mem->cv_zn); - /* solve the nonlinear system */ - flag = SUNNonlinSolSolve(cv_mem->NLSstg1, - cv_mem->cv_znS[0][is], cv_mem->cv_acorS[is], - cv_mem->cv_ewtS[is], cv_mem->cv_tq[4], callSetup, cv_mem); + /* Apply the projection correction to column j of zn: p_j * Delta_n */ + if (cv_mem->proj_applied) { + (void) N_VScaleAddMulti(cv_mem->cv_q+1, + cv_mem->proj_p, cv_mem->cv_tempv, /* tempv = acorP */ + cv_mem->cv_zn, cv_mem->cv_zn); + } - /* reset sens solve flag */ - cv_mem->sens_solve = SUNFALSE; + if (cv_mem->cv_quadr) + (void) N_VScaleAddMulti(cv_mem->cv_q+1, cv_mem->cv_l, cv_mem->cv_acorQ, + cv_mem->cv_znQ, cv_mem->cv_znQ); - /* update nniS iteration count */ - (void) SUNNonlinSolGetNumIters(cv_mem->NLSstg1, &nni); - cv_mem->cv_nniS1[is] += nni - cv_mem->nnip; - cv_mem->nnip = nni; + if (cv_mem->cv_sensi) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q+1, + cv_mem->cv_l, cv_mem->cv_acorS, + cv_mem->cv_znS, cv_mem->cv_znS); - /* if the solve failed return */ - if (flag != CV_SUCCESS) return(flag); + if (cv_mem->cv_quadr_sensi) + (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q+1, + cv_mem->cv_l, cv_mem->cv_acorQS, + cv_mem->cv_znQS, cv_mem->cv_znQS); - /* solve successful */ + /* If necessary, store Delta_n in zn[qmax] to be used in order increase. + * This actually will be Delta_{n-1} in the ELTE at q+1 since it happens at + * the next to last step of order q before a possible one at order q+1 + */ - /* update the sensitivity with the final correction from the nonlinear solver */ - N_VLinearSum(ONE, cv_mem->cv_znS[0][is], - ONE, cv_mem->cv_acorS[is], cv_mem->cv_yS[is]); + cv_mem->cv_qwait--; + if ((cv_mem->cv_qwait == 1) && (cv_mem->cv_q != cv_mem->cv_qmax)) { - /* update Jacobian status */ - cv_mem->cv_jcur = SUNFALSE; + N_VScale(ONE, cv_mem->cv_acor, cv_mem->cv_zn[cv_mem->cv_qmax]); - return(flag); -} + if (cv_mem->cv_quadr) + N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_znQ[cv_mem->cv_qmax]); -/* - * cvHandleNFlag - * - * This routine takes action on the return value nflag = *nflagPtr - * returned by cvNls, as follows: - * - * If cvNls succeeded in solving the nonlinear system, then - * cvHandleNFlag returns the constant DO_ERROR_TEST, which tells cvStep - * to perform the error test. - * - * If the nonlinear system was not solved successfully, then ncfn and - * ncf = *ncfPtr are incremented and Nordsieck array zn is restored. - * - * If the solution of the nonlinear system failed due to an - * unrecoverable failure by setup, we return the value CV_LSETUP_FAIL. - * - * If it failed due to an unrecoverable failure in solve, then we return - * the value CV_LSOLVE_FAIL. - * - * If it failed due to an unrecoverable failure in rhs, then we return - * the value CV_RHSFUNC_FAIL. - * - * If it failed due to an unrecoverable failure in quad rhs, then we return - * the value CV_QRHSFUNC_FAIL. - * - * If it failed due to an unrecoverable failure in sensi rhs, then we return - * the value CV_SRHSFUNC_FAIL. - * - * If it failed due to an unrecoverable failure in sensi quad rhs, then we - * return the value CV_QSRHSFUNC_FAIL. - * - * Otherwise, a recoverable failure occurred when solving the - * nonlinear system (cvNls returned nflag = SUN_NLS_CONV_RECVT, RHSFUNC_RECVR, - * or SRHSFUNC_RECVR). - * In this case, if ncf is now equal to maxncf or |h| = hmin, - * we return the value CV_CONV_FAILURE (if nflag=SUN_NLS_CONV_RECVR), or - * CV_REPTD_RHSFUNC_ERR (if nflag=RHSFUNC_RECVR), or CV_REPTD_SRHSFUNC_ERR - * (if nflag=SRHSFUNC_RECVR). - * If not, we set *nflagPtr = PREV_CONV_FAIL and return the value - * PREDICT_AGAIN, telling cvStep to reattempt the step. - * - */ - -static int cvHandleNFlag(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, - int *ncfPtr, long int *ncfnPtr) -{ - int nflag; + if (cv_mem->cv_sensi) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = ONE; - nflag = *nflagPtr; + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorS, cv_mem->cv_znS[cv_mem->cv_qmax]); + } - if (nflag == CV_SUCCESS) return(DO_ERROR_TEST); + if (cv_mem->cv_quadr_sensi) { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = ONE; - /* The nonlinear soln. failed; increment ncfn and restore zn */ - (*ncfnPtr)++; - cvRestore(cv_mem, saved_t); + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorQS, cv_mem->cv_znQS[cv_mem->cv_qmax]); + } - /* Return if failed unrecoverably */ - if (nflag < 0) { - if (nflag == CV_LSETUP_FAIL) return(CV_LSETUP_FAIL); - else if (nflag == CV_LSOLVE_FAIL) return(CV_LSOLVE_FAIL); - else if (nflag == CV_RHSFUNC_FAIL) return(CV_RHSFUNC_FAIL); - else if (nflag == CV_QRHSFUNC_FAIL) return(CV_QRHSFUNC_FAIL); - else if (nflag == CV_SRHSFUNC_FAIL) return(CV_SRHSFUNC_FAIL); - else if (nflag == CV_QSRHSFUNC_FAIL) return(CV_QSRHSFUNC_FAIL); - else return(CV_NLS_FAIL); + cv_mem->cv_saved_tq5 = cv_mem->cv_tq[5]; + cv_mem->cv_indx_acor = cv_mem->cv_qmax; } - /* At this point, nflag = SUN_NLS_CONV_RECVR, CONSTR_RECVR, RHSFUNC_RECVR, - or SRHSFUNC_RECVR; increment ncf */ +#ifdef SUNDIALS_BUILD_WITH_MONITORING + /* If user access function was provided, call it now */ + if (cv_mem->cv_monitorfun != NULL && + !(cv_mem->cv_nst % cv_mem->cv_monitor_interval)) { + cv_mem->cv_monitorfun((void*) cv_mem, cv_mem->cv_user_data); + } +#endif - (*ncfPtr)++; - cv_mem->cv_etamax = ONE; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvCompleteStep", "return", + "nst = %d, nscon = %d", cv_mem->cv_nst, cv_mem->cv_nscon); +#endif +} - /* If we had maxncf failures or |h| = hmin, - return CV_CONV_FAILURE, CV_CONSTR_FAIL, - CV_REPTD_RHSFUNC_ERR, CV_REPTD_QRHSFUNC_ERR, - CV_REPTD_SRHSFUNC_ERR, or CV_CONSTR_FAIL */ +/* + * cvPrepareNextStep + * + * This routine handles the setting of stepsize and order for the + * next step -- hprime and qprime. Along with hprime, it sets the + * ratio eta = hprime/h. It also updates other state variables + * related to a change of step size or order. + */ - if ((SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) || - (*ncfPtr == cv_mem->cv_maxncf)) { - if (nflag == SUN_NLS_CONV_RECVR) return(CV_CONV_FAILURE); - if (nflag == CONSTR_RECVR) return(CV_CONSTR_FAIL); - if (nflag == RHSFUNC_RECVR) return(CV_REPTD_RHSFUNC_ERR); - if (nflag == QRHSFUNC_RECVR) return(CV_REPTD_QRHSFUNC_ERR); - if (nflag == SRHSFUNC_RECVR) return(CV_REPTD_SRHSFUNC_ERR); - if (nflag == QSRHSFUNC_RECVR) return(CV_REPTD_QSRHSFUNC_ERR); +static void cvPrepareNextStep(CVodeMem cv_mem, realtype dsm) +{ + /* If etamax = 1, defer step size or order changes */ + if (cv_mem->cv_etamax == ONE) { + cv_mem->cv_qwait = SUNMAX(cv_mem->cv_qwait, 2); + cv_mem->cv_qprime = cv_mem->cv_q; + cv_mem->cv_hprime = cv_mem->cv_h; + cv_mem->cv_eta = ONE; + } else { + /* etaq is the ratio of new to old h at the current order */ + cv_mem->cv_etaq = ONE /(SUNRpowerR(BIAS2*dsm,ONE/cv_mem->cv_L) + ADDON); + + /* If no order change, adjust eta and acor in cvSetEta and return */ + if (cv_mem->cv_qwait != 0) { + cv_mem->cv_eta = cv_mem->cv_etaq; + cv_mem->cv_qprime = cv_mem->cv_q; + cvSetEta(cv_mem); + } else { + /* If qwait = 0, consider an order change. etaqm1 and etaqp1 are + the ratios of new to old h at orders q-1 and q+1, respectively. + cvChooseEta selects the largest; cvSetEta adjusts eta and acor */ + cv_mem->cv_qwait = 2; + cv_mem->cv_etaqm1 = cvComputeEtaqm1(cv_mem); + cv_mem->cv_etaqp1 = cvComputeEtaqp1(cv_mem); + cvChooseEta(cv_mem); + cvSetEta(cv_mem); + } } - /* Reduce step size; return to reattempt the step - Note that if nflag=CONSTR_RECVR then eta was already set in CVNls */ - if (nflag != CONSTR_RECVR) - cv_mem->cv_eta = SUNMAX(ETACF, cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); - *nflagPtr = PREV_CONV_FAIL; - cvRescale(cv_mem); +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODES::cvPrepareNextStep", "return", + "eta = %.16g, hprime = %.16g, qprime = %d, qwait = %d\n", + cv_mem->cv_eta, cv_mem->cv_hprime, cv_mem->cv_qprime, cv_mem->cv_qwait); +#endif +} - return(PREDICT_AGAIN); +/* + * cvSetEta + * + * This routine adjusts the value of eta according to the various + * heuristic limits and the optional input hmax. + */ + +static void cvSetEta(CVodeMem cv_mem) +{ + if ((cv_mem->cv_eta > cv_mem->cv_eta_min_fx) && + (cv_mem->cv_eta < cv_mem->cv_eta_max_fx)) + { + /* Eta is within the fixed step bounds, retain step size */ + cv_mem->cv_eta = ONE; + cv_mem->cv_hprime = cv_mem->cv_h; + } + else + { + if (cv_mem->cv_eta >= cv_mem->cv_eta_max_fx) + { + /* Increase the step size, limit eta by etamax and hmax */ + cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta, cv_mem->cv_etamax); + cv_mem->cv_eta /= SUNMAX(ONE, SUNRabs(cv_mem->cv_h) * + cv_mem->cv_hmax_inv * cv_mem->cv_eta); + } + else + { + /* Reduce the step size, limit eta by etamin and hmin */ + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, cv_mem->cv_eta_min); + cv_mem->cv_eta = SUNMAX(cv_mem->cv_eta, cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + } + /* Set hprime */ + cv_mem->cv_hprime = cv_mem->cv_h * cv_mem->cv_eta; + if (cv_mem->cv_qprime < cv_mem->cv_q) cv_mem->cv_nscon = 0; + } } /* - * cvRestore + * cvComputeEtaqm1 * - * This routine restores the value of cv_mem->cv_tn to saved_t and undoes the - * prediction. After execution of cvRestore, the Nordsieck array zn has - * the same values as before the call to cvPredict. + * This routine computes and returns the value of etaqm1 for a + * possible decrease in order by 1. */ -static void cvRestore(CVodeMem cv_mem, realtype saved_t) +static realtype cvComputeEtaqm1(CVodeMem cv_mem) { - int j, k; + realtype ddn; - cv_mem->cv_tn = saved_t; - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - N_VLinearSum(ONE, cv_mem->cv_zn[j-1], -ONE, - cv_mem->cv_zn[j], cv_mem->cv_zn[j-1]); + cv_mem->cv_etaqm1 = ZERO; + if (cv_mem->cv_q > 1) { + ddn = N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q], cv_mem->cv_ewt); - if (cv_mem->cv_quadr) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - N_VLinearSum(ONE, cv_mem->cv_znQ[j-1], -ONE, - cv_mem->cv_znQ[j], cv_mem->cv_znQ[j-1]); - } + if ( cv_mem->cv_quadr && cv_mem->cv_errconQ ) + ddn = cvQuadUpdateNorm(cv_mem, ddn, cv_mem->cv_znQ[cv_mem->cv_q], + cv_mem->cv_ewtQ); - if (cv_mem->cv_sensi) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znS[j-1], - -ONE, cv_mem->cv_znS[j], - cv_mem->cv_znS[j-1]); - } + if ( cv_mem->cv_sensi && cv_mem->cv_errconS ) + ddn = cvSensUpdateNorm(cv_mem, ddn, cv_mem->cv_znS[cv_mem->cv_q], + cv_mem->cv_ewtS); - if (cv_mem->cv_quadr_sensi) { - for (k = 1; k <= cv_mem->cv_q; k++) - for (j = cv_mem->cv_q; j >= k; j--) - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - ONE, cv_mem->cv_znQS[j-1], - -ONE, cv_mem->cv_znQS[j], - cv_mem->cv_znQS[j-1]); + if ( cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS ) + ddn = cvQuadSensUpdateNorm(cv_mem, ddn, cv_mem->cv_znQS[cv_mem->cv_q], + cv_mem->cv_ewtQS); + + ddn = ddn * cv_mem->cv_tq[1]; + cv_mem->cv_etaqm1 = ONE/(SUNRpowerR(BIAS1*ddn, ONE/cv_mem->cv_q) + ADDON); } + return(cv_mem->cv_etaqm1); } /* - * ----------------------------------------------------------------- - * Error Test - * ----------------------------------------------------------------- - */ - -/* - * cvDoErrorTest - * - * This routine performs the local error test, for the state, quadrature, - * or sensitivity variables. Its last three arguments change depending - * on which variables the error test is to be performed on. - * - * The weighted local error norm dsm is loaded into *dsmPtr, and - * the test dsm ?<= 1 is made. - * - * If the test passes, cvDoErrorTest returns CV_SUCCESS. - * - * If the test fails, we undo the step just taken (call cvRestore) and - * - * - if maxnef error test failures have occurred or if SUNRabs(h) = hmin, - * we return CV_ERR_FAILURE. - * - * - if more than MXNEF1 error test failures have occurred, an order - * reduction is forced. If already at order 1, restart by reloading - * zn from scratch (also znQ and znS if appropriate). - * If f() fails, we return CV_RHSFUNC_FAIL or CV_UNREC_RHSFUNC_ERR; - * if fQ() fails, we return CV_QRHSFUNC_FAIL or CV_UNREC_QRHSFUNC_ERR; - * if cvSensRhsWrapper() fails, we return CV_SRHSFUNC_FAIL or CV_UNREC_SRHSFUNC_ERR; - * (no recovery is possible at this stage). - * - * - otherwise, set *nflagPtr to PREV_ERR_FAIL, and return TRY_AGAIN. + * cvComputeEtaqp1 * + * This routine computes and returns the value of etaqp1 for a + * possible increase in order by 1. */ -static int cvDoErrorTest(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, - realtype acor_nrm, - int *nefPtr, long int *netfPtr, realtype *dsmPtr) +static realtype cvComputeEtaqp1(CVodeMem cv_mem) { - realtype dsm; - int retval, is; - N_Vector wrk1, wrk2; + realtype dup, cquot; - dsm = acor_nrm * cv_mem->cv_tq[2]; + cv_mem->cv_etaqp1 = ZERO; + if (cv_mem->cv_q != cv_mem->cv_qmax) { + if (cv_mem->cv_saved_tq5 == ZERO) return(cv_mem->cv_etaqp1); + cquot = (cv_mem->cv_tq[5] / cv_mem->cv_saved_tq5) * + SUNRpowerI(cv_mem->cv_h/cv_mem->cv_tau[2], cv_mem->cv_L); + N_VLinearSum(-cquot, cv_mem->cv_zn[cv_mem->cv_qmax], ONE, + cv_mem->cv_acor, cv_mem->cv_tempv); + dup = N_VWrmsNorm(cv_mem->cv_tempv, cv_mem->cv_ewt); - /* If est. local error norm dsm passes test, return CV_SUCCESS */ - *dsmPtr = dsm; - if (dsm <= ONE) return(CV_SUCCESS); + if ( cv_mem->cv_quadr && cv_mem->cv_errconQ ) { + N_VLinearSum(-cquot, cv_mem->cv_znQ[cv_mem->cv_qmax], ONE, + cv_mem->cv_acorQ, cv_mem->cv_tempvQ); + dup = cvQuadUpdateNorm(cv_mem, dup, cv_mem->cv_tempvQ, cv_mem->cv_ewtQ); + } - /* Test failed; increment counters, set nflag, and restore zn array */ - (*nefPtr)++; - (*netfPtr)++; - *nflagPtr = PREV_ERR_FAIL; - cvRestore(cv_mem, saved_t); + if ( cv_mem->cv_sensi && cv_mem->cv_errconS ) { + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + -cquot, cv_mem->cv_znS[cv_mem->cv_qmax], + ONE, cv_mem->cv_acorS, + cv_mem->cv_tempvS); - /* At maxnef failures or |h| = hmin, return CV_ERR_FAILURE */ - if ((SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin*ONEPSM) || - (*nefPtr == cv_mem->cv_maxnef)) - return(CV_ERR_FAILURE); + dup = cvSensUpdateNorm(cv_mem, dup, cv_mem->cv_tempvS, cv_mem->cv_ewtS); + } - /* Set etamax = 1 to prevent step size increase at end of this step */ - cv_mem->cv_etamax = ONE; + if ( cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS ) { + (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, + -cquot, cv_mem->cv_znQS[cv_mem->cv_qmax], + ONE, cv_mem->cv_acorQS, + cv_mem->cv_tempvQS); - /* Set h ratio eta from dsm, rescale, and return for retry of step */ - if (*nefPtr <= MXNEF1) { - cv_mem->cv_eta = ONE / (SUNRpowerR(BIAS2*dsm,ONE/cv_mem->cv_L) + ADDON); - cv_mem->cv_eta = SUNMAX(ETAMIN, SUNMAX(cv_mem->cv_eta, - cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h))); - if (*nefPtr >= SMALL_NEF) - cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta, ETAMXF); - cvRescale(cv_mem); - return(TRY_AGAIN); + dup = cvSensUpdateNorm(cv_mem, dup, cv_mem->cv_tempvQS, cv_mem->cv_ewtQS); + } + + dup = dup * cv_mem->cv_tq[3]; + cv_mem->cv_etaqp1 = ONE / (SUNRpowerR(BIAS3*dup, ONE/(cv_mem->cv_L+1)) + ADDON); } + return(cv_mem->cv_etaqp1); +} - /* After MXNEF1 failures, force an order reduction and retry step */ - if (cv_mem->cv_q > 1) { - cv_mem->cv_eta = SUNMAX(ETAMIN, cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); - cvAdjustOrder(cv_mem,-1); - cv_mem->cv_L = cv_mem->cv_q; - cv_mem->cv_q--; - cv_mem->cv_qwait = cv_mem->cv_L; - cvRescale(cv_mem); - return(TRY_AGAIN); - } - - /* If already at order 1, restart: reload zn, znQ, znS, znQS from scratch */ - cv_mem->cv_eta = SUNMAX(ETAMIN, cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); - cv_mem->cv_h *= cv_mem->cv_eta; - cv_mem->cv_next_h = cv_mem->cv_h; - cv_mem->cv_hscale = cv_mem->cv_h; - cv_mem->cv_qwait = LONG_WAIT; - cv_mem->cv_nscon = 0; - - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_zn[0], - cv_mem->cv_tempv, cv_mem->cv_user_data); - cv_mem->cv_nfe++; - if (retval < 0) return(CV_RHSFUNC_FAIL); - if (retval > 0) return(CV_UNREC_RHSFUNC_ERR); - - N_VScale(cv_mem->cv_h, cv_mem->cv_tempv, cv_mem->cv_zn[1]); - - if (cv_mem->cv_quadr) { +/* + * cvChooseEta + * Given etaqm1, etaq, etaqp1 (the values of eta for qprime = + * q - 1, q, or q + 1, respectively), this routine chooses the + * maximum eta value, sets eta to that value, and sets qprime to the + * corresponding value of q. If there is a tie, the preference + * order is to (1) keep the same order, then (2) decrease the order, + * and finally (3) increase the order. If the maximum eta value + * is within the fixed step bounds, the order is kept unchanged and + * eta is set to 1. + */ - retval = cv_mem->cv_fQ(cv_mem->cv_tn, cv_mem->cv_zn[0], - cv_mem->cv_tempvQ, cv_mem->cv_user_data); - cv_mem->cv_nfQe++; - if (retval < 0) return(CV_QRHSFUNC_FAIL); - if (retval > 0) return(CV_UNREC_QRHSFUNC_ERR); +static void cvChooseEta(CVodeMem cv_mem) +{ + realtype etam; + int is; - N_VScale(cv_mem->cv_h, cv_mem->cv_tempvQ, cv_mem->cv_znQ[1]); + etam = SUNMAX(cv_mem->cv_etaqm1, SUNMAX(cv_mem->cv_etaq, cv_mem->cv_etaqp1)); + if ((etam > cv_mem->cv_eta_min_fx) && (etam < cv_mem->cv_eta_max_fx)) + { + cv_mem->cv_eta = ONE; + cv_mem->cv_qprime = cv_mem->cv_q; } + else + { + if (etam == cv_mem->cv_etaq) + { + cv_mem->cv_eta = cv_mem->cv_etaq; + cv_mem->cv_qprime = cv_mem->cv_q; + } + else if (etam == cv_mem->cv_etaqm1) + { + cv_mem->cv_eta = cv_mem->cv_etaqm1; + cv_mem->cv_qprime = cv_mem->cv_q - 1; + } + else + { + cv_mem->cv_eta = cv_mem->cv_etaqp1; + cv_mem->cv_qprime = cv_mem->cv_q + 1; + + if (cv_mem->cv_lmm == CV_BDF) + { + /* + * Store Delta_n in zn[qmax] to be used in order increase + * + * This happens at the last step of order q before an increase + * to order q+1, so it represents Delta_n in the ELTE at q+1 + */ + + N_VScale(ONE, cv_mem->cv_acor, cv_mem->cv_zn[cv_mem->cv_qmax]); + + if (cv_mem->cv_quadr && cv_mem->cv_errconQ) + N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_znQ[cv_mem->cv_qmax]); + + if (cv_mem->cv_sensi && cv_mem->cv_errconS) + { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = ONE; + + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorS, cv_mem->cv_znS[cv_mem->cv_qmax]); + } - if (cv_mem->cv_sensi) { - - wrk1 = cv_mem->cv_ftemp; - wrk2 = cv_mem->cv_ftempS[0]; - - retval = cvSensRhsWrapper(cv_mem, cv_mem->cv_tn, cv_mem->cv_zn[0], - cv_mem->cv_tempv, cv_mem->cv_znS[0], - cv_mem->cv_tempvS, wrk1, wrk2); - if (retval < 0) return(CV_SRHSFUNC_FAIL); - if (retval > 0) return(CV_UNREC_SRHSFUNC_ERR); - - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = cv_mem->cv_h; + if (cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS) + { + for (is=0; iscv_Ns; is++) + cv_mem->cv_cvals[is] = ONE; - retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_tempvS, cv_mem->cv_znS[1]); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, + cv_mem->cv_acorQS, cv_mem->cv_znQS[cv_mem->cv_qmax]); + } + } + } } +} - if (cv_mem->cv_quadr_sensi) { +/* + * ----------------------------------------------------------------- + * Function to handle failures + * ----------------------------------------------------------------- + */ - wrk1 = cv_mem->cv_ftemp; - wrk2 = cv_mem->cv_ftempQ; +/* + * cvHandleFailure + * + * This routine prints error messages for all cases of failure by + * cvHin and cvStep. + * It returns to CVode the value that CVode is to return to the user. + */ - retval = cv_mem->cv_fQS(cv_mem->cv_Ns, cv_mem->cv_tn, - cv_mem->cv_zn[0], cv_mem->cv_znS[0], - cv_mem->cv_tempvQ, cv_mem->cv_tempvQS, - cv_mem->cv_fQS_data, wrk1, wrk2); - cv_mem->cv_nfQSe++; - if (retval < 0) return(CV_QSRHSFUNC_FAIL); - if (retval > 0) return(CV_UNREC_QSRHSFUNC_ERR); +static int cvHandleFailure(CVodeMem cv_mem, int flag) +{ - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = cv_mem->cv_h; + /* Set vector of absolute weighted local errors */ + /* + N_VProd(acor, ewt, tempv); + N_VAbs(tempv, tempv); + */ - retval = N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_tempvQS, cv_mem->cv_znQS[1]); - if (retval != CV_SUCCESS) return (CV_VECTOROP_ERR); + /* Depending on flag, print error message and return error flag */ + switch (flag) { + case CV_ERR_FAILURE: + cvProcessError(cv_mem, CV_ERR_FAILURE, "CVODES", "CVode", + MSGCV_ERR_FAILS, cv_mem->cv_tn, cv_mem->cv_h); + break; + case CV_CONV_FAILURE: + cvProcessError(cv_mem, CV_CONV_FAILURE, "CVODES", "CVode", + MSGCV_CONV_FAILS, cv_mem->cv_tn, cv_mem->cv_h); + break; + case CV_LSETUP_FAIL: + cvProcessError(cv_mem, CV_LSETUP_FAIL, "CVODES", "CVode", + MSGCV_SETUP_FAILED, cv_mem->cv_tn); + break; + case CV_LSOLVE_FAIL: + cvProcessError(cv_mem, CV_LSOLVE_FAIL, "CVODES", "CVode", + MSGCV_SOLVE_FAILED, cv_mem->cv_tn); + break; + case CV_RHSFUNC_FAIL: + cvProcessError(cv_mem, CV_RHSFUNC_FAIL, "CVODES", "CVode", + MSGCV_RHSFUNC_FAILED, cv_mem->cv_tn); + break; + case CV_UNREC_RHSFUNC_ERR: + cvProcessError(cv_mem, CV_UNREC_RHSFUNC_ERR, "CVODES", "CVode", + MSGCV_RHSFUNC_UNREC, cv_mem->cv_tn); + break; + case CV_REPTD_RHSFUNC_ERR: + cvProcessError(cv_mem, CV_REPTD_RHSFUNC_ERR, "CVODES", "CVode", + MSGCV_RHSFUNC_REPTD, cv_mem->cv_tn); + break; + case CV_RTFUNC_FAIL: + cvProcessError(cv_mem, CV_RTFUNC_FAIL, "CVODES", "CVode", + MSGCV_RTFUNC_FAILED, cv_mem->cv_tn); + break; + case CV_QRHSFUNC_FAIL: + cvProcessError(cv_mem, CV_QRHSFUNC_FAIL, "CVODES", "CVode", + MSGCV_QRHSFUNC_FAILED, cv_mem->cv_tn); + break; + case CV_UNREC_QRHSFUNC_ERR: + cvProcessError(cv_mem, CV_UNREC_QRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_QRHSFUNC_UNREC, cv_mem->cv_tn); + break; + case CV_REPTD_QRHSFUNC_ERR: + cvProcessError(cv_mem, CV_REPTD_QRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_QRHSFUNC_REPTD, cv_mem->cv_tn); + break; + case CV_SRHSFUNC_FAIL: + cvProcessError(cv_mem, CV_SRHSFUNC_FAIL, "CVODES", "CVode", + MSGCV_SRHSFUNC_FAILED, cv_mem->cv_tn); + break; + case CV_UNREC_SRHSFUNC_ERR: + cvProcessError(cv_mem, CV_UNREC_SRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_SRHSFUNC_UNREC, cv_mem->cv_tn); + break; + case CV_REPTD_SRHSFUNC_ERR: + cvProcessError(cv_mem, CV_REPTD_SRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_SRHSFUNC_REPTD, cv_mem->cv_tn); + break; + case CV_QSRHSFUNC_FAIL: + cvProcessError(cv_mem, CV_QSRHSFUNC_FAIL, "CVODES", "CVode", + MSGCV_QSRHSFUNC_FAILED, cv_mem->cv_tn); + break; + case CV_UNREC_QSRHSFUNC_ERR: + cvProcessError(cv_mem, CV_UNREC_QSRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_QSRHSFUNC_UNREC, cv_mem->cv_tn); + break; + case CV_REPTD_QSRHSFUNC_ERR: + cvProcessError(cv_mem, CV_REPTD_QSRHSFUNC_ERR, "CVODES", "CVode", + MSGCV_QSRHSFUNC_REPTD, cv_mem->cv_tn); + break; + case CV_TOO_CLOSE: + cvProcessError(cv_mem, CV_TOO_CLOSE, "CVODES", "CVode", + MSGCV_TOO_CLOSE); + break; + case CV_MEM_NULL: + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVode", + MSGCV_NO_MEM); + break; + case SUN_NLS_MEM_NULL: + cvProcessError(cv_mem, CV_MEM_NULL, "CVODES", "CVode", + MSGCV_NLS_INPUT_NULL, cv_mem->cv_tn); + break; + case CV_NLS_SETUP_FAIL: + cvProcessError(cv_mem, CV_NLS_SETUP_FAIL, "CVODES", "CVode", + MSGCV_NLS_SETUP_FAILED, cv_mem->cv_tn); + break; + case CV_CONSTR_FAIL: + cvProcessError(cv_mem, CV_CONSTR_FAIL, "CVODES", "CVode", + MSGCV_FAILED_CONSTR, cv_mem->cv_tn); + break; + case CV_NLS_FAIL: + cvProcessError(cv_mem, CV_NLS_FAIL, "CVODES", "CVode", + MSGCV_NLS_FAIL, cv_mem->cv_tn); + break; + case CV_PROJ_MEM_NULL: + cvProcessError(cv_mem, CV_PROJ_MEM_NULL, "CVODES", "CVode", + MSG_CV_PROJ_MEM_NULL); + break; + case CV_PROJFUNC_FAIL: + cvProcessError(cv_mem, CV_PROJFUNC_FAIL, "CVODES", "CVode", + MSG_CV_PROJFUNC_FAIL, cv_mem->cv_tn); + break; + case CV_REPTD_PROJFUNC_ERR: + cvProcessError(cv_mem, CV_REPTD_PROJFUNC_ERR, "CVODES", "CVode", + MSG_CV_REPTD_PROJFUNC_ERR, cv_mem->cv_tn); + break; + default: + /* This return should never happen */ + cvProcessError(cv_mem, CV_UNRECOGNIZED_ERR, "CVODES", "CVode", + "CVODES encountered an unrecognized error. Please report this to the Sundials developers at sundials-users@llnl.gov"); + return (CV_UNRECOGNIZED_ERR); } - return(TRY_AGAIN); + return(flag); } /* * ----------------------------------------------------------------- - * Functions called after a successful step + * Functions for BDF Stability Limit Detection * ----------------------------------------------------------------- */ /* - * cvCompleteStep + * cvBDFStab * - * This routine performs various update operations when the solution - * to the nonlinear system has passed the local error test. - * We increment the step counter nst, record the values hu and qu, - * update the tau array, and apply the corrections to the zn array. - * The tau[i] are the last q values of h, with tau[1] the most recent. - * The counter qwait is decremented, and if qwait == 1 (and q < qmax) - * we save acor and tq[5] for a possible order increase. + * This routine handles the BDF Stability Limit Detection Algorithm + * STALD. It is called if lmm = CV_BDF and the SLDET option is on. + * If the order is 3 or more, the required norm data is saved. + * If a decision to reduce order has not already been made, and + * enough data has been saved, cvSLdet is called. If it signals + * a stability limit violation, the order is reduced, and the step + * size is reset accordingly. */ -static void cvCompleteStep(CVodeMem cv_mem) +static void cvBDFStab(CVodeMem cv_mem) { - int i; - int is; + int i,k, ldflag, factorial; + realtype sq, sqm1, sqm2; - cv_mem->cv_nst++; - cv_mem->cv_nscon++; - cv_mem->cv_hu = cv_mem->cv_h; - cv_mem->cv_qu = cv_mem->cv_q; + /* If order is 3 or greater, then save scaled derivative data, + push old data down in i, then add current values to top. */ - for (i=cv_mem->cv_q; i >= 2; i--) - cv_mem->cv_tau[i] = cv_mem->cv_tau[i-1]; - if ((cv_mem->cv_q==1) && (cv_mem->cv_nst > 1)) - cv_mem->cv_tau[2] = cv_mem->cv_tau[1]; - cv_mem->cv_tau[1] = cv_mem->cv_h; + if (cv_mem->cv_q >= 3) { + for (k = 1; k <= 3; k++) + for (i = 5; i >= 2; i--) + cv_mem->cv_ssdat[i][k] = cv_mem->cv_ssdat[i-1][k]; + factorial = 1; + for (i = 1; i <= cv_mem->cv_q-1; i++) factorial *= i; + sq = factorial * cv_mem->cv_q * (cv_mem->cv_q+1) * + cv_mem->cv_acnrm / SUNMAX(cv_mem->cv_tq[5],TINY); + sqm1 = factorial * cv_mem->cv_q * + N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q], cv_mem->cv_ewt); + sqm2 = factorial * + N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q-1], cv_mem->cv_ewt); + cv_mem->cv_ssdat[1][1] = sqm2*sqm2; + cv_mem->cv_ssdat[1][2] = sqm1*sqm1; + cv_mem->cv_ssdat[1][3] = sq*sq; + } - /* Apply correction to column j of zn: l_j * Delta_n */ - (void) N_VScaleAddMulti(cv_mem->cv_q+1, cv_mem->cv_l, cv_mem->cv_acor, - cv_mem->cv_zn, cv_mem->cv_zn); - - if (cv_mem->cv_quadr) - (void) N_VScaleAddMulti(cv_mem->cv_q+1, cv_mem->cv_l, cv_mem->cv_acorQ, - cv_mem->cv_znQ, cv_mem->cv_znQ); - - if (cv_mem->cv_sensi) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q+1, - cv_mem->cv_l, cv_mem->cv_acorS, - cv_mem->cv_znS, cv_mem->cv_znS); - - if (cv_mem->cv_quadr_sensi) - (void) N_VScaleAddMultiVectorArray(cv_mem->cv_Ns, cv_mem->cv_q+1, - cv_mem->cv_l, cv_mem->cv_acorQS, - cv_mem->cv_znQS, cv_mem->cv_znQS); - - /* If necessary, store Delta_n in zn[qmax] to be used in order increase. - * This actually will be Delta_{n-1} in the ELTE at q+1 since it happens at - * the next to last step of order q before a possible one at order q+1 - */ - - cv_mem->cv_qwait--; - if ((cv_mem->cv_qwait == 1) && (cv_mem->cv_q != cv_mem->cv_qmax)) { - - N_VScale(ONE, cv_mem->cv_acor, cv_mem->cv_zn[cv_mem->cv_qmax]); - - if (cv_mem->cv_quadr) - N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_znQ[cv_mem->cv_qmax]); - - if (cv_mem->cv_sensi) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = ONE; - - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorS, cv_mem->cv_znS[cv_mem->cv_qmax]); - } + if (cv_mem->cv_qprime >= cv_mem->cv_q) { - if (cv_mem->cv_quadr_sensi) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = ONE; + /* If order is 3 or greater, and enough ssdat has been saved, + nscon >= q+5, then call stability limit detection routine. */ - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorQS, cv_mem->cv_znQS[cv_mem->cv_qmax]); + if ( (cv_mem->cv_q >= 3) && (cv_mem->cv_nscon >= cv_mem->cv_q+5) ) { + ldflag = cvSLdet(cv_mem); + if (ldflag > 3) { + /* A stability limit violation is indicated by + a return flag of 4, 5, or 6. + Reduce new order. */ + cv_mem->cv_qprime = cv_mem->cv_q-1; + cv_mem->cv_eta = cv_mem->cv_etaqm1; + cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta,cv_mem->cv_etamax); + cv_mem->cv_eta = cv_mem->cv_eta / + SUNMAX(ONE,SUNRabs(cv_mem->cv_h)*cv_mem->cv_hmax_inv*cv_mem->cv_eta); + cv_mem->cv_hprime = cv_mem->cv_h * cv_mem->cv_eta; + cv_mem->cv_nor = cv_mem->cv_nor + 1; + } } - - cv_mem->cv_saved_tq5 = cv_mem->cv_tq[5]; - cv_mem->cv_indx_acor = cv_mem->cv_qmax; } - + else { + /* Otherwise, let order increase happen, and + reset stability limit counter, nscon. */ + cv_mem->cv_nscon = 0; + } } /* - * cvPrepareNextStep + * cvSLdet * - * This routine handles the setting of stepsize and order for the - * next step -- hprime and qprime. Along with hprime, it sets the - * ratio eta = hprime/h. It also updates other state variables - * related to a change of step size or order. + * This routine detects stability limitation using stored scaled + * derivatives data. cvSLdet returns the magnitude of the + * dominate characteristic root, rr. The presence of a stability + * limit is indicated by rr > "something a little less then 1.0", + * and a positive kflag. This routine should only be called if + * order is greater than or equal to 3, and data has been collected + * for 5 time steps. + * + * Returned values: + * kflag = 1 -> Found stable characteristic root, normal matrix case + * kflag = 2 -> Found stable characteristic root, quartic solution + * kflag = 3 -> Found stable characteristic root, quartic solution, + * with Newton correction + * kflag = 4 -> Found stability violation, normal matrix case + * kflag = 5 -> Found stability violation, quartic solution + * kflag = 6 -> Found stability violation, quartic solution, + * with Newton correction + * + * kflag < 0 -> No stability limitation, + * or could not compute limitation. + * + * kflag = -1 -> Min/max ratio of ssdat too small. + * kflag = -2 -> For normal matrix case, vmax > vrrt2*vrrt2 + * kflag = -3 -> For normal matrix case, The three ratios + * are inconsistent. + * kflag = -4 -> Small coefficient prevents elimination of quartics. + * kflag = -5 -> R value from quartics not consistent. + * kflag = -6 -> No corrected root passes test on qk values + * kflag = -7 -> Trouble solving for sigsq. + * kflag = -8 -> Trouble solving for B, or R via B. + * kflag = -9 -> R via sigsq[k] disagrees with R from data. */ -static void cvPrepareNextStep(CVodeMem cv_mem, realtype dsm) +static int cvSLdet(CVodeMem cv_mem) { - /* If etamax = 1, defer step size or order changes */ - if (cv_mem->cv_etamax == ONE) { - cv_mem->cv_qwait = SUNMAX(cv_mem->cv_qwait, 2); - cv_mem->cv_qprime = cv_mem->cv_q; - cv_mem->cv_hprime = cv_mem->cv_h; - cv_mem->cv_eta = ONE; - return; - } - - /* etaq is the ratio of new to old h at the current order */ - cv_mem->cv_etaq = ONE /(SUNRpowerR(BIAS2*dsm,ONE/cv_mem->cv_L) + ADDON); + int i, k, j, it, kmin = 0, kflag = 0; + realtype rat[5][4], rav[4], qkr[4], sigsq[4], smax[4], ssmax[4]; + realtype drr[4], rrc[4],sqmx[4], qjk[4][4], vrat[5], qc[6][4], qco[6][4]; + realtype rr, rrcut, vrrtol, vrrt2, sqtol, rrtol; + realtype smink, smaxk, sumrat, sumrsq, vmin, vmax, drrmax, adrr; + realtype tem, sqmax, saqk, qp, s, sqmaxk, saqj, sqmin; + realtype rsa, rsb, rsc, rsd, rd1a, rd1b, rd1c; + realtype rd2a, rd2b, rd3a, cest1, corr1; + realtype ratp, ratm, qfac1, qfac2, bb, rrb; - /* If no order change, adjust eta and acor in cvSetEta and return */ - if (cv_mem->cv_qwait != 0) { - cv_mem->cv_eta = cv_mem->cv_etaq; - cv_mem->cv_qprime = cv_mem->cv_q; - cvSetEta(cv_mem); - return; - } + /* The following are cutoffs and tolerances used by this routine */ - /* If qwait = 0, consider an order change. etaqm1 and etaqp1 are - the ratios of new to old h at orders q-1 and q+1, respectively. - cvChooseEta selects the largest; cvSetEta adjusts eta and acor */ - cv_mem->cv_qwait = 2; - cv_mem->cv_etaqm1 = cvComputeEtaqm1(cv_mem); - cv_mem->cv_etaqp1 = cvComputeEtaqp1(cv_mem); - cvChooseEta(cv_mem); - cvSetEta(cv_mem); -} + rrcut = RCONST(0.98); + vrrtol = RCONST(1.0e-4); + vrrt2 = RCONST(5.0e-4); + sqtol = RCONST(1.0e-3); + rrtol = RCONST(1.0e-2); -/* - * cvSetEta - * - * This routine adjusts the value of eta according to the various - * heuristic limits and the optional input hmax. - */ + rr = ZERO; -static void cvSetEta(CVodeMem cv_mem) -{ + /* Index k corresponds to the degree of the interpolating polynomial. */ + /* k = 1 -> q-1 */ + /* k = 2 -> q */ + /* k = 3 -> q+1 */ - /* If eta below the threshhold THRESH, reject a change of step size */ - if (cv_mem->cv_eta < THRESH) { - cv_mem->cv_eta = ONE; - cv_mem->cv_hprime = cv_mem->cv_h; - } else { - /* Limit eta by etamax and hmax, then set hprime */ - cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta, cv_mem->cv_etamax); - cv_mem->cv_eta /= SUNMAX(ONE, SUNRabs(cv_mem->cv_h) * - cv_mem->cv_hmax_inv*cv_mem->cv_eta); - cv_mem->cv_hprime = cv_mem->cv_h * cv_mem->cv_eta; - if (cv_mem->cv_qprime < cv_mem->cv_q) cv_mem->cv_nscon = 0; - } -} + /* Index i is a backward-in-time index, i = 1 -> current time, */ + /* i = 2 -> previous step, etc */ -/* - * cvComputeEtaqm1 - * - * This routine computes and returns the value of etaqm1 for a - * possible decrease in order by 1. - */ + /* get maxima, minima, and variances, and form quartic coefficients */ -static realtype cvComputeEtaqm1(CVodeMem cv_mem) -{ - realtype ddn; + for (k=1; k<=3; k++) { + smink = cv_mem->cv_ssdat[1][k]; + smaxk = ZERO; - cv_mem->cv_etaqm1 = ZERO; + for (i=1; i<=5; i++) { + smink = SUNMIN(smink,cv_mem->cv_ssdat[i][k]); + smaxk = SUNMAX(smaxk,cv_mem->cv_ssdat[i][k]); + } - if (cv_mem->cv_q > 1) { + if (smink < TINY*smaxk) { + kflag = -1; + return(kflag); + } + smax[k] = smaxk; + ssmax[k] = smaxk*smaxk; - ddn = N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q], cv_mem->cv_ewt); + sumrat = ZERO; + sumrsq = ZERO; + for (i=1; i<=4; i++) { + rat[i][k] = cv_mem->cv_ssdat[i][k] / cv_mem->cv_ssdat[i+1][k]; + sumrat = sumrat + rat[i][k]; + sumrsq = sumrsq + rat[i][k]*rat[i][k]; + } + rav[k] = FOURTH*sumrat; + vrat[k] = SUNRabs(FOURTH*sumrsq - rav[k]*rav[k]); - if ( cv_mem->cv_quadr && cv_mem->cv_errconQ ) - ddn = cvQuadUpdateNorm(cv_mem, ddn, cv_mem->cv_znQ[cv_mem->cv_q], - cv_mem->cv_ewtQ); + qc[5][k] = cv_mem->cv_ssdat[1][k] * cv_mem->cv_ssdat[3][k] - + cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[2][k]; + qc[4][k] = cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[3][k] - + cv_mem->cv_ssdat[1][k] * cv_mem->cv_ssdat[4][k]; + qc[3][k] = ZERO; + qc[2][k] = cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[5][k] - + cv_mem->cv_ssdat[3][k] * cv_mem->cv_ssdat[4][k]; + qc[1][k] = cv_mem->cv_ssdat[4][k] * cv_mem->cv_ssdat[4][k] - + cv_mem->cv_ssdat[3][k] * cv_mem->cv_ssdat[5][k]; - if ( cv_mem->cv_sensi && cv_mem->cv_errconS ) - ddn = cvSensUpdateNorm(cv_mem, ddn, cv_mem->cv_znS[cv_mem->cv_q], - cv_mem->cv_ewtS); + for (i=1; i<=5; i++) + qco[i][k] = qc[i][k]; - if ( cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS ) - ddn = cvQuadSensUpdateNorm(cv_mem, ddn, cv_mem->cv_znQS[cv_mem->cv_q], - cv_mem->cv_ewtQS); + } /* End of k loop */ - ddn = ddn * cv_mem->cv_tq[1]; - cv_mem->cv_etaqm1 = ONE/(SUNRpowerR(BIAS1*ddn, ONE/cv_mem->cv_q) + ADDON); - } + /* Isolate normal or nearly-normal matrix case. The three quartics will + have a common or nearly-common root in this case. + Return a kflag = 1 if this procedure works. If the three roots + differ more than vrrt2, return error kflag = -3. */ - return(cv_mem->cv_etaqm1); -} + vmin = SUNMIN(vrat[1],SUNMIN(vrat[2],vrat[3])); + vmax = SUNMAX(vrat[1],SUNMAX(vrat[2],vrat[3])); -/* - * cvComputeEtaqp1 - * - * This routine computes and returns the value of etaqp1 for a - * possible increase in order by 1. - */ + if (vmin < vrrtol*vrrtol) { -static realtype cvComputeEtaqp1(CVodeMem cv_mem) -{ - realtype dup, cquot; + if (vmax > vrrt2*vrrt2) { + kflag = -2; + return(kflag); + } else { + rr = (rav[1] + rav[2] + rav[3])/THREE; + drrmax = ZERO; + for (k = 1;k<=3;k++) { + adrr = SUNRabs(rav[k] - rr); + drrmax = SUNMAX(drrmax, adrr); + } + if (drrmax > vrrt2) { kflag = -3; return(kflag); } - cv_mem->cv_etaqp1 = ZERO; + kflag = 1; - if (cv_mem->cv_q != cv_mem->cv_qmax) { + /* can compute charactistic root, drop to next section */ + } - if (cv_mem->cv_saved_tq5 == ZERO) return(cv_mem->cv_etaqp1); + } else { - cquot = (cv_mem->cv_tq[5] / cv_mem->cv_saved_tq5) * - SUNRpowerI(cv_mem->cv_h/cv_mem->cv_tau[2], cv_mem->cv_L); - N_VLinearSum(-cquot, cv_mem->cv_zn[cv_mem->cv_qmax], ONE, - cv_mem->cv_acor, cv_mem->cv_tempv); - dup = N_VWrmsNorm(cv_mem->cv_tempv, cv_mem->cv_ewt); + /* use the quartics to get rr. */ - if ( cv_mem->cv_quadr && cv_mem->cv_errconQ ) { - N_VLinearSum(-cquot, cv_mem->cv_znQ[cv_mem->cv_qmax], ONE, - cv_mem->cv_acorQ, cv_mem->cv_tempvQ); - dup = cvQuadUpdateNorm(cv_mem, dup, cv_mem->cv_tempvQ, cv_mem->cv_ewtQ); + if (SUNRabs(qco[1][1]) < TINY*ssmax[1]) { + kflag = -4; + return(kflag); } - if ( cv_mem->cv_sensi && cv_mem->cv_errconS ) { - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - -cquot, cv_mem->cv_znS[cv_mem->cv_qmax], - ONE, cv_mem->cv_acorS, - cv_mem->cv_tempvS); + tem = qco[1][2]/qco[1][1]; + for (i=2; i<=5; i++) { + qco[i][2] = qco[i][2] - tem*qco[i][1]; + } - dup = cvSensUpdateNorm(cv_mem, dup, cv_mem->cv_tempvS, cv_mem->cv_ewtS); + qco[1][2] = ZERO; + tem = qco[1][3]/qco[1][1]; + for (i=2; i<=5; i++) { + qco[i][3] = qco[i][3] - tem*qco[i][1]; } + qco[1][3] = ZERO; - if ( cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS ) { - (void) N_VLinearSumVectorArray(cv_mem->cv_Ns, - -cquot, cv_mem->cv_znQS[cv_mem->cv_qmax], - ONE, cv_mem->cv_acorQS, - cv_mem->cv_tempvQS); + if (SUNRabs(qco[2][2]) < TINY*ssmax[2]) { + kflag = -4; + return(kflag); + } - dup = cvSensUpdateNorm(cv_mem, dup, cv_mem->cv_tempvQS, cv_mem->cv_ewtQS); + tem = qco[2][3]/qco[2][2]; + for (i=3; i<=5; i++) { + qco[i][3] = qco[i][3] - tem*qco[i][2]; } - dup = dup * cv_mem->cv_tq[3]; - cv_mem->cv_etaqp1 = ONE / (SUNRpowerR(BIAS3*dup, ONE/(cv_mem->cv_L+1)) + ADDON); - } + if (SUNRabs(qco[4][3]) < TINY*ssmax[3]) { + kflag = -4; + return(kflag); + } - return(cv_mem->cv_etaqp1); -} + rr = -qco[5][3]/qco[4][3]; -/* - * cvChooseEta - * Given etaqm1, etaq, etaqp1 (the values of eta for qprime = - * q - 1, q, or q + 1, respectively), this routine chooses the - * maximum eta value, sets eta to that value, and sets qprime to the - * corresponding value of q. If there is a tie, the preference - * order is to (1) keep the same order, then (2) decrease the order, - * and finally (3) increase the order. If the maximum eta value - * is below the threshhold THRESH, the order is kept unchanged and - * eta is set to 1. - */ + if (rr < TINY || rr > HUNDRED) { + kflag = -5; + return(kflag); + } -static void cvChooseEta(CVodeMem cv_mem) -{ - realtype etam; - int is; + for (k=1; k<=3; k++) + qkr[k] = qc[5][k] + rr*(qc[4][k] + rr*rr*(qc[2][k] + rr*qc[1][k])); - etam = SUNMAX(cv_mem->cv_etaqm1, SUNMAX(cv_mem->cv_etaq, cv_mem->cv_etaqp1)); + sqmax = ZERO; + for (k=1; k<=3; k++) { + saqk = SUNRabs(qkr[k])/ssmax[k]; + if (saqk > sqmax) sqmax = saqk; + } - if (etam < THRESH) { - cv_mem->cv_eta = ONE; - cv_mem->cv_qprime = cv_mem->cv_q; - return; - } + if (sqmax < sqtol) { + kflag = 2; - if (etam == cv_mem->cv_etaq) { + /* can compute charactistic root, drop to "given rr,etc" */ - cv_mem->cv_eta = cv_mem->cv_etaq; - cv_mem->cv_qprime = cv_mem->cv_q; + } else { - } else if (etam == cv_mem->cv_etaqm1) { + /* do Newton corrections to improve rr. */ - cv_mem->cv_eta = cv_mem->cv_etaqm1; - cv_mem->cv_qprime = cv_mem->cv_q - 1; + for (it=1; it<=3; it++) { + for (k=1; k<=3; k++) { + qp = qc[4][k] + rr*rr*(THREE*qc[2][k] + rr*FOUR*qc[1][k]); + drr[k] = ZERO; + if (SUNRabs(qp) > TINY*ssmax[k]) drr[k] = -qkr[k]/qp; + rrc[k] = rr + drr[k]; + } - } else { + for (k=1; k<=3; k++) { + s = rrc[k]; + sqmaxk = ZERO; + for (j=1; j<=3; j++) { + qjk[j][k] = qc[5][j] + s*(qc[4][j] + s*s*(qc[2][j] + s*qc[1][j])); + saqj = SUNRabs(qjk[j][k])/ssmax[j]; + if (saqj > sqmaxk) sqmaxk = saqj; + } + sqmx[k] = sqmaxk; + } + + sqmin = sqmx[1] + ONE; + for (k=1; k<=3; k++) { + if (sqmx[k] < sqmin) { + kmin = k; + sqmin = sqmx[k]; + } + } + rr = rrc[kmin]; + + if (sqmin < sqtol) { + kflag = 3; + /* can compute charactistic root */ + /* break out of Newton correction loop and drop to "given rr,etc" */ + break; + } else { + for (j=1; j<=3; j++) { + qkr[j] = qjk[j][kmin]; + } + } + } /* end of Newton correction loop */ - cv_mem->cv_eta = cv_mem->cv_etaqp1; - cv_mem->cv_qprime = cv_mem->cv_q + 1; + if (sqmin > sqtol) { + kflag = -6; + return(kflag); + } + } /* end of if (sqmax < sqtol) else */ + } /* end of if (vmin < vrrtol*vrrtol) else, quartics to get rr. */ + + /* given rr, find sigsq[k] and verify rr. */ + /* All positive kflag drop to this section */ - if (cv_mem->cv_lmm == CV_BDF) { + for (k=1; k<=3; k++) { + rsa = cv_mem->cv_ssdat[1][k]; + rsb = cv_mem->cv_ssdat[2][k]*rr; + rsc = cv_mem->cv_ssdat[3][k]*rr*rr; + rsd = cv_mem->cv_ssdat[4][k]*rr*rr*rr; + rd1a = rsa - rsb; + rd1b = rsb - rsc; + rd1c = rsc - rsd; + rd2a = rd1a - rd1b; + rd2b = rd1b - rd1c; + rd3a = rd2a - rd2b; - /* - * Store Delta_n in zn[qmax] to be used in order increase - * - * This happens at the last step of order q before an increase - * to order q+1, so it represents Delta_n in the ELTE at q+1 - */ + if (SUNRabs(rd1b) < TINY*smax[k]) { + kflag = -7; + return(kflag); + } - N_VScale(ONE, cv_mem->cv_acor, cv_mem->cv_zn[cv_mem->cv_qmax]); + cest1 = -rd3a/rd1b; + if (cest1 < TINY || cest1 > FOUR) { + kflag = -7; + return(kflag); + } + corr1 = (rd2b/cest1)/(rr*rr); + sigsq[k] = cv_mem->cv_ssdat[3][k] + corr1; + } - if (cv_mem->cv_quadr && cv_mem->cv_errconQ) - N_VScale(ONE, cv_mem->cv_acorQ, cv_mem->cv_znQ[cv_mem->cv_qmax]); + if (sigsq[2] < TINY) { + kflag = -8; + return(kflag); + } - if (cv_mem->cv_sensi && cv_mem->cv_errconS) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = ONE; + ratp = sigsq[3]/sigsq[2]; + ratm = sigsq[1]/sigsq[2]; + qfac1 = FOURTH*(cv_mem->cv_q*cv_mem->cv_q - ONE); + qfac2 = TWO/(cv_mem->cv_q - ONE); + bb = ratp*ratm - ONE - qfac1*ratp; + tem = ONE - qfac2*bb; - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorS, cv_mem->cv_znS[cv_mem->cv_qmax]); - } + if (SUNRabs(tem) < TINY) { + kflag = -8; + return(kflag); + } - if (cv_mem->cv_quadr_sensi && cv_mem->cv_errconQS) { - for (is=0; iscv_Ns; is++) - cv_mem->cv_cvals[is] = ONE; + rrb = ONE/tem; - (void) N_VScaleVectorArray(cv_mem->cv_Ns, cv_mem->cv_cvals, - cv_mem->cv_acorQS, cv_mem->cv_znQS[cv_mem->cv_qmax]); - } + if (SUNRabs(rrb - rr) > rrtol) { + kflag = -9; + return(kflag); + } - } + /* Check to see if rr is above cutoff rrcut */ + if (rr > rrcut) { + if (kflag == 1) kflag = 4; + if (kflag == 2) kflag = 5; + if (kflag == 3) kflag = 6; } + + /* All positive kflag returned at this point */ + + return(kflag); + } /* * ----------------------------------------------------------------- - * Function to handle failures + * Functions for rootfinding * ----------------------------------------------------------------- */ /* - * cvHandleFailure + * cvRcheck1 * - * This routine prints error messages for all cases of failure by - * cvHin or cvStep. - * It returns to CVode the value that CVode is to return to the user. + * This routine completes the initialization of rootfinding memory + * information, and checks whether g has a zero both at and very near + * the initial point of the IVP. + * + * This routine returns an int equal to: + * CV_RTFUNC_FAIL < 0 if the g function failed, or + * CV_SUCCESS = 0 otherwise. */ -static int cvHandleFailure(CVodeMem cv_mem, int flag) +static int cvRcheck1(CVodeMem cv_mem) { + int i, retval; + realtype smallh, hratio, tplus; + booleantype zroot; - /* Set vector of absolute weighted local errors */ - /* - N_VProd(acor, ewt, tempv); - N_VAbs(tempv, tempv); - */ + for (i = 0; i < cv_mem->cv_nrtfn; i++) cv_mem->cv_iroots[i] = 0; + cv_mem->cv_tlo = cv_mem->cv_tn; + cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * + cv_mem->cv_uround*HUNDRED; - /* Depending on flag, print error message and return error flag */ - switch (flag) { - case CV_ERR_FAILURE: - cvProcessError(cv_mem, CV_ERR_FAILURE, "CVODES", "CVode", - MSGCV_ERR_FAILS, cv_mem->cv_tn, cv_mem->cv_h); - break; - case CV_CONV_FAILURE: - cvProcessError(cv_mem, CV_CONV_FAILURE, "CVODES", "CVode", - MSGCV_CONV_FAILS, cv_mem->cv_tn, cv_mem->cv_h); - break; - case CV_LSETUP_FAIL: - cvProcessError(cv_mem, CV_LSETUP_FAIL, "CVODES", "CVode", - MSGCV_SETUP_FAILED, cv_mem->cv_tn); - break; - case CV_LSOLVE_FAIL: - cvProcessError(cv_mem, CV_LSOLVE_FAIL, "CVODES", "CVode", - MSGCV_SOLVE_FAILED, cv_mem->cv_tn); - break; - case CV_RHSFUNC_FAIL: - cvProcessError(cv_mem, CV_RHSFUNC_FAIL, "CVODES", "CVode", - MSGCV_RHSFUNC_FAILED, cv_mem->cv_tn); - break; - case CV_UNREC_RHSFUNC_ERR: - cvProcessError(cv_mem, CV_UNREC_RHSFUNC_ERR, "CVODES", "CVode", - MSGCV_RHSFUNC_UNREC, cv_mem->cv_tn); - break; - case CV_REPTD_RHSFUNC_ERR: - cvProcessError(cv_mem, CV_REPTD_RHSFUNC_ERR, "CVODES", "CVode", - MSGCV_RHSFUNC_REPTD, cv_mem->cv_tn); - break; - case CV_RTFUNC_FAIL: - cvProcessError(cv_mem, CV_RTFUNC_FAIL, "CVODES", "CVode", - MSGCV_RTFUNC_FAILED, cv_mem->cv_tn); - break; - case CV_QRHSFUNC_FAIL: - cvProcessError(cv_mem, CV_QRHSFUNC_FAIL, "CVODES", "CVode", - MSGCV_QRHSFUNC_FAILED, cv_mem->cv_tn); - break; - case CV_UNREC_QRHSFUNC_ERR: - cvProcessError(cv_mem, CV_UNREC_QRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_QRHSFUNC_UNREC, cv_mem->cv_tn); - break; - case CV_REPTD_QRHSFUNC_ERR: - cvProcessError(cv_mem, CV_REPTD_QRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_QRHSFUNC_REPTD, cv_mem->cv_tn); - break; - case CV_SRHSFUNC_FAIL: - cvProcessError(cv_mem, CV_SRHSFUNC_FAIL, "CVODES", "CVode", - MSGCV_SRHSFUNC_FAILED, cv_mem->cv_tn); - break; - case CV_UNREC_SRHSFUNC_ERR: - cvProcessError(cv_mem, CV_UNREC_SRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_SRHSFUNC_UNREC, cv_mem->cv_tn); - break; - case CV_REPTD_SRHSFUNC_ERR: - cvProcessError(cv_mem, CV_REPTD_SRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_SRHSFUNC_REPTD, cv_mem->cv_tn); - break; - case CV_QSRHSFUNC_FAIL: - cvProcessError(cv_mem, CV_QSRHSFUNC_FAIL, "CVODES", "CVode", - MSGCV_QSRHSFUNC_FAILED, cv_mem->cv_tn); - break; - case CV_UNREC_QSRHSFUNC_ERR: - cvProcessError(cv_mem, CV_UNREC_QSRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_QSRHSFUNC_UNREC, cv_mem->cv_tn); - break; - case CV_REPTD_QSRHSFUNC_ERR: - cvProcessError(cv_mem, CV_REPTD_QSRHSFUNC_ERR, "CVODES", "CVode", - MSGCV_QSRHSFUNC_REPTD, cv_mem->cv_tn); - break; - case CV_TOO_CLOSE: - cvProcessError(cv_mem, CV_TOO_CLOSE, "CVODES", "CVode", - MSGCV_TOO_CLOSE); - break; - case CV_MEM_NULL: - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVode", MSGCV_NO_MEM); - break; - case SUN_NLS_MEM_NULL: - cvProcessError(cv_mem, CV_MEM_NULL, "CVODES", "CVode", MSGCV_NLS_INPUT_NULL, - cv_mem->cv_tn); - break; - case CV_NLS_SETUP_FAIL: - cvProcessError(cv_mem, CV_NLS_SETUP_FAIL, "CVODES", "CVode", MSGCV_NLS_SETUP_FAILED, - cv_mem->cv_tn); - break; - case CV_CONSTR_FAIL: - cvProcessError(cv_mem, CV_CONSTR_FAIL, "CVODES", "CVode", - MSGCV_FAILED_CONSTR, cv_mem->cv_tn); - case CV_NLS_FAIL: - cvProcessError(cv_mem, CV_NLS_FAIL, "CVODES", "CVode", - MSGCV_NLS_FAIL, cv_mem->cv_tn); - break; - default: - cvProcessError(cv_mem, CV_UNRECOGNIZED_ERR, "CVODES", "CVode", - "CVODES encountered an unrecognized error. Please report this to the Sundials developers at sundials-users@llnl.gov"); - return (CV_UNRECOGNIZED_ERR); + /* Evaluate g at initial t and check for zero values. */ + retval = cv_mem->cv_gfun(cv_mem->cv_tlo, cv_mem->cv_zn[0], + cv_mem->cv_glo, cv_mem->cv_user_data); + cv_mem->cv_nge = 1; + if (retval != 0) return(CV_RTFUNC_FAIL); + + zroot = SUNFALSE; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if (SUNRabs(cv_mem->cv_glo[i]) == ZERO) { + zroot = SUNTRUE; + cv_mem->cv_gactive[i] = SUNFALSE; + } } + if (!zroot) return(CV_SUCCESS); - return(flag); -} + /* Some g_i is zero at t0; look at g at t0+(small increment). */ + hratio = SUNMAX(cv_mem->cv_ttol/SUNRabs(cv_mem->cv_h), PT1); + smallh = hratio*cv_mem->cv_h; + tplus = cv_mem->cv_tlo + smallh; + N_VLinearSum(ONE, cv_mem->cv_zn[0], hratio, cv_mem->cv_zn[1], cv_mem->cv_y); + retval = cv_mem->cv_gfun(tplus, cv_mem->cv_y, + cv_mem->cv_ghi, cv_mem->cv_user_data); + cv_mem->cv_nge++; + if (retval != 0) return(CV_RTFUNC_FAIL); -/* - * ----------------------------------------------------------------- - * Functions for BDF Stability Limit Detection - * ----------------------------------------------------------------- - */ + /* We check now only the components of g which were exactly 0.0 at t0 + * to see if we can 'activate' them. */ + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if (!cv_mem->cv_gactive[i] && SUNRabs(cv_mem->cv_ghi[i]) != ZERO) { + cv_mem->cv_gactive[i] = SUNTRUE; + cv_mem->cv_glo[i] = cv_mem->cv_ghi[i]; + } + } + return(CV_SUCCESS); +} /* - * cvBDFStab + * cvRcheck2 * - * This routine handles the BDF Stability Limit Detection Algorithm - * STALD. It is called if lmm = CV_BDF and the SLDET option is on. - * If the order is 3 or more, the required norm data is saved. - * If a decision to reduce order has not already been made, and - * enough data has been saved, cvSLdet is called. If it signals - * a stability limit violation, the order is reduced, and the step - * size is reset accordingly. + * This routine checks for exact zeros of g at the last root found, + * if the last return was a root. It then checks for a close pair of + * zeros (an error condition), and for a new root at a nearby point. + * The array glo = g(tlo) at the left endpoint of the search interval + * is adjusted if necessary to assure that all g_i are nonzero + * there, before returning to do a root search in the interval. + * + * On entry, tlo = tretlast is the last value of tret returned by + * CVode. This may be the previous tn, the previous tout value, + * or the last root location. + * + * This routine returns an int equal to: + * CV_RTFUNC_FAIL < 0 if the g function failed, or + * CLOSERT = 3 if a close pair of zeros was found, or + * RTFOUND = 1 if a new zero of g was found near tlo, or + * CV_SUCCESS = 0 otherwise. */ -static void cvBDFStab(CVodeMem cv_mem) +static int cvRcheck2(CVodeMem cv_mem) { - int i,k, ldflag, factorial; - realtype sq, sqm1, sqm2; + int i, retval; + realtype smallh, hratio, tplus; + booleantype zroot; - /* If order is 3 or greater, then save scaled derivative data, - push old data down in i, then add current values to top. */ + if (cv_mem->cv_irfnd == 0) return(CV_SUCCESS); - if (cv_mem->cv_q >= 3) { - for (k = 1; k <= 3; k++) - for (i = 5; i >= 2; i--) - cv_mem->cv_ssdat[i][k] = cv_mem->cv_ssdat[i-1][k]; - factorial = 1; - for (i = 1; i <= cv_mem->cv_q-1; i++) factorial *= i; - sq = factorial * cv_mem->cv_q * (cv_mem->cv_q+1) * - cv_mem->cv_acnrm / SUNMAX(cv_mem->cv_tq[5],TINY); - sqm1 = factorial * cv_mem->cv_q * - N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q], cv_mem->cv_ewt); - sqm2 = factorial * - N_VWrmsNorm(cv_mem->cv_zn[cv_mem->cv_q-1], cv_mem->cv_ewt); - cv_mem->cv_ssdat[1][1] = sqm2*sqm2; - cv_mem->cv_ssdat[1][2] = sqm1*sqm1; - cv_mem->cv_ssdat[1][3] = sq*sq; - } + (void) CVodeGetDky(cv_mem, cv_mem->cv_tlo, 0, cv_mem->cv_y); + retval = cv_mem->cv_gfun(cv_mem->cv_tlo, cv_mem->cv_y, + cv_mem->cv_glo, cv_mem->cv_user_data); + cv_mem->cv_nge++; + if (retval != 0) return(CV_RTFUNC_FAIL); - if (cv_mem->cv_qprime >= cv_mem->cv_q) { + zroot = SUNFALSE; + for (i = 0; i < cv_mem->cv_nrtfn; i++) cv_mem->cv_iroots[i] = 0; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if (!cv_mem->cv_gactive[i]) continue; + if (SUNRabs(cv_mem->cv_glo[i]) == ZERO) { + zroot = SUNTRUE; + cv_mem->cv_iroots[i] = 1; + } + } + if (!zroot) return(CV_SUCCESS); - /* If order is 3 or greater, and enough ssdat has been saved, - nscon >= q+5, then call stability limit detection routine. */ + /* One or more g_i has a zero at tlo. Check g at tlo+smallh. */ + cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * + cv_mem->cv_uround * HUNDRED; + smallh = (cv_mem->cv_h > ZERO) ? cv_mem->cv_ttol : -cv_mem->cv_ttol; + tplus = cv_mem->cv_tlo + smallh; + if ( (tplus - cv_mem->cv_tn)*cv_mem->cv_h >= ZERO) { + hratio = smallh/cv_mem->cv_h; + N_VLinearSum(ONE, cv_mem->cv_y, hratio, cv_mem->cv_zn[1], cv_mem->cv_y); + } else { + (void) CVodeGetDky(cv_mem, tplus, 0, cv_mem->cv_y); + } + retval = cv_mem->cv_gfun(tplus, cv_mem->cv_y, + cv_mem->cv_ghi, cv_mem->cv_user_data); + cv_mem->cv_nge++; + if (retval != 0) return(CV_RTFUNC_FAIL); - if ( (cv_mem->cv_q >= 3) && (cv_mem->cv_nscon >= cv_mem->cv_q+5) ) { - ldflag = cvSLdet(cv_mem); - if (ldflag > 3) { - /* A stability limit violation is indicated by - a return flag of 4, 5, or 6. - Reduce new order. */ - cv_mem->cv_qprime = cv_mem->cv_q-1; - cv_mem->cv_eta = cv_mem->cv_etaqm1; - cv_mem->cv_eta = SUNMIN(cv_mem->cv_eta,cv_mem->cv_etamax); - cv_mem->cv_eta = cv_mem->cv_eta / - SUNMAX(ONE,SUNRabs(cv_mem->cv_h)*cv_mem->cv_hmax_inv*cv_mem->cv_eta); - cv_mem->cv_hprime = cv_mem->cv_h * cv_mem->cv_eta; - cv_mem->cv_nor = cv_mem->cv_nor + 1; - } + /* Check for close roots (error return), for a new zero at tlo+smallh, + and for a g_i that changed from zero to nonzero. */ + zroot = SUNFALSE; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if (!cv_mem->cv_gactive[i]) continue; + if (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) { + if (cv_mem->cv_iroots[i] == 1) return(CLOSERT); + zroot = SUNTRUE; + cv_mem->cv_iroots[i] = 1; + } else { + if (cv_mem->cv_iroots[i] == 1) + cv_mem->cv_glo[i] = cv_mem->cv_ghi[i]; } } - else { - /* Otherwise, let order increase happen, and - reset stability limit counter, nscon. */ - cv_mem->cv_nscon = 0; - } + if (zroot) return(RTFOUND); + return(CV_SUCCESS); } /* - * cvSLdet - * - * This routine detects stability limitation using stored scaled - * derivatives data. cvSLdet returns the magnitude of the - * dominate characteristic root, rr. The presence of a stability - * limit is indicated by rr > "something a little less then 1.0", - * and a positive kflag. This routine should only be called if - * order is greater than or equal to 3, and data has been collected - * for 5 time steps. - * - * Returned values: - * kflag = 1 -> Found stable characteristic root, normal matrix case - * kflag = 2 -> Found stable characteristic root, quartic solution - * kflag = 3 -> Found stable characteristic root, quartic solution, - * with Newton correction - * kflag = 4 -> Found stability violation, normal matrix case - * kflag = 5 -> Found stability violation, quartic solution - * kflag = 6 -> Found stability violation, quartic solution, - * with Newton correction + * cvRcheck3 * - * kflag < 0 -> No stability limitation, - * or could not compute limitation. + * This routine interfaces to cvRootfind to look for a root of g + * between tlo and either tn or tout, whichever comes first. + * Only roots beyond tlo in the direction of integration are sought. * - * kflag = -1 -> Min/max ratio of ssdat too small. - * kflag = -2 -> For normal matrix case, vmax > vrrt2*vrrt2 - * kflag = -3 -> For normal matrix case, The three ratios - * are inconsistent. - * kflag = -4 -> Small coefficient prevents elimination of quartics. - * kflag = -5 -> R value from quartics not consistent. - * kflag = -6 -> No corrected root passes test on qk values - * kflag = -7 -> Trouble solving for sigsq. - * kflag = -8 -> Trouble solving for B, or R via B. - * kflag = -9 -> R via sigsq[k] disagrees with R from data. + * This routine returns an int equal to: + * CV_RTFUNC_FAIL < 0 if the g function failed, or + * RTFOUND = 1 if a root of g was found, or + * CV_SUCCESS = 0 otherwise. */ -static int cvSLdet(CVodeMem cv_mem) +static int cvRcheck3(CVodeMem cv_mem) { - int i, k, j, it, kmin = 0, kflag = 0; - realtype rat[5][4], rav[4], qkr[4], sigsq[4], smax[4], ssmax[4]; - realtype drr[4], rrc[4],sqmx[4], qjk[4][4], vrat[5], qc[6][4], qco[6][4]; - realtype rr, rrcut, vrrtol, vrrt2, sqtol, rrtol; - realtype smink, smaxk, sumrat, sumrsq, vmin, vmax, drrmax, adrr; - realtype tem, sqmax, saqk, qp, s, sqmaxk, saqj, sqmin; - realtype rsa, rsb, rsc, rsd, rd1a, rd1b, rd1c; - realtype rd2a, rd2b, rd3a, cest1, corr1; - realtype ratp, ratm, qfac1, qfac2, bb, rrb; - - /* The following are cutoffs and tolerances used by this routine */ + int i, ier, retval; - rrcut = RCONST(0.98); - vrrtol = RCONST(1.0e-4); - vrrt2 = RCONST(5.0e-4); - sqtol = RCONST(1.0e-3); - rrtol = RCONST(1.0e-2); - - rr = ZERO; - - /* Index k corresponds to the degree of the interpolating polynomial. */ - /* k = 1 -> q-1 */ - /* k = 2 -> q */ - /* k = 3 -> q+1 */ - - /* Index i is a backward-in-time index, i = 1 -> current time, */ - /* i = 2 -> previous step, etc */ - - /* get maxima, minima, and variances, and form quartic coefficients */ - - for (k=1; k<=3; k++) { - smink = cv_mem->cv_ssdat[1][k]; - smaxk = ZERO; - - for (i=1; i<=5; i++) { - smink = SUNMIN(smink,cv_mem->cv_ssdat[i][k]); - smaxk = SUNMAX(smaxk,cv_mem->cv_ssdat[i][k]); - } - - if (smink < TINY*smaxk) { - kflag = -1; - return(kflag); - } - smax[k] = smaxk; - ssmax[k] = smaxk*smaxk; - - sumrat = ZERO; - sumrsq = ZERO; - for (i=1; i<=4; i++) { - rat[i][k] = cv_mem->cv_ssdat[i][k] / cv_mem->cv_ssdat[i+1][k]; - sumrat = sumrat + rat[i][k]; - sumrsq = sumrsq + rat[i][k]*rat[i][k]; - } - rav[k] = FOURTH*sumrat; - vrat[k] = SUNRabs(FOURTH*sumrsq - rav[k]*rav[k]); - - qc[5][k] = cv_mem->cv_ssdat[1][k] * cv_mem->cv_ssdat[3][k] - - cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[2][k]; - qc[4][k] = cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[3][k] - - cv_mem->cv_ssdat[1][k] * cv_mem->cv_ssdat[4][k]; - qc[3][k] = ZERO; - qc[2][k] = cv_mem->cv_ssdat[2][k] * cv_mem->cv_ssdat[5][k] - - cv_mem->cv_ssdat[3][k] * cv_mem->cv_ssdat[4][k]; - qc[1][k] = cv_mem->cv_ssdat[4][k] * cv_mem->cv_ssdat[4][k] - - cv_mem->cv_ssdat[3][k] * cv_mem->cv_ssdat[5][k]; - - for (i=1; i<=5; i++) { - qco[i][k] = qc[i][k]; - } - } /* End of k loop */ - - /* Isolate normal or nearly-normal matrix case. The three quartics will - have a common or nearly-common root in this case. - Return a kflag = 1 if this procedure works. If the three roots - differ more than vrrt2, return error kflag = -3. */ - - vmin = SUNMIN(vrat[1],SUNMIN(vrat[2],vrat[3])); - vmax = SUNMAX(vrat[1],SUNMAX(vrat[2],vrat[3])); - - if (vmin < vrrtol*vrrtol) { - - if (vmax > vrrt2*vrrt2) { - kflag = -2; - return(kflag); + /* Set thi = tn or tout, whichever comes first; set y = y(thi). */ + if (cv_mem->cv_taskc == CV_ONE_STEP) { + cv_mem->cv_thi = cv_mem->cv_tn; + N_VScale(ONE, cv_mem->cv_zn[0], cv_mem->cv_y); + } + if (cv_mem->cv_taskc == CV_NORMAL) { + if ( (cv_mem->cv_toutc - cv_mem->cv_tn)*cv_mem->cv_h >= ZERO) { + cv_mem->cv_thi = cv_mem->cv_tn; + N_VScale(ONE, cv_mem->cv_zn[0], cv_mem->cv_y); } else { - rr = (rav[1] + rav[2] + rav[3])/THREE; - drrmax = ZERO; - for (k = 1;k<=3;k++) { - adrr = SUNRabs(rav[k] - rr); - drrmax = SUNMAX(drrmax, adrr); - } - if (drrmax > vrrt2) { kflag = -3; return(kflag); } - - kflag = 1; - - /* can compute charactistic root, drop to next section */ - } - - } else { - - /* use the quartics to get rr. */ - - if (SUNRabs(qco[1][1]) < TINY*ssmax[1]) { - kflag = -4; - return(kflag); - } - - tem = qco[1][2]/qco[1][1]; - for (i=2; i<=5; i++) { - qco[i][2] = qco[i][2] - tem*qco[i][1]; - } - - qco[1][2] = ZERO; - tem = qco[1][3]/qco[1][1]; - for (i=2; i<=5; i++) { - qco[i][3] = qco[i][3] - tem*qco[i][1]; - } - qco[1][3] = ZERO; - - if (SUNRabs(qco[2][2]) < TINY*ssmax[2]) { - kflag = -4; - return(kflag); + cv_mem->cv_thi = cv_mem->cv_toutc; + (void) CVodeGetDky(cv_mem, cv_mem->cv_thi, 0, cv_mem->cv_y); } + } - tem = qco[2][3]/qco[2][2]; - for (i=3; i<=5; i++) { - qco[i][3] = qco[i][3] - tem*qco[i][2]; - } + /* Set ghi = g(thi) and call cvRootfind to search (tlo,thi) for roots. */ + retval = cv_mem->cv_gfun(cv_mem->cv_thi, cv_mem->cv_y, + cv_mem->cv_ghi, cv_mem->cv_user_data); + cv_mem->cv_nge++; + if (retval != 0) return(CV_RTFUNC_FAIL); - if (SUNRabs(qco[4][3]) < TINY*ssmax[3]) { - kflag = -4; - return(kflag); - } + cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * + cv_mem->cv_uround * HUNDRED; + ier = cvRootfind(cv_mem); + if (ier == CV_RTFUNC_FAIL) return(CV_RTFUNC_FAIL); + for(i=0; icv_nrtfn; i++) { + if(!cv_mem->cv_gactive[i] && cv_mem->cv_grout[i] != ZERO) + cv_mem->cv_gactive[i] = SUNTRUE; + } + cv_mem->cv_tlo = cv_mem->cv_trout; + for (i = 0; i < cv_mem->cv_nrtfn; i++) + cv_mem->cv_glo[i] = cv_mem->cv_grout[i]; - rr = -qco[5][3]/qco[4][3]; + /* If no root found, return CV_SUCCESS. */ + if (ier == CV_SUCCESS) return(CV_SUCCESS); - if (rr < TINY || rr > HUNDRED) { - kflag = -5; - return(kflag); - } + /* If a root was found, interpolate to get y(trout) and return. */ + (void) CVodeGetDky(cv_mem, cv_mem->cv_trout, 0, cv_mem->cv_y); + return(RTFOUND); +} - for (k=1; k<=3; k++) - qkr[k] = qc[5][k] + rr*(qc[4][k] + rr*rr*(qc[2][k] + rr*qc[1][k])); +#define DIFFERENT_SIGN(a,b) ( ( (a) < 0 && (b) > 0 ) || ( (a) > 0 && (b) < 0 ) ) - sqmax = ZERO; - for (k=1; k<=3; k++) { - saqk = SUNRabs(qkr[k])/ssmax[k]; - if (saqk > sqmax) sqmax = saqk; - } +/* + * cvRootfind + * + * This routine solves for a root of g(t) between tlo and thi, if + * one exists. Only roots of odd multiplicity (i.e. with a change + * of sign in one of the g_i), or exact zeros, are found. + * Here the sign of tlo - thi is arbitrary, but if multiple roots + * are found, the one closest to tlo is returned. + * + * The method used is the Illinois algorithm, a modified secant method. + * Reference: Kathie L. Hiebert and Lawrence F. Shampine, Implicitly + * Defined Output Points for Solutions of ODEs, Sandia National + * Laboratory Report SAND80-0180, February 1980. + * + * This routine uses the following parameters for communication: + * + * nrtfn = number of functions g_i, or number of components of + * the vector-valued function g(t). Input only. + * + * gfun = user-defined function for g(t). Its form is + * (void) gfun(t, y, gt, user_data) + * + * rootdir = in array specifying the direction of zero-crossings. + * If rootdir[i] > 0, search for roots of g_i only if + * g_i is increasing; if rootdir[i] < 0, search for + * roots of g_i only if g_i is decreasing; otherwise + * always search for roots of g_i. + * + * gactive = array specifying whether a component of g should + * or should not be monitored. gactive[i] is initially + * set to SUNTRUE for all i=0,...,nrtfn-1, but it may be + * reset to SUNFALSE if at the first step g[i] is 0.0 + * both at the I.C. and at a small perturbation of them. + * gactive[i] is then set back on SUNTRUE only after the + * corresponding g function moves away from 0.0. + * + * nge = cumulative counter for gfun calls. + * + * ttol = a convergence tolerance for trout. Input only. + * When a root at trout is found, it is located only to + * within a tolerance of ttol. Typically, ttol should + * be set to a value on the order of + * 100 * UROUND * max (SUNRabs(tlo), SUNRabs(thi)) + * where UROUND is the unit roundoff of the machine. + * + * tlo, thi = endpoints of the interval in which roots are sought. + * On input, these must be distinct, but tlo - thi may + * be of either sign. The direction of integration is + * assumed to be from tlo to thi. On return, tlo and thi + * are the endpoints of the final relevant interval. + * + * glo, ghi = arrays of length nrtfn containing the vectors g(tlo) + * and g(thi) respectively. Input and output. On input, + * none of the glo[i] should be zero. + * + * trout = root location, if a root was found, or thi if not. + * Output only. If a root was found other than an exact + * zero of g, trout is the endpoint thi of the final + * interval bracketing the root, with size at most ttol. + * + * grout = array of length nrtfn containing g(trout) on return. + * + * iroots = int array of length nrtfn with root information. + * Output only. If a root was found, iroots indicates + * which components g_i have a root at trout. For + * i = 0, ..., nrtfn-1, iroots[i] = 1 if g_i has a root + * and g_i is increasing, iroots[i] = -1 if g_i has a + * root and g_i is decreasing, and iroots[i] = 0 if g_i + * has no roots or g_i varies in the direction opposite + * to that indicated by rootdir[i]. + * + * This routine returns an int equal to: + * CV_RTFUNC_FAIL < 0 if the g function failed, or + * RTFOUND = 1 if a root of g was found, or + * CV_SUCCESS = 0 otherwise. + */ - if (sqmax < sqtol) { - kflag = 2; +static int cvRootfind(CVodeMem cv_mem) +{ + realtype alph, tmid, gfrac, maxfrac, fracint, fracsub; + int i, retval, imax, side, sideprev; + booleantype zroot, sgnchg; - /* can compute charactistic root, drop to "given rr,etc" */ + imax = 0; + /* First check for change in sign in ghi or for a zero in ghi. */ + maxfrac = ZERO; + zroot = SUNFALSE; + sgnchg = SUNFALSE; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if(!cv_mem->cv_gactive[i]) continue; + if (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) { + if(cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) { + zroot = SUNTRUE; + } } else { - - /* do Newton corrections to improve rr. */ - - for (it=1; it<=3; it++) { - for (k=1; k<=3; k++) { - qp = qc[4][k] + rr*rr*(THREE*qc[2][k] + rr*FOUR*qc[1][k]); - drr[k] = ZERO; - if (SUNRabs(qp) > TINY*ssmax[k]) drr[k] = -qkr[k]/qp; - rrc[k] = rr + drr[k]; - } - - for (k=1; k<=3; k++) { - s = rrc[k]; - sqmaxk = ZERO; - for (j=1; j<=3; j++) { - qjk[j][k] = qc[5][j] + s*(qc[4][j] + s*s*(qc[2][j] + s*qc[1][j])); - saqj = SUNRabs(qjk[j][k])/ssmax[j]; - if (saqj > sqmaxk) sqmaxk = saqj; - } - sqmx[k] = sqmaxk; - } - - sqmin = sqmx[1] + ONE; - for (k=1; k<=3; k++) { - if (sqmx[k] < sqmin) { - kmin = k; - sqmin = sqmx[k]; - } - } - rr = rrc[kmin]; - - if (sqmin < sqtol) { - kflag = 3; - /* can compute charactistic root */ - /* break out of Newton correction loop and drop to "given rr,etc" */ - break; - } else { - for (j=1; j<=3; j++) { - qkr[j] = qjk[j][kmin]; - } + if ( (DIFFERENT_SIGN(cv_mem->cv_glo[i], cv_mem->cv_ghi[i])) && + (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) { + gfrac = SUNRabs(cv_mem->cv_ghi[i]/(cv_mem->cv_ghi[i] - cv_mem->cv_glo[i])); + if (gfrac > maxfrac) { + sgnchg = SUNTRUE; + maxfrac = gfrac; + imax = i; } - } /* end of Newton correction loop */ - - if (sqmin > sqtol) { - kflag = -6; - return(kflag); } - } /* end of if (sqmax < sqtol) else */ - } /* end of if (vmin < vrrtol*vrrtol) else, quartics to get rr. */ - - /* given rr, find sigsq[k] and verify rr. */ - /* All positive kflag drop to this section */ - - for (k=1; k<=3; k++) { - rsa = cv_mem->cv_ssdat[1][k]; - rsb = cv_mem->cv_ssdat[2][k]*rr; - rsc = cv_mem->cv_ssdat[3][k]*rr*rr; - rsd = cv_mem->cv_ssdat[4][k]*rr*rr*rr; - rd1a = rsa - rsb; - rd1b = rsb - rsc; - rd1c = rsc - rsd; - rd2a = rd1a - rd1b; - rd2b = rd1b - rd1c; - rd3a = rd2a - rd2b; - - if (SUNRabs(rd1b) < TINY*smax[k]) { - kflag = -7; - return(kflag); } + } - cest1 = -rd3a/rd1b; - if (cest1 < TINY || cest1 > FOUR) { - kflag = -7; - return(kflag); + /* If no sign change was found, reset trout and grout. Then return + CV_SUCCESS if no zero was found, or set iroots and return RTFOUND. */ + if (!sgnchg) { + cv_mem->cv_trout = cv_mem->cv_thi; + for (i = 0; i < cv_mem->cv_nrtfn; i++) cv_mem->cv_grout[i] = cv_mem->cv_ghi[i]; + if (!zroot) return(CV_SUCCESS); + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + cv_mem->cv_iroots[i] = 0; + if(!cv_mem->cv_gactive[i]) continue; + if ( (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) && + (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) + cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1 : 1; } - corr1 = (rd2b/cest1)/(rr*rr); - sigsq[k] = cv_mem->cv_ssdat[3][k] + corr1; + return(RTFOUND); } - if (sigsq[2] < TINY) { - kflag = -8; - return(kflag); - } + /* Initialize alph to avoid compiler warning */ + alph = ONE; - ratp = sigsq[3]/sigsq[2]; - ratm = sigsq[1]/sigsq[2]; - qfac1 = FOURTH*(cv_mem->cv_q*cv_mem->cv_q - ONE); - qfac2 = TWO/(cv_mem->cv_q - ONE); - bb = ratp*ratm - ONE - qfac1*ratp; - tem = ONE - qfac2*bb; + /* A sign change was found. Loop to locate nearest root. */ - if (SUNRabs(tem) < TINY) { - kflag = -8; - return(kflag); - } + side = 0; sideprev = -1; + for(;;) { /* Looping point */ - rrb = ONE/tem; + /* If interval size is already less than tolerance ttol, break. */ + if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; - if (SUNRabs(rrb - rr) > rrtol) { - kflag = -9; - return(kflag); - } + /* Set weight alph. + On the first two passes, set alph = 1. Thereafter, reset alph + according to the side (low vs high) of the subinterval in which + the sign change was found in the previous two passes. + If the sides were opposite, set alph = 1. + If the sides were the same, then double alph (if high side), + or halve alph (if low side). + The next guess tmid is the secant method value if alph = 1, but + is closer to tlo if alph < 1, and closer to thi if alph > 1. */ - /* Check to see if rr is above cutoff rrcut */ - if (rr > rrcut) { - if (kflag == 1) kflag = 4; - if (kflag == 2) kflag = 5; - if (kflag == 3) kflag = 6; - } + if (sideprev == side) { + alph = (side == 2) ? alph*TWO : alph*HALF; + } else { + alph = ONE; + } - /* All positive kflag returned at this point */ + /* Set next root approximation tmid and get g(tmid). + If tmid is too close to tlo or thi, adjust it inward, + by a fractional distance that is between 0.1 and 0.5. */ + tmid = cv_mem->cv_thi - (cv_mem->cv_thi - cv_mem->cv_tlo) * + cv_mem->cv_ghi[imax] / (cv_mem->cv_ghi[imax] - alph*cv_mem->cv_glo[imax]); + if (SUNRabs(tmid - cv_mem->cv_tlo) < HALF*cv_mem->cv_ttol) { + fracint = SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo)/cv_mem->cv_ttol; + fracsub = (fracint > FIVE) ? PT1 : HALF/fracint; + tmid = cv_mem->cv_tlo + fracsub*(cv_mem->cv_thi - cv_mem->cv_tlo); + } + if (SUNRabs(cv_mem->cv_thi - tmid) < HALF*cv_mem->cv_ttol) { + fracint = SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo)/cv_mem->cv_ttol; + fracsub = (fracint > FIVE) ? PT1 : HALF/fracint; + tmid = cv_mem->cv_thi - fracsub*(cv_mem->cv_thi - cv_mem->cv_tlo); + } - return(kflag); + (void) CVodeGetDky(cv_mem, tmid, 0, cv_mem->cv_y); + retval = cv_mem->cv_gfun(tmid, cv_mem->cv_y, cv_mem->cv_grout, + cv_mem->cv_user_data); + cv_mem->cv_nge++; + if (retval != 0) return(CV_RTFUNC_FAIL); + + /* Check to see in which subinterval g changes sign, and reset imax. + Set side = 1 if sign change is on low side, or 2 if on high side. */ + maxfrac = ZERO; + zroot = SUNFALSE; + sgnchg = SUNFALSE; + sideprev = side; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + if(!cv_mem->cv_gactive[i]) continue; + if (SUNRabs(cv_mem->cv_grout[i]) == ZERO) { + if(cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) zroot = SUNTRUE; + } else { + if ( (DIFFERENT_SIGN(cv_mem->cv_glo[i], cv_mem->cv_grout[i])) && + (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) { + gfrac = SUNRabs(cv_mem->cv_grout[i] / + (cv_mem->cv_grout[i] - cv_mem->cv_glo[i])); + if (gfrac > maxfrac) { + sgnchg = SUNTRUE; + maxfrac = gfrac; + imax = i; + } + } + } + } + if (sgnchg) { + /* Sign change found in (tlo,tmid); replace thi with tmid. */ + cv_mem->cv_thi = tmid; + for (i = 0; i < cv_mem->cv_nrtfn; i++) + cv_mem->cv_ghi[i] = cv_mem->cv_grout[i]; + side = 1; + /* Stop at root thi if converged; otherwise loop. */ + if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; + continue; /* Return to looping point. */ + } + + if (zroot) { + /* No sign change in (tlo,tmid), but g = 0 at tmid; return root tmid. */ + cv_mem->cv_thi = tmid; + for (i = 0; i < cv_mem->cv_nrtfn; i++) + cv_mem->cv_ghi[i] = cv_mem->cv_grout[i]; + break; + } + + /* No sign change in (tlo,tmid), and no zero at tmid. + Sign change must be in (tmid,thi). Replace tlo with tmid. */ + cv_mem->cv_tlo = tmid; + for (i = 0; i < cv_mem->cv_nrtfn; i++) + cv_mem->cv_glo[i] = cv_mem->cv_grout[i]; + side = 2; + /* Stop at root thi if converged; otherwise loop back. */ + if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; + + } /* End of root-search loop */ + /* Reset trout and grout, set iroots, and return RTFOUND. */ + cv_mem->cv_trout = cv_mem->cv_thi; + for (i = 0; i < cv_mem->cv_nrtfn; i++) { + cv_mem->cv_grout[i] = cv_mem->cv_ghi[i]; + cv_mem->cv_iroots[i] = 0; + if(!cv_mem->cv_gactive[i]) continue; + if ( (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) && + (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) + cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1 : 1; + if ( (DIFFERENT_SIGN(cv_mem->cv_glo[i], cv_mem->cv_ghi[i])) && + (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) + cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1 : 1; + } + return(RTFOUND); } /* - * ----------------------------------------------------------------- - * Functions for rootfinding - * ----------------------------------------------------------------- + * ================================================================= + * Internal EWT function + * ================================================================= */ /* - * cvRcheck1 + * cvEwtSet * - * This routine completes the initialization of rootfinding memory - * information, and checks whether g has a zero both at and very near - * the initial point of the IVP. + * This routine is responsible for setting the error weight vector ewt, + * according to tol_type, as follows: * - * This routine returns an int equal to: - * CV_RTFUNC_FAIL < 0 if the g function failed, or - * CV_SUCCESS = 0 otherwise. + * (1) ewt[i] = 1 / (reltol * SUNRabs(ycur[i]) + abstol), i=0,...,neq-1 + * if tol_type = CV_SS + * (2) ewt[i] = 1 / (reltol * SUNRabs(ycur[i]) + abstol[i]), i=0,...,neq-1 + * if tol_type = CV_SV + * + * cvEwtSet returns 0 if ewt is successfully set as above to a + * positive vector and -1 otherwise. In the latter case, ewt is + * considered undefined. + * + * All the real work is done in the routines cvEwtSetSS, cvEwtSetSV. */ -static int cvRcheck1(CVodeMem cv_mem) +int cvEwtSet(N_Vector ycur, N_Vector weight, void *data) { - int i, retval; - realtype smallh, hratio, tplus; - booleantype zroot; + CVodeMem cv_mem; + int flag = 0; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_iroots[i] = 0; - cv_mem->cv_tlo = cv_mem->cv_tn; - cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * - cv_mem->cv_uround*HUNDRED; + /* data points to cv_mem here */ - /* Evaluate g at initial t and check for zero values. */ - retval = cv_mem->cv_gfun(cv_mem->cv_tlo, cv_mem->cv_zn[0], - cv_mem->cv_glo, cv_mem->cv_user_data); - cv_mem->cv_nge = 1; - if (retval != 0) return(CV_RTFUNC_FAIL); + cv_mem = (CVodeMem) data; - zroot = SUNFALSE; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if (SUNRabs(cv_mem->cv_glo[i]) == ZERO) { - zroot = SUNTRUE; - cv_mem->cv_gactive[i] = SUNFALSE; - } + switch(cv_mem->cv_itol) { + case CV_SS: + flag = cvEwtSetSS(cv_mem, ycur, weight); + break; + case CV_SV: + flag = cvEwtSetSV(cv_mem, ycur, weight); + break; } - if (!zroot) return(CV_SUCCESS); - /* Some g_i is zero at t0; look at g at t0+(small increment). */ - hratio = SUNMAX(cv_mem->cv_ttol/SUNRabs(cv_mem->cv_h), PT1); - smallh = hratio*cv_mem->cv_h; - tplus = cv_mem->cv_tlo + smallh; - N_VLinearSum(ONE, cv_mem->cv_zn[0], hratio, cv_mem->cv_zn[1], cv_mem->cv_y); - retval = cv_mem->cv_gfun(tplus, cv_mem->cv_y, - cv_mem->cv_ghi, cv_mem->cv_user_data); - cv_mem->cv_nge++; - if (retval != 0) return(CV_RTFUNC_FAIL); + return(flag); +} - /* We check now only the components of g which were exactly 0.0 at t0 - * to see if we can 'activate' them. */ - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if (!cv_mem->cv_gactive[i] && SUNRabs(cv_mem->cv_ghi[i]) != ZERO) { - cv_mem->cv_gactive[i] = SUNTRUE; - cv_mem->cv_glo[i] = cv_mem->cv_ghi[i]; - } +/* + * cvEwtSetSS + * + * This routine sets ewt as decribed above in the case tol_type = CV_SS. + * If the absolute tolerance is zero, it tests for non-positive components + * before inverting. cvEwtSetSS returns 0 if ewt is successfully set to a + * positive vector and -1 otherwise. In the latter case, ewt is considered + * undefined. + */ + +static int cvEwtSetSS(CVodeMem cv_mem, N_Vector ycur, N_Vector weight) +{ + N_VAbs(ycur, cv_mem->cv_tempv); + N_VScale(cv_mem->cv_reltol, cv_mem->cv_tempv, cv_mem->cv_tempv); + N_VAddConst(cv_mem->cv_tempv, cv_mem->cv_Sabstol, cv_mem->cv_tempv); + if (cv_mem->cv_atolmin0) { + if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); + } + N_VInv(cv_mem->cv_tempv, weight); + return(0); +} + +/* + * cvEwtSetSV + * + * This routine sets ewt as decribed above in the case tol_type = CV_SV. + * If any absolute tolerance is zero, it tests for non-positive components + * before inverting. cvEwtSetSV returns 0 if ewt is successfully set to a + * positive vector and -1 otherwise. In the latter case, ewt is considered + * undefined. + */ + +static int cvEwtSetSV(CVodeMem cv_mem, N_Vector ycur, N_Vector weight) +{ + N_VAbs(ycur, cv_mem->cv_tempv); + N_VLinearSum(cv_mem->cv_reltol, cv_mem->cv_tempv, ONE, + cv_mem->cv_Vabstol, cv_mem->cv_tempv); + if (cv_mem->cv_atolmin0) { + if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); } - return(CV_SUCCESS); + N_VInv(cv_mem->cv_tempv, weight); + return(0); } /* - * cvRcheck2 - * - * This routine checks for exact zeros of g at the last root found, - * if the last return was a root. It then checks for a close pair of - * zeros (an error condition), and for a new root at a nearby point. - * The array glo = g(tlo) at the left endpoint of the search interval - * is adjusted if necessary to assure that all g_i are nonzero - * there, before returning to do a root search in the interval. - * - * On entry, tlo = tretlast is the last value of tret returned by - * CVode. This may be the previous tn, the previous tout value, - * or the last root location. + * cvQuadEwtSet * - * This routine returns an int equal to: - * CV_RTFUNC_FAIL < 0 if the g function failed, or - * CLOSERT = 3 if a close pair of zeros was found, or - * RTFOUND = 1 if a new zero of g was found near tlo, or - * CV_SUCCESS = 0 otherwise. */ -static int cvRcheck2(CVodeMem cv_mem) +static int cvQuadEwtSet(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) { - int i, retval; - realtype smallh, hratio, tplus; - booleantype zroot; + int flag=0; - if (cv_mem->cv_irfnd == 0) return(CV_SUCCESS); + switch (cv_mem->cv_itolQ) { + case CV_SS: + flag = cvQuadEwtSetSS(cv_mem, qcur, weightQ); + break; + case CV_SV: + flag = cvQuadEwtSetSV(cv_mem, qcur, weightQ); + break; + } - (void) CVodeGetDky(cv_mem, cv_mem->cv_tlo, 0, cv_mem->cv_y); - retval = cv_mem->cv_gfun(cv_mem->cv_tlo, cv_mem->cv_y, - cv_mem->cv_glo, cv_mem->cv_user_data); - cv_mem->cv_nge++; - if (retval != 0) return(CV_RTFUNC_FAIL); + return(flag); - zroot = SUNFALSE; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_iroots[i] = 0; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if (!cv_mem->cv_gactive[i]) continue; - if (SUNRabs(cv_mem->cv_glo[i]) == ZERO) { - zroot = SUNTRUE; - cv_mem->cv_iroots[i] = 1; - } - } - if (!zroot) return(CV_SUCCESS); +} - /* One or more g_i has a zero at tlo. Check g at tlo+smallh. */ - cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * - cv_mem->cv_uround*HUNDRED; - smallh = (cv_mem->cv_h > ZERO) ? cv_mem->cv_ttol : -cv_mem->cv_ttol; - tplus = cv_mem->cv_tlo + smallh; - if ( (tplus - cv_mem->cv_tn)*cv_mem->cv_h >= ZERO) { - hratio = smallh/cv_mem->cv_h; - N_VLinearSum(ONE, cv_mem->cv_y, hratio, cv_mem->cv_zn[1], cv_mem->cv_y); - } else { - (void) CVodeGetDky(cv_mem, tplus, 0, cv_mem->cv_y); - } - retval = cv_mem->cv_gfun(tplus, cv_mem->cv_y, - cv_mem->cv_ghi, cv_mem->cv_user_data); - cv_mem->cv_nge++; - if (retval != 0) return(CV_RTFUNC_FAIL); +/* + * cvQuadEwtSetSS + * + */ - /* Check for close roots (error return), for a new zero at tlo+smallh, - and for a g_i that changed from zero to nonzero. */ - zroot = SUNFALSE; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if (!cv_mem->cv_gactive[i]) continue; - if (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) { - if (cv_mem->cv_iroots[i] == 1) return(CLOSERT); - zroot = SUNTRUE; - cv_mem->cv_iroots[i] = 1; - } else { - if (cv_mem->cv_iroots[i] == 1) - cv_mem->cv_glo[i] = cv_mem->cv_ghi[i]; - } +static int cvQuadEwtSetSS(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) +{ + N_VAbs(qcur, cv_mem->cv_tempvQ); + N_VScale(cv_mem->cv_reltolQ, cv_mem->cv_tempvQ, cv_mem->cv_tempvQ); + N_VAddConst(cv_mem->cv_tempvQ, cv_mem->cv_SabstolQ, cv_mem->cv_tempvQ); + if (cv_mem->cv_atolQmin0) { + if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); } - if (zroot) return(RTFOUND); - return(CV_SUCCESS); + N_VInv(cv_mem->cv_tempvQ, weightQ); + return(0); } /* - * cvRcheck3 - * - * This routine interfaces to cvRootfind to look for a root of g - * between tlo and either tn or tout, whichever comes first. - * Only roots beyond tlo in the direction of integration are sought. + * cvQuadEwtSetSV * - * This routine returns an int equal to: - * CV_RTFUNC_FAIL < 0 if the g function failed, or - * RTFOUND = 1 if a root of g was found, or - * CV_SUCCESS = 0 otherwise. */ -static int cvRcheck3(CVodeMem cv_mem) +static int cvQuadEwtSetSV(CVodeMem cv_mem, N_Vector qcur, N_Vector weightQ) { - int i, ier, retval; - - /* Set thi = tn or tout, whichever comes first; set y = y(thi). */ - if (cv_mem->cv_taskc == CV_ONE_STEP) { - cv_mem->cv_thi = cv_mem->cv_tn; - N_VScale(ONE, cv_mem->cv_zn[0], cv_mem->cv_y); - } - if (cv_mem->cv_taskc == CV_NORMAL) { - if ( (cv_mem->cv_toutc - cv_mem->cv_tn)*cv_mem->cv_h >= ZERO) { - cv_mem->cv_thi = cv_mem->cv_tn; - N_VScale(ONE, cv_mem->cv_zn[0], cv_mem->cv_y); - } else { - cv_mem->cv_thi = cv_mem->cv_toutc; - (void) CVodeGetDky(cv_mem, cv_mem->cv_thi, 0, cv_mem->cv_y); - } + N_VAbs(qcur, cv_mem->cv_tempvQ); + N_VLinearSum(cv_mem->cv_reltolQ, cv_mem->cv_tempvQ, ONE, + cv_mem->cv_VabstolQ, cv_mem->cv_tempvQ); + if (cv_mem->cv_atolQmin0) { + if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); } + N_VInv(cv_mem->cv_tempvQ, weightQ); + return(0); +} - /* Set ghi = g(thi) and call cvRootfind to search (tlo,thi) for roots. */ - retval = cv_mem->cv_gfun(cv_mem->cv_thi, cv_mem->cv_y, - cv_mem->cv_ghi, cv_mem->cv_user_data); - cv_mem->cv_nge++; - if (retval != 0) return(CV_RTFUNC_FAIL); +/* + * cvSensEwtSet + * + */ - cv_mem->cv_ttol = (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)) * - cv_mem->cv_uround*HUNDRED; - ier = cvRootfind(cv_mem); - if (ier == CV_RTFUNC_FAIL) return(CV_RTFUNC_FAIL); - for(i=0; icv_nrtfn; i++) { - if(!cv_mem->cv_gactive[i] && cv_mem->cv_grout[i] != ZERO) - cv_mem->cv_gactive[i] = SUNTRUE; - } - cv_mem->cv_tlo = cv_mem->cv_trout; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_glo[i] = cv_mem->cv_grout[i]; +static int cvSensEwtSet(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) +{ + int flag=0; - /* If no root found, return CV_SUCCESS. */ - if (ier == CV_SUCCESS) return(CV_SUCCESS); + switch (cv_mem->cv_itolS) { + case CV_EE: + flag = cvSensEwtSetEE(cv_mem, yScur, weightS); + break; + case CV_SS: + flag = cvSensEwtSetSS(cv_mem, yScur, weightS); + break; + case CV_SV: + flag = cvSensEwtSetSV(cv_mem, yScur, weightS); + break; + } - /* If a root was found, interpolate to get y(trout) and return. */ - (void) CVodeGetDky(cv_mem, cv_mem->cv_trout, 0, cv_mem->cv_y); - return(RTFOUND); + return(flag); } /* - * cvRootfind - * - * This routine solves for a root of g(t) between tlo and thi, if - * one exists. Only roots of odd multiplicity (i.e. with a change - * of sign in one of the g_i), or exact zeros, are found. - * Here the sign of tlo - thi is arbitrary, but if multiple roots - * are found, the one closest to tlo is returned. - * - * The method used is the Illinois algorithm, a modified secant method. - * Reference: Kathie L. Hiebert and Lawrence F. Shampine, Implicitly - * Defined Output Points for Solutions of ODEs, Sandia National - * Laboratory Report SAND80-0180, February 1980. - * - * This routine uses the following parameters for communication: - * - * nrtfn = number of functions g_i, or number of components of - * the vector-valued function g(t). Input only. - * - * gfun = user-defined function for g(t). Its form is - * (void) gfun(t, y, gt, user_data) - * - * rootdir = in array specifying the direction of zero-crossings. - * If rootdir[i] > 0, search for roots of g_i only if - * g_i is increasing; if rootdir[i] < 0, search for - * roots of g_i only if g_i is decreasing; otherwise - * always search for roots of g_i. - * - * gactive = array specifying whether a component of g should - * or should not be monitored. gactive[i] is initially - * set to SUNTRUE for all i=0,...,nrtfn-1, but it may be - * reset to SUNFALSE if at the first step g[i] is 0.0 - * both at the I.C. and at a small perturbation of them. - * gactive[i] is then set back on SUNTRUE only after the - * corresponding g function moves away from 0.0. - * - * nge = cumulative counter for gfun calls. - * - * ttol = a convergence tolerance for trout. Input only. - * When a root at trout is found, it is located only to - * within a tolerance of ttol. Typically, ttol should - * be set to a value on the order of - * 100 * UROUND * max (SUNRabs(tlo), SUNRabs(thi)) - * where UROUND is the unit roundoff of the machine. - * - * tlo, thi = endpoints of the interval in which roots are sought. - * On input, these must be distinct, but tlo - thi may - * be of either sign. The direction of integration is - * assumed to be from tlo to thi. On return, tlo and thi - * are the endpoints of the final relevant interval. + * cvSensEwtSetEE * - * glo, ghi = arrays of length nrtfn containing the vectors g(tlo) - * and g(thi) respectively. Input and output. On input, - * none of the glo[i] should be zero. + * In this case, the error weight vector for the i-th sensitivity is set to * - * trout = root location, if a root was found, or thi if not. - * Output only. If a root was found other than an exact - * zero of g, trout is the endpoint thi of the final - * interval bracketing the root, with size at most ttol. + * ewtS_i = pbar_i * efun(pbar_i*yS_i) * - * grout = array of length nrtfn containing g(trout) on return. + * In other words, the scaled sensitivity pbar_i * yS_i has the same error + * weight vector calculation as the solution vector. * - * iroots = int array of length nrtfn with root information. - * Output only. If a root was found, iroots indicates - * which components g_i have a root at trout. For - * i = 0, ..., nrtfn-1, iroots[i] = 1 if g_i has a root - * and g_i is increasing, iroots[i] = -1 if g_i has a - * root and g_i is decreasing, and iroots[i] = 0 if g_i - * has no roots or g_i varies in the direction opposite - * to that indicated by rootdir[i]. + */ + +static int cvSensEwtSetEE(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) +{ + int is; + N_Vector pyS; + int flag; + + /* Use tempvS[0] as temporary storage for the scaled sensitivity */ + pyS = cv_mem->cv_tempvS[0]; + + for (is=0; iscv_Ns; is++) { + N_VScale(cv_mem->cv_pbar[is], yScur[is], pyS); + flag = cv_mem->cv_efun(pyS, weightS[is], cv_mem->cv_e_data); + if (flag != 0) return(-1); + N_VScale(cv_mem->cv_pbar[is], weightS[is], weightS[is]); + } + return(0); +} + +/* + * cvSensEwtSetSS * - * This routine returns an int equal to: - * CV_RTFUNC_FAIL < 0 if the g function failed, or - * RTFOUND = 1 if a root of g was found, or - * CV_SUCCESS = 0 otherwise. */ -static int cvRootfind(CVodeMem cv_mem) +static int cvSensEwtSetSS(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) { - realtype alph, tmid, gfrac, maxfrac, fracint, fracsub; - int i, retval, imax, side, sideprev; - booleantype zroot, sgnchg; - - imax = 0; + int is; - /* First check for change in sign in ghi or for a zero in ghi. */ - maxfrac = ZERO; - zroot = SUNFALSE; - sgnchg = SUNFALSE; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if(!cv_mem->cv_gactive[i]) continue; - if (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) { - if(cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) { - zroot = SUNTRUE; - } - } else { - if ( (cv_mem->cv_glo[i]*cv_mem->cv_ghi[i] < ZERO) && - (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) { - gfrac = SUNRabs(cv_mem->cv_ghi[i]/(cv_mem->cv_ghi[i] - cv_mem->cv_glo[i])); - if (gfrac > maxfrac) { - sgnchg = SUNTRUE; - maxfrac = gfrac; - imax = i; - } - } + for (is=0; iscv_Ns; is++) { + N_VAbs(yScur[is], cv_mem->cv_tempv); + N_VScale(cv_mem->cv_reltolS, cv_mem->cv_tempv, cv_mem->cv_tempv); + N_VAddConst(cv_mem->cv_tempv, cv_mem->cv_SabstolS[is], cv_mem->cv_tempv); + if (cv_mem->cv_atolSmin0[is]) { + if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); } + N_VInv(cv_mem->cv_tempv, weightS[is]); } + return(0); +} - /* If no sign change was found, reset trout and grout. Then return - CV_SUCCESS if no zero was found, or set iroots and return RTFOUND. */ - if (!sgnchg) { - cv_mem->cv_trout = cv_mem->cv_thi; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_grout[i] = cv_mem->cv_ghi[i]; - if (!zroot) return(CV_SUCCESS); - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - cv_mem->cv_iroots[i] = 0; - if(!cv_mem->cv_gactive[i]) continue; - if ( (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) && - (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) - cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1:1; +/* + * cvSensEwtSetSV + * + */ + +static int cvSensEwtSetSV(CVodeMem cv_mem, N_Vector *yScur, N_Vector *weightS) +{ + int is; + + for (is=0; iscv_Ns; is++) { + N_VAbs(yScur[is], cv_mem->cv_tempv); + N_VLinearSum(cv_mem->cv_reltolS, cv_mem->cv_tempv, ONE, + cv_mem->cv_VabstolS[is], cv_mem->cv_tempv); + if (cv_mem->cv_atolSmin0[is]) { + if (N_VMin(cv_mem->cv_tempv) <= ZERO) return(-1); } - return(RTFOUND); + N_VInv(cv_mem->cv_tempv, weightS[is]); } + return(0); +} - /* Initialize alph to avoid compiler warning */ - alph = ONE; - - /* A sign change was found. Loop to locate nearest root. */ +/* + * cvQuadSensEwtSet + * + */ - side = 0; sideprev = -1; - for(;;) { /* Looping point */ +static int cvQuadSensEwtSet(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +{ + int flag=0; - /* If interval size is already less than tolerance ttol, break. */ - if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; + switch (cv_mem->cv_itolQS) { + case CV_EE: + flag = cvQuadSensEwtSetEE(cv_mem, yQScur, weightQS); + break; + case CV_SS: + flag = cvQuadSensEwtSetSS(cv_mem, yQScur, weightQS); + break; + case CV_SV: + flag = cvQuadSensEwtSetSV(cv_mem, yQScur, weightQS); + break; + } - /* Set weight alph. - On the first two passes, set alph = 1. Thereafter, reset alph - according to the side (low vs high) of the subinterval in which - the sign change was found in the previous two passes. - If the sides were opposite, set alph = 1. - If the sides were the same, then double alph (if high side), - or halve alph (if low side). - The next guess tmid is the secant method value if alph = 1, but - is closer to cv_mem->cv_tlo if alph < 1, and closer to thi if alph > 1. */ + return(flag); +} - if (sideprev == side) { - alph = (side == 2) ? alph*TWO : alph*HALF; - } else { - alph = ONE; - } +/* + * cvQuadSensEwtSetEE + * + * In this case, the error weight vector for the i-th quadrature sensitivity + * is set to + * + * ewtQS_i = pbar_i * cvQuadEwtSet(pbar_i*yQS_i) + * + * In other words, the scaled sensitivity pbar_i * yQS_i has the same error + * weight vector calculation as the quadrature vector. + * + */ +static int cvQuadSensEwtSetEE(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +{ + int is; + N_Vector pyS; + int flag; - /* Set next root approximation tmid and get g(tmid). - If tmid is too close to tlo or thi, adjust it inward, - by a fractional distance that is between 0.1 and 0.5. */ - tmid = cv_mem->cv_thi - (cv_mem->cv_thi - cv_mem->cv_tlo) * - cv_mem->cv_ghi[imax] / (cv_mem->cv_ghi[imax] - alph*cv_mem->cv_glo[imax]); - if (SUNRabs(tmid - cv_mem->cv_tlo) < HALF*cv_mem->cv_ttol) { - fracint = SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo)/cv_mem->cv_ttol; - fracsub = (fracint > FIVE) ? PT1 : HALF/fracint; - tmid = cv_mem->cv_tlo + fracsub*(cv_mem->cv_thi - cv_mem->cv_tlo); - } - if (SUNRabs(cv_mem->cv_thi - tmid) < HALF*cv_mem->cv_ttol) { - fracint = SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo)/cv_mem->cv_ttol; - fracsub = (fracint > FIVE) ? PT1 : HALF/fracint; - tmid = cv_mem->cv_thi - fracsub*(cv_mem->cv_thi - cv_mem->cv_tlo); - } + /* Use tempvQS[0] as temporary storage for the scaled sensitivity */ + pyS = cv_mem->cv_tempvQS[0]; - (void) CVodeGetDky(cv_mem, tmid, 0, cv_mem->cv_y); - retval = cv_mem->cv_gfun(tmid, cv_mem->cv_y, cv_mem->cv_grout, - cv_mem->cv_user_data); - cv_mem->cv_nge++; - if (retval != 0) return(CV_RTFUNC_FAIL); + for (is=0; iscv_Ns; is++) { + N_VScale(cv_mem->cv_pbar[is], yQScur[is], pyS); + flag = cvQuadEwtSet(cv_mem, pyS, weightQS[is]); + if (flag != 0) return(-1); + N_VScale(cv_mem->cv_pbar[is], weightQS[is], weightQS[is]); + } + return(0); +} - /* Check to see in which subinterval g changes sign, and reset imax. - Set side = 1 if sign change is on low side, or 2 if on high side. */ - maxfrac = ZERO; - zroot = SUNFALSE; - sgnchg = SUNFALSE; - sideprev = side; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - if(!cv_mem->cv_gactive[i]) continue; - if (SUNRabs(cv_mem->cv_grout[i]) == ZERO) { - if(cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) zroot = SUNTRUE; - } else { - if ( (cv_mem->cv_glo[i]*cv_mem->cv_grout[i] < ZERO) && - (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) { - gfrac = SUNRabs(cv_mem->cv_grout[i] / - (cv_mem->cv_grout[i] - cv_mem->cv_glo[i])); - if (gfrac > maxfrac) { - sgnchg = SUNTRUE; - maxfrac = gfrac; - imax = i; - } - } - } - } - if (sgnchg) { - /* Sign change found in (tlo,tmid); replace thi with tmid. */ - cv_mem->cv_thi = tmid; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_ghi[i] = cv_mem->cv_grout[i]; - side = 1; - /* Stop at root thi if converged; otherwise loop. */ - if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; - continue; /* Return to looping point. */ - } +static int cvQuadSensEwtSetSS(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +{ + int is; - if (zroot) { - /* No sign change in (tlo,tmid), but g = 0 at tmid; return root tmid. */ - cv_mem->cv_thi = tmid; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_ghi[i] = cv_mem->cv_grout[i]; - break; + for (is=0; iscv_Ns; is++) { + N_VAbs(yQScur[is], cv_mem->cv_tempvQ); + N_VScale(cv_mem->cv_reltolQS, cv_mem->cv_tempvQ, cv_mem->cv_tempvQ); + N_VAddConst(cv_mem->cv_tempvQ, cv_mem->cv_SabstolQS[is], cv_mem->cv_tempvQ); + if (cv_mem->cv_atolQSmin0[is]) { + if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); } + N_VInv(cv_mem->cv_tempvQ, weightQS[is]); + } + return(0); +} - /* No sign change in (tlo,tmid), and no zero at tmid. - Sign change must be in (tmid,thi). Replace tlo with tmid. */ - cv_mem->cv_tlo = tmid; - for (i = 0; i < cv_mem->cv_nrtfn; i++) - cv_mem->cv_glo[i] = cv_mem->cv_grout[i]; - side = 2; - /* Stop at root thi if converged; otherwise loop back. */ - if (SUNRabs(cv_mem->cv_thi - cv_mem->cv_tlo) <= cv_mem->cv_ttol) break; - - } /* End of root-search loop */ +static int cvQuadSensEwtSetSV(CVodeMem cv_mem, N_Vector *yQScur, N_Vector *weightQS) +{ + int is; - /* Reset trout and grout, set iroots, and return RTFOUND. */ - cv_mem->cv_trout = cv_mem->cv_thi; - for (i = 0; i < cv_mem->cv_nrtfn; i++) { - cv_mem->cv_grout[i] = cv_mem->cv_ghi[i]; - cv_mem->cv_iroots[i] = 0; - if(!cv_mem->cv_gactive[i]) continue; - if ( (SUNRabs(cv_mem->cv_ghi[i]) == ZERO) && - (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) - cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1:1; - if ( (cv_mem->cv_glo[i]*cv_mem->cv_ghi[i] < ZERO) && - (cv_mem->cv_rootdir[i]*cv_mem->cv_glo[i] <= ZERO) ) - cv_mem->cv_iroots[i] = cv_mem->cv_glo[i] > 0 ? -1:1; + for (is=0; iscv_Ns; is++) { + N_VAbs(yQScur[is], cv_mem->cv_tempvQ); + N_VLinearSum(cv_mem->cv_reltolQS, cv_mem->cv_tempvQ, ONE, + cv_mem->cv_VabstolQS[is], cv_mem->cv_tempvQ); + if (cv_mem->cv_atolQSmin0[is]) { + if (N_VMin(cv_mem->cv_tempvQ) <= ZERO) return(-1); + } + N_VInv(cv_mem->cv_tempvQ, weightQS[is]); } - return(RTFOUND); + return(0); } /* diff --git a/src/lib/cvodes/cvodes_bandpre.c b/src/lib/cvodes/cvodes_bandpre.c index 6914fe3..d60a357 100644 --- a/src/lib/cvodes/cvodes_bandpre.c +++ b/src/lib/cvodes/cvodes_bandpre.c @@ -4,7 +4,7 @@ * Radu Serban and Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -112,7 +112,7 @@ int CVBandPrecInit(void *cvode_mem, sunindextype N, /* Allocate memory for saved banded Jacobian approximation. */ pdata->savedJ = NULL; - pdata->savedJ = SUNBandMatrixStorage(N, mup, mlp, mup); + pdata->savedJ = SUNBandMatrixStorage(N, mup, mlp, mup, cv_mem->cv_sunctx); if (pdata->savedJ == NULL) { free(pdata); pdata = NULL; cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVSBANDPRE", @@ -123,7 +123,7 @@ int CVBandPrecInit(void *cvode_mem, sunindextype N, /* Allocate memory for banded preconditioner. */ storagemu = SUNMIN(N-1, mup+mlp); pdata->savedP = NULL; - pdata->savedP = SUNBandMatrixStorage(N, mup, mlp, storagemu); + pdata->savedP = SUNBandMatrixStorage(N, mup, mlp, storagemu, cv_mem->cv_sunctx); if (pdata->savedP == NULL) { SUNMatDestroy(pdata->savedJ); free(pdata); pdata = NULL; @@ -134,7 +134,7 @@ int CVBandPrecInit(void *cvode_mem, sunindextype N, /* Allocate memory for banded linear solver */ pdata->LS = NULL; - pdata->LS = SUNLinSol_Band(cv_mem->cv_tempv, pdata->savedP); + pdata->LS = SUNLinSol_Band(cv_mem->cv_tempv, pdata->savedP, cv_mem->cv_sunctx); if (pdata->LS == NULL) { SUNMatDestroy(pdata->savedP); SUNMatDestroy(pdata->savedJ); diff --git a/src/lib/cvodes/cvodes_bandpre_impl.h b/src/lib/cvodes/cvodes_bandpre_impl.h index de1b147..6afa1ab 100644 --- a/src/lib/cvodes/cvodes_bandpre_impl.h +++ b/src/lib/cvodes/cvodes_bandpre_impl.h @@ -4,7 +4,7 @@ * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_bbdpre.c b/src/lib/cvodes/cvodes_bbdpre.c index 85e0723..b2af393 100644 --- a/src/lib/cvodes/cvodes_bbdpre.c +++ b/src/lib/cvodes/cvodes_bbdpre.c @@ -4,7 +4,7 @@ * Radu Serban and Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -124,7 +124,7 @@ int CVBBDPrecInit(void *cvode_mem, sunindextype Nlocal, pdata->mlkeep = mlk; /* Allocate memory for saved Jacobian */ - pdata->savedJ = SUNBandMatrixStorage(Nlocal, muk, mlk, muk); + pdata->savedJ = SUNBandMatrixStorage(Nlocal, muk, mlk, muk, cv_mem->cv_sunctx); if (pdata->savedJ == NULL) { free(pdata); pdata = NULL; cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVSBBDPRE", @@ -135,7 +135,7 @@ int CVBBDPrecInit(void *cvode_mem, sunindextype Nlocal, /* Allocate memory for preconditioner matrix */ storage_mu = SUNMIN(Nlocal-1, muk + mlk); pdata->savedP = NULL; - pdata->savedP = SUNBandMatrixStorage(Nlocal, muk, mlk, storage_mu); + pdata->savedP = SUNBandMatrixStorage(Nlocal, muk, mlk, storage_mu, cv_mem->cv_sunctx); if (pdata->savedP == NULL) { SUNMatDestroy(pdata->savedJ); free(pdata); pdata = NULL; @@ -146,7 +146,7 @@ int CVBBDPrecInit(void *cvode_mem, sunindextype Nlocal, /* Allocate memory for temporary N_Vectors */ pdata->zlocal = NULL; - pdata->zlocal = N_VNewEmpty_Serial(Nlocal); + pdata->zlocal = N_VNewEmpty_Serial(Nlocal, cv_mem->cv_sunctx); if (pdata->zlocal == NULL) { SUNMatDestroy(pdata->savedP); SUNMatDestroy(pdata->savedJ); @@ -156,7 +156,7 @@ int CVBBDPrecInit(void *cvode_mem, sunindextype Nlocal, return(CVLS_MEM_FAIL); } pdata->rlocal = NULL; - pdata->rlocal = N_VNewEmpty_Serial(Nlocal); + pdata->rlocal = N_VNewEmpty_Serial(Nlocal, cv_mem->cv_sunctx); if (pdata->rlocal == NULL) { N_VDestroy(pdata->zlocal); SUNMatDestroy(pdata->savedP); @@ -208,7 +208,7 @@ int CVBBDPrecInit(void *cvode_mem, sunindextype Nlocal, /* Allocate memory for banded linear solver */ pdata->LS = NULL; - pdata->LS = SUNLinSol_Band(pdata->rlocal, pdata->savedP); + pdata->LS = SUNLinSol_Band(pdata->rlocal, pdata->savedP, cv_mem->cv_sunctx); if (pdata->LS == NULL) { N_VDestroy(pdata->tmp1); N_VDestroy(pdata->tmp2); diff --git a/src/lib/cvodes/cvodes_bbdpre_impl.h b/src/lib/cvodes/cvodes_bbdpre_impl.h index e6ad589..c5efeaa 100644 --- a/src/lib/cvodes/cvodes_bbdpre_impl.h +++ b/src/lib/cvodes/cvodes_bbdpre_impl.h @@ -4,7 +4,7 @@ * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_diag.c b/src/lib/cvodes/cvodes_diag.c index cc3ca62..d8529f8 100644 --- a/src/lib/cvodes/cvodes_diag.c +++ b/src/lib/cvodes/cvodes_diag.c @@ -3,7 +3,7 @@ * Programmer(s): Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_diag_impl.h b/src/lib/cvodes/cvodes_diag_impl.h index 799f7cb..2dcb117 100644 --- a/src/lib/cvodes/cvodes_diag_impl.h +++ b/src/lib/cvodes/cvodes_diag_impl.h @@ -3,7 +3,7 @@ * Programmer(s): Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_direct.c b/src/lib/cvodes/cvodes_direct.c index daf7d25..d21192d 100644 --- a/src/lib/cvodes/cvodes_direct.c +++ b/src/lib/cvodes/cvodes_direct.c @@ -2,7 +2,7 @@ * Programmer(s): Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_impl.h b/src/lib/cvodes/cvodes_impl.h index ceaa23a..fe04358 100644 --- a/src/lib/cvodes/cvodes_impl.h +++ b/src/lib/cvodes/cvodes_impl.h @@ -2,7 +2,7 @@ * Programmer(s): Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -19,38 +19,176 @@ #include -#include -#include -#include +#include "cvodes/cvodes.h" +#include "cvodes_proj_impl.h" +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" +#include "sundials/sundials_math.h" #ifdef __cplusplus /* wrapper to enable C++ usage */ extern "C" { #endif +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define RSYM ".32Lg" +#define RSYMW "19.32Lg" +#else +#define RSYM ".16g" +#define RSYMW "23.16g" +#endif + +/*=================================================================*/ +/* Shortcuts */ +/*=================================================================*/ + +#define CV_PROFILER cv_mem->cv_sunctx->profiler +#define CV_LOGGER cv_mem->cv_sunctx->logger + /* * ================================================================= - * I N T E R N A L C V O D E S C O N S T A N T S + * I N T E R N A L C O N S T A N T S * ================================================================= */ -/* Basic CVODES constants */ +/* Basic constants */ -#define ADAMS_Q_MAX 12 /* max value of q for lmm == ADAMS */ -#define BDF_Q_MAX 5 /* max value of q for lmm == BDF */ -#define Q_MAX ADAMS_Q_MAX /* max value of q for either lmm */ -#define L_MAX (Q_MAX+1) /* max value of L for either lmm */ -#define NUM_TESTS 5 /* number of error test quantities */ +#define ADAMS_Q_MAX 12 /* max value of q for lmm == ADAMS */ +#define BDF_Q_MAX 5 /* max value of q for lmm == BDF */ +#define Q_MAX ADAMS_Q_MAX /* max value of q for either lmm */ +#define L_MAX (Q_MAX+1) /* max value of L for either lmm */ +#define NUM_TESTS 5 /* number of error test quantities */ #define HMIN_DEFAULT RCONST(0.0) /* hmin default value */ #define HMAX_INV_DEFAULT RCONST(0.0) /* hmax_inv default value */ #define MXHNIL_DEFAULT 10 /* mxhnil default value */ #define MXSTEP_DEFAULT 500 /* mxstep default value */ -/* Return values for lower level routines used by CVode and functions - provided to the nonlinear solver */ +#define MSBP_DEFAULT 20 /* max steps between lsetup calls */ +#define DGMAX_LSETUP_DEFAULT RCONST(0.3) /* gamma threshold to call lsetup */ + +/* Step size change constants + * -------------------------- + * ETA_MIN_FX_DEFAULT if eta_min_fx < eta < eta_max_fx reject a change in step size or order + * ETA_MAX_FX_DEFAULT + * ETA_MAX_FS_DEFAULT -+ + * ETA_MAX_ES_DEFAULT | + * ETA_MAX_GS_DEFAULT | + * ETA_MIN_DEFAULT |-> bounds for eta (step size change) + * ETA_MAX_EF_DEFAULT | + * ETA_MIN_EF_DEFAULT | + * ETA_CF_DEFAULT -+ + * SMALL_NST_DEFAULT nst <= SMALL_NST => use eta_max_es + * SMALL_NEF_DEFAULT if small_nef <= nef <= MXNEF1, then eta = SUNMIN(eta, eta_max_ef) + * ONEPSM (1+epsilon) used in testing if the step size is below its bound + */ + +#define ETA_MIN_FX_DEFAULT RCONST(0.0) +#define ETA_MAX_FX_DEFAULT RCONST(1.5) +#define ETA_MAX_FS_DEFAULT RCONST(10000.0) +#define ETA_MAX_ES_DEFAULT RCONST(10.0) +#define ETA_MAX_GS_DEFAULT RCONST(10.0) +#define ETA_MIN_DEFAULT RCONST(0.1) +#define ETA_MAX_EF_DEFAULT RCONST(0.2) +#define ETA_MIN_EF_DEFAULT RCONST(0.1) +#define ETA_CF_DEFAULT RCONST(0.25) +#define SMALL_NST_DEFAULT 10 +#define SMALL_NEF_DEFAULT 2 +#define ONEPSM RCONST(1.000001) + +/* Step size controller constants + * ------------------------------ + * ADDON safety factor in computing eta + * BIAS1 -+ + * BIAS2 |-> bias factors in eta selection + * BIAS3 -+ + */ + +#define ADDON RCONST(0.000001) +#define BIAS1 RCONST(6.0) +#define BIAS2 RCONST(6.0) +#define BIAS3 RCONST(10.0) -#define RHSFUNC_RECVR +9 -#define SRHSFUNC_RECVR +12 +/* Order selection constants + * ------------------------- + * LONG_WAIT number of steps to wait before considering an order change when + * q==1 and MXNEF1 error test failures have occurred + */ + +#define LONG_WAIT 10 + +/* Failure limits + * -------------- + * MXNCF max no. of convergence failures during one step try + * MXNEF max no. of error test failures during one step try + * MXNEF1 max no. of error test failures before forcing a reduction of order + */ + +#define MXNCF 10 +#define MXNEF 7 +#define MXNEF1 3 + +/* Control constants for lower-level functions used by cvStep + * ---------------------------------------------------------- + * + * cvHin return values: + * CV_SUCCESS, + * CV_RHSFUNC_FAIL, CV_RPTD_RHSFUNC_ERR, + * CV_QRHSFUNC_FAIL, CV_RPTD_QRHSFUNC_ERR, + * CV_SRHSFUNC_FAIL, CV_RPTD_SRHSFUNC_ERR, + * CV_TOO_CLOSE + * + * cvStep control constants: + * DO_ERROR_TEST + * PREDICT_AGAIN + * + * cvStep return values: + * CV_SUCCESS, + * CV_CONV_FAILURE, CV_ERR_FAILURE, + * CV_LSETUP_FAIL, CV_LSOLVE_FAIL, + * CV_RTFUNC_FAIL, + * CV_RHSFUNC_FAIL, CV_QRHSFUNC_FAIL, CV_SRHSFUNC_FAIL, CV_QSRHSFUNC_FAIL, + * CV_FIRST_RHSFUNC_ERR, CV_FIRST_QRHSFUNC_ERR, CV_FIRST_SRHSFUNC_ERR, CV_FIRST_QSRHSFUNC_ERR, + * CV_UNREC_RHSFUNC_ERR, CV_UNREC_QRHSFUNC_ERR, CV_UNREC_SRHSFUNC_ERR, CV_UNREC_QSRHSFUNC_ERR, + * CV_REPTD_RHSFUNC_ERR, CV_REPTD_QRHSFUNC_ERR, CV_REPTD_SRHSFUNC_ERR, CV_REPTD_QSRHSFUNC_ERR, + * + * cvNls input nflag values: + * FIRST_CALL + * PREV_CONV_FAIL + * PREV_PROJ_FAIL + * PREV_ERR_FAIL + * + * cvNls return values: + * CV_SUCCESS, + * CV_LSETUP_FAIL, CV_LSOLVE_FAIL, + * CV_RHSFUNC_FAIL, CV_SRHSFUNC_FAIL, + * SUN_NLS_CONV_RECVR, + * RHSFUNC_RECVR, SRHSFUNC_RECVR + * + * cvNewtonIteration return values: + * CV_SUCCESS, + * CV_LSOLVE_FAIL, CV_RHSFUNC_FAIL + * RHSFUNC_RECVR, + * TRY_AGAIN + * + */ + +#define DO_ERROR_TEST +2 +#define PREDICT_AGAIN +3 + +#define TRY_AGAIN +5 +#define FIRST_CALL +6 +#define PREV_CONV_FAIL +7 +#define PREV_PROJ_FAIL +8 +#define PREV_ERR_FAIL +9 + +#define RHSFUNC_RECVR +10 +#define CONSTR_RECVR +11 +#define CONSTRFUNC_RECVR +12 +#define PROJFUNC_RECVR +13 + +#define QRHSFUNC_RECVR +14 +#define SRHSFUNC_RECVR +15 +#define QSRHSFUNC_RECVR +16 /* nonlinear solver constants NLS_MAXCOR maximum no. of corrector iterations for the nonlinear solver @@ -91,28 +229,29 @@ typedef struct CVodeBMemRec *CVodeBMem; typedef struct CVodeMemRec { - realtype cv_uround; /* machine unit roundoff */ + SUNContext cv_sunctx; + + realtype cv_uround; /* machine unit roundoff */ /*-------------------------- Problem Specification Data --------------------------*/ - CVRhsFn cv_f; /* y' = f(t,y(t)) */ - void *cv_user_data; /* user pointer passed to f */ - - int cv_lmm; /* lmm = ADAMS or BDF */ + CVRhsFn cv_f; /* y' = f(t,y(t)) */ + void *cv_user_data; /* user pointer passed to f */ + int cv_lmm; /* lmm = CV_ADAMS or CV_BDF */ + int cv_itol; /* itol = CV_SS, CV_SV, CV_WF, CV_NN */ - int cv_itol; /* itol = CV_SS, CV_SV, or CV_WF, or CV_NN */ - realtype cv_reltol; /* relative tolerance */ - realtype cv_Sabstol; /* scalar absolute tolerance */ - N_Vector cv_Vabstol; /* vector absolute tolerance */ - booleantype cv_atolmin0; /* flag indicating that min(abstol) = 0 */ - booleantype cv_user_efun; /* SUNTRUE if user sets efun */ - CVEwtFn cv_efun; /* function to set ewt */ - void *cv_e_data; /* user pointer passed to efun */ + realtype cv_reltol; /* relative tolerance */ + realtype cv_Sabstol; /* scalar absolute tolerance */ + N_Vector cv_Vabstol; /* vector absolute tolerance */ + booleantype cv_atolmin0; /* flag indicating that min(abstol) = 0 */ + booleantype cv_user_efun; /* SUNTRUE if user sets efun */ + CVEwtFn cv_efun; /* function to set ewt */ + void *cv_e_data; /* user pointer passed to efun */ booleantype cv_constraintsSet; /* constraints vector present: - do constraints calc */ + do constraints calc */ /*----------------------- Quadrature Related Data @@ -182,30 +321,29 @@ typedef struct CVodeMemRec { Nordsieck History Array -----------------------*/ - N_Vector cv_zn[L_MAX]; /* Nordsieck array, of size N x (q+1). - zn[j] is a vector of length N (j=0,...,q) - zn[j] = [1/factorial(j)] * h^j * - (jth derivative of the interpolating poly.) */ + N_Vector cv_zn[L_MAX]; /* Nordsieck array, of size N x (q+1). + zn[j] is a vector of length N (j=0,...,q) + zn[j] = [1/factorial(j)] * h^j * + (jth derivative of the interpolating polynomial) */ /*------------------- Vectors of length N -------------------*/ - N_Vector cv_ewt; /* error weight vector */ - N_Vector cv_y; /* y is used as temporary storage by the solver. - The memory is provided by the user to CVode - where the vector is named yout. */ - N_Vector cv_acor; /* In the context of the solution of the - nonlinear equation, acor = y_n(m) - y_n(0). - On return, this vector is scaled to give - the estimated local error in y. */ - N_Vector cv_tempv; /* temporary storage vector */ - N_Vector cv_ftemp; /* temporary storage vector */ - N_Vector cv_vtemp1; /* temporary storage vector */ - N_Vector cv_vtemp2; /* temporary storage vector */ - N_Vector cv_vtemp3; /* temporary storage vector */ - - N_Vector cv_constraints; /* vector of inequality constraint options */ + N_Vector cv_ewt; /* error weight vector */ + N_Vector cv_y; /* y is used as temporary storage by the solver + The memory is provided by the user to CVode + where the vector is named yout. */ + N_Vector cv_acor; /* In the context of the solution of the nonlinear + equation, acor = y_n(m) - y_n(0). On return, this + vector is scaled to give the estimated local error */ + N_Vector cv_tempv; /* temporary storage vector */ + N_Vector cv_ftemp; /* temporary storage vector */ + N_Vector cv_vtemp1; /* temporary storage vector */ + N_Vector cv_vtemp2; /* temporary storage vector */ + N_Vector cv_vtemp3; /* temporary storage vector */ + + N_Vector cv_constraints; /* vector of inequality constraint options */ /*-------------------------- Quadrature Related Vectors @@ -254,25 +392,25 @@ typedef struct CVodeMemRec { int cv_q; /* current order */ int cv_qprime; /* order to be used on the next step - * qprime = q-1, q, or q+1 */ + qprime = q-1, q, or q+1 */ int cv_next_q; /* order to be used on the next step */ int cv_qwait; /* number of internal steps to wait before - * considering a change in q */ + considering a change in q */ int cv_L; /* L = q + 1 */ - realtype cv_hin; + realtype cv_hin; /* initial step size */ realtype cv_h; /* current step size */ realtype cv_hprime; /* step size to be used on the next step */ realtype cv_next_h; /* step size to be used on the next step */ realtype cv_eta; /* eta = hprime / h */ realtype cv_hscale; /* value of h used in zn */ realtype cv_tn; /* current internal value of t */ - realtype cv_tretlast; /* last value of t returned */ + realtype cv_tretlast; /* last value of t returned by CVode */ realtype cv_tau[L_MAX+1]; /* array of previous q+1 successful step - * sizes indexed from 1 to q+1 */ + sizes indexed from 1 to q+1 */ realtype cv_tq[NUM_TESTS+1]; /* array of test quantities indexed from - * 1 to NUM_TESTS(=5) */ + 1 to NUM_TESTS(=5) */ realtype cv_l[L_MAX]; /* coefficients of l(x) (degree q poly) */ realtype cv_rl1; /* the scalar 1/l[1] */ @@ -280,8 +418,8 @@ typedef struct CVodeMemRec { realtype cv_gammap; /* gamma at the last setup call */ realtype cv_gamrat; /* gamma / gammap */ - realtype cv_crate; /* est. corrector conv. rate in Nls */ - realtype cv_crateS; /* est. corrector conv. rate in NlsStgr */ + realtype cv_crate; /* estimated corrector convergence rate */ + realtype cv_crateS; /* estimated corrector convergence rate (Stgr) */ realtype cv_delp; /* norm of previous nonlinear solver update */ realtype cv_acnrm; /* | acor | */ booleantype cv_acnrmcur; /* is | acor | current? */ @@ -297,21 +435,32 @@ typedef struct CVodeMemRec { Limits ------*/ - int cv_qmax; /* q <= qmax */ - long int cv_mxstep; /* maximum number of internal steps for one - user call */ - int cv_mxhnil; /* max. number of warning messages issued to the - user that t + h == t for the next internal step */ - int cv_maxnef; /* maximum number of error test failures */ - int cv_maxncf; /* maximum number of nonlinear conv. failures */ - - realtype cv_hmin; /* |h| >= hmin */ - realtype cv_hmax_inv; /* |h| <= 1/hmax_inv */ - realtype cv_etamax; /* eta <= etamax */ - - /*---------- + int cv_qmax; /* q <= qmax */ + long int cv_mxstep; /* maximum number of internal steps for one user call */ + int cv_mxhnil; /* maximum number of warning messages issued to the + user that t + h == t for the next internal step */ + int cv_maxnef; /* maximum number of error test failures */ + int cv_maxncf; /* maximum number of nonlinear convergence failures */ + + realtype cv_hmin; /* |h| >= hmin */ + realtype cv_hmax_inv; /* |h| <= 1/hmax_inv */ + realtype cv_etamax; /* eta <= etamax */ + realtype cv_eta_min_fx; /* eta_min_fx < eta < eta_max_fx keep the current h */ + realtype cv_eta_max_fx; + realtype cv_eta_max_fs; /* eta <= eta_max_fs on the first step */ + realtype cv_eta_max_es; /* eta <= eta_max_es on early steps */ + realtype cv_eta_max_gs; /* eta <= eta_max_gs on a general step */ + realtype cv_eta_min; /* eta >= eta_min on a general step */ + realtype cv_eta_min_ef; /* eta >= eta_min_ef after an error test failure */ + realtype cv_eta_max_ef; /* eta on multiple (>= small_nef) error test failures */ + realtype cv_eta_cf; /* eta on a nonlinear solver convergence failure */ + + long int cv_small_nst; /* nst <= small_nst use eta_max_es */ + int cv_small_nef; /* nef >= small_nef use eta_max_ef */ + + /*-------- Counters - ----------*/ + --------*/ long int cv_nst; /* number of internal steps taken */ @@ -322,7 +471,6 @@ typedef struct CVodeMemRec { long int cv_nfQSe; /* number of fQS calls */ long int cv_nfQeS; /* number of fQ calls from sensi DQ */ - long int cv_ncfn; /* number of corrector convergence failures */ long int cv_ncfnS; /* number of total sensi. corr. conv. failures */ long int *cv_ncfnS1; /* number of sensi. corrector conv. failures */ @@ -331,6 +479,10 @@ typedef struct CVodeMemRec { long int cv_nniS; /* number of total sensi. nonlinear iterations */ long int *cv_nniS1; /* number of sensi. nonlinear iterations */ + long int cv_nnf; /* number of nonlinear convergence fails */ + long int cv_nnfS; /* number of total sensi. nonlinear conv. fails */ + long int *cv_nnfS1; /* number of sensi. nonlinear conv. fails */ + long int cv_netf; /* number of error test failures */ long int cv_netfQ; /* number of quadr. error test failures */ long int cv_netfS; /* number of sensi. error test failures */ @@ -340,18 +492,7 @@ typedef struct CVodeMemRec { long int cv_nsetupsS; /* number of setup calls due to sensitivities */ int cv_nhnil; /* number of messages issued to the user that - t + h == t for the next iternal step */ - - /*----------------------------- - Space requirements for CVODES - -----------------------------*/ - - sunindextype cv_lrw1; /* no. of realtype words in 1 N_Vector y */ - sunindextype cv_liw1; /* no. of integer words in 1 N_Vector y */ - sunindextype cv_lrw1Q; /* no. of realtype words in 1 N_Vector yQ */ - sunindextype cv_liw1Q; /* no. of integer words in 1 N_Vector yQ */ - long int cv_lrw; /* no. of realtype words in CVODES work vectors */ - long int cv_liw; /* no. of integer words in CVODES work vectors */ + t + h == t for the next iternal step */ /*---------------- Step size ratios @@ -361,12 +502,23 @@ typedef struct CVodeMemRec { realtype cv_etaq; /* ratio of new to old h for order q */ realtype cv_etaqp1; /* ratio of new to old h for order q+1 */ + /*------------------ + Space requirements + ------------------*/ + + sunindextype cv_lrw1; /* no. of realtype words in 1 N_Vector y */ + sunindextype cv_liw1; /* no. of integer words in 1 N_Vector y */ + sunindextype cv_lrw1Q; /* no. of realtype words in 1 N_Vector yQ */ + sunindextype cv_liw1Q; /* no. of integer words in 1 N_Vector yQ */ + long int cv_lrw; /* no. of realtype words in CVODE work vectors */ + long int cv_liw; /* no. of integer words in CVODE work vectors */ + /*--------------------- Nonlinear Solver Data ---------------------*/ - SUNNonlinearSolver NLS; /* nonlinear solver object for ODE solves */ - booleantype ownNLS; /* flag indicating NLS ownership */ + SUNNonlinearSolver NLS; /* nonlinear solver object */ + booleantype ownNLS; /* flag indicating NLS ownership */ SUNNonlinearSolver NLSsim; /* NLS object for the simultaneous corrector */ booleantype ownNLSsim; /* flag indicating NLS ownership */ @@ -381,6 +533,7 @@ typedef struct CVodeMemRec { booleantype sens_solve; /* flag indicating if the current solve is a staggered or staggered1 sensitivity solve */ + CVRhsFn nls_f; /* f(t,y(t)) used in the nonlinear solver */ int convfail; /* flag to indicate when a Jacobian update may be needed */ @@ -413,21 +566,21 @@ typedef struct CVodeMemRec { int (*cv_linit)(struct CVodeMemRec *cv_mem); int (*cv_lsetup)(struct CVodeMemRec *cv_mem, int convfail, - N_Vector ypred, N_Vector fpred, booleantype *jcurPtr, - N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3); + N_Vector ypred, N_Vector fpred, booleantype *jcurPtr, + N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3); int (*cv_lsolve)(struct CVodeMemRec *cv_mem, N_Vector b, N_Vector weight, - N_Vector ycur, N_Vector fcur); + N_Vector ycur, N_Vector fcur); int (*cv_lfree)(struct CVodeMemRec *cv_mem); /* Linear Solver specific memory */ - void *cv_lmem; - - /* Flag to request a call to the setup routine */ - - booleantype cv_forceSetup; + void *cv_lmem; /* linear solver interface memory structure */ + long int cv_msbp; /* max number of steps between lsetip calls */ + realtype cv_dgmax_lsetup; /* gamma ratio threshold to signal for a linear + * solver setup */ + booleantype cv_forceSetup; /* flag to request a call to the setup routine */ /*------------ Saved Values @@ -441,11 +594,11 @@ typedef struct CVodeMemRec { booleantype cv_jcur; /* is Jacobian info for linear solver current? */ int cv_convfail; /* flag storing previous solver failure mode */ realtype cv_tolsf; /* tolerance scale factor */ - int cv_qmax_alloc; /* qmax used when allocating mem */ + int cv_qmax_alloc; /* value of qmax used when allocating mem */ int cv_qmax_allocQ; /* qmax used when allocating quad. mem */ int cv_qmax_allocS; /* qmax used when allocating sensi. mem */ int cv_qmax_allocQS; /* qmax used when allocating quad. sensi. mem */ - int cv_indx_acor; /* index of zn vector in which acor is saved */ + int cv_indx_acor; /* index of the zn vector with saved acor */ /*-------------------------------------------------------------------- Flags turned ON by CVodeInit, CVodeSensMalloc, and CVodeQuadMalloc @@ -471,15 +624,21 @@ typedef struct CVodeMemRec { Error handler function and error ouput file -------------------------------------------*/ - CVErrHandlerFn cv_ehfun; /* Error messages are handled by ehfun */ - void *cv_eh_data; /* dats pointer passed to ehfun */ - FILE *cv_errfp; /* CVODES error messages are sent to errfp */ + CVErrHandlerFn cv_ehfun; /* error messages are handled by ehfun */ + void *cv_eh_data; /* data pointer passed to ehfun */ + FILE *cv_errfp; /* CVODE error messages are sent to errfp */ + + /*------------------------------------------- + User access function + -------------------------------------------*/ + CVMonitorFn cv_monitorfun; /* func called with CVODE mem and user data */ + long int cv_monitor_interval; /* step interval to call cv_monitorfun */ /*------------------------- Stability Limit Detection -------------------------*/ - booleantype cv_sldeton; /* Is Stability Limit Detection on? */ + booleantype cv_sldeton; /* is Stability Limit Detection on? */ realtype cv_ssdat[6][4]; /* scaled data array for STALD */ int cv_nscon; /* counter for STALD method */ long int cv_nor; /* counter for number of order reductions */ @@ -488,7 +647,7 @@ typedef struct CVodeMemRec { Rootfinding Data ----------------*/ - CVRootFn cv_gfun; /* Function g for roots sought */ + CVRootFn cv_gfun; /* function g for roots sought */ int cv_nrtfn; /* number of components of g */ int *cv_iroots; /* array for root information */ int *cv_rootdir; /* array specifying direction of zero-crossing */ @@ -506,6 +665,15 @@ typedef struct CVodeMemRec { booleantype *cv_gactive; /* array with active/inactive event functions */ int cv_mxgnull; /* number of warning messages about possible g==0 */ + /*--------------- + Projection Data + ---------------*/ + + CVodeProjMem proj_mem; /* projection memory structure */ + booleantype proj_enabled; /* flag indicating if projection is enabled */ + booleantype proj_applied; /* flag indicating if projection was applied */ + realtype proj_p[L_MAX]; /* coefficients of p(x) (degree q poly) */ + /*----------------------- Fused Vector Operations -----------------------*/ @@ -955,7 +1123,7 @@ struct CVadjMemRec { /* * ================================================================= - * C V O D E S I N T E R N A L F U N C T I O N S + * I N T E R N A L F U N C T I O N S * ================================================================= */ @@ -966,21 +1134,41 @@ realtype cvSensNorm(CVodeMem cv_mem, N_Vector *xS, N_Vector *wS); realtype cvSensUpdateNorm(CVodeMem cv_mem, realtype old_nrm, N_Vector *xS, N_Vector *wS); - /* Prototype of internal ewtSet function */ int cvEwtSet(N_Vector ycur, N_Vector weight, void *data); /* High level error handler */ -void cvProcessError(CVodeMem cv_mem, - int error_code, const char *module, const char *fname, - const char *msgfmt, ...); +void cvProcessError(CVodeMem cv_mem, int error_code, const char *module, + const char *fname, const char *msgfmt, ...); -/* Prototype of internal errHandler function */ +/* Prototype of internal ErrHandler function */ void cvErrHandler(int error_code, const char *module, const char *function, - char *msg, void *data); + char *msg, void *data); + +/* Nonlinear solver initialization */ + +int cvNlsInit(CVodeMem cv_mem); +int cvNlsInitSensSim(CVodeMem cv_mem); +int cvNlsInitSensStg(CVodeMem cv_mem); +int cvNlsInitSensStg1(CVodeMem cv_mem); + +/* Projection functions */ + +int cvDoProjection(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, + int *npfPtr); +int cvProjInit(CVodeProjMem proj_mem); +int cvProjFree(CVodeProjMem *proj_mem); + +/* Restore tn and undo prediction to reattempt a step */ + +void cvRestore(CVodeMem cv_mem, realtype saved_t); + +/* Reset h and rescale history array to prepare for a step */ + +void cvRescale(CVodeMem cv_mem); /* Prototypes for internal sensitivity rhs wrappers */ @@ -1008,15 +1196,9 @@ int cvSensRhs1InternalDQ(int Ns, realtype t, void *fS_data, N_Vector tempv, N_Vector ftemp); -/* Nonlinear solver functions */ -int cvNlsInit(CVodeMem cv_mem); -int cvNlsInitSensSim(CVodeMem cv_mem); -int cvNlsInitSensStg(CVodeMem cv_mem); -int cvNlsInitSensStg1(CVodeMem cv_mem); - /* * ================================================================= - * C V O D E S E R R O R M E S S A G E S + * E R R O R M E S S A G E S * ================================================================= */ @@ -1046,13 +1228,13 @@ int cvNlsInitSensStg1(CVodeMem cv_mem); #endif - /* Initialization and I/O error messages */ #define MSGCV_NO_MEM "cvode_mem = NULL illegal." #define MSGCV_CVMEM_FAIL "Allocation of cvode_mem failed." #define MSGCV_MEM_FAIL "A memory request failed." #define MSGCV_BAD_LMM "Illegal value for lmm. The legal values are CV_ADAMS and CV_BDF." +#define MSGCV_NULL_SUNCTX "sunctx = NULL illegal." #define MSGCV_NO_MALLOC "Attempt to call before CVodeInit." #define MSGCV_NEG_MAXORD "maxord <= 0 illegal." #define MSGCV_BAD_MAXORD "Illegal attempt to increase maximum method order." @@ -1143,6 +1325,15 @@ int cvNlsInitSensStg1(CVodeMem cv_mem); #define MSGCV_NLS_INPUT_NULL "At " MSG_TIME ", the nonlinear solver was passed a NULL input." #define MSGCV_NLS_FAIL "At " MSG_TIME ", the nonlinear solver failed in an unrecoverable manner." +/* CVode Projection Error Messages */ + +#define MSG_CV_MEM_NULL "cvode_mem = NULL illegal." +#define MSG_CV_MEM_FAIL "A memory request failed." + +#define MSG_CV_PROJ_MEM_NULL "proj_mem = NULL illegal." +#define MSG_CV_PROJFUNC_FAIL "At " MSG_TIME " the projection function failed with an unrecoverable error." +#define MSG_CV_REPTD_PROJFUNC_ERR "At " MSG_TIME " the projection function had repeated recoverable errors." + #define MSGCV_NO_TOLQ "No integration tolerances for quadrature variables have been specified." #define MSGCV_BAD_EWTQ "Initial ewtQ has component(s) equal to zero (illegal)." #define MSGCV_EWTQ_NOW_BAD "At " MSG_TIME ", a component of ewtQ has become <= 0." @@ -1171,7 +1362,7 @@ int cvNlsInitSensStg1(CVodeMem cv_mem); /* * ================================================================= - * C V O D E A E R R O R M E S S A G E S + * A D J O I N T E R R O R M E S S A G E S * ================================================================= */ diff --git a/src/lib/cvodes/cvodes_io.c b/src/lib/cvodes/cvodes_io.c index 8b062e8..56dceda 100644 --- a/src/lib/cvodes/cvodes_io.c +++ b/src/lib/cvodes/cvodes_io.c @@ -1,12 +1,8 @@ -/* - * ----------------------------------------------------------------- - * $Revision$ - * $Date$ - * ----------------------------------------------------------------- +/* ----------------------------------------------------------------- * Programmer(s): Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -17,16 +13,14 @@ * ----------------------------------------------------------------- * This is the implementation file for the optional input and output * functions for the CVODES solver. - * ----------------------------------------------------------------- - */ + * ----------------------------------------------------------------- */ #include #include #include "cvodes_impl.h" - -#include -#include +#include "cvodes_ls_impl.h" +#include "sundials/sundials_types.h" #define ZERO RCONST(0.0) #define HALF RCONST(0.5) @@ -39,6 +33,34 @@ * ================================================================= */ + +/* + * CVodeSetDeltaGammaMaxLSetup + * + * Specifies the gamma ratio threshold to signal for a linear solver setup + */ + +int CVodeSetDeltaGammaMaxLSetup(void *cvode_mem, realtype dgmax_lsetup) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetDeltaGammaMaxLSetup", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* Set value or use default */ + if(dgmax_lsetup < ZERO) + cv_mem->cv_dgmax_lsetup = DGMAX_LSETUP_DEFAULT; + else + cv_mem->cv_dgmax_lsetup = dgmax_lsetup; + + return(CV_SUCCESS); +} + /* * CVodeSetErrHandlerFn * @@ -106,6 +128,64 @@ int CVodeSetUserData(void *cvode_mem, void *user_data) return(CV_SUCCESS); } +/* + * CVodeSetMonitorFn + * + * Specifies the user function to call for monitoring + * the solution and/or integrator statistics. + */ + +int CVodeSetMonitorFn(void *cvode_mem, CVMonitorFn fn) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetMonitorFn", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + +#ifdef SUNDIALS_BUILD_WITH_MONITORING + cv_mem->cv_monitorfun = fn; + return(CV_SUCCESS); +#else + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetMonitorFn", "SUNDIALS was not built with monitoring enabled."); + return(CV_ILL_INPUT); +#endif +} + +/* + * CVodeSetMonitorFrequency + * + * Specifies the frequency with which to call the user function. + */ + +int CVodeSetMonitorFrequency(void *cvode_mem, long int nst) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetMonitorFrequency", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + if (nst < 0) { + cvProcessError(NULL, CV_ILL_INPUT, "CVODES", "CVodeSetMonitorFrequency", "step interval must be >= 0\n"); + return(CV_ILL_INPUT); + } + + cv_mem = (CVodeMem) cvode_mem; + +#ifdef SUNDIALS_BUILD_WITH_MONITORING + cv_mem->cv_monitor_interval = nst; + return(CV_SUCCESS); +#else + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetMonitorFrequency", "SUNDIALS was not built with monitoring enabled."); + return(CV_ILL_INPUT); +#endif +} + /* * CVodeSetMaxOrd * @@ -259,7 +339,7 @@ int CVodeSetMinStep(void *cvode_mem, realtype hmin) cv_mem = (CVodeMem) cvode_mem; - if (hmin<0) { + if (hmin < ZERO) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetMinStep", MSGCV_NEG_HMIN); return(CV_ILL_INPUT); } @@ -298,7 +378,7 @@ int CVodeSetMaxStep(void *cvode_mem, realtype hmax) cv_mem = (CVodeMem) cvode_mem; - if (hmax < 0) { + if (hmax < ZERO) { cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetMaxStep", MSGCV_NEG_HMAX); return(CV_ILL_INPUT); } @@ -320,6 +400,287 @@ int CVodeSetMaxStep(void *cvode_mem, realtype hmax) return(CV_SUCCESS); } +/* + * CVodeSetEtaFixedStepBounds + * + * Specifies the bounds for retaining the current step size + */ + + +int CVodeSetEtaFixedStepBounds(void* cvode_mem, realtype eta_min_fx, + realtype eta_max_fx) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaFixedStepBounds", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_min_fx < ZERO || eta_min_fx >= ONE) + cv_mem->cv_eta_min_fx = ETA_MIN_FX_DEFAULT; + else + cv_mem->cv_eta_min_fx = eta_min_fx; + + if (eta_max_fx <= ONE) + cv_mem->cv_eta_max_fx = ETA_MAX_FX_DEFAULT; + else + cv_mem->cv_eta_max_fx = eta_max_fx; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMaxFirstStep + * + * Specifies the maximum step size change on the first step + */ + +int CVodeSetEtaMaxFirstStep(void* cvode_mem, realtype eta_max_fs) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMaxFirstStep", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_max_fs <= ONE) + cv_mem->cv_eta_max_fs = ETA_MAX_FS_DEFAULT; + else + cv_mem->cv_eta_max_fs = eta_max_fs; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMaxEarlyStep + * + * Specifies the maximum step size change on steps early in the integration + * when nst <= small_nst + */ + +int CVodeSetEtaMaxEarlyStep(void* cvode_mem, realtype eta_max_es) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMaxEarlyStep", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_max_es <= ONE) + cv_mem->cv_eta_max_es = ETA_MAX_ES_DEFAULT; + else + cv_mem->cv_eta_max_es = eta_max_es; + + return(CV_SUCCESS); +} + +/* + * CVodeSetNumStepsEtaMaxEarlyStep + * + * Specifies the maximum number of steps for using the early integration change + * factor + */ + +int CVodeSetNumStepsEtaMaxEarlyStep(void* cvode_mem, long int small_nst) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetNumStepsEtaMaxEarlyStep", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (small_nst < 0) + cv_mem->cv_small_nst = SMALL_NST_DEFAULT; + else + cv_mem->cv_small_nst = small_nst; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMax + * + * Specifies the maximum step size change on a general steps (nst > small_nst) + */ + +int CVodeSetEtaMax(void* cvode_mem, realtype eta_max_gs) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMax", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_max_gs <= ONE) + cv_mem->cv_eta_max_gs = ETA_MAX_GS_DEFAULT; + else + cv_mem->cv_eta_max_gs = eta_max_gs; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMin + * + * Specifies the minimum change on a general steps + */ + +int CVodeSetEtaMin(void* cvode_mem, realtype eta_min) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMin", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_min <= ZERO || eta_min >= ONE) + cv_mem->cv_eta_min = ETA_MIN_DEFAULT; + else + cv_mem->cv_eta_min = eta_min; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMinErrFail + * + * Specifies the minimum step size change after an error test failure + */ + +int CVodeSetEtaMinErrFail(void* cvode_mem, realtype eta_min_ef) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMinErrFail", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_min_ef <= ZERO || eta_min_ef >= ONE) + cv_mem->cv_eta_min_ef = ETA_MIN_EF_DEFAULT; + else + cv_mem->cv_eta_min_ef = eta_min_ef; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaMaxErrFail + * + * Specifies the maximum step size change after multiple (>= small_nef) error + * test failures + */ + +int CVodeSetEtaMaxErrFail(void* cvode_mem, realtype eta_max_ef) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaMaxErrFail", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_max_ef <= ZERO || eta_max_ef >= ONE) + cv_mem->cv_eta_max_ef = ETA_MAX_EF_DEFAULT; + else + cv_mem->cv_eta_max_ef = eta_max_ef; + + return(CV_SUCCESS); +} + +/* + * CVodeSetNumFailsEtaMaxErrFail + * + * Specifies the maximum number of error test failures necessary to enforce + * eta_max_ef + */ + +int CVodeSetNumFailsEtaMaxErrFail(void* cvode_mem, int small_nef) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetNumFailsEtaMaxErrFail", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (small_nef < 0) + cv_mem->cv_small_nef = SMALL_NEF_DEFAULT; + else + cv_mem->cv_small_nef = small_nef; + + return(CV_SUCCESS); +} + +/* + * CVodeSetEtaConvFail + * + * Specifies the step size change after a nonlinear solver failure + */ + +int CVodeSetEtaConvFail(void* cvode_mem, realtype eta_cf) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetEtaConvFail", + MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* set allowed value or use default */ + if (eta_cf <= ZERO || eta_cf >= ONE) + cv_mem->cv_eta_cf = ETA_CF_DEFAULT; + else + cv_mem->cv_eta_cf = eta_cf; + + return(CV_SUCCESS); +} + /* * CVodeSetStopTime * @@ -473,6 +834,38 @@ int CVodeSetNonlinConvCoef(void *cvode_mem, realtype nlscoef) return(CV_SUCCESS); } +/* + * CVodeSetLSetupFrequency + * + * Specifies the frequency for calling the linear solver setup function to + * recompute the Jacobian matrix and/or preconditioner + */ + +int CVodeSetLSetupFrequency(void *cvode_mem, long int msbp) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetLSetupFrequency", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + /* check for a valid input */ + if (msbp < 0) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetLSetupFrequency", + "A negative setup frequency was provided"); + return(CV_ILL_INPUT); + } + + /* use default or user provided value */ + cv_mem->cv_msbp = (msbp == 0) ? MSBP_DEFAULT : msbp; + + return(CV_SUCCESS); +} + /* * CVodeSetRootDirection * @@ -503,7 +896,6 @@ int CVodeSetRootDirection(void *cvode_mem, int *rootdir) return(CV_SUCCESS); } - /* * CVodeSetNoInactiveRootWarn * @@ -1244,16 +1636,16 @@ int CVodeGetIntegratorStats(void *cvode_mem, long int *nsteps, long int *nfevals cv_mem = (CVodeMem) cvode_mem; - *nsteps = cv_mem->cv_nst; - *nfevals = cv_mem->cv_nfe; + *nsteps = cv_mem->cv_nst; + *nfevals = cv_mem->cv_nfe; *nlinsetups = cv_mem->cv_nsetups; - *netfails = cv_mem->cv_netf; - *qlast = cv_mem->cv_qu; - *qcur = cv_mem->cv_next_q; - *hinused = cv_mem->cv_h0u; - *hlast = cv_mem->cv_hu; - *hcur = cv_mem->cv_next_h; - *tcur = cv_mem->cv_tn; + *netfails = cv_mem->cv_netf; + *qlast = cv_mem->cv_qu; + *qcur = cv_mem->cv_next_q; + *hinused = cv_mem->cv_h0u; + *hlast = cv_mem->cv_hu; + *hcur = cv_mem->cv_next_h; + *tcur = cv_mem->cv_tn; return(CV_SUCCESS); } @@ -1315,42 +1707,16 @@ int CVodeGetRootInfo(void *cvode_mem, int *rootsfound) int CVodeGetNumNonlinSolvIters(void *cvode_mem, long int *nniters) { CVodeMem cv_mem; - booleantype sensi_sim; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeGetNumNonlinSolvIters", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetNumNonlinSolvIters", + MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; - /* are we computing sensitivities with the simultaneous approach? */ - sensi_sim = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)); - - /* get number of iterations from the NLS */ - if (sensi_sim) { - - /* check that the NLS is non-NULL */ - if (cv_mem->NLSsim == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } - - return(SUNNonlinSolGetNumIters(cv_mem->NLSsim, nniters)); - - } else { - - /* check that the NLS is non-NULL */ - if (cv_mem->NLS == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } - - return(SUNNonlinSolGetNumIters(cv_mem->NLS, nniters)); - } + *nniters = cv_mem->cv_nni; return(CV_SUCCESS); } @@ -1362,7 +1728,7 @@ int CVodeGetNumNonlinSolvIters(void *cvode_mem, long int *nniters) * nonlinear solver */ -int CVodeGetNumNonlinSolvConvFails(void *cvode_mem, long int *nncfails) +int CVodeGetNumNonlinSolvConvFails(void *cvode_mem, long int *nnfails) { CVodeMem cv_mem; @@ -1373,7 +1739,7 @@ int CVodeGetNumNonlinSolvConvFails(void *cvode_mem, long int *nncfails) cv_mem = (CVodeMem) cvode_mem; - *nncfails = cv_mem->cv_ncfn; + *nnfails = cv_mem->cv_nnf; return(CV_SUCCESS); } @@ -1385,52 +1751,48 @@ int CVodeGetNumNonlinSolvConvFails(void *cvode_mem, long int *nncfails) */ int CVodeGetNonlinSolvStats(void *cvode_mem, long int *nniters, - long int *nncfails) + long int *nnfails) { CVodeMem cv_mem; - booleantype sensi_sim; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeGetNonlinSolvStats", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetNonlinSolvStats", + MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; - *nncfails = cv_mem->cv_ncfn; - - /* are we computing sensitivities with the simultaneous approach? */ - sensi_sim = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)); + *nniters = cv_mem->cv_nni; + *nnfails = cv_mem->cv_nnf; - /* get number of iterations from the NLS */ - if (sensi_sim) { + return(CV_SUCCESS); +} - /* check that the NLS is non-NULL */ - if (cv_mem->NLSsim == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } +/* + * CVodeGetNumStepSolveFails + * + * Returns the current number of failed steps due to a nonlinear solver + * convergence failure + */ - return(SUNNonlinSolGetNumIters(cv_mem->NLSsim, nniters)); +int CVodeGetNumStepSolveFails(void *cvode_mem, long int *nncfails) +{ + CVodeMem cv_mem; - } else { + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetNumStepSolveFails", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } - /* check that the NLS is non-NULL */ - if (cv_mem->NLS == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } + cv_mem = (CVodeMem) cvode_mem; - return(SUNNonlinSolGetNumIters(cv_mem->NLS, nniters)); - } + *nncfails = cv_mem->cv_ncfn; return(CV_SUCCESS); } - /* * ================================================================= * Quadrature optional output functions @@ -1791,7 +2153,6 @@ int CVodeGetSensStats(void *cvode_mem, long int *nfSevals, long int *nfevalsS, int CVodeGetSensNumNonlinSolvIters(void *cvode_mem, long int *nSniters) { CVodeMem cv_mem; - booleantype sensi_stg; if (cvode_mem==NULL) { cvProcessError(NULL, CV_MEM_NULL, "CVODES", @@ -1807,49 +2168,76 @@ int CVodeGetSensNumNonlinSolvIters(void *cvode_mem, long int *nSniters) return(CV_NO_SENS); } - /* Are we computing sensitivities with a staggered approach? */ - sensi_stg = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED)); + *nSniters = cv_mem->cv_nniS; - if (sensi_stg) { + return(CV_SUCCESS); +} - /* check that the NLS is non-NULL */ - if (cv_mem->NLSstg == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetSensNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } +/*-----------------------------------------------------------------*/ - return(SUNNonlinSolGetNumIters(cv_mem->NLSstg, nSniters)); +int CVodeGetSensNumNonlinSolvConvFails(void *cvode_mem, long int *nSnfails) +{ + CVodeMem cv_mem; - } else { + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetSensNumNonlinSolvConvFails", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } - /* check that the NLS is non-NULL */ - if (cv_mem->NLSstg1 == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetSensNumNonlinSolvIters", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); - } + cv_mem = (CVodeMem) cvode_mem; - return(SUNNonlinSolGetNumIters(cv_mem->NLSstg1, nSniters)); + if (cv_mem->cv_sensi==SUNFALSE) { + cvProcessError(cv_mem, CV_NO_SENS, "CVODES", "CVodeGetSensNumNonlinSolvConvFails", MSGCV_NO_SENSI); + return(CV_NO_SENS); } + *nSnfails = cv_mem->cv_nnfS; + + return(CV_SUCCESS); } /*-----------------------------------------------------------------*/ -int CVodeGetSensNumNonlinSolvConvFails(void *cvode_mem, long int *nSncfails) +int CVodeGetSensNonlinSolvStats(void *cvode_mem, long int *nSniters, + long int *nSnfails) { CVodeMem cv_mem; if (cvode_mem==NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetSensNumNonlinSolvConvFails", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODES", + "CVodeGetSensNonlinSolvStats", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; if (cv_mem->cv_sensi==SUNFALSE) { - cvProcessError(cv_mem, CV_NO_SENS, "CVODES", "CVodeGetSensNumNonlinSolvConvFails", MSGCV_NO_SENSI); + cvProcessError(cv_mem, CV_NO_SENS, "CVODES", + "CVodeGetSensNonlinSolvStats", MSGCV_NO_SENSI); + return(CV_NO_SENS); + } + + *nSniters = cv_mem->cv_nniS; + *nSnfails = cv_mem->cv_nnfS; + + return(CV_SUCCESS); +} + +int CVodeGetNumStepSensSolveFails(void *cvode_mem, long int *nSncfails) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetNumStepSensSolveFails", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + if (cv_mem->cv_sensi==SUNFALSE) { + cvProcessError(cv_mem, CV_NO_SENS, "CVODES", + "CVodeGetNumStepSensSolveFails", MSGCV_NO_SENSI); return(CV_NO_SENS); } @@ -1887,7 +2275,7 @@ int CVodeGetStgrSensNumNonlinSolvIters(void *cvode_mem, long int *nSTGR1niters) /*-----------------------------------------------------------------*/ -int CVodeGetStgrSensNumNonlinSolvConvFails(void *cvode_mem, long int *nSTGR1ncfails) +int CVodeGetStgrSensNumNonlinSolvConvFails(void *cvode_mem, long int *nSTGR1nfails) { CVodeMem cv_mem; int is, Ns; @@ -1907,59 +2295,318 @@ int CVodeGetStgrSensNumNonlinSolvConvFails(void *cvode_mem, long int *nSTGR1ncfa } if(cv_mem->cv_ism==CV_STAGGERED1) - for(is=0; iscv_ncfnS1[is]; + for(is=0; iscv_nnfS1[is]; return(CV_SUCCESS); } /*-----------------------------------------------------------------*/ -int CVodeGetSensNonlinSolvStats(void *cvode_mem, long int *nSniters, - long int *nSncfails) +int CVodeGetStgrSensNonlinSolvStats(void *cvode_mem, + long int *nSTGR1niters, + long int *nSTGR1nfails) { CVodeMem cv_mem; - booleantype sensi_stg; + int is, Ns; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", + "CVodeGetStgrSensNonlinSolvStats", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + Ns = cv_mem->cv_Ns; + + if (cv_mem->cv_sensi == SUNFALSE) { + cvProcessError(cv_mem, CV_NO_SENS, "CVODES", + "CVodeGetStgrSensNonlinSolvStats", MSGCV_NO_SENSI); + return(CV_NO_SENS); + } + + if(cv_mem->cv_ism == CV_STAGGERED1) { + for(is=0; iscv_nniS1[is]; + for(is=0; iscv_nnfS1[is]; + } + + return(CV_SUCCESS); +} + +/*-----------------------------------------------------------------*/ + +int CVodeGetNumStepStgrSensSolveFails(void *cvode_mem, long int *nSTGR1ncfails) +{ + CVodeMem cv_mem; + int is, Ns; if (cvode_mem==NULL) { cvProcessError(NULL, CV_MEM_NULL, "CVODES", - "CVodeGetSensNonlinSolvstats", MSGCV_NO_MEM); + "CVodeGetNumStepStgrSensSolveFails", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; + Ns = cv_mem->cv_Ns; + if (cv_mem->cv_sensi==SUNFALSE) { cvProcessError(cv_mem, CV_NO_SENS, "CVODES", - "CVodeGetSensNonlinSolvStats", MSGCV_NO_SENSI); + "CVodeGetNumStepStgrSensSolveFails", MSGCV_NO_SENSI); return(CV_NO_SENS); } - *nSncfails = cv_mem->cv_ncfnS; + if(cv_mem->cv_ism==CV_STAGGERED1) + for(is=0; iscv_ncfnS1[is]; - /* Are we computing sensitivities with a staggered approach? */ - sensi_stg = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED)); + return(CV_SUCCESS); +} - if (sensi_stg) { +/* + * CVodePrintAllStats + * + * Print all integrator statistics + */ - /* check that the NLS is non-NULL */ - if (cv_mem->NLSstg == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetSensNumNonlinSolvStats", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); +int CVodePrintAllStats(void *cvode_mem, FILE *outfile, SUNOutputFormat fmt) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + CVodeProjMem cvproj_mem; + int is; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodePrintAllStats", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + switch(fmt) + { + case SUN_OUTPUTFORMAT_TABLE: + /* step and method stats */ + fprintf(outfile, "Current time = %"RSYM"\n", cv_mem->cv_tn); + fprintf(outfile, "Steps = %ld\n", cv_mem->cv_nst); + fprintf(outfile, "Error test fails = %ld\n", cv_mem->cv_netf); + fprintf(outfile, "NLS step fails = %ld\n", cv_mem->cv_ncfn); + fprintf(outfile, "Initial step size = %"RSYM"\n", cv_mem->cv_h0u); + fprintf(outfile, "Last step size = %"RSYM"\n", cv_mem->cv_hu); + fprintf(outfile, "Current step size = %"RSYM"\n", cv_mem->cv_next_h); + fprintf(outfile, "Last method order = %d\n", cv_mem->cv_qu); + fprintf(outfile, "Current method order = %d\n", cv_mem->cv_next_q); + fprintf(outfile, "Stab. lim. order reductions = %ld\n", cv_mem->cv_nor); + + /* function evaluations */ + fprintf(outfile, "RHS fn evals = %ld\n", cv_mem->cv_nfe); + + /* nonlinear solver stats */ + fprintf(outfile, "NLS iters = %ld\n", cv_mem->cv_nni); + fprintf(outfile, "NLS fails = %ld\n", cv_mem->cv_nnf); + if (cv_mem->cv_nst > 0) + { + fprintf(outfile, "NLS iters per step = %"RSYM"\n", + (realtype) cv_mem->cv_nni / (realtype) cv_mem->cv_nst); } - return(SUNNonlinSolGetNumIters(cv_mem->NLSstg, nSniters)); + /* linear solver stats */ + fprintf(outfile, "LS setups = %ld\n", cv_mem->cv_nsetups); + if (cv_mem->cv_lmem) + { + cvls_mem = (CVLsMem) (cv_mem->cv_lmem); + fprintf(outfile, "Jac fn evals = %ld\n", cvls_mem->nje); + fprintf(outfile, "LS RHS fn evals = %ld\n", cvls_mem->nfeDQ); + fprintf(outfile, "Prec setup evals = %ld\n", cvls_mem->npe); + fprintf(outfile, "Prec solves = %ld\n", cvls_mem->nps); + fprintf(outfile, "LS iters = %ld\n", cvls_mem->nli); + fprintf(outfile, "LS fails = %ld\n", cvls_mem->ncfl); + fprintf(outfile, "Jac-times setups = %ld\n", cvls_mem->njtsetup); + fprintf(outfile, "Jac-times evals = %ld\n", cvls_mem->njtimes); + if (cv_mem->cv_nni > 0) + { + fprintf(outfile, "LS iters per NLS iter = %"RSYM"\n", + (realtype) cvls_mem->nli / (realtype) cv_mem->cv_nni); + fprintf(outfile, "Jac evals per NLS iter = %"RSYM"\n", + (realtype) cvls_mem->nje / (realtype) cv_mem->cv_nni); + fprintf(outfile, "Prec evals per NLS iter = %"RSYM"\n", + (realtype) cvls_mem->npe / (realtype) cv_mem->cv_nni); + } + } - } else { + /* rootfinding stats */ + fprintf(outfile, "Root fn evals = %ld\n", cv_mem->cv_nge); - /* check that the NLS is non-NULL */ - if (cv_mem->NLSstg1 == NULL) { - cvProcessError(NULL, CV_MEM_FAIL, "CVODES", - "CVodeGetSensNumNonlinSolvStats", MSGCV_MEM_FAIL); - return(CV_MEM_FAIL); + /* projection stats */ + if (cv_mem->proj_mem) + { + cvproj_mem = (CVodeProjMem) (cv_mem->proj_mem); + fprintf(outfile, "Projection fn evals = %ld\n", cvproj_mem->nproj); + fprintf(outfile, "Projection fails = %ld\n", cvproj_mem->npfails); + } + + /* quadrature stats */ + if (cv_mem->cv_quadr) + { + fprintf(outfile, "Quad fn evals = %ld\n", cv_mem->cv_nfQe); + fprintf(outfile, "Quad error test fails = %ld\n", cv_mem->cv_netfQ); + } + + /* sensitivity stats */ + if (cv_mem->cv_sensi) + { + fprintf(outfile, "Sens fn evals = %ld\n", cv_mem->cv_nfSe); + fprintf(outfile, "Sens RHS fn evals = %ld\n", cv_mem->cv_nfeS); + fprintf(outfile, "Sens error test fails = %ld\n", cv_mem->cv_netfS); + if (cv_mem->cv_ism != CV_SIMULTANEOUS) + { + fprintf(outfile, "Sens NLS iters = %ld\n", cv_mem->cv_nniS); + fprintf(outfile, "Sens NLS fails = %ld\n", cv_mem->cv_nnfS); + fprintf(outfile, "Sens NLS step fails = %ld\n", cv_mem->cv_ncfnS); + } + if (cv_mem->cv_ism == CV_STAGGERED1) + { + fprintf(outfile, "Sens stgr1 NLS iters = %ld", cv_mem->cv_nniS1[0]); + for (is = 1; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ", %ld", cv_mem->cv_nniS1[is]); + fprintf(outfile, "\n"); + fprintf(outfile, "Sens stgr1 NLS fails = %ld", cv_mem->cv_nnfS1[0]); + for(is = 1; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ", %ld", cv_mem->cv_nnfS1[is]); + fprintf(outfile, "\n"); + fprintf(outfile, "Sens stgr1 NLS step fails = %ld", cv_mem->cv_ncfnS1[0]); + for(is = 1; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ", %ld", cv_mem->cv_ncfnS1[is]); + fprintf(outfile, "\n"); + } + fprintf(outfile, "Sens LS setups = %ld\n", cv_mem->cv_nsetupsS); + } + + /* quadrature-sensitivity stats */ + if (cv_mem->cv_quadr_sensi) + { + fprintf(outfile, "QuadSens fn evals = %ld\n", cv_mem->cv_nfQSe); + fprintf(outfile, "QuadSens error test fails = %ld\n", cv_mem->cv_netfQS); + } + break; + + case SUN_OUTPUTFORMAT_CSV: + /* step and method stats */ + fprintf(outfile, "Time,%"RSYM, cv_mem->cv_tn); + fprintf(outfile, ",Steps,%ld", cv_mem->cv_nst); + fprintf(outfile, ",Error test fails,%ld", cv_mem->cv_netf); + fprintf(outfile, ",NLS step fails,%ld", cv_mem->cv_ncfn); + fprintf(outfile, ",Initial step size,%"RSYM, cv_mem->cv_h0u); + fprintf(outfile, ",Last step size,%"RSYM, cv_mem->cv_hu); + fprintf(outfile, ",Current step size,%"RSYM, cv_mem->cv_next_h); + fprintf(outfile, ",Last method order,%d", cv_mem->cv_qu); + fprintf(outfile, ",Current method order,%d", cv_mem->cv_next_q); + fprintf(outfile, ",Stab. lim. order reductions,%ld", cv_mem->cv_nor); + + /* function evaluations */ + fprintf(outfile, ",RHS fn evals,%ld", cv_mem->cv_nfe); + + /* nonlinear solver stats */ + fprintf(outfile, ",NLS iters,%ld", cv_mem->cv_nni); + fprintf(outfile, ",NLS fails,%ld", cv_mem->cv_nnf); + if (cv_mem->cv_nst > 0) + { + fprintf(outfile, ",NLS iters per step,%"RSYM, + (realtype) cv_mem->cv_nni / (realtype) cv_mem->cv_nst); + } + else + { + fprintf(outfile, ",NLS iters per step,0"); + } + + /* linear solver stats */ + fprintf(outfile, ",LS setups,%ld", cv_mem->cv_nsetups); + if (cv_mem->cv_lmem) + { + cvls_mem = (CVLsMem) (cv_mem->cv_lmem); + fprintf(outfile, ",Jac fn evals,%ld", cvls_mem->nje); + fprintf(outfile, ",LS RHS fn evals,%ld", cvls_mem->nfeDQ); + fprintf(outfile, ",Prec setup evals,%ld", cvls_mem->npe); + fprintf(outfile, ",Prec solves,%ld", cvls_mem->nps); + fprintf(outfile, ",LS iters,%ld", cvls_mem->nli); + fprintf(outfile, ",LS fails,%ld", cvls_mem->ncfl); + fprintf(outfile, ",Jac-times setups,%ld", cvls_mem->njtsetup); + fprintf(outfile, ",Jac-times evals,%ld", cvls_mem->njtimes); + if (cv_mem->cv_nni > 0) + { + fprintf(outfile, ",LS iters per NLS iter,%"RSYM, + (realtype) cvls_mem->nli / (realtype) cv_mem->cv_nni); + fprintf(outfile, ",Jac evals per NLS iter,%"RSYM, + (realtype) cvls_mem->nje / (realtype) cv_mem->cv_nni); + fprintf(outfile, ",Prec evals per NLS iter,%"RSYM, + (realtype) cvls_mem->npe / (realtype) cv_mem->cv_nni); + } + else + { + fprintf(outfile, ",LS iters per NLS iter,0"); + fprintf(outfile, ",Jac evals per NLS iter,0"); + fprintf(outfile, ",Prec evals per NLS iter,0"); + } } - return(SUNNonlinSolGetNumIters(cv_mem->NLSstg1, nSniters)); + /* rootfinding stats */ + fprintf(outfile, ",Root fn evals,%ld", cv_mem->cv_nge); + + /* projection stats */ + if (cv_mem->proj_mem) + { + cvproj_mem = (CVodeProjMem) (cv_mem->proj_mem); + fprintf(outfile, ",Projection fn evals,%ld", cvproj_mem->nproj); + fprintf(outfile, ",Projection fails,%ld", cvproj_mem->npfails); + } + + /* quadrature stats */ + if (cv_mem->cv_quadr) + { + fprintf(outfile, ",Quad fn evals,%ld", cv_mem->cv_nfQe); + fprintf(outfile, ",Quad error test fails,%ld", cv_mem->cv_netfQ); + } + + /* sensitivity stats */ + if (cv_mem->cv_sensi) + { + fprintf(outfile, ",Sens fn evals,%ld", cv_mem->cv_nfSe); + fprintf(outfile, ",Sens RHS fn evals,%ld", cv_mem->cv_nfeS); + fprintf(outfile, ",Sens error test fails,%ld", cv_mem->cv_netfS); + if (cv_mem->cv_ism != CV_SIMULTANEOUS) + { + fprintf(outfile, ",Sens NLS iters,%ld", cv_mem->cv_nniS); + fprintf(outfile, ",Sens NLS fails,%ld", cv_mem->cv_nnfS); + fprintf(outfile, ",Sens NLS step fails,%ld", cv_mem->cv_ncfnS); + } + if (cv_mem->cv_ism == CV_STAGGERED1) + { + for (is = 0; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ",Sens stgr1[%i] NLS iters,%ld", + is, cv_mem->cv_nniS1[is]); + for (is = 0; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ",Sens stgr1[%i] NLS fails,%ld", + is, cv_mem->cv_nnfS1[is]); + for (is = 0; is < cv_mem->cv_Ns; is++) + fprintf(outfile, ",Sens stgr1[%i] NLS step fails,%ld", + is, cv_mem->cv_ncfnS1[is]); + } + fprintf(outfile, ",Sens LS setups,%ld", cv_mem->cv_nsetupsS); + } + + /* quadrature-sensitivity stats */ + if (cv_mem->cv_quadr_sensi) + { + fprintf(outfile, ",QuadSens fn evals,%ld", cv_mem->cv_nfQSe); + fprintf(outfile, ",QuadSens error test fails,%ld", cv_mem->cv_netfQS); + } + fprintf(outfile, "\n"); + break; + + default: + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodePrintAllStats", + "Invalid formatting option."); + return(CV_ILL_INPUT); } return(CV_SUCCESS); @@ -1967,6 +2614,24 @@ int CVodeGetSensNonlinSolvStats(void *cvode_mem, long int *nSniters, /*-----------------------------------------------------------------*/ +int CVodeGetUserData(void *cvode_mem, void** user_data) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetUserData", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + *user_data = cv_mem->cv_user_data; + + return(CV_SUCCESS); +} + +/*-----------------------------------------------------------------*/ + char *CVodeGetReturnFlagName(long int flag) { char *name; diff --git a/src/lib/cvodes/cvodes_ls.c b/src/lib/cvodes/cvodes_ls.c index 6960940..d44371e 100644 --- a/src/lib/cvodes/cvodes_ls.c +++ b/src/lib/cvodes/cvodes_ls.c @@ -1,9 +1,9 @@ -/*----------------------------------------------------------------- +/* ---------------------------------------------------------------- * Programmer(s): Daniel R. Reynolds @ SMU * Radu Serban @ LLNL - *----------------------------------------------------------------- + * ---------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -11,14 +11,14 @@ * * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End - *----------------------------------------------------------------- + * ---------------------------------------------------------------- * Implementation file for CVODES' linear solver interface. * * Part I contains routines for using CVSLS on forward problems. * * Part II contains wrappers for using CVSLS on adjoint * (backward) problems. - *-----------------------------------------------------------------*/ + * ---------------------------------------------------------------- */ #include #include @@ -124,9 +124,9 @@ static int cvLsLinSysBSWrapper(realtype t, N_Vector yB, N_Vector fyB, PART I - forward problems ================================================================*/ -/*----------------------------------------------------------------- +/*=============================================================== CVSLS Exported functions -- Required - -----------------------------------------------------------------*/ + ===============================================================*/ /*--------------------------------------------------------------- CVodeSetLinearSolver specifies the linear solver @@ -134,9 +134,11 @@ static int cvLsLinSysBSWrapper(realtype t, N_Vector yB, N_Vector fyB, int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, SUNMatrix A) { - CVodeMem cv_mem; - CVLsMem cvls_mem; - int retval, LSType; + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval, LSType; + booleantype iterative; /* is the solver iterative? */ + booleantype matrixbased; /* is a matrix structure used? */ /* Return immediately if either cvode_mem or LS inputs are NULL */ if (cvode_mem == NULL) { @@ -163,6 +165,11 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, /* Retrieve the LS type */ LSType = SUNLinSolGetType(LS); + /* Set flags based on LS type */ + iterative = (LSType != SUNLINEARSOLVER_DIRECT); + matrixbased = ((LSType != SUNLINEARSOLVER_ITERATIVE) && + (LSType != SUNLINEARSOLVER_MATRIX_EMBEDDED)); + /* Test if vector is compatible with LS interface */ if ( (cv_mem->cv_tempv->ops->nvconst == NULL) || (cv_mem->cv_tempv->ops->nvwrmsnorm == NULL) ) { @@ -171,30 +178,41 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, return(CVLS_ILL_INPUT); } - if ( (LSType == SUNLINEARSOLVER_ITERATIVE) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) { + /* Ensure that A is NULL when LS is matrix-embedded */ + if ((LSType == SUNLINEARSOLVER_MATRIX_EMBEDDED) && (A != NULL)) { + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver", + "Incompatible inputs: matrix-embedded LS requires NULL matrix"); + return(CVLS_ILL_INPUT); + } + + /* Check for compatible LS type, matrix and "atimes" support */ + if (iterative) { + if (cv_mem->cv_tempv->ops->nvgetlength == NULL) { cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVSLS", "CVodeSetLinearSolver", MSG_LS_BAD_NVECTOR); return(CVLS_ILL_INPUT); } - } - /* Check for compatible LS type, matrix and "atimes" support */ - if ((LSType == SUNLINEARSOLVER_ITERATIVE) && (LS->ops->setatimes == NULL)) { - cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver", - "Incompatible inputs: iterative LS must support ATimes routine"); - return(CVLS_ILL_INPUT); - } - if ((LSType == SUNLINEARSOLVER_DIRECT) && (A == NULL)) { - cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver", - "Incompatible inputs: direct LS requires non-NULL matrix"); - return(CVLS_ILL_INPUT); - } - if ((LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) && (A == NULL)) { - cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver", - "Incompatible inputs: matrix-iterative LS requires non-NULL matrix"); + if (!matrixbased && (LSType != SUNLINEARSOLVER_MATRIX_EMBEDDED) && + (LS->ops->setatimes == NULL)) { + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVSLS", "CVodeSetLinearSolver", + "Incompatible inputs: iterative LS must support ATimes routine"); + return(CVLS_ILL_INPUT); + } + + if (matrixbased && (A == NULL)) { + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVSLS", "CVodeSetLinearSolver", + "Incompatible inputs: matrix-iterative LS requires non-NULL matrix"); + return(CVLS_ILL_INPUT); + } + + } else if (A == NULL) { + + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVSLS", "CVodeSetLinearSolver", + "Incompatible inputs: direct LS requires non-NULL matrix"); return(CVLS_ILL_INPUT); + } /* free any existing system solver attached to CVode */ @@ -219,6 +237,10 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, /* set SUNLinearSolver pointer */ cvls_mem->LS = LS; + /* Linear solver type information */ + cvls_mem->iterative = iterative; + cvls_mem->matrixbased = matrixbased; + /* Set defaults for Jacobian-related fields */ if (A != NULL) { cvls_mem->jacDQ = SUNTRUE; @@ -233,6 +255,7 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, cvls_mem->jtimesDQ = SUNTRUE; cvls_mem->jtsetup = NULL; cvls_mem->jtimes = cvLsDQJtimes; + cvls_mem->jt_f = cv_mem->cv_f; cvls_mem->jt_data = cv_mem; cvls_mem->user_linsys = SUNFALSE; @@ -249,10 +272,11 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, cvLsInitializeCounters(cvls_mem); /* Set default values for the rest of the LS parameters */ - cvls_mem->msbj = CVLS_MSBJ; - cvls_mem->jbad = SUNTRUE; - cvls_mem->eplifac = CVLS_EPLIN; - cvls_mem->last_flag = CVLS_SUCCESS; + cvls_mem->msbj = CVLS_MSBJ; + cvls_mem->jbad = SUNTRUE; + cvls_mem->dgmax_jbad = CVLS_DGMAX; + cvls_mem->eplifac = CVLS_EPLIN; + cvls_mem->last_flag = CVLS_SUCCESS; /* If LS supports ATimes, attach CVLs routine */ if (LS->ops->setatimes) { @@ -302,10 +326,15 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, return(CVLS_MEM_FAIL); } - /* For iterative LS, compute sqrtN */ - if ( (LSType == SUNLINEARSOLVER_ITERATIVE) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) - cvls_mem->sqrtN = SUNRsqrt( N_VGetLength(cvls_mem->ytemp) ); + /* For iterative LS, compute default norm conversion factor */ + if (iterative) + cvls_mem->nrmfac = SUNRsqrt( N_VGetLength(cvls_mem->ytemp) ); + + /* Check if solution scaling should be enabled */ + if (matrixbased && cv_mem->cv_lmm == CV_BDF) + cvls_mem->scalesol = SUNTRUE; + else + cvls_mem->scalesol = SUNFALSE; /* Attach linear solver memory to integrator memory */ cv_mem->cv_lmem = cvls_mem; @@ -314,9 +343,9 @@ int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS, } -/*----------------------------------------------------------------- - CVSLS Exported functions -- Optional input/output - -----------------------------------------------------------------*/ +/*=============================================================== + Optional Set routines + ===============================================================*/ /* CVodeSetJacFn specifies the Jacobian function. */ @@ -358,6 +387,30 @@ int CVodeSetJacFn(void *cvode_mem, CVLsJacFn jac) } +/* CVodeSetDeltaGammaMaxBadJac specifies the maximum gamma ratio change + * after a NLS convergence failure with a potentially bad Jacobian. If + * |gamma/gammap-1| < dgmax_jbad then the Jacobian is marked as bad */ +int CVodeSetDeltaGammaMaxBadJac(void *cvode_mem, realtype dgmax_jbad) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* Access CVLsMem structure */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeSetDeltaGammaMaxBadJac", + &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* Set value or use default */ + if(dgmax_jbad <= ZERO) + cvls_mem->dgmax_jbad = CVLS_DGMAX; + else + cvls_mem->dgmax_jbad = dgmax_jbad; + + return(CVLS_SUCCESS); +} + + /* CVodeSetEpsLin specifies the nonlinear -> linear tolerance scale factor */ int CVodeSetEpsLin(void *cvode_mem, realtype eplifac) { @@ -383,20 +436,79 @@ int CVodeSetEpsLin(void *cvode_mem, realtype eplifac) } -/* CVodeSetMaxStepsBetweenJac specifies the maximum number of - time steps to wait before recomputing the Jacobian matrix - and/or preconditioner */ -int CVodeSetMaxStepsBetweenJac(void *cvode_mem, long int msbj) +/* CVodeSetLSNormFactor sets or computes the factor to use when converting from + the integrator tolerance to the linear solver tolerance (WRMS to L2 norm). */ +int CVodeSetLSNormFactor(void *cvode_mem, realtype nrmfac) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeSetLSNormFactor", + &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return(retval); + + if (nrmfac > ZERO) { + /* user-provided factor */ + cvls_mem->nrmfac = nrmfac; + } else if (nrmfac < ZERO) { + /* compute factor for WRMS norm with dot product */ + N_VConst(ONE, cvls_mem->ytemp); + cvls_mem->nrmfac = SUNRsqrt(N_VDotProd(cvls_mem->ytemp, cvls_mem->ytemp)); + } else { + /* compute default factor for WRMS norm from vector legnth */ + cvls_mem->nrmfac = SUNRsqrt(N_VGetLength(cvls_mem->ytemp)); + } + + return(CVLS_SUCCESS); +} + + +/* CVodeSetJacEvalFrequency specifies the frequency for recomputing the Jacobian + matrix and/or preconditioner */ +int CVodeSetJacEvalFrequency(void *cvode_mem, long int msbj) { CVodeMem cv_mem; CVLsMem cvls_mem; int retval; /* access CVLsMem structure; store input and return */ - retval = cvLs_AccessLMem(cvode_mem, "CVodeSetMaxStepsBetweenJac", + retval = cvLs_AccessLMem(cvode_mem, "CVodeSetJacEvalFrequency", &cv_mem, &cvls_mem); if (retval != CVLS_SUCCESS) return(retval); - cvls_mem->msbj = (msbj <= ZERO) ? CVLS_MSBJ : msbj; + + /* Check for legal msbj */ + if(msbj < 0) { + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetJacEvalFrequency", + "A negative evaluation frequency was provided."); + return(CVLS_ILL_INPUT); + } + + cvls_mem->msbj = (msbj == 0) ? CVLS_MSBJ : msbj; + + return(CVLS_SUCCESS); +} + +/* CVodeSetLinearSolutionScaling enables or disables scaling the + linear solver solution to account for changes in gamma. */ +int CVodeSetLinearSolutionScaling(void *cvode_mem, booleantype onoff) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure; store input and return */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeSetLinearSolutionScaling", + &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* check for valid solver and method type */ + if (!(cvls_mem->matrixbased) || cv_mem->cv_lmm != CV_BDF) + return(CVLS_ILL_INPUT); + + /* set solution scaling flag */ + cvls_mem->scalesol = onoff; return(CVLS_SUCCESS); } @@ -409,8 +521,8 @@ int CVodeSetPreconditioner(void *cvode_mem, CVLsPrecSetupFn psetup, { CVodeMem cv_mem; CVLsMem cvls_mem; - PSetupFn cvls_psetup; - PSolveFn cvls_psolve; + SUNPSetupFn cvls_psetup; + SUNPSolveFn cvls_psolve; int retval; /* access CVLsMem structure */ @@ -479,6 +591,7 @@ int CVodeSetJacTimes(void *cvode_mem, CVLsJacTimesSetupFn jtsetup, cvls_mem->jtimesDQ = SUNTRUE; cvls_mem->jtsetup = NULL; cvls_mem->jtimes = cvLsDQJtimes; + cvls_mem->jt_f = cv_mem->cv_f; cvls_mem->jt_data = cv_mem; } @@ -486,6 +599,37 @@ int CVodeSetJacTimes(void *cvode_mem, CVLsJacTimesSetupFn jtsetup, } +/* CVodeSetJacTimesRhsFn specifies an alternative user-supplied ODE right-hand + side function to use in the internal finite difference Jacobian-vector + product */ +int CVodeSetJacTimesRhsFn(void *cvode_mem, CVRhsFn jtimesRhsFn) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeSetJacTimesRhsFn", + &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* check if using internal finite difference approximation */ + if (!(cvls_mem->jtimesDQ)) { + cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVSLS", "CVodeSetJacTimesRhsFn", + "Internal finite-difference Jacobian-vector product is disabled."); + return(CVLS_ILL_INPUT); + } + + /* store function pointers for RHS function (NULL implies use ODE RHS) */ + if (jtimesRhsFn != NULL) + cvls_mem->jt_f = jtimesRhsFn; + else + cvls_mem->jt_f = cv_mem->cv_f; + + return(CVLS_SUCCESS); +} + + /* CVodeSetLinSysFn specifies the linear system setup function. */ int CVodeSetLinSysFn(void *cvode_mem, CVLsLinSysFn linsys) { @@ -519,6 +663,50 @@ int CVodeSetLinSysFn(void *cvode_mem, CVLsLinSysFn linsys) return(CVLS_SUCCESS); } +/*=============================================================== + Optional Get routines + ===============================================================*/ + +int CVodeGetJac(void* cvode_mem, SUNMatrix* J) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure; set output and return */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeGetJac", &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return retval; + *J = cvls_mem->savedJ; + return CVLS_SUCCESS; +} + +int CVodeGetJacTime(void* cvode_mem, sunrealtype* t_J) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure; set output and return */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeGetJacTime", &cv_mem, + &cvls_mem); + if (retval != CVLS_SUCCESS) return retval; + *t_J = cvls_mem->tnlj; + return CVLS_SUCCESS; +} + +int CVodeGetJacNumSteps(void* cvode_mem, long int* nst_J) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure; set output and return */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeGetJacNumSteps", &cv_mem, + &cvls_mem); + if (retval != CVLS_SUCCESS) return retval; + *nst_J = cvls_mem->nstlj; + return CVLS_SUCCESS; +} /* CVodeGetLinWorkSpace returns the length of workspace allocated for the CVLS linear solver interface */ @@ -706,6 +894,33 @@ int CVodeGetNumJtimesEvals(void *cvode_mem, long int *njvevals) } +/* CVodeGetLinSolveStats returns statistics related to the linear solve. */ +int CVodeGetLinSolveStats(void* cvode_mem, long int* njevals, long int* nfevalsLS, + long int* nliters, long int* nlcfails, long int* npevals, + long int* npsolves, long int* njtsetups, long int* njtimes) +{ + CVodeMem cv_mem; + CVLsMem cvls_mem; + int retval; + + /* access CVLsMem structure; set output value and return */ + retval = cvLs_AccessLMem(cvode_mem, "CVodeGetLinSolveStats", + &cv_mem, &cvls_mem); + if (retval != CVLS_SUCCESS) return(retval); + + *njevals = cvls_mem->nje; + *nfevalsLS = cvls_mem->nfeDQ; + *nliters = cvls_mem->nli; + *nlcfails = cvls_mem->ncfl; + *npevals = cvls_mem->npe; + *npsolves = cvls_mem->nps; + *njtsetups = cvls_mem->njtsetup; + *njtimes = cvls_mem->njtimes; + + return(CVLS_SUCCESS); +} + + /* CVodeGetLastLinFlag returns the last flag set in a CVLS function */ int CVodeGetLastLinFlag(void *cvode_mem, long int *flag) { @@ -773,10 +988,9 @@ char *CVodeGetLinReturnFlagName(long int flag) return(name); } - -/*----------------------------------------------------------------- +/*================================================================= CVSLS private functions - -----------------------------------------------------------------*/ + =================================================================*/ /*----------------------------------------------------------------- cvLsATimes @@ -964,7 +1178,7 @@ int cvLsDenseDQJac(realtype t, N_Vector y, N_Vector fy, cvls_mem = (CVLsMem) cv_mem->cv_lmem; /* access matrix dimension */ - N = SUNDenseMatrix_Rows(Jac); + N = SUNDenseMatrix_Columns(Jac); /* Rename work vector for readibility */ ftemp = tmp1; @@ -1158,7 +1372,7 @@ int cvLsDQJtimes(N_Vector v, N_Vector Jv, realtype t, N_VLinearSum(sig, v, ONE, y, work); /* Set Jv = f(tn, y+sig*v) */ - retval = cv_mem->cv_f(t, work, Jv, cv_mem->cv_user_data); + retval = cvls_mem->jt_f(t, work, Jv, cv_mem->cv_user_data); cvls_mem->nfeDQ++; if (retval == 0) break; if (retval < 0) return(-1); @@ -1207,7 +1421,7 @@ static int cvLsLinSys(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, retval = SUNMatCopy(cvls_mem->savedJ, A); if (retval) { cvProcessError(cv_mem, CVLS_SUNMAT_FAIL, "CVSLS", - "cvLsSetup", MSG_LS_SUNMAT_FAILED); + "cvLsLinSys", MSG_LS_SUNMAT_FAILED); cvls_mem->last_flag = CVLS_SUNMAT_FAIL; return(cvls_mem->last_flag); } @@ -1222,7 +1436,7 @@ static int cvLsLinSys(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, retval = SUNMatZero(A); if (retval) { cvProcessError(cv_mem, CVLS_SUNMAT_FAIL, "CVSLS", - "cvLsSetup", MSG_LS_SUNMAT_FAILED); + "cvLsLinSys", MSG_LS_SUNMAT_FAILED); cvls_mem->last_flag = CVLS_SUNMAT_FAIL; return(cvls_mem->last_flag); } @@ -1233,7 +1447,7 @@ static int cvLsLinSys(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, vtemp1, vtemp2, vtemp3); if (retval < 0) { cvProcessError(cv_mem, CVLS_JACFUNC_UNRECVR, "CVSLS", - "cvLsSetup", MSG_LS_JACFUNC_FAILED); + "cvLsLinSys", MSG_LS_JACFUNC_FAILED); cvls_mem->last_flag = CVLS_JACFUNC_UNRECVR; return(-1); } @@ -1246,7 +1460,7 @@ static int cvLsLinSys(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, retval = SUNMatCopy(A, cvls_mem->savedJ); if (retval) { cvProcessError(cv_mem, CVLS_SUNMAT_FAIL, "CVSLS", - "cvLsSetup", MSG_LS_SUNMAT_FAILED); + "cvLsLinSys", MSG_LS_SUNMAT_FAILED); cvls_mem->last_flag = CVLS_SUNMAT_FAIL; return(cvls_mem->last_flag); } @@ -1257,7 +1471,7 @@ static int cvLsLinSys(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, retval = SUNMatScaleAddI(-gamma, A); if (retval) { cvProcessError(cv_mem, CVLS_SUNMAT_FAIL, "CVSLS", - "cvLsSetup", MSG_LS_SUNMAT_FAILED); + "cvLsLinSys", MSG_LS_SUNMAT_FAILED); cvls_mem->last_flag = CVLS_SUNMAT_FAIL; return(cvls_mem->last_flag); } @@ -1338,7 +1552,7 @@ int cvLsInitialize(CVodeMem cv_mem) if (cvls_mem->savedJ == NULL) { cvls_mem->savedJ = SUNMatClone(cvls_mem->A); if (cvls_mem->savedJ == NULL) { - cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVLS", "cvLsInitialize", + cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVSLS", "cvLsInitialize", MSG_LS_MEM_FAIL); cvls_mem->last_flag = CVLS_MEM_FAIL; return(CVLS_MEM_FAIL); @@ -1377,6 +1591,12 @@ int cvLsInitialize(CVodeMem cv_mem) if ( (cvls_mem->A == NULL) && (cvls_mem->pset == NULL) ) cv_mem->cv_lsetup = NULL; + /* When using a matrix-embedded linear solver, disable lsetup call and solution scaling */ + if (SUNLinSolGetType(cvls_mem->LS) == SUNLINEARSOLVER_MATRIX_EMBEDDED) { + cv_mem->cv_lsetup = NULL; + cvls_mem->scalesol = SUNFALSE; + } + /* Call LS initialize routine, and return result */ cvls_mem->last_flag = SUNLinSolInitialize(cvls_mem->LS); return(cvls_mem->last_flag); @@ -1413,6 +1633,12 @@ int cvLsSetup(CVodeMem cv_mem, int convfail, N_Vector ypred, } cvls_mem = (CVLsMem) cv_mem->cv_lmem; + /* Immediately return when using matrix-embedded linear solver */ + if (SUNLinSolGetType(cvls_mem->LS) == SUNLINEARSOLVER_MATRIX_EMBEDDED) { + cvls_mem->last_flag = CVLS_SUCCESS; + return(cvls_mem->last_flag); + } + /* Set CVLs N_Vector pointers to current solution and rhs */ cvls_mem->ycur = ypred; cvls_mem->fcur = fpred; @@ -1420,8 +1646,8 @@ int cvLsSetup(CVodeMem cv_mem, int convfail, N_Vector ypred, /* Use nst, gamma/gammap, and convfail to set J/P eval. flag jok */ dgamma = SUNRabs((cv_mem->cv_gamma/cv_mem->cv_gammap) - ONE); cvls_mem->jbad = (cv_mem->cv_nst == 0) || - (cv_mem->cv_nst > cvls_mem->nstlj + cvls_mem->msbj) || - ((convfail == CV_FAIL_BAD_J) && (dgamma < CVLS_DGMAX)) || + (cv_mem->cv_nst >= cvls_mem->nstlj + cvls_mem->msbj) || + ((convfail == CV_FAIL_BAD_J) && (dgamma < cvls_mem->dgmax_jbad)) || (convfail == CV_FAIL_OTHER); /* Setup the linear system if necessary */ @@ -1436,6 +1662,7 @@ int cvLsSetup(CVodeMem cv_mem, int convfail, N_Vector ypred, if (*jcurPtr) { cvls_mem->nje++; cvls_mem->nstlj = cv_mem->cv_nst; + cvls_mem->tnlj = cv_mem->cv_tn; } /* Check linsys() return value and return if necessary */ @@ -1473,6 +1700,7 @@ int cvLsSetup(CVodeMem cv_mem, int convfail, N_Vector ypred, if (*jcurPtr) { cvls_mem->npe++; cvls_mem->nstlj = cv_mem->cv_nst; + cvls_mem->tnlj = cv_mem->cv_tn; } /* Update jcur flag if we suggested an update */ @@ -1496,8 +1724,12 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, { CVLsMem cvls_mem; realtype bnorm, deltar, delta, w_mean; - int curiter, nli_inc, retval, LSType; + int curiter, nli_inc, retval; booleantype do_sensi_sim, do_sensi_stg, do_sensi_stg1; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + realtype resnorm; + long int nps_inc; +#endif /* access CVLsMem structure */ if (cv_mem->cv_lmem==NULL) { @@ -1507,9 +1739,6 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, } cvls_mem = (CVLsMem) cv_mem->cv_lmem; - /* Retrieve the LS type */ - LSType = SUNLinSolGetType(cvls_mem->LS); - /* are we computing sensitivities and with which approach? */ do_sensi_sim = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_SIMULTANEOUS)); do_sensi_stg = (cv_mem->cv_sensi && (cv_mem->cv_ism==CV_STAGGERED)); @@ -1528,8 +1757,7 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, /* If the linear solver is iterative: test norm(b), if small, return x = 0 or x = b; set linear solver tolerance (in left/right scaled 2-norm) */ - if ( (LSType == SUNLINEARSOLVER_ITERATIVE) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) { + if (cvls_mem->iterative) { deltar = cvls_mem->eplifac * cv_mem->cv_tq[4]; bnorm = N_VWrmsNorm(b, weight); if (bnorm <= deltar) { @@ -1537,7 +1765,8 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, cvls_mem->last_flag = CVLS_SUCCESS; return(cvls_mem->last_flag); } - delta = deltar * cvls_mem->sqrtN; + /* Adjust tolerance for 2-norm */ + delta = deltar * cvls_mem->nrmfac; } else { delta = ZERO; } @@ -1572,9 +1801,8 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, <=> w_mean^2 \sum_{i=0}^{n-1} (b - A x_i)^2 < tol^2 <=> \sum_{i=0}^{n-1} (b - A x_i)^2 < tol^2 / w_mean^2 <=> || b - A x ||_2 < tol / w_mean - So we compute w_mean = ||w||_RMS and scale the desired tolerance accordingly. */ - } else if ( (LSType == SUNLINEARSOLVER_ITERATIVE) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) { + So we compute w_mean = ||w||_RMS = ||w||_2 and scale the desired tolerance accordingly. */ + } else if (cvls_mem->iterative) { N_VConst(ONE, cvls_mem->x); w_mean = N_VWrmsNorm(weight, cvls_mem->x); @@ -1585,6 +1813,15 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, /* Set initial guess x = 0 to LS */ N_VConst(ZERO, cvls_mem->x); + /* Set zero initial guess flag */ + retval = SUNLinSolSetZeroGuess(cvls_mem->LS, SUNTRUE); + if (retval != SUNLS_SUCCESS) return(-1); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + /* Store previous nps value in nps_inc */ + nps_inc = cvls_mem->nps; +#endif + /* If a user-provided jtsetup routine is supplied, call that here */ if (cvls_mem->jtsetup) { cvls_mem->last_flag = cvls_mem->jtsetup(cv_mem->cv_tn, ynow, fnow, @@ -1603,18 +1840,23 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, /* If using a direct or matrix-iterative solver, BDF method, and gamma has changed, scale the correction to account for change in gamma */ - if ( ((LSType == SUNLINEARSOLVER_DIRECT) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE)) && - (cv_mem->cv_lmm == CV_BDF) && - (cv_mem->cv_gamrat != ONE) ) + if (cvls_mem->scalesol && cv_mem->cv_gamrat != ONE) N_VScale(TWO/(ONE + cv_mem->cv_gamrat), b, b); /* Retrieve statistics from iterative linear solvers */ +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + resnorm = ZERO; +#endif nli_inc = 0; - if ( ((LSType == SUNLINEARSOLVER_ITERATIVE) || - (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE)) && - (cvls_mem->LS->ops->numiters) ) - nli_inc = SUNLinSolNumIters(cvls_mem->LS); + if (cvls_mem->iterative) { +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + if (cvls_mem->LS->ops->resnorm) + resnorm = SUNLinSolResNorm(cvls_mem->LS); +#endif + if (cvls_mem->LS->ops->numiters) + nli_inc = SUNLinSolNumIters(cvls_mem->LS); + } + /* Increment counters nli and ncfl */ cvls_mem->nli += nli_inc; @@ -1623,6 +1865,13 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, /* Interpret solver return value */ cvls_mem->last_flag = retval; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + SUNLogger_QueueMsg(CV_LOGGER, SUN_LOGLEVEL_DEBUG, + "CVODE::cvLsSolve", "ls-stats", + "bnorm = %.16g, resnorm = %.16g, ls_iters = %i, prec_solves = %i", + bnorm, resnorm, nli_inc, (int)(cvls_mem->nps - nps_inc)); +#endif + switch(retval) { case SUNLS_SUCCESS: @@ -1938,6 +2187,47 @@ int CVodeSetEpsLinB(void *cvode_mem, int which, realtype eplifacB) } +int CVodeSetLSNormFactorB(void *cvode_mem, int which, realtype nrmfacB) +{ + CVodeMem cv_mem; + CVadjMem ca_mem; + CVodeBMem cvB_mem; + CVLsMemB cvlsB_mem; + void *cvodeB_mem; + int retval; + + /* access relevant memory structures */ + retval = cvLs_AccessLMemB(cvode_mem, which, "CVodeSetLSNormFactorB", + &cv_mem, &ca_mem, &cvB_mem, &cvlsB_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* call corresponding routine for cvodeB_mem structure */ + cvodeB_mem = (void *) (cvB_mem->cv_mem); + return(CVodeSetLSNormFactor(cvodeB_mem, nrmfacB)); +} + + +int CVodeSetLinearSolutionScalingB(void *cvode_mem, int which, + booleantype onoffB) +{ + CVodeMem cv_mem; + CVadjMem ca_mem; + CVodeBMem cvB_mem; + CVLsMemB cvlsB_mem; + void *cvodeB_mem; + int retval; + + /* access relevant memory structures */ + retval = cvLs_AccessLMemB(cvode_mem, which, "CVodeSetLinearSolutionScalingB", + &cv_mem, &ca_mem, &cvB_mem, &cvlsB_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* call corresponding routine for cvodeB_mem structure */ + cvodeB_mem = (void *) (cvB_mem->cv_mem); + return(CVodeSetLinearSolutionScaling(cvodeB_mem, onoffB)); +} + + int CVodeSetPreconditionerB(void *cvode_mem, int which, CVLsPrecSetupFnB psetupB, CVLsPrecSolveFnB psolveB) @@ -2058,6 +2348,26 @@ int CVodeSetJacTimesBS(void *cvode_mem, int which, } +int CVodeSetJacTimesRhsFnB(void *cvode_mem, int which, CVRhsFn jtimesRhsFn) +{ + CVodeMem cv_mem; + CVadjMem ca_mem; + CVodeBMem cvB_mem; + CVLsMemB cvlsB_mem; + void *cvodeB_mem; + int retval; + + /* access relevant memory structures */ + retval = cvLs_AccessLMemB(cvode_mem, which, "CVodeSetJacTimesRhsFnB", + &cv_mem, &ca_mem, &cvB_mem, &cvlsB_mem); + if (retval != CVLS_SUCCESS) return(retval); + + /* Call the corresponding "set" routine for the backward problem */ + cvodeB_mem = (void *) (cvB_mem->cv_mem); + return(CVodeSetJacTimesRhsFn(cvodeB_mem, jtimesRhsFn)); +} + + int CVodeSetLinSysFnB(void *cvode_mem, int which, CVLsLinSysFnB linsysB) { CVodeMem cv_mem; diff --git a/src/lib/cvodes/cvodes_ls_impl.h b/src/lib/cvodes/cvodes_ls_impl.h index f7553bc..f8b259d 100644 --- a/src/lib/cvodes/cvodes_ls_impl.h +++ b/src/lib/cvodes/cvodes_ls_impl.h @@ -3,7 +3,7 @@ * Radu Serban @ LLNL *----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -37,7 +37,7 @@ extern "C" { the nonlinear iteration is multiplied to get a tolerance on the linear iteration -----------------------------------------------------------------*/ -#define CVLS_MSBJ 50 +#define CVLS_MSBJ 51 #define CVLS_DGMAX RCONST(0.2) #define CVLS_EPLIN RCONST(0.05) @@ -53,15 +53,24 @@ extern "C" { -----------------------------------------------------------------*/ typedef struct CVLsMemRec { + /* Linear solver type information */ + booleantype iterative; /* is the solver iterative? */ + booleantype matrixbased; /* is a matrix structure used? */ + /* Jacobian construction & storage */ - booleantype jacDQ; /* SUNTRUE if using internal DQ Jac approx. */ - CVLsJacFn jac; /* Jacobian routine to be called */ - void *J_data; /* user data is passed to jac */ - booleantype jbad; /* heuristic suggestion for pset */ + booleantype jacDQ; /* SUNTRUE if using internal DQ Jac approx. */ + CVLsJacFn jac; /* Jacobian routine to be called */ + void *J_data; /* user data is passed to jac */ + booleantype jbad; /* heuristic suggestion for pset */ + realtype dgmax_jbad; /* if convfail = FAIL_BAD_J and the gamma ratio * + * |gamma/gammap-1| < dgmax_jbad then J is bad */ + + /* Matrix-based solver, scale solution to account for change in gamma */ + booleantype scalesol; /* Iterative solver tolerance */ - realtype sqrtN; /* sqrt(N) */ - realtype eplifac; /* eplifac = user specified or EPLIN_DEFAULT */ + realtype eplifac; /* nonlinear -> linear tol scaling factor */ + realtype nrmfac; /* integrator -> LS norm conversion factor */ /* Linear solver, matrix and vector objects/pointers */ SUNLinearSolver LS; /* generic linear solver object */ @@ -84,6 +93,7 @@ typedef struct CVLsMemRec { long int ncfl; /* ncfl = total number of convergence failures */ long int njtsetup; /* njtsetup = total number of calls to jtsetup */ long int njtimes; /* njtimes = total number of calls to jtimes */ + sunrealtype tnlj; /* tnlj = t_n at last jac/pset call */ /* Preconditioner computation * (a) user-provided: @@ -107,6 +117,7 @@ typedef struct CVLsMemRec { booleantype jtimesDQ; CVLsJacTimesSetupFn jtsetup; CVLsJacTimesVecFn jtimes; + CVRhsFn jt_f; void *jt_data; /* Linear system setup function diff --git a/src/lib/cvodes/cvodes_nls.c b/src/lib/cvodes/cvodes_nls.c index 3ccb293..a903a78 100644 --- a/src/lib/cvodes/cvodes_nls.c +++ b/src/lib/cvodes/cvodes_nls.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -109,6 +109,77 @@ int CVodeSetNonlinearSolver(void *cvode_mem, SUNNonlinearSolver NLS) /* Reset the acnrmcur flag to SUNFALSE */ cv_mem->cv_acnrmcur = SUNFALSE; + /* Set the nonlinear system RHS function */ + if (!(cv_mem->cv_f)) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVodeSetNonlinearSolver", + "The ODE RHS function is NULL"); + return(CV_ILL_INPUT); + } + cv_mem->nls_f = cv_mem->cv_f; + + return(CV_SUCCESS); +} + + +/*--------------------------------------------------------------- + CVodeSetNlsRhsFn: + + This routine sets an alternative user-supplied ODE right-hand + side function to use in the evaluation of nonlinear system + functions. + ---------------------------------------------------------------*/ +int CVodeSetNlsRhsFn(void *cvode_mem, CVRhsFn f) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetNlsRhsFn", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + if (f) + cv_mem->nls_f = f; + else + cv_mem->nls_f = cv_mem->cv_f; + + return(CV_SUCCESS); +} + + +/*--------------------------------------------------------------- + CVodeGetNonlinearSystemData: + + This routine provides access to the relevant data needed to + compute the nonlinear system function. + ---------------------------------------------------------------*/ +int CVodeGetNonlinearSystemData(void *cvode_mem, realtype *tcur, + N_Vector *ypred, N_Vector *yn, + N_Vector *fn, realtype *gamma, + realtype *rl1, N_Vector *zn1, + void **user_data) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeGetNonlinearSystemData", + MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + *tcur = cv_mem->cv_tn; + *ypred = cv_mem->cv_zn[0]; + *yn = cv_mem->cv_y; + *fn = cv_mem->cv_ftemp; + *gamma = cv_mem->cv_gamma; + *rl1 = cv_mem->cv_rl1; + *zn1 = cv_mem->cv_zn[1]; + *user_data = cv_mem->cv_user_data; + return(CV_SUCCESS); } @@ -246,7 +317,7 @@ static int cvNlsConvTest(SUNNonlinearSolver NLS, N_Vector ycor, N_Vector delta, dcon = del * SUNMIN(ONE, cv_mem->cv_crate) / tol; if (dcon <= ONE) { - cv_mem->cv_acnrm = (m==0) ? del : N_VWrmsNorm(ycor, cv_mem->cv_ewt); + cv_mem->cv_acnrm = (m==0) ? del : N_VWrmsNorm(ycor, ewt); cv_mem->cv_acnrmcur = SUNTRUE; return(CV_SUCCESS); /* Nonlinear system was solved successfully */ } @@ -268,8 +339,7 @@ static int cvNlsResidual(N_Vector ycor, N_Vector res, void* cvode_mem) int retval; if (cvode_mem == NULL) { - cvProcessError(NULL, CV_MEM_NULL, "CVODE", - "cvNlsResidual", MSGCV_NO_MEM); + cvProcessError(NULL, CV_MEM_NULL, "CVODE", "cvNlsResidual", MSGCV_NO_MEM); return(CV_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; @@ -278,8 +348,8 @@ static int cvNlsResidual(N_Vector ycor, N_Vector res, void* cvode_mem) N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, ycor, cv_mem->cv_y); /* evaluate the rhs function */ - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, cv_mem->cv_ftemp, - cv_mem->cv_user_data); + retval = cv_mem->nls_f(cv_mem->cv_tn, cv_mem->cv_y, cv_mem->cv_ftemp, + cv_mem->cv_user_data); cv_mem->cv_nfe++; if (retval < 0) return(CV_RHSFUNC_FAIL); if (retval > 0) return(RHSFUNC_RECVR); @@ -307,8 +377,8 @@ static int cvNlsFPFunction(N_Vector ycor, N_Vector res, void* cvode_mem) N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, ycor, cv_mem->cv_y); /* evaluate the rhs function */ - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, res, - cv_mem->cv_user_data); + retval = cv_mem->nls_f(cv_mem->cv_tn, cv_mem->cv_y, res, + cv_mem->cv_user_data); cv_mem->cv_nfe++; if (retval < 0) return(CV_RHSFUNC_FAIL); if (retval > 0) return(RHSFUNC_RECVR); diff --git a/src/lib/cvodes/cvodes_nls_sim.c b/src/lib/cvodes/cvodes_nls_sim.c index 1e255bf..e570fd4 100644 --- a/src/lib/cvodes/cvodes_nls_sim.c +++ b/src/lib/cvodes/cvodes_nls_sim.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -146,14 +146,14 @@ int CVodeSetNonlinearSolverSensSim(void *cvode_mem, SUNNonlinearSolver NLS) /* create vector wrappers if necessary */ if (cv_mem->simMallocDone == SUNFALSE) { - cv_mem->zn0Sim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1); + cv_mem->zn0Sim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1, cv_mem->cv_sunctx); if (cv_mem->zn0Sim == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSetNonlinearSolverSensSim", MSGCV_MEM_FAIL); return(CV_MEM_FAIL); } - cv_mem->ycorSim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1); + cv_mem->ycorSim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1, cv_mem->cv_sunctx); if (cv_mem->ycorSim == NULL) { N_VDestroy(cv_mem->zn0Sim); cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", @@ -161,7 +161,7 @@ int CVodeSetNonlinearSolverSensSim(void *cvode_mem, SUNNonlinearSolver NLS) return(CV_MEM_FAIL); } - cv_mem->ewtSim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1); + cv_mem->ewtSim = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns+1, cv_mem->cv_sunctx); if (cv_mem->ewtSim == NULL) { N_VDestroy(cv_mem->zn0Sim); N_VDestroy(cv_mem->ycorSim); @@ -187,6 +187,48 @@ int CVodeSetNonlinearSolverSensSim(void *cvode_mem, SUNNonlinearSolver NLS) /* Reset the acnrmcur flag to SUNFALSE */ cv_mem->cv_acnrmcur = SUNFALSE; + /* Set the nonlinear system RHS function */ + if (!(cv_mem->cv_f)) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", + "CVodeSetNonlinearSolverSensSim", + "The ODE RHS function is NULL"); + return(CV_ILL_INPUT); + } + cv_mem->nls_f = cv_mem->cv_f; + + return(CV_SUCCESS); +} + + +/*--------------------------------------------------------------- + CVodeGetNonlinearSystemDataSens: + + This routine provides access to the relevant data needed to + compute the nonlinear system function. + ---------------------------------------------------------------*/ +int CVodeGetNonlinearSystemDataSens(void *cvode_mem, realtype *tcur, + N_Vector **ySpred, N_Vector **ySn, + realtype *gamma, realtype *rl1, + N_Vector **znS1, void **user_data) +{ + CVodeMem cv_mem; + + if (cvode_mem == NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", + "CVodeGetNonlinearSystemDataSens", MSGCV_NO_MEM); + return(CV_MEM_NULL); + } + + cv_mem = (CVodeMem) cvode_mem; + + *tcur = cv_mem->cv_tn; + *ySpred = cv_mem->cv_znS[0]; + *ySn = cv_mem->cv_yS; + *gamma = cv_mem->cv_gamma; + *rl1 = cv_mem->cv_rl1; + *znS1 = cv_mem->cv_znS[1]; + *user_data = cv_mem->cv_user_data; + return(CV_SUCCESS); } @@ -418,8 +460,8 @@ static int cvNlsResidualSensSim(N_Vector ycorSim, N_Vector resSim, void* cvode_m N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, ycor, cv_mem->cv_y); /* evaluate the rhs function */ - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, cv_mem->cv_ftemp, - cv_mem->cv_user_data); + retval = cv_mem->nls_f(cv_mem->cv_tn, cv_mem->cv_y, cv_mem->cv_ftemp, + cv_mem->cv_user_data); cv_mem->cv_nfe++; if (retval < 0) return(CV_RHSFUNC_FAIL); if (retval > 0) return(RHSFUNC_RECVR); @@ -482,8 +524,8 @@ static int cvNlsFPFunctionSensSim(N_Vector ycorSim, N_Vector resSim, void* cvode N_VLinearSum(ONE, cv_mem->cv_zn[0], ONE, ycor, cv_mem->cv_y); /* evaluate the rhs function */ - retval = cv_mem->cv_f(cv_mem->cv_tn, cv_mem->cv_y, res, - cv_mem->cv_user_data); + retval = cv_mem->nls_f(cv_mem->cv_tn, cv_mem->cv_y, res, + cv_mem->cv_user_data); cv_mem->cv_nfe++; if (retval < 0) return(CV_RHSFUNC_FAIL); if (retval > 0) return(RHSFUNC_RECVR); diff --git a/src/lib/cvodes/cvodes_nls_stg.c b/src/lib/cvodes/cvodes_nls_stg.c index 7d3956a..eb50136 100644 --- a/src/lib/cvodes/cvodes_nls_stg.c +++ b/src/lib/cvodes/cvodes_nls_stg.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -137,14 +137,14 @@ int CVodeSetNonlinearSolverSensStg(void *cvode_mem, SUNNonlinearSolver NLS) /* create vector wrappers if necessary */ if (cv_mem->stgMallocDone == SUNFALSE) { - cv_mem->zn0Stg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns); + cv_mem->zn0Stg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns, cv_mem->cv_sunctx); if (cv_mem->zn0Stg == NULL) { cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", "CVodeSetNonlinearSolverSensStg", MSGCV_MEM_FAIL); return(CV_MEM_FAIL); } - cv_mem->ycorStg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns); + cv_mem->ycorStg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns, cv_mem->cv_sunctx); if (cv_mem->ycorStg == NULL) { N_VDestroy(cv_mem->zn0Stg); cvProcessError(cv_mem, CV_MEM_FAIL, "CVODES", @@ -152,7 +152,7 @@ int CVodeSetNonlinearSolverSensStg(void *cvode_mem, SUNNonlinearSolver NLS) return(CV_MEM_FAIL); } - cv_mem->ewtStg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns); + cv_mem->ewtStg = N_VNewEmpty_SensWrapper(cv_mem->cv_Ns, cv_mem->cv_sunctx); if (cv_mem->ewtStg == NULL) { N_VDestroy(cv_mem->zn0Stg); N_VDestroy(cv_mem->ycorStg); diff --git a/src/lib/cvodes/cvodes_nls_stg1.c b/src/lib/cvodes/cvodes_nls_stg1.c index eb706fc..8ab17e6 100644 --- a/src/lib/cvodes/cvodes_nls_stg1.c +++ b/src/lib/cvodes/cvodes_nls_stg1.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/cvodes/cvodes_proj.c b/src/lib/cvodes/cvodes_proj.c new file mode 100644 index 0000000..d99b2df --- /dev/null +++ b/src/lib/cvodes/cvodes_proj.c @@ -0,0 +1,477 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * Based on CPODES by Radu Serban @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * Implementation file for projections in CVODE. + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "sundials/sundials_math.h" +#include "cvodes_impl.h" + +/* Private constants */ +#define ZERO RCONST(0.0) /* real 0.0 */ +#define ONE RCONST(1.0) /* real 1.0 */ + +#define ONEPSM RCONST(1.000001) + +/* Private utility function prototypes */ +static int cvProjCreate(CVodeProjMem *proj_mem); +static int cvProjSetDefaults(CVodeProjMem proj_mem); +static int cvAccessProjMem(void* cvode_mem, const char *fname, + CVodeMem *cv_mem, CVodeProjMem *proj_mem); + + +/* =========================================================================== + * Exported Functions - projection initialization + * ===========================================================================*/ + +/* ----------------------------------------------------------------------------- + * CVodeSetProjFn sets a user defined projection function + * ---------------------------------------------------------------------------*/ +int CVodeSetProjFn(void *cvode_mem, CVProjFn pfun) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Check the CVODE memory pointer */ + if (cvode_mem == NULL) + { + cvProcessError(NULL, CV_MEM_NULL, "CVODE", "CVodeSetProjFn", + MSG_CV_MEM_NULL); + return(CV_MEM_NULL); + } + cv_mem = (CVodeMem) cvode_mem; + + /* Check if the projection function is NULL */ + if (pfun == NULL) + { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVodeSetProjFn", + "The projection function is NULL."); + return(CV_ILL_INPUT); + } + + /* Check for compatible method */ + if (cv_mem->cv_lmm != CV_BDF) + { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVodeSetProjFn", + "Projection is only supported with BDF methods."); + return(CV_ILL_INPUT); + } + + /* Create the projection memory (if necessary) */ + retval = cvProjCreate(&(cv_mem->proj_mem)); + if (retval != CV_SUCCESS) + { + cvProcessError(cv_mem, CV_MEM_FAIL, "CVODE", "CVodeSetProjFn", + MSG_CV_MEM_FAIL); + return(CV_MEM_FAIL); + } + + /* Shortcut to projection memory */ + proj_mem = cv_mem->proj_mem; + + /* User-defined projection */ + proj_mem->internal_proj = SUNFALSE; + + /* Set the projection function */ + proj_mem->pfun = pfun; + + /* Enable projection */ + cv_mem->proj_enabled = SUNTRUE; + + return(CV_SUCCESS); +} + + +/* =========================================================================== + * Exported Functions - projection set function + * ===========================================================================*/ + + +int CVodeSetProjErrEst(void *cvode_mem, booleantype onoff) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeSetProjErrEst", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Set projection error flag */ + proj_mem->err_proj = onoff; + + return(CV_SUCCESS); +} + + +int CVodeSetProjFrequency(void *cvode_mem, long int freq) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeSetProjFrequency", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Set projection frequency */ + if (freq < 0) + { + /* Restore default */ + proj_mem->freq = 1; + cv_mem->proj_enabled = SUNTRUE; + } + else if (freq == 0) + { + /* Disable projection */ + proj_mem->freq = 0; + cv_mem->proj_enabled = SUNFALSE; + } + else + { + /* Enable projection at given frequency */ + proj_mem->freq = freq; + cv_mem->proj_enabled = SUNTRUE; + } + + return(CV_SUCCESS); +} + + +int CVodeSetMaxNumProjFails(void *cvode_mem, int max_fails) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeSetMaxNumProjFails", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Set maximum number of projection failures in a step attempt */ + if (max_fails < 1) + { + /* Restore default */ + proj_mem->max_fails = PROJ_MAX_FAILS; + } + else + { + /* Update max number of fails */ + proj_mem->max_fails = max_fails; + } + + return(CV_SUCCESS); +} + + +int CVodeSetEpsProj(void *cvode_mem, realtype eps) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeSetEpsProj", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Set the projection tolerance */ + if (eps <= ZERO) + { + /* Restore default */ + proj_mem->eps_proj = PROJ_EPS; + } + else + { + /* Update projection tolerance */ + proj_mem->eps_proj = eps; + } + + return(CV_SUCCESS); +} + + +int CVodeSetProjFailEta(void *cvode_mem, realtype eta) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeSetProjFailEta", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Set the step size reduction factor for a projection failure */ + if ((eta <= ZERO) || (eta > ONE)) + { + /* Restore detault */ + proj_mem->eta_pfail = PROJ_FAIL_ETA; + } + else + { + /* Udpate the eta value */ + proj_mem->eta_pfail = PROJ_FAIL_ETA; + } + + return(CV_SUCCESS); +} + + +/* =========================================================================== + * Exported Functions - projection get functions + * ===========================================================================*/ + + +int CVodeGetNumProjEvals(void *cvode_mem, long int *nproj) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeGetNumProjectionEvals", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Get number of projection evaluations */ + *nproj = proj_mem->nproj; + + return(CV_SUCCESS); +} + + +int CVodeGetNumProjFails(void *cvode_mem, long int *npfails) +{ + int retval; + CVodeMem cv_mem; + CVodeProjMem proj_mem; + + /* Access memory structures */ + retval = cvAccessProjMem(cvode_mem, "CVodeGetNumProjFails", + &cv_mem, &proj_mem); + if (retval != CV_SUCCESS) return(retval); + + /* Get number of projection fails */ + *npfails = proj_mem->npfails; + + return(CV_SUCCESS); +} + + +/* =========================================================================== + * Internal Functions + * ===========================================================================*/ + + +/* + * cvProjection + * + * For user supplied projection function, use ftemp as temporary storage + * for the current error estimate (acor) and use tempv to store the + * accumulated corection due to projection, acorP (tempv is not touched + * until it is potentially used in cvCompleteStep). + */ + +int cvDoProjection(CVodeMem cv_mem, int *nflagPtr, realtype saved_t, + int *npfailPtr) +{ + int retval; + N_Vector errP; + N_Vector acorP; + CVodeProjMem proj_mem; + + /* Access projection memory */ + if (cv_mem->proj_mem == NULL) { + cvProcessError(cv_mem, CV_PROJ_MEM_NULL, "CVODE", + "cvDoProjection", MSG_CV_PROJ_MEM_NULL); + return(CV_PROJ_MEM_NULL); + } + proj_mem = cv_mem->proj_mem; + + /* Initialize return flag to success */ + retval = CV_SUCCESS; + + /* Use tempv to store acorP and, if projecting the error, ftemp to store + errP (recall that in this case we did not allocate vectors to for + acorP and errP). */ + acorP = cv_mem->cv_tempv; + if (proj_mem->err_proj) + errP = cv_mem->cv_ftemp; + else + errP = NULL; + + /* Copy acor into errP (if projecting the error) */ + if (proj_mem->err_proj) N_VScale(ONE, cv_mem->cv_acor, errP); + + /* Call the user projection function */ + retval = proj_mem->pfun(cv_mem->cv_tn, cv_mem->cv_y, acorP, + proj_mem->eps_proj, errP, cv_mem->cv_user_data); + proj_mem->nproj++; + + /* This is not the first projection anymore */ + proj_mem->first_proj = SUNFALSE; + + /* Check the return value */ + if (retval == CV_SUCCESS) + { + /* Recompute acnrm to be used in error test (if projecting the error) */ + if (proj_mem->err_proj) + cv_mem->cv_acnrm = N_VWrmsNorm(errP, cv_mem->cv_ewt); + + /* The projection was successful, return now */ + cv_mem->proj_applied = SUNTRUE; + return(CV_SUCCESS); + } + + /* The projection failed, update the return value */ + if (retval < 0) retval = CV_PROJFUNC_FAIL; + if (retval > 0) retval = PROJFUNC_RECVR; + + /* Increment cumulative failure count and restore zn */ + proj_mem->npfails++; + cvRestore(cv_mem, saved_t); + + /* Return if failed unrecoverably */ + if (retval == CV_PROJFUNC_FAIL) return(CV_PROJFUNC_FAIL); + + /* Recoverable failure, increment failure count for this step attempt */ + (*npfailPtr)++; + cv_mem->cv_etamax = ONE; + + /* Check for maximum number of failures or |h| = hmin */ + if ((SUNRabs(cv_mem->cv_h) <= cv_mem->cv_hmin * ONEPSM) || + (*npfailPtr == proj_mem->max_fails)) + { + if (retval == PROJFUNC_RECVR) return(CV_REPTD_PROJFUNC_ERR); + } + + /* Reduce step size; return to reattempt the step */ + cv_mem->cv_eta = SUNMAX(proj_mem->eta_pfail, + cv_mem->cv_hmin / SUNRabs(cv_mem->cv_h)); + *nflagPtr = PREV_PROJ_FAIL; + cvRescale(cv_mem); + + return(PREDICT_AGAIN); +} + + +int cvProjInit(CVodeProjMem proj_mem) +{ + /* check if projection memory exists */ + if (proj_mem == NULL) return(CV_PROJ_MEM_NULL); + + /* reset flags and counters */ + proj_mem->first_proj = SUNTRUE; + proj_mem->nstlprj = 0; + proj_mem->nproj = 0; + proj_mem->npfails = 0; + + return(CV_SUCCESS); +} + + +int cvProjFree(CVodeProjMem *proj_mem) +{ + if (*proj_mem == NULL) return(CV_SUCCESS); + + free(*proj_mem); + *proj_mem = NULL; + + return(CV_SUCCESS); +} + + +/* =========================================================================== + * Utility Functions + * ===========================================================================*/ + +static int cvProjCreate(CVodeProjMem *proj_mem) +{ + int retval; + + /* Allocate projection memory if necessary, otherwise return success */ + if (*proj_mem == NULL) + { + *proj_mem = (CVodeProjMem) malloc(sizeof(struct CVodeProjMemRec)); + if (*proj_mem == NULL) return(CV_MEM_FAIL); + + /* Zero out proj_mem */ + memset(*proj_mem, 0, sizeof(struct CVodeProjMemRec)); + + /* Initialize projection variables */ + retval = cvProjSetDefaults(*proj_mem); + if (retval != CV_SUCCESS) return(retval); + } + + return(CV_SUCCESS); +} + + +static int cvProjSetDefaults(CVodeProjMem proj_mem) +{ + if (proj_mem == NULL) return(CV_MEM_FAIL); + + proj_mem->internal_proj = SUNTRUE; + proj_mem->err_proj = SUNTRUE; + proj_mem->first_proj = SUNTRUE; + + proj_mem->freq = 1; + proj_mem->nstlprj = 0; + + proj_mem->max_fails = PROJ_MAX_FAILS; + + proj_mem->pfun = NULL; + + proj_mem->eps_proj = PROJ_EPS; + proj_mem->eta_pfail = PROJ_FAIL_ETA; + + proj_mem->nproj = 0; + proj_mem->npfails = 0; + + return(CV_SUCCESS); +} + + +static int cvAccessProjMem(void* cvode_mem, const char *fname, + CVodeMem *cv_mem, CVodeProjMem *proj_mem) +{ + /* Access cvode memory */ + if (cvode_mem == NULL) + { + cvProcessError(NULL, CV_MEM_NULL, "CVODE", + fname, MSG_CV_MEM_NULL); + return(CV_MEM_NULL); + } + *cv_mem = (CVodeMem) cvode_mem; + + /* Access projection memory */ + if ((*cv_mem)->proj_mem == NULL) + { + cvProcessError(*cv_mem, CV_PROJ_MEM_NULL, "CVODE", + fname, MSG_CV_PROJ_MEM_NULL); + return(CV_PROJ_MEM_NULL); + } + *proj_mem = (CVodeProjMem) (*cv_mem)->proj_mem; + + return(CV_SUCCESS); +} diff --git a/src/lib/cvodes/cvodes_proj_impl.h b/src/lib/cvodes/cvodes_proj_impl.h new file mode 100644 index 0000000..e87ff48 --- /dev/null +++ b/src/lib/cvodes/cvodes_proj_impl.h @@ -0,0 +1,75 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * Based on CPODES by Radu Serban @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Implementation header file for projections in CVODE. + * ---------------------------------------------------------------------------*/ + +#ifndef _CVODE_PROJ_IMPL_H +#define _CVODE_PROJ_IMPL_H + +#include "cvodes/cvodes.h" + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* ============================================================================= + * Default Projection Constants + * + * PROJ_MAX_FAILS max nunmber of projection failures in one step attempt + * PROJ_EPS projection solve tolerance + * PROJ_FAIL_ETA maximum step size decrease on projection failure + * ===========================================================================*/ + +#define PROJ_MAX_FAILS 10 +#define PROJ_EPS RCONST(0.1) +#define PROJ_FAIL_ETA RCONST(0.25) + +/* ============================================================================= + * Projection Data Structure + * ===========================================================================*/ + +/* ----------------------------------------------------------------------------- + * Types : struct CVodeProjMemRec, CVodeProjMem + * ----------------------------------------------------------------------------- + * The type CVodeProjMem is type pointer to struct CVodeProjMemRec. This + * structure contains data pertaining to the use of projection capabilities. + * ---------------------------------------------------------------------------*/ +typedef struct CVodeProjMemRec { + + booleantype internal_proj; /* use the internal projection algorithm? */ + booleantype err_proj; /* is error projection enabled? */ + booleantype first_proj; /* is this the first time we project? */ + + long int freq; /* projection frequency */ + long int nstlprj; /* step number of last projection */ + + int max_fails; /* maximum number of projection failures */ + + CVProjFn pfun; /* function to perform projection */ + + realtype eps_proj; /* projection solve tolerance */ + realtype eta_pfail; /* projection failure step reduction factor */ + + long int nproj; /* number of projections performed */ + long int npfails; /* number of projection failures */ + +} *CVodeProjMem; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/lib/cvodes/cvodes_spils.c b/src/lib/cvodes/cvodes_spils.c index 0ce3a9a..1ae7e7e 100644 --- a/src/lib/cvodes/cvodes_spils.c +++ b/src/lib/cvodes/cvodes_spils.c @@ -2,7 +2,7 @@ * Programmer(s): Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * diff --git a/src/lib/nvector/hip/VectorArrayKernels.hip.hpp b/src/lib/nvector/hip/VectorArrayKernels.hip.hpp new file mode 100644 index 0000000..b2029ac --- /dev/null +++ b/src/lib/nvector/hip/VectorArrayKernels.hip.hpp @@ -0,0 +1,226 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): David Gardner, Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + */ + + +#ifndef _NVECTOR_HIP_ARRAY_KERNELS_CUH_ +#define _NVECTOR_HIP_ARRAY_KERNELS_CUH_ + +#include +#include "sundials_hip_kernels.hip.hpp" + +namespace sundials +{ +namespace hip +{ +namespace impl +{ + +/* + * ----------------------------------------------------------------------------- + * fused vector operation kernels + * ----------------------------------------------------------------------------- + */ + +/* + * Computes the linear combination of nv vectors + */ +template +__global__ void +linearCombinationKernel(int nv, T* c, T** xd, T* zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + zd[i] = c[0]*xd[0][i]; + for (int j=1; j +__global__ void +scaleAddMultiKernel(int nv, T* c, T* xd, T** yd, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int j=0; j class GridReducer> +__global__ void +dotProdMultiKernel(int nv, const T* xd, T** yd, T* out, I n) +{ + // REQUIRES nv blocks (i.e. gridDim.x == nv) + using op = sundials::reductions::impl::plus; + constexpr T Id = op::identity(); + const I k = blockIdx.x; + + // Initialize to zero. + T sum = Id; + for (I i = threadIdx.x; i < n; i += blockDim.x) + { // each thread computes n/blockDim.x elements + sum += xd[i] * yd[k][i]; + } + GridReducer{}(sum, Id, &out[k], nullptr); +} + + +/* + * ----------------------------------------------------------------------------- + * vector array operation kernels + * ----------------------------------------------------------------------------- + */ + + +/* + * Computes the linear sum of multiple vectors + */ +template +__global__ void +linearSumVectorArrayKernel(int nv, T a, T** xd, T b, T** yd, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int j=0; j +__global__ void +scaleVectorArrayKernel(int nv, T* c, T** xd, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int j=0; j +__global__ void +constVectorArrayKernel(int nv, T c, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int j=0; j class GridReducer> +__global__ void +wL2NormSquareVectorArrayKernel(int nv, T** xd, T** wd, T* out, I n) +{ + // REQUIRES nv blocks (i.e. gridDim.x == nv) + using op = sundials::reductions::impl::plus; + constexpr T Id = op::identity(); + const I k = blockIdx.x; + + // Initialize to zero. + T sum = 0.0; + for (I i = threadIdx.x; i < n; i += blockDim.x) + { // each thread computes n/blockDim.x elements + sum += xd[k][i] * wd[k][i] * xd[k][i] * wd[k][i]; + } + GridReducer{}(sum, Id, &out[k], nullptr); +} + + +/* + * Masked WRMS norm of nv vectors. + * + */ +template class GridReducer> +__global__ void +wL2NormSquareMaskVectorArrayKernel(int nv, T** xd, T** wd, T* id, T* out, I n) +{ + // REQUIRES nv blocks (i.e. gridDim.x == nv) + using op = sundials::reductions::impl::plus; + constexpr T Id = op::identity(); + const I k = blockIdx.x; + + // Initialize to zero. + T sum = 0.0; + for (I i = threadIdx.x; i < n; i += blockDim.x) + { // each thread computes n/blockDim.x elements + if (id[i] > 0.0) sum += xd[k][i] * wd[k][i] * xd[k][i] * wd[k][i]; + } + GridReducer{}(sum, Id, &out[k], nullptr); +} + + +/* + * Computes the scaled sum of a vector array with multiple other vector arrays + */ +template +__global__ void +scaleAddMultiVectorArrayKernel(int nv, int ns, T* c, T** xd, T** yd, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int k=0; k +__global__ void +linearCombinationVectorArrayKernel(int nv, int ns, T* c, T** xd, T** zd, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + for (int k=0; k +#include "sundials_hip_kernels.hip.hpp" + +namespace sundials +{ +namespace hip +{ +namespace impl +{ + +/* + * Sets all elements of the vector X to constant value a. + * + */ + +template +__global__ void +setConstKernel(T a, T *X, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + X[i] = a; + } +} + + +/* + * Computes linear sum (combination) of two vectors. + * + */ + +template +__global__ void +linearSumKernel(T a, const T *X, T b, const T *Y, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = a*X[i] + b*Y[i]; + } +} + + +/* + * Elementwise product of two vectors. + * + */ + +template +__global__ void +prodKernel(const T *X, const T *Y, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = X[i]*Y[i]; + } +} + + +/* + * Elementwise division of two vectors. + * + */ + +template +__global__ void +divKernel(const T *X, const T *Y, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = X[i]/Y[i]; + } +} + + +/* + * Scale vector with scalar value 'a'. + * + */ + +template +__global__ void +scaleKernel(T a, const T *X, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = a*X[i]; + } +} + + +/* + * Stores absolute values of vector X elements into vector Z. + * + */ + +template +__global__ void +absKernel(const T *X, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = abs(X[i]); + } +} + + +/* + * Elementwise inversion. + * + */ + +template +__global__ void +invKernel(const T *X, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = 1.0/(X[i]); + } +} + + +/* + * Add constant 'c' to each vector element. + * + */ + +template +__global__ void +addConstKernel(T a, const T *X, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = a + X[i]; + } +} + + +/* + * Compare absolute values of vector 'X' with constant 'c'. + * + */ + +template +__global__ void +compareKernel(T c, const T *X, T *Z, I n) +{ + GRID_STRIDE_XLOOP(I, i, n) + { + Z[i] = (abs(X[i]) >= c) ? 1.0 : 0.0; + } +} + + +/* + * Dot product of two vectors. + * + */ +template class GridReducer> +__global__ void +dotProdKernel(const T *x, const T *y, T *out, I n, unsigned int *device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T sum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + sum += x[i] * y[i]; + } + GridReducer{}(sum, Id, out, device_count); +} + + +/* + * Finds max norm the vector. + * + */ +template class GridReducer> +__global__ void +maxNormKernel(const T *x, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::maximum; + const T Id = op::identity(); + + T maximum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + maximum = max(abs(x[i]), maximum); + } + GridReducer{}(maximum, Id, out, device_count); +} + + +/* + * Weighted L2 norm squared. + * + */ +template class GridReducer> +__global__ void +wL2NormSquareKernel(const T *x, const T *w, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T sum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + sum += x[i] * w[i] * x[i] * w[i]; + } + GridReducer{}(sum, Id, out, device_count); +} + +/* + * Weighted L2 norm squared with mask. Vector id specifies the mask. + * + */ +template class GridReducer> +__global__ void +wL2NormSquareMaskKernel(const T *x, const T *w, const T *id, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T sum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + if(id[i] > 0.0) sum += x[i] * w[i] * x[i] * w[i]; + } + GridReducer{}(sum, Id, out, device_count); +} + + +/* + * Finds min value in the vector. + * + */ +template class GridReducer> +__global__ void +findMinKernel(T MAX_VAL, const T *x, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::minimum; + const T Id = op::identity(); + + T minimum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + minimum = min(x[i], minimum); + } + GridReducer{}(minimum, Id, out, device_count); +} + + +/* + * Computes L1 norm of vector + * + */ +template class GridReducer> +__global__ void +L1NormKernel(const T *x, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T sum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + sum += abs(x[i]); + } + GridReducer{}(sum, Id, out, device_count); +} + +/* + * Vector inverse z[i] = 1/x[i] with check for zeros. Reduction is performed + * to flag the result if any x[i] = 0. + * + */ +template class GridReducer> +__global__ void +invTestKernel(const T *x, T *z, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T flag = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + if (x[i] == static_cast(0.0)) + flag += 1.0; + else + z[i] = 1.0/x[i]; + } + GridReducer{}(flag, Id, out, device_count); +} + + +/* + * Checks if inequality constraints are satisfied. Constraint check + * results are stored in vector 'm'. A sum reduction over all elements + * of 'm' is performed to find if any of the constraints is violated. + * If all constraints are satisfied sum == 0. + * + */ +template class GridReducer> +__global__ void +constrMaskKernel(const T *c, const T *x, T *m, T *out, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::plus; + const T Id = op::identity(); + + T sum = Id; + GRID_STRIDE_XLOOP(I, i, n) + { + // test = true if constraints violated + bool test = (std::abs(c[i]) > 1.5 && c[i]*x[i] <= 0.0) || + (std::abs(c[i]) > 0.5 && c[i]*x[i] < 0.0); + m[i] = test ? 1.0 : 0.0; + sum = m[i]; + } + GridReducer{}(sum, Id, out, device_count); +} + + +/* + * Finds minimum component-wise quotient. + * + */ +template class GridReducer> +__global__ void +minQuotientKernel(const T MAX_VAL, const T *num, const T *den, T *min_quotient, I n, unsigned int* device_count) +{ + using op = sundials::reductions::impl::minimum; + const T Id = op::identity(); + + T minimum = Id; + T quotient = 0.0; + GRID_STRIDE_XLOOP(I, i, n) + { + quotient = (den[i] == static_cast(0.0)) ? Id : num[i]/den[i]; + minimum = min(quotient, minimum); + } + GridReducer{}(minimum, Id, min_quotient, device_count); +} + +} // namespace impl +} // namespace hip +} // namespace sundials + +#endif // _NVECTOR_HIP_KERNELS_CUH_ diff --git a/src/lib/nvector/hip/nvector_hip.hip.cpp b/src/lib/nvector/hip/nvector_hip.hip.cpp new file mode 100644 index 0000000..a5258b1 --- /dev/null +++ b/src/lib/nvector/hip/nvector_hip.hip.cpp @@ -0,0 +1,2559 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel McGreer, and Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for a HIP implementation + * of the NVECTOR package. + * -----------------------------------------------------------------*/ + +#include +#include +#include +#include +#include + +#include +#include "VectorArrayKernels.hip.hpp" +#include "VectorKernels.hip.hpp" + +#include "sundials_hip.h" +#include "sundials_debug.h" + +#define ZERO RCONST(0.0) +#define HALF RCONST(0.5) + +using namespace sundials; +using namespace sundials::hip; +using namespace sundials::hip::impl; + +/* + * Private function definitions + */ + +// Allocate vector data +static int AllocateData(N_Vector v); + +// Reduction buffer functions +static int InitializeDeviceCounter(N_Vector v); +static int FreeDeviceCounter(N_Vector v); +static int InitializeReductionBuffer(N_Vector v, realtype value, size_t n = 1); +static void FreeReductionBuffer(N_Vector v); +static int CopyReductionBufferFromDevice(N_Vector v, size_t n = 1); + +// Kernel launch parameters +static int GetKernelParameters(N_Vector v, booleantype reduction, size_t& grid, size_t& block, + size_t& shMemSize, hipStream_t& stream, size_t n = 0); +static int GetKernelParameters(N_Vector v, booleantype reduction, size_t& grid, size_t& block, + size_t& shMemSize, hipStream_t& stream, bool& atomic, size_t n = 0); +static void PostKernelLaunch(); + +/* + * Macro definitions + */ + +// Macros to access vector content +#define NVEC_HIP_CONTENT(x) ((N_VectorContent_Hip)(x->content)) +#define NVEC_HIP_MEMSIZE(x) (NVEC_HIP_CONTENT(x)->length * sizeof(realtype)) +#define NVEC_HIP_MEMHELP(x) (NVEC_HIP_CONTENT(x)->mem_helper) +#define NVEC_HIP_HDATAp(x) ((realtype*) NVEC_HIP_CONTENT(x)->host_data->ptr) +#define NVEC_HIP_DDATAp(x) ((realtype*) NVEC_HIP_CONTENT(x)->device_data->ptr) +#define NVEC_HIP_STREAM(x) (NVEC_HIP_CONTENT(x)->stream_exec_policy->stream()) + +// Macros to access vector private content +#define NVEC_HIP_PRIVATE(x) ((N_PrivateVectorContent_Hip)(NVEC_HIP_CONTENT(x)->priv)) +#define NVEC_HIP_HBUFFERp(x) ((realtype*) NVEC_HIP_PRIVATE(x)->reduce_buffer_host->ptr) +#define NVEC_HIP_DBUFFERp(x) ((realtype*) NVEC_HIP_PRIVATE(x)->reduce_buffer_dev->ptr) +#define NVEC_HIP_DCOUNTERp(x) ((unsigned int*) NVEC_HIP_PRIVATE(x)->device_counter->ptr) + +/* + * Private structure definition + */ + +struct _N_PrivateVectorContent_Hip +{ + booleantype use_managed_mem; /* indicates if the data pointers and buffer pointers are managed memory */ + size_t reduce_buffer_allocated_bytes; /* current size of the reduction buffer */ + SUNMemory reduce_buffer_dev; /* device buffer used for reductions */ + SUNMemory reduce_buffer_host; /* host buffer used for reductions */ + SUNMemory device_counter; /* device memory for a counter (used in LDS reductions) */ +}; + +typedef struct _N_PrivateVectorContent_Hip *N_PrivateVectorContent_Hip; + +/* Default policies to clone */ +ThreadDirectExecPolicy DEFAULT_STREAMING_EXECPOLICY(512); +BlockReduceExecPolicy DEFAULT_REDUCTION_EXECPOLICY(512); + +extern "C" { + +N_Vector N_VNewEmpty_Hip(SUNContext sunctx) +{ + N_Vector v; + + /* Create vector */ + v = NULL; + v = N_VNewEmpty(sunctx); + if (v == NULL) return(NULL); + + /* Attach operations */ + + /* constructors, destructors, and utility operations */ + v->ops->nvgetvectorid = N_VGetVectorID_Hip; + v->ops->nvclone = N_VClone_Hip; + v->ops->nvcloneempty = N_VCloneEmpty_Hip; + v->ops->nvdestroy = N_VDestroy_Hip; + v->ops->nvspace = N_VSpace_Hip; + v->ops->nvgetlength = N_VGetLength_Hip; + v->ops->nvgetarraypointer = N_VGetHostArrayPointer_Hip; + v->ops->nvgetdevicearraypointer = N_VGetDeviceArrayPointer_Hip; + v->ops->nvsetarraypointer = N_VSetHostArrayPointer_Hip; + + /* standard vector operations */ + v->ops->nvlinearsum = N_VLinearSum_Hip; + v->ops->nvconst = N_VConst_Hip; + v->ops->nvprod = N_VProd_Hip; + v->ops->nvdiv = N_VDiv_Hip; + v->ops->nvscale = N_VScale_Hip; + v->ops->nvabs = N_VAbs_Hip; + v->ops->nvinv = N_VInv_Hip; + v->ops->nvaddconst = N_VAddConst_Hip; + v->ops->nvdotprod = N_VDotProd_Hip; + v->ops->nvmaxnorm = N_VMaxNorm_Hip; + v->ops->nvmin = N_VMin_Hip; + v->ops->nvl1norm = N_VL1Norm_Hip; + v->ops->nvinvtest = N_VInvTest_Hip; + v->ops->nvconstrmask = N_VConstrMask_Hip; + v->ops->nvminquotient = N_VMinQuotient_Hip; + v->ops->nvwrmsnormmask = N_VWrmsNormMask_Hip; + v->ops->nvwrmsnorm = N_VWrmsNorm_Hip; + v->ops->nvwl2norm = N_VWL2Norm_Hip; + v->ops->nvcompare = N_VCompare_Hip; + + /* fused and vector array operations are disabled (NULL) by default */ + + /* local reduction operations */ + v->ops->nvdotprodlocal = N_VDotProd_Hip; + v->ops->nvmaxnormlocal = N_VMaxNorm_Hip; + v->ops->nvminlocal = N_VMin_Hip; + v->ops->nvl1normlocal = N_VL1Norm_Hip; + v->ops->nvinvtestlocal = N_VInvTest_Hip; + v->ops->nvconstrmasklocal = N_VConstrMask_Hip; + v->ops->nvminquotientlocal = N_VMinQuotient_Hip; + v->ops->nvwsqrsumlocal = N_VWSqrSumLocal_Hip; + v->ops->nvwsqrsummasklocal = N_VWSqrSumMaskLocal_Hip; + + /* single buffer reduction operations */ + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Hip; + + /* XBraid interface operations */ + v->ops->nvbufsize = N_VBufSize_Hip; + v->ops->nvbufpack = N_VBufPack_Hip; + v->ops->nvbufunpack = N_VBufUnpack_Hip; + + /* print operation for debugging */ + v->ops->nvprint = N_VPrint_Hip; + v->ops->nvprintfile = N_VPrintFile_Hip; + + /* Create content */ + + v->content = (N_VectorContent_Hip) malloc(sizeof(_N_VectorContent_Hip)); + if (v->content == NULL) + { + N_VDestroy(v); + return(NULL); + } + + NVEC_HIP_CONTENT(v)->priv = malloc(sizeof(_N_PrivateVectorContent_Hip)); + if (NVEC_HIP_CONTENT(v)->priv == NULL) + { + N_VDestroy(v); + return(NULL); + } + + // Initialize content + NVEC_HIP_CONTENT(v)->length = 0; + NVEC_HIP_CONTENT(v)->host_data = NULL; + NVEC_HIP_CONTENT(v)->device_data = NULL; + NVEC_HIP_CONTENT(v)->stream_exec_policy = NULL; + NVEC_HIP_CONTENT(v)->reduce_exec_policy = NULL; + NVEC_HIP_CONTENT(v)->mem_helper = NULL; + NVEC_HIP_CONTENT(v)->own_helper = SUNFALSE; + + // Initialize private content + NVEC_HIP_PRIVATE(v)->use_managed_mem = SUNFALSE; + NVEC_HIP_PRIVATE(v)->reduce_buffer_dev = NULL; + NVEC_HIP_PRIVATE(v)->reduce_buffer_host = NULL; + NVEC_HIP_PRIVATE(v)->device_counter = NULL; + NVEC_HIP_PRIVATE(v)->reduce_buffer_allocated_bytes = 0; + + return(v); +} + +N_Vector N_VNew_Hip(sunindextype length, SUNContext sunctx) +{ + N_Vector v; + + v = NULL; + v = N_VNewEmpty_Hip(sunctx); + if (v == NULL) return(NULL); + + NVEC_HIP_CONTENT(v)->length = length; + NVEC_HIP_CONTENT(v)->mem_helper = SUNMemoryHelper_Hip(sunctx); + NVEC_HIP_CONTENT(v)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->own_helper = SUNTRUE; + NVEC_HIP_PRIVATE(v)->use_managed_mem = SUNFALSE; + + if (NVEC_HIP_MEMHELP(v) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNew_Hip: memory helper is NULL\n"); + N_VDestroy(v); + return(NULL); + } + + if (AllocateData(v)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNew_Hip: AllocateData returned nonzero\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +N_Vector N_VNewWithMemHelp_Hip(sunindextype length, booleantype use_managed_mem, SUNMemoryHelper helper, SUNContext sunctx) +{ + N_Vector v; + + if (helper == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNewWithMemHelp_Hip: helper is NULL\n"); + return(NULL); + } + + if (!SUNMemoryHelper_ImplementsRequiredOps(helper)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNewWithMemHelp_Hip: helper doesn't implement all required ops\n"); + return(NULL); + } + + v = NULL; + v = N_VNewEmpty_Hip(sunctx); + if (v == NULL) return(NULL); + + NVEC_HIP_CONTENT(v)->length = length; + NVEC_HIP_CONTENT(v)->mem_helper = helper; + NVEC_HIP_CONTENT(v)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->own_helper = SUNFALSE; + NVEC_HIP_PRIVATE(v)->use_managed_mem = use_managed_mem; + + if (AllocateData(v)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNewWithMemHelp_Hip: AllocateData returned nonzero\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +N_Vector N_VNewManaged_Hip(sunindextype length, SUNContext sunctx) +{ + N_Vector v; + + v = NULL; + v = N_VNewEmpty_Hip(sunctx); + if (v == NULL) return(NULL); + + NVEC_HIP_CONTENT(v)->length = length; + NVEC_HIP_CONTENT(v)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->mem_helper = SUNMemoryHelper_Hip(sunctx); + NVEC_HIP_CONTENT(v)->own_helper = SUNTRUE; + NVEC_HIP_PRIVATE(v)->use_managed_mem = SUNTRUE; + + if (NVEC_HIP_MEMHELP(v) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNewManaged_Hip: memory helper is NULL\n"); + N_VDestroy(v); + return(NULL); + } + + if (AllocateData(v)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VNewManaged_Hip: AllocateData returned nonzero\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +N_Vector N_VMake_Hip(sunindextype length, realtype *h_vdata, realtype *d_vdata, SUNContext sunctx) +{ + N_Vector v; + + if (h_vdata == NULL || d_vdata == NULL) return(NULL); + + v = NULL; + v = N_VNewEmpty_Hip(sunctx); + if (v == NULL) return(NULL); + + NVEC_HIP_CONTENT(v)->length = length; + NVEC_HIP_CONTENT(v)->host_data = SUNMemoryHelper_Wrap(h_vdata, SUNMEMTYPE_HOST); + NVEC_HIP_CONTENT(v)->device_data = SUNMemoryHelper_Wrap(d_vdata, SUNMEMTYPE_DEVICE); + NVEC_HIP_CONTENT(v)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->mem_helper = SUNMemoryHelper_Hip(sunctx); + NVEC_HIP_CONTENT(v)->own_helper = SUNTRUE; + NVEC_HIP_PRIVATE(v)->use_managed_mem = SUNFALSE; + + if (NVEC_HIP_MEMHELP(v) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMake_Hip: memory helper is NULL\n"); + N_VDestroy(v); + return(NULL); + } + + if (NVEC_HIP_CONTENT(v)->device_data == NULL || + NVEC_HIP_CONTENT(v)->host_data == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMake_Hip: SUNMemoryHelper_Wrap returned NULL\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +N_Vector N_VMakeManaged_Hip(sunindextype length, realtype *vdata, SUNContext sunctx) +{ + N_Vector v; + + if (vdata == NULL) return(NULL); + + v = NULL; + v = N_VNewEmpty_Hip(sunctx); + if (v == NULL) return(NULL); + + NVEC_HIP_CONTENT(v)->length = length; + NVEC_HIP_CONTENT(v)->host_data = SUNMemoryHelper_Wrap(vdata, SUNMEMTYPE_UVM); + NVEC_HIP_CONTENT(v)->device_data = SUNMemoryHelper_Alias(NVEC_HIP_CONTENT(v)->host_data); + NVEC_HIP_CONTENT(v)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + NVEC_HIP_CONTENT(v)->mem_helper = SUNMemoryHelper_Hip(sunctx); + NVEC_HIP_CONTENT(v)->own_helper = SUNTRUE; + NVEC_HIP_PRIVATE(v)->use_managed_mem = SUNTRUE; + + if (NVEC_HIP_MEMHELP(v) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMakeManaged_Hip: memory helper is NULL\n"); + N_VDestroy(v); + return(NULL); + } + + if (NVEC_HIP_CONTENT(v)->device_data == NULL || + NVEC_HIP_CONTENT(v)->host_data == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMakeManaged_Hip: SUNMemoryHelper_Wrap returned NULL\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +/* ---------------------------------------------------------------------------- + * Set pointer to the raw host data. Does not free the existing pointer. + */ + +void N_VSetHostArrayPointer_Hip(realtype* h_vdata, N_Vector v) +{ + if (N_VIsManagedMemory_Hip(v)) + { + if (NVEC_HIP_CONTENT(v)->host_data) + { + NVEC_HIP_CONTENT(v)->host_data->ptr = (void*) h_vdata; + NVEC_HIP_CONTENT(v)->device_data->ptr = (void*) h_vdata; + } + else + { + NVEC_HIP_CONTENT(v)->host_data = SUNMemoryHelper_Wrap((void*) h_vdata, SUNMEMTYPE_UVM); + NVEC_HIP_CONTENT(v)->device_data = SUNMemoryHelper_Alias(NVEC_HIP_CONTENT(v)->host_data); + } + } + else + { + if (NVEC_HIP_CONTENT(v)->host_data) + { + NVEC_HIP_CONTENT(v)->host_data->ptr = (void*) h_vdata; + } + else + { + NVEC_HIP_CONTENT(v)->host_data = SUNMemoryHelper_Wrap((void*) h_vdata, SUNMEMTYPE_HOST); + } + } +} + +/* ---------------------------------------------------------------------------- + * Set pointer to the raw device data + */ + +void N_VSetDeviceArrayPointer_Hip(realtype* d_vdata, N_Vector v) +{ + if (N_VIsManagedMemory_Hip(v)) + { + if (NVEC_HIP_CONTENT(v)->device_data) + { + NVEC_HIP_CONTENT(v)->device_data->ptr = (void*) d_vdata; + NVEC_HIP_CONTENT(v)->host_data->ptr = (void*) d_vdata; + } + else + { + NVEC_HIP_CONTENT(v)->device_data = SUNMemoryHelper_Wrap((void*) d_vdata, SUNMEMTYPE_UVM); + NVEC_HIP_CONTENT(v)->host_data = SUNMemoryHelper_Alias(NVEC_HIP_CONTENT(v)->device_data); + } + } + else + { + if (NVEC_HIP_CONTENT(v)->device_data) + { + NVEC_HIP_CONTENT(v)->device_data->ptr = (void*) d_vdata; + } + else + { + NVEC_HIP_CONTENT(v)->device_data = SUNMemoryHelper_Wrap((void*) d_vdata, SUNMEMTYPE_DEVICE); + } + } +} + +/* ---------------------------------------------------------------------------- + * Return a flag indicating if the memory for the vector data is managed + */ + +booleantype N_VIsManagedMemory_Hip(N_Vector x) +{ + return NVEC_HIP_PRIVATE(x)->use_managed_mem; +} + +int N_VSetKernelExecPolicy_Hip(N_Vector x, + SUNHipExecPolicy* stream_exec_policy, + SUNHipExecPolicy* reduce_exec_policy) +{ + if (x == NULL) return(-1); + + /* Delete the old policies */ + delete NVEC_HIP_CONTENT(x)->stream_exec_policy; + delete NVEC_HIP_CONTENT(x)->reduce_exec_policy; + + /* Reset the policy if it is null */ + + if (stream_exec_policy == NULL) + NVEC_HIP_CONTENT(x)->stream_exec_policy = DEFAULT_STREAMING_EXECPOLICY.clone(); + else + NVEC_HIP_CONTENT(x)->stream_exec_policy = stream_exec_policy->clone(); + + if (reduce_exec_policy == NULL) + NVEC_HIP_CONTENT(x)->reduce_exec_policy = DEFAULT_REDUCTION_EXECPOLICY.clone(); + else + NVEC_HIP_CONTENT(x)->reduce_exec_policy = reduce_exec_policy->clone(); + + return(0); +} + +/* ---------------------------------------------------------------------------- + * Copy vector data to the device + */ + +void N_VCopyToDevice_Hip(N_Vector x) +{ + int copy_fail; + + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(x), + NVEC_HIP_CONTENT(x)->device_data, + NVEC_HIP_CONTENT(x)->host_data, + NVEC_HIP_MEMSIZE(x), + (void*) NVEC_HIP_STREAM(x)); + + if (copy_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VCopyToDevice_Hip: SUNMemoryHelper_CopyAsync returned nonzero\n"); + } + + /* we synchronize with respect to the host, but only in this stream */ + SUNDIALS_HIP_VERIFY(hipStreamSynchronize(*NVEC_HIP_STREAM(x))); +} + +/* ---------------------------------------------------------------------------- + * Copy vector data from the device to the host + */ + +void N_VCopyFromDevice_Hip(N_Vector x) +{ + int copy_fail; + + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(x), + NVEC_HIP_CONTENT(x)->host_data, + NVEC_HIP_CONTENT(x)->device_data, + NVEC_HIP_MEMSIZE(x), + (void*) NVEC_HIP_STREAM(x)); + + if (copy_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VCopyFromDevice_Hip: SUNMemoryHelper_CopyAsync returned nonzero\n"); + } + + /* we synchronize with respect to the host, but only in this stream */ + SUNDIALS_HIP_VERIFY(hipStreamSynchronize(*NVEC_HIP_STREAM(x))); +} + +/* ---------------------------------------------------------------------------- + * Function to print the a CUDA-based vector to stdout + */ + +void N_VPrint_Hip(N_Vector x) +{ + N_VPrintFile_Hip(x, stdout); +} + +/* ---------------------------------------------------------------------------- + * Function to print the a CUDA-based vector to outfile + */ + +void N_VPrintFile_Hip(N_Vector x, FILE *outfile) +{ + sunindextype i; + +#ifdef SUNDIALS_DEBUG_PRINTVEC + N_VCopyFromDevice_Hip(x); +#endif + + for (i = 0; i < NVEC_HIP_CONTENT(x)->length; i++) { +#if defined(SUNDIALS_EXTENDED_PRECISION) + fprintf(outfile, "%35.32Le\n", NVEC_HIP_HDATAp(x)[i]); +#elif defined(SUNDIALS_DOUBLE_PRECISION) + fprintf(outfile, "%19.16e\n", NVEC_HIP_HDATAp(x)[i]); +#else + fprintf(outfile, "%11.8e\n", NVEC_HIP_HDATAp(x)[i]); +#endif + } + fprintf(outfile, "\n"); + + return; +} + + +/* + * ----------------------------------------------------------------- + * implementation of vector operations + * ----------------------------------------------------------------- + */ + +N_Vector N_VCloneEmpty_Hip(N_Vector w) +{ + N_Vector v; + + if (w == NULL) return(NULL); + + /* Create vector */ + v = NULL; + v = N_VNewEmpty_Hip(w->sunctx); + if (v == NULL) return(NULL); + + /* Attach operations */ + if (N_VCopyOps(w, v)) { N_VDestroy(v); return(NULL); } + + /* Set content */ + NVEC_HIP_CONTENT(v)->length = NVEC_HIP_CONTENT(w)->length; + NVEC_HIP_PRIVATE(v)->use_managed_mem = NVEC_HIP_PRIVATE(w)->use_managed_mem; + + return(v); +} + +N_Vector N_VClone_Hip(N_Vector w) +{ + N_Vector v; + + v = NULL; + v = N_VCloneEmpty_Hip(w); + if (v == NULL) return(NULL); + + NVEC_HIP_MEMHELP(v) = SUNMemoryHelper_Clone(NVEC_HIP_MEMHELP(w)); + NVEC_HIP_CONTENT(v)->own_helper = SUNTRUE; + NVEC_HIP_CONTENT(v)->stream_exec_policy = NVEC_HIP_CONTENT(w)->stream_exec_policy->clone(); + NVEC_HIP_CONTENT(v)->reduce_exec_policy = NVEC_HIP_CONTENT(w)->reduce_exec_policy->clone(); + + if (NVEC_HIP_MEMHELP(v) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VClone_Hip: SUNMemoryHelper_Clone returned NULL\n"); + N_VDestroy(v); + return(NULL); + } + + if (AllocateData(v)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VClone_Hip: AllocateData returned nonzero\n"); + N_VDestroy(v); + return(NULL); + } + + return(v); +} + +void N_VDestroy_Hip(N_Vector v) +{ + N_VectorContent_Hip vc; + N_PrivateVectorContent_Hip vcp; + + if (v == NULL) return; + + /* free ops structure */ + if (v->ops != NULL) + { + free(v->ops); + v->ops = NULL; + } + + /* extract content */ + vc = NVEC_HIP_CONTENT(v); + if (vc == NULL) + { + free(v); + v = NULL; + return; + } + + /* free private content */ + vcp = (N_PrivateVectorContent_Hip) vc->priv; + if (vcp != NULL) + { + /* free items in private content */ + FreeDeviceCounter(v); + FreeReductionBuffer(v); + free(vcp); + vc->priv = NULL; + } + + /* free items in content */ + if (NVEC_HIP_MEMHELP(v)) + { + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(v), vc->host_data, (void*) NVEC_HIP_STREAM(v)); + vc->host_data = NULL; + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(v), vc->device_data, (void*) NVEC_HIP_STREAM(v)); + vc->device_data = NULL; + if (vc->own_helper) SUNMemoryHelper_Destroy(vc->mem_helper); + vc->mem_helper = NULL; + } + + /* we can delete the exec policies now that we are done with the streams */ + delete vc->stream_exec_policy; + delete vc->reduce_exec_policy; + + /* free content struct */ + free(vc); + + /* free vector */ + free(v); + + return; +} + +void N_VSpace_Hip(N_Vector X, sunindextype *lrw, sunindextype *liw) +{ + *lrw = NVEC_HIP_CONTENT(X)->length; + *liw = 2; +} + +void N_VConst_Hip(realtype a, N_Vector X) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VConst_Hip: GetKernelParameters returned nonzero\n"); + } + + setConstKernel<<>> + ( + a, + NVEC_HIP_DDATAp(X), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VLinearSum_Hip(realtype a, N_Vector X, realtype b, N_Vector Y, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VLinearSum_Hip: GetKernelParameters returned nonzero\n"); + } + + linearSumKernel<<>> + ( + a, + NVEC_HIP_DDATAp(X), + b, + NVEC_HIP_DDATAp(Y), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VProd_Hip(N_Vector X, N_Vector Y, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VProd_Hip: GetKernelParameters returned nonzero\n"); + } + + + prodKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Y), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VDiv_Hip(N_Vector X, N_Vector Y, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VDiv_Hip: GetKernelParameters returned nonzero\n"); + } + + divKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Y), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VScale_Hip(realtype a, N_Vector X, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VScale_Hip: GetKernelParameters returned nonzero\n"); + } + + scaleKernel<<>> + ( + a, + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VAbs_Hip(N_Vector X, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VAbs_Hip: GetKernelParameters returned nonzero\n"); + } + + absKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VInv_Hip(N_Vector X, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VInv_Hip: GetKernelParameters returned nonzero\n"); + } + + invKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +void N_VAddConst_Hip(N_Vector X, realtype b, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VAddConst_Hip: GetKernelParameters returned nonzero\n"); + } + + addConstKernel<<>> + ( + b, + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +realtype N_VDotProd_Hip(N_Vector X, N_Vector Y) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VDotProd_Hip: GetKernelParameters returned nonzero\n"); + } + + // When using atomic reductions, we only need one output value + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VDotProd_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + dotProdKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Y), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + dotProdKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Y), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +realtype N_VMaxNorm_Hip(N_Vector X) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMaxNorm_Hip: GetKernelParameters returned nonzero\n"); + } + + // When using atomic reductions, we only need one output value + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMaxNorm_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + maxNormKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + maxNormKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Finish reduction on CPU if there are less than two blocks of data left. + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +realtype N_VWSqrSumLocal_Hip(N_Vector X, N_Vector W) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VWSqrSumLocal_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VWSqrSumLocal_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + wL2NormSquareKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(W), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + wL2NormSquareKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(W), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +realtype N_VWrmsNorm_Hip(N_Vector X, N_Vector W) +{ + const realtype sum = N_VWSqrSumLocal_Hip(X, W); + return std::sqrt(sum/NVEC_HIP_CONTENT(X)->length); +} + +realtype N_VWSqrSumMaskLocal_Hip(N_Vector X, N_Vector W, N_Vector Id) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VWSqrSumMaskLocal_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VWSqrSumMaskLocal_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + wL2NormSquareMaskKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(W), + NVEC_HIP_DDATAp(Id), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + wL2NormSquareMaskKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(W), + NVEC_HIP_DDATAp(Id), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +realtype N_VWrmsNormMask_Hip(N_Vector X, N_Vector W, N_Vector Id) +{ + const realtype sum = N_VWSqrSumMaskLocal_Hip(X, W, Id); + return std::sqrt(sum/NVEC_HIP_CONTENT(X)->length); +} + +realtype N_VMin_Hip(N_Vector X) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = std::numeric_limits::max(); + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMin_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMin_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + findMinKernel<<>> + ( + gpu_result, + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + findMinKernel<<>> + ( + gpu_result, + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +realtype N_VWL2Norm_Hip(N_Vector X, N_Vector W) +{ + const realtype sum = N_VWSqrSumLocal_Hip(X, W); + return std::sqrt(sum); +} + +realtype N_VL1Norm_Hip(N_Vector X) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VL1Norm_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VL1Norm_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + L1NormKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + L1NormKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return gpu_result; +} + +void N_VCompare_Hip(realtype c, N_Vector X, N_Vector Z) +{ + size_t grid, block, shMemSize; + hipStream_t stream; + + if (GetKernelParameters(X, false, grid, block, shMemSize, stream)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VCompare_Hip: GetKernelParameters returned nonzero\n"); + } + + compareKernel<<>> + ( + c, + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); +} + +booleantype N_VInvTest_Hip(N_Vector X, N_Vector Z) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VInvTest_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VInvTest_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + invTestKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + invTestKernel<<>> + ( + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(Z), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return (gpu_result < HALF); +} + +booleantype N_VConstrMask_Hip(N_Vector C, N_Vector X, N_Vector M) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = ZERO; + + if (GetKernelParameters(X, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VConstrMask_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(X, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VConstrMask_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + constrMaskKernel<<>> + ( + NVEC_HIP_DDATAp(C), + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(M), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + nullptr + ); + } + else + { + constrMaskKernel<<>> + ( + NVEC_HIP_DDATAp(C), + NVEC_HIP_DDATAp(X), + NVEC_HIP_DDATAp(M), + NVEC_HIP_DBUFFERp(X), + NVEC_HIP_CONTENT(X)->length, + NVEC_HIP_DCOUNTERp(X) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(X); + gpu_result = NVEC_HIP_HBUFFERp(X)[0]; + + return (gpu_result < HALF); +} + +realtype N_VMinQuotient_Hip(N_Vector num, N_Vector denom) +{ + bool atomic; + size_t grid, block, shMemSize; + hipStream_t stream; + + realtype gpu_result = std::numeric_limits::max();; + + if (GetKernelParameters(num, true, grid, block, shMemSize, stream, atomic)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMinQuotient_Hip: GetKernelParameters returned nonzero\n"); + } + + const size_t buffer_size = atomic ? 1 : grid; + if (InitializeReductionBuffer(num, gpu_result, buffer_size)) + { + SUNDIALS_DEBUG_PRINT("ERROR in N_VMinQuotient_Hip: InitializeReductionBuffer returned nonzero\n"); + } + + if (atomic) + { + minQuotientKernel<<>> + ( + gpu_result, + NVEC_HIP_DDATAp(num), + NVEC_HIP_DDATAp(denom), + NVEC_HIP_DBUFFERp(num), + NVEC_HIP_CONTENT(num)->length, + nullptr + ); + } + else + { + minQuotientKernel<<>> + ( + gpu_result, + NVEC_HIP_DDATAp(num), + NVEC_HIP_DDATAp(denom), + NVEC_HIP_DBUFFERp(num), + NVEC_HIP_CONTENT(num)->length, + NVEC_HIP_DCOUNTERp(num) + ); + } + + PostKernelLaunch(); + + // Get result from the GPU + CopyReductionBufferFromDevice(num); + gpu_result = NVEC_HIP_HBUFFERp(num)[0]; + + return gpu_result; +} + + +/* + * ----------------------------------------------------------------- + * fused vector operations + * ----------------------------------------------------------------- + */ + +int N_VLinearCombination_Hip(int nvec, realtype* c, N_Vector* X, N_Vector Z) +{ + hipError_t err; + + // Copy c array to device + realtype* d_c; + err = hipMalloc((void**) &d_c, nvec*sizeof(realtype)); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipMemcpy(d_c, c, nvec*sizeof(realtype), hipMemcpyHostToDevice); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + d_c, + d_Xd, + NVEC_HIP_DDATAp(Z), + NVEC_HIP_CONTENT(Z)->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Xd; + + // Free device arrays + err = hipFree(d_c); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VScaleAddMulti_Hip(int nvec, realtype* c, N_Vector X, N_Vector* Y, + N_Vector* Z) +{ + hipError_t err; + + // Copy c array to device + realtype* d_c; + err = hipMalloc((void**) &d_c, nvec*sizeof(realtype)); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipMemcpy(d_c, c, nvec*sizeof(realtype), hipMemcpyHostToDevice); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Create array of device pointers on host + realtype** h_Yd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + d_c, + NVEC_HIP_DDATAp(X), + d_Yd, + d_Zd, + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Yd; + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_c); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Yd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VDotProdMulti_Hip(int nvec, N_Vector X, N_Vector* Y, realtype* dots) +{ + hipError_t err; + + // Create array of device pointers on host + realtype** h_Yd = new realtype*[nvec]; + for (int i=0; i<<>>( + nvec, + NVEC_HIP_DDATAp(X), + d_Yd, + d_buff, + NVEC_HIP_CONTENT(X)->length + ); + PostKernelLaunch(); + + // Copy GPU result to the cpu. + err = hipMemcpy(dots, d_buff, grid*sizeof(realtype), hipMemcpyDeviceToHost); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Free host array + delete[] h_Yd; + + // Free device arrays + err = hipFree(d_Yd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_buff); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + + +/* + * ----------------------------------------------------------------------------- + * vector array operations + * ----------------------------------------------------------------------------- + */ + +int N_VLinearSumVectorArray_Hip(int nvec, realtype a, N_Vector* X, realtype b, + N_Vector* Y, N_Vector* Z) +{ + hipError_t err; + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + a, + d_Xd, + b, + d_Yd, + d_Zd, + NVEC_HIP_CONTENT(Z[0])->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Xd; + delete[] h_Yd; + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Yd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VScaleVectorArray_Hip(int nvec, realtype* c, N_Vector* X, N_Vector* Z) +{ + hipError_t err; + + // Copy c array to device + realtype* d_c; + err = hipMalloc((void**) &d_c, nvec*sizeof(realtype)); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipMemcpy(d_c, c, nvec*sizeof(realtype), hipMemcpyHostToDevice); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + d_c, + d_Xd, + d_Zd, + NVEC_HIP_CONTENT(Z[0])->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Xd; + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_c); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VConstVectorArray_Hip(int nvec, realtype c, N_Vector* Z) +{ + hipError_t err; + + // Create array of device pointers on host + realtype** h_Zd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + c, + d_Zd, + NVEC_HIP_CONTENT(Z[0])->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VWrmsNormVectorArray_Hip(int nvec, N_Vector* X, N_Vector* W, + realtype* norms) +{ + hipError_t err; + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i<<>>( + nvec, + d_Xd, + d_Wd, + d_buff, + NVEC_HIP_CONTENT(X[0])->length + ); + PostKernelLaunch(); + + // Copy GPU result to the cpu. + err = hipMemcpy(norms, d_buff, grid*sizeof(realtype), hipMemcpyDeviceToHost); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Finish computation + for (int k=0; klength); + + // Free host array + delete[] h_Xd; + delete[] h_Wd; + + // Free device arrays + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Wd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_buff); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VWrmsNormMaskVectorArray_Hip(int nvec, N_Vector* X, N_Vector* W, + N_Vector id, realtype* norms) +{ + hipError_t err; + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i<<>>( + nvec, + d_Xd, + d_Wd, + NVEC_HIP_DDATAp(id), + d_buff, + NVEC_HIP_CONTENT(X[0])->length + ); + PostKernelLaunch(); + + // Copy GPU result to the cpu. + err = hipMemcpy(norms, d_buff, grid*sizeof(realtype), hipMemcpyDeviceToHost); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Finish computation + for (int k=0; klength); + + // Free host array + delete[] h_Xd; + delete[] h_Wd; + + // Free device arrays + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Wd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_buff); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VScaleAddMultiVectorArray_Hip(int nvec, int nsum, realtype* c, + N_Vector* X, N_Vector** Y, N_Vector** Z) +{ + hipError_t err; + + // Copy c array to device + realtype* d_c; + err = hipMalloc((void**) &d_c, nsum*sizeof(realtype)); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipMemcpy(d_c, c, nsum*sizeof(realtype), hipMemcpyHostToDevice); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nvec]; + for (int i=0; i>>( + nvec, + nsum, + d_c, + d_Xd, + d_Yd, + d_Zd, + NVEC_HIP_CONTENT(Z[0][0])->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Xd; + delete[] h_Yd; + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_c); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Yd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return(0); +} + +int N_VLinearCombinationVectorArray_Hip(int nvec, int nsum, realtype* c, + N_Vector** X, N_Vector* Z) +{ + hipError_t err; + + // Copy c array to device + realtype* d_c; + err = hipMalloc((void**) &d_c, nsum*sizeof(realtype)); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipMemcpy(d_c, c, nsum*sizeof(realtype), hipMemcpyHostToDevice); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + // Create array of device pointers on host + realtype** h_Xd = new realtype*[nsum*nvec]; + for (int j=0; j>>( + nvec, + nsum, + d_c, + d_Xd, + d_Zd, + NVEC_HIP_CONTENT(Z[0])->length + ); + PostKernelLaunch(); + + // Free host array + delete[] h_Xd; + delete[] h_Zd; + + // Free device arrays + err = hipFree(d_c); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Xd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + err = hipFree(d_Zd); + if (!SUNDIALS_HIP_VERIFY(err)) return(-1); + + return hipGetLastError(); +} + + +/* + * ----------------------------------------------------------------- + * OPTIONAL XBraid interface operations + * ----------------------------------------------------------------- + */ + + +int N_VBufSize_Hip(N_Vector x, sunindextype *size) +{ + if (x == NULL) return(-1); + *size = (sunindextype)NVEC_HIP_MEMSIZE(x); + return(0); +} + + +int N_VBufPack_Hip(N_Vector x, void *buf) +{ + int copy_fail = 0; + hipError_t cuerr; + + if (x == NULL || buf == NULL) return(-1); + + SUNMemory buf_mem = SUNMemoryHelper_Wrap(buf, SUNMEMTYPE_HOST); + if (buf_mem == NULL) return(-1); + + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(x), + buf_mem, + NVEC_HIP_CONTENT(x)->device_data, + NVEC_HIP_MEMSIZE(x), + (void*) NVEC_HIP_STREAM(x)); + + /* we synchronize with respect to the host, but only in this stream */ + cuerr = hipStreamSynchronize(*NVEC_HIP_STREAM(x)); + + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(x), buf_mem, (void*) NVEC_HIP_STREAM(x)); + + return (!SUNDIALS_HIP_VERIFY(cuerr) || copy_fail ? -1 : 0); +} + + +int N_VBufUnpack_Hip(N_Vector x, void *buf) +{ + int copy_fail = 0; + hipError_t cuerr; + + if (x == NULL || buf == NULL) return(-1); + + SUNMemory buf_mem = SUNMemoryHelper_Wrap(buf, SUNMEMTYPE_HOST); + if (buf_mem == NULL) return(-1); + + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(x), + NVEC_HIP_CONTENT(x)->device_data, + buf_mem, + NVEC_HIP_MEMSIZE(x), + (void*) NVEC_HIP_STREAM(x)); + + /* we synchronize with respect to the host, but only in this stream */ + cuerr = hipStreamSynchronize(*NVEC_HIP_STREAM(x)); + + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(x), buf_mem, (void*) NVEC_HIP_STREAM(x)); + + return (!SUNDIALS_HIP_VERIFY(cuerr) || copy_fail ? -1 : 0); +} + + +/* + * ----------------------------------------------------------------- + * Enable / Disable fused and vector array operations + * ----------------------------------------------------------------- + */ + + +int N_VEnableFusedOps_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + if (tf) + { + /* enable all fused vector operations */ + v->ops->nvlinearcombination = N_VLinearCombination_Hip; + v->ops->nvscaleaddmulti = N_VScaleAddMulti_Hip; + v->ops->nvdotprodmulti = N_VDotProdMulti_Hip; + /* enable all vector array operations */ + v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Hip; + v->ops->nvscalevectorarray = N_VScaleVectorArray_Hip; + v->ops->nvconstvectorarray = N_VConstVectorArray_Hip; + v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Hip; + v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Hip; + v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Hip; + v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Hip; + /* enable single buffer reduction operations */ + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Hip; + } + else + { + /* disable all fused vector operations */ + v->ops->nvlinearcombination = NULL; + v->ops->nvscaleaddmulti = NULL; + v->ops->nvdotprodmulti = NULL; + /* disable all vector array operations */ + v->ops->nvlinearsumvectorarray = NULL; + v->ops->nvscalevectorarray = NULL; + v->ops->nvconstvectorarray = NULL; + v->ops->nvwrmsnormvectorarray = NULL; + v->ops->nvwrmsnormmaskvectorarray = NULL; + v->ops->nvscaleaddmultivectorarray = NULL; + v->ops->nvlinearcombinationvectorarray = NULL; + /* disable single buffer reduction operations */ + v->ops->nvdotprodmultilocal = NULL; + } + + /* return success */ + return(0); +} + +int N_VEnableLinearCombination_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvlinearcombination = N_VLinearCombination_Hip; + else + v->ops->nvlinearcombination = NULL; + + /* return success */ + return(0); +} + +int N_VEnableScaleAddMulti_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvscaleaddmulti = N_VScaleAddMulti_Hip; + else + v->ops->nvscaleaddmulti = NULL; + + /* return success */ + return(0); +} + +int N_VEnableDotProdMulti_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) { + v->ops->nvdotprodmulti = N_VDotProdMulti_Hip; + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Hip; + } else { + v->ops->nvdotprodmulti = NULL; + v->ops->nvdotprodmultilocal = NULL; + } + + /* return success */ + return(0); +} + +int N_VEnableLinearSumVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Hip; + else + v->ops->nvlinearsumvectorarray = NULL; + + /* return success */ + return(0); +} + +int N_VEnableScaleVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvscalevectorarray = N_VScaleVectorArray_Hip; + else + v->ops->nvscalevectorarray = NULL; + + /* return success */ + return(0); +} + +int N_VEnableConstVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvconstvectorarray = N_VConstVectorArray_Hip; + else + v->ops->nvconstvectorarray = NULL; + + /* return success */ + return(0); +} + +int N_VEnableWrmsNormVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Hip; + else + v->ops->nvwrmsnormvectorarray = NULL; + +/* return success */ + return(0); +} + +int N_VEnableWrmsNormMaskVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Hip; + else + v->ops->nvwrmsnormmaskvectorarray = NULL; + + /* return success */ + return(0); +} + +int N_VEnableScaleAddMultiVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Hip; + else + v->ops->nvscaleaddmultivectorarray = NULL; + + /* return success */ + return(0); +} + +int N_VEnableLinearCombinationVectorArray_Hip(N_Vector v, booleantype tf) +{ + /* check that vector is non-NULL */ + if (v == NULL) return(-1); + + /* check that ops structure is non-NULL */ + if (v->ops == NULL) return(-1); + + /* enable/disable operation */ + if (tf) + v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Hip; + else + v->ops->nvlinearcombinationvectorarray = NULL; + + /* return success */ + return(0); +} + +} // extern "C" + +/* + * Private helper functions. + */ + +int AllocateData(N_Vector v) +{ + int alloc_fail = 0; + N_VectorContent_Hip vc = NVEC_HIP_CONTENT(v); + N_PrivateVectorContent_Hip vcp = NVEC_HIP_PRIVATE(v); + + if (N_VGetLength_Hip(v) == 0) return(0); + + if (vcp->use_managed_mem) + { + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), &(vc->device_data), + NVEC_HIP_MEMSIZE(v), SUNMEMTYPE_UVM, + (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in AllocateData: SUNMemoryHelper_Alloc failed for SUNMEMTYPE_UVM\n"); + } + vc->host_data = SUNMemoryHelper_Alias(vc->device_data); + } + else + { + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), &(vc->host_data), + NVEC_HIP_MEMSIZE(v), SUNMEMTYPE_HOST, + (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in AllocateData: SUNMemoryHelper_Alloc failed to alloc SUNMEMTYPE_HOST\n"); + } + + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), &(vc->device_data), + NVEC_HIP_MEMSIZE(v), SUNMEMTYPE_DEVICE, + (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in AllocateData: SUNMemoryHelper_Alloc failed to alloc SUNMEMTYPE_DEVICE\n"); + } + } + + return(alloc_fail ? -1 : 0); +} + +/* + * Initializes the internal buffer used for reductions. + * If the buffer is already allocated, it will only be reallocated + * if it is no longer large enough. This may occur if the length + * of the vector is increased. The buffer is initialized to the + * value given. + */ +static int InitializeReductionBuffer(N_Vector v, realtype value, size_t n) +{ + int alloc_fail = 0; + int copy_fail = 0; + booleantype alloc_mem = SUNFALSE; + size_t bytes = n * sizeof(realtype); + + // Get the vector private memory structure + N_PrivateVectorContent_Hip vcp = NVEC_HIP_PRIVATE(v); + + // Check if the existing reduction memory is not large enough + if (vcp->reduce_buffer_allocated_bytes < bytes) + { + FreeReductionBuffer(v); + alloc_mem = SUNTRUE; + } + + if (alloc_mem) + { + // Allocate pinned memory on the host + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), + &(vcp->reduce_buffer_host), bytes, + SUNMEMTYPE_PINNED, (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("WARNING in InitializeReductionBuffer: SUNMemoryHelper_Alloc failed to alloc SUNMEMTYPE_PINNED, using SUNMEMTYPE_HOST instead\n"); + + // If pinned alloc failed, allocate plain host memory + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), + &(vcp->reduce_buffer_host), bytes, + SUNMEMTYPE_HOST, (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in InitializeReductionBuffer: SUNMemoryHelper_Alloc failed to alloc SUNMEMTYPE_HOST\n"); + } + } + + // Allocate device memory + alloc_fail = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), + &(vcp->reduce_buffer_dev), bytes, + SUNMEMTYPE_DEVICE, (void*) NVEC_HIP_STREAM(v)); + if (alloc_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in InitializeReductionBuffer: SUNMemoryHelper_Alloc failed to alloc SUNMEMTYPE_DEVICE\n"); + } + } + + if (!alloc_fail) + { + // Store the size of the reduction memory buffer + vcp->reduce_buffer_allocated_bytes = bytes; + + // Initialize the host memory with the value + for (int i = 0; i < n; ++i) + ((realtype*)vcp->reduce_buffer_host->ptr)[i] = value; + + // Initialize the device memory with the value + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(v), + vcp->reduce_buffer_dev, vcp->reduce_buffer_host, + bytes, (void*) NVEC_HIP_STREAM(v)); + + if (copy_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in InitializeReductionBuffer: SUNMemoryHelper_CopyAsync failed\n"); + } + } + + return((alloc_fail || copy_fail) ? -1 : 0); +} + +/* Free the reduction buffer + */ +static void FreeReductionBuffer(N_Vector v) +{ + N_PrivateVectorContent_Hip vcp = NVEC_HIP_PRIVATE(v); + + if (vcp == NULL) return; + + // Free device mem + if (vcp->reduce_buffer_dev != NULL) + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(v), vcp->reduce_buffer_dev, + (void*) NVEC_HIP_STREAM(v)); + vcp->reduce_buffer_dev = NULL; + + // Free host mem + if (vcp->reduce_buffer_host != NULL) + SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(v), vcp->reduce_buffer_host, + (void*) NVEC_HIP_STREAM(v)); + vcp->reduce_buffer_host = NULL; + + // Reset allocated memory size + vcp->reduce_buffer_allocated_bytes = 0; +} + +/* Copy the reduction buffer from the device to the host. + */ +static int CopyReductionBufferFromDevice(N_Vector v, size_t n) +{ + int copy_fail; + hipError_t cuerr; + + copy_fail = SUNMemoryHelper_CopyAsync(NVEC_HIP_MEMHELP(v), + NVEC_HIP_PRIVATE(v)->reduce_buffer_host, + NVEC_HIP_PRIVATE(v)->reduce_buffer_dev, + n * sizeof(realtype), + (void*) NVEC_HIP_STREAM(v)); + + if (copy_fail) + { + SUNDIALS_DEBUG_PRINT("ERROR in CopyReductionBufferFromDevice: SUNMemoryHelper_CopyAsync returned nonzero\n"); + } + + /* we synchronize with respect to the host, but only in this stream */ + cuerr = hipStreamSynchronize(*NVEC_HIP_STREAM(v)); + return (!SUNDIALS_HIP_VERIFY(cuerr) || copy_fail ? -1 : 0); +} + +static int InitializeDeviceCounter(N_Vector v) +{ + int retval = 0; + /* AMD hardware does not seem to like atomicInc on pinned memory, so use device memory. */ + if (NVEC_HIP_PRIVATE(v)->device_counter == NULL) + { + retval = SUNMemoryHelper_Alloc(NVEC_HIP_MEMHELP(v), + &(NVEC_HIP_PRIVATE(v)->device_counter), sizeof(unsigned int), + SUNMEMTYPE_DEVICE, (void*) NVEC_HIP_STREAM(v)); + } + hipMemsetAsync(NVEC_HIP_DCOUNTERp(v), 0, sizeof(unsigned int), *NVEC_HIP_STREAM(v)); + return retval; +} + +static int FreeDeviceCounter(N_Vector v) +{ + int retval = 0; + if (NVEC_HIP_PRIVATE(v)->device_counter) + retval = SUNMemoryHelper_Dealloc(NVEC_HIP_MEMHELP(v), NVEC_HIP_PRIVATE(v)->device_counter, + (void*) NVEC_HIP_STREAM(v)); + return retval; +} + +/* Get the kernel launch parameters based on the kernel type (reduction or not), + * using the appropriate kernel execution policy. + */ +static int GetKernelParameters(N_Vector v, booleantype reduction, size_t& grid, + size_t& block, size_t& shMemSize, + hipStream_t& stream, bool& atomic, size_t n) +{ + n = (n == 0) ? NVEC_HIP_CONTENT(v)->length : n; + if (reduction) + { + SUNHipExecPolicy* reduce_exec_policy = NVEC_HIP_CONTENT(v)->reduce_exec_policy; + grid = reduce_exec_policy->gridSize(n); + block = reduce_exec_policy->blockSize(); + shMemSize = 0; + stream = *(reduce_exec_policy->stream()); + atomic = reduce_exec_policy->atomic(); + + if (!atomic) + { + if (InitializeDeviceCounter(v)) + { + #ifdef SUNDIALS_DEBUG + throw std::runtime_error("SUNMemoryHelper_Alloc returned nonzero\n"); + #endif + return(-1); + } + } + + if (block % sundials::hip::WARP_SIZE) + { +#ifdef SUNDIALS_DEBUG + throw std::runtime_error("the block size must be a multiple must be of the HIP warp size"); +#endif + return(-1); + } + } + else + { + SUNHipExecPolicy* stream_exec_policy = NVEC_HIP_CONTENT(v)->stream_exec_policy; + grid = stream_exec_policy->gridSize(n); + block = stream_exec_policy->blockSize(); + shMemSize = 0; + stream = *(stream_exec_policy->stream()); + atomic = false; + } + + if (grid == 0) + { +#ifdef SUNDIALS_DEBUG + throw std::runtime_error("the grid size must be > 0"); +#endif + return(-1); + } + if (block == 0) + { +#ifdef SUNDIALS_DEBUG + throw std::runtime_error("the block size must be > 0"); +#endif + return(-1); + } + + return(0); +} + +static int GetKernelParameters(N_Vector v, booleantype reduction, size_t& grid, + size_t& block, size_t& shMemSize, hipStream_t& stream, + size_t n) +{ + bool atomic; + return GetKernelParameters(v, reduction, grid, block, shMemSize, stream, atomic, n); +} + +/* Should be called after a kernel launch. + * If SUNDIALS_DEBUG_HIP_LASTERROR is not defined, then the function does nothing. + * If it is defined, the function will synchronize and check the last HIP error. + */ +static void PostKernelLaunch() +{ +#ifdef SUNDIALS_DEBUG_HIP_LASTERROR + hipDeviceSynchronize(); + SUNDIALS_HIP_VERIFY(hipGetLastError()); +#endif +} diff --git a/src/lib/nvector/serial/fnvector_serial.c b/src/lib/nvector/serial/fnvector_serial.c deleted file mode 100644 index 19485c3..0000000 --- a/src/lib/nvector/serial/fnvector_serial.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Radu Serban @ LLNL - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of nvector_serial.h) contains the - * implementation needed for the Fortran initialization of serial - * vector operations. - * ----------------------------------------------------------------- - */ - -#include -#include - -#include "fnvector_serial.h" - -/* Define global vector variables */ - -N_Vector F2C_CVODE_vec; -N_Vector F2C_CVODE_vecQ; -N_Vector *F2C_CVODE_vecS; -N_Vector F2C_CVODE_vecB; -N_Vector F2C_CVODE_vecQB; - -N_Vector F2C_IDA_vec; -N_Vector F2C_IDA_vecQ; -N_Vector *F2C_IDA_vecS; -N_Vector F2C_IDA_vecB; -N_Vector F2C_IDA_vecQB; - -N_Vector F2C_KINSOL_vec; - -N_Vector F2C_ARKODE_vec; - -/* Fortran callable interfaces */ - -void FNV_INITS(int *code, long int *N, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - F2C_CVODE_vec = NULL; - F2C_CVODE_vec = N_VNewEmpty_Serial((sunindextype)(*N)); - if (F2C_CVODE_vec == NULL) *ier = -1; - break; - case FCMIX_IDA: - F2C_IDA_vec = NULL; - F2C_IDA_vec = N_VNewEmpty_Serial((sunindextype)(*N)); - if (F2C_IDA_vec == NULL) *ier = -1; - break; - case FCMIX_KINSOL: - F2C_KINSOL_vec = NULL; - F2C_KINSOL_vec = N_VNewEmpty_Serial((sunindextype)(*N)); - if (F2C_KINSOL_vec == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - F2C_ARKODE_vec = NULL; - F2C_ARKODE_vec = N_VNewEmpty_Serial((sunindextype)(*N)); - if (F2C_ARKODE_vec == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - -void FNV_INITS_Q(int *code, long int *Nq, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - F2C_CVODE_vecQ = NULL; - F2C_CVODE_vecQ = N_VNewEmpty_Serial((sunindextype)(*Nq)); - if (F2C_CVODE_vecQ == NULL) *ier = -1; - break; - case FCMIX_IDA: - F2C_IDA_vecQ = NULL; - F2C_IDA_vecQ = N_VNewEmpty_Serial((sunindextype)(*Nq)); - if (F2C_IDA_vecQ == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - -void FNV_INITS_B(int *code, long int *NB, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - F2C_CVODE_vecB = NULL; - F2C_CVODE_vecB = N_VNewEmpty_Serial((sunindextype)(*NB)); - if (F2C_CVODE_vecB == NULL) *ier = -1; - break; - case FCMIX_IDA: - F2C_IDA_vecB = NULL; - F2C_IDA_vecB = N_VNewEmpty_Serial((sunindextype)(*NB)); - if (F2C_IDA_vecB == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - -void FNV_INITS_QB(int *code, long int *NqB, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - F2C_CVODE_vecQB = NULL; - F2C_CVODE_vecQB = N_VNewEmpty_Serial((sunindextype)(*NqB)); - if (F2C_CVODE_vecQB == NULL) *ier = -1; - break; - case FCMIX_IDA: - F2C_IDA_vecQB = NULL; - F2C_IDA_vecQB = N_VNewEmpty_Serial((sunindextype)(*NqB)); - if (F2C_IDA_vecQB == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - -void FNV_INITS_S(int *code, int *Ns, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - F2C_CVODE_vecS = NULL; - F2C_CVODE_vecS = (N_Vector *) N_VCloneVectorArrayEmpty_Serial(*Ns, F2C_CVODE_vec); - if (F2C_CVODE_vecS == NULL) *ier = -1; - break; - case FCMIX_IDA: - F2C_IDA_vecS = NULL; - F2C_IDA_vecS = (N_Vector *) N_VCloneVectorArrayEmpty_Serial(*Ns, F2C_IDA_vec); - if (F2C_IDA_vecS == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} diff --git a/src/lib/nvector/serial/fnvector_serial.h b/src/lib/nvector/serial/fnvector_serial.h deleted file mode 100644 index 8ee03b5..0000000 --- a/src/lib/nvector/serial/fnvector_serial.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Radu Serban and Aaron Collier @ LLNL - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of nvector_serial.h) contains the - * definitions needed for the initialization of serial - * vector operations in Fortran. - * ----------------------------------------------------------------- - */ - -#ifndef _FNVECTOR_SERIAL_H -#define _FNVECTOR_SERIAL_H - -#include -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FNV_INITS SUNDIALS_F77_FUNC(fnvinits, FNVINITS) -#else -#define FNV_INITS fnvinits_ -#endif - -#if defined(SUNDIALS_F77_FUNC_) - -#define FNV_INITS_Q SUNDIALS_F77_FUNC_(fnvinits_q, FNVINITS_Q) -#define FNV_INITS_S SUNDIALS_F77_FUNC_(fnvinits_s, FNVINITS_S) -#define FNV_INITS_B SUNDIALS_F77_FUNC_(fnvinits_b, FNVINITS_B) -#define FNV_INITS_QB SUNDIALS_F77_FUNC_(fnvinits_qb, FNVINITS_QB) - -#else - -#define FNV_INITS_Q fnvinits_q_ -#define FNV_INITS_S fnvinits_s_ -#define FNV_INITS_B fnvinits_b_ -#define FNV_INITS_QB fnvinits_qb_ - -#endif - -/* Declarations of global variables */ - -extern N_Vector F2C_CVODE_vec; -extern N_Vector F2C_CVODE_vecQ; -extern N_Vector *F2C_CVODE_vecS; -extern N_Vector F2C_CVODE_vecB; -extern N_Vector F2C_CVODE_vecQB; - -extern N_Vector F2C_IDA_vec; -extern N_Vector F2C_IDA_vecQ; -extern N_Vector *F2C_IDA_vecS; -extern N_Vector F2C_IDA_vecB; -extern N_Vector F2C_IDA_vecQB; - -extern N_Vector F2C_KINSOL_vec; - -extern N_Vector F2C_ARKODE_vec; - -/* - * Prototypes of exported functions - * - * FNV_INITS - initializes serial vector operations for main problem - * FNV_INITS_Q - initializes serial vector operations for quadratures - * FNV_INITS_S - initializes serial vector operations for sensitivities - * FNV_INITS_B - initializes serial vector operations for adjoint problem - * FNV_INITS_QB - initializes serial vector operations for adjoint quadratures - * - */ - -void FNV_INITS(int *code, long int *neq, int *ier); -void FNV_INITS_Q(int *code, long int *Nq, int *ier); -void FNV_INITS_S(int *code, int *Ns, int *ier); -void FNV_INITS_B(int *code, long int *NB, int *ier); -void FNV_INITS_QB(int *code, long int *NqB, int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/nvector/serial/nvector_serial.c b/src/lib/nvector/serial/nvector_serial.c index b867dc2..daa8ccc 100644 --- a/src/lib/nvector/serial/nvector_serial.c +++ b/src/lib/nvector/serial/nvector_serial.c @@ -3,7 +3,7 @@ * and Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -21,6 +21,7 @@ #include #include +#include "sundials/sundials_nvector.h" #define ZERO RCONST(0.0) #define HALF RCONST(0.5) @@ -67,14 +68,14 @@ N_Vector_ID N_VGetVectorID_Serial(N_Vector v) * Function to create a new empty serial vector */ -N_Vector N_VNewEmpty_Serial(sunindextype length) +N_Vector N_VNewEmpty_Serial(sunindextype length, SUNContext sunctx) { N_Vector v; N_VectorContent_Serial content; /* Create an empty vector object */ v = NULL; - v = N_VNewEmpty(); + v = N_VNewEmpty(sunctx); if (v == NULL) return(NULL); /* Attach operations */ @@ -88,6 +89,7 @@ N_Vector N_VNewEmpty_Serial(sunindextype length) v->ops->nvgetarraypointer = N_VGetArrayPointer_Serial; v->ops->nvsetarraypointer = N_VSetArrayPointer_Serial; v->ops->nvgetlength = N_VGetLength_Serial; + v->ops->nvgetlocallength = N_VGetLength_Serial; /* standard vector operations */ v->ops->nvlinearsum = N_VLinearSum_Serial; @@ -123,6 +125,18 @@ N_Vector N_VNewEmpty_Serial(sunindextype length) v->ops->nvwsqrsumlocal = N_VWSqrSumLocal_Serial; v->ops->nvwsqrsummasklocal = N_VWSqrSumMaskLocal_Serial; + /* single buffer reduction operations */ + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Serial; + + /* XBraid interface operations */ + v->ops->nvbufsize = N_VBufSize_Serial; + v->ops->nvbufpack = N_VBufPack_Serial; + v->ops->nvbufunpack = N_VBufUnpack_Serial; + + /* debugging functions */ + v->ops->nvprint = N_VPrint_Serial; + v->ops->nvprintfile = N_VPrintFile_Serial; + /* Create content */ content = NULL; content = (N_VectorContent_Serial) malloc(sizeof *content); @@ -143,13 +157,13 @@ N_Vector N_VNewEmpty_Serial(sunindextype length) * Function to create a new serial vector */ -N_Vector N_VNew_Serial(sunindextype length) +N_Vector N_VNew_Serial(sunindextype length, SUNContext sunctx) { N_Vector v; realtype *data; v = NULL; - v = N_VNewEmpty_Serial(length); + v = N_VNewEmpty_Serial(length, sunctx); if (v == NULL) return(NULL); /* Create data */ @@ -173,12 +187,12 @@ N_Vector N_VNew_Serial(sunindextype length) * Function to create a serial N_Vector with user data component */ -N_Vector N_VMake_Serial(sunindextype length, realtype *v_data) +N_Vector N_VMake_Serial(sunindextype length, realtype *v_data, SUNContext sunctx) { N_Vector v; v = NULL; - v = N_VNewEmpty_Serial(length); + v = N_VNewEmpty_Serial(length, sunctx); if (v == NULL) return(NULL); if (length > 0) { @@ -196,25 +210,7 @@ N_Vector N_VMake_Serial(sunindextype length, realtype *v_data) N_Vector* N_VCloneVectorArray_Serial(int count, N_Vector w) { - N_Vector* vs; - int j; - - if (count <= 0) return(NULL); - - vs = NULL; - vs = (N_Vector*) malloc(count * sizeof(N_Vector)); - if(vs == NULL) return(NULL); - - for (j = 0; j < count; j++) { - vs[j] = NULL; - vs[j] = N_VClone_Serial(w); - if (vs[j] == NULL) { - N_VDestroyVectorArray_Serial(vs, j-1); - return(NULL); - } - } - - return(vs); + return(N_VCloneVectorArray(count, w)); } /* ---------------------------------------------------------------------------- @@ -223,25 +219,7 @@ N_Vector* N_VCloneVectorArray_Serial(int count, N_Vector w) N_Vector* N_VCloneVectorArrayEmpty_Serial(int count, N_Vector w) { - N_Vector* vs; - int j; - - if (count <= 0) return(NULL); - - vs = NULL; - vs = (N_Vector*) malloc(count * sizeof(N_Vector)); - if(vs == NULL) return(NULL); - - for (j = 0; j < count; j++) { - vs[j] = NULL; - vs[j] = N_VCloneEmpty_Serial(w); - if (vs[j] == NULL) { - N_VDestroyVectorArray_Serial(vs, j-1); - return(NULL); - } - } - - return(vs); + return(N_VCloneEmptyVectorArray(count, w)); } /* ---------------------------------------------------------------------------- @@ -250,12 +228,7 @@ N_Vector* N_VCloneVectorArrayEmpty_Serial(int count, N_Vector w) void N_VDestroyVectorArray_Serial(N_Vector* vs, int count) { - int j; - - for (j = 0; j < count; j++) N_VDestroy_Serial(vs[j]); - - free(vs); vs = NULL; - + N_VDestroyVectorArray(vs, count); return; } @@ -292,11 +265,11 @@ void N_VPrintFile_Serial(N_Vector x, FILE* outfile) for (i = 0; i < N; i++) { #if defined(SUNDIALS_EXTENDED_PRECISION) - fprintf(outfile, "%35.32Lg\n", xd[i]); + fprintf(outfile, "%35.32Le\n", xd[i]); #elif defined(SUNDIALS_DOUBLE_PRECISION) - fprintf(outfile, "%19.16g\n", xd[i]); + fprintf(outfile, "%19.16e\n", xd[i]); #else - fprintf(outfile, "%11.8g\n", xd[i]); + fprintf(outfile, "%11.8e\n", xd[i]); #endif } fprintf(outfile, "\n"); @@ -319,7 +292,7 @@ N_Vector N_VCloneEmpty_Serial(N_Vector w) /* Create vector */ v = NULL; - v = N_VNewEmpty(); + v = N_VNewEmpty(w->sunctx); if (v == NULL) return(NULL); /* Attach operations */ @@ -1521,6 +1494,59 @@ int N_VLinearCombinationVectorArray_Serial(int nvec, int nsum, realtype* c, } +/* + * ----------------------------------------------------------------- + * OPTIONAL XBraid interface operations + * ----------------------------------------------------------------- + */ + + +int N_VBufSize_Serial(N_Vector x, sunindextype *size) +{ + if (x == NULL) return(-1); + *size = NV_LENGTH_S(x) * ((sunindextype)sizeof(realtype)); + return(0); +} + + +int N_VBufPack_Serial(N_Vector x, void *buf) +{ + sunindextype i, N; + realtype *xd = NULL; + realtype *bd = NULL; + + if (x == NULL || buf == NULL) return(-1); + + N = NV_LENGTH_S(x); + xd = NV_DATA_S(x); + bd = (realtype*) buf; + + for (i = 0; i < N; i++) + bd[i] = xd[i]; + + return(0); +} + + +int N_VBufUnpack_Serial(N_Vector x, void *buf) +{ + sunindextype i, N; + realtype *xd = NULL; + realtype *bd = NULL; + + if (x == NULL || buf == NULL) return(-1); + + N = NV_LENGTH_S(x); + xd = NV_DATA_S(x); + bd = (realtype*) buf; + + for (i = 0; i < N; i++) + xd[i] = bd[i]; + + return(0); +} + + /* * ----------------------------------------------------------------- * private functions for special cases of vector operations @@ -1876,7 +1902,7 @@ static int VaxpyVectorArray_Serial(int nvec, realtype a, N_Vector* X, N_Vector* } return(0); - } + } for (i=0; iops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Serial; v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Serial; v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Serial; + /* enable single buffer reduction operations */ + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Serial; } else { /* disable all fused vector operations */ v->ops->nvlinearcombination = NULL; @@ -1929,6 +1957,8 @@ int N_VEnableFusedOps_Serial(N_Vector v, booleantype tf) v->ops->nvwrmsnormmaskvectorarray = NULL; v->ops->nvscaleaddmultivectorarray = NULL; v->ops->nvlinearcombinationvectorarray = NULL; + /* disable single buffer reduction operations */ + v->ops->nvdotprodmultilocal = NULL; } /* return success */ @@ -1981,10 +2011,13 @@ int N_VEnableDotProdMulti_Serial(N_Vector v, booleantype tf) if (v->ops == NULL) return(-1); /* enable/disable operation */ - if (tf) - v->ops->nvdotprodmulti = N_VDotProdMulti_Serial; - else - v->ops->nvdotprodmulti = NULL; + if (tf) { + v->ops->nvdotprodmulti = N_VDotProdMulti_Serial; + v->ops->nvdotprodmultilocal = N_VDotProdMulti_Serial; + } else { + v->ops->nvdotprodmulti = NULL; + v->ops->nvdotprodmultilocal = NULL; + } /* return success */ return(0); diff --git a/src/lib/readme.txt b/src/lib/readme.txt index 9e8de92..cf0b72c 100644 --- a/src/lib/readme.txt +++ b/src/lib/readme.txt @@ -1,23 +1,40 @@ -# 2019-12-12 sokol@insa-toulouse.fr +# 2023-03-17 sokol@insa-toulouse.fr # build internal lib for sundials -SUNTOP=/usr/local/src/cvodes-5.0.0 -MYTOP=$HOME/dev/R/rcpp-pkgs/r2sundials +SUNTOP=/usr/local/src/cvodes-6.5.0 +MYTOP=$HOME/dev/r/rcpp-pkgs/r2sundials mkdir -p $MYTOP/src/lib/ +# cmake build original CVODES to get sundials_config.h +cd $SUNTOP +mkdir instdir +mkdir build +cd build +ccmake .. +# c - as configure +# set: +# CMAKE_INSTALL_PREFIX = /usr/local/src/cvodes-6.5.0/instdir +# SUNDIALS_INDEX_SIZE = 32 +# c - as configure +# g - as generate + +make -j4 + # copy sources cp -a $SUNTOP/src/{cvodes,nvector,sundials,sunlinsol,sunmatrix,sunnonlinsol} $MYTOP/src/lib/ # remove fmod dirs find $MYTOP/src/lib/ -type d -name fmod -exec rm -fr {} \; find $MYTOP/src/lib/ -name CMakeLists.txt -exec rm -fr {} \; # remove pthreads etc -rm -rf $MYTOP/src/lib/nvector/{cuda,manyvector,mpiplusx,openmp,openmpdev,parallel,parhyp,petsc,raja,trilinos} +rm -rf $MYTOP/src/lib/nvector/{cuda,manyvector,mpiplusx,openmp,openmpdev,parallel,parhyp,petsc,raja,trilinos,pthreads,sycl} rm -rf $MYTOP/src/lib/sunnonlinsol/petscsnes rm -rf $MYTOP/src/lib/sunlinsol/{klu,superludist,superlumt} rm -rf $MYTOP/src/lib/sunmatrix/slunrloc +rm -rf $MYTOP/src/lib/sundials/sundials_xbraid.c ( cd $MYTOP/inst/include/nvector/ && rm -rf $(ls -1 | grep -v serial) ) # copy includes +cp -a $SUNTOP/src/sundials/*.h $MYTOP/inst/include/sundials/ cp -a $SUNTOP/include/{cvodes,nvector,sundials,sunlinsol,sunmatrix,sunnonlinsol} $MYTOP/inst/include -cp -a $SUNTOP/build/include/sundials/sundials_config.h $MYTOP/inst/include/sundials/ +cp -a $SUNTOP/build/include/sundials/{sundials_config,sundials_export}.h $MYTOP/inst/include/sundials/ diff --git a/src/lib/sundials/sundials_band.c b/src/lib/sundials/sundials_band.c index 1495a57..1b675af 100644 --- a/src/lib/sundials/sundials_band.c +++ b/src/lib/sundials/sundials_band.c @@ -6,7 +6,7 @@ * Programmer(s): Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -33,33 +33,58 @@ /* * ----------------------------------------------------- - * Functions working on DlsMat + * Functions working on SUNDlsMat * ----------------------------------------------------- */ -sunindextype BandGBTRF(DlsMat A, sunindextype *p) +sunindextype SUNDlsMat_BandGBTRF(SUNDlsMat A, sunindextype* p) { - return(bandGBTRF(A->cols, A->M, A->mu, A->ml, A->s_mu, p)); + return(SUNDlsMat_bandGBTRF(A->cols, A->M, A->mu, A->ml, A->s_mu, p)); } -void BandGBTRS(DlsMat A, sunindextype *p, realtype *b) +sunindextype BandGBTRF(SUNDlsMat A, sunindextype *p) { - bandGBTRS(A->cols, A->M, A->s_mu, A->ml, p, b); + return(SUNDlsMat_bandGBTRF(A->cols, A->M, A->mu, A->ml, A->s_mu, p)); } -void BandCopy(DlsMat A, DlsMat B, sunindextype copymu, sunindextype copyml) +void SUNDlsMat_BandGBTRS(SUNDlsMat A, sunindextype *p, realtype *b) { - bandCopy(A->cols, B->cols, A->M, A->s_mu, B->s_mu, copymu, copyml); + SUNDlsMat_bandGBTRS(A->cols, A->M, A->s_mu, A->ml, p, b); } -void BandScale(realtype c, DlsMat A) +void BandGBTRS(SUNDlsMat A, sunindextype *p, realtype *b) { - bandScale(c, A->cols, A->M, A->mu, A->ml, A->s_mu); + SUNDlsMat_bandGBTRS(A->cols, A->M, A->s_mu, A->ml, p, b); } -void BandMatvec(DlsMat A, realtype *x, realtype *y) +void SUNDlsMat_BandCopy(SUNDlsMat A, SUNDlsMat B, sunindextype copymu, sunindextype copyml) { - bandMatvec(A->cols, x, y, A->M, A->mu, A->ml, A->s_mu); + SUNDlsMat_bandCopy(A->cols, B->cols, A->M, A->s_mu, B->s_mu, copymu, copyml); +} + +void BandCopy(SUNDlsMat A, SUNDlsMat B, sunindextype copymu, sunindextype copyml) +{ + SUNDlsMat_bandCopy(A->cols, B->cols, A->M, A->s_mu, B->s_mu, copymu, copyml); +} + +void SUNDlsMat_BandScale(realtype c, SUNDlsMat A) +{ + SUNDlsMat_bandScale(c, A->cols, A->M, A->mu, A->ml, A->s_mu); +} + +void BandScale(realtype c, SUNDlsMat A) +{ + SUNDlsMat_bandScale(c, A->cols, A->M, A->mu, A->ml, A->s_mu); +} + +void SUNDlsMat_BandMatvec(SUNDlsMat A, realtype *x, realtype *y) +{ + SUNDlsMat_bandMatvec(A->cols, x, y, A->M, A->mu, A->ml, A->s_mu); +} + +void BandMatvec(SUNDlsMat A, realtype *x, realtype *y) +{ + SUNDlsMat_bandMatvec(A->cols, x, y, A->M, A->mu, A->ml, A->s_mu); } /* @@ -69,6 +94,11 @@ void BandMatvec(DlsMat A, realtype *x, realtype *y) */ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindextype ml, sunindextype smu, sunindextype *p) +{ + return(SUNDlsMat_bandGBTRF(a, n, mu, ml, smu, p)); +} + +sunindextype SUNDlsMat_bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindextype ml, sunindextype smu, sunindextype *p) { sunindextype c, r, num_rows; sunindextype i, j, k, l, storage_l, storage_k, last_col_k, last_row_k; @@ -83,7 +113,7 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty for (c=0; c < n; c++) { a_c = a[c]; for (r=0; r < num_rows; r++) { - a_c[r] = ZERO; + a_c[r] = ZERO; } } } @@ -91,7 +121,7 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty /* k = elimination step number */ for (k=0; k < n-1; k++, p++) { - + col_k = a[k]; diag_k = col_k + smu; sub_diag_k = diag_k + 1; @@ -101,21 +131,21 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty l=k; max = SUNRabs(*diag_k); - for (i=k+1, kptr=sub_diag_k; i <= last_row_k; i++, kptr++) { + for (i=k+1, kptr=sub_diag_k; i <= last_row_k; i++, kptr++) { if (SUNRabs(*kptr) > max) { - l=i; - max = SUNRabs(*kptr); + l=i; + max = SUNRabs(*kptr); } } storage_l = ROW(l, k, smu); *p = l; - + /* check for zero pivot element */ if (col_k[storage_l] == ZERO) return(k+1); - + /* swap a(l,k) and a(k,k) if necessary */ - + if ( (swap = (l != k) )) { temp = col_k[storage_l]; col_k[storage_l] = *diag_k; @@ -127,7 +157,7 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty /* a(k,k) holds the pivot element. This scaling */ /* stores the pivot row multipliers -a(i,k)/a(k,k) */ /* in a(i,k), i=k+1, ..., SUNMIN(n-1,k+ml). */ - + mult = -ONE / (*diag_k); for (i=k+1, kptr = sub_diag_k; i <= last_row_k; i++, kptr++) (*kptr) *= mult; @@ -136,37 +166,37 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty /* row k is the pivot row after swapping with row l. */ /* The computation is done one column at a time, */ /* column j=k+1, ..., SUNMIN(k+smu,n-1). */ - + last_col_k = SUNMIN(k+smu,n-1); for (j=k+1; j <= last_col_k; j++) { - + col_j = a[j]; - storage_l = ROW(l,j,smu); - storage_k = ROW(k,j,smu); + storage_l = ROW(l,j,smu); + storage_k = ROW(k,j,smu); a_kj = col_j[storage_l]; /* Swap the elements a(k,j) and a(k,l) if l!=k. */ - + if (swap) { - col_j[storage_l] = col_j[storage_k]; - col_j[storage_k] = a_kj; + col_j[storage_l] = col_j[storage_k]; + col_j[storage_k] = a_kj; } /* a(i,j) = a(i,j) - [a(i,k)/a(k,k)]*a(k,j) */ /* a_kj = a(k,j), *kptr = - a(i,k)/a(k,k), *jptr = a(i,j) */ if (a_kj != ZERO) { - for (i=k+1, kptr=sub_diag_k, jptr=col_j+ROW(k+1,j,smu); - i <= last_row_k; - i++, kptr++, jptr++) - (*jptr) += a_kj * (*kptr); + for (i=k+1, kptr=sub_diag_k, jptr=col_j+ROW(k+1,j,smu); + i <= last_row_k; + i++, kptr++, jptr++) + (*jptr) += a_kj * (*kptr); } - } + } } - + /* set the last pivot row to be n-1 and check for a zero pivot */ - *p = n-1; + *p = n-1; if (a[n-1][smu] == ZERO) return(n); /* return 0 to indicate success */ @@ -174,13 +204,19 @@ sunindextype bandGBTRF(realtype **a, sunindextype n, sunindextype mu, sunindexty return(0); } -void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, sunindextype ml, sunindextype *p, realtype *b) +void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, + sunindextype ml, sunindextype *p, realtype *b) +{ + SUNDlsMat_bandGBTRS(a, n, smu, ml, p, b); +} + +void SUNDlsMat_bandGBTRS(realtype **a, sunindextype n, sunindextype smu, sunindextype ml, sunindextype *p, realtype *b) { sunindextype k, l, i, first_row_k, last_row_k; realtype mult, *diag_k; - + /* Solve Ly = Pb, store solution y in b */ - + for (k=0; k < n-1; k++) { l = p[k]; mult = b[l]; @@ -193,9 +229,9 @@ void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, sunindextype ml, for (i=k+1; i <= last_row_k; i++) b[i] += mult * diag_k[i-k]; } - + /* Solve Ux = y, store solution x in b */ - + for (k=n-1; k >= 0; k--) { diag_k = a[k]+smu; first_row_k = SUNMAX(0,k-smu); @@ -206,14 +242,20 @@ void bandGBTRS(realtype **a, sunindextype n, sunindextype smu, sunindextype ml, } } -void bandCopy(realtype **a, realtype **b, sunindextype n, sunindextype a_smu, sunindextype b_smu, +void bandCopy(realtype **a, realtype **b, sunindextype n, sunindextype a_smu, sunindextype b_smu, sunindextype copymu, sunindextype copyml) +{ + SUNDlsMat_bandCopy(a, b, n, a_smu, b_smu, copymu, copyml); +} + +void SUNDlsMat_bandCopy(realtype **a, realtype **b, sunindextype n, sunindextype a_smu, sunindextype b_smu, + sunindextype copymu, sunindextype copyml) { sunindextype i, j, copySize; realtype *a_col_j, *b_col_j; copySize = copymu + copyml + 1; - + for (j=0; j < n; j++) { a_col_j = a[j]+a_smu-copymu; b_col_j = b[j]+b_smu-copymu; @@ -223,6 +265,11 @@ void bandCopy(realtype **a, realtype **b, sunindextype n, sunindextype a_smu, su } void bandScale(realtype c, realtype **a, sunindextype n, sunindextype mu, sunindextype ml, sunindextype smu) +{ + SUNDlsMat_bandScale(c, a, n, mu, ml, smu); +} + +void SUNDlsMat_bandScale(realtype c, realtype **a, sunindextype n, sunindextype mu, sunindextype ml, sunindextype smu) { sunindextype i, j, colSize; realtype *col_j; @@ -237,15 +284,26 @@ void bandScale(realtype c, realtype **a, sunindextype n, sunindextype mu, sunind } void bandAddIdentity(realtype **a, sunindextype n, sunindextype smu) +{ + SUNDlsMat_bandAddIdentity(a, n, smu); +} + +void SUNDlsMat_bandAddIdentity(realtype **a, sunindextype n, sunindextype smu) { sunindextype j; - + for(j=0; j < n; j++) a[j][smu] += ONE; } -void bandMatvec(realtype **a, realtype *x, realtype *y, sunindextype n, - sunindextype mu, sunindextype ml, sunindextype smu) +void bandMatvec(realtype **a, realtype *x, realtype *y, sunindextype n, + sunindextype mu, sunindextype ml, sunindextype smu) +{ + SUNDlsMat_bandMatvec(a, x, y, n, mu, ml, smu); +} + +void SUNDlsMat_bandMatvec(realtype **a, realtype *x, realtype *y, sunindextype n, + sunindextype mu, sunindextype ml, sunindextype smu) { sunindextype i, j, is, ie; realtype *col_j; diff --git a/src/lib/sundials/sundials_context.c b/src/lib/sundials/sundials_context.c new file mode 100644 index 0000000..ba04aac --- /dev/null +++ b/src/lib/sundials/sundials_context.c @@ -0,0 +1,205 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS context class. A context object holds data that all + * SUNDIALS objects in a simulation share. + * ----------------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_debug.h" + +int SUNContext_Create(void* comm, SUNContext* sunctx) +{ + SUNProfiler profiler = NULL; + SUNLogger logger = NULL; + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && !defined(SUNDIALS_CALIPER_ENABLED) + if (SUNProfiler_Create(comm, "SUNContext Default", &profiler)) return (-1); +#endif + +#if SUNDIALS_LOGGING_LEVEL > 0 +#if defined(SUNDIALS_LOGGING_ENABLE_MPI) + if (SUNLogger_CreateFromEnv(comm, &logger)) +#else + if (SUNLogger_CreateFromEnv(NULL, &logger)) +#endif + { + return (-1); + } +#else + if (SUNLogger_Create(NULL, 0, &logger)) + { + return (-1); + } + SUNLogger_SetErrorFilename(logger, ""); + SUNLogger_SetWarningFilename(logger, ""); + SUNLogger_SetInfoFilename(logger, ""); + SUNLogger_SetDebugFilename(logger, ""); +#endif + + *sunctx = NULL; + *sunctx = (SUNContext)malloc(sizeof(struct _SUNContext)); + + if (*sunctx == NULL) + { +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && !defined(SUNDIALS_CALIPER_ENABLED) + SUNProfiler_Free(&profiler); +#endif + SUNLogger_Destroy(&logger); + return (-1); + } + + (*sunctx)->logger = logger; + (*sunctx)->own_logger = logger != NULL; + (*sunctx)->profiler = profiler; + (*sunctx)->own_profiler = profiler != NULL; + + return (0); +} + +int SUNContext_GetProfiler(SUNContext sunctx, SUNProfiler* profiler) +{ + if (sunctx == NULL) + { + return (-1); + } + +#ifdef SUNDIALS_BUILD_WITH_PROFILING + /* get profiler */ + *profiler = sunctx->profiler; +#else + *profiler = NULL; +#endif + + return (0); +} + +int SUNContext_SetProfiler(SUNContext sunctx, SUNProfiler profiler) +{ + if (sunctx == NULL) + { + return (-1); + } + +#ifdef SUNDIALS_BUILD_WITH_PROFILING + /* free any existing profiler */ + if (sunctx->profiler && sunctx->own_profiler) + { + if (SUNProfiler_Free(&(sunctx->profiler))) return (-1); + sunctx->profiler = NULL; + } + + /* set profiler */ + sunctx->profiler = profiler; + sunctx->own_profiler = SUNFALSE; +#endif + + return (0); +} + +int SUNContext_GetLogger(SUNContext sunctx, SUNLogger* logger) +{ + if (sunctx == NULL) + { + return (-1); + } + + /* get logger */ + *logger = sunctx->logger; + + return (0); +} + +int SUNContext_SetLogger(SUNContext sunctx, SUNLogger logger) +{ + if (sunctx == NULL) + { + return (-1); + } + + /* free any existing logger */ + if (sunctx->logger && sunctx->own_logger) + { + if (SUNLogger_Destroy(&(sunctx->logger))) + { + return (-1); + } + sunctx->logger = NULL; + } + + /* set logger */ + sunctx->logger = logger; + sunctx->own_logger = SUNFALSE; + + return (0); +} + +int SUNContext_Free(SUNContext* sunctx) +{ +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && !defined(SUNDIALS_CALIPER_ENABLED) + FILE* fp; + char* sunprofiler_print_env; +#endif + + if (!sunctx) + { + return (0); + } + if (!(*sunctx)) + { + return (0); + } + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) && !defined(SUNDIALS_CALIPER_ENABLED) + /* Find out where we are printing to */ + sunprofiler_print_env = getenv("SUNPROFILER_PRINT"); + fp = NULL; + if (sunprofiler_print_env) + { + if (!strcmp(sunprofiler_print_env, "0")) + fp = NULL; + else if (!strcmp(sunprofiler_print_env, "1") || + !strcmp(sunprofiler_print_env, "TRUE") || + !strcmp(sunprofiler_print_env, "stdout")) + fp = stdout; + else + fp = fopen(sunprofiler_print_env, "a"); + } + + /* Enforce that the profiler is freed before finalizing, + if it is not owned by the sunctx. */ + if ((*sunctx)->profiler) + { + if (fp) SUNProfiler_Print((*sunctx)->profiler, fp); + if (fp) fclose(fp); + if ((*sunctx)->own_profiler) SUNProfiler_Free(&(*sunctx)->profiler); + } +#endif + + if ((*sunctx)->logger && (*sunctx)->own_logger) + { + SUNLogger_Destroy(&(*sunctx)->logger); + } + + free(*sunctx); + *sunctx = NULL; + + return (0); +} diff --git a/src/lib/sundials/sundials_context_impl.h b/src/lib/sundials/sundials_context_impl.h new file mode 100644 index 0000000..28083aa --- /dev/null +++ b/src/lib/sundials/sundials_context_impl.h @@ -0,0 +1,39 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS context class implementation. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CONTEXT_IMPL_H +#define _SUNDIALS_CONTEXT_IMPL_H + +#include +#include +#include +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +struct _SUNContext { + SUNProfiler profiler; + booleantype own_profiler; + SUNLogger logger; + booleantype own_logger; +}; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/lib/sundials/sundials_cuda.h b/src/lib/sundials/sundials_cuda.h new file mode 100644 index 0000000..207d020 --- /dev/null +++ b/src/lib/sundials/sundials_cuda.h @@ -0,0 +1,116 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for working with CUDA. + * ----------------------------------------------------------------- + */ + +#include +#include + +#include +#include +#include + +#include + +#ifndef _SUNDIALS_CUDA_H +#define _SUNDIALS_CUDA_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* --------------------------------------------------------------------------- + * Utility macros + * ---------------------------------------------------------------------------*/ + +#define SUNDIALS_CUDA_VERIFY(cuerr) SUNDIALS_CUDA_Assert(cuerr, __FILE__, __LINE__) +#define SUNDIALS_CUSPARSE_VERIFY(cuerr) SUNDIALS_CUSPARSE_Assert(cuerr, __FILE__, __LINE__) +#define SUNDIALS_CUSOLVER_VERIFY(cuerr) SUNDIALS_CUSOLVER_Assert(cuerr, __FILE__, __LINE__) + +#define SUNDIALS_KERNEL_NAME(...) __VA_ARGS__ +#ifndef SUNDIALS_DEBUG_CUDA_LASTERROR +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ kernel<<>>(__VA_ARGS__); } +#else +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ \ + kernel<<>>(__VA_ARGS__); \ + cudaDeviceSynchronize(); \ + SUNDIALS_CUDA_VERIFY(cudaGetLastError()); \ +} +#endif + +/* --------------------------------------------------------------------------- + * Utility functions + * ---------------------------------------------------------------------------*/ + +inline booleantype SUNDIALS_CUDA_Assert(cudaError_t cuerr, const char *file, int line) +{ + if (cuerr != cudaSuccess) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in CUDA runtime operation: %s %s:%d\n", + cudaGetErrorString(cuerr), file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +inline booleantype SUNDIALS_CUSPARSE_Assert(cusparseStatus_t status, const char *file, int line) +{ + if (status != CUSPARSE_STATUS_SUCCESS) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in cuSPARSE runtime operation: cusparseStatus_t = %d %s:%d\n", + status, file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +inline booleantype SUNDIALS_CUSOLVER_Assert(cusolverStatus_t status, const char *file, int line) +{ + if (status != CUSOLVER_STATUS_SUCCESS) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in cuSOLVER runtime operation: cusolverStatus_t = %d %s:%d\n", + status, file, line); +#ifdef SUNDIALS_DEBUG_ASSERT + assert(false); +#endif +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_CUDA_H */ \ No newline at end of file diff --git a/src/lib/sundials/sundials_cuda_kernels.cuh b/src/lib/sundials/sundials_cuda_kernels.cuh new file mode 100644 index 0000000..9743026 --- /dev/null +++ b/src/lib/sundials/sundials_cuda_kernels.cuh @@ -0,0 +1,493 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + */ + +#ifndef _SUNDIALS_CUDA_KERNELS_CUH +#define _SUNDIALS_CUDA_KERNELS_CUH + +#define SUNDIALS_HOST_DEVICE __host__ __device__ +#define SUNDIALS_DEVICE_INLINE __forceinline__ +#include "sundials_reductions.hpp" + +#define GRID_STRIDE_XLOOP(type, iter, max) \ + for (type iter = blockDim.x * blockIdx.x + threadIdx.x; \ + iter < max; \ + iter += blockDim.x * gridDim.x) + +#include "sundials_cuda.h" + +namespace sundials +{ +namespace cuda +{ +namespace impl +{ + +template +__forceinline__ __device__ T shfl_xor_sync(T var, int laneMask); + +template +__forceinline__ __device__ T shfl_sync(T var, int srcLane); + +template +__forceinline__ __device__ T shfl_down_sync(T var, int srcLane); + +template <> +__forceinline__ __device__ float shfl_xor_sync(float var, int laneMask) +{ + return ::__shfl_xor_sync(0xFFFFFFFF, var, laneMask); +} + +template <> +__forceinline__ __device__ double shfl_xor_sync(double var, int laneMask) +{ + return ::__shfl_xor_sync(0xFFFFFFFF, var, laneMask); +} + +template <> +__forceinline__ __device__ float shfl_sync(float var, int srcLane) +{ + return ::__shfl_sync(0xFFFFFFFF, var, srcLane); +} + +template <> +__forceinline__ __device__ double shfl_sync(double var, int srcLane) +{ + return ::__shfl_sync(0xFFFFFFFF, var, srcLane); +} + +template<> +__forceinline__ __device__ float shfl_down_sync(float val, int srcLane) +{ + return ::__shfl_down_sync(0xFFFFFFFF, val, srcLane); +} + +template<> +__forceinline__ __device__ double shfl_down_sync(double val, int srcLane) +{ + return ::__shfl_down_sync(0xFFFFFFFF, val, srcLane); +} + +/* The atomic functions below are implemented using the atomic compare and swap + function atomicCAS which performs an atomic version of + (*address == assumed) ? (assumed + val) : *address. Since *address could change + between when the value is loaded and the atomicCAS call the operation is repeated + until *address does not change between the read and the compare and swap operation. */ + + __forceinline__ __device__ +double atomicAdd(double* address, double val) +{ +#if __CUDA_ARCH__ < 600 + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + + __longlong_as_double(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); +#else + return ::atomicAdd(address, val); +#endif +} + +__forceinline__ __device__ +float atomicAdd(float* address, float val) +{ +#if __CUDA_ARCH__ < 600 + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val + + __int_as_float(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +#else + return ::atomicAdd(address, val); +#endif +} + +/* + * Compute the maximum of 2 double-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the maximum + * "value" is the value that is compared to the reference in order to determine the maximum + */ +__forceinline__ __device__ +void atomicMax(double* const address, const double value) +{ + if (*address >= value) + { + return; + } + + unsigned long long * const address_as_i = (unsigned long long *)address; + unsigned long long old = * address_as_i, assumed; + + do + { + assumed = old; + if (__longlong_as_double(assumed) >= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __double_as_longlong(value)); + } while (assumed != old); +} + +/* + * Compute the maximum of 2 single-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the maximum + * "value" is the value that is compared to the reference in order to determine the maximum + */ + __forceinline__ __device__ +void atomicMax(float* const address, const float value) +{ + if (*address >= value) + { + return; + } + + unsigned int* const address_as_i = (unsigned int *)address; + unsigned int old = *address_as_i, assumed; + + do + { + assumed = old; + if (__int_as_float(assumed) >= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __float_as_int(value)); + } while (assumed != old); +} + +/* + * Compute the minimum of 2 double-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the minimum + * "value" is the value that is compared to the reference in order to determine the minimum + */ +__forceinline__ __device__ +void atomicMin(double* const address, const double value) +{ + if (*address <= value) + { + return; + } + + unsigned long long* const address_as_i = (unsigned long long *)address; + unsigned long long old = *address_as_i, assumed; + + do + { + assumed = old; + if (__longlong_as_double(assumed) <= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __double_as_longlong(value)); + } while (assumed != old); +} + +/* + * Compute the minimum of 2 single-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the minimum + * "value" is the value that is compared to the reference in order to determine the minimum + */ +__forceinline__ __device__ +void atomicMin(float* const address, const float value) +{ + if (*address <= value) + { + return; + } + + unsigned int* const address_as_i = (unsigned int *)address; + unsigned int old = *address_as_i, assumed; + + do + { + assumed = old; + if (__int_as_float(assumed) <= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __float_as_int(value)); + } while (assumed != old); +} + +// +// Atomic specializations of sundials::reduction operators +// + +template +struct atomic; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicAdd(out, val); + } +}; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicMax(out, val); + } +}; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicMin(out, val); + } +}; + + +/* + * Perform a reduce on the warp to get the operation result. + */ +template +__inline__ __device__ +T warpReduceShflDown(T val) +{ + for (int offset = warpSize/2; offset > 0; offset /= 2) + { + T rhs = shfl_down_sync(val, offset); + val = BinaryReductionOp{}(val, rhs); + } + return val; +} + +/* + * Reduce value across the thread block. + */ +template +__inline__ __device__ +T blockReduceShflDown(T val, T identity) +{ + // Shared memory for the partial sums + static __shared__ T shared[MAX_WARPS]; + + int numThreads = blockDim.x * blockDim.y * blockDim.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + int warpId = threadId / WARP_SIZE; + int warpLane = threadId % WARP_SIZE; + + // Each warp performs partial reduction + val = warpReduceShflDown(val); + + // Write reduced value from each warp to shared memory + if (warpLane == 0) shared[warpId] = val; + + // Wait for all partial reductions to complete + __syncthreads(); + + // Read per warp values from shared memory only if that warp existed + val = (threadId < numThreads / warpSize) ? shared[warpLane] : identity; + + // Final reduce within first warp + if (warpId == 0) + val = warpReduceShflDown(val); + + return val; +} + +/* + * Warp reduce + block reduce using shfl instead of shfl_down. + */ +template +__inline__ __device__ +T blockReduceShfl(T val, T identity) +{ + int numThreads = blockDim.x * blockDim.y * blockDim.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + int warpId = threadId / WARP_SIZE; + int warpLane = threadId % WARP_SIZE; + + T temp = val; + + // Reduce each warp + if (numThreads % WARP_SIZE == 0) + { + for (int i = 1; i < WARP_SIZE; i *= 2) + { + T rhs = shfl_xor_sync(temp, i); + temp = BinaryReductionOp{}(temp, rhs); + } + } + else + { + for (int i = 1; i < WARP_SIZE; i *= 2) + { + int srcLane = threadId ^ i; + T rhs = shfl_sync(temp, srcLane); + // Only add from threads that exist to avoid double counting + if (srcLane < numThreads) + temp = BinaryReductionOp{}(temp, rhs); + } + } + + // Reduce per warp values + if (numThreads > WARP_SIZE) + { + static_assert(MAX_WARPS <= WARP_SIZE, "max warps must be <= warp size for this algorithm to work"); + + __shared__ T shared[MAX_WARPS]; + + // Write per warp values to shared memory + if (warpLane == 0) + shared[warpId] = temp; + + __syncthreads(); + + if (warpId == 0) + { + // Read per warp values only if the warp existed + temp = (warpLane * WARP_SIZE < numThreads) ? shared[warpLane] : identity; + + // Final reduction + for (int i = 1; i < MAX_WARPS; i *= 2) + { + T rhs = shfl_xor_sync(temp, i); + temp = BinaryReductionOp{}(temp, rhs); + } + } + + __syncthreads(); + } + + return temp; +} + +/* + * Reduce values into thread 0 of the last running thread block. + * Output value is device_mem[0]. + */ +template +__device__ __forceinline__ void gridReduce(T val, + T identity, + T* device_mem, + unsigned int* device_count) +{ + int numBlocks = gridDim.x * gridDim.y * gridDim.z; + int numThreads = blockDim.x * blockDim.y * blockDim.z; + unsigned int wrap_around = numBlocks - 1; + + int blockId = blockIdx.x + gridDim.x * blockIdx.y + + (gridDim.x * gridDim.y) * blockIdx.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + // Each block reduces a subset of the input + T temp = blockReduceShfl(val, identity); + + __shared__ bool isLastBlockDone; + if (threadId == 0) + { + // One thread per block stores the partial reductions to global memory + device_mem[blockId] = temp; + + // Ensure write visible to all threads + __threadfence(); + + // Increment counter, (wraps back to zero if old count == wrap_around) + unsigned int old_count = atomicInc(device_count, wrap_around); + isLastBlockDone = (old_count == wrap_around) ? 1 : 0; + } + + // Synchronize to ensure that each thread reads the + // correct value of isLastBlockDone. + __syncthreads(); + + // The last block reduces values in device_mem + if (isLastBlockDone) + { + // Reduce thread_i in each block into temp + temp = identity; + for (int i = threadId; i < numBlocks; i += numThreads) + temp = BinaryReductionOp{}(temp, device_mem[i]); + + // Compute the final block partial reductions + temp = blockReduceShfl(temp, identity); + + // One thread returns the final value + if (threadId == 0) + device_mem[0] = temp; + } +} + +template +__device__ __forceinline__ void gridReduceAtomic(T val, + T identity, + T* device_mem) +{ + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + val = blockReduceShflDown(val, identity); + // Final reduction of all block values into the output device_mem + if (threadId == 0) + atomic{}(device_mem, val); +} + +template +struct GridReducerLDS +{ + __device__ __forceinline__ void operator()(T val, + T identity, + T* device_mem, + unsigned int* device_count) + { + return gridReduce(val, identity, device_mem, device_count); + } +}; + + +template +struct GridReducerAtomic +{ + __device__ __forceinline__ void operator()(T val, + T identity, + T* device_mem, + unsigned int* device_count) + { + return gridReduceAtomic(val, identity, device_mem); + } +}; + +} // namespace impl +} // namespace cuda +} // namespace sundials + +#endif // _SUNDIALS_CUDA_KERNELS_CUH \ No newline at end of file diff --git a/src/lib/sundials/sundials_debug.h b/src/lib/sundials/sundials_debug.h new file mode 100644 index 0000000..2c44c1b --- /dev/null +++ b/src/lib/sundials/sundials_debug.h @@ -0,0 +1,51 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for SUNDIALS debugging. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_DEBUG_H +#define _SUNDIALS_DEBUG_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * Macro which prints to stderr when in debug mode + */ +#ifdef SUNDIALS_DEBUG +#define SUNDIALS_DEBUG_PRINT(str) fprintf(stderr, str) +#else +#define SUNDIALS_DEBUG_PRINT(str) +#endif + +/* + * Macro which prints error messages in debug mode + */ +#ifdef SUNDIALS_DEBUG +#define SUNDIALS_DEBUG_ERROR(msg) \ + fprintf(stderr, "ERROR in %s (%s line %d): %s", \ + __func__, __FILE__, __LINE__, msg); +#else +#define SUNDIALS_DEBUG_ERROR(msg) +#endif + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_DEBUG_H */ diff --git a/src/lib/sundials/sundials_dense.c b/src/lib/sundials/sundials_dense.c index debaf7f..aa11085 100644 --- a/src/lib/sundials/sundials_dense.c +++ b/src/lib/sundials/sundials_dense.c @@ -7,7 +7,7 @@ * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -19,7 +19,7 @@ * This is the implementation file for a generic package of dense * matrix operations. * ----------------------------------------------------------------- - */ + */ #include #include @@ -33,56 +33,106 @@ /* * ----------------------------------------------------- - * Functions working on DlsMat + * Functions working on SUNDlsMat * ----------------------------------------------------- */ -sunindextype DenseGETRF(DlsMat A, sunindextype *p) +sunindextype SUNDlsMat_DenseGETRF(SUNDlsMat A, sunindextype *p) { - return(denseGETRF(A->cols, A->M, A->N, p)); + return(SUNDlsMat_denseGETRF(A->cols, A->M, A->N, p)); } -void DenseGETRS(DlsMat A, sunindextype *p, realtype *b) +sunindextype DenseGETRF(SUNDlsMat A, sunindextype *p) { - denseGETRS(A->cols, A->N, p, b); + return(SUNDlsMat_denseGETRF(A->cols, A->M, A->N, p)); } -sunindextype DensePOTRF(DlsMat A) +void SUNDlsMat_DenseGETRS(SUNDlsMat A, sunindextype *p, realtype *b) { - return(densePOTRF(A->cols, A->M)); + SUNDlsMat_denseGETRS(A->cols, A->N, p, b); } -void DensePOTRS(DlsMat A, realtype *b) +void DenseGETRS(SUNDlsMat A, sunindextype *p, realtype *b) { - densePOTRS(A->cols, A->M, b); + SUNDlsMat_denseGETRS(A->cols, A->N, p, b); } -int DenseGEQRF(DlsMat A, realtype *beta, realtype *wrk) +sunindextype SUNDlsMat_DensePOTRF(SUNDlsMat A) { - return(denseGEQRF(A->cols, A->M, A->N, beta, wrk)); + return(SUNDlsMat_densePOTRF(A->cols, A->M)); } -int DenseORMQR(DlsMat A, realtype *beta, realtype *vn, realtype *vm, realtype *wrk) +sunindextype DensePOTRF(SUNDlsMat A) { - return(denseORMQR(A->cols, A->M, A->N, beta, vn, vm, wrk)); + return(SUNDlsMat_densePOTRF(A->cols, A->M)); } -void DenseCopy(DlsMat A, DlsMat B) +void SUNDlsMat_DensePOTRS(SUNDlsMat A, realtype *b) { - denseCopy(A->cols, B->cols, A->M, A->N); + SUNDlsMat_densePOTRS(A->cols, A->M, b); } -void DenseScale(realtype c, DlsMat A) +void DensePOTRS(SUNDlsMat A, realtype *b) { - denseScale(c, A->cols, A->M, A->N); + SUNDlsMat_densePOTRS(A->cols, A->M, b); } -void DenseMatvec(DlsMat A, realtype *x, realtype *y) +int SUNDlsMat_DenseGEQRF(SUNDlsMat A, realtype *beta, realtype *wrk) { - denseMatvec(A->cols, x, y, A->M, A->N); + return(SUNDlsMat_denseGEQRF(A->cols, A->M, A->N, beta, wrk)); +} + +int DenseGEQRF(SUNDlsMat A, realtype *beta, realtype *wrk) +{ + return(SUNDlsMat_denseGEQRF(A->cols, A->M, A->N, beta, wrk)); +} + +int SUNDlsMat_DenseORMQR(SUNDlsMat A, realtype *beta, realtype *vn, realtype *vm, realtype *wrk) +{ + return(SUNDlsMat_denseORMQR(A->cols, A->M, A->N, beta, vn, vm, wrk)); +} + +int DenseORMQR(SUNDlsMat A, realtype *beta, realtype *vn, realtype *vm, realtype *wrk) +{ + return(SUNDlsMat_denseORMQR(A->cols, A->M, A->N, beta, vn, vm, wrk)); +} + +void SUNDlsMat_DenseCopy(SUNDlsMat A, SUNDlsMat B) +{ + SUNDlsMat_denseCopy(A->cols, B->cols, A->M, A->N); +} + +void DenseCopy(SUNDlsMat A, SUNDlsMat B) +{ + SUNDlsMat_denseCopy(A->cols, B->cols, A->M, A->N); +} + +void SUNDlsMat_DenseScale(realtype c, SUNDlsMat A) +{ + SUNDlsMat_denseScale(c, A->cols, A->M, A->N); +} + +void DenseScale(realtype c, SUNDlsMat A) +{ + SUNDlsMat_denseScale(c, A->cols, A->M, A->N); +} + +void SUNDlsMat_DenseMatvec(SUNDlsMat A, realtype *x, realtype *y) +{ + SUNDlsMat_denseMatvec(A->cols, x, y, A->M, A->N); +} + +void DenseMatvec(SUNDlsMat A, realtype *x, realtype *y) +{ + SUNDlsMat_denseMatvec(A->cols, x, y, A->M, A->N); } sunindextype denseGETRF(realtype **a, sunindextype m, sunindextype n, sunindextype *p) +{ + return(SUNDlsMat_denseGETRF(a, m, n, p)); +} + +sunindextype SUNDlsMat_denseGETRF(realtype **a, sunindextype m, sunindextype n, sunindextype *p) { sunindextype i, j, k, l; realtype *col_j, *col_k; @@ -101,8 +151,8 @@ sunindextype denseGETRF(realtype **a, sunindextype m, sunindextype n, sunindexty /* check for zero pivot element */ if (col_k[l] == ZERO) return(k+1); - - /* swap a(k,1:n) and a(l,1:n) if necessary */ + + /* swap a(k,1:n) and a(l,1:n) if necessary */ if ( l!= k ) { for (i=0; i0) { for(i=j; i=0; i--) { col_i = a[i]; - for (j=i+1; j= n) * using Householder reflections. * - * On exit, the elements on and above the diagonal of A contain the n by n - * upper triangular matrix R; the elements below the diagonal, with the array beta, + * On exit, the elements on and above the diagonal of A contain the n by n + * upper triangular matrix R; the elements below the diagonal, with the array beta, * represent the orthogonal matrix Q as a product of elementary reflectors . * * v (of length m) must be provided as workspace. @@ -259,6 +326,11 @@ void densePOTRS(realtype **a, sunindextype m, realtype *b) */ int denseGEQRF(realtype **a, sunindextype m, sunindextype n, realtype *beta, realtype *v) +{ + return(SUNDlsMat_denseGEQRF(a, m, n, beta, v)); +} + +int SUNDlsMat_denseGEQRF(realtype **a, sunindextype m, sunindextype n, realtype *beta, realtype *v) { realtype ajj, s, mu, v1, v1_2; realtype *col_j, *col_k; @@ -270,7 +342,7 @@ int denseGEQRF(realtype **a, sunindextype m, sunindextype n, realtype *beta, rea col_j = a[j]; ajj = col_j[j]; - + /* Compute the j-th Householder vector (of length m-j) */ v[0] = ONE; s = ZERO; @@ -286,7 +358,7 @@ int denseGEQRF(realtype **a, sunindextype m, sunindextype n, realtype *beta, rea beta[j] = TWO * v1_2 / (s + v1_2); for(i=1; i= n. * * v (of length m) must be provided as workspace. */ + int denseORMQR(realtype **a, sunindextype m, sunindextype n, realtype *beta, realtype *vn, realtype *vm, realtype *v) +{ + return(SUNDlsMat_denseORMQR(a, m, n, beta, vn, vm, v)); +} + +int SUNDlsMat_denseORMQR(realtype **a, sunindextype m, sunindextype n, realtype *beta, + realtype *vn, realtype *vm, realtype *v) { realtype *col_j, s; sunindextype i, j; @@ -350,6 +429,11 @@ int denseORMQR(realtype **a, sunindextype m, sunindextype n, realtype *beta, } void denseCopy(realtype **a, realtype **b, sunindextype m, sunindextype n) +{ + SUNDlsMat_denseCopy(a, b, m, n); +} + +void SUNDlsMat_denseCopy(realtype **a, realtype **b, sunindextype m, sunindextype n) { sunindextype i, j; realtype *a_col_j, *b_col_j; @@ -364,6 +448,11 @@ void denseCopy(realtype **a, realtype **b, sunindextype m, sunindextype n) } void denseScale(realtype c, realtype **a, sunindextype m, sunindextype n) +{ + SUNDlsMat_denseScale(c, a, m, n); +} + +void SUNDlsMat_denseScale(realtype c, realtype **a, sunindextype m, sunindextype n) { sunindextype i, j; realtype *col_j; @@ -376,13 +465,23 @@ void denseScale(realtype c, realtype **a, sunindextype m, sunindextype n) } void denseAddIdentity(realtype **a, sunindextype n) +{ + SUNDlsMat_denseAddIdentity(a, n); +} + +void SUNDlsMat_denseAddIdentity(realtype **a, sunindextype n) { sunindextype i; - + for (i=0; i < n; i++) a[i][i] += ONE; } void denseMatvec(realtype **a, realtype *x, realtype *y, sunindextype m, sunindextype n) +{ + SUNDlsMat_denseMatvec(a, x, y, m, n); +} + +void SUNDlsMat_denseMatvec(realtype **a, realtype *x, realtype *y, sunindextype m, sunindextype n) { sunindextype i, j; realtype *col_j; diff --git a/src/lib/sundials/sundials_direct.c b/src/lib/sundials/sundials_direct.c index 6d2dab6..eb5eba5 100644 --- a/src/lib/sundials/sundials_direct.c +++ b/src/lib/sundials/sundials_direct.c @@ -2,7 +2,7 @@ * Programmer: Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -13,7 +13,7 @@ * ----------------------------------------------------------------- * This is the implementation file for operations to be used by a * generic direct linear solver. - * -----------------------------------------------------------------*/ + * -----------------------------------------------------------------*/ #include #include @@ -24,17 +24,22 @@ #define ZERO RCONST(0.0) #define ONE RCONST(1.0) -DlsMat NewDenseMat(sunindextype M, sunindextype N) +SUNDlsMat NewDenseMat(sunindextype M, sunindextype N) { - DlsMat A; + return(SUNDlsMat_NewDenseMat(M, N)); +} + +SUNDlsMat SUNDlsMat_NewDenseMat(sunindextype M, sunindextype N) +{ + SUNDlsMat A; sunindextype j; if ( (M <= 0) || (N <= 0) ) return(NULL); A = NULL; - A = (DlsMat) malloc(sizeof *A); + A = (SUNDlsMat) malloc(sizeof *A); if (A==NULL) return (NULL); - + A->data = (realtype *) malloc(M * N * sizeof(realtype)); if (A->data == NULL) { free(A); A = NULL; @@ -59,7 +64,12 @@ DlsMat NewDenseMat(sunindextype M, sunindextype N) return(A); } -realtype **newDenseMat(sunindextype m, sunindextype n) +realtype** newDenseMat(sunindextype m, sunindextype n) +{ + return(SUNDlsMat_newDenseMat(m, n)); +} + +realtype** SUNDlsMat_newDenseMat(sunindextype m, sunindextype n) { sunindextype j; realtype **a; @@ -83,15 +93,20 @@ realtype **newDenseMat(sunindextype m, sunindextype n) } -DlsMat NewBandMat(sunindextype N, sunindextype mu, sunindextype ml, sunindextype smu) +SUNDlsMat NewBandMat(sunindextype N, sunindextype mu, sunindextype ml, sunindextype smu) { - DlsMat A; + return(SUNDlsMat_NewBandMat(N, mu, ml, smu)); +} + +SUNDlsMat SUNDlsMat_NewBandMat(sunindextype N, sunindextype mu, sunindextype ml, sunindextype smu) +{ + SUNDlsMat A; sunindextype j, colSize; if (N <= 0) return(NULL); - + A = NULL; - A = (DlsMat) malloc(sizeof *A); + A = (SUNDlsMat) malloc(sizeof *A); if (A == NULL) return (NULL); colSize = smu + ml + 1; @@ -125,7 +140,12 @@ DlsMat NewBandMat(sunindextype N, sunindextype mu, sunindextype ml, sunindextype return(A); } -realtype **newBandMat(sunindextype n, sunindextype smu, sunindextype ml) +realtype** newBandMat(sunindextype n, sunindextype smu, sunindextype ml) +{ + return(SUNDlsMat_newBandMat(n, smu, ml)); +} + +realtype** SUNDlsMat_newBandMat(sunindextype n, sunindextype smu, sunindextype ml) { realtype **a; sunindextype j, colSize; @@ -149,7 +169,12 @@ realtype **newBandMat(sunindextype n, sunindextype smu, sunindextype ml) return(a); } -void DestroyMat(DlsMat A) +void DestroyMat(SUNDlsMat A) +{ + SUNDlsMat_DestroyMat(A); +} + +void SUNDlsMat_DestroyMat(SUNDlsMat A) { free(A->data); A->data = NULL; free(A->cols); @@ -157,12 +182,22 @@ void DestroyMat(DlsMat A) } void destroyMat(realtype **a) +{ + SUNDlsMat_destroyMat(a); +} + +void SUNDlsMat_destroyMat(realtype **a) { free(a[0]); a[0] = NULL; free(a); a = NULL; } -int *NewIntArray(int N) +int* NewIntArray(int N) +{ + return(SUNDlsMat_NewIntArray(N)); +} + +int* SUNDlsMat_NewIntArray(int N) { int *vec; @@ -174,7 +209,12 @@ int *NewIntArray(int N) return(vec); } -int *newIntArray(int n) +int* newIntArray(int N) +{ + return(SUNDlsMat_newIntArray(N)); +} + +int* SUNDlsMat_newIntArray(int n) { int *v; @@ -186,7 +226,12 @@ int *newIntArray(int n) return(v); } -sunindextype *NewIndexArray(sunindextype N) +sunindextype* NewIndexArray(sunindextype N) +{ + return(SUNDlsMat_NewIndexArray(N)); +} + +sunindextype* SUNDlsMat_NewIndexArray(sunindextype N) { sunindextype *vec; @@ -198,7 +243,12 @@ sunindextype *NewIndexArray(sunindextype N) return(vec); } -sunindextype *newIndexArray(sunindextype n) +sunindextype* newIndexArray(sunindextype n) +{ + return(SUNDlsMat_newIndexArray(n)); +} + +sunindextype* SUNDlsMat_newIndexArray(sunindextype n) { sunindextype *v; @@ -210,7 +260,12 @@ sunindextype *newIndexArray(sunindextype n) return(v); } -realtype *NewRealArray(sunindextype N) +realtype* NewRealArray(sunindextype N) +{ + return(SUNDlsMat_NewRealArray(N)); +} + +realtype* SUNDlsMat_NewRealArray(sunindextype N) { realtype *vec; @@ -222,7 +277,12 @@ realtype *NewRealArray(sunindextype N) return(vec); } -realtype *newRealArray(sunindextype m) +realtype* newRealArray(sunindextype N) +{ + return(SUNDlsMat_newRealArray(N)); +} + +realtype* SUNDlsMat_newRealArray(sunindextype m) { realtype *v; @@ -234,20 +294,34 @@ realtype *newRealArray(sunindextype m) return(v); } -void DestroyArray(void *V) -{ - free(V); +void DestroyArray(void *p) +{ + SUNDlsMat_DestroyArray(p); +} + +void SUNDlsMat_DestroyArray(void *V) +{ + free(V); V = NULL; } -void destroyArray(void *v) +void destroyArray(void *p) { - free(v); + SUNDlsMat_destroyArray(p); +} + +void SUNDlsMat_destroyArray(void *v) +{ + free(v); v = NULL; } +void AddIdentity(SUNDlsMat A) +{ + SUNDlsMat_AddIdentity(A); +} -void AddIdentity(DlsMat A) +void SUNDlsMat_AddIdentity(SUNDlsMat A) { sunindextype i; @@ -265,8 +339,12 @@ void AddIdentity(DlsMat A) } +void SetToZero(SUNDlsMat A) +{ + SUNDlsMat_SetToZero(A); +} -void SetToZero(DlsMat A) +void SUNDlsMat_SetToZero(SUNDlsMat A) { sunindextype i, j, colSize; realtype *col_j; @@ -274,7 +352,7 @@ void SetToZero(DlsMat A) switch (A->type) { case SUNDIALS_DENSE: - + for (j=0; jN; j++) { col_j = A->cols[j]; for (i=0; iM; i++) @@ -298,8 +376,12 @@ void SetToZero(DlsMat A) } +void PrintMat(SUNDlsMat A, FILE *outfile) +{ + SUNDlsMat_PrintMat(A, outfile); +} -void PrintMat(DlsMat A, FILE *outfile) +void SUNDlsMat_PrintMat(SUNDlsMat A, FILE *outfile) { sunindextype i, j, start, finish; realtype **a; @@ -312,17 +394,17 @@ void PrintMat(DlsMat A, FILE *outfile) for (i=0; i < A->M; i++) { for (j=0; j < A->N; j++) { #if defined(SUNDIALS_EXTENDED_PRECISION) - fprintf(outfile, "%12Lg ", DENSE_ELEM(A,i,j)); + fprintf(outfile, "%12Lg ", SUNDLS_DENSE_ELEM(A,i,j)); #elif defined(SUNDIALS_DOUBLE_PRECISION) - fprintf(outfile, "%12g ", DENSE_ELEM(A,i,j)); + fprintf(outfile, "%12g ", SUNDLS_DENSE_ELEM(A,i,j)); #else - fprintf(outfile, "%12g ", DENSE_ELEM(A,i,j)); + fprintf(outfile, "%12g ", SUNDLS_DENSE_ELEM(A,i,j)); #endif } fprintf(outfile, "\n"); } fprintf(outfile, "\n"); - + break; case SUNDIALS_BAND: @@ -345,11 +427,9 @@ void PrintMat(DlsMat A, FILE *outfile) fprintf(outfile, "\n"); } fprintf(outfile, "\n"); - + break; } } - - diff --git a/src/lib/sundials/sundials_futils.c b/src/lib/sundials/sundials_futils.c new file mode 100644 index 0000000..129a360 --- /dev/null +++ b/src/lib/sundials/sundials_futils.c @@ -0,0 +1,29 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS Fortran 2003 interface utility implementations. + * -----------------------------------------------------------------*/ + +#include + +/* Create a file pointer with the given file name and mode. */ +FILE* SUNDIALSFileOpen(const char* filename, const char* mode) +{ + return fopen(filename, mode); +} + +/* Close a file pointer with the given file name. */ +void SUNDIALSFileClose(FILE* fp) +{ + fclose(fp); +} diff --git a/src/lib/sundials/sundials_hashmap.h b/src/lib/sundials/sundials_hashmap.h new file mode 100644 index 0000000..32da1e9 --- /dev/null +++ b/src/lib/sundials/sundials_hashmap.h @@ -0,0 +1,440 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * A simple header-only hashmap implementation for char* keys and + * void* values. Uses linear probing to resolve collisions. + * The values can be anything, but will be freed by + * the hash map upon its destruction. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_HASHMAP_H +#define _SUNDIALS_HASHMAP_H + +#include +#include + +static const unsigned long HASH_PRIME = 2216829733; /* 14695981039346656037U; */ +static const unsigned long HASH_OFFSET_BASIS = 435; /* 1099511628211U; */ + +/* + For a nice discussion on popular hashing algorithms see: + https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed/145633#145633 + + This is a 64-bit implementation of the 'a' modification of the + Fowler–Noll–Vo hash (i.e., FNV1-a). + */ +static unsigned long fnv1a_hash(const char* str) +{ + unsigned long hash = HASH_OFFSET_BASIS; + char c; + while ((c = *str++)) + { + hash = (hash ^ c) * HASH_PRIME; + } + return hash; +} + +typedef struct _SUNHashMapKeyValue* SUNHashMapKeyValue; + +struct _SUNHashMapKeyValue { + const char* key; + void* value; +}; + +typedef struct _SUNHashMap* SUNHashMap; + +struct _SUNHashMap { + int size; /* current number of entries */ + int max_size; /* max number of entries */ + SUNHashMapKeyValue* buckets; +}; + +/* + This function creates a new SUNHashMap object allocated to hold + up to 'max_size' entries. + + **Arguments:** + * ``max_size`` -- the max number of entries in the hashmap + * ``map`` -- on input, a SUNHasMap pointer, on output the SUNHashMap will be + allocated + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_New(int max_size, SUNHashMap* map) +{ + int i; + + if (max_size <= 0) + { + return (-1); + } + + *map = NULL; + *map = (SUNHashMap)malloc(sizeof(struct _SUNHashMap)); + + if (map == NULL) + { + return (-1); + } + + (*map)->size = 0; + (*map)->max_size = max_size; + + (*map)->buckets = NULL; + (*map)->buckets = + (SUNHashMapKeyValue*)malloc(max_size * sizeof(SUNHashMapKeyValue)); + + if ((*map)->buckets == NULL) + { + free(*map); + return (-1); + } + + /* Initialize all buckets to NULL */ + for (i = 0; i < max_size; i++) + { + (*map)->buckets[i] = NULL; + } + + return (0); +} + +/* + This function frees the SUNHashMap object. + + **Arguments:** + * ``map`` -- on input, a SUNHasMap pointer, on output the SUNHashMap will be + deallocated and set to ``NULL`` + * ``freevalue`` -- callback function that should free the value object + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_Destroy(SUNHashMap* map, void (*freevalue)(void* ptr)) +{ + int i; + + if (map == NULL || freevalue == NULL) + { + return (-1); + } + + for (i = 0; i < (*map)->max_size; i++) + { + if ((*map)->buckets[i] && (*map)->buckets[i]->value) + { + freevalue((*map)->buckets[i]->value); + } + + if ((*map)->buckets[i]) + { + free((*map)->buckets[i]); + } + } + if ((*map)->buckets) + { + free((*map)->buckets); + } + if (*map) + { + free(*map); + } + *map = NULL; + + return (0); +} + +/* + This function iterates the map over the range [start, N]. N is either the + index at which ``yieldfn`` indicates the iteration should stop, or the max + entries in the map. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``start`` -- the start of the iteration range + * ``yieldfn`` -- the callback function to call every iteration + this should return -1 to continue the iteration, or >= 0 to + stop; the first argument is the current index, the second + argument is the current key-value pair, and the final + argument is the same pointer ``ctx`` as the final argument + to SUNHashMapIterate. + * ``ctx`` -- a pointer to pass on to ``yieldfn`` + + **Returns:** + * ``max_size`` -- iterated the whole map + * ``>=0`` -- the index at which the iteration stopped + * ``<-1`` -- an error occurred + */ +static int SUNHashMap_Iterate(SUNHashMap map, int start, + int (*yieldfn)(int, SUNHashMapKeyValue, void*), + void* ctx) +{ + int i; + + if (map == NULL || yieldfn == NULL) + { + return (-2); + } + + for (i = start; i < map->max_size; i++) + { + int retval = yieldfn(i, map->buckets[i], ctx); + if (retval >= 0) + { + return (retval); /* yieldfn indicates the loop should break */ + } + if (retval < -1) + { + return (retval); /* error occurred */ + } + } + + return (map->max_size); +} + +static int sunHashMapLinearProbeInsert(int idx, SUNHashMapKeyValue kv, void* ctx) +{ + /* find the next open spot */ + if (kv == NULL) + { + return (idx); /* open spot found at idx */ + } + return (-1); /* keep looking */ +} + +/* + This function creates a key-value pair and attempts to insert it into the map. + Will use linear probing if there is a collision. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``key`` -- the key to store + * ``value`` -- the value associated with the key + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + * ``-2`` -- the map is full + */ +static int SUNHashMap_Insert(SUNHashMap map, const char* key, void* value) +{ + int idx; + int retval; + SUNHashMapKeyValue kvp; + + if (map == NULL || key == NULL || value == NULL) + { + return (-1); + } + + /* We want the index to be in (0, map->max_size) */ + idx = (int)(fnv1a_hash(key) % map->max_size); + + /* Check if the bucket is already filled */ + if (map->buckets[idx] != NULL) + { + /* Find the next open spot */ + retval = SUNHashMap_Iterate(map, idx, sunHashMapLinearProbeInsert, NULL); + if (retval < 0) + { + return (-1); /* error occurred */ + } + if (retval == map->max_size) + { + return (-2); /* no open entry */ + } + + idx = retval; + } + + /* Create the key-value pair */ + kvp = (SUNHashMapKeyValue)malloc(sizeof(struct _SUNHashMapKeyValue)); + if (kvp == NULL) + { + return (-1); + } + + kvp->key = key; + kvp->value = value; + + /* Insert the key-value pair */ + map->buckets[idx] = kvp; + map->size++; + + return (0); +} + +static int sunHashMapLinearProbeGet(int idx, SUNHashMapKeyValue kv, void* key) +{ + /* target key cannot be NULL */ + if (key == NULL) + { + return (-2); + } + + /* find the matching entry */ + if (kv == NULL) + { + return (-1); /* keep looking since this bucket is empty */ + } + if (!strcmp(kv->key, (const char*)key)) + { + return (idx); /* found it at idx */ + } + return (-1); /* keep looking */ +} + +/* + This function gets the value for the given key. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``key`` -- the key to look up + * ``value`` -- the value associated with the key + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + * ``-2`` -- key not found + */ +static int SUNHashMap_GetValue(SUNHashMap map, const char* key, void** value) +{ + int idx; + int retval; + + if (map == NULL || key == NULL || value == NULL) + { + return (-1); + } + + /* We want the index to be in (0, map->max_size) */ + idx = (int)(fnv1a_hash(key) % map->max_size); + + /* Check if the key exists */ + if (map->buckets[idx] == NULL) + { + return (-2); + } + + /* Check to see if this is a collision */ + if (strcmp(map->buckets[idx]->key, key)) + { + /* Keys did not match, so we have a collision and need to probe */ + retval = + SUNHashMap_Iterate(map, idx + 1, sunHashMapLinearProbeGet, (void*)key); + if (retval < 0) + { + return (-1); /* error occurred */ + } + if (retval == map->max_size) + { + return (-2); /* not found */ + } + } + + /* Return a reference to the value only */ + *value = map->buckets[idx]->value; + + return (0); +} + +/* + This function allocates a new array the same max_size as the map, + then it sorts map into a new array of key-value pairs leaving + the map unchanged. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``sorted`` -- pointer to the sorted array of key-value pairs, this + function will allocate the array + * ``compar`` -- comparator function that is passed to the C standard qsort + function + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +static int SUNHashMap_Sort(SUNHashMap map, SUNHashMapKeyValue** sorted, + int (*compar)(const void*, const void*)) +{ + int i; + + if (map == NULL || compar == NULL) + { + return (-1); + } + + *sorted = + (SUNHashMapKeyValue*)malloc(map->max_size * sizeof(SUNHashMapKeyValue)); + if (*sorted == NULL) + { + return (-1); + } + + /* Copy the buckets into a new array */ + for (i = 0; i < map->max_size; i++) + { + (*sorted)[i] = map->buckets[i]; + } + + qsort(*sorted, map->max_size, sizeof(SUNHashMapKeyValue), compar); + + return (0); +} + +/* + This function allocates a new array with just they values of the map. + + **Arguments:** + * ``map`` -- the ``SUNHashMap`` object to operate on + * ``values`` -- pointer to the array of keys + * ``value_size`` -- the size of the values in bytes + + **Returns:** + * ``0`` -- success + * ``-1`` -- an error occurred + */ +#if SUNDIALS_MPI_ENABLED +static int SUNHashMap_Values(SUNHashMap map, void*** values, size_t value_size) +{ + int i; + int count = 0; + + if (map == NULL) + { + return (-1); + } + + *values = (void**)malloc(map->size * sizeof(value_size)); + if (values == NULL) + { + return (-1); + } + + /* Copy the values into a new array */ + for (i = 0; i < map->max_size; i++) + { + if (map->buckets[i]) + { + (*values)[count++] = map->buckets[i]->value; + } + } + + return (0); +} +#endif + +#endif diff --git a/src/lib/sundials/sundials_hip.h b/src/lib/sundials/sundials_hip.h new file mode 100644 index 0000000..c2ba37a --- /dev/null +++ b/src/lib/sundials/sundials_hip.h @@ -0,0 +1,73 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos, and Daniel McGreer @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header files defines internal utility functions and macros + * for working with HIP. + * ----------------------------------------------------------------- + */ + +#include + +#include + +#include + +#ifndef _SUNDIALS_HIP_H +#define _SUNDIALS_HIP_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* --------------------------------------------------------------------------- + * Utility macros + * ---------------------------------------------------------------------------*/ + +#define SUNDIALS_HIP_VERIFY(hiperr) SUNDIALS_HIP_Assert(hiperr, __FILE__, __LINE__) + +#define SUNDIALS_KERNEL_NAME(...) __VA_ARGS__ +#ifndef SUNDIALS_DEBUG_HIP_LASTERROR +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ kernel<<>>(__VA_ARGS__); } +#else +#define SUNDIALS_LAUNCH_KERNEL(kernel, gridDim, blockDim, shMem, stream, ...) \ +{ \ + kernel<<>>(__VA_ARGS__); \ + hipDeviceSynchronize(); \ + SUNDIALS_HIP_VERIFY(hipGetLastError()); \ +} +#endif + +/* --------------------------------------------------------------------------- + * Utility functions + * ---------------------------------------------------------------------------*/ +inline booleantype SUNDIALS_HIP_Assert(hipError_t hiperr, const char *file, int line) +{ + if (hiperr != hipSuccess) + { +#ifdef SUNDIALS_DEBUG + fprintf(stderr, + "ERROR in HIP runtime operation: %s %s:%d\n", + hipGetErrorString(hiperr), file, line); +#endif + return SUNFALSE; /* Assert failed */ + } + return SUNTRUE; /* Assert OK */ +} + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_HIP_H */ diff --git a/src/lib/sundials/sundials_hip_kernels.hip.hpp b/src/lib/sundials/sundials_hip_kernels.hip.hpp new file mode 100644 index 0000000..37e01c8 --- /dev/null +++ b/src/lib/sundials/sundials_hip_kernels.hip.hpp @@ -0,0 +1,506 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + */ + +#ifndef _SUNDIALS_HIP_KERNELS_CUH +#define _SUNDIALS_HIP_KERNELS_CUH + +#define SUNDIALS_HOST_DEVICE __host__ __device__ +#define SUNDIALS_DEVICE_INLINE __forceinline__ +#include "sundials_reductions.hpp" + +#define GRID_STRIDE_XLOOP(type, iter, max) \ + for (type iter = blockDim.x * blockIdx.x + threadIdx.x; \ + iter < max; \ + iter += blockDim.x * gridDim.x) + +#include "sundials_hip.h" + +namespace sundials +{ +namespace hip +{ +namespace impl +{ + +template +__forceinline__ __device__ T shfl_xor_sync(T var, int laneMask); + +template +__forceinline__ __device__ T shfl_sync(T var, int srcLane); + +template +__forceinline__ __device__ T shfl_down_sync(T var, int srcLane); + +template <> +__forceinline__ __device__ int shfl_xor_sync(int var, int laneMask) +{ + return ::__shfl_xor(var, laneMask); +} + +template <> +__forceinline__ __device__ float shfl_xor_sync(float var, int laneMask) +{ + return ::__shfl_xor(var, laneMask); +} + +template <> +__forceinline__ __device__ double shfl_xor_sync(double var, int laneMask) +{ + return ::__shfl_xor(var, laneMask); +} + +template <> +__forceinline__ __device__ int shfl_sync(int var, int srcLane) +{ + return ::__shfl(var, srcLane); +} + +template <> +__forceinline__ __device__ float shfl_sync(float var, int srcLane) +{ + return ::__shfl(var, srcLane); +} + +template <> +__forceinline__ __device__ double shfl_sync(double var, int srcLane) +{ + return ::__shfl(var, srcLane); +} + +template<> +__forceinline__ __device__ float shfl_down_sync(float val, int srcLane) +{ + return ::__shfl_down(val, srcLane); +} + +template<> +__forceinline__ __device__ double shfl_down_sync(double val, int srcLane) +{ + return ::__shfl_down(val, srcLane); +} + + +/* The atomic functions below are implemented using the atomic compare and swap + function atomicCAS which performs an atomic version of + (*address == assumed) ? (assumed + val) : *address. Since *address could change + between when the value is loaded and the atomicCAS call the operation is repeated + until *address does not change between the read and the compare and swap operation. */ + +__forceinline__ __device__ +double atomicAdd(double* address, double val) +{ +#ifndef __HIP_ARCH_HAS_DOUBLE_ATOMIC_ADD__ + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + + __longlong_as_double(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); +#else + return ::atomicAdd(address, val); +#endif +} + +__forceinline__ __device__ +float atomicAdd(float* address, float val) +{ +#ifndef __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val + + __int_as_float(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +#else + return ::atomicAdd(address, val); +#endif +} + +/* + * Compute the maximum of 2 double-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the maximum + * "value" is the value that is compared to the reference in order to determine the maximum + */ +__forceinline__ __device__ +void atomicMax(double* const address, const double value) +{ + if (*address >= value) + { + return; + } + + unsigned long long * const address_as_i = (unsigned long long *)address; + unsigned long long old = * address_as_i, assumed; + + do + { + assumed = old; + if (__longlong_as_double(assumed) >= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __double_as_longlong(value)); + } while (assumed != old); +} + +/* + * Compute the maximum of 2 single-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the maximum + * "value" is the value that is compared to the reference in order to determine the maximum + */ + __forceinline__ __device__ +void atomicMax(float* const address, const float value) +{ + if (*address >= value) + { + return; + } + + unsigned int* const address_as_i = (unsigned int *)address; + unsigned int old = *address_as_i, assumed; + + do + { + assumed = old; + if (__int_as_float(assumed) >= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __float_as_int(value)); + } while (assumed != old); +} + +/* + * Compute the minimum of 2 double-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the minimum + * "value" is the value that is compared to the reference in order to determine the minimum + */ +__forceinline__ __device__ +void atomicMin(double* const address, const double value) +{ + if (*address <= value) + { + return; + } + + unsigned long long* const address_as_i = (unsigned long long *)address; + unsigned long long old = *address_as_i, assumed; + + do + { + assumed = old; + if (__longlong_as_double(assumed) <= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __double_as_longlong(value)); + } while (assumed != old); +} + +/* + * Compute the minimum of 2 single-precision floating point values using an atomic operation + * "address" is the address of the reference value which might get updated with the minimum + * "value" is the value that is compared to the reference in order to determine the minimum + */ +__forceinline__ __device__ +void atomicMin(float* const address, const float value) +{ + if (*address <= value) + { + return; + } + + unsigned int* const address_as_i = (unsigned int *)address; + unsigned int old = *address_as_i, assumed; + + do + { + assumed = old; + if (__int_as_float(assumed) <= value) + { + break; + } + old = atomicCAS(address_as_i, assumed, __float_as_int(value)); + } while (assumed != old); +} + +// +// Atomic specializations of sundials::reductions operators +// + +template +struct atomic; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicAdd(out, val); + } +}; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicMax(out, val); + } +}; + +template +struct atomic> { + __device__ __forceinline__ void operator()(T* out, const T val) + { + atomicMin(out, val); + } +}; + + +/* + * Perform a reduce on the warp to get the operation result. + */ +template +__inline__ __device__ +T warpReduceShflDown(T val) +{ + for (int offset = warpSize/2; offset > 0; offset /= 2) + { + T rhs = shfl_down_sync(val, offset); + val = BinaryReductionOp{}(val, rhs); + } + return val; +} + +/* + * Reduce value across the thread block. + */ +template +__inline__ __device__ +T blockReduceShflDown(T val, T identity) +{ + // Shared memory for the partial sums + static __shared__ T shared[MAX_WARPS]; + + int numThreads = blockDim.x * blockDim.y * blockDim.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + int warpId = threadId / WARP_SIZE; + int warpLane = threadId % WARP_SIZE; + + // Each warp performs partial reduction + val = warpReduceShflDown(val); + + // Write reduced value from each warp to shared memory + if (warpLane == 0) shared[warpId] = val; + + // Wait for all partial reductions to complete + __syncthreads(); + + // Read per warp values from shared memory only if that warp existed + val = (threadId < numThreads / warpSize) ? shared[warpLane] : identity; + + // Final reduce within first warp + if (warpId == 0) + val = warpReduceShflDown(val); + + return val; +} + +/* + * Warp reduce + block reduce using shfl instead of shfl_down. + */ +template +__inline__ __device__ +T blockReduceShfl(T val, T identity) +{ + int numThreads = blockDim.x * blockDim.y * blockDim.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + int warpId = threadId / WARP_SIZE; + int warpLane = threadId % WARP_SIZE; + + T temp = val; + + // Reduce each warp + if (numThreads % WARP_SIZE == 0) + { + for (int i = 1; i < WARP_SIZE; i *= 2) + { + T rhs = shfl_xor_sync(temp, i); + temp = BinaryReductionOp{}(temp, rhs); + } + } + else + { + for (int i = 1; i < WARP_SIZE; i *= 2) + { + int srcLane = threadId ^ i; + T rhs = shfl_sync(temp, srcLane); + // Only add from threads that exist to avoid double counting + if (srcLane < numThreads) + temp = BinaryReductionOp{}(temp, rhs); + } + } + + // Reduce per warp values + if (numThreads > WARP_SIZE) + { + static_assert(MAX_WARPS <= WARP_SIZE, "max warps must be <= warp size for this algorithm to work"); + + __shared__ T shared[MAX_WARPS]; + + // Write per warp values to shared memory + if (warpLane == 0) + shared[warpId] = temp; + + __syncthreads(); + + if (warpId == 0) + { + // Read per warp values only if the warp existed + temp = (warpLane * WARP_SIZE < numThreads) ? shared[warpLane] : identity; + + // Final reduction + for (int i = 1; i < MAX_WARPS; i *= 2) + { + T rhs = shfl_xor_sync(temp, i); + temp = BinaryReductionOp{}(temp, rhs); + } + } + + __syncthreads(); + } + + return temp; +} + +/* + * Reduce values into thread 0 of the last running thread block. + * Output value is device_mem[0]. + */ +template +__device__ __forceinline__ void gridReduce(T val, + T identity, + T* device_mem, + unsigned int* device_count) +{ + int numBlocks = gridDim.x * gridDim.y * gridDim.z; + int numThreads = blockDim.x * blockDim.y * blockDim.z; + unsigned int wrap_around = numBlocks - 1; + + int blockId = blockIdx.x + gridDim.x * blockIdx.y + + (gridDim.x * gridDim.y) * blockIdx.z; + + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + + // Each block reduces a subset of the input + T temp = blockReduceShfl(val, identity); + + __shared__ bool isLastBlockDone; + if (threadId == 0) + { + // One thread per block stores the partial reductions to global memory + device_mem[blockId] = temp; + + // Ensure write visible to all threads + __threadfence(); + + // Increment counter, (wraps back to zero if old count == wrap_around) + unsigned int old_count = atomicInc(device_count, wrap_around); + isLastBlockDone = (old_count == wrap_around) ? 1 : 0; + } + + // Synchronize to ensure that each thread reads the + // correct value of isLastBlockDone. + __syncthreads(); + + // The last block reduces values in device_mem + if (isLastBlockDone) + { + // Reduce thread_i in each block into temp + temp = identity; + for (int i = threadId; i < numBlocks; i += numThreads) + temp = BinaryReductionOp{}(temp, device_mem[i]); + + // Compute the final block partial reductions + temp = blockReduceShfl(temp, identity); + + // One thread returns the final value + if (threadId == 0) + device_mem[0] = temp; + } +} + +template +__device__ __forceinline__ void gridReduceAtomic(T val, + T identity, + T* device_mem) +{ + int threadId = threadIdx.x + blockDim.x * threadIdx.y + + (blockDim.x * blockDim.y) * threadIdx.z; + val = blockReduceShflDown(val, identity); + // Final reduction of all block values into the output device_mem + if (threadId == 0) + atomic{}(device_mem, val); +} + +template +struct GridReducerLDS +{ + __device__ __forceinline__ void operator()(T val, + T identity, + T* device_mem, + unsigned int* device_count) + { + return impl::gridReduce(val, identity, device_mem, device_count); + } +}; + + +template +struct GridReducerAtomic +{ + __device__ __forceinline__ void operator()(T val, + T identity, + T* device_mem, + unsigned int* device_count) + { + return impl::gridReduceAtomic(val, identity, device_mem); + } +}; + +} // namespace impl +} // namespace hip +} // namespace sundials + +#endif // _SUNDIALS_HIP_KERNELS_CUH diff --git a/src/lib/sundials/sundials_iterative.c b/src/lib/sundials/sundials_iterative.c index da71f3b..76a0259 100644 --- a/src/lib/sundials/sundials_iterative.c +++ b/src/lib/sundials/sundials_iterative.c @@ -1,13 +1,10 @@ -/* - * ----------------------------------------------------------------- - * $Revision$ - * $Date$ - * ----------------------------------------------------------------- +/* ----------------------------------------------------------------- * Programmer(s): Scott D. Cohen, Alan C. Hindmarsh and * Radu Serban @ LLNL + * Shelby Lockhart @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -19,12 +16,11 @@ * This is the implementation file for the iterative.h header * file. It contains the implementation of functions that may be * useful for many different iterative solvers of A x = b. - * ----------------------------------------------------------------- - */ + * -----------------------------------------------------------------*/ #include -#include +#include "sundials_iterative_impl.h" #include #define FACTOR RCONST(1000.0) @@ -33,26 +29,32 @@ /* * ----------------------------------------------------------------- - * Function : ModifiedGS + * Function : SUNModifiedGS * ----------------------------------------------------------------- - * This implementation of ModifiedGS is a slight modification of a - * previous modified Gram-Schmidt routine (called mgs) written by + * This implementation of SUNModifiedGS is a slight modification of + * a previous modified Gram-Schmidt routine (called mgs) written by * Milo Dorr. * ----------------------------------------------------------------- */ - -int ModifiedGS(N_Vector *v, realtype **h, int k, int p, + +int ModifiedGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm) +{ + return(SUNModifiedGS(v, h, k, p, new_vk_norm)); +} + +int SUNModifiedGS(N_Vector *v, realtype **h, int k, int p, + realtype *new_vk_norm) { int i, k_minus_1, i0; realtype new_norm_2, new_product, vk_norm, temp; - + vk_norm = SUNRsqrt(N_VDotProd(v[k],v[k])); k_minus_1 = k - 1; i0 = SUNMAX(k-p, 0); - + /* Perform modified Gram-Schmidt */ - + for (i=i0; i < k; i++) { h[i][k_minus_1] = N_VDotProd(v[i], v[k]); N_VLinearSum(ONE, v[k], -h[i][k_minus_1], v[i], v[k]); @@ -70,7 +72,7 @@ int ModifiedGS(N_Vector *v, realtype **h, int k, int p, temp = FACTOR * vk_norm; if ((temp + (*new_vk_norm)) != temp) return(0); - + new_norm_2 = ZERO; for (i=i0; i < k; i++) { @@ -92,15 +94,21 @@ int ModifiedGS(N_Vector *v, realtype **h, int k, int p, /* * ----------------------------------------------------------------- - * Function : ClassicalGS + * Function : SUNClassicalGS * ----------------------------------------------------------------- - * This implementation of ClassicalGS was contributed by Homer Walker - * and Peter Brown. + * This implementation of SUNClassicalGS was contributed by Homer + * Walker and Peter Brown. * ----------------------------------------------------------------- */ int ClassicalGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm, realtype *stemp, N_Vector *vtemp) +{ + return(SUNClassicalGS(v, h, k, p, new_vk_norm, stemp, vtemp)); +} + +int SUNClassicalGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm, + realtype *stemp, N_Vector *vtemp) { int i, i0, k_minus_1, retval; realtype vk_norm; @@ -155,14 +163,19 @@ int ClassicalGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm, /* * ----------------------------------------------------------------- - * Function : QRfact + * Function : SUNQRfact * ----------------------------------------------------------------- - * This implementation of QRfact is a slight modification of a + * This implementation of SUNQRfact is a slight modification of a * previous routine (called qrfact) written by Milo Dorr. * ----------------------------------------------------------------- */ int QRfact(int n, realtype **h, realtype *q, int job) +{ + return(SUNQRfact(n, h, q, job)); +} + +int SUNQRfact(int n, realtype **h, realtype *q, int job) { realtype c, s, temp1, temp2, temp3; int i, j, k, q_ptr, n_minus_1, code=0; @@ -174,35 +187,35 @@ int QRfact(int n, realtype **h, realtype *q, int job) code = 0; for (k=0; k < n; k++) { - + /* Multiply column k by the previous k-1 Givens rotations */ for (j=0; j < k-1; j++) { - i = 2*j; - temp1 = h[j][k]; - temp2 = h[j+1][k]; - c = q[i]; - s = q[i+1]; - h[j][k] = c*temp1 - s*temp2; - h[j+1][k] = s*temp1 + c*temp2; + i = 2*j; + temp1 = h[j][k]; + temp2 = h[j+1][k]; + c = q[i]; + s = q[i+1]; + h[j][k] = c*temp1 - s*temp2; + h[j+1][k] = s*temp1 + c*temp2; } - + /* Compute the Givens rotation components c and s */ q_ptr = 2*k; temp1 = h[k][k]; temp2 = h[k+1][k]; if( temp2 == ZERO) { - c = ONE; - s = ZERO; + c = ONE; + s = ZERO; } else if (SUNRabs(temp2) >= SUNRabs(temp1)) { - temp3 = temp1/temp2; - s = -ONE/SUNRsqrt(ONE+SUNSQR(temp3)); - c = -s*temp3; + temp3 = temp1/temp2; + s = -ONE/SUNRsqrt(ONE+SUNSQR(temp3)); + c = -s*temp3; } else { - temp3 = temp2/temp1; - c = ONE/SUNRsqrt(ONE+SUNSQR(temp3)); - s = -c*temp3; + temp3 = temp2/temp1; + c = ONE/SUNRsqrt(ONE+SUNSQR(temp3)); + s = -c*temp3; } q[q_ptr] = c; q[q_ptr+1] = s; @@ -216,7 +229,7 @@ int QRfact(int n, realtype **h, realtype *q, int job) n_minus_1 = n - 1; code = 0; - + /* Multiply the new column by the previous n-1 Givens rotations */ for (k=0; k < n_minus_1; k++) { @@ -228,9 +241,9 @@ int QRfact(int n, realtype **h, realtype *q, int job) h[k][n_minus_1] = c*temp1 - s*temp2; h[k+1][n_minus_1] = s*temp1 + c*temp2; } - + /* Compute new Givens rotation and multiply it times the last two - entries in the new column of H. Note that the second entry of + entries in the new column of H. Note that the second entry of this product will be 0, so it is not necessary to compute it. */ temp1 = h[n_minus_1][n_minus_1]; @@ -253,26 +266,31 @@ int QRfact(int n, realtype **h, realtype *q, int job) if ((h[n_minus_1][n_minus_1] = c*temp1 - s*temp2) == ZERO) code = n; } - + return (code); } /* * ----------------------------------------------------------------- - * Function : QRsol + * Function : SUNQRsol * ----------------------------------------------------------------- - * This implementation of QRsol is a slight modification of a + * This implementation of SUNQRsol is a slight modification of a * previous routine (called qrsol) written by Milo Dorr. * ----------------------------------------------------------------- */ int QRsol(int n, realtype **h, realtype *q, realtype *b) +{ + return(SUNQRsol(n, h, q, b)); +} + +int SUNQRsol(int n, realtype **h, realtype *q, realtype *b) { realtype c, s, temp1, temp2; int i, k, q_ptr, code=0; /* Compute Q*b */ - + for (k=0; k < n; k++) { q_ptr = 2*k; c = q[q_ptr]; @@ -293,6 +311,297 @@ int QRsol(int n, realtype **h, realtype *q, realtype *b) b[k] /= h[k][k]; for (i=0; i < k; i++) b[i] -= b[k]*h[i][k]; } - + return (code); } + + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_MGS + * ----------------------------------------------------------------- + * Implementation of QRAdd to be called in Anderson Acceleration + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_MGS(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); + for (j=0; j < m; j++) { + R[m * mMax + j] = N_VDotProd(Q[j], qrdata->vtemp); + N_VLinearSum(ONE, qrdata->vtemp, -R[m * mMax + j], Q[j], qrdata->vtemp); + } + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_ICWY + * ----------------------------------------------------------------- + * Low synchronous implementation of QRAdd to be called in + * Anderson Acceleration. + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_ICWY(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j, k; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); /* stores d_fi in temp */ + + if (m > 0) { + /* T(1:k-1,k-1)^T = Q(:,1:k-1)^T * Q(:,k-1) */ + N_VDotProdMulti(m, Q[m-1], Q, qrdata->temp_array + (m-1) * mMax); + + /* T(k-1,k-1) = 1.0 */ + qrdata->temp_array[(m-1) * mMax + (m-1)] = ONE; + + /* R(1:k-1,k) = Q_k-1^T * df */ + N_VDotProdMulti(m, qrdata->vtemp, Q, R + m * mMax ); + + /* Solve T^T * R(1:k-1,k) = R(1:k-1,k) */ + for (k = 0; k < m; k++) { + /* Skip setting the diagonal element because it doesn't change */ + for (j = k+1; j < m; j++) { + R[m * mMax + j] -= R[m * mMax + k] * qrdata->temp_array[j * mMax + k]; + } + } + /* end */ + + /* Q(:,k-1) = df - Q_k-1 R(1:k-1,k) */ + N_VLinearCombination(m, R + m * mMax, Q, qrdata->vtemp2); + N_VLinearSum(ONE, qrdata->vtemp, -ONE, qrdata->vtemp2, qrdata->vtemp); + } + + /* R(k,k) = \| df \| */ + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + /* Q(:,k) = df / \| df \| */ + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_ICWY_SB + * ----------------------------------------------------------------- + * Low synchronous implementation of QRAdd to be called in + * Anderson Acceleration which utilizes a single buffer reduction. + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_ICWY_SB(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j, k; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); /* stores d_fi in temp */ + + if (m > 0) { + /* T(1:k-1,k-1)^T = Q(:,1:k-1)^T * Q(:,k-1) */ + N_VDotProdMultiLocal(m, Q[m-1], Q, qrdata->temp_array + (m-1) * mMax); + + /* R(1:k-1,k) = Q_k-1^T * df */ + /* Put R values at end of temp_array */ + N_VDotProdMultiLocal(m, qrdata->vtemp, Q, qrdata->temp_array + (m-1) * mMax + m ); + N_VDotProdMultiAllReduce(m+m, qrdata->vtemp, qrdata->temp_array + (m-1) * mMax); + + /* Move the last values from temp array into R */ + for (k = 0; k < m; k++) { + R[m*mMax + k] = qrdata->temp_array[(m-1)*mMax + m + k]; + } + + /* T(k-1,k-1) = 1.0 */ + qrdata->temp_array[(m-1) * mMax + (m-1)] = ONE; + + /* Solve T^T * R(1:k-1,k) = R(1:k-1,k) */ + for (k = 0; k < m; k++) { + /* Skip setting the diagonal element because it doesn't change */ + for (j = k+1; j < m; j++) { + R[m * mMax + j] -= R[m * mMax + k] * qrdata->temp_array[j * mMax + k]; + } + } + /* end */ + + /* Q(:,k-1) = df - Q_k-1 R(1:k-1,k) */ + N_VLinearCombination(m, R + m * mMax, Q, qrdata->vtemp2); + N_VLinearSum(ONE, qrdata->vtemp, -ONE, qrdata->vtemp2, qrdata->vtemp); + } + + /* R(k,k) = \| df \| */ + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + /* Q(:,k) = df / \| df \| */ + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_CGS2 + * ----------------------------------------------------------------- + * Low synchronous Implementation of QRAdd to be called in + * Anderson Acceleration. + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_CGS2(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); /* temp = df */ + + if (m > 0) { + /* s_k = Q_k-1^T df_aa -- update with sdata as a realtype* array */ + N_VDotProdMulti(m, qrdata->vtemp, Q, R + m * mMax); + + /* y = df - Q_k-1 s_k */ + N_VLinearCombination(m, R + m * mMax, Q, qrdata->vtemp2); + N_VLinearSum(ONE, qrdata->vtemp, -ONE, qrdata->vtemp2, qrdata->vtemp2); + + /* z_k = Q_k-1^T y */ + N_VDotProdMulti(m, qrdata->vtemp2, Q, qrdata->temp_array); + + /* df = y - Q_k-1 z_k -- update using N_VLinearCombination */ + N_VLinearCombination(m, qrdata->temp_array, Q, Q[m]); + N_VLinearSum(ONE, qrdata->vtemp2, -ONE, Q[m], qrdata->vtemp); + + /* R(1:k-1,k) = s_k + z_k */ + for (j = 0; j < m; j++) { + R[m * mMax + j] = R[m * mMax + j] + qrdata->temp_array[j]; + } + } + + /* R(k,k) = \| df \| */ + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + /* Q(:,k) = df / R(k,k) */ + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_DCGS2 + * ----------------------------------------------------------------- + * Low synchronous Implementation of QRAdd to be called in + * Anderson Acceleration. + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_DCGS2(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); /* temp = df */ + + if (m > 0) { + /* R(1:k-1,k) = Q_k-1^T df_aa */ + N_VDotProdMulti(m, qrdata->vtemp, Q, R + m*mMax); + /* Delayed reorthogonalization */ + if (m > 1) { + /* s = Q_k-2^T Q(:,k-1) */ + N_VDotProdMulti(m-1, Q[m-1], Q, qrdata->temp_array); + + /* Q(:,k-1) = Q(:,k-1) - Q_k-2 s */ + N_VLinearCombination(m-1, qrdata->temp_array, Q, qrdata->vtemp2); + N_VLinearSum(ONE, Q[m-1], -ONE, qrdata->vtemp2, Q[m-1]); + + /* R(1:k-2,k-1) = R(1:k-2,k-1) + s */ + for (j = 0; j < m-1; j++) { + R[(m-1) * mMax + j] = R[(m-1) * mMax + j] + qrdata->temp_array[j]; + } + } + + /* df = df - Q(:,k-1) R(1:k-1,k) */ + N_VLinearCombination(m, R + m * mMax, Q, qrdata->vtemp2); + N_VLinearSum(ONE, qrdata->vtemp, -ONE, qrdata->vtemp2, qrdata->vtemp); + } + + /* R(k,k) = \| df \| */ + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + /* Q(:,k) = df / R(k,k) */ + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} + +/* + * ----------------------------------------------------------------- + * Function : SUNQRAdd_DCGS2_SB + * ----------------------------------------------------------------- + * Low synchronous Implementation of QRAdd to be called in + * Anderson Acceleration which utilizes a single buffer reduction. + * ----------------------------------------------------------------- + */ + +int SUNQRAdd_DCGS2_SB(N_Vector *Q, realtype *R, N_Vector df, + int m, int mMax, void *QRdata) +{ + sunindextype j; + SUNQRData qrdata = (SUNQRData) QRdata; + + N_VScale(ONE, df, qrdata->vtemp); /* temp = df */ + + if (m > 0) { + if (m == 1) { + /* R(1:k-1,k) = Q_k-1^T df_aa */ + N_VDotProdMulti(m, qrdata->vtemp, Q, R + m*mMax); + } + /* Delayed reorthogonalization */ + else if (m > 1) { + /* R(1:k-1,k) = Q_k-1^T df_aa */ + /* Put R values at beginning of temp array */ + N_VDotProdMultiLocal(m, qrdata->vtemp, Q, qrdata->temp_array); + + /* s = Q_k-2^T Q(:,k-1) */ + N_VDotProdMultiLocal(m-1, Q[m-1], Q, qrdata->temp_array + m); + N_VDotProdMultiAllReduce(m + m-1, qrdata->vtemp, qrdata->temp_array); + + /* Move R values to R */ + for (j = 0; j < m; j++) { + R[m*mMax + j] = qrdata->temp_array[j]; + } + + /* Q(:,k-1) = Q(:,k-1) - Q_k-2 s */ + N_VLinearCombination(m-1, qrdata->temp_array + m, Q, qrdata->vtemp2); + N_VLinearSum(ONE, Q[m-1], -ONE, qrdata->vtemp2, Q[m-1]); + + /* R(1:k-2,k-1) = R(1:k-2,k-1) + s */ + for (j = 0; j < m-1; j++) { + R[(m-1) * mMax + j] = R[(m-1) * mMax + j] + qrdata->temp_array[m + j]; + } + } + + /* df = df - Q(:,k-1) R(1:k-1,k) */ + N_VLinearCombination(m, R + m * mMax, Q, qrdata->vtemp2); + N_VLinearSum(ONE, qrdata->vtemp, -ONE, qrdata->vtemp2, qrdata->vtemp); + } + + /* R(k,k) = \| df \| */ + R[m * mMax + m] = SUNRsqrt(N_VDotProd(qrdata->vtemp, qrdata->vtemp)); + /* Q(:,k) = df / R(k,k) */ + N_VScale((1/R[m * mMax + m]), qrdata->vtemp, Q[m]); + + /* Return success */ + return 0; +} diff --git a/src/lib/sundials/sundials_iterative_impl.h b/src/lib/sundials/sundials_iterative_impl.h new file mode 100644 index 0000000..fed1c72 --- /dev/null +++ b/src/lib/sundials/sundials_iterative_impl.h @@ -0,0 +1,35 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner and Shelby Lockhart @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the implementation header file for SUNDIALS functions used by + * different iterative solvers. + * ---------------------------------------------------------------------------*/ + +#include + +/* ----------------------------------------------------------------------------- + * Type: SUNQRData + * ----------------------------------------------------------------------------- + * A SUNQRData struct holds temporary workspace vectors and realtype arrays for + * a SUNQRAddFn. The N_Vectors and realtype arrays it contains are created by + * the routine calling a SUNQRAdd function. + * ---------------------------------------------------------------------------*/ + +typedef struct _SUNQRData *SUNQRData; + +struct _SUNQRData +{ + N_Vector vtemp; + N_Vector vtemp2; + realtype *temp_array; +}; diff --git a/src/lib/sundials/sundials_lapack_defs.h b/src/lib/sundials/sundials_lapack_defs.h new file mode 100644 index 0000000..c3696fd --- /dev/null +++ b/src/lib/sundials/sundials_lapack_defs.h @@ -0,0 +1,103 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LAPACK_H +#define _SUNDIALS_LAPACK_H + +#include + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* + * ================================================================== + * Blas and Lapack functions + * ================================================================== + */ + +#if defined(SUNDIALS_F77_FUNC) + +#define dgbtrf_f77 SUNDIALS_F77_FUNC(dgbtrf, DGBTRF) +#define dgbtrs_f77 SUNDIALS_F77_FUNC(dgbtrs, DGBTRS) +#define dgetrf_f77 SUNDIALS_F77_FUNC(dgetrf, DGETRF) +#define dgetrs_f77 SUNDIALS_F77_FUNC(dgetrs, DGETRS) + +#define sgbtrf_f77 SUNDIALS_F77_FUNC(sgbtrf, SGBTRF) +#define sgbtrs_f77 SUNDIALS_F77_FUNC(sgbtrs, SGBTRS) +#define sgetrf_f77 SUNDIALS_F77_FUNC(sgetrf, SGETRF) +#define sgetrs_f77 SUNDIALS_F77_FUNC(sgetrs, SGETRS) + +#else + +#define dgbtrf_f77 dgbtrf_ +#define dgbtrs_f77 dgbtrs_ +#define dgetrf_f77 dgetrf_ +#define dgetrs_f77 dgetrs_ + +#define sgbtrf_f77 sgbtrf_ +#define sgbtrs_f77 sgbtrs_ +#define sgetrf_f77 sgetrf_ +#define sgetrs_f77 sgetrs_ + +#endif + +/* LAPACK */ + +extern void dgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + double *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); + +extern void dgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, double *ab, + const sunindextype *ldab, sunindextype *ipiv, + double *b, const sunindextype *ldb, sunindextype *info); + + +extern void dgetrf_f77(const sunindextype *m, const sunindextype *n, double *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); + +extern void dgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, double *a, + const sunindextype *lda, sunindextype *ipiv, double *b, + const sunindextype *ldb, sunindextype *info); + +extern void sgbtrf_f77(const sunindextype *m, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + float *ab, sunindextype *ldab, sunindextype *ipiv, + sunindextype *info); + +extern void sgbtrs_f77(const char *trans, const sunindextype *n, + const sunindextype *kl, const sunindextype *ku, + const sunindextype *nrhs, float *ab, + const sunindextype *ldab, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); + +extern void sgetrf_f77(const sunindextype *m, const sunindextype *n, float *a, + sunindextype *lda, sunindextype *ipiv, + sunindextype *info); + +extern void sgetrs_f77(const char *trans, const sunindextype *n, + const sunindextype *nrhs, float *a, + const sunindextype *lda, sunindextype *ipiv, + float *b, const sunindextype *ldb, sunindextype *info); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/lib/sundials/sundials_linearsolver.c b/src/lib/sundials/sundials_linearsolver.c index 74937fb..7a2a62e 100644 --- a/src/lib/sundials/sundials_linearsolver.c +++ b/src/lib/sundials/sundials_linearsolver.c @@ -4,7 +4,7 @@ * Slaven Peles @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -20,16 +20,27 @@ #include #include +#include "sundials_context_impl.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) +static SUNProfiler getSUNProfiler(SUNLinearSolver S) +{ + return(S->sunctx->profiler); +} +#endif /* ----------------------------------------------------------------- * Create a new empty SUNLinearSolver object * ----------------------------------------------------------------- */ -SUNLinearSolver SUNLinSolNewEmpty() +SUNLinearSolver SUNLinSolNewEmpty(SUNContext sunctx) { SUNLinearSolver LS; SUNLinearSolver_Ops ops; + /* a context is required */ + if (sunctx == NULL) return(NULL); + /* create linear solver object */ LS = NULL; LS = (SUNLinearSolver) malloc(sizeof *LS); @@ -46,6 +57,7 @@ SUNLinearSolver SUNLinSolNewEmpty() ops->setatimes = NULL; ops->setpreconditioner = NULL; ops->setscalingvectors = NULL; + ops->setzeroguess = NULL; ops->initialize = NULL; ops->setup = NULL; ops->solve = NULL; @@ -56,9 +68,10 @@ SUNLinearSolver SUNLinSolNewEmpty() ops->space = NULL; ops->free = NULL; - /* attach ops and initialize content to NULL */ + /* attach ops and initialize content and context to NULL */ LS->ops = ops; LS->content = NULL; + LS->sunctx = sunctx; return(LS); } @@ -98,77 +111,119 @@ SUNLinearSolver_ID SUNLinSolGetID(SUNLinearSolver S) } int SUNLinSolSetATimes(SUNLinearSolver S, void* A_data, - ATimesFn ATimes) + SUNATimesFn ATimes) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->setatimes) - return ((int) S->ops->setatimes(S, A_data, ATimes)); + ier = S->ops->setatimes(S, A_data, ATimes); else - return SUNLS_SUCCESS; + ier = SUNLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); } int SUNLinSolSetPreconditioner(SUNLinearSolver S, void* P_data, - PSetupFn Pset, PSolveFn Psol) + SUNPSetupFn Pset, SUNPSolveFn Psol) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->setpreconditioner) - return ((int) S->ops->setpreconditioner(S, P_data, Pset, Psol)); + ier = S->ops->setpreconditioner(S, P_data, Pset, Psol); else - return SUNLS_SUCCESS; + ier = SUNLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); } int SUNLinSolSetScalingVectors(SUNLinearSolver S, N_Vector s1, N_Vector s2) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->setscalingvectors) - return ((int) S->ops->setscalingvectors(S, s1, s2)); + ier = S->ops->setscalingvectors(S, s1, s2); + else + ier = SUNLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); +} + +int SUNLinSolSetZeroGuess(SUNLinearSolver S, booleantype onoff) +{ + if (S->ops->setzeroguess) + return ((int) S->ops->setzeroguess(S, onoff)); else return SUNLS_SUCCESS; } int SUNLinSolInitialize(SUNLinearSolver S) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->initialize) - return ((int) S->ops->initialize(S)); + ier = S->ops->initialize(S); else - return SUNLS_SUCCESS; + ier = SUNLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); } int SUNLinSolSetup(SUNLinearSolver S, SUNMatrix A) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->setup) - return ((int) S->ops->setup(S, A)); + ier = S->ops->setup(S, A); else - return SUNLS_SUCCESS; + ier = SUNLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); } int SUNLinSolSolve(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype tol) { - return ((int) S->ops->solve(S, A, x, b, tol)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); + ier = S->ops->solve(S, A, x, b, tol); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(ier); } int SUNLinSolNumIters(SUNLinearSolver S) { + int ier; if (S->ops->numiters) - return ((int) S->ops->numiters(S)); + ier = S->ops->numiters(S); else - return 0; + ier = 0; + return(ier); } realtype SUNLinSolResNorm(SUNLinearSolver S) { + double result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->resnorm) - return ((realtype) S->ops->resnorm(S)); + result = S->ops->resnorm(S); else - return RCONST(0.0); + result = RCONST(0.0); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(result); } N_Vector SUNLinSolResid(SUNLinearSolver S) { + N_Vector resid; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(S)); if (S->ops->resid) - return ((N_Vector) S->ops->resid(S)); + resid = S->ops->resid(S); else - return NULL; + resid = NULL; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(S)); + return(resid); } sunindextype SUNLinSolLastFlag(SUNLinearSolver S) diff --git a/src/lib/sundials/sundials_logger.c b/src/lib/sundials/sundials_logger.c new file mode 100644 index 0000000..4214677 --- /dev/null +++ b/src/lib/sundials/sundials_logger.c @@ -0,0 +1,590 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include + +#ifdef SUNDIALS_LOGGING_ENABLE_MPI +#include +#endif + +#include "sundials_logger_impl.h" +#include "sundials_utils.h" + +/* max number of files that can be opened */ +#define SUN_MAX_LOGFILE_HANDLES_ 8 + +/* shortcut */ +#define SUNLOGGER_MPICOMM(logger) (*((MPI_Comm*)logger->commptr)) + +static void sunCreateLogMessage(SUNLogLevel lvl, int rank, const char* scope, + const char* label, const char* txt, + va_list args, char** log_msg) +{ + char* prefix; + char* formatted_txt; + int msg_length; + + prefix = NULL; + formatted_txt = NULL; + msg_length = 0; + *log_msg = NULL; + + msg_length = sunvasnprintf(&formatted_txt, txt, args); + if (msg_length < 0) + { + fprintf(stderr, "[FATAL LOGGER ERROR] %s\n", + "SUNDIALS_MAX_SPRINTF_SIZE is too small"); + } + + if (lvl == SUN_LOGLEVEL_DEBUG) + { + prefix = "DEBUG"; + } + else if (lvl == SUN_LOGLEVEL_WARNING) + { + prefix = "WARNING"; + } + else if (lvl == SUN_LOGLEVEL_INFO) + { + prefix = "INFO"; + } + else if (lvl == SUN_LOGLEVEL_ERROR) + { + prefix = "ERROR"; + } + + msg_length = sunsnprintf(NULL, 0, "[%s][rank::%d][%s][%s] %s\n", prefix, + rank, scope, label, formatted_txt); + *log_msg = (char*)malloc(msg_length + 1); + sunsnprintf(*log_msg, msg_length + 1, "[%s][rank::%d][%s][%s] %s\n", prefix, + rank, scope, label, formatted_txt); + free(formatted_txt); +} + +static FILE* sunOpenLogFile(const char* fname, const char* mode) +{ + FILE* fp = NULL; + + if (fname) + { + if (!strcmp(fname, "stdout")) + { + fp = stdout; + } + else if (!strcmp(fname, "stderr")) + { + fp = stderr; + } + else + { + fp = fopen(fname, mode); + } + } + + return fp; +} + +static void sunCloseLogFile(void* fp) +{ + if (fp && fp != stdout && fp != stderr) + { + fclose((FILE*)fp); + } +} + +static sunbooleantype sunLoggerIsOutputRank(SUNLogger logger, int* rank_ref) +{ + sunbooleantype retval; + +#ifdef SUNDIALS_LOGGING_ENABLE_MPI + int rank = 0; + + if (logger->commptr) + { + MPI_Comm_rank(SUNLOGGER_MPICOMM(logger), &rank); + + if (logger->output_rank < 0) + { + if (rank_ref) + { + *rank_ref = rank; + } + retval = SUNTRUE; /* output all ranks */ + } + else + { + if (rank_ref) + { + *rank_ref = rank; + } + retval = logger->output_rank == rank; + } + } + else + { + retval = SUNTRUE; /* output all ranks */ + } +#else + if (rank_ref) + { + *rank_ref = -1; + } + retval = SUNTRUE; +#endif + + return retval; +} + +int SUNLogger_Create(void* comm, int output_rank, SUNLogger* logger_ptr) +{ + SUNLogger logger = NULL; + + *logger_ptr = logger = (SUNLogger)malloc(sizeof(struct SUNLogger_)); + if (logger == NULL) + { + return -1; + } + + /* Attach the comm, duplicating it if MPI is used. */ +#ifdef SUNDIALS_LOGGING_ENABLE_MPI + logger->commptr = NULL; + if (comm != NULL) + { + logger->commptr = malloc(sizeof(MPI_Comm)); + MPI_Comm_dup(*((MPI_Comm*) comm), (MPI_Comm*) logger->commptr); + } +#else + if (comm != NULL) + { + return -1; + } + logger->commptr = NULL; +#endif + logger->output_rank = output_rank; + logger->content = NULL; + + /* use default routines */ + logger->queuemsg = NULL; + logger->flush = NULL; + logger->destroy = NULL; + + /* set the output file handles */ + logger->filenames = NULL; + logger->error_fp = NULL; + logger->warning_fp = NULL; + logger->debug_fp = NULL; + logger->info_fp = NULL; + if (sunLoggerIsOutputRank(logger, NULL)) + { + /* We store the FILE* in a hash map so that we can ensure + that we do not open a file twice if the same file is used + for multiple output levels */ + SUNHashMap_New(SUN_MAX_LOGFILE_HANDLES_, &logger->filenames); + } + + return 0; +} + +int SUNLogger_CreateFromEnv(void* comm, SUNLogger* logger) +{ + int retval = 0; + + const char* output_rank_env = getenv("SUNLOGGER_OUTPUT_RANK"); + int output_rank = (output_rank_env) ? atoi(output_rank_env) : 0; + const char* error_fname_env = getenv("SUNLOGGER_ERROR_FILENAME"); + const char* warning_fname_env = getenv("SUNLOGGER_WARNING_FILENAME"); + const char* info_fname_env = getenv("SUNLOGGER_INFO_FILENAME"); + const char* debug_fname_env = getenv("SUNLOGGER_DEBUG_FILENAME"); + + retval += SUNLogger_Create(comm, output_rank, logger); + retval += SUNLogger_SetErrorFilename(*logger, error_fname_env); + retval += SUNLogger_SetWarningFilename(*logger, warning_fname_env); + retval += SUNLogger_SetDebugFilename(*logger, debug_fname_env); + retval += SUNLogger_SetInfoFilename(*logger, info_fname_env); + + return (retval < 0) ? -1 : 0; +} + +int SUNLogger_SetErrorFilename(SUNLogger logger, const char* error_filename) +{ + if (logger == NULL) + { + return -1; + } + + if (!sunLoggerIsOutputRank(logger, NULL)) + { + return 0; + } + + if (error_filename && strcmp(error_filename, "")) + { +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_ERROR + FILE* fp = NULL; + if (!SUNHashMap_GetValue(logger->filenames, error_filename, (void*)&fp)) + { + logger->error_fp = fp; + } + else + { + logger->error_fp = sunOpenLogFile(error_filename, "w+"); + if (logger->error_fp) + { + SUNHashMap_Insert(logger->filenames, error_filename, + (void*)logger->error_fp); + } + else + { + return -1; + } + } +#else + fprintf(stderr, + "[LOGGER WARNING] " + "SUNDIALS_LOGGING_LEVEL=%d (build time option) " + "is set too low for ERROR, but a ERROR file was provided. " + "Set the logging level to >= %d and recompile if ERROR output level " + "is desired.\n", SUN_LOGLEVEL_ERROR, SUNDIALS_LOGGING_LEVEL); +#endif + } + + return 0; +} + +int SUNLogger_SetWarningFilename(SUNLogger logger, const char* warning_filename) +{ + if (logger == NULL) + { + return -1; + } + + if (!sunLoggerIsOutputRank(logger, NULL)) + { + return 0; + } + + if (warning_filename && strcmp(warning_filename, "")) + { +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_WARNING + FILE* fp = NULL; + if (!SUNHashMap_GetValue(logger->filenames, warning_filename, (void*)&fp)) + { + logger->warning_fp = fp; + } + else + { + logger->warning_fp = sunOpenLogFile(warning_filename, "w+"); + if (logger->warning_fp) + { + SUNHashMap_Insert(logger->filenames, warning_filename, + (void*)logger->warning_fp); + } + else + { + return -1; + } + } +#else + fprintf(stderr, + "[LOGGER WARNING] " + "SUNDIALS_LOGGING_LEVEL=%d (build time option) " + "is set too low for WARNING, but a WARNING file was provided. " + "Set the logging level to >= %d and recompile if WARNING output " + "level is desired.\n", SUN_LOGLEVEL_WARNING, SUNDIALS_LOGGING_LEVEL); +#endif + } + + return 0; +} + +int SUNLogger_SetInfoFilename(SUNLogger logger, const char* info_filename) +{ + if (logger == NULL) + { + return -1; + } + + if (!sunLoggerIsOutputRank(logger, NULL)) + { + return 0; + } + + if (info_filename && strcmp(info_filename, "")) + { +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + FILE* fp = NULL; + if (!SUNHashMap_GetValue(logger->filenames, info_filename, (void*)&fp)) + { + logger->info_fp = fp; + } + else + { + logger->info_fp = sunOpenLogFile(info_filename, "w+"); + if (logger->info_fp) + { + SUNHashMap_Insert(logger->filenames, info_filename, + (void*)logger->info_fp); + } + else + { + return -1; + } + } +#else + fprintf(stderr, + "[LOGGER WARNING] " + "SUNDIALS_LOGGING_LEVEL=%d (build time option) " + "is set too low for INFO, but a INFO file was provided. Set the " + "logging level to >= %d and recompile if INFO output level is " + "desired.\n", SUN_LOGLEVEL_INFO, SUNDIALS_LOGGING_LEVEL); +#endif + } + + return 0; +} + +int SUNLogger_SetDebugFilename(SUNLogger logger, const char* debug_filename) +{ + if (logger == NULL) + { + return -1; + } + + if (!sunLoggerIsOutputRank(logger, NULL)) + { + return 0; + } + + if (debug_filename && strcmp(debug_filename, "")) + { +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG + FILE* fp = NULL; + if (!SUNHashMap_GetValue(logger->filenames, debug_filename, (void*)&fp)) + { + logger->debug_fp = fp; + } + else + { + logger->debug_fp = sunOpenLogFile(debug_filename, "w+"); + if (logger->debug_fp) + { + SUNHashMap_Insert(logger->filenames, debug_filename, + (void*)logger->debug_fp); + } + else + { + return -1; + } + } +#else + fprintf(stderr, + "[LOGGER WARNING] " + "SUNDIALS_LOGGING_LEVEL=%d (build time option) " + "is set too low for DEBUG output, but a DEBUG file was provided. " + "Set the logging level to >= %d and recompile if DEBUG output level " + "is desired.\n", SUN_LOGLEVEL_DEBUG, SUNDIALS_LOGGING_LEVEL); +#endif + } + + return 0; +} + + +int SUNLogger_QueueMsg(SUNLogger logger, SUNLogLevel lvl, const char* scope, + const char* label, const char* msg_txt, ...) +{ + int retval = 0; + + if (logger == NULL) + { + return -1; + } + +#if SUNDIALS_LOGGING_LEVEL > 0 + { + va_list args; + va_start(args, msg_txt); + + if (logger->queuemsg) + { + retval = logger->queuemsg(logger, lvl, scope, label, msg_txt, args); + } + else + { + /* Default implementation */ + int rank = 0; + if (sunLoggerIsOutputRank(logger, &rank)) + { + char* log_msg = NULL; + sunCreateLogMessage(lvl, rank, scope, label, msg_txt, args, &log_msg); + + switch (lvl) + { + case (SUN_LOGLEVEL_DEBUG): + if (logger->debug_fp) + { + fprintf(logger->debug_fp, "%s", log_msg); + } + break; + case (SUN_LOGLEVEL_WARNING): + if (logger->warning_fp) + { + fprintf(logger->warning_fp, "%s", log_msg); + } + break; + case (SUN_LOGLEVEL_INFO): + if (logger->info_fp) + { + fprintf(logger->info_fp, "%s", log_msg); + } + break; + case (SUN_LOGLEVEL_ERROR): + if (logger->error_fp) + { + fprintf(logger->error_fp, "%s", log_msg); + } + break; + default: + retval = -1; + } + + free(log_msg); + } + } + + va_end(args); + } +#endif + + return retval; +} + +int SUNLogger_Flush(SUNLogger logger, SUNLogLevel lvl) +{ + int retval = 0; + + if (logger == NULL) + { + return -1; + } + +#if SUNDIALS_LOGGING_LEVEL > 0 + if (logger->flush) + { + retval = logger->flush(logger, lvl); + } + else + { + /* Default implementation */ + if (sunLoggerIsOutputRank(logger, NULL)) + { + switch (lvl) + { + case (SUN_LOGLEVEL_DEBUG): + if (logger->debug_fp) + { + fflush(logger->debug_fp); + } + break; + case (SUN_LOGLEVEL_WARNING): + if (logger->warning_fp) + { + fflush(logger->warning_fp); + } + break; + case (SUN_LOGLEVEL_INFO): + if (logger->info_fp) + { + fflush(logger->info_fp); + } + break; + case (SUN_LOGLEVEL_ERROR): + if (logger->error_fp) + { + fflush(logger->error_fp); + } + break; + case (SUN_LOGLEVEL_ALL): + if (logger->debug_fp) + { + fflush(logger->debug_fp); + } + if (logger->warning_fp) + { + fflush(logger->warning_fp); + } + if (logger->info_fp) + { + fflush(logger->info_fp); + } + if (logger->error_fp) + { + fflush(logger->error_fp); + } + break; + default: + retval = -1; + } + } + } +#endif + + return retval; +} + +int SUNLogger_GetOutputRank(SUNLogger logger, int* output_rank) +{ + int retval = 0; + if (logger == NULL) + { + retval = -1; + } + else + { + *output_rank = logger->output_rank; + retval = 0; + } + return retval; +} + +int SUNLogger_Destroy(SUNLogger* logger) +{ + int retval = 0; + + if ((*logger)->destroy) + { + retval = (*logger)->destroy(logger); + } + else + { + if (logger && (*logger)) + { + /* Default implementation */ + if (sunLoggerIsOutputRank(*logger, NULL)) + { + SUNHashMap_Destroy(&(*logger)->filenames, sunCloseLogFile); + } + + free(*logger); + *logger = NULL; + } + } + + return retval; +} diff --git a/src/lib/sundials/sundials_logger_impl.h b/src/lib/sundials/sundials_logger_impl.h new file mode 100644 index 0000000..f28e86b --- /dev/null +++ b/src/lib/sundials/sundials_logger_impl.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS logging class implementation. + * ----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_LOGGER_IMPL_H +#define _SUNDIALS_LOGGER_IMPL_H + +#include +#include +#include + +#include "sundials_hashmap.h" + +#define SUNDIALS_LOGGING_ERROR 1 +#define SUNDIALS_LOGGING_WARNING 2 +#define SUNDIALS_LOGGING_INFO 3 +#define SUNDIALS_LOGGING_DEBUG 4 +#if SUNDIALS_LOGGING_LEVEL > SUNDIALS_LOGGING_DEBUG +#define SUNDIALS_LOGGING_EXTRA_DEBUG +#endif + +struct SUNLogger_ { + /* MPI information */ + void* commptr; + int output_rank; + + /* Ouput files */ + FILE* debug_fp; + FILE* warning_fp; + FILE* info_fp; + FILE* error_fp; + + /* Hashmap used to store filename, FILE* pairs */ + SUNHashMap filenames; + + /* Slic-style format string */ + const char* format; + + /* Content for custom implementations */ + void* content; + + /* Overridable operations */ + int (*queuemsg)(SUNLogger logger, SUNLogLevel lvl, const char* scope, + const char* label, const char* msg_txt, va_list args); + int (*flush)(SUNLogger logger, SUNLogLevel lvl); + int (*destroy)(SUNLogger* logger); +}; + +#endif /* _SUNDIALS_LOGGER_IMPL_H */ diff --git a/src/lib/sundials/sundials_math.c b/src/lib/sundials/sundials_math.c index bca60cd..8e21e6a 100644 --- a/src/lib/sundials/sundials_math.c +++ b/src/lib/sundials/sundials_math.c @@ -3,7 +3,7 @@ * Aaron Collier @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -20,34 +20,158 @@ #include #include +#include #include -#define ZERO RCONST(0.0) -#define ONE RCONST(1.0) +static long double sunNextafterl(long double from, long double to); -realtype SUNRpowerI(realtype base, int exponent) +static booleantype sunIsInf(sunrealtype a) +{ +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_ISINF_ISNAN) + return(isinf(a)); +#else + return(a < -BIG_REAL || a > BIG_REAL); +#endif +} + +static booleantype sunIsNaN(sunrealtype a) +{ +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_ISINF_ISNAN) + return(isnan(a)); +#else + /* Most compilers/platforms follow NaN != a, + * but since C89 does not require this, it is + * possible some platforms might not follow it. + */ + return(a != a); +#endif +} + +sunrealtype SUNRpowerI(sunrealtype base, int exponent) { int i, expt; - realtype prod; + sunrealtype prod; - prod = ONE; + prod = RCONST(1.0); expt = abs(exponent); for(i = 1; i <= expt; i++) prod *= base; - if (exponent < 0) prod = ONE/prod; + if (exponent < 0) prod = RCONST(1.0)/prod; return(prod); } -realtype SUNRpowerR(realtype base, realtype exponent) +sunrealtype SUNRpowerR(sunrealtype base, sunrealtype exponent) { - if (base <= ZERO) return(ZERO); + if (base <= RCONST(0.0)) return(RCONST(0.0)); -#if defined(SUNDIALS_USE_GENERIC_MATH) - return((realtype) pow((double) base, (double) exponent)); -#elif defined(SUNDIALS_DOUBLE_PRECISION) +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +#if defined(SUNDIALS_DOUBLE_PRECISION) return(pow(base, exponent)); #elif defined(SUNDIALS_SINGLE_PRECISION) return(powf(base, exponent)); #elif defined(SUNDIALS_EXTENDED_PRECISION) return(powl(base, exponent)); #endif +#else + return((sunrealtype) pow((double) base, (double) exponent)); +#endif +} + +booleantype SUNRCompare(sunrealtype a, sunrealtype b) +{ + return(SUNRCompareTol(a, b, 10*UNIT_ROUNDOFF)); +} + +booleantype SUNRCompareTol(sunrealtype a, sunrealtype b, sunrealtype tol) +{ + sunrealtype diff; + sunrealtype norm; + + /* If a and b are exactly equal. + * This also covers the case where a and b are both inf under IEEE 754. + */ + if (a == b) return(SUNFALSE); + + /* If a or b are NaN */ + if (sunIsNaN(a) || sunIsNaN(b)) return(SUNTRUE); + + /* If one of a or b are Inf (since we handled both being inf above) */ + if (sunIsInf(a) || sunIsInf(b)) return(SUNTRUE); + + diff = SUNRabs(a - b); + norm = SUNMIN(SUNRabs(a + b), BIG_REAL); + + /* When |a + b| is very small (less than 10*UNIT_ROUNDOFF) or zero, we use an + * absolute difference: + * |a - b| >= 10*UNIT_ROUNDOFF + * Otherwise we use a relative difference: + * |a - b| < tol * |a + b| + * The choice to use |a + b| over max(a, b) + * is arbitrary, as is the choice to use + * 10*UNIT_ROUNDOFF. + */ + return(diff >= SUNMAX(10*UNIT_ROUNDOFF, tol*norm)); +} + +long double sunNextafterl(long double from, long double to) +{ +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) + return nextafterl(from, to); +#else + union { + long double f; + int i; + } u; + + u.i = 0; + u.f = from; + + /* if either are NaN, then return NaN via the sum */ + if (sunIsNaN((sunrealtype) from) || sunIsNaN((sunrealtype) to)) { return from + to; } + + if (from == to) { + return to; + } + + /* ordering is -0.0, +0.0 so nextafter(-0.0, 0.0) should give +0.0 + and nextafter(0.0, -0.0) should give -0.0 */ + if (from == 0) { + u.i = 1; + return to > 0 ? u.f : -u.f; + } + + if ((from > 0) == (to > from)) { + u.i++; + } else { + u.i--; + } + + return u.f; +#endif +} + +sunrealtype SUNStrToReal(const char* str) +{ + char* end; +#if defined(__cplusplus) || defined(SUNDIALS_C_COMPILER_HAS_MATH_PRECISIONS) +#if defined(SUNDIALS_EXTENDED_PRECISION) + return strtold(str, &end); +#elif defined(SUNDIALS_DOUBLE_PRECISION) + return strtod(str, &end); +#elif defined(SUNDIALS_SINGLE_PRECISION) + return strtof(str, &end); +#else +#error "Should not be here, no SUNDIALS precision defined, report to github.com/LLNL/sundials/issues" +#endif +#else +#if defined(SUNDIALS_EXTENDED_PRECISION) + /* Use strtod, but then round down to the closest double value + since strtod will effectively round up to the closest long double. */ + double val = strtod(str, &end); + return (sunrealtype) sunNextafterl(val, -0.0); +#elif defined(SUNDIALS_DOUBLE_PRECISION) + return strtod(str, &end); +#elif defined(SUNDIALS_SINGLE_PRECISION) + return strtod(str, &end); +#endif +#endif } diff --git a/src/lib/sundials/sundials_matrix.c b/src/lib/sundials/sundials_matrix.c index 5035a79..0fa0e1a 100644 --- a/src/lib/sundials/sundials_matrix.c +++ b/src/lib/sundials/sundials_matrix.c @@ -4,7 +4,7 @@ * Slaven Peles @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -21,16 +21,27 @@ #include #include #include +#include "sundials_context_impl.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) +static SUNProfiler getSUNProfiler(SUNMatrix A) +{ + return(A->sunctx->profiler); +} +#endif /* ----------------------------------------------------------------- * Create a new empty SUNMatrix object * ----------------------------------------------------------------- */ -SUNMatrix SUNMatNewEmpty() +SUNMatrix SUNMatNewEmpty(SUNContext sunctx) { SUNMatrix A; SUNMatrix_Ops ops; + /* a context is required */ + if (sunctx == NULL) return(NULL); + /* create matrix object */ A = NULL; A = (SUNMatrix) malloc(sizeof *A); @@ -56,6 +67,7 @@ SUNMatrix SUNMatNewEmpty() /* attach ops and initialize content to NULL */ A->ops = ops; A->content = NULL; + A->sunctx = sunctx; return(A); } @@ -68,7 +80,7 @@ SUNMatrix SUNMatNewEmpty() void SUNMatFreeEmpty(SUNMatrix A) { if (A == NULL) return; - + /* free non-NULL ops structure */ if (A->ops) free(A->ops); A->ops = NULL; @@ -90,15 +102,16 @@ int SUNMatCopyOps(SUNMatrix A, SUNMatrix B) if (A->ops == NULL || B->ops == NULL) return(-1); /* Copy ops from A to B */ - B->ops->getid = A->ops->getid; - B->ops->clone = A->ops->clone; - B->ops->destroy = A->ops->destroy; - B->ops->zero = A->ops->zero; - B->ops->copy = A->ops->copy; - B->ops->scaleadd = A->ops->scaleadd; - B->ops->scaleaddi = A->ops->scaleaddi; - B->ops->matvec = A->ops->matvec; - B->ops->space = A->ops->space; + B->ops->getid = A->ops->getid; + B->ops->clone = A->ops->clone; + B->ops->destroy = A->ops->destroy; + B->ops->zero = A->ops->zero; + B->ops->copy = A->ops->copy; + B->ops->scaleadd = A->ops->scaleadd; + B->ops->scaleaddi = A->ops->scaleaddi; + B->ops->matvecsetup = A->ops->matvecsetup; + B->ops->matvec = A->ops->matvec; + B->ops->space = A->ops->space; return(0); } @@ -118,7 +131,10 @@ SUNMatrix_ID SUNMatGetID(SUNMatrix A) SUNMatrix SUNMatClone(SUNMatrix A) { SUNMatrix B = NULL; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); B = A->ops->clone(A); + B->sunctx = A->sunctx; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); return(B); } @@ -141,35 +157,64 @@ void SUNMatDestroy(SUNMatrix A) int SUNMatZero(SUNMatrix A) { - return((int) A->ops->zero(A)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->zero(A); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatCopy(SUNMatrix A, SUNMatrix B) { - return((int) A->ops->copy(A, B)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->copy(A, B); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatScaleAdd(realtype c, SUNMatrix A, SUNMatrix B) { - return((int) A->ops->scaleadd(c, A, B)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->scaleadd(c, A, B); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatScaleAddI(realtype c, SUNMatrix A) { - return((int) A->ops->scaleaddi(c, A)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->scaleaddi(c, A); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatMatvecSetup(SUNMatrix A) { - return((int) A->ops->matvecsetup(A)); + int ier = 0; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + if (A->ops->matvecsetup) + ier = A->ops->matvecsetup(A); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatMatvec(SUNMatrix A, N_Vector x, N_Vector y) { - return((int) A->ops->matvec(A, x, y)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->matvec(A, x, y); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } int SUNMatSpace(SUNMatrix A, long int *lenrw, long int *leniw) { - return((int) A->ops->space(A, lenrw, leniw)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(A)); + ier = A->ops->space(A, lenrw, leniw); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(A)); + return(ier); } diff --git a/src/lib/sundials/sundials_memory.c b/src/lib/sundials/sundials_memory.c new file mode 100644 index 0000000..190562a --- /dev/null +++ b/src/lib/sundials/sundials_memory.c @@ -0,0 +1,266 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * SUNDIALS memory helper. + * ----------------------------------------------------------------*/ + +#include + +#include +#include +#include "sundials_debug.h" +#include "sundials_context_impl.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) +static SUNProfiler getSUNProfiler(SUNMemoryHelper H) +{ + return(H->sunctx->profiler); +} +#endif + +SUNMemory SUNMemoryNewEmpty(void) +{ + SUNMemory mem = NULL; + + mem = (SUNMemory) malloc(sizeof(struct _SUNMemory)); + if (mem == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMemoryNewEmpty: malloc failed\n"); + return(NULL); + } + + mem->bytes = 0; + + return(mem); +} + + +SUNMemoryHelper SUNMemoryHelper_NewEmpty(SUNContext sunctx) +{ + SUNMemoryHelper helper = NULL; + + if (sunctx == NULL) return(NULL); + + helper = (SUNMemoryHelper) malloc(sizeof(struct _SUNMemoryHelper)); + if (helper == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMemoryHelper_NewEmpty: malloc failed\n"); + return(NULL); + } + + helper->ops = (SUNMemoryHelper_Ops) malloc(sizeof(struct _SUNMemoryHelper_Ops)); + if (helper->ops == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMemoryHelper_NewEmpty: malloc failed\n"); + free(helper); + return(NULL); + } + + /* Set all ops to NULL */ + memset(helper->ops, 0, sizeof(struct _SUNMemoryHelper_Ops)); + helper->content = NULL; + helper->sunctx = sunctx; + + return(helper); +} + + +int SUNMemoryHelper_CopyOps(SUNMemoryHelper src, SUNMemoryHelper dst) +{ + /* Check that ops structures exist */ + if (src == NULL || dst == NULL || src->ops == NULL || dst->ops == NULL) + return(-1); + memcpy(dst->ops, src->ops, sizeof(struct _SUNMemoryHelper_Ops)); + return(0); +} + + +booleantype SUNMemoryHelper_ImplementsRequiredOps(SUNMemoryHelper helper) +{ + if (helper->ops->alloc == NULL || helper->ops->dealloc == NULL || + helper->ops->copy == NULL) + { + return(SUNFALSE); + } + return(SUNTRUE); +} + + +SUNMemory SUNMemoryHelper_Alias(SUNMemory mem) +{ + SUNMemory alias = SUNMemoryNewEmpty(); + + alias->ptr = mem->ptr; + alias->type = mem->type; + alias->own = SUNFALSE; + + return(alias); +} + + +SUNMemory SUNMemoryHelper_Wrap(void* ptr, SUNMemoryType mem_type) +{ + SUNMemory mem = SUNMemoryNewEmpty(); + + mem->ptr = ptr; + mem->own = SUNFALSE; + + switch(mem_type) + { + case SUNMEMTYPE_HOST: + mem->type = SUNMEMTYPE_HOST; + break; + case SUNMEMTYPE_PINNED: + mem->type = SUNMEMTYPE_PINNED; + break; + case SUNMEMTYPE_DEVICE: + mem->type = SUNMEMTYPE_DEVICE; + break; + case SUNMEMTYPE_UVM: + mem->type = SUNMEMTYPE_UVM; + break; + default: + free(mem); + SUNDIALS_DEBUG_PRINT("ERROR in SUNMemoryHelper_Wrap: unknown memory type\n"); + return(NULL); + } + + return(mem); +} + +int SUNMemoryHelper_GetAllocStats(SUNMemoryHelper helper, SUNMemoryType mem_type, unsigned long* num_allocations, + unsigned long* num_deallocations, size_t* bytes_allocated, + size_t* bytes_high_watermark) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(helper)); + if (helper->ops->getallocstats) { + return helper->ops->getallocstats(helper, mem_type, num_allocations, num_deallocations, bytes_allocated, bytes_high_watermark); + } else { + ier = -1; + } + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(helper)); + return(ier); +} + + +int SUNMemoryHelper_Alloc(SUNMemoryHelper helper, SUNMemory* memptr, + size_t mem_size, SUNMemoryType mem_type, void* queue) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(helper)); + if (helper->ops->alloc == NULL) { + ier = -1; + } else { + ier = helper->ops->alloc(helper, memptr, mem_size, mem_type, queue); + } + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(helper)); + return(ier); +} + + +int SUNMemoryHelper_Dealloc(SUNMemoryHelper helper, SUNMemory mem, void* queue) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(helper)); + if (helper->ops->dealloc == NULL) { ier = -1; } + if (!mem) { + ier = 0; + } else { + ier = helper->ops->dealloc(helper, mem, queue); + } + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(helper)); + return(ier); +} + + +int SUNMemoryHelper_Copy(SUNMemoryHelper helper, SUNMemory dst, + SUNMemory src, size_t memory_size, void* queue) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(helper)); + if (helper->ops->copy == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMemoryHelper_Copy: function pointer is NULL\n"); + ier = -1; + } + else + { + ier = helper->ops->copy(helper, dst, src, memory_size, queue); + } + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(helper)); + return(ier); +} + + +int SUNMemoryHelper_CopyAsync(SUNMemoryHelper helper, SUNMemory dst, + SUNMemory src, size_t memory_size, + void* queue) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(helper)); + if (helper->ops->copyasync == NULL) + ier = SUNMemoryHelper_Copy(helper, dst, src, memory_size, queue); + else + ier = helper->ops->copyasync(helper, dst, src, memory_size, queue); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(helper)); + return(ier); +} + + +int SUNMemoryHelper_Destroy(SUNMemoryHelper helper) +{ + if (!helper) return 0; + + if (helper->ops->destroy) + { + /* user helper defined destroy */ + return helper->ops->destroy(helper); + } + else if (helper->content) + { + /* helper should have defined destroy */ + return -1; + } + else + { + /* default destroy */ + free(helper->ops); + free(helper); + return 0; + } + + return 0; +} + + +SUNMemoryHelper SUNMemoryHelper_Clone(SUNMemoryHelper helper) +{ + if (helper->ops->clone == NULL) + { + if (helper->content != NULL) + { + return(NULL); + } + else + { + SUNMemoryHelper hclone = SUNMemoryHelper_NewEmpty(helper->sunctx); + if (hclone) SUNMemoryHelper_CopyOps(helper, hclone); + return(hclone); + } + } + else + { + return(helper->ops->clone(helper)); + } +} diff --git a/src/lib/sundials/sundials_nonlinearsolver.c b/src/lib/sundials/sundials_nonlinearsolver.c index 6c604a3..87c0a1e 100644 --- a/src/lib/sundials/sundials_nonlinearsolver.c +++ b/src/lib/sundials/sundials_nonlinearsolver.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -18,16 +18,27 @@ #include #include +#include "sundials_context_impl.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) +static SUNProfiler getSUNProfiler(SUNNonlinearSolver NLS) +{ + return(NLS->sunctx->profiler); +} +#endif /* ----------------------------------------------------------------------------- * Create a new empty SUNLinearSolver object * ---------------------------------------------------------------------------*/ -SUNNonlinearSolver SUNNonlinSolNewEmpty() +SUNNonlinearSolver SUNNonlinSolNewEmpty(SUNContext sunctx) { SUNNonlinearSolver NLS; SUNNonlinearSolver_Ops ops; + /* check input */ + if (!sunctx) return(NULL); + /* create nonlinear solver object */ NLS = NULL; NLS = (SUNNonlinearSolver) malloc(sizeof *NLS); @@ -53,7 +64,8 @@ SUNNonlinearSolver SUNNonlinSolNewEmpty() ops->getcuriter = NULL; ops->getnumconvfails = NULL; - /* attach ops and initialize content to NULL */ + /* attach context and ops, initialize content to NULL */ + NLS->sunctx = sunctx; NLS->ops = ops; NLS->content = NULL; @@ -88,18 +100,26 @@ SUNNonlinearSolver_Type SUNNonlinSolGetType(SUNNonlinearSolver NLS) int SUNNonlinSolInitialize(SUNNonlinearSolver NLS) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(NLS)); if (NLS->ops->initialize) - return((int) NLS->ops->initialize(NLS)); + ier = NLS->ops->initialize(NLS); else - return(SUN_NLS_SUCCESS); + ier = SUN_NLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(NLS)); + return(ier); } int SUNNonlinSolSetup(SUNNonlinearSolver NLS, N_Vector y, void* mem) { + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(NLS)); if (NLS->ops->setup) - return((int) NLS->ops->setup(NLS, y, mem)); + ier = NLS->ops->setup(NLS, y, mem); else - return(SUN_NLS_SUCCESS); + ier = SUN_NLS_SUCCESS; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(NLS)); + return(ier); } int SUNNonlinSolSolve(SUNNonlinearSolver NLS, @@ -107,7 +127,11 @@ int SUNNonlinSolSolve(SUNNonlinearSolver NLS, N_Vector w, realtype tol, booleantype callLSetup, void* mem) { - return((int) NLS->ops->solve(NLS, y0, y, w, tol, callLSetup, mem)); + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(NLS)); + ier = NLS->ops->solve(NLS, y0, y, w, tol, callLSetup, mem); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(NLS)); + return(ier); } int SUNNonlinSolFree(SUNNonlinearSolver NLS) diff --git a/src/lib/sundials/sundials_nvector.c b/src/lib/sundials/sundials_nvector.c index ad4e330..5f5807e 100644 --- a/src/lib/sundials/sundials_nvector.c +++ b/src/lib/sundials/sundials_nvector.c @@ -4,7 +4,7 @@ * Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -18,18 +18,31 @@ * in nvector.h. * -----------------------------------------------------------------*/ +#include #include + #include +#include "sundials_context_impl.h" + +#if defined(SUNDIALS_BUILD_WITH_PROFILING) +static SUNProfiler getSUNProfiler(N_Vector v) +{ + return(v->sunctx->profiler); +} +#endif /* ----------------------------------------------------------------- - * Create an empty NVector object + * Methods that are not ops (i.e., non-virtual and not overridable). * -----------------------------------------------------------------*/ -N_Vector N_VNewEmpty() +/* Create an empty NVector object */ +N_Vector N_VNewEmpty(SUNContext sunctx) { N_Vector v; N_Vector_Ops ops; + if (sunctx == NULL) return(NULL); + /* create vector object */ v = NULL; v = (N_Vector) malloc(sizeof *v); @@ -42,16 +55,23 @@ N_Vector N_VNewEmpty() /* initialize operations to NULL */ + /* + * REQUIRED operations. + * + * These must be implemented by derivations of the generic N_Vector. + */ + /* constructors, destructors, and utility operations */ - ops->nvgetvectorid = NULL; - ops->nvclone = NULL; - ops->nvcloneempty = NULL; - ops->nvdestroy = NULL; - ops->nvspace = NULL; - ops->nvgetarraypointer = NULL; - ops->nvsetarraypointer = NULL; - ops->nvgetcommunicator = NULL; - ops->nvgetlength = NULL; + ops->nvgetvectorid = NULL; + ops->nvclone = NULL; + ops->nvcloneempty = NULL; + ops->nvdestroy = NULL; + ops->nvspace = NULL; + ops->nvgetarraypointer = NULL; + ops->nvgetdevicearraypointer = NULL; + ops->nvsetarraypointer = NULL; + ops->nvgetcommunicator = NULL; + ops->nvgetlength = NULL; /* standard vector operations */ ops->nvlinearsum = NULL; @@ -64,8 +84,8 @@ N_Vector N_VNewEmpty() ops->nvaddconst = NULL; ops->nvdotprod = NULL; ops->nvmaxnorm = NULL; - ops->nvwrmsnormmask = NULL; ops->nvwrmsnorm = NULL; + ops->nvwrmsnormmask = NULL; ops->nvmin = NULL; ops->nvwl2norm = NULL; ops->nvl1norm = NULL; @@ -74,6 +94,12 @@ N_Vector N_VNewEmpty() ops->nvconstrmask = NULL; ops->nvminquotient = NULL; + /* + * OPTIONAL operations. + * + * These operations provide default implementations that may be overriden. + */ + /* fused vector operations (optional) */ ops->nvlinearcombination = NULL; ops->nvscaleaddmulti = NULL; @@ -88,29 +114,47 @@ N_Vector N_VNewEmpty() ops->nvscaleaddmultivectorarray = NULL; ops->nvlinearcombinationvectorarray = NULL; + /* + * OPTIONAL operations with no default implementation. + */ + + ops->nvgetlocallength = NULL; + /* local reduction operations (optional) */ - ops->nvdotprodlocal = NULL; - ops->nvmaxnormlocal = NULL; - ops->nvminlocal = NULL; - ops->nvl1normlocal = NULL; - ops->nvinvtestlocal = NULL; - ops->nvconstrmasklocal = NULL; - ops->nvminquotientlocal = NULL; - ops->nvwsqrsumlocal = NULL; - ops->nvwsqrsummasklocal = NULL; - - /* attach ops and initialize content to NULL */ - v->ops = ops; + ops->nvdotprodlocal = NULL; + ops->nvmaxnormlocal = NULL; + ops->nvminlocal = NULL; + ops->nvl1normlocal = NULL; + ops->nvinvtestlocal = NULL; + ops->nvconstrmasklocal = NULL; + ops->nvminquotientlocal = NULL; + ops->nvwsqrsumlocal = NULL; + ops->nvwsqrsummasklocal = NULL; + + /* single buffer reduction operations */ + ops->nvdotprodmultilocal = NULL; + ops->nvdotprodmultiallreduce = NULL; + + /* XBraid interface operations */ + ops->nvbufsize = NULL; + ops->nvbufpack = NULL; + ops->nvbufunpack = NULL; + + /* debugging functions */ + ops->nvprint = NULL; + ops->nvprintfile = NULL; + + /* attach ops */ + v->ops = ops; + + /* initialize content and sunctx to NULL */ v->content = NULL; + v->sunctx = sunctx; return(v); } - -/* ----------------------------------------------------------------- - * Free a generic N_Vector (assumes content is already empty) - * -----------------------------------------------------------------*/ - +/* Free a generic N_Vector (assumes content is already empty) */ void N_VFreeEmpty(N_Vector v) { if (v == NULL) return; @@ -124,11 +168,7 @@ void N_VFreeEmpty(N_Vector v) return; } - -/* ----------------------------------------------------------------- - * Copy a vector 'ops' structure - * -----------------------------------------------------------------*/ - +/* Copy a vector 'ops' structure */ int N_VCopyOps(N_Vector w, N_Vector v) { /* Check that ops structures exist */ @@ -137,16 +177,24 @@ int N_VCopyOps(N_Vector w, N_Vector v) /* Copy ops from w to v */ + /* + * REQUIRED operations. + * + * These must be implemented by derivations of the generic N_Vector. + */ + /* constructors, destructors, and utility operations */ - v->ops->nvgetvectorid = w->ops->nvgetvectorid; - v->ops->nvclone = w->ops->nvclone; - v->ops->nvcloneempty = w->ops->nvcloneempty; - v->ops->nvdestroy = w->ops->nvdestroy; - v->ops->nvspace = w->ops->nvspace; - v->ops->nvgetarraypointer = w->ops->nvgetarraypointer; - v->ops->nvsetarraypointer = w->ops->nvsetarraypointer; - v->ops->nvgetcommunicator = w->ops->nvgetcommunicator; - v->ops->nvgetlength = w->ops->nvgetlength; + v->ops->nvgetvectorid = w->ops->nvgetvectorid; + v->ops->nvclone = w->ops->nvclone; + v->ops->nvcloneempty = w->ops->nvcloneempty; + v->ops->nvdestroy = w->ops->nvdestroy; + v->ops->nvspace = w->ops->nvspace; + v->ops->nvgetarraypointer = w->ops->nvgetarraypointer; + v->ops->nvgetdevicearraypointer = w->ops->nvgetdevicearraypointer; + v->ops->nvsetarraypointer = w->ops->nvsetarraypointer; + v->ops->nvgetcommunicator = w->ops->nvgetcommunicator; + v->ops->nvgetlength = w->ops->nvgetlength; + v->ops->nvgetlocallength = w->ops->nvgetlocallength; /* standard vector operations */ v->ops->nvlinearsum = w->ops->nvlinearsum; @@ -159,8 +207,8 @@ int N_VCopyOps(N_Vector w, N_Vector v) v->ops->nvaddconst = w->ops->nvaddconst; v->ops->nvdotprod = w->ops->nvdotprod; v->ops->nvmaxnorm = w->ops->nvmaxnorm; - v->ops->nvwrmsnormmask = w->ops->nvwrmsnormmask; v->ops->nvwrmsnorm = w->ops->nvwrmsnorm; + v->ops->nvwrmsnormmask = w->ops->nvwrmsnormmask; v->ops->nvmin = w->ops->nvmin; v->ops->nvwl2norm = w->ops->nvwl2norm; v->ops->nvl1norm = w->ops->nvl1norm; @@ -169,6 +217,12 @@ int N_VCopyOps(N_Vector w, N_Vector v) v->ops->nvconstrmask = w->ops->nvconstrmask; v->ops->nvminquotient = w->ops->nvminquotient; + /* + * OPTIONAL operations. + * + * These operations provide default implementations that may be overriden. + */ + /* fused vector operations */ v->ops->nvlinearcombination = w->ops->nvlinearcombination; v->ops->nvscaleaddmulti = w->ops->nvscaleaddmulti; @@ -183,16 +237,33 @@ int N_VCopyOps(N_Vector w, N_Vector v) v->ops->nvscaleaddmultivectorarray = w->ops->nvscaleaddmultivectorarray; v->ops->nvlinearcombinationvectorarray = w->ops->nvlinearcombinationvectorarray; + /* + * OPTIONAL operations with no default implementation. + */ + /* local reduction operations */ - v->ops->nvdotprodlocal = w->ops->nvdotprodlocal; - v->ops->nvmaxnormlocal = w->ops->nvmaxnormlocal; - v->ops->nvminlocal = w->ops->nvminlocal; - v->ops->nvl1normlocal = w->ops->nvl1normlocal; - v->ops->nvinvtestlocal = w->ops->nvinvtestlocal; - v->ops->nvconstrmasklocal = w->ops->nvconstrmasklocal; - v->ops->nvminquotientlocal = w->ops->nvminquotientlocal; - v->ops->nvwsqrsumlocal = w->ops->nvwsqrsumlocal; - v->ops->nvwsqrsummasklocal = w->ops->nvwsqrsummasklocal; + v->ops->nvdotprodlocal = w->ops->nvdotprodlocal; + v->ops->nvmaxnormlocal = w->ops->nvmaxnormlocal; + v->ops->nvminlocal = w->ops->nvminlocal; + v->ops->nvl1normlocal = w->ops->nvl1normlocal; + v->ops->nvinvtestlocal = w->ops->nvinvtestlocal; + v->ops->nvconstrmasklocal = w->ops->nvconstrmasklocal; + v->ops->nvminquotientlocal = w->ops->nvminquotientlocal; + v->ops->nvwsqrsumlocal = w->ops->nvwsqrsumlocal; + v->ops->nvwsqrsummasklocal = w->ops->nvwsqrsummasklocal; + + /* single buffer reduction operations */ + v->ops->nvdotprodmultilocal = w->ops->nvdotprodmultilocal; + v->ops->nvdotprodmultiallreduce = w->ops->nvdotprodmultiallreduce; + + /* XBraid interface operations */ + v->ops->nvbufsize = w->ops->nvbufsize; + v->ops->nvbufpack = w->ops->nvbufpack; + v->ops->nvbufunpack = w->ops->nvbufunpack; + + /* debugging functions */ + v->ops->nvprint = w->ops->nvprint; + v->ops->nvprintfile = w->ops->nvprintfile; return(0); } @@ -208,12 +279,22 @@ N_Vector_ID N_VGetVectorID(N_Vector w) N_Vector N_VClone(N_Vector w) { - return(w->ops->nvclone(w)); + N_Vector result = NULL; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(w)); + result = w->ops->nvclone(w); + result->sunctx = w->sunctx; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(w)); + return result; } N_Vector N_VCloneEmpty(N_Vector w) { - return(w->ops->nvcloneempty(w)); + N_Vector result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(w)); + result = w->ops->nvcloneempty(w); + result->sunctx = w->sunctx; + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(w)); + return result; } void N_VDestroy(N_Vector v) @@ -244,6 +325,14 @@ realtype *N_VGetArrayPointer(N_Vector v) return((realtype *) v->ops->nvgetarraypointer(v)); } +realtype *N_VGetDeviceArrayPointer(N_Vector v) +{ + if (v->ops->nvgetdevicearraypointer) + return((realtype *) v->ops->nvgetdevicearraypointer(v)); + else + return(NULL); +} + void N_VSetArrayPointer(realtype *v_data, N_Vector v) { v->ops->nvsetarraypointer(v_data, v); @@ -263,173 +352,248 @@ sunindextype N_VGetLength(N_Vector v) return((sunindextype) v->ops->nvgetlength(v)); } +sunindextype N_VGetLocalLength(N_Vector v) +{ + return((sunindextype) v->ops->nvgetlocallength(v)); +} + /* ----------------------------------------------------------------- * standard vector operations * -----------------------------------------------------------------*/ void N_VLinearSum(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvlinearsum(a, x, b, y, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VConst(realtype c, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(z)); z->ops->nvconst(c, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(z)); return; } void N_VProd(N_Vector x, N_Vector y, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvprod(x, y, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VDiv(N_Vector x, N_Vector y, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvdiv(x, y, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VScale(realtype c, N_Vector x, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvscale(c, x, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VAbs(N_Vector x, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvabs(x, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VInv(N_Vector x, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvinv(x, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } void N_VAddConst(N_Vector x, realtype b, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvaddconst(x, b, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } realtype N_VDotProd(N_Vector x, N_Vector y) { - return((realtype) y->ops->nvdotprod(x, y)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) y->ops->nvdotprod(x, y)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VMaxNorm(N_Vector x) { - return((realtype) x->ops->nvmaxnorm(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvmaxnorm(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VWrmsNorm(N_Vector x, N_Vector w) { - return((realtype) x->ops->nvwrmsnorm(x, w)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvwrmsnorm(x, w)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VWrmsNormMask(N_Vector x, N_Vector w, N_Vector id) { - return((realtype) x->ops->nvwrmsnormmask(x, w, id)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvwrmsnormmask(x, w, id)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VMin(N_Vector x) { - return((realtype) x->ops->nvmin(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvmin(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VWL2Norm(N_Vector x, N_Vector w) { - return((realtype) x->ops->nvwl2norm(x, w)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvwl2norm(x, w)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VL1Norm(N_Vector x) { - return((realtype) x->ops->nvl1norm(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvl1norm(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } void N_VCompare(realtype c, N_Vector x, N_Vector z) { + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); z->ops->nvcompare(c, x, z); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); return; } booleantype N_VInvTest(N_Vector x, N_Vector z) { - return((booleantype) z->ops->nvinvtest(x, z)); + booleantype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((booleantype) z->ops->nvinvtest(x, z)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } booleantype N_VConstrMask(N_Vector c, N_Vector x, N_Vector m) { - return((booleantype) x->ops->nvconstrmask(c, x, m)); + booleantype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(c)); + result = ((booleantype) x->ops->nvconstrmask(c, x, m)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(c)); + return(result); } realtype N_VMinQuotient(N_Vector num, N_Vector denom) { - return((realtype) num->ops->nvminquotient(num, denom)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(num)); + result = ((realtype) num->ops->nvminquotient(num, denom)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(num)); + return(result); } + + /* ----------------------------------------------------------------- * OPTIONAL fused vector operations * -----------------------------------------------------------------*/ int N_VLinearCombination(int nvec, realtype* c, N_Vector* X, N_Vector z) { - int i; - realtype ONE=RCONST(1.0); + int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (z->ops->nvlinearcombination != NULL) { - return(z->ops->nvlinearcombination(nvec, c, X, z)); + ier = z->ops->nvlinearcombination(nvec, c, X, z); } else { z->ops->nvscale(c[0], X[0], z); for (i=1; iops->nvlinearsum(c[i], X[i], ONE, z, z); + z->ops->nvlinearsum(c[i], X[i], RCONST(1.0), z, z); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VScaleAddMulti(int nvec, realtype* a, N_Vector x, N_Vector* Y, N_Vector* Z) { - int i; - realtype ONE=RCONST(1.0); + int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); if (x->ops->nvscaleaddmulti != NULL) { - return(x->ops->nvscaleaddmulti(nvec, a, x, Y, Z)); + ier = x->ops->nvscaleaddmulti(nvec, a, x, Y, Z); } else { for (i=0; iops->nvlinearsum(a[i], x, ONE, Y[i], Z[i]); + x->ops->nvlinearsum(a[i], x, RCONST(1.0), Y[i], Z[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(ier); } int N_VDotProdMulti(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods) { - int i; + int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); if (x->ops->nvdotprodmulti != NULL) { - return(x->ops->nvdotprodmulti(nvec, x, Y, dotprods)); + ier = x->ops->nvdotprodmulti(nvec, x, Y, dotprods); } else { for (i=0; iops->nvdotprod(x, Y[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(ier); } /* ----------------------------------------------------------------- @@ -439,111 +603,126 @@ int N_VDotProdMulti(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods) int N_VLinearSumVectorArray(int nvec, realtype a, N_Vector* X, realtype b, N_Vector* Y, N_Vector* Z) { - int i; + int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (Z[0]->ops->nvlinearsumvectorarray != NULL) { - return(Z[0]->ops->nvlinearsumvectorarray(nvec, a, X, b, Y, Z)); + ier = Z[0]->ops->nvlinearsumvectorarray(nvec, a, X, b, Y, Z); } else { for (i=0; iops->nvlinearsum(a, X[i], b, Y[i], Z[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VScaleVectorArray(int nvec, realtype* c, N_Vector* X, N_Vector* Z) { - int i; + int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (Z[0]->ops->nvscalevectorarray != NULL) { - return(Z[0]->ops->nvscalevectorarray(nvec, c, X, Z)); + ier = Z[0]->ops->nvscalevectorarray(nvec, c, X, Z); } else { for (i=0; iops->nvscale(c[i], X[i], Z[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VConstVectorArray(int nvec, realtype c, N_Vector* Z) { int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(Z[0])); if (Z[0]->ops->nvconstvectorarray != NULL) { ier = Z[0]->ops->nvconstvectorarray(nvec, c, Z); - return(ier); } else { for (i=0; iops->nvconst(c, Z[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(Z[0])); + return(ier); } int N_VWrmsNormVectorArray(int nvec, N_Vector* X, N_Vector* W, realtype* nrm) { int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (X[0]->ops->nvwrmsnormvectorarray != NULL) { ier = X[0]->ops->nvwrmsnormvectorarray(nvec, X, W, nrm); - return(ier); } else { for (i=0; iops->nvwrmsnorm(X[i], W[i]); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VWrmsNormMaskVectorArray(int nvec, N_Vector* X, N_Vector* W, N_Vector id, realtype* nrm) { int i, ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (id->ops->nvwrmsnormmaskvectorarray != NULL) { ier = id->ops->nvwrmsnormmaskvectorarray(nvec, X, W, id, nrm); - return(ier); } else { for (i=0; iops->nvwrmsnormmask(X[i], W[i], id); } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VScaleAddMultiVectorArray(int nvec, int nsum, realtype* a, N_Vector* X, N_Vector** Y, N_Vector** Z) { - int i, j; - int ier=0; - realtype ONE=RCONST(1.0); - N_Vector* YY=NULL; - N_Vector* ZZ=NULL; + int i, j, ier; + N_Vector* YY = NULL; + N_Vector* ZZ = NULL; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0])); if (X[0]->ops->nvscaleaddmultivectorarray != NULL) { ier = X[0]->ops->nvscaleaddmultivectorarray(nvec, nsum, a, X, Y, Z); - return(ier); } else if (X[0]->ops->nvscaleaddmulti != NULL ) { @@ -566,31 +745,30 @@ int N_VScaleAddMultiVectorArray(int nvec, int nsum, realtype* a, N_Vector* X, free(YY); free(ZZ); - return(ier); - } else { for (i=0; iops->nvlinearsum(a[j], X[i], ONE, Y[j][i], Z[j][i]); + X[0]->ops->nvlinearsum(a[j], X[i], RCONST(1.0), Y[j][i], Z[j][i]); } } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0])); + return(ier); } int N_VLinearCombinationVectorArray(int nvec, int nsum, realtype* c, N_Vector** X, N_Vector* Z) { - int i, j; - int ier=0; - realtype ONE=RCONST(1.0); - N_Vector* Y=NULL; + int i, j, ier; + N_Vector* Y = NULL; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(X[0][0])); if (Z[0]->ops->nvlinearcombinationvectorarray != NULL) { ier = Z[0]->ops->nvlinearcombinationvectorarray(nvec, nsum, c, X, Z); - return(ier); } else if (Z[0]->ops->nvlinearcombination != NULL ) { @@ -610,18 +788,19 @@ int N_VLinearCombinationVectorArray(int nvec, int nsum, realtype* c, /* free array of vectors */ free(Y); - return(ier); - } else { for (i=0; iops->nvscale(c[0], X[0][i], Z[i]); for (j=1; jops->nvlinearsum(c[j], X[j][i], ONE, Z[i], Z[i]); + Z[0]->ops->nvlinearsum(c[j], X[j][i], RCONST(1.0), Z[i], Z[i]); } } - return(0); + ier = 0; } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(X[0][0])); + return(ier); } /* ----------------------------------------------------------------- @@ -630,47 +809,155 @@ int N_VLinearCombinationVectorArray(int nvec, int nsum, realtype* c, realtype N_VDotProdLocal(N_Vector x, N_Vector y) { - return((realtype) y->ops->nvdotprodlocal(x, y)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) y->ops->nvdotprodlocal(x, y)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VMaxNormLocal(N_Vector x) { - return((realtype) x->ops->nvmaxnormlocal(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvmaxnormlocal(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VMinLocal(N_Vector x) { - return((realtype) x->ops->nvminlocal(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvminlocal(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VL1NormLocal(N_Vector x) { - return((realtype) x->ops->nvl1normlocal(x)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvl1normlocal(x)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VWSqrSumLocal(N_Vector x, N_Vector w) { - return((realtype) x->ops->nvwsqrsumlocal(x,w)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvwsqrsumlocal(x,w)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VWSqrSumMaskLocal(N_Vector x, N_Vector w, N_Vector id) { - return((realtype) x->ops->nvwsqrsummasklocal(x,w,id)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((realtype) x->ops->nvwsqrsummasklocal(x,w,id)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } booleantype N_VInvTestLocal(N_Vector x, N_Vector z) { - return((booleantype) z->ops->nvinvtestlocal(x,z)); + booleantype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((booleantype) z->ops->nvinvtestlocal(x,z)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } booleantype N_VConstrMaskLocal(N_Vector c, N_Vector x, N_Vector m) { - return((booleantype) x->ops->nvconstrmasklocal(c,x,m)); + booleantype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + result = ((booleantype) x->ops->nvconstrmasklocal(c,x,m)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(result); } realtype N_VMinQuotientLocal(N_Vector num, N_Vector denom) { - return((realtype) num->ops->nvminquotientlocal(num,denom)); + realtype result; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(num)); + result = ((realtype) num->ops->nvminquotientlocal(num,denom)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(num)); + return(result); +} + +/* ------------------------------------------- + * OPTIONAL single buffer reduction operations + * -------------------------------------------*/ + +int N_VDotProdMultiLocal(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods) +{ + int i; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + + if (x->ops->nvdotprodmultilocal) + return((int) x->ops->nvdotprodmultilocal(nvec, x, Y, dotprods)); + + if (x->ops->nvdotprodlocal) { + for (i = 0; i < nvec; i++) { + dotprods[i] = x->ops->nvdotprodlocal(x, Y[i]); + } + return(0); + } + + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(-1); +} + +int N_VDotProdMultiAllReduce(int nvec, N_Vector x, realtype* sum) +{ + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + if (x->ops->nvdotprodmultiallreduce) + return(x->ops->nvdotprodmultiallreduce(nvec, x, sum)); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(-1); +} + +/* ------------------------------------ + * OPTIONAL XBraid interface operations + * ------------------------------------*/ + +int N_VBufSize(N_Vector x, sunindextype *size) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + if (x->ops->nvbufsize == NULL) + ier = -1; + else + ier = x->ops->nvbufsize(x, size); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(ier); +} + +int N_VBufPack(N_Vector x, void *buf) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + if (x->ops->nvbufpack == NULL) + ier = -1; + else + ier = x->ops->nvbufpack(x, buf); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(ier); +} + +int N_VBufUnpack(N_Vector x, void *buf) +{ + int ier; + SUNDIALS_MARK_FUNCTION_BEGIN(getSUNProfiler(x)); + if (x->ops->nvbufunpack == NULL) + ier = -1; + else + ier = x->ops->nvbufunpack(x, buf); + SUNDIALS_MARK_FUNCTION_END(getSUNProfiler(x)); + return(ier); } /* ----------------------------------------------------------------- @@ -721,7 +1008,7 @@ N_Vector* N_VCloneVectorArray(int count, N_Vector w) if (count <= 0) return(NULL); vs = (N_Vector* ) malloc(count * sizeof(N_Vector)); - if(vs == NULL) return(NULL); + if (vs == NULL) return(NULL); for (j = 0; j < count; j++) { vs[j] = N_VClone(w); @@ -738,9 +1025,12 @@ void N_VDestroyVectorArray(N_Vector* vs, int count) { int j; - if (vs==NULL) return; + if (vs == NULL) return; - for (j = 0; j < count; j++) N_VDestroy(vs[j]); + for (j = 0; j < count; j++) { + N_VDestroy(vs[j]); + vs[j] = NULL; + } free(vs); vs = NULL; @@ -761,3 +1051,33 @@ void N_VSetVecAtIndexVectorArray(N_Vector* vs, int index, N_Vector w) else if (index < 0) return; else vs[index] = w; } + + +/* ----------------------------------------------------------------- + * Debugging functions + * ----------------------------------------------------------------- */ + +void N_VPrint(N_Vector v) +{ + if (v == NULL) { + printf("NULL Vector\n"); + } else if (v->ops->nvprint == NULL) { + printf("NULL Print Op\n"); + } else { + v->ops->nvprint(v); + } +} + + +void N_VPrintFile(N_Vector v, FILE* outfile) +{ + if (outfile != NULL) { + if (v == NULL) { + fprintf(outfile, "NULL Vector\n"); + } else if (v->ops->nvprintfile == NULL) { + fprintf(outfile, "NULL PrintFile Op\n"); + } else { + v->ops->nvprintfile(v, outfile); + } + } +} diff --git a/src/lib/sundials/sundials_nvector_senswrapper.c b/src/lib/sundials/sundials_nvector_senswrapper.c index b16f401..8cdb6b9 100644 --- a/src/lib/sundials/sundials_nvector_senswrapper.c +++ b/src/lib/sundials/sundials_nvector_senswrapper.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -32,87 +32,64 @@ /*------------------------------------------------------------------------------ create a new empty vector wrapper with space for vectors ----------------------------------------------------------------------------*/ -N_Vector N_VNewEmpty_SensWrapper(int nvecs) +N_Vector N_VNewEmpty_SensWrapper(int nvecs, SUNContext sunctx) { int i; N_Vector v; - N_Vector_Ops ops; N_VectorContent_SensWrapper content; /* return if wrapper is empty */ if (nvecs < 1) return(NULL); - /* create vector */ + /* Create an empty vector object */ v = NULL; - v = (N_Vector) malloc(sizeof *v); + v = N_VNewEmpty(sunctx); if (v == NULL) return(NULL); - /* create vector operation structure */ - ops = NULL; - ops = (N_Vector_Ops) malloc(sizeof *ops); - if (ops == NULL) {free(v); return(NULL);} + /* Attach operations */ - ops->nvgetvectorid = NULL; - ops->nvclone = N_VClone_SensWrapper; - ops->nvcloneempty = N_VCloneEmpty_SensWrapper; - ops->nvdestroy = N_VDestroy_SensWrapper; - ops->nvspace = NULL; - ops->nvgetarraypointer = NULL; - ops->nvsetarraypointer = NULL; + v->ops->nvclone = N_VClone_SensWrapper; + v->ops->nvcloneempty = N_VCloneEmpty_SensWrapper; + v->ops->nvdestroy = N_VDestroy_SensWrapper; /* standard vector operations */ - ops->nvlinearsum = N_VLinearSum_SensWrapper; - ops->nvconst = N_VConst_SensWrapper; - ops->nvprod = N_VProd_SensWrapper; - ops->nvdiv = N_VDiv_SensWrapper; - ops->nvscale = N_VScale_SensWrapper; - ops->nvabs = N_VAbs_SensWrapper; - ops->nvinv = N_VInv_SensWrapper; - ops->nvaddconst = N_VAddConst_SensWrapper; - ops->nvdotprod = N_VDotProd_SensWrapper; - ops->nvmaxnorm = N_VMaxNorm_SensWrapper; - ops->nvwrmsnormmask = N_VWrmsNormMask_SensWrapper; - ops->nvwrmsnorm = N_VWrmsNorm_SensWrapper; - ops->nvmin = N_VMin_SensWrapper; - ops->nvwl2norm = N_VWL2Norm_SensWrapper; - ops->nvl1norm = N_VL1Norm_SensWrapper; - ops->nvcompare = N_VCompare_SensWrapper; - ops->nvinvtest = N_VInvTest_SensWrapper; - ops->nvconstrmask = N_VConstrMask_SensWrapper; - ops->nvminquotient = N_VMinQuotient_SensWrapper; - - /* fused vector operations */ - ops->nvlinearcombination = NULL; - ops->nvscaleaddmulti = NULL; - ops->nvdotprodmulti = NULL; - - /* vector array operations */ - ops->nvlinearsumvectorarray = NULL; - ops->nvscalevectorarray = NULL; - ops->nvconstvectorarray = NULL; - ops->nvwrmsnormvectorarray = NULL; - ops->nvwrmsnormmaskvectorarray = NULL; - ops->nvscaleaddmultivectorarray = NULL; - ops->nvlinearcombinationvectorarray = NULL; + v->ops->nvlinearsum = N_VLinearSum_SensWrapper; + v->ops->nvconst = N_VConst_SensWrapper; + v->ops->nvprod = N_VProd_SensWrapper; + v->ops->nvdiv = N_VDiv_SensWrapper; + v->ops->nvscale = N_VScale_SensWrapper; + v->ops->nvabs = N_VAbs_SensWrapper; + v->ops->nvinv = N_VInv_SensWrapper; + v->ops->nvaddconst = N_VAddConst_SensWrapper; + v->ops->nvdotprod = N_VDotProd_SensWrapper; + v->ops->nvmaxnorm = N_VMaxNorm_SensWrapper; + v->ops->nvwrmsnormmask = N_VWrmsNormMask_SensWrapper; + v->ops->nvwrmsnorm = N_VWrmsNorm_SensWrapper; + v->ops->nvmin = N_VMin_SensWrapper; + v->ops->nvwl2norm = N_VWL2Norm_SensWrapper; + v->ops->nvl1norm = N_VL1Norm_SensWrapper; + v->ops->nvcompare = N_VCompare_SensWrapper; + v->ops->nvinvtest = N_VInvTest_SensWrapper; + v->ops->nvconstrmask = N_VConstrMask_SensWrapper; + v->ops->nvminquotient = N_VMinQuotient_SensWrapper; /* create content */ content = NULL; content = (N_VectorContent_SensWrapper) malloc(sizeof *content); - if (content == NULL) {free(ops); free(v); return(NULL);} + if (content == NULL) { N_VFreeEmpty(v); return(NULL); } content->nvecs = nvecs; content->own_vecs = SUNFALSE; content->vecs = NULL; content->vecs = (N_Vector*) malloc(nvecs * sizeof(N_Vector)); - if (content->vecs == NULL) {free(ops); free(v); free(content); return(NULL);} + if (content->vecs == NULL) { free(content); N_VFreeEmpty(v); return(NULL); } /* initialize vector array to null */ for (i=0; i < nvecs; i++) content->vecs[i] = NULL; - /* attach content and ops */ + /* attach content */ v->content = content; - v->ops = ops; return(v); } @@ -124,7 +101,7 @@ N_Vector N_VNew_SensWrapper(int count, N_Vector w) int i; v = NULL; - v = N_VNewEmpty_SensWrapper(count); + v = N_VNewEmpty_SensWrapper(count, w->sunctx); if (v == NULL) return(NULL); for (i=0; i < NV_NVECS_SW(v); i++) { @@ -135,6 +112,9 @@ N_Vector N_VNew_SensWrapper(int count, N_Vector w) /* update own vectors status */ NV_OWN_VECS_SW(v) = SUNTRUE; + /* set context */ + v->sunctx = w->sunctx; + return(v); } diff --git a/src/lib/sundials/sundials_profiler.c b/src/lib/sundials/sundials_profiler.c new file mode 100644 index 0000000..1345226 --- /dev/null +++ b/src/lib/sundials/sundials_profiler.c @@ -0,0 +1,517 @@ +/* ----------------------------------------------------------------- + * Programmer: Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------*/ + +#include + +#if SUNDIALS_MPI_ENABLED +#include +#include +#elif defined(SUNDIALS_HAVE_POSIX_TIMERS) +/* Minimum POSIX version needed for struct timespec and clock_monotonic */ +#if !defined(_POSIX_C_SOURCE) || (_POSIX_C_SOURCE < 199309L) +#define _POSIX_C_SOURCE 199309L +#endif +#include +#include +#include +#else +#error Either MPI_Wtime or clock_getttime is required but neither were found +#endif + +#include +#include + +#include +#include +#include "sundials_hashmap.h" +#include "sundials_debug.h" + +#define SUNDIALS_ROOT_TIMER ((const char*) "From profiler epoch") + +/* Private functions */ +#if SUNDIALS_MPI_ENABLED +static int sunCollectTimers(SUNProfiler p); +#endif +static void sunPrintTimers(int idx, SUNHashMapKeyValue kv, FILE* fp, void* pvoid); +static int sunCompareTimes(const void* l, const void* r); + +/* + sunTimerStruct. + A private structure holding timing information. + */ + +struct _sunTimerStruct +{ +#if SUNDIALS_MPI_ENABLED + double tic; + double toc; +#else + struct timespec* tic; + struct timespec* toc; +#endif + double average; + double maximum; + double elapsed; + long count; +}; + +typedef struct _sunTimerStruct sunTimerStruct; + +static sunTimerStruct* sunTimerStructNew(void) +{ + sunTimerStruct* ts = (sunTimerStruct*) malloc(sizeof(sunTimerStruct)); +#if SUNDIALS_MPI_ENABLED + ts->tic = 0.0; + ts->toc = 0.0; +#else + ts->tic = (struct timespec *) malloc(sizeof(struct timespec)); + ts->toc = (struct timespec *) malloc(sizeof(struct timespec)); + ts->tic->tv_sec = 0; + ts->tic->tv_nsec = 0; +#endif + ts->elapsed = 0.0; + ts->average = 0.0; + ts->maximum = 0.0; + ts->count = 0; + return ts; +} + +static void sunTimerStructFree(void* TS) +{ + sunTimerStruct* ts = (sunTimerStruct*) TS; + if (ts) + { +#if !SUNDIALS_MPI_ENABLED + if (ts->tic) free(ts->tic); + if (ts->toc) free(ts->toc); +#endif + free(ts); + } +} + +static void sunStartTiming(sunTimerStruct* entry) +{ +#if SUNDIALS_MPI_ENABLED + entry->tic = MPI_Wtime(); +#else + clock_gettime(CLOCK_MONOTONIC, entry->tic); +#endif +} + +static void sunStopTiming(sunTimerStruct* entry) +{ +#if SUNDIALS_MPI_ENABLED + entry->toc = MPI_Wtime(); + entry->elapsed += entry->toc - entry->tic; +#else + clock_gettime(CLOCK_MONOTONIC, entry->toc); + entry->elapsed += + ((double) (entry->toc->tv_sec - entry->tic->tv_sec) + + (double) (entry->toc->tv_nsec - entry->tic->tv_nsec) * 1e-9); +#endif + /* Initialize to total value */ + entry->average = entry->elapsed; + entry->maximum = entry->elapsed; +} + +static void sunResetTiming(sunTimerStruct* entry) +{ +#if SUNDIALS_MPI_ENABLED + entry->tic = 0.0; + entry->toc = 0.0; +#else + entry->tic->tv_sec = 0; + entry->tic->tv_nsec = 0; + entry->toc->tv_sec = 0; + entry->toc->tv_nsec = 0; +#endif + entry->elapsed = 0.0; + entry->average = 0.0; + entry->maximum = 0.0; + entry->count = 0; +} + + +/* + SUNProfiler. + + This structure holds all of the timers in a map.s + */ + +struct _SUNProfiler +{ + void* comm; + char* title; + SUNHashMap map; + sunTimerStruct* overhead; + double sundials_time; +}; + +int SUNProfiler_Create(void* comm, const char* title, SUNProfiler* p) +{ + SUNProfiler profiler; + int max_entries; + char* max_entries_env; + + *p = profiler = (SUNProfiler) malloc(sizeof(struct _SUNProfiler)); + + if (profiler == NULL) + return(-1); + + profiler->overhead = sunTimerStructNew(); + if (profiler->overhead == NULL) + { + free(profiler); + *p = profiler = NULL; + return(-1); + } + + sunStartTiming(profiler->overhead); + + /* Check to see if max entries env variable was set, and use if it was. */ + max_entries = 2560; + max_entries_env = getenv("SUNPROFILER_MAX_ENTRIES"); + if (max_entries_env) max_entries = atoi(max_entries_env); + if (max_entries <= 0) max_entries = 2560; + + /* Create the hashmap used to store the timers */ + if (SUNHashMap_New(max_entries, &profiler->map)) + { + sunTimerStructFree((void*) profiler->overhead); + free(profiler); + *p = profiler = NULL; + return(-1); + } + + /* Attach the comm, duplicating it if MPI is used. */ +#if SUNDIALS_MPI_ENABLED + profiler->comm = NULL; + if (comm != NULL) + { + profiler->comm = malloc(sizeof(MPI_Comm)); + MPI_Comm_dup(*((MPI_Comm*) comm), (MPI_Comm*) profiler->comm); + } +#else + profiler->comm = comm; +#endif + + /* Copy the title of the profiler (note strlen does not include terminating + null character hence the +1) */ + profiler->title = malloc((strlen(title) + 1) * sizeof(char)); + strcpy(profiler->title, title); + + /* Initialize the overall timer to 0. */ + profiler->sundials_time = 0.0; + + SUNDIALS_MARK_BEGIN(profiler, SUNDIALS_ROOT_TIMER); + sunStopTiming(profiler->overhead); + + return(0); +} + +int SUNProfiler_Free(SUNProfiler* p) +{ + if (p == NULL) return(-1); + + SUNDIALS_MARK_END(*p, SUNDIALS_ROOT_TIMER); + + if (*p) + { + SUNHashMap_Destroy(&(*p)->map, sunTimerStructFree); + sunTimerStructFree((void*) (*p)->overhead); +#if SUNDIALS_MPI_ENABLED + if ((*p)->comm) + { + MPI_Comm_free((*p)->comm); + free((*p)->comm); + } +#endif + free((*p)->title); + free(*p); + } + *p = NULL; + + return(0); +} + +int SUNProfiler_Begin(SUNProfiler p, const char* name) +{ + int ier; + sunTimerStruct* timer = NULL; +#ifdef SUNDIALS_DEBUG + size_t slen; + char* errmsg; +#endif + + if (p == NULL) return(-1); + sunStartTiming(p->overhead); + + if (SUNHashMap_GetValue(p->map, name, (void**) &timer)) + { + timer = sunTimerStructNew(); + ier = SUNHashMap_Insert(p->map, name, (void*) timer); + if (ier) + { +#ifdef SUNDIALS_DEBUG + slen = strlen(name); + errmsg = malloc(slen*sizeof(char)); + snprintf(errmsg, 128+slen, "(((( [ERROR] in SUNProfilerBegin: SUNHashMapInsert failed with code %d while inserting %s))))\n", ier, name); + SUNDIALS_DEBUG_PRINT(errmsg); + free(errmsg); +#endif + sunTimerStructFree(timer); + sunStopTiming(p->overhead); + return(-1); + } + } + + timer->count++; + sunStartTiming(timer); + + sunStopTiming(p->overhead); + return(0); +} + +int SUNProfiler_End(SUNProfiler p, const char* name) +{ + sunTimerStruct* timer; + + if (p == NULL) return(-1); + sunStartTiming(p->overhead); + + if (SUNHashMap_GetValue(p->map, name, (void**) &timer)) + { + sunStopTiming(p->overhead); + return(-1); + } + + sunStopTiming(timer); + + sunStopTiming(p->overhead); + return(0); +} + +int SUNProfiler_Reset(SUNProfiler p) +{ + int i = 0; + sunTimerStruct* timer = NULL; + + /* Check for valid input */ + if (!p) return -1; + if (!(p->overhead)) return -1; + if (!(p->map)) return -1; + if (!(p->map->buckets)) return -1; + + /* Reset the overhead timer */ + sunResetTiming(p->overhead); + sunStartTiming(p->overhead); + + /* Reset all timers */ + for (i = 0; i < p->map->max_size; i++) + { + if (!(p->map->buckets[i])) continue; + timer = p->map->buckets[i]->value; + if (timer) sunResetTiming(timer); + } + + /* Reset the overall timer. */ + p->sundials_time = 0.0; + + SUNDIALS_MARK_BEGIN(p, SUNDIALS_ROOT_TIMER); + sunStopTiming(p->overhead); + + return 0; +} + +int SUNProfiler_Print(SUNProfiler p, FILE* fp) +{ + int i = 0; + int rank = 0; + sunTimerStruct* timer = NULL; + SUNHashMapKeyValue* sorted = NULL; + + if (p == NULL) return(-1); + sunStartTiming(p->overhead); + + /* Get the total SUNDIALS time up to this point */ + SUNDIALS_MARK_END(p, SUNDIALS_ROOT_TIMER); + SUNDIALS_MARK_BEGIN(p, SUNDIALS_ROOT_TIMER); + + if (SUNHashMap_GetValue(p->map, SUNDIALS_ROOT_TIMER, (void**) &timer)) + return(-1); + p->sundials_time = timer->elapsed; + +#if SUNDIALS_MPI_ENABLED + if (p->comm) + { + MPI_Comm_rank(*((MPI_Comm*) p->comm), &rank); + /* Find the max and average time across all ranks */ + sunCollectTimers(p); + } +#endif + + if (rank == 0) + { + /* Sort the timers in descending order */ + if (SUNHashMap_Sort(p->map, &sorted, sunCompareTimes)) + return(-1); + fprintf(fp, "\n================================================================================================================\n"); + fprintf(fp, "SUNDIALS GIT VERSION: %s\n", SUNDIALS_GIT_VERSION); + fprintf(fp, "SUNDIALS PROFILER: %s\n", p->title); + fprintf(fp, "%-40s\t %% time (inclusive) \t max/rank \t average/rank \t count \n", "Results:"); + fprintf(fp, "================================================================================================================\n"); + +#if SUNDIALS_MPI_ENABLED + if (p->comm == NULL) + printf("WARNING: no MPI communicator provided, times shown are for rank 0\n"); +#endif + + /* Print all the other timers out */ + for (i = 0; i < p->map->size; i++) + if (sorted[i]) sunPrintTimers(i, sorted[i], fp, (void*) p); + free(sorted); + } + + sunStopTiming(p->overhead); + + if (rank == 0) + { + /* Print out the total time and the profiler overhead */ + fprintf(fp, "%-40s\t %6.2f%% \t %.6fs \t -- \t\t -- \n", "Est. profiler overhead", + p->overhead->elapsed/p->sundials_time, + p->overhead->elapsed); + + /* End of output */ + fprintf(fp, "\n"); + } + + return(0); +} + +#if SUNDIALS_MPI_ENABLED +static void sunTimerStructReduceMaxAndSum(void* a, void* b, int* len, MPI_Datatype* dType) +{ + sunTimerStruct* a_ts = (sunTimerStruct*) a; + sunTimerStruct* b_ts = (sunTimerStruct*) b; + int i; + for (i = 0; i < *len; ++i) { + b_ts[i].average += a_ts[i].elapsed; + b_ts[i].maximum = SUNMAX(a_ts[i].maximum, b_ts[i].maximum); + } +} + +/* Find the max and average time across all ranks */ +int sunCollectTimers(SUNProfiler p) +{ + int i, rank, nranks; + + MPI_Comm comm = *((MPI_Comm*) p->comm); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &nranks); + + sunTimerStruct** values = NULL; + + /* Extract the elapsed times from the hash map */ + SUNHashMap_Values(p->map, (void***) &values, sizeof(sunTimerStruct)); + sunTimerStruct* reduced = (sunTimerStruct*) malloc(p->map->size*sizeof(sunTimerStruct)); + for (i = 0; i < p->map->size; ++i) + reduced[i] = *values[i]; + + /* Register MPI datatype for sunTimerStruct */ + MPI_Datatype tmp_type, MPI_sunTimerStruct; + const int block_lens[2] = { 5, 1 }; + const MPI_Datatype types[2] = { MPI_DOUBLE, MPI_LONG }; + const MPI_Aint displ[2] = { offsetof(sunTimerStruct, tic), + offsetof(sunTimerStruct, count) }; + MPI_Aint lb, extent; + + MPI_Type_create_struct(2, block_lens, displ, types, &tmp_type); + MPI_Type_get_extent(tmp_type, &lb, &extent); + extent = sizeof(sunTimerStruct); + MPI_Type_create_resized(tmp_type, lb, extent, &MPI_sunTimerStruct); + MPI_Type_commit(&MPI_sunTimerStruct); + + /* Register max and sum MPI reduction operations for our datatype */ + MPI_Op MPI_sunTimerStruct_MAXANDSUM; + MPI_Op_create(sunTimerStructReduceMaxAndSum, 1, &MPI_sunTimerStruct_MAXANDSUM); + + /* Compute max and average time across all ranks */ + if (rank == 0) + { + MPI_Reduce(MPI_IN_PLACE, reduced, p->map->size, MPI_sunTimerStruct, + MPI_sunTimerStruct_MAXANDSUM, 0, comm); + } + else + { + MPI_Reduce(reduced, reduced, p->map->size, MPI_sunTimerStruct, + MPI_sunTimerStruct_MAXANDSUM, 0, comm); + } + + /* Cleanup custom MPI datatype and operations */ + MPI_Type_free(&tmp_type); + MPI_Type_free(&MPI_sunTimerStruct); + MPI_Op_free(&MPI_sunTimerStruct_MAXANDSUM); + + /* Update the values that are in this rank's hash map. */ + for (i = 0; i < p->map->size; ++i) { + values[i]->average = reduced[i].average / (realtype) nranks; + values[i]->maximum = reduced[i].maximum; + } + + free(reduced); + free(values); + + return(0); +} +#endif + +/* Print out the: timer name, percentage of exec time (based on the max), + max across ranks, average across ranks, and the timer counter. */ +void sunPrintTimers(int idx, SUNHashMapKeyValue kv, FILE* fp, void* pvoid) +{ + SUNProfiler p = (SUNProfiler) pvoid; + sunTimerStruct* ts = (sunTimerStruct*) kv->value; + double maximum = ts->maximum; + double average = ts->average; + double percent = strcmp((const char*) kv->key, (const char*) SUNDIALS_ROOT_TIMER) ? maximum / p->sundials_time * 100 : 100; + fprintf(fp, "%-40s\t %6.2f%% \t %.6fs \t %.6fs \t %ld\n", + kv->key, percent, maximum, average, ts->count); +} + +/* Comparator for qsort that compares key-value pairs + based on the maximum time in the sunTimerStruct. */ +int sunCompareTimes(const void* l, const void* r) +{ + double left_max; + double right_max; + + const SUNHashMapKeyValue left = *((SUNHashMapKeyValue*) l); + const SUNHashMapKeyValue right = *((SUNHashMapKeyValue*) r); + + if (left == NULL && right == NULL) + return(0); + if (left == NULL) + return(1); + if (right == NULL) + return(-1); + + left_max = ((sunTimerStruct*) left->value)->maximum; + right_max = ((sunTimerStruct*) right->value)->maximum; + + if (left_max < right_max) + return(1); + if (left_max > right_max) + return(-1); + return(0); +} diff --git a/src/lib/sundials/sundials_reductions.hpp b/src/lib/sundials/sundials_reductions.hpp new file mode 100644 index 0000000..26c0687 --- /dev/null +++ b/src/lib/sundials/sundials_reductions.hpp @@ -0,0 +1,84 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + */ + +/* NOTE: SUNDIALS_HOST_DEVICE and SUNDIALS_DEVICE_INLINE must be defined + before including this file */ + +#include + +namespace sundials +{ +namespace reductions +{ +namespace impl +{ + +template +struct BinaryOperator +{ + using first_arg_type = Arg1; + using second_arg_type = Arg2; + using result_arg_type = Result; +}; + +template +struct plus : public BinaryOperator +{ + SUNDIALS_HOST_DEVICE constexpr Ret operator()(const Arg1& lhs, + const Arg2& rhs) const + { + return Ret{lhs} + rhs; + } + + static SUNDIALS_HOST_DEVICE SUNDIALS_DEVICE_INLINE constexpr Ret identity() + { + return Ret{0}; + } +}; + +template +struct maximum : public BinaryOperator +{ + SUNDIALS_HOST_DEVICE constexpr Ret operator()(const Arg1& lhs, + const Arg2& rhs) const + { + return (lhs >= rhs) ? lhs : rhs; + } + + static SUNDIALS_HOST_DEVICE SUNDIALS_DEVICE_INLINE constexpr Ret identity() + { + return std::numeric_limits::lowest(); + } +}; + +template +struct minimum : public BinaryOperator +{ + SUNDIALS_HOST_DEVICE constexpr Ret operator()(const Arg1& lhs, + const Arg2& rhs) const + { + return (rhs < lhs) ? rhs : lhs; + } + + static SUNDIALS_HOST_DEVICE SUNDIALS_DEVICE_INLINE constexpr Ret identity() + { + return std::numeric_limits::max(); + } +}; + +} // impl +} // reductions +} // sundials diff --git a/src/lib/sundials/sundials_sycl.h b/src/lib/sundials/sundials_sycl.h new file mode 100644 index 0000000..1b7f982 --- /dev/null +++ b/src/lib/sundials/sundials_sycl.h @@ -0,0 +1,73 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This header files defines internal utility functions and macros for working + * with SYCL. + * ---------------------------------------------------------------------------*/ + +#include +#include + +#ifndef _SUNDIALS_SYCL_H +#define _SUNDIALS_SYCL_H + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +extern "C" { +#endif + +/* Get the maximum work group size (block size) for a queue */ +#define SYCL_BLOCKDIM(q) (q->get_device().get_info<::sycl::info::device::max_work_group_size>()) + +/* Grid (work group) stride loop */ +#define GRID_STRIDE_XLOOP(item, iter, max) \ + for (sunindextype iter = item.get_global_id(0); \ + iter < max; \ + iter += item.get_global_range(0)) + +/* Sycl parallel for loop */ +#define SYCL_FOR(q, total, block, item, loop) \ + q->submit([&](::sycl::handler& h) { \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + [=](::sycl::nd_item<1> item) \ + { loop }); }); + +/* Sycl parallel for loop with stream for ouput */ +#define SYCL_FOR_DEBUG(q, total, block, item, loop) \ + q->submit([&](::sycl::handler& h) { \ + ::sycl::stream out(1024, 256, h); \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + [=](::sycl::nd_item<1> item) \ + { loop }); }); + +/* Sycl parallel for loop with reduction */ +#define SYCL_FOR_REDUCE(q, total, block, item, rvar, rop, loop) \ + q->submit([&](::sycl::handler& h) { \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + ::sycl::reduction(rvar, rop), \ + [=](::sycl::nd_item<1> item, auto& rvar) \ + { loop }); }); + +/* Sycl parallel for loop with reduction and stream for ouput */ +#define SYCL_FOR_REDUCE_DEBUG(q, total, block, item, rvar, rop, loop) \ + q->submit([&](::sycl::handler& h) { \ + ::sycl::stream out(1024, 256, h); \ + h.parallel_for(::sycl::nd_range<1>{total,block}, \ + ::sycl::reduction(rvar, rop), \ + [=](::sycl::nd_item<1> item, auto& rvar) \ + { loop }); }); + +#ifdef __cplusplus /* wrapper to enable C++ usage */ +} +#endif + +#endif /* _SUNDIALS_SYCL_H */ diff --git a/src/lib/sundials/sundials_utils.h b/src/lib/sundials/sundials_utils.h new file mode 100644 index 0000000..2b1a217 --- /dev/null +++ b/src/lib/sundials/sundials_utils.h @@ -0,0 +1,88 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This header file contains common utility functions. + * -----------------------------------------------------------------*/ + +#ifndef _SUNDIALS_UTILS_H +#define _SUNDIALS_UTILS_H + +#include +#include +#include + +static int sunvsnprintf(char* buffer, size_t bufsz, const char* format, va_list vlist) +{ + int size = 0; +#ifdef SUNDIALS_C_COMPILER_HAS_SNPRINTF_AND_VA_COPY + va_list tmp; + va_copy(tmp, vlist); + size = vsnprintf(buffer, bufsz, format, tmp); + va_end(tmp); +#else + size = SUNDIALS_MAX_SPRINTF_SIZE; + if ((int) strlen(format) > size) + { + /* buffer is definitely not big enough */ + size = -1; + } + else if (buffer != NULL) + { + vsprintf(buffer, format, vlist); + } +#endif +return size; +} + + +static int sunsnprintf(char* buffer, size_t bufsz, const char* format, ...) +{ + int size = 0; + va_list args; + va_start(args, format); + size = sunvsnprintf(buffer, bufsz, format, args); + va_end(args); + return size; +} + +/* + * Implementation of the GNU extension function vasprintf which + * is itself an analog for vsprintf, except it allocates a string + * large enough to hold the output byte ('\0'). + */ +static int sunvasnprintf(char** str, const char* fmt, va_list args) +{ + int size = 0; + + /* compute string length */ + size = sunvsnprintf(NULL, 0, fmt, args); + + if (size < 0) + { + return -1; + } + + /* add one to size for the null terminator*/ + *str = (char*) malloc(size + 1); + if (NULL == *str) + { + return -1; + } + + size = vsprintf(*str, fmt, args); + + return size; +} + + +#endif /* _SUNDIALS_UTILS_H */ diff --git a/src/lib/sundials/sundials_version.c b/src/lib/sundials/sundials_version.c index 5192f43..e77b895 100644 --- a/src/lib/sundials/sundials_version.c +++ b/src/lib/sundials/sundials_version.c @@ -1,8 +1,8 @@ /* ----------------------------------------------------------------- - * Programmer(s): David J. Gardner @ LLNL + * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -19,30 +19,36 @@ #include +/* note strlen does not include terminating null character hence the + use of >= when checking len below and strncpy copies up to len + characters including the terminating null character */ + /* fill string with SUNDIALS version information */ int SUNDIALSGetVersion(char *version, int len) { - if (strlen(SUNDIALS_VERSION) > len) { - return(-1); - } - - strncpy(version, SUNDIALS_VERSION, len); + if (version == NULL) return(-1); + if (strlen(SUNDIALS_VERSION) >= (size_t)len) return(-1); + + strncpy(version, SUNDIALS_VERSION, (size_t)len); + return(0); } -/* fill integers with SUNDIALS major, minor, and patch release +/* fill integers with SUNDIALS major, minor, and patch release numbers and fill a string with the release label */ -int SUNDIALSGetVersionNumber(int *major, int *minor, int *patch, +int SUNDIALSGetVersionNumber(int *major, int *minor, int *patch, char *label, int len) { - if (strlen(SUNDIALS_VERSION_LABEL) > len) { - return(-1); - } - + if (major == NULL || + minor == NULL || + patch == NULL || + label == NULL) return(-1); + if (strlen(SUNDIALS_VERSION_LABEL) >= (size_t)len) return(-1); + *major = SUNDIALS_VERSION_MAJOR; *minor = SUNDIALS_VERSION_MINOR; *patch = SUNDIALS_VERSION_PATCH; - strncpy(label, SUNDIALS_VERSION_LABEL, len); + strncpy(label, SUNDIALS_VERSION_LABEL, (size_t)len); return(0); } diff --git a/src/lib/sunlinsol/band/sunlinsol_band.c b/src/lib/sunlinsol/band/sunlinsol_band.c new file mode 100644 index 0000000..f570587 --- /dev/null +++ b/src/lib/sunlinsol/band/sunlinsol_band.c @@ -0,0 +1,241 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the band implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) +#define ROW(i,j,smu) (i-j+smu) + +/* + * ----------------------------------------------------------------- + * Band solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define BAND_CONTENT(S) ( (SUNLinearSolverContent_Band)(S->content) ) +#define PIVOTS(S) ( BAND_CONTENT(S)->pivots ) +#define LASTFLAG(S) ( BAND_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new band linear solver + */ + +SUNLinearSolver SUNLinSol_Band(N_Vector y, SUNMatrix A, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_Band content; + sunindextype MatrixRows; + + /* Check compatibility with supplied SUNMatrix and N_Vector */ + if (SUNMatGetID(A) != SUNMATRIX_BAND) return(NULL); + + if (SUNBandMatrix_Rows(A) != SUNBandMatrix_Columns(A)) return(NULL); + + if ( (N_VGetVectorID(y) != SUNDIALS_NVEC_SERIAL) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_OPENMP) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_PTHREADS) ) + return(NULL); + + /* Check that A has appropriate storage upper bandwidth for factorization */ + MatrixRows = SUNBandMatrix_Rows(A); + + if (SUNBandMatrix_StoredUpperBandwidth(A) < + SUNMIN(MatrixRows-1, + SUNBandMatrix_LowerBandwidth(A)+SUNBandMatrix_UpperBandwidth(A))) + return(NULL); + + if (MatrixRows != N_VGetLength(y)) return(NULL); + + /* Create an empty linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_Band; + S->ops->getid = SUNLinSolGetID_Band; + S->ops->initialize = SUNLinSolInitialize_Band; + S->ops->setup = SUNLinSolSetup_Band; + S->ops->solve = SUNLinSolSolve_Band; + S->ops->lastflag = SUNLinSolLastFlag_Band; + S->ops->space = SUNLinSolSpace_Band; + S->ops->free = SUNLinSolFree_Band; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_Band) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->N = MatrixRows; + content->last_flag = 0; + content->pivots = NULL; + + /* Allocate content */ + content->pivots = (sunindextype *) malloc(MatrixRows * sizeof(sunindextype)); + if (content->pivots == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_Band(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_DIRECT); +} + +SUNLinearSolver_ID SUNLinSolGetID_Band(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_BAND); +} + +int SUNLinSolInitialize_Band(SUNLinearSolver S) +{ + /* all solver-specific memory has already been allocated */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + +int SUNLinSolSetup_Band(SUNLinearSolver S, SUNMatrix A) +{ + realtype **A_cols; + sunindextype *pivots; + + /* check for valid inputs */ + if ( (A == NULL) || (S == NULL) ) + return(SUNLS_MEM_NULL); + + /* Ensure that A is a band matrix */ + if (SUNMatGetID(A) != SUNMATRIX_BAND) { + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + + /* access data pointers (return with failure on NULL) */ + A_cols = NULL; + pivots = NULL; + A_cols = SM_COLS_B(A); + pivots = PIVOTS(S); + if ( (A_cols == NULL) || (pivots == NULL) ) { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* ensure that storage upper bandwidth is sufficient for fill-in */ + if (SM_SUBAND_B(A) < SUNMIN(SM_COLUMNS_B(A)-1, SM_UBAND_B(A) + SM_LBAND_B(A))) { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* perform LU factorization of input matrix */ + LASTFLAG(S) = SUNDlsMat_bandGBTRF(A_cols, SM_COLUMNS_B(A), SM_UBAND_B(A), + SM_LBAND_B(A), SM_SUBAND_B(A), pivots); + + /* store error flag (if nonzero, that row encountered zero-valued pivod) */ + if (LASTFLAG(S) > 0) + return(SUNLS_LUFACT_FAIL); + return(SUNLS_SUCCESS); +} + +int SUNLinSolSolve_Band(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + realtype **A_cols, *xdata; + sunindextype *pivots; + + /* check for valid inputs */ + if ( (A == NULL) || (S == NULL) || (x == NULL) || (b == NULL) ) + return(SUNLS_MEM_NULL); + + /* copy b into x */ + N_VScale(ONE, b, x); + + /* access data pointers (return with failure on NULL) */ + A_cols = NULL; + xdata = NULL; + pivots = NULL; + A_cols = SUNBandMatrix_Cols(A); + xdata = N_VGetArrayPointer(x); + pivots = PIVOTS(S); + if ( (A_cols == NULL) || (xdata == NULL) || (pivots == NULL) ) { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* solve using LU factors */ + SUNDlsMat_bandGBTRS(A_cols, SM_COLUMNS_B(A), SM_SUBAND_B(A), + SM_LBAND_B(A), pivots, xdata); + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + +sunindextype SUNLinSolLastFlag_Band(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return(LASTFLAG(S)); +} + +int SUNLinSolSpace_Band(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + *leniwLS = 2 + BAND_CONTENT(S)->N; + *lenrwLS = 0; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_Band(SUNLinearSolver S) +{ + /* return if S is already free */ + if (S == NULL) return(SUNLS_SUCCESS); + + /* delete items from contents, then delete generic structure */ + if (S->content) { + if (PIVOTS(S)) { + free(PIVOTS(S)); + PIVOTS(S) = NULL; + } + free(S->content); + S->content = NULL; + } + if (S->ops) { + free(S->ops); + S->ops = NULL; + } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/cusolversp/sunlinsol_cusolversp_batchqr.cu b/src/lib/sunlinsol/cusolversp/sunlinsol_cusolversp_batchqr.cu new file mode 100644 index 0000000..c6c027c --- /dev/null +++ b/src/lib/sunlinsol/cusolversp/sunlinsol_cusolversp_batchqr.cu @@ -0,0 +1,356 @@ +/* ---------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ---------------------------------------------------------------------------- + * Based on work by Donald Wilcox @ LBNL + * ---------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ---------------------------------------------------------------------------- + * Implementation file for cuSolverSp batched QR SUNLinearSolver interface. + * ----------------------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_cuda.h" +#include "sundials_debug.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) +#define TWO RCONST(2.0) + +/* macros for handling the different function names based on precision */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define _cusolverSpXcsrqrBufferInfoBatched cusolverSpDcsrqrBufferInfoBatched +#define _cusolverSpXcsrqrsvBatched cusolverSpDcsrqrsvBatched +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define _cusolverSpXcsrqrBufferInfoBatched cusolverSpScsrqrBufferInfoBatched +#define _cusolverSpXcsrqrsvBatched cusolverSpScsrqrsvBatched +#endif + +/* + * ----------------------------------------------------------------- + * cuSolverSp solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define SUN_CUSP_CONTENT(S) ( (SUNLinearSolverContent_cuSolverSp_batchQR)(S->content) ) +#define SUN_CUSP_QRWORKSPACE(S) ( SUN_CUSP_CONTENT(S)->workspace ) +#define SUN_CUSP_FIRSTFACTORIZE(S) ( SUN_CUSP_CONTENT(S)->first_factorize ) +#define SUN_CUSP_LASTFLAG(S) ( SUN_CUSP_CONTENT(S)->last_flag ) +#define SUN_CUSOL_HANDLE(S) ( SUN_CUSP_CONTENT(S)->cusolver_handle ) +#define SUN_CUSP_DESC(S) ( SUN_CUSP_CONTENT(S)->desc ) +#define SUN_CUSP_QRINFO(S) ( SUN_CUSP_CONTENT(S)->info ) +#define SUN_CUSP_INTERNAL_SIZE(S) ( SUN_CUSP_CONTENT(S)->internal_size ) +#define SUN_CUSP_WORK_SIZE(S) ( SUN_CUSP_CONTENT(S)->workspace_size ) + +/* + * ---------------------------------------------------------------------------- + * Implementations of exported functions. + * ---------------------------------------------------------------------------- + */ + +SUNLinearSolver SUNLinSol_cuSolverSp_batchQR(N_Vector y, SUNMatrix A, cusolverSpHandle_t cusol_handle, SUNContext sunctx) +{ + /* Check that required arguments are not NULL */ + if (y == NULL || A == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNLinSol_cuSolverSp_batchQR: y or A is null\n"); + return NULL; + } + + /* Check compatibility with supplied SUNMatrix and N_Vector */ + if (SUNMatGetID(A) != SUNMATRIX_CUSPARSE || y->ops->nvgetdevicearraypointer == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNLinSol_cuSolverSp_batchQR: illegal type for y or A\n"); + return NULL; + } + + /* Matrix and vector dimensions must agree */ + if (N_VGetLength(y) != SUNMatrix_cuSparse_Columns(A)) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNLinSol_cuSolverSp_batchQR: matrix and vector dimensions don't agree\n"); + return NULL; + } + + /* Create an empty linear solver */ + SUNLinearSolver S; + + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) + { + return NULL; + } + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_cuSolverSp_batchQR; + S->ops->getid = SUNLinSolGetID_cuSolverSp_batchQR; + S->ops->initialize = SUNLinSolInitialize_cuSolverSp_batchQR; + S->ops->setup = SUNLinSolSetup_cuSolverSp_batchQR; + S->ops->solve = SUNLinSolSolve_cuSolverSp_batchQR; + S->ops->lastflag = SUNLinSolLastFlag_cuSolverSp_batchQR; + S->ops->free = SUNLinSolFree_cuSolverSp_batchQR; + + /* Create content */ + SUNLinearSolverContent_cuSolverSp_batchQR content; + + content = NULL; + content = (SUNLinearSolverContent_cuSolverSp_batchQR) malloc(sizeof(*content)); + if (content == NULL) + { + SUNLinSolFree(S); + return NULL; + } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = SUNLS_SUCCESS; + content->first_factorize = SUNTRUE; + content->internal_size = 0; + content->workspace_size = 0; + content->cusolver_handle = cusol_handle; + content->info = NULL; + content->workspace = NULL; + content->desc = NULL; + + return S; +} + +/* + * ----------------------------------------------------------------- + * Implementation of accessor and setter functions. + * ----------------------------------------------------------------- + */ + +void SUNLinSol_cuSolverSp_batchQR_GetDescription(SUNLinearSolver S, const char** desc) +{ + *desc = SUN_CUSP_DESC(S); +} + +void SUNLinSol_cuSolverSp_batchQR_SetDescription(SUNLinearSolver S, const char* desc) +{ + SUN_CUSP_DESC(S) = desc; +} + +void SUNLinSol_cuSolverSp_batchQR_GetDeviceSpace(SUNLinearSolver S, + size_t* cuSolverInternal, + size_t* cuSolverWorkspace) +{ + /* size is in bytes */ + *cuSolverInternal = SUN_CUSP_INTERNAL_SIZE(S); /* buffer for Q and R factors */ + *cuSolverWorkspace = SUN_CUSP_WORK_SIZE(S); /* numerical factorization buffer */ +} + +/* + * ----------------------------------------------------------------- + * Implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_cuSolverSp_batchQR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_DIRECT); +} + +SUNLinearSolver_ID SUNLinSolGetID_cuSolverSp_batchQR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_CUSOLVERSP_BATCHQR); +} + +int SUNLinSolInitialize_cuSolverSp_batchQR(SUNLinearSolver S) +{ + SUN_CUSP_FIRSTFACTORIZE(S) = SUNTRUE; + SUN_CUSP_LASTFLAG(S) = SUNLS_SUCCESS; + return(SUN_CUSP_LASTFLAG(S)); +} + +int SUNLinSolSetup_cuSolverSp_batchQR(SUNLinearSolver S, SUNMatrix A) +{ + int blockrows, blockcols, blocknnz, nblock; + int *d_rowptr, *d_colind; + realtype *d_data; + cusparseMatDescr_t mat_descr; + cudaError_t cuerr; + cusolverStatus_t status; + + if (SUN_CUSP_LASTFLAG(S) != SUNLS_SUCCESS) + return SUN_CUSP_LASTFLAG(S); + + if (SUN_CUSP_FIRSTFACTORIZE(S)) + { + + /* Free old workspace and symbloic analysis */ + if (SUN_CUSP_QRWORKSPACE(S)) + { + cudaFree(SUN_CUSP_QRWORKSPACE(S)); + cusolverSpDestroyCsrqrInfo(SUN_CUSP_QRINFO(S)); + } + + /* We must create a new csrqrinfo_t context every time we want to + do a symbolic analysis. Trying to reuse it results in a + CUSOLVER_STATUS_INVALID_VALUE error. */ + status = cusolverSpCreateCsrqrInfo(&SUN_CUSP_QRINFO(S)); + if (!SUNDIALS_CUSOLVER_VERIFY(status)) + { + SUN_CUSP_LASTFLAG(S) = SUNLS_PACKAGE_FAIL_UNREC; + return SUN_CUSP_LASTFLAG(S); + } + + nblock = SUNMatrix_cuSparse_NumBlocks(A); + blocknnz = SUNMatrix_cuSparse_BlockNNZ(A); + blockrows = SUNMatrix_cuSparse_BlockRows(A); + blockcols = SUNMatrix_cuSparse_BlockColumns(A); + d_data = SUNMatrix_cuSparse_Data(A); + d_rowptr = SUNMatrix_cuSparse_IndexPointers(A); + d_colind = SUNMatrix_cuSparse_IndexValues(A); + mat_descr = SUNMatrix_cuSparse_MatDescr(A); + + /* Perform symbolic analysis of sparsity structure */ + status = cusolverSpXcsrqrAnalysisBatched(SUN_CUSOL_HANDLE(S), + blockrows, + blockcols, + blocknnz, + mat_descr, + d_rowptr, + d_colind, + SUN_CUSP_QRINFO(S)); + + if (!SUNDIALS_CUSOLVER_VERIFY(status)) + { + SUN_CUSP_LASTFLAG(S) = SUNLS_PACKAGE_FAIL_UNREC; + return SUN_CUSP_LASTFLAG(S); + } + + /* Compute the workspace we will need */ + status = _cusolverSpXcsrqrBufferInfoBatched(SUN_CUSOL_HANDLE(S), + blockrows, + blockcols, + blocknnz, + mat_descr, + d_data, + d_rowptr, + d_colind, + nblock, + SUN_CUSP_QRINFO(S), + &SUN_CUSP_INTERNAL_SIZE(S), + &SUN_CUSP_WORK_SIZE(S)); + + if (!SUNDIALS_CUSOLVER_VERIFY(status)) + { + SUN_CUSP_LASTFLAG(S) = SUNLS_PACKAGE_FAIL_UNREC; + return SUN_CUSP_LASTFLAG(S); + } + + cuerr = cudaMalloc((void**) &SUN_CUSP_QRWORKSPACE(S), SUN_CUSP_WORK_SIZE(S)); + if (!SUNDIALS_CUDA_VERIFY(cuerr)) + { + SUN_CUSP_LASTFLAG(S) = SUNLS_PACKAGE_FAIL_UNREC; + return SUN_CUSP_LASTFLAG(S); + } + + SUN_CUSP_FIRSTFACTORIZE(S) = SUNFALSE; + } + + SUN_CUSP_LASTFLAG(S) = SUNLS_SUCCESS; + return(SUN_CUSP_LASTFLAG(S)); +} + +int SUNLinSolSolve_cuSolverSp_batchQR(SUNLinearSolver S, SUNMatrix A, + N_Vector x, N_Vector b, realtype tol) +{ + cusolverStatus_t status; + int blockrows, blockcols, blocknnz, nblock; + int *d_rowptr, *d_colind; + realtype *d_data; + cusparseMatDescr_t mat_descr; + + if ((S == NULL) || (A == NULL) || (x == NULL) || (b == NULL)) + return SUNLS_MEM_NULL; + + SUN_CUSP_LASTFLAG(S) = SUNLS_SUCCESS; + + realtype* device_b = N_VGetDeviceArrayPointer(b); + realtype* device_x = N_VGetDeviceArrayPointer(x); + + if (SUN_CUSP_LASTFLAG(S) != SUNLS_SUCCESS) + return SUN_CUSP_LASTFLAG(S); + + /* solve the system */ + nblock = SUNMatrix_cuSparse_NumBlocks(A); + blocknnz = SUNMatrix_cuSparse_BlockNNZ(A); + blockrows = SUNMatrix_cuSparse_BlockRows(A); + blockcols = SUNMatrix_cuSparse_BlockColumns(A); + d_data = SUNMatrix_cuSparse_Data(A); + d_rowptr = SUNMatrix_cuSparse_IndexPointers(A); + d_colind = SUNMatrix_cuSparse_IndexValues(A); + mat_descr = SUNMatrix_cuSparse_MatDescr(A); + + status = _cusolverSpXcsrqrsvBatched(SUN_CUSOL_HANDLE(S), + blockrows, + blockcols, + blocknnz, + mat_descr, + d_data, + d_rowptr, + d_colind, + device_b, + device_x, + nblock, + SUN_CUSP_QRINFO(S), + SUN_CUSP_QRWORKSPACE(S)); + + if (!SUNDIALS_CUSOLVER_VERIFY(status)) + { + SUN_CUSP_LASTFLAG(S) = SUNLS_PACKAGE_FAIL_UNREC; + return SUN_CUSP_LASTFLAG(S); + } + + return SUN_CUSP_LASTFLAG(S); +} + +sunindextype SUNLinSolLastFlag_cuSolverSp_batchQR(SUNLinearSolver S) +{ + if (S == NULL) return -1; + return SUN_CUSP_LASTFLAG(S); +} + +int SUNLinSolFree_cuSolverSp_batchQR(SUNLinearSolver S) +{ + /* return with success if already freed */ + if (S == NULL) return SUNLS_SUCCESS; + + /* free stuff in the content structure */ + cusolverSpDestroyCsrqrInfo(SUN_CUSP_QRINFO(S)); + cudaFree(SUN_CUSP_QRWORKSPACE(S)); + + /* free content structure */ + if (S->content) { + free(S->content); + S->content = NULL; + } + + /* free ops structure */ + if (S->ops) { + free(S->ops); + S->ops = NULL; + } + + /* free the actual SUNLinSol */ + free(S); + S = NULL; + + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/dense/fsunlinsol_dense.c b/src/lib/sunlinsol/dense/fsunlinsol_dense.c deleted file mode 100644 index 2330b1d..0000000 --- a/src/lib/sunlinsol/dense/fsunlinsol_dense.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunlinsol_dense.h) contains the - * implementation needed for the Fortran initialization of dense - * linear solver operations. - * ----------------------------------------------------------------- - */ - -#include -#include - -#include "fsunlinsol_dense.h" - -/* Define global linsol variables */ - -SUNLinearSolver F2C_CVODE_linsol; -SUNLinearSolver F2C_IDA_linsol; -SUNLinearSolver F2C_KINSOL_linsol; -SUNLinearSolver F2C_ARKODE_linsol; -SUNLinearSolver F2C_ARKODE_mass_sol; - -/* Declarations of external global variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -extern N_Vector F2C_CVODE_vec; -extern N_Vector F2C_IDA_vec; -extern N_Vector F2C_KINSOL_vec; -extern N_Vector F2C_ARKODE_vec; - -/* Fortran callable interfaces */ - -void FSUNDENSELINSOL_INIT(int *code, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (F2C_CVODE_linsol) SUNLinSolFree(F2C_CVODE_linsol); - F2C_CVODE_linsol = NULL; - F2C_CVODE_linsol = SUNLinSol_Dense(F2C_CVODE_vec, - F2C_CVODE_matrix); - if (F2C_CVODE_linsol == NULL) *ier = -1; - break; - case FCMIX_IDA: - if (F2C_IDA_linsol) SUNLinSolFree(F2C_IDA_linsol); - F2C_IDA_linsol = NULL; - F2C_IDA_linsol = SUNLinSol_Dense(F2C_IDA_vec, - F2C_IDA_matrix); - if (F2C_IDA_linsol == NULL) *ier = -1; - break; - case FCMIX_KINSOL: - if (F2C_KINSOL_linsol) SUNLinSolFree(F2C_KINSOL_linsol); - F2C_KINSOL_linsol = NULL; - F2C_KINSOL_linsol = SUNLinSol_Dense(F2C_KINSOL_vec, - F2C_KINSOL_matrix); - if (F2C_KINSOL_linsol == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - if (F2C_ARKODE_linsol) SUNLinSolFree(F2C_ARKODE_linsol); - F2C_ARKODE_linsol = NULL; - F2C_ARKODE_linsol = SUNLinSol_Dense(F2C_ARKODE_vec, - F2C_ARKODE_matrix); - if (F2C_ARKODE_linsol == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - - -void FSUNMASSDENSELINSOL_INIT(int *ier) -{ - *ier = 0; - if (F2C_ARKODE_mass_sol) SUNLinSolFree(F2C_ARKODE_mass_sol); - F2C_ARKODE_mass_sol = NULL; - F2C_ARKODE_mass_sol = SUNLinSol_Dense(F2C_ARKODE_vec, - F2C_ARKODE_mass_matrix); - if (F2C_ARKODE_mass_sol == NULL) *ier = -1; -} diff --git a/src/lib/sunlinsol/dense/fsunlinsol_dense.h b/src/lib/sunlinsol/dense/fsunlinsol_dense.h deleted file mode 100644 index 872bdba..0000000 --- a/src/lib/sunlinsol/dense/fsunlinsol_dense.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunlinsol_dense.c) contains the - * definitions needed for the initialization of dense - * linear solver operations in Fortran. - * ----------------------------------------------------------------- - */ - -#ifndef _FSUNLINSOL_DENSE_H -#define _FSUNLINSOL_DENSE_H - -#include -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FSUNDENSELINSOL_INIT SUNDIALS_F77_FUNC(fsundenselinsolinit, FSUNDENSELINSOLINIT) -#define FSUNMASSDENSELINSOL_INIT SUNDIALS_F77_FUNC(fsunmassdenselinsolinit, FSUNMASSDENSELINSOLINIT) -#else -#define FSUNDENSELINSOL_INIT fsundenselinsolinit_ -#define FSUNMASSDENSELINSOL_INIT fsunmassdenselinsolinit_ -#endif - - -/* Declarations of global variables */ - -extern SUNLinearSolver F2C_CVODE_linsol; -extern SUNLinearSolver F2C_IDA_linsol; -extern SUNLinearSolver F2C_KINSOL_linsol; -extern SUNLinearSolver F2C_ARKODE_linsol; -extern SUNLinearSolver F2C_ARKODE_mass_sol; - -/* - * Prototypes of exported functions - * - * FSUNDENSELINSOL_INIT - initializes dense linear solver for main problem - * FSUNMASSDENSELINSOL_INIT - initializes dense linear solver for mass matrix solve - */ - -void FSUNDENSELINSOL_INIT(int *code, int *ier); -void FSUNMASSDENSELINSOL_INIT(int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/sunlinsol/dense/sunlinsol_dense.c b/src/lib/sunlinsol/dense/sunlinsol_dense.c index 1316e5a..efd37af 100644 --- a/src/lib/sunlinsol/dense/sunlinsol_dense.c +++ b/src/lib/sunlinsol/dense/sunlinsol_dense.c @@ -2,7 +2,7 @@ * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -33,16 +33,6 @@ #define PIVOTS(S) ( DENSE_CONTENT(S)->pivots ) #define LASTFLAG(S) ( DENSE_CONTENT(S)->last_flag ) - -/* - * ----------------------------------------------------------------- - * deprecated wrapper functions - * ----------------------------------------------------------------- - */ - -SUNLinearSolver SUNDenseLinearSolver(N_Vector y, SUNMatrix A) -{ return(SUNLinSol_Dense(y, A)); } - /* * ----------------------------------------------------------------- * exported functions @@ -53,7 +43,7 @@ SUNLinearSolver SUNDenseLinearSolver(N_Vector y, SUNMatrix A) * Function to create a new dense linear solver */ -SUNLinearSolver SUNLinSol_Dense(N_Vector y, SUNMatrix A) +SUNLinearSolver SUNLinSol_Dense(N_Vector y, SUNMatrix A, SUNContext sunctx) { SUNLinearSolver S; SUNLinearSolverContent_Dense content; @@ -74,7 +64,7 @@ SUNLinearSolver SUNLinSol_Dense(N_Vector y, SUNMatrix A) /* Create an empty linear solver */ S = NULL; - S = SUNLinSolNewEmpty(); + S = SUNLinSolNewEmpty(sunctx); if (S == NULL) return(NULL); /* Attach operations */ @@ -156,8 +146,8 @@ int SUNLinSolSetup_Dense(SUNLinearSolver S, SUNMatrix A) } /* perform LU factorization of input matrix */ - LASTFLAG(S) = denseGETRF(A_cols, SUNDenseMatrix_Rows(A), - SUNDenseMatrix_Columns(A), pivots); + LASTFLAG(S) = SUNDlsMat_denseGETRF(A_cols, SUNDenseMatrix_Rows(A), + SUNDenseMatrix_Columns(A), pivots); /* store error flag (if nonzero, this row encountered zero-valued pivod) */ if (LASTFLAG(S) > 0) @@ -190,7 +180,7 @@ int SUNLinSolSolve_Dense(SUNLinearSolver S, SUNMatrix A, N_Vector x, } /* solve using LU factors */ - denseGETRS(A_cols, SUNDenseMatrix_Rows(A), pivots, xdata); + SUNDlsMat_denseGETRS(A_cols, SUNDenseMatrix_Rows(A), pivots, xdata); LASTFLAG(S) = SUNLS_SUCCESS; return(SUNLS_SUCCESS); } diff --git a/src/lib/sunlinsol/lapackband/sunlinsol_lapackband.c b/src/lib/sunlinsol/lapackband/sunlinsol_lapackband.c new file mode 100644 index 0000000..134274a --- /dev/null +++ b/src/lib/sunlinsol/lapackband/sunlinsol_lapackband.c @@ -0,0 +1,249 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on codes _lapack.c by: Radu Serban @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the LAPACK band + * implementation of the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_lapack_defs.h" + +/* Interfaces to match 'realtype' with the correct LAPACK functions */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define xgbtrf_f77 dgbtrf_f77 +#define xgbtrs_f77 dgbtrs_f77 +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define xgbtrf_f77 sgbtrf_f77 +#define xgbtrs_f77 sgbtrs_f77 +#else +#error Incompatible realtype for LAPACK; disable LAPACK and rebuild +#endif + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * Band solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define LAPACKBAND_CONTENT(S) ( (SUNLinearSolverContent_LapackBand)(S->content) ) +#define PIVOTS(S) ( LAPACKBAND_CONTENT(S)->pivots ) +#define LASTFLAG(S) ( LAPACKBAND_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new LAPACK band linear solver + */ + +SUNLinearSolver SUNLinSol_LapackBand(N_Vector y, SUNMatrix A, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_LapackBand content; + sunindextype MatrixRows; + + /* Check compatibility with supplied SUNMatrix and N_Vector */ + if (SUNMatGetID(A) != SUNMATRIX_BAND) return(NULL); + + if (SUNBandMatrix_Rows(A) != SUNBandMatrix_Columns(A)) return(NULL); + + if ( (N_VGetVectorID(y) != SUNDIALS_NVEC_SERIAL) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_OPENMP) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_PTHREADS) ) + return(NULL); + + MatrixRows = SUNBandMatrix_Rows(A); + if (MatrixRows != N_VGetLength(y)) return(NULL); + + /* Create an empty linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_LapackBand; + S->ops->getid = SUNLinSolGetID_LapackBand; + S->ops->initialize = SUNLinSolInitialize_LapackBand; + S->ops->setup = SUNLinSolSetup_LapackBand; + S->ops->solve = SUNLinSolSolve_LapackBand; + S->ops->lastflag = SUNLinSolLastFlag_LapackBand; + S->ops->space = SUNLinSolSpace_LapackBand; + S->ops->free = SUNLinSolFree_LapackBand; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_LapackBand) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->N = MatrixRows; + content->last_flag = 0; + content->pivots = NULL; + + /* Allocate content */ + content->pivots = (sunindextype *) malloc(MatrixRows * sizeof(sunindextype)); + if (content->pivots == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_LapackBand(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_DIRECT); +} + + +SUNLinearSolver_ID SUNLinSolGetID_LapackBand(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_LAPACKBAND); +} + + +int SUNLinSolInitialize_LapackBand(SUNLinearSolver S) +{ + /* all solver-specific memory has already been allocated */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetup_LapackBand(SUNLinearSolver S, SUNMatrix A) +{ + sunindextype n, ml, mu, ldim, ier; + + /* check for valid inputs */ + if ( (A == NULL) || (S == NULL) ) + return(SUNLS_MEM_NULL); + + /* Ensure that A is a band matrix */ + if (SUNMatGetID(A) != SUNMATRIX_BAND) { + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + + /* Call LAPACK to do LU factorization of A */ + ier = 0; + n = SUNBandMatrix_Rows(A); + ml = SUNBandMatrix_LowerBandwidth(A); + mu = SUNBandMatrix_UpperBandwidth(A); + ldim = SUNBandMatrix_LDim(A); + xgbtrf_f77(&n, &n, &ml, &mu, SUNBandMatrix_Data(A), + &ldim, PIVOTS(S), &ier); + + LASTFLAG(S) = ier; + if (ier > 0) + return(SUNLS_LUFACT_FAIL); + if (ier < 0) + return(SUNLS_PACKAGE_FAIL_UNREC); + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSolve_LapackBand(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + sunindextype n, ml, mu, ldim, one, ier; + realtype *xdata; + + /* check for valid inputs */ + if ( (A == NULL) || (S == NULL) || (x == NULL) || (b == NULL) ) + return(SUNLS_MEM_NULL); + + /* copy b into x */ + N_VScale(ONE, b, x); + + /* access x data array */ + xdata = N_VGetArrayPointer(x); + if (xdata == NULL) { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* Call LAPACK to solve the linear system */ + ier = 0; + n = SUNBandMatrix_Rows(A); + ml = SUNBandMatrix_LowerBandwidth(A); + mu = SUNBandMatrix_UpperBandwidth(A); + ldim = SUNBandMatrix_LDim(A); + one = 1; + xgbtrs_f77("N", &n, &ml, &mu, &one, SUNBandMatrix_Data(A), + &ldim, PIVOTS(S), xdata, &n, &ier); + LASTFLAG(S) = ier; + if (ier < 0) + return(SUNLS_PACKAGE_FAIL_UNREC); + + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +sunindextype SUNLinSolLastFlag_LapackBand(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return(LASTFLAG(S)); +} + + +int SUNLinSolSpace_LapackBand(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + *lenrwLS = 0; + *leniwLS = 2 + LAPACKBAND_CONTENT(S)->N; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_LapackBand(SUNLinearSolver S) +{ + /* return with success if already freed */ + if (S == NULL) return(SUNLS_SUCCESS); + + /* delete items from contents, then delete generic structure */ + if (S->content) { + if (PIVOTS(S)) { + free(PIVOTS(S)); + PIVOTS(S) = NULL; + } + free(S->content); + S->content = NULL; + } + if (S->ops) { + free(S->ops); + S->ops = NULL; + } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/lapackdense/sunlinsol_lapackdense.c b/src/lib/sunlinsol/lapackdense/sunlinsol_lapackdense.c new file mode 100644 index 0000000..7aa1a66 --- /dev/null +++ b/src/lib/sunlinsol/lapackdense/sunlinsol_lapackdense.c @@ -0,0 +1,240 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on codes _lapack.c by: Radu Serban @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the LAPACK dense + * implementation of the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_lapack_defs.h" + +/* Interfaces to match 'realtype' with the correct LAPACK functions */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define xgetrf_f77 dgetrf_f77 +#define xgetrs_f77 dgetrs_f77 +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define xgetrf_f77 sgetrf_f77 +#define xgetrs_f77 sgetrs_f77 +#else +#error Incompatible realtype for LAPACK; disable LAPACK and rebuild +#endif + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * LapackDense solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define LAPACKDENSE_CONTENT(S) ( (SUNLinearSolverContent_LapackDense)(S->content) ) +#define PIVOTS(S) ( LAPACKDENSE_CONTENT(S)->pivots ) +#define LASTFLAG(S) ( LAPACKDENSE_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new LAPACK dense linear solver + */ + +SUNLinearSolver SUNLinSol_LapackDense(N_Vector y, SUNMatrix A, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_LapackDense content; + sunindextype MatrixRows; + + /* Check compatibility with supplied SUNMatrix and N_Vector */ + if (SUNMatGetID(A) != SUNMATRIX_DENSE) return(NULL); + + if (SUNDenseMatrix_Rows(A) != SUNDenseMatrix_Columns(A)) return(NULL); + + if ( (N_VGetVectorID(y) != SUNDIALS_NVEC_SERIAL) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_OPENMP) && + (N_VGetVectorID(y) != SUNDIALS_NVEC_PTHREADS) ) + return(NULL); + + MatrixRows = SUNDenseMatrix_Rows(A); + if (MatrixRows != N_VGetLength(y)) return(NULL); + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_LapackDense; + S->ops->getid = SUNLinSolGetID_LapackDense; + S->ops->initialize = SUNLinSolInitialize_LapackDense; + S->ops->setup = SUNLinSolSetup_LapackDense; + S->ops->solve = SUNLinSolSolve_LapackDense; + S->ops->lastflag = SUNLinSolLastFlag_LapackDense; + S->ops->space = SUNLinSolSpace_LapackDense; + S->ops->free = SUNLinSolFree_LapackDense; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_LapackDense) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->N = MatrixRows; + content->last_flag = 0; + content->pivots = NULL; + + /* Allocate content */ + content->pivots = (sunindextype *) malloc(MatrixRows * sizeof(sunindextype)); + if (content->pivots == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_LapackDense(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_DIRECT); +} + + +SUNLinearSolver_ID SUNLinSolGetID_LapackDense(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_LAPACKDENSE); +} + + +int SUNLinSolInitialize_LapackDense(SUNLinearSolver S) +{ + /* all solver-specific memory has already been allocated */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetup_LapackDense(SUNLinearSolver S, SUNMatrix A) +{ + sunindextype n, ier; + + /* check for valid inputs */ + if ( (A == NULL) || (S == NULL) ) + return(SUNLS_MEM_NULL); + + /* Ensure that A is a dense matrix */ + if (SUNMatGetID(A) != SUNMATRIX_DENSE) { + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + + /* Call LAPACK to do LU factorization of A */ + n = SUNDenseMatrix_Rows(A); + ier = 0; + xgetrf_f77(&n, &n, SUNDenseMatrix_Data(A), &n, PIVOTS(S), &ier); + LASTFLAG(S) = ier; + if (ier > 0) + return(SUNLS_LUFACT_FAIL); + if (ier < 0) + return(SUNLS_PACKAGE_FAIL_UNREC); + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSolve_LapackDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + sunindextype n, one, ier; + realtype *xdata; + + if ( (A == NULL) || (S == NULL) || (x == NULL) || (b == NULL) ) + return(SUNLS_MEM_NULL); + + /* copy b into x */ + N_VScale(ONE, b, x); + + /* access x data array */ + xdata = N_VGetArrayPointer(x); + if (xdata == NULL) { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* Call LAPACK to solve the linear system */ + n = SUNDenseMatrix_Rows(A); + one = 1; + ier = 0; + xgetrs_f77("N", &n, &one, SUNDenseMatrix_Data(A), + &n, PIVOTS(S), xdata, &n, &ier); + LASTFLAG(S) = ier; + if (ier < 0) + return(SUNLS_PACKAGE_FAIL_UNREC); + + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +sunindextype SUNLinSolLastFlag_LapackDense(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return(LASTFLAG(S)); +} + + +int SUNLinSolSpace_LapackDense(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + *lenrwLS = 0; + *leniwLS = 2 + LAPACKDENSE_CONTENT(S)->N; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_LapackDense(SUNLinearSolver S) +{ + /* return if S is already free */ + if (S == NULL) return(SUNLS_SUCCESS); + + /* delete items from contents, then delete generic structure */ + if (S->content) { + if (PIVOTS(S)) { + free(PIVOTS(S)); + PIVOTS(S) = NULL; + } + free(S->content); + S->content = NULL; + } + if (S->ops) { + free(S->ops); + S->ops = NULL; + } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/magmadense/sunlinsol_magmadense.cpp b/src/lib/sunlinsol/magmadense/sunlinsol_magmadense.cpp new file mode 100644 index 0000000..85830c5 --- /dev/null +++ b/src/lib/sunlinsol/magmadense/sunlinsol_magmadense.cpp @@ -0,0 +1,424 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------*/ + +#include +#include + +#include +#include +#include + +/* Interfaces to match 'realtype' with the correct MAGMA functions */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define xgetrf magma_dgetrf_gpu +#define xgetrf_batched magma_dgetrf_batched +#define xgetrs magma_dgetrs_gpu +#define xgetrs_batched magma_dgetrs_batched +#define xset_pointer magma_dset_pointer +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define xgetrf magma_sgetrf_gpu +#define xgetrf_batched magma_sgetrf_batched +#define xgetrs magma_sgetrs_gpu +#define xgetrs_batched magma_sgetrs_batched +#define xset_pointer magma_sset_pointer +#else +#error Incompatible realtype for MAGMA +#endif + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * MAGMADENSE solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define MAGMADENSE_CONTENT(S) ( (SUNLinearSolverContent_MagmaDense)(S->content) ) +#define MHELP(S) ( MAGMADENSE_CONTENT(S)->memhelp ) +#define QUEUE(S) ( MAGMADENSE_CONTENT(S)->q ) +#define PIVOTS(S) ( (sunindextype*)MAGMADENSE_CONTENT(S)->pivots->ptr ) +#define PIVOTSARRAY(S) ( (sunindextype**)MAGMADENSE_CONTENT(S)->pivotsarr->ptr ) +#define RHSARRAY(S) ( (realtype**)MAGMADENSE_CONTENT(S)->rhsarr->ptr ) +#define INFOARRAY(S) ( (sunindextype*)MAGMADENSE_CONTENT(S)->infoarr->ptr ) +#define LASTFLAG(S) ( MAGMADENSE_CONTENT(S)->last_flag ) +#define ASYNCHRONOUS(S) ( MAGMADENSE_CONTENT(S)->async) + +/* + * ---------------------------------------------------------------------------- + * Implementation specific routines + * ---------------------------------------------------------------------------- + */ + +/* + * Constructor functions + */ + +SUNLinearSolver SUNLinSol_MagmaDense(N_Vector y, SUNMatrix Amat, SUNContext sunctx) +{ + int retval = 0; + SUNLinearSolver S; + SUNLinearSolverContent_MagmaDense content; + SUNMatrixContent_MagmaDense A; + sunindextype M, nblocks; + + /* Check inputs */ + if (y == NULL || Amat == NULL) + return(NULL); + + if (y->ops == NULL || Amat->ops == NULL) + return(NULL); + + if (y->ops->nvgetlength == NULL || y->ops->nvgetdevicearraypointer == NULL || + Amat->ops->getid == NULL) + return(NULL); + + /* Check compatibility with supplied SUNMatrix */ + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) + return(NULL); + + if (Amat->content == NULL) + return(NULL); + + A = (SUNMatrixContent_MagmaDense) Amat->content; + + /* Check that the matrix is square */ + if (A->M != A->N) + return(NULL); + + M = A->M; + nblocks = A->nblocks; + + /* Check that the matirx and vector dimensions agree */ + if (M*nblocks != N_VGetLength(y)) + return(NULL); + + /* Create the linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_MagmaDense; + S->ops->getid = SUNLinSolGetID_MagmaDense; + S->ops->initialize = SUNLinSolInitialize_MagmaDense; + S->ops->setup = SUNLinSolSetup_MagmaDense; + S->ops->solve = SUNLinSolSolve_MagmaDense; + S->ops->lastflag = SUNLinSolLastFlag_MagmaDense; + S->ops->space = SUNLinSolSpace_MagmaDense; + S->ops->free = SUNLinSolFree_MagmaDense; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_MagmaDense) malloc(sizeof(*content)); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->async = SUNTRUE; + content->N = M; + content->pivots = NULL; + content->pivotsarr = NULL; + content->infoarr = NULL; + content->rhsarr = NULL; + content->memhelp = A->memhelp; + content->q = A->q; + + /* Allocate data */ + + /* The pivots need to be in host memory when calling the non-batched methods, + but in device memory for the batched methods. */ + retval = SUNMemoryHelper_Alloc(content->memhelp, &content->pivots, + M * nblocks * sizeof(sunindextype), + nblocks > 1 ? SUNMEMTYPE_DEVICE : SUNMEMTYPE_HOST, + nullptr); + if (retval) { SUNLinSolFree(S); return(NULL); } + + /* If we have multiple blocks, then we need to allocate some extra + pointer arrays needed when calling MAGMA batched methods. */ + if (nblocks > 1) + { + retval = SUNMemoryHelper_Alloc(content->memhelp, &content->pivotsarr, + nblocks * sizeof(sunindextype*), + SUNMEMTYPE_DEVICE, nullptr); + if (retval) { SUNLinSolFree(S); return(NULL); } + + /* Set the pivots array on the device */ + magma_iset_pointer((sunindextype**)content->pivotsarr->ptr, /* 2D output array */ + (sunindextype*)content->pivots->ptr, /* 1D input array */ + 1, /* leading dimension of input */ + 0, /* row */ + 0, /* column */ + M, /* rows in a block */ + nblocks, /* number of blocks */ + content->q); + + /* We use pinned memory for the info array because we are going to + check its values on the host and we need it to have fast transfers. */ + retval = SUNMemoryHelper_Alloc(content->memhelp, &content->infoarr, + nblocks * sizeof(sunindextype), + SUNMEMTYPE_PINNED, nullptr); + if (retval) { SUNLinSolFree(S); return(NULL); } + + retval = SUNMemoryHelper_Alloc(content->memhelp, &content->rhsarr, + nblocks * sizeof(realtype*), + SUNMEMTYPE_DEVICE, nullptr); + if (retval) { SUNLinSolFree(S); return(NULL); } + } + + return(S); +} + +/* + * Set functions + */ + +int SUNLinSol_MagmaDense_SetAsync(SUNLinearSolver S, booleantype onoff) +{ + if (S == NULL) return SUNLS_MEM_NULL; + ASYNCHRONOUS(S) = onoff; + return SUNLS_SUCCESS; +} + +/* + * ----------------------------------------------------------------- + * Implementation of generic SUNLinearSolver operations. + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_MagmaDense(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_DIRECT); +} + +SUNLinearSolver_ID SUNLinSolGetID_MagmaDense(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_MAGMADENSE); +} + +int SUNLinSolInitialize_MagmaDense(SUNLinearSolver S) +{ + /* All solver-specific memory has already been allocated */ + if (S == NULL) return SUNLS_MEM_NULL; + LASTFLAG(S) = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + +int SUNLinSolSetup_MagmaDense(SUNLinearSolver S, SUNMatrix A) +{ + /* Check for valid inputs */ + if (S == NULL) return SUNLS_MEM_NULL; + + if (A == NULL) + { + LASTFLAG(S) = SUNLS_MEM_NULL; + return(SUNLS_MEM_NULL); + } + + /* Ensure that A is a magma dense matrix */ + if (SUNMatGetID(A) != SUNMATRIX_MAGMADENSE) + { + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + + sunindextype ier = 0; + sunindextype M = SUNMatrix_MagmaDense_BlockRows(A); + sunindextype nblocks = SUNMatrix_MagmaDense_NumBlocks(A); + + /* Call MAGMA to do LU factorization of A */ + if (nblocks > 1) + { +#ifndef SUNDIALS_MAGMA_USE_GETRF_LOOP + xgetrf_batched(M, /* number of rows per block */ + M, /* number of columns per block */ + SUNMatrix_MagmaDense_BlockData(A), + M, /* leading dimension of each block */ + PIVOTSARRAY(S), + INFOARRAY(S), + nblocks, + QUEUE(S)); +#else + realtype** blocks = SUNMatrix_MagmaDense_BlockData(A); + for (int k = 0; k < nblocks; k++) + { + xgetrf(M, /* number of rows */ + M, /* number of columns */ + blocks[k], + M, /* leading dimension of A */ + PIVOTSARRAY(S)[k], + &INFOARRAY(S)[k]); + } +#endif + + if (!ASYNCHRONOUS(S)) + { + magma_queue_sync(QUEUE(S)); + /* Check if there were any failures when factoring */ + for (sunindextype k = 0; k < nblocks; k++) + { + if (INFOARRAY(S)[k] < 0) ier = INFOARRAY(S)[k]; + if (INFOARRAY(S)[k] > 0) + { + ier = INFOARRAY(S)[k]; + break; + } + } + } + } + else + { + xgetrf(M, /* number of rows */ + M, /* number of columns */ + SUNMatrix_MagmaDense_Data(A), + M, /* leading dimension of A */ + PIVOTS(S), + &ier); + if (!ASYNCHRONOUS(S)) magma_queue_sync(QUEUE(S)); + } + + LASTFLAG(S) = ier; + if (ier > 0) return(SUNLS_LUFACT_FAIL); + if (ier < 0) return(SUNLS_PACKAGE_FAIL_UNREC); + return(SUNLS_SUCCESS); +} + +int SUNLinSolSolve_MagmaDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + /* Check for valid inputs */ + if (S == NULL) return(SUNLS_MEM_NULL); + + if ( (A == NULL) || (x == NULL) || (b == NULL) ) + { + LASTFLAG(S) = SUNLS_MEM_NULL; + return(SUNLS_MEM_NULL); + } + + /* Ensure that A is a magma dense matrix */ + if (SUNMatGetID(A) != SUNMATRIX_MAGMADENSE) + { + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + + int ier = 0; + sunindextype M = SUNMatrix_MagmaDense_BlockRows(A); + sunindextype nblocks = SUNMatrix_MagmaDense_NumBlocks(A); + + /* Copy b into x */ + N_VScale(ONE, b, x); + + /* Access x data array */ + realtype* xdata = N_VGetDeviceArrayPointer(x); + if (xdata == NULL) + { + LASTFLAG(S) = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + /* Call MAGMA to solve the linear system */ + if (nblocks > 1) + { + /* First, set pointers to RHS blocks */ + xset_pointer(RHSARRAY(S), /* 2D output array */ + xdata, /* 1D input array */ + 1, /* leading dimension of input */ + 0, /* rows */ + 0, /* cols */ + M, /* number of rows in block */ + nblocks, + QUEUE(S)); + + /* Now, solve the batch system */ + xgetrs_batched(MagmaNoTrans, + M, /* order of the matrix */ + 1, /* number of right hand sides */ + SUNMatrix_MagmaDense_BlockData(A), + M, /* leading dimension of A */ + PIVOTSARRAY(S), + RHSARRAY(S), /* right hand side (input), solution (output) */ + M, /* leading dimension of b */ + nblocks, + QUEUE(S)); + } + else + { + xgetrs(MagmaNoTrans, + M, /* order of the matrix */ + 1, /* number of right hand sides */ + SUNMatrix_MagmaDense_Data(A), + M, /* leading dimension of A */ + PIVOTS(S), + xdata, /* right hand side (input), solution (output) */ + M, /* leading dimension of x */ + &ier); + } + if(!ASYNCHRONOUS(S)) magma_queue_sync(QUEUE(S)); + + LASTFLAG(S) = ier; + return((ier < 0) ? SUNLS_PACKAGE_FAIL_UNREC : SUNLS_SUCCESS); +} + +sunindextype SUNLinSolLastFlag_MagmaDense(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return(LASTFLAG(S)); +} + +int SUNLinSolSpace_MagmaDense(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + *lenrwLS = 0; + *leniwLS = 2 + MAGMADENSE_CONTENT(S)->N; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_MagmaDense(SUNLinearSolver S) +{ + /* return if S is already free */ + if (S == NULL) return(SUNLS_SUCCESS); + + /* delete items from contents, then delete generic structure */ + if (S->content) + { + if (MAGMADENSE_CONTENT(S)->pivots) + SUNMemoryHelper_Dealloc(MHELP(S), MAGMADENSE_CONTENT(S)->pivots, + nullptr); + if (MAGMADENSE_CONTENT(S)->pivotsarr) + SUNMemoryHelper_Dealloc(MHELP(S), MAGMADENSE_CONTENT(S)->pivotsarr, + nullptr); + if (MAGMADENSE_CONTENT(S)->infoarr) + SUNMemoryHelper_Dealloc(MHELP(S), MAGMADENSE_CONTENT(S)->infoarr, + nullptr); + if (MAGMADENSE_CONTENT(S)->rhsarr) + SUNMemoryHelper_Dealloc(MHELP(S), MAGMADENSE_CONTENT(S)->rhsarr, + nullptr); + free(S->content); + S->content = NULL; + } + if (S->ops) + { + free(S->ops); + S->ops = NULL; + } + free(S); + S = NULL; + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp b/src/lib/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp new file mode 100644 index 0000000..5ecef33 --- /dev/null +++ b/src/lib/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp @@ -0,0 +1,617 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This is the implementation file for the dense implementation of the + * SUNLINEARSOLVER class using the Intel oneAPI Math Kernel Library (oneMKL). + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include +using namespace oneapi::mkl::lapack; + +// SUNDIALS public headers +#include +#include + +// SUNDIALS private headers +#include "sundials_debug.h" + +// Check for a valid precision and index size +#if defined(SUNDIALS_EXTENDED_PRECISION) +#error "oneMLK unsupported precision" +#endif + +#if defined(SUNDIALS_INT32_T) +#error "oneMLK unsupported index size" +#endif + +// Accessor macros + +// Content and last error flag +#define LS_CONTENT(S) ((SUNLinearSolverContent_OneMklDense)(S->content)) +#define LS_LASTFLAG(S) (LS_CONTENT(S)->last_flag ) + +// Pivots array length and memory +#define LS_ROWS(S) (LS_CONTENT(S)->rows) +#define LS_PIVOTS(S) (LS_CONTENT(S)->pivots) +#define LS_PIVOTSp(S) ((sunindextype*) LS_CONTENT(S)->pivots->ptr) + +// Getrf scratch space size and memory +#define LS_F_SCRATCH_SIZE(S) (LS_CONTENT(S)->f_scratch_size) +#define LS_F_SCRATCH(S) (LS_CONTENT(S)->f_scratchpad) +#define LS_F_SCRATCHp(S) ((realtype*) LS_CONTENT(S)->f_scratchpad->ptr) + +// Getrs scratch space size and memory +#define LS_S_SCRATCH_SIZE(S) (LS_CONTENT(S)->s_scratch_size) +#define LS_S_SCRATCH(S) (LS_CONTENT(S)->s_scratchpad) +#define LS_S_SCRATCHp(S) ((realtype*) LS_CONTENT(S)->s_scratchpad->ptr) + +// Memory type, helper, and SYCL queue +#define LS_MEM_TYPE(S) (LS_CONTENT(S)->mem_type) +#define LS_MEM_HELPER(S) (LS_CONTENT(S)->mem_helper) +#define LS_QUEUE(S) (LS_CONTENT(S)->queue) + + +/* -------------------------------------------------------------------------- + * Constructors + * -------------------------------------------------------------------------- */ + + +SUNLinearSolver SUNLinSol_OneMklDense(N_Vector y, SUNMatrix Amat, SUNContext sunctx) +{ + int retval = 0; + + // Check inputs + if (!y || !Amat) + { + SUNDIALS_DEBUG_ERROR("Illegal input, y or A is NULL\n"); + return NULL; + } + + if (!(y->ops) || !(Amat->ops)) + { + SUNDIALS_DEBUG_ERROR("Illegal input, y->ops or A->ops is NULL\n"); + return NULL; + } + + if ( !(y->ops->nvgetlength) || !(y->ops->nvgetdevicearraypointer) || + !(Amat->ops->getid) ) + { + SUNDIALS_DEBUG_ERROR("Illegal input, y or A missing required operations\n"); + return NULL; + } + + // Check compatibility with supplied SUNMatrix + if (SUNMatGetID(Amat) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Illegal input, SUNMatID != SUNMATRIX_ONEMKLDENSE\n"); + return NULL; + } + + if (!(Amat->content)) + { + SUNDIALS_DEBUG_ERROR("Illegal input, SUNMatID != SUNMATRIX_ONEMKLDENSE\n"); + return NULL; + } + + SUNMatrixContent_OneMklDense A = (SUNMatrixContent_OneMklDense) Amat->content; + + // Check that the matrix is square + if (A->rows != A->cols) + { + SUNDIALS_DEBUG_ERROR("Illegal input, A is not square\n"); + return NULL; + } + + // Check that the matrix and vector dimensions agree + if (A->cols != N_VGetLength(y)) + { + SUNDIALS_DEBUG_ERROR("Illegal input, number of columns in A != length of y\n"); + return NULL; + } + + // Create the linear solver + SUNLinearSolver S = SUNLinSolNewEmpty(sunctx); + if (!S) + { + SUNDIALS_DEBUG_ERROR("SUNLinSolNewEmpty returned NULL\n"); + return NULL; + } + + // Attach operations + S->ops->gettype = SUNLinSolGetType_OneMklDense; + S->ops->getid = SUNLinSolGetID_OneMklDense; + S->ops->initialize = SUNLinSolInitialize_OneMklDense; + S->ops->setup = SUNLinSolSetup_OneMklDense; + S->ops->solve = SUNLinSolSolve_OneMklDense; + S->ops->lastflag = SUNLinSolLastFlag_OneMklDense; + S->ops->space = SUNLinSolSpace_OneMklDense; + S->ops->free = SUNLinSolFree_OneMklDense; + + // Create content + S->content = (SUNLinearSolverContent_OneMklDense) malloc(sizeof(_SUNLinearSolverContent_OneMklDense)); + if (!(S->content)) + { + SUNDIALS_DEBUG_ERROR("Content allocation failed\n"); + SUNLinSolFree(S); + return NULL; + } + + // Fill content + LS_CONTENT(S)->last_flag = 0; + LS_CONTENT(S)->rows = A->rows; + LS_CONTENT(S)->pivots = NULL; + LS_CONTENT(S)->f_scratch_size = 0; + LS_CONTENT(S)->f_scratchpad = NULL; + LS_CONTENT(S)->s_scratch_size = 0; + LS_CONTENT(S)->s_scratchpad = NULL; + LS_CONTENT(S)->mem_type = A->mem_type; + LS_CONTENT(S)->mem_helper = A->mem_helper; + LS_CONTENT(S)->queue = A->queue; + + // Allocate data + retval = SUNMemoryHelper_Alloc(LS_MEM_HELPER(S), &(LS_PIVOTS(S)), + A->rows * sizeof(sunindextype), + LS_MEM_TYPE(S), A->queue); + if (retval) + { + SUNDIALS_DEBUG_ERROR("Pivots allocation failed\n"); + SUNLinSolFree(S); + return NULL; + } + + // Compute scratchpad size for factorization and solve + ::sycl::queue* queue = A->queue; + sunindextype M = SUNMatrix_OneMklDense_BlockRows(Amat); + sunindextype N = SUNMatrix_OneMklDense_BlockColumns(Amat); + sunindextype num_blocks = SUNMatrix_OneMklDense_NumBlocks(Amat); + + if (num_blocks > 1) + { + LS_F_SCRATCH_SIZE(S) = + getrf_batch_scratchpad_size(*queue, // device queue + M, // rows in A_i + N, // columns in A_i + M, // leading dimension + M * N, // stride between A_i + M, // stride in P_i + num_blocks); // number of blocks + +#ifdef SUNDIALS_ONEMKL_USE_GETRS_BATCHED + LS_S_SCRATCH_SIZE(S)= + getrs_batch_scratchpad_size(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows in A_i + 1, // number of right-hand sides + M, // leading dimensino of A_i + M * N, // stride between A_i + M, // stride between pivots + M, // leading dimension of B_i + M, // stride between B_i + num_blocks); // number of blocks +#else + LS_S_SCRATCH_SIZE(S) = + getrs_scratchpad_size(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows in A + 1, // number of right-hand sizes + M, // leading dimension of A + M); // leading dimension of B +#endif + } + else + { + LS_F_SCRATCH_SIZE(S) = + getrf_scratchpad_size(*queue, // device queue + M, // rows in A_i + N, // columns in A_i + M); // leading dimension + + LS_S_SCRATCH_SIZE(S) = + getrs_scratchpad_size(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows in A + 1, // number of right-hand sizes + M, // leading dimension of A + M); // leading dimension of B + } + + // Allocate factorization scratchpad if necessary + retval = SUNMemoryHelper_Alloc(LS_MEM_HELPER(S), &(LS_F_SCRATCH(S)), + LS_F_SCRATCH_SIZE(S) * sizeof(realtype), + LS_MEM_TYPE(S), queue); + if (retval) + { + SUNDIALS_DEBUG_ERROR("Scratchpad allocation failed\n"); + SUNLinSolFree(S); + return NULL; + } + + // Allocate solve scratchpad if necessary + retval = SUNMemoryHelper_Alloc(LS_MEM_HELPER(S), &(LS_S_SCRATCH(S)), + LS_S_SCRATCH_SIZE(S) * sizeof(realtype), + LS_MEM_TYPE(S), queue); + if (retval) + { + SUNDIALS_DEBUG_ERROR("Scratchpad allocation failed\n"); + SUNLinSolFree(S); + return NULL; + } + + return S; +} + + +/* -------------------------------------------------------------------------- + * Implementation of SUNLinearSolver operations + * -------------------------------------------------------------------------- */ + + +int SUNLinSolInitialize_OneMklDense(SUNLinearSolver S) +{ + // All solver-specific memory has already been allocated + if (!S) + { + SUNDIALS_DEBUG_ERROR("Linear solver is NULL\n"); + return SUNLS_MEM_NULL; + } + + LS_LASTFLAG(S) = SUNLS_SUCCESS; + return SUNLS_SUCCESS; +} + + +int SUNLinSolSetup_OneMklDense(SUNLinearSolver S, SUNMatrix A) +{ + // Check for valid inputs + if (!S) + { + SUNDIALS_DEBUG_ERROR("Linear solver is NULL\n"); + return SUNLS_MEM_NULL; + } + + if (!A) + { + SUNDIALS_DEBUG_ERROR("Matrix is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_NULL; + return SUNLS_MEM_NULL; + } + + // Ensure that A is a oneMKL dense matrix + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Matrix is not the oneMKL matrix\n"); + LS_LASTFLAG(S) = SUNLS_ILL_INPUT; + return SUNLS_ILL_INPUT; + } + + // Access A matrix data array + realtype* Adata = SUNMatrix_OneMklDense_Data(A); + if (!Adata) + { + SUNDIALS_DEBUG_ERROR("Matrix data array is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_FAIL; + return SUNLS_MEM_FAIL; + } + + // Access pivots data array + sunindextype* pivots = LS_PIVOTSp(S); + if (!pivots) + { + SUNDIALS_DEBUG_ERROR("Matrix data array is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_FAIL; + return SUNLS_MEM_FAIL; + } + + // Call oneMKL to do LU factorization of A + ::sycl::queue* queue = LS_QUEUE(S); + sunindextype ier = 0; + sunindextype M = SUNMatrix_OneMklDense_BlockRows(A); + sunindextype N = SUNMatrix_OneMklDense_BlockColumns(A); + sunindextype num_blocks = SUNMatrix_OneMklDense_NumBlocks(A); + sunindextype scratch_size = LS_F_SCRATCH_SIZE(S); + realtype* scratchpad = LS_F_SCRATCHp(S); + + if (num_blocks > 1) + { + try + { + getrf_batch(*queue, // device queue + M, // number of block rows + N, // number of block columns + Adata, // matrix data + M, // leading dimension of A + M * N, // stride between A_i + pivots, // array of pivots + M, // stride between P_i + num_blocks, // number of blocks + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrf_batch\n"); + if (e.info()) + { + // An illegal value was providied or the scratch pad is too small + ier = -1; + } + else + { + // The diagonal element of some of U_i is zero + ier = 1; + } + } + } + else + { + try + { + getrf(*queue, // device queue + M, // number of rows + N, // number of columns + Adata, // matrix data + M, // leading dimension of A + pivots, // array of pivots + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrf\n"); + if (e.info()) + { + // An illegal value was providied or the scratch pad is too small + ier = -1; + } + else + { + // The diagonal element of some of U_i is zero + ier = 1; + } + } + } + + if (ier > 0) + { + LS_LASTFLAG(S) = ier; + return SUNLS_LUFACT_FAIL; + } + + if (ier < 0) + { + LS_LASTFLAG(S) = ier; + return SUNLS_PACKAGE_FAIL_UNREC; + } + + LS_LASTFLAG(S) = SUNLS_SUCCESS; + return SUNLS_SUCCESS; +} + + +int SUNLinSolSolve_OneMklDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + // Check for valid inputs + if (!S) + { + SUNDIALS_DEBUG_ERROR("Linear solver is NULL\n"); + return SUNLS_MEM_NULL; + } + + if (!A || !x || !b) + { + SUNDIALS_DEBUG_ERROR("A, x, or b is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_NULL; + return SUNLS_MEM_NULL; + } + + // Ensure that A is a onemkl dense matrix + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Matrix is not the oneMKL matrix\n"); + LS_LASTFLAG(S) = SUNLS_ILL_INPUT; + return SUNLS_ILL_INPUT; + } + + // Copy b into x + N_VScale(RCONST(1.0), b, x); + + // Access x vector data array + realtype* xdata = N_VGetDeviceArrayPointer(x); + if (!xdata) + { + SUNDIALS_DEBUG_ERROR("Vector data array is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_FAIL; + return SUNLS_MEM_FAIL; + } + + // Access A matrix data array + realtype* Adata = SUNMatrix_OneMklDense_Data(A); + if (!Adata) + { + SUNDIALS_DEBUG_ERROR("Matrix data array is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_FAIL; + return SUNLS_MEM_FAIL; + } + + // Access pivots data array + sunindextype* pivots = LS_PIVOTSp(S); + if (!pivots) + { + SUNDIALS_DEBUG_ERROR("Matrix data array is NULL\n"); + LS_LASTFLAG(S) = SUNLS_MEM_FAIL; + return SUNLS_MEM_FAIL; + } + + // Call oneMKL to solve the linear system + sunindextype ier = 0; + ::sycl::queue* queue = LS_QUEUE(S); + sunindextype M = SUNMatrix_OneMklDense_BlockRows(A); + sunindextype N = SUNMatrix_OneMklDense_BlockColumns(A); + sunindextype num_blocks = SUNMatrix_OneMklDense_NumBlocks(A); + sunindextype scratch_size = LS_S_SCRATCH_SIZE(S); + realtype* scratchpad = LS_S_SCRATCHp(S); + + if (num_blocks > 1) + { +#ifdef SUNDIALS_ONEMKL_USE_GETRS_BATCHED + try + { + getrs_batch(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows + 1, // number of right-hand sides + Adata, // factorized matrix data + M, // leading dimension of A_i + M * N, // stride between A_i + pivots, // array of pivots + M, // stride between pivots + xdata, // right-hand side data + M, // leading dimension of B_i + M, // stride between B_i + num_blocks, // number of blocks + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrs_batch\n"); + ier = -1; + } +#else + try + { + for (sunindextype i = 0; i < num_blocks; i++) + { + getrs(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows + 1, // number of right-hand sides + Adata + i * M * N, // factorized matrix data + M, // leading dimension of A + pivots, // array of pivots + xdata + i * M, // right-hand side data + M, // leading dimension of B_i + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrs\n"); + ier = -1; + } +#endif + } + else + { + try + { + getrs(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows + 1, // number of right-hand sides + Adata, // factorized matrix data + M, // leading dimension of A + pivots, // array of pivots + xdata, // right-hand side data + M, // leading dimension of B_i + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrs\n"); + ier = -1; + } + } + + if (ier < 0) + { + LS_LASTFLAG(S) = ier; + return SUNLS_PACKAGE_FAIL_UNREC; + } + + LS_LASTFLAG(S) = SUNLS_SUCCESS; + return SUNLS_SUCCESS; +} + + +sunindextype SUNLinSolLastFlag_OneMklDense(SUNLinearSolver S) +{ + // return the stored 'last_flag' value + if (!S) + { + SUNDIALS_DEBUG_ERROR("Linear solver is NULL\n"); + return SUNLS_MEM_NULL; + } + + return LS_LASTFLAG(S); +} + + +int SUNLinSolSpace_OneMklDense(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + if (!S) + { + SUNDIALS_DEBUG_ERROR("Linear solver is NULL\n"); + return SUNLS_MEM_NULL; + } + + *lenrwLS = 0; + *leniwLS = 2 + LS_CONTENT(S)->rows; + + LS_LASTFLAG(S) = SUNLS_SUCCESS; + return SUNLS_SUCCESS; +} + + +int SUNLinSolFree_OneMklDense(SUNLinearSolver S) +{ + // return if S is already free + if (!S) return SUNLS_SUCCESS; + + // delete items from contents, then delete generic structure + if (S->content) + { + // Pivots memory + if (LS_PIVOTS(S)) + { + SUNMemoryHelper_Dealloc(LS_MEM_HELPER(S), LS_PIVOTS(S), LS_QUEUE(S)); + } + + // Factorization scrach memory + if (LS_F_SCRATCH(S)) + { + SUNMemoryHelper_Dealloc(LS_MEM_HELPER(S), LS_F_SCRATCH(S), LS_QUEUE(S)); + } + LS_F_SCRATCH_SIZE(S) = 0; + + // Solve scratch memory + if (LS_S_SCRATCH(S)) + { + SUNMemoryHelper_Dealloc(LS_MEM_HELPER(S), LS_S_SCRATCH(S), LS_QUEUE(S)); + } + LS_S_SCRATCH_SIZE(S) = 0; + } + + SUNLinSolFreeEmpty(S); + S = NULL; + + return SUNLS_SUCCESS; +} diff --git a/src/lib/sunlinsol/pcg/sunlinsol_pcg.c b/src/lib/sunlinsol/pcg/sunlinsol_pcg.c new file mode 100644 index 0000000..9f3f330 --- /dev/null +++ b/src/lib/sunlinsol/pcg/sunlinsol_pcg.c @@ -0,0 +1,609 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds, Ashley Crawford @ SMU + * Based on sundials_pcg.c code, written by Daniel Reynolds @ SMU + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the PCG implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * PCG solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define PCG_CONTENT(S) ( (SUNLinearSolverContent_PCG)(S->content) ) +#define PRETYPE(S) ( PCG_CONTENT(S)->pretype ) +#define LASTFLAG(S) ( PCG_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new PCG linear solver + */ + +SUNLinearSolver SUNLinSol_PCG(N_Vector y, int pretype, int maxl, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_PCG content; + + /* check for legal pretype and maxl values; if illegal use defaults */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) + pretype = SUN_PREC_NONE; + if (maxl <= 0) + maxl = SUNPCG_MAXL_DEFAULT; + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_PCG; + S->ops->getid = SUNLinSolGetID_PCG; + S->ops->setatimes = SUNLinSolSetATimes_PCG; + S->ops->setpreconditioner = SUNLinSolSetPreconditioner_PCG; + S->ops->setscalingvectors = SUNLinSolSetScalingVectors_PCG; + S->ops->setzeroguess = SUNLinSolSetZeroGuess_PCG; + S->ops->initialize = SUNLinSolInitialize_PCG; + S->ops->setup = SUNLinSolSetup_PCG; + S->ops->solve = SUNLinSolSolve_PCG; + S->ops->numiters = SUNLinSolNumIters_PCG; + S->ops->resnorm = SUNLinSolResNorm_PCG; + S->ops->resid = SUNLinSolResid_PCG; + S->ops->lastflag = SUNLinSolLastFlag_PCG; + S->ops->space = SUNLinSolSpace_PCG; + S->ops->free = SUNLinSolFree_PCG; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_PCG) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->maxl = maxl; + content->pretype = pretype; + content->zeroguess = SUNFALSE; + content->numiters = 0; + content->resnorm = ZERO; + content->r = NULL; + content->p = NULL; + content->z = NULL; + content->Ap = NULL; + content->s = NULL; + content->ATimes = NULL; + content->ATData = NULL; + content->Psetup = NULL; + content->Psolve = NULL; + content->PData = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Allocate content */ + content->r = N_VClone(y); + if (content->r == NULL) { SUNLinSolFree(S); return NULL; } + + content->p = N_VClone(y); + if (content->p == NULL) { SUNLinSolFree(S); return NULL; } + + content->z = N_VClone(y); + if (content->z == NULL) { SUNLinSolFree(S); return NULL; } + + content->Ap = N_VClone(y); + if (content->Ap == NULL) { SUNLinSolFree(S); return NULL; } + + return(S); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of preconditioning for PCG to use + */ + +int SUNLinSol_PCGSetPrecType(SUNLinearSolver S, int pretype) +{ + /* Check for legal pretype */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + PRETYPE(S) = pretype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the maximum number of iterations for PCG to use + */ + +int SUNLinSol_PCGSetMaxl(SUNLinearSolver S, int maxl) +{ + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Check for legal number of iters */ + if (maxl <= 0) + maxl = SUNPCG_MAXL_DEFAULT; + + /* Set max iters */ + PCG_CONTENT(S)->maxl = maxl; + return(SUNLS_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_PCG(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_ITERATIVE); +} + + +SUNLinearSolver_ID SUNLinSolGetID_PCG(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_PCG); +} + + +int SUNLinSolInitialize_PCG(SUNLinearSolver S) +{ + /* ensure valid options */ + if (S == NULL) return(SUNLS_MEM_NULL); + + if (PCG_CONTENT(S)->maxl <= 0) + PCG_CONTENT(S)->maxl = SUNPCG_MAXL_DEFAULT; + + if (PCG_CONTENT(S)->ATimes == NULL) { + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + if ( (PRETYPE(S) != SUN_PREC_LEFT) && + (PRETYPE(S) != SUN_PREC_RIGHT) && + (PRETYPE(S) != SUN_PREC_BOTH) ) + PRETYPE(S) = SUN_PREC_NONE; + + if ((PRETYPE(S) != SUN_PREC_NONE) && (PCG_CONTENT(S)->Psolve == NULL)) { + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* no additional memory to allocate */ + + /* return with success */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetATimes_PCG(SUNLinearSolver S, void* ATData, + SUNATimesFn ATimes) +{ + /* set function pointers to integrator-supplied ATimes routine + and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + PCG_CONTENT(S)->ATimes = ATimes; + PCG_CONTENT(S)->ATData = ATData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetPreconditioner_PCG(SUNLinearSolver S, void* PData, + SUNPSetupFn Psetup, SUNPSolveFn Psolve) +{ + /* set function pointers to integrator-supplied Psetup and PSolve + routines and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + PCG_CONTENT(S)->Psetup = Psetup; + PCG_CONTENT(S)->Psolve = Psolve; + PCG_CONTENT(S)->PData = PData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetScalingVectors_PCG(SUNLinearSolver S, N_Vector s, + N_Vector nul) +{ + /* set N_Vector pointer to integrator-supplied scaling vector + (only use the first one), and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + PCG_CONTENT(S)->s = s; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetZeroGuess_PCG(SUNLinearSolver S, booleantype onoff) +{ + /* set flag indicating a zero initial guess */ + if (S == NULL) return(SUNLS_MEM_NULL); + PCG_CONTENT(S)->zeroguess = onoff; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetup_PCG(SUNLinearSolver S, SUNMatrix nul) +{ + int ier; + SUNPSetupFn Psetup; + void* PData; + + /* Set shortcuts to PCG memory structures */ + if (S == NULL) return(SUNLS_MEM_NULL); + Psetup = PCG_CONTENT(S)->Psetup; + PData = PCG_CONTENT(S)->PData; + + /* no solver-specific setup is required, but if user-supplied + Psetup routine exists, call that here */ + if (Psetup != NULL) { + ier = Psetup(PData); + if (ier != 0) { + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSET_FAIL_UNREC : SUNLS_PSET_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* return with success */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSolve_PCG(SUNLinearSolver S, SUNMatrix nul, N_Vector x, + N_Vector b, realtype delta) +{ + /* local data and shortcut variables */ + realtype alpha, beta, r0_norm, rho, rz, rz_old; + N_Vector r, p, z, Ap, w; + booleantype UsePrec, UseScaling, converged; + booleantype *zeroguess; + int l, l_max, pretype, ier; + void *A_data, *P_data; + SUNATimesFn atimes; + SUNPSolveFn psolve; + realtype *res_norm; + int *nli; + + /* Make local shorcuts to solver variables. */ + if (S == NULL) return(SUNLS_MEM_NULL); + l_max = PCG_CONTENT(S)->maxl; + r = PCG_CONTENT(S)->r; + p = PCG_CONTENT(S)->p; + z = PCG_CONTENT(S)->z; + Ap = PCG_CONTENT(S)->Ap; + w = PCG_CONTENT(S)->s; + A_data = PCG_CONTENT(S)->ATData; + P_data = PCG_CONTENT(S)->PData; + atimes = PCG_CONTENT(S)->ATimes; + psolve = PCG_CONTENT(S)->Psolve; + pretype = PCG_CONTENT(S)->pretype; + zeroguess = &(PCG_CONTENT(S)->zeroguess); + nli = &(PCG_CONTENT(S)->numiters); + res_norm = &(PCG_CONTENT(S)->resnorm); + + /* Initialize counters and convergence flag */ + *nli = 0; + converged = SUNFALSE; + + /* set booleantype flags for internal solver options */ + UsePrec = ( (pretype == SUN_PREC_BOTH) || + (pretype == SUN_PREC_LEFT) || + (pretype == SUN_PREC_RIGHT) ); + UseScaling = (w != NULL); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + if (PCG_CONTENT(S)->print_level && PCG_CONTENT(S)->info_file + && (PCG_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + fprintf(PCG_CONTENT(S)->info_file, "SUNLINSOL_PCG:\n"); +#endif + + /* Check if Atimes function has been set */ + if (atimes == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + /* If preconditioning, check if psolve has been set */ + if (UsePrec && psolve == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* Set r to initial residual r_0 = b - A*x_0 */ + if (*zeroguess) { + N_VScale(ONE, b, r); + } else { + ier = atimes(A_data, x, r); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + N_VLinearSum(ONE, b, -ONE, r, r); + } + + /* Set rho to scaled L2 norm of r, and return if small */ + if (UseScaling) N_VProd(r, w, Ap); + else N_VScale(ONE, r, Ap); + *res_norm = r0_norm = rho = SUNRsqrt(N_VDotProd(Ap, Ap)); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print initial residual */ + if (PCG_CONTENT(S)->print_level && PCG_CONTENT(S)->info_file + && (PCG_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(PCG_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) 0, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_PCG", "initial-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (rho <= delta) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); + } + + /* Apply preconditioner and b-scaling to r = r_0 */ + if (UsePrec) { + ier = psolve(P_data, r, z, delta, SUN_PREC_LEFT); /* z = P^{-1}r */ + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, r, z); + + /* Initialize rz to */ + rz = N_VDotProd(r, z); + + /* Copy z to p */ + N_VScale(ONE, z, p); + + /* Begin main iteration loop */ + for(l=0; l / */ + alpha = rz / N_VDotProd(Ap, p); + + /* Update x = x + alpha*p */ + if (l == 0 && *zeroguess) + N_VScale(alpha, p, x); + else + N_VLinearSum(ONE, x, alpha, p, x); + + /* Update r = r - alpha*Ap */ + N_VLinearSum(ONE, r, -alpha, Ap, r); + + /* Set rho and check convergence */ + if (UseScaling) N_VProd(r, w, Ap); + else N_VScale(ONE, r, Ap); + *res_norm = rho = SUNRsqrt(N_VDotProd(Ap, Ap)); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the residual */ + if (PCG_CONTENT(S)->print_level && PCG_CONTENT(S)->info_file + && (PCG_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(PCG_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) *nli, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_PCG", "iterate-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (rho <= delta) { + converged = SUNTRUE; + break; + } + + /* Exit early on last iteration */ + if (l == l_max - 1) break; + + /* Apply preconditioner: z = P^{-1}*r */ + if (UsePrec) { + ier = psolve(P_data, r, z, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, r, z); + + /* update rz */ + rz_old = rz; + rz = N_VDotProd(r, z); + + /* Calculate beta = / */ + beta = rz / rz_old; + + /* Update p = z + beta*p */ + N_VLinearSum(ONE, z, beta, p, p); + } + + /* Main loop finished, return with result */ + *zeroguess = SUNFALSE; + if (converged == SUNTRUE) { + LASTFLAG(S) = SUNLS_SUCCESS; + } else if (rho < r0_norm) { + LASTFLAG(S) = SUNLS_RES_REDUCED; + } else { + LASTFLAG(S) = SUNLS_CONV_FAIL; + } + return(LASTFLAG(S)); +} + + + + +int SUNLinSolNumIters_PCG(SUNLinearSolver S) +{ + /* return the stored 'numiters' value */ + if (S == NULL) return(-1); + return (PCG_CONTENT(S)->numiters); +} + + +realtype SUNLinSolResNorm_PCG(SUNLinearSolver S) +{ + /* return the stored 'resnorm' value */ + if (S == NULL) return(-ONE); + return (PCG_CONTENT(S)->resnorm); +} + + +N_Vector SUNLinSolResid_PCG(SUNLinearSolver S) +{ + /* return the stored 'r' vector */ + return (PCG_CONTENT(S)->r); +} + + +sunindextype SUNLinSolLastFlag_PCG(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return (LASTFLAG(S)); +} + + +int SUNLinSolSpace_PCG(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + sunindextype liw1, lrw1; + N_VSpace(PCG_CONTENT(S)->r, &lrw1, &liw1); + *lenrwLS = 1 + lrw1*4; + *leniwLS = 4 + liw1*4; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_PCG(SUNLinearSolver S) +{ + if (S == NULL) return(SUNLS_SUCCESS); + + if (S->content) { + /* delete items from within the content structure */ + if (PCG_CONTENT(S)->r) { + N_VDestroy(PCG_CONTENT(S)->r); + PCG_CONTENT(S)->r = NULL; + } + if (PCG_CONTENT(S)->p) { + N_VDestroy(PCG_CONTENT(S)->p); + PCG_CONTENT(S)->p = NULL; + } + if (PCG_CONTENT(S)->z) { + N_VDestroy(PCG_CONTENT(S)->z); + PCG_CONTENT(S)->z = NULL; + } + if (PCG_CONTENT(S)->Ap) { + N_VDestroy(PCG_CONTENT(S)->Ap); + PCG_CONTENT(S)->Ap = NULL; + } + free(S->content); S->content = NULL; + } + if (S->ops) { free(S->ops); S->ops = NULL; } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetInfoFile_PCG(SUNLinearSolver S, + FILE* info_file) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + PCG_CONTENT(S)->info_file = info_file; + + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetPrintLevel_PCG(SUNLinearSolver S, + int print_level) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUNLS_ILL_INPUT); + + PCG_CONTENT(S)->print_level = print_level; + + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c b/src/lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c new file mode 100644 index 0000000..9999e4e --- /dev/null +++ b/src/lib/sunlinsol/spbcgs/sunlinsol_spbcgs.c @@ -0,0 +1,811 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on sundials_spbcgs.c code, written by Peter Brown and + * Aaron Collier @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the SPBCGS implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * SPBCGS solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define SPBCGS_CONTENT(S) ( (SUNLinearSolverContent_SPBCGS)(S->content) ) +#define PRETYPE(S) ( SPBCGS_CONTENT(S)->pretype ) +#define LASTFLAG(S) ( SPBCGS_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new SPBCGS linear solver + */ + +SUNLinearSolver SUNLinSol_SPBCGS(N_Vector y, int pretype, int maxl, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_SPBCGS content; + + /* check for legal pretype and maxl values; if illegal use defaults */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) + pretype = SUN_PREC_NONE; + if (maxl <= 0) + maxl = SUNSPBCGS_MAXL_DEFAULT; + + /* check that the supplied N_Vector supports all requisite operations */ + if ( (y->ops->nvclone == NULL) || (y->ops->nvdestroy == NULL) || + (y->ops->nvlinearsum == NULL) || (y->ops->nvprod == NULL) || + (y->ops->nvdiv == NULL) || (y->ops->nvscale == NULL) || + (y->ops->nvdotprod == NULL) ) + return(NULL); + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_SPBCGS; + S->ops->getid = SUNLinSolGetID_SPBCGS; + S->ops->setatimes = SUNLinSolSetATimes_SPBCGS; + S->ops->setpreconditioner = SUNLinSolSetPreconditioner_SPBCGS; + S->ops->setscalingvectors = SUNLinSolSetScalingVectors_SPBCGS; + S->ops->setzeroguess = SUNLinSolSetZeroGuess_SPBCGS; + S->ops->initialize = SUNLinSolInitialize_SPBCGS; + S->ops->setup = SUNLinSolSetup_SPBCGS; + S->ops->solve = SUNLinSolSolve_SPBCGS; + S->ops->numiters = SUNLinSolNumIters_SPBCGS; + S->ops->resnorm = SUNLinSolResNorm_SPBCGS; + S->ops->resid = SUNLinSolResid_SPBCGS; + S->ops->lastflag = SUNLinSolLastFlag_SPBCGS; + S->ops->space = SUNLinSolSpace_SPBCGS; + S->ops->free = SUNLinSolFree_SPBCGS; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_SPBCGS) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->maxl = maxl; + content->pretype = pretype; + content->zeroguess = SUNFALSE; + content->numiters = 0; + content->resnorm = ZERO; + content->r_star = NULL; + content->r = NULL; + content->p = NULL; + content->q = NULL; + content->u = NULL; + content->Ap = NULL; + content->vtemp = NULL; + content->s1 = NULL; + content->s2 = NULL; + content->ATimes = NULL; + content->ATData = NULL; + content->Psetup = NULL; + content->Psolve = NULL; + content->PData = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Allocate content */ + content->r_star = N_VClone(y); + if (content->r_star == NULL) { SUNLinSolFree(S); return(NULL); } + + content->r = N_VClone(y); + if (content->r == NULL) { SUNLinSolFree(S); return(NULL); } + + content->p = N_VClone(y); + if (content->p == NULL) { SUNLinSolFree(S); return(NULL); } + + content->q = N_VClone(y); + if (content->q == NULL) { SUNLinSolFree(S); return(NULL); } + + content->u = N_VClone(y); + if (content->u == NULL) { SUNLinSolFree(S); return(NULL); } + + content->Ap = N_VClone(y); + if (content->Ap == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp = N_VClone(y); + if (content->vtemp == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of preconditioning for SPBCGS to use + */ + +int SUNLinSol_SPBCGSSetPrecType(SUNLinearSolver S, int pretype) +{ + /* Check for legal pretype */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + PRETYPE(S) = pretype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the maximum number of iterations for SPBCGS to use + */ + +int SUNLinSol_SPBCGSSetMaxl(SUNLinearSolver S, int maxl) +{ + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Check for legal pretype */ + if (maxl <= 0) + maxl = SUNSPBCGS_MAXL_DEFAULT; + + /* Set pretype */ + SPBCGS_CONTENT(S)->maxl = maxl; + return(SUNLS_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_SPBCGS(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_ITERATIVE); +} + + +SUNLinearSolver_ID SUNLinSolGetID_SPBCGS(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_SPBCGS); +} + + +int SUNLinSolInitialize_SPBCGS(SUNLinearSolver S) +{ + /* ensure valid options */ + if (S == NULL) return(SUNLS_MEM_NULL); + + if (SPBCGS_CONTENT(S)->maxl <= 0) + SPBCGS_CONTENT(S)->maxl = SUNSPBCGS_MAXL_DEFAULT; + + if (SPBCGS_CONTENT(S)->ATimes == NULL) { + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + if ( (PRETYPE(S) != SUN_PREC_LEFT) && + (PRETYPE(S) != SUN_PREC_RIGHT) && + (PRETYPE(S) != SUN_PREC_BOTH) ) + PRETYPE(S) = SUN_PREC_NONE; + + if ((PRETYPE(S) != SUN_PREC_NONE) && (SPBCGS_CONTENT(S)->Psolve == NULL)) { + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* no additional memory to allocate */ + + /* return with success */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetATimes_SPBCGS(SUNLinearSolver S, void* ATData, + SUNATimesFn ATimes) +{ + /* set function pointers to integrator-supplied ATimes routine + and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPBCGS_CONTENT(S)->ATimes = ATimes; + SPBCGS_CONTENT(S)->ATData = ATData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetPreconditioner_SPBCGS(SUNLinearSolver S, void* PData, + SUNPSetupFn Psetup, SUNPSolveFn Psolve) +{ + /* set function pointers to integrator-supplied Psetup and PSolve + routines and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPBCGS_CONTENT(S)->Psetup = Psetup; + SPBCGS_CONTENT(S)->Psolve = Psolve; + SPBCGS_CONTENT(S)->PData = PData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetScalingVectors_SPBCGS(SUNLinearSolver S, N_Vector s1, + N_Vector s2) +{ + /* set N_Vector pointers to integrator-supplied scaling vectors, + and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPBCGS_CONTENT(S)->s1 = s1; + SPBCGS_CONTENT(S)->s2 = s2; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetZeroGuess_SPBCGS(SUNLinearSolver S, booleantype onoff) +{ + /* set flag indicating a zero initial guess */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPBCGS_CONTENT(S)->zeroguess = onoff; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetup_SPBCGS(SUNLinearSolver S, SUNMatrix A) +{ + int ier; + SUNPSetupFn Psetup; + void* PData; + + /* Set shortcuts to SPBCGS memory structures */ + if (S == NULL) return(SUNLS_MEM_NULL); + Psetup = SPBCGS_CONTENT(S)->Psetup; + PData = SPBCGS_CONTENT(S)->PData; + + /* no solver-specific setup is required, but if user-supplied + Psetup routine exists, call that here */ + if (Psetup != NULL) { + ier = Psetup(PData); + if (ier != 0) { + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSET_FAIL_UNREC : SUNLS_PSET_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* return with success */ + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSolve_SPBCGS(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype delta) +{ + /* local data and shortcut variables */ + realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho; + N_Vector r_star, r, p, q, u, Ap, vtemp; + booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; + booleantype *zeroguess; + int l, l_max, ier; + void *A_data, *P_data; + N_Vector sx, sb; + SUNATimesFn atimes; + SUNPSolveFn psolve; + realtype *res_norm; + int *nli; + + /* local variables for fused vector operations */ + realtype cv[3]; + N_Vector Xv[3]; + + /* Make local shorcuts to solver variables. */ + if (S == NULL) return(SUNLS_MEM_NULL); + l_max = SPBCGS_CONTENT(S)->maxl; + r_star = SPBCGS_CONTENT(S)->r_star; + r = SPBCGS_CONTENT(S)->r; + p = SPBCGS_CONTENT(S)->p; + q = SPBCGS_CONTENT(S)->q; + u = SPBCGS_CONTENT(S)->u; + Ap = SPBCGS_CONTENT(S)->Ap; + vtemp = SPBCGS_CONTENT(S)->vtemp; + sb = SPBCGS_CONTENT(S)->s1; + sx = SPBCGS_CONTENT(S)->s2; + A_data = SPBCGS_CONTENT(S)->ATData; + P_data = SPBCGS_CONTENT(S)->PData; + atimes = SPBCGS_CONTENT(S)->ATimes; + psolve = SPBCGS_CONTENT(S)->Psolve; + zeroguess = &(SPBCGS_CONTENT(S)->zeroguess); + nli = &(SPBCGS_CONTENT(S)->numiters); + res_norm = &(SPBCGS_CONTENT(S)->resnorm); + + /* Initialize counters and convergence flag */ + *nli = 0; + converged = SUNFALSE; + + /* set booleantype flags for internal solver options */ + preOnLeft = ( (PRETYPE(S) == SUN_PREC_LEFT) || + (PRETYPE(S) == SUN_PREC_BOTH) ); + preOnRight = ( (PRETYPE(S) == SUN_PREC_RIGHT) || + (PRETYPE(S) == SUN_PREC_BOTH) ); + scale_x = (sx != NULL); + scale_b = (sb != NULL); + + /* Check for unsupported use case */ + if (preOnRight && !(*zeroguess)) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + if (SPBCGS_CONTENT(S)->print_level && SPBCGS_CONTENT(S)->info_file + && (SPBCGS_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + fprintf(SPBCGS_CONTENT(S)->info_file, "SUNLINSOL_SPBCGS:\n"); +#endif + + /* Check if Atimes function has been set */ + if (atimes == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + /* If preconditioning, check if psolve has been set */ + if ((preOnLeft || preOnRight) && psolve == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */ + + if (*zeroguess) { + N_VScale(ONE, b, r_star); + } else { + ier = atimes(A_data, x, r_star); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + N_VLinearSum(ONE, b, -ONE, r_star, r_star); + } + + /* Apply left preconditioner and b-scaling to r_star = r_0 */ + + if (preOnLeft) { + ier = psolve(P_data, r_star, r, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, r_star, r); + + if (scale_b) N_VProd(sb, r, r_star); + else N_VScale(ONE, r, r_star); + + /* Initialize beta_denom to the dot product of r0 with r0 */ + + beta_denom = N_VDotProd(r_star, r_star); + + /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and + return if small */ + + *res_norm = r_norm = rho = SUNRsqrt(beta_denom); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print the initial residual */ + if (SPBCGS_CONTENT(S)->print_level && SPBCGS_CONTENT(S)->info_file + && (SPBCGS_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPBCGS_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) 0, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPBCGS", "initial-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (r_norm <= delta) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); + } + + /* Copy r_star to r and p */ + + N_VScale(ONE, r_star, r); + N_VScale(ONE, r_star, p); + + /* Set x = sx x if non-zero guess */ + if (scale_x && !(*zeroguess)) N_VProd(sx, x, x); + + /* Begin main iteration loop */ + + for(l = 0; l < l_max; l++) { + + (*nli)++; + + /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */ + + /* Apply x-scaling: vtemp = sx_inv p */ + + if (scale_x) N_VDiv(p, sx, vtemp); + else N_VScale(ONE, p, vtemp); + + /* Apply right preconditioner: vtemp = P2_inv sx_inv p */ + + if (preOnRight) { + N_VScale(ONE, vtemp, Ap); + ier = psolve(P_data, Ap, vtemp, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* Apply A: Ap = A P2_inv sx_inv p */ + + ier = atimes(A_data, vtemp, Ap ); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + + /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ + + if (preOnLeft) { + ier = psolve(P_data, Ap, vtemp, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, Ap, vtemp); + + /* Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */ + + if (scale_b) N_VProd(sb, vtemp, Ap); + else N_VScale(ONE, vtemp, Ap); + + + /* Calculate alpha = / */ + + alpha = ((beta_denom / N_VDotProd(Ap, r_star))); + + /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */ + + N_VLinearSum(ONE, r, -alpha, Ap, q); + + /* Generate u = A-tilde q */ + + /* Apply x-scaling: vtemp = sx_inv q */ + + if (scale_x) N_VDiv(q, sx, vtemp); + else N_VScale(ONE, q, vtemp); + + /* Apply right preconditioner: vtemp = P2_inv sx_inv q */ + + if (preOnRight) { + N_VScale(ONE, vtemp, u); + ier = psolve(P_data, u, vtemp, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* Apply A: u = A P2_inv sx_inv u */ + + ier = atimes(A_data, vtemp, u ); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + + /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ + + if (preOnLeft) { + ier = psolve(P_data, u, vtemp, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, u, vtemp); + + /* Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */ + + if (scale_b) N_VProd(sb, vtemp, u); + else N_VScale(ONE, vtemp, u); + + + /* Calculate omega = / */ + + omega_denom = N_VDotProd(u, u); + if (omega_denom == ZERO) omega_denom = ONE; + omega = (N_VDotProd(u, q) / omega_denom); + + /* Update x = x + alpha*p + omega*q */ + if (l == 0 && *zeroguess) { + N_VLinearSum(alpha, p, omega, q, x); + } else { + cv[0] = ONE; + Xv[0] = x; + + cv[1] = alpha; + Xv[1] = p; + + cv[2] = omega; + Xv[2] = q; + + ier = N_VLinearCombination(3, cv, Xv, x); + if (ier != SUNLS_SUCCESS) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_VECTOROP_ERR; + return(SUNLS_VECTOROP_ERR); + } + + } + + /* Update the residual r = q - omega*u */ + + N_VLinearSum(ONE, q, -omega, u, r); + + /* Set rho = norm(r) and check convergence */ + + *res_norm = rho = SUNRsqrt(N_VDotProd(r, r)); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the residual */ + if (SPBCGS_CONTENT(S)->print_level && SPBCGS_CONTENT(S)->info_file + && (SPBCGS_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPBCGS_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) *nli, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPBCGS", "iterate-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (rho <= delta) { + converged = SUNTRUE; + break; + } + + /* Not yet converged, continue iteration */ + /* Update beta = / * alpha / omega */ + + beta_num = N_VDotProd(r, r_star); + beta = ((beta_num / beta_denom) * (alpha / omega)); + + /* Update p = r + beta*(p - omega*Ap) = beta*p - beta*omega*Ap + r */ + cv[0] = beta; + Xv[0] = p; + + cv[1] = -alpha*(beta_num / beta_denom); + Xv[1] = Ap; + + cv[2] = ONE; + Xv[2] = r; + + ier = N_VLinearCombination(3, cv, Xv, p); + if (ier != SUNLS_SUCCESS) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_VECTOROP_ERR; + return(SUNLS_VECTOROP_ERR); + } + + /* udpate beta_denom for next iteration */ + beta_denom = beta_num; + } + + /* Main loop finished */ + + if ((converged == SUNTRUE) || (rho < r_norm)) { + + /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */ + + if (scale_x) N_VDiv(x, sx, x); + if (preOnRight) { + ier = psolve(P_data, x, vtemp, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + N_VScale(ONE, vtemp, x); + } + + *zeroguess = SUNFALSE; + if (converged == SUNTRUE) + LASTFLAG(S) = SUNLS_SUCCESS; + else + LASTFLAG(S) = SUNLS_RES_REDUCED; + return(LASTFLAG(S)); + + } + else { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_CONV_FAIL; + return(LASTFLAG(S)); + } +} + + +int SUNLinSolNumIters_SPBCGS(SUNLinearSolver S) +{ + /* return the stored 'numiters' value */ + if (S == NULL) return(-1); + return (SPBCGS_CONTENT(S)->numiters); +} + + +realtype SUNLinSolResNorm_SPBCGS(SUNLinearSolver S) +{ + /* return the stored 'resnorm' value */ + if (S == NULL) return(-ONE); + return (SPBCGS_CONTENT(S)->resnorm); +} + + +N_Vector SUNLinSolResid_SPBCGS(SUNLinearSolver S) +{ + /* return the stored 'r' vector */ + return (SPBCGS_CONTENT(S)->r); +} + + +sunindextype SUNLinSolLastFlag_SPBCGS(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return (LASTFLAG(S)); +} + + +int SUNLinSolSpace_SPBCGS(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + sunindextype liw1, lrw1; + if (SPBCGS_CONTENT(S)->vtemp->ops->nvspace) + N_VSpace(SPBCGS_CONTENT(S)->vtemp, &lrw1, &liw1); + else + lrw1 = liw1 = 0; + *lenrwLS = lrw1*9; + *leniwLS = liw1*9; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolFree_SPBCGS(SUNLinearSolver S) +{ + if (S == NULL) return(SUNLS_SUCCESS); + + if (S->content) { + /* delete items from within the content structure */ + if (SPBCGS_CONTENT(S)->r_star) { + N_VDestroy(SPBCGS_CONTENT(S)->r_star); + SPBCGS_CONTENT(S)->r_star = NULL; + } + if (SPBCGS_CONTENT(S)->r) { + N_VDestroy(SPBCGS_CONTENT(S)->r); + SPBCGS_CONTENT(S)->r = NULL; + } + if (SPBCGS_CONTENT(S)->p) { + N_VDestroy(SPBCGS_CONTENT(S)->p); + SPBCGS_CONTENT(S)->p = NULL; + } + if (SPBCGS_CONTENT(S)->q) { + N_VDestroy(SPBCGS_CONTENT(S)->q); + SPBCGS_CONTENT(S)->q = NULL; + } + if (SPBCGS_CONTENT(S)->u) { + N_VDestroy(SPBCGS_CONTENT(S)->u); + SPBCGS_CONTENT(S)->u = NULL; + } + if (SPBCGS_CONTENT(S)->Ap) { + N_VDestroy(SPBCGS_CONTENT(S)->Ap); + SPBCGS_CONTENT(S)->Ap = NULL; + } + if (SPBCGS_CONTENT(S)->vtemp) { + N_VDestroy(SPBCGS_CONTENT(S)->vtemp); + SPBCGS_CONTENT(S)->vtemp = NULL; + } + free(S->content); S->content = NULL; + } + if (S->ops) { free(S->ops); S->ops = NULL; } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetInfoFile_SPBCGS(SUNLinearSolver S, + FILE* info_file) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + SPBCGS_CONTENT(S)->info_file = info_file; + + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetPrintLevel_SPBCGS(SUNLinearSolver S, + int print_level) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUNLS_ILL_INPUT); + + SPBCGS_CONTENT(S)->print_level = print_level; + + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c b/src/lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c new file mode 100644 index 0000000..1cccb07 --- /dev/null +++ b/src/lib/sunlinsol/spfgmr/sunlinsol_spfgmr.c @@ -0,0 +1,851 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on sundials_spfgmr.c code, written by Daniel R. Reynolds + * and Hilari C. Tiedeman @ SMU + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the SPFGMR implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * SPFGMR solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define SPFGMR_CONTENT(S) ( (SUNLinearSolverContent_SPFGMR)(S->content) ) +#define LASTFLAG(S) ( SPFGMR_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new SPFGMR linear solver + */ + +SUNLinearSolver SUNLinSol_SPFGMR(N_Vector y, int pretype, int maxl, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_SPFGMR content; + + /* set preconditioning flag (enabling any preconditioner implies right + preconditioning, since SPFGMR does not support left preconditioning) */ + pretype = ( (pretype == SUN_PREC_LEFT) || + (pretype == SUN_PREC_RIGHT) || + (pretype == SUN_PREC_BOTH) ) ? SUN_PREC_RIGHT : SUN_PREC_NONE; + + /* if maxl input is illegal, set to default */ + if (maxl <= 0) maxl = SUNSPFGMR_MAXL_DEFAULT; + + /* check that the supplied N_Vector supports all requisite operations */ + if ( (y->ops->nvclone == NULL) || (y->ops->nvdestroy == NULL) || + (y->ops->nvlinearsum == NULL) || (y->ops->nvconst == NULL) || + (y->ops->nvprod == NULL) || (y->ops->nvdiv == NULL) || + (y->ops->nvscale == NULL) || (y->ops->nvdotprod == NULL) ) + return(NULL); + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_SPFGMR; + S->ops->getid = SUNLinSolGetID_SPFGMR; + S->ops->setatimes = SUNLinSolSetATimes_SPFGMR; + S->ops->setpreconditioner = SUNLinSolSetPreconditioner_SPFGMR; + S->ops->setscalingvectors = SUNLinSolSetScalingVectors_SPFGMR; + S->ops->setzeroguess = SUNLinSolSetZeroGuess_SPFGMR; + S->ops->initialize = SUNLinSolInitialize_SPFGMR; + S->ops->setup = SUNLinSolSetup_SPFGMR; + S->ops->solve = SUNLinSolSolve_SPFGMR; + S->ops->numiters = SUNLinSolNumIters_SPFGMR; + S->ops->resnorm = SUNLinSolResNorm_SPFGMR; + S->ops->resid = SUNLinSolResid_SPFGMR; + S->ops->lastflag = SUNLinSolLastFlag_SPFGMR; + S->ops->space = SUNLinSolSpace_SPFGMR; + S->ops->free = SUNLinSolFree_SPFGMR; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_SPFGMR) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->maxl = maxl; + content->pretype = pretype; + content->gstype = SUNSPFGMR_GSTYPE_DEFAULT; + content->max_restarts = SUNSPFGMR_MAXRS_DEFAULT; + content->zeroguess = SUNFALSE; + content->numiters = 0; + content->resnorm = ZERO; + content->xcor = NULL; + content->vtemp = NULL; + content->s1 = NULL; + content->s2 = NULL; + content->ATimes = NULL; + content->ATData = NULL; + content->Psetup = NULL; + content->Psolve = NULL; + content->PData = NULL; + content->V = NULL; + content->Z = NULL; + content->Hes = NULL; + content->givens = NULL; + content->yg = NULL; + content->cv = NULL; + content->Xv = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Allocate content */ + content->xcor = N_VClone(y); + if (content->xcor == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp = N_VClone(y); + if (content->vtemp == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* ---------------------------------------------------------------------------- + * Function to toggle preconditioning on/off -- turns on if pretype is any + * one of SUN_PREC_LEFT, SUN_PREC_RIGHT or SUN_PREC_BOTH; otherwise turns off + */ + +int SUNLinSol_SPFGMRSetPrecType(SUNLinearSolver S, int pretype) +{ + /* Check for legal pretype */ + pretype = ( (pretype == SUN_PREC_LEFT) || + (pretype == SUN_PREC_RIGHT) || + (pretype == SUN_PREC_BOTH) ) ? SUN_PREC_RIGHT : SUN_PREC_NONE; + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + SPFGMR_CONTENT(S)->pretype = pretype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of Gram-Schmidt orthogonalization for SPFGMR to use + */ + +int SUNLinSol_SPFGMRSetGSType(SUNLinearSolver S, int gstype) +{ + /* Check for legal gstype */ + if ((gstype != SUN_MODIFIED_GS) && (gstype != SUN_CLASSICAL_GS)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + SPFGMR_CONTENT(S)->gstype = gstype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the maximum number of FGMRES restarts to allow + */ + +int SUNLinSol_SPFGMRSetMaxRestarts(SUNLinearSolver S, int maxrs) +{ + /* Illegal maxrs implies use of default value */ + if (maxrs < 0) + maxrs = SUNSPFGMR_MAXRS_DEFAULT; + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set max_restarts */ + SPFGMR_CONTENT(S)->max_restarts = maxrs; + return(SUNLS_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_SPFGMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_ITERATIVE); +} + + +SUNLinearSolver_ID SUNLinSolGetID_SPFGMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_SPFGMR); +} + + +int SUNLinSolInitialize_SPFGMR(SUNLinearSolver S) +{ + int k; + SUNLinearSolverContent_SPFGMR content; + + /* set shortcut to SPFGMR memory structure */ + if (S == NULL) return(SUNLS_MEM_NULL); + content = SPFGMR_CONTENT(S); + + /* ensure valid options */ + if (content->max_restarts < 0) + content->max_restarts = SUNSPFGMR_MAXRS_DEFAULT; + + if (content->ATimes == NULL) { + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + if ( (content->pretype != SUN_PREC_LEFT) && + (content->pretype != SUN_PREC_RIGHT) && + (content->pretype != SUN_PREC_BOTH) ) + content->pretype = SUN_PREC_NONE; + + if ((content->pretype != SUN_PREC_NONE) && (content->Psolve == NULL)) { + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* allocate solver-specific memory (where the size depends on the + choice of maxl) here */ + + /* Krylov subspace vectors */ + if (content->V == NULL) { + content->V = N_VCloneVectorArray(content->maxl+1, content->vtemp); + if (content->V == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* Preconditioned basis vectors */ + if (content->Z == NULL) { + content->Z = N_VCloneVectorArray(content->maxl+1, content->vtemp); + if (content->Z == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* Hessenberg matrix Hes */ + if (content->Hes == NULL) { + content->Hes = (realtype **) malloc((content->maxl+1)*sizeof(realtype *)); + if (content->Hes == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + for (k=0; k<=content->maxl; k++) { + content->Hes[k] = NULL; + content->Hes[k] = (realtype *) malloc(content->maxl*sizeof(realtype)); + if (content->Hes[k] == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + } + + /* Givens rotation components */ + if (content->givens == NULL) { + content->givens = (realtype *) malloc(2*content->maxl*sizeof(realtype)); + if (content->givens == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* y and g vectors */ + if (content->yg == NULL) { + content->yg = (realtype *) malloc((content->maxl+1)*sizeof(realtype)); + if (content->yg == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* cv vector for fused vector ops */ + if (content->cv == NULL) { + content->cv = (realtype *) malloc((content->maxl+1)*sizeof(realtype)); + if (content->cv == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* Xv vector for fused vector ops */ + if (content->Xv == NULL) { + content->Xv = (N_Vector *) malloc((content->maxl+1)*sizeof(N_Vector)); + if (content->Xv == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* return with success */ + content->last_flag = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetATimes_SPFGMR(SUNLinearSolver S, void* ATData, + SUNATimesFn ATimes) +{ + /* set function pointers to integrator-supplied ATimes routine + and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPFGMR_CONTENT(S)->ATimes = ATimes; + SPFGMR_CONTENT(S)->ATData = ATData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetPreconditioner_SPFGMR(SUNLinearSolver S, void* PData, + SUNPSetupFn Psetup, SUNPSolveFn Psolve) +{ + /* set function pointers to integrator-supplied Psetup and PSolve + routines and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPFGMR_CONTENT(S)->Psetup = Psetup; + SPFGMR_CONTENT(S)->Psolve = Psolve; + SPFGMR_CONTENT(S)->PData = PData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetScalingVectors_SPFGMR(SUNLinearSolver S, N_Vector s1, + N_Vector s2) +{ + /* set N_Vector pointers to integrator-supplied scaling vectors, + and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPFGMR_CONTENT(S)->s1 = s1; + SPFGMR_CONTENT(S)->s2 = s2; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetZeroGuess_SPFGMR(SUNLinearSolver S, booleantype onoff) +{ + /* set flag indicating a zero initial guess */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPFGMR_CONTENT(S)->zeroguess = onoff; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetup_SPFGMR(SUNLinearSolver S, SUNMatrix A) +{ + int ier; + SUNPSetupFn Psetup; + void* PData; + + /* Set shortcuts to SPFGMR memory structures */ + if (S == NULL) return(SUNLS_MEM_NULL); + Psetup = SPFGMR_CONTENT(S)->Psetup; + PData = SPFGMR_CONTENT(S)->PData; + + /* no solver-specific setup is required, but if user-supplied + Psetup routine exists, call that here */ + if (Psetup != NULL) { + ier = Psetup(PData); + if (ier != 0) { + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSET_FAIL_UNREC : SUNLS_PSET_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* return with success */ + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSolve_SPFGMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype delta) +{ + /* local data and shortcut variables */ + N_Vector *V, *Z, xcor, vtemp, s1, s2; + realtype **Hes, *givens, *yg, *res_norm; + realtype beta, rotation_product, r_norm, s_product, rho; + booleantype preOnRight, scale1, scale2, converged; + booleantype *zeroguess; + int i, j, k, l, l_max, krydim, ier, ntries, max_restarts, gstype; + int *nli; + void *A_data, *P_data; + SUNATimesFn atimes; + SUNPSolveFn psolve; + + /* local shortcuts for fused vector operations */ + realtype* cv; + N_Vector* Xv; + + /* Initialize some variables */ + krydim = 0; + + /* Make local shorcuts to solver variables. */ + if (S == NULL) return(SUNLS_MEM_NULL); + l_max = SPFGMR_CONTENT(S)->maxl; + max_restarts = SPFGMR_CONTENT(S)->max_restarts; + gstype = SPFGMR_CONTENT(S)->gstype; + V = SPFGMR_CONTENT(S)->V; + Z = SPFGMR_CONTENT(S)->Z; + Hes = SPFGMR_CONTENT(S)->Hes; + givens = SPFGMR_CONTENT(S)->givens; + xcor = SPFGMR_CONTENT(S)->xcor; + yg = SPFGMR_CONTENT(S)->yg; + vtemp = SPFGMR_CONTENT(S)->vtemp; + s1 = SPFGMR_CONTENT(S)->s1; + s2 = SPFGMR_CONTENT(S)->s2; + A_data = SPFGMR_CONTENT(S)->ATData; + P_data = SPFGMR_CONTENT(S)->PData; + atimes = SPFGMR_CONTENT(S)->ATimes; + psolve = SPFGMR_CONTENT(S)->Psolve; + zeroguess = &(SPFGMR_CONTENT(S)->zeroguess); + nli = &(SPFGMR_CONTENT(S)->numiters); + res_norm = &(SPFGMR_CONTENT(S)->resnorm); + cv = SPFGMR_CONTENT(S)->cv; + Xv = SPFGMR_CONTENT(S)->Xv; + + /* Initialize counters and convergence flag */ + *nli = 0; + converged = SUNFALSE; + + /* Set booleantype flags for internal solver options */ + preOnRight = ( (SPFGMR_CONTENT(S)->pretype == SUN_PREC_LEFT) || + (SPFGMR_CONTENT(S)->pretype == SUN_PREC_RIGHT) || + (SPFGMR_CONTENT(S)->pretype == SUN_PREC_BOTH) ); + scale1 = (s1 != NULL); + scale2 = (s2 != NULL); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + if (SPFGMR_CONTENT(S)->print_level && SPFGMR_CONTENT(S)->info_file + && (SPFGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + fprintf(SPFGMR_CONTENT(S)->info_file, "SUNLINSOL_SPFGMR:\n"); +#endif + + /* Check if Atimes function has been set */ + if (atimes == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + /* If preconditioning, check if psolve has been set */ + if (preOnRight && psolve == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0 */ + if (*zeroguess) { + N_VScale(ONE, b, vtemp); + } else { + ier = atimes(A_data, x, vtemp); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); + } + + /* Apply left scaling to vtemp = r_0 to fill V[0]. */ + if (scale1) { + N_VProd(s1, vtemp, V[0]); + } else { + N_VScale(ONE, vtemp, V[0]); + } + + /* Set r_norm = beta to L2 norm of V[0] = s1 r_0, and return if small */ + *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0])); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print initial residual */ + if (SPFGMR_CONTENT(S)->print_level && SPFGMR_CONTENT(S)->info_file + && (SPFGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPFGMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) 0, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPFGMR", "initial-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (r_norm <= delta) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); + } + + /* Initialize rho to avoid compiler warning message */ + rho = beta; + + /* Set xcor = 0. */ + N_VConst(ZERO, xcor); + + /* Begin outer iterations: up to (max_restarts + 1) attempts. */ + for (ntries=0; ntries<=max_restarts; ntries++) { + + /* Initialize the Hessenberg matrix Hes and Givens rotation + product. Normalize the initial vector V[0]. */ + for (i=0; i<=l_max; i++) + for (j=0; j= SUNDIALS_LOGGING_INFO + /* print current iteration number and the residual */ + if (SPFGMR_CONTENT(S)->print_level && SPFGMR_CONTENT(S)->info_file + && (SPFGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPFGMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) *nli, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPFGMR", "iterate-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (rho <= delta) { converged = SUNTRUE; break; } + + /* Normalize V[l+1] with norm value from the Gram-Schmidt routine. */ + N_VScale(ONE/Hes[l+1][l], V[l+1], V[l+1]); + } + + /* Inner loop is done. Compute the new correction vector xcor. */ + + /* Construct g, then solve for y. */ + yg[0] = r_norm; + for (i=1; i<=krydim; i++) yg[i]=ZERO; + if (SUNQRsol(krydim, Hes, givens, yg) != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_QRSOL_FAIL; + return(LASTFLAG(S)); + } + + /* Add correction vector Z_l y to xcor. */ + cv[0] = ONE; + Xv[0] = xcor; + + for (k=0; k0; i--) { + yg[i] = s_product*givens[2*i-2]; + s_product *= givens[2*i-1]; + } + yg[0] = s_product; + + /* Scale r_norm and yg. */ + r_norm *= s_product; + for (i=0; i<=krydim; i++) + yg[i] *= r_norm; + r_norm = SUNRabs(r_norm); + + /* Multiply yg by V_(krydim+1) to get last residual vector; restart. */ + for (k=0; k<=krydim; k++) { + cv[k] = yg[k]; + Xv[k] = V[k]; + } + ier = N_VLinearCombination(krydim+1, cv, Xv, V[0]); + if (ier != SUNLS_SUCCESS) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_VECTOROP_ERR; + return(SUNLS_VECTOROP_ERR); + } + + } + + /* Failed to converge, even after allowed restarts. + If the residual norm was reduced below its initial value, compute + and return x anyway. Otherwise return failure flag. */ + if (rho < beta) { + if (*zeroguess) + N_VScale(ONE, xcor, x); + else + N_VLinearSum(ONE, x, ONE, xcor, x); + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_RES_REDUCED; + return(LASTFLAG(S)); + } + + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_CONV_FAIL; + return(LASTFLAG(S)); +} + + +int SUNLinSolNumIters_SPFGMR(SUNLinearSolver S) +{ + /* return the stored 'numiters' value */ + if (S == NULL) return(-1); + return (SPFGMR_CONTENT(S)->numiters); +} + + +realtype SUNLinSolResNorm_SPFGMR(SUNLinearSolver S) +{ + /* return the stored 'resnorm' value */ + if (S == NULL) return(-ONE); + return (SPFGMR_CONTENT(S)->resnorm); +} + + +N_Vector SUNLinSolResid_SPFGMR(SUNLinearSolver S) +{ + /* return the stored 'vtemp' vector */ + return (SPFGMR_CONTENT(S)->vtemp); +} + + +sunindextype SUNLinSolLastFlag_SPFGMR(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return (LASTFLAG(S)); +} + + +int SUNLinSolSpace_SPFGMR(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + int maxl; + sunindextype liw1, lrw1; + maxl = SPFGMR_CONTENT(S)->maxl; + if (SPFGMR_CONTENT(S)->vtemp->ops->nvspace) + N_VSpace(SPFGMR_CONTENT(S)->vtemp, &lrw1, &liw1); + else + lrw1 = liw1 = 0; + *lenrwLS = lrw1*(2*maxl + 4) + maxl*(maxl + 5) + 2; + *leniwLS = liw1*(2*maxl + 4); + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_SPFGMR(SUNLinearSolver S) +{ + int k; + + if (S == NULL) return(SUNLS_SUCCESS); + + if (S->content) { + /* delete items from within the content structure */ + if (SPFGMR_CONTENT(S)->xcor) { + N_VDestroy(SPFGMR_CONTENT(S)->xcor); + SPFGMR_CONTENT(S)->xcor = NULL; + } + if (SPFGMR_CONTENT(S)->vtemp) { + N_VDestroy(SPFGMR_CONTENT(S)->vtemp); + SPFGMR_CONTENT(S)->vtemp = NULL; + } + if (SPFGMR_CONTENT(S)->V) { + N_VDestroyVectorArray(SPFGMR_CONTENT(S)->V, + SPFGMR_CONTENT(S)->maxl+1); + SPFGMR_CONTENT(S)->V = NULL; + } + if (SPFGMR_CONTENT(S)->Z) { + N_VDestroyVectorArray(SPFGMR_CONTENT(S)->Z, + SPFGMR_CONTENT(S)->maxl+1); + SPFGMR_CONTENT(S)->Z = NULL; + } + if (SPFGMR_CONTENT(S)->Hes) { + for (k=0; k<=SPFGMR_CONTENT(S)->maxl; k++) + if (SPFGMR_CONTENT(S)->Hes[k]) { + free(SPFGMR_CONTENT(S)->Hes[k]); + SPFGMR_CONTENT(S)->Hes[k] = NULL; + } + free(SPFGMR_CONTENT(S)->Hes); + SPFGMR_CONTENT(S)->Hes = NULL; + } + if (SPFGMR_CONTENT(S)->givens) { + free(SPFGMR_CONTENT(S)->givens); + SPFGMR_CONTENT(S)->givens = NULL; + } + if (SPFGMR_CONTENT(S)->yg) { + free(SPFGMR_CONTENT(S)->yg); + SPFGMR_CONTENT(S)->yg = NULL; + } + if (SPFGMR_CONTENT(S)->cv) { + free(SPFGMR_CONTENT(S)->cv); + SPFGMR_CONTENT(S)->cv = NULL; + } + if (SPFGMR_CONTENT(S)->Xv) { + free(SPFGMR_CONTENT(S)->Xv); + SPFGMR_CONTENT(S)->Xv = NULL; + } + free(S->content); S->content = NULL; + } + if (S->ops) { free(S->ops); S->ops = NULL; } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetInfoFile_SPFGMR(SUNLinearSolver S, + FILE* info_file) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + SPFGMR_CONTENT(S)->info_file = info_file; + + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetPrintLevel_SPFGMR(SUNLinearSolver S, + int print_level) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUNLS_ILL_INPUT); + + SPFGMR_CONTENT(S)->print_level = print_level; + + return(SUNLS_SUCCESS); +} + diff --git a/src/lib/sunlinsol/spgmr/sunlinsol_spgmr.c b/src/lib/sunlinsol/spgmr/sunlinsol_spgmr.c new file mode 100644 index 0000000..a7471e4 --- /dev/null +++ b/src/lib/sunlinsol/spgmr/sunlinsol_spgmr.c @@ -0,0 +1,899 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on sundials_spgmr.c code, written by Scott D. Cohen, + * Alan C. Hindmarsh and Radu Serban @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the SPGMR implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * SPGMR solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define SPGMR_CONTENT(S) ( (SUNLinearSolverContent_SPGMR)(S->content) ) +#define LASTFLAG(S) ( SPGMR_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new SPGMR linear solver + */ + +SUNLinearSolver SUNLinSol_SPGMR(N_Vector y, int pretype, int maxl, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_SPGMR content; + + /* check for legal pretype and maxl values; if illegal use defaults */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) + pretype = SUN_PREC_NONE; + if (maxl <= 0) + maxl = SUNSPGMR_MAXL_DEFAULT; + + /* check that the supplied N_Vector supports all requisite operations */ + if ( (y->ops->nvclone == NULL) || (y->ops->nvdestroy == NULL) || + (y->ops->nvlinearsum == NULL) || (y->ops->nvconst == NULL) || + (y->ops->nvprod == NULL) || (y->ops->nvdiv == NULL) || + (y->ops->nvscale == NULL) || (y->ops->nvdotprod == NULL) ) + return(NULL); + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_SPGMR; + S->ops->getid = SUNLinSolGetID_SPGMR; + S->ops->setatimes = SUNLinSolSetATimes_SPGMR; + S->ops->setpreconditioner = SUNLinSolSetPreconditioner_SPGMR; + S->ops->setscalingvectors = SUNLinSolSetScalingVectors_SPGMR; + S->ops->setzeroguess = SUNLinSolSetZeroGuess_SPGMR; + S->ops->initialize = SUNLinSolInitialize_SPGMR; + S->ops->setup = SUNLinSolSetup_SPGMR; + S->ops->solve = SUNLinSolSolve_SPGMR; + S->ops->numiters = SUNLinSolNumIters_SPGMR; + S->ops->resnorm = SUNLinSolResNorm_SPGMR; + S->ops->resid = SUNLinSolResid_SPGMR; + S->ops->lastflag = SUNLinSolLastFlag_SPGMR; + S->ops->space = SUNLinSolSpace_SPGMR; + S->ops->free = SUNLinSolFree_SPGMR; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_SPGMR) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->maxl = maxl; + content->pretype = pretype; + content->gstype = SUNSPGMR_GSTYPE_DEFAULT; + content->max_restarts = SUNSPGMR_MAXRS_DEFAULT; + content->zeroguess = SUNFALSE; + content->numiters = 0; + content->resnorm = ZERO; + content->xcor = NULL; + content->vtemp = NULL; + content->s1 = NULL; + content->s2 = NULL; + content->ATimes = NULL; + content->ATData = NULL; + content->Psetup = NULL; + content->Psolve = NULL; + content->PData = NULL; + content->V = NULL; + content->Hes = NULL; + content->givens = NULL; + content->yg = NULL; + content->cv = NULL; + content->Xv = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Allocate content */ + content->xcor = N_VClone(y); + if (content->xcor == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp = N_VClone(y); + if (content->vtemp == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of preconditioning for SPGMR to use + */ + +int SUNLinSol_SPGMRSetPrecType(SUNLinearSolver S, int pretype) +{ + /* Check for legal pretype */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + SPGMR_CONTENT(S)->pretype = pretype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of Gram-Schmidt orthogonalization for SPGMR to use + */ + +int SUNLinSol_SPGMRSetGSType(SUNLinearSolver S, int gstype) +{ + /* Check for legal gstype */ + if ((gstype != SUN_MODIFIED_GS) && (gstype != SUN_CLASSICAL_GS)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + SPGMR_CONTENT(S)->gstype = gstype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the maximum number of GMRES restarts to allow + */ + +int SUNLinSol_SPGMRSetMaxRestarts(SUNLinearSolver S, int maxrs) +{ + /* Illegal maxrs implies use of default value */ + if (maxrs < 0) + maxrs = SUNSPGMR_MAXRS_DEFAULT; + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set max_restarts */ + SPGMR_CONTENT(S)->max_restarts = maxrs; + return(SUNLS_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_SPGMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_ITERATIVE); +} + + +SUNLinearSolver_ID SUNLinSolGetID_SPGMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_SPGMR); +} + + +int SUNLinSolInitialize_SPGMR(SUNLinearSolver S) +{ + int k; + SUNLinearSolverContent_SPGMR content; + + /* set shortcut to SPGMR memory structure */ + if (S == NULL) return(SUNLS_MEM_NULL); + content = SPGMR_CONTENT(S); + + /* ensure valid options */ + if (content->max_restarts < 0) + content->max_restarts = SUNSPGMR_MAXRS_DEFAULT; + + if (content->ATimes == NULL) { + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + if ( (content->pretype != SUN_PREC_LEFT) && + (content->pretype != SUN_PREC_RIGHT) && + (content->pretype != SUN_PREC_BOTH) ) + content->pretype = SUN_PREC_NONE; + + if ((content->pretype != SUN_PREC_NONE) && (content->Psolve == NULL)) { + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* allocate solver-specific memory (where the size depends on the + choice of maxl) here */ + + /* Krylov subspace vectors */ + if (content->V == NULL) { + content->V = N_VCloneVectorArray(content->maxl+1, content->vtemp); + if (content->V == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* Hessenberg matrix Hes */ + if (content->Hes == NULL) { + content->Hes = (realtype **) malloc((content->maxl+1)*sizeof(realtype *)); + if (content->Hes == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + + for (k=0; k<=content->maxl; k++) { + content->Hes[k] = NULL; + content->Hes[k] = (realtype *) malloc(content->maxl*sizeof(realtype)); + if (content->Hes[k] == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + } + + /* Givens rotation components */ + if (content->givens == NULL) { + content->givens = (realtype *) malloc(2*content->maxl*sizeof(realtype)); + if (content->givens == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* y and g vectors */ + if (content->yg == NULL) { + content->yg = (realtype *) malloc((content->maxl+1)*sizeof(realtype)); + if (content->yg == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* cv vector for fused vector ops */ + if (content->cv == NULL) { + content->cv = (realtype *) malloc((content->maxl+1)*sizeof(realtype)); + if (content->cv == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* Xv vector for fused vector ops */ + if (content->Xv == NULL) { + content->Xv = (N_Vector *) malloc((content->maxl+1)*sizeof(N_Vector)); + if (content->Xv == NULL) { + content->last_flag = SUNLS_MEM_FAIL; + return(SUNLS_MEM_FAIL); + } + } + + /* return with success */ + content->last_flag = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetATimes_SPGMR(SUNLinearSolver S, void* ATData, + SUNATimesFn ATimes) +{ + /* set function pointers to integrator-supplied ATimes routine + and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPGMR_CONTENT(S)->ATimes = ATimes; + SPGMR_CONTENT(S)->ATData = ATData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetPreconditioner_SPGMR(SUNLinearSolver S, void* PData, + SUNPSetupFn Psetup, SUNPSolveFn Psolve) +{ + /* set function pointers to integrator-supplied Psetup and PSolve + routines and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPGMR_CONTENT(S)->Psetup = Psetup; + SPGMR_CONTENT(S)->Psolve = Psolve; + SPGMR_CONTENT(S)->PData = PData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetScalingVectors_SPGMR(SUNLinearSolver S, N_Vector s1, + N_Vector s2) +{ + /* set N_Vector pointers to integrator-supplied scaling vectors, + and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPGMR_CONTENT(S)->s1 = s1; + SPGMR_CONTENT(S)->s2 = s2; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetZeroGuess_SPGMR(SUNLinearSolver S, booleantype onff) +{ + /* set flag indicating a zero initial guess */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPGMR_CONTENT(S)->zeroguess = onff; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetup_SPGMR(SUNLinearSolver S, SUNMatrix A) +{ + int ier; + SUNPSetupFn Psetup; + void* PData; + + /* Set shortcuts to SPGMR memory structures */ + if (S == NULL) return(SUNLS_MEM_NULL); + Psetup = SPGMR_CONTENT(S)->Psetup; + PData = SPGMR_CONTENT(S)->PData; + + /* no solver-specific setup is required, but if user-supplied + Psetup routine exists, call that here */ + if (Psetup != NULL) { + ier = Psetup(PData); + if (ier != 0) { + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSET_FAIL_UNREC : SUNLS_PSET_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* return with success */ + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSolve_SPGMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype delta) +{ + /* local data and shortcut variables */ + N_Vector *V, xcor, vtemp, s1, s2; + realtype **Hes, *givens, *yg, *res_norm; + realtype beta, rotation_product, r_norm, s_product, rho; + booleantype preOnLeft, preOnRight, scale2, scale1, converged; + booleantype *zeroguess; + int i, j, k, l, l_plus_1, l_max, krydim, ier, ntries, max_restarts, gstype; + int *nli; + void *A_data, *P_data; + SUNATimesFn atimes; + SUNPSolveFn psolve; + + /* local shortcuts for fused vector operations */ + realtype* cv; + N_Vector* Xv; + + /* Initialize some variables */ + l_plus_1 = 0; + krydim = 0; + + /* Make local shorcuts to solver variables. */ + if (S == NULL) return(SUNLS_MEM_NULL); + l_max = SPGMR_CONTENT(S)->maxl; + max_restarts = SPGMR_CONTENT(S)->max_restarts; + gstype = SPGMR_CONTENT(S)->gstype; + V = SPGMR_CONTENT(S)->V; + Hes = SPGMR_CONTENT(S)->Hes; + givens = SPGMR_CONTENT(S)->givens; + xcor = SPGMR_CONTENT(S)->xcor; + yg = SPGMR_CONTENT(S)->yg; + vtemp = SPGMR_CONTENT(S)->vtemp; + s1 = SPGMR_CONTENT(S)->s1; + s2 = SPGMR_CONTENT(S)->s2; + A_data = SPGMR_CONTENT(S)->ATData; + P_data = SPGMR_CONTENT(S)->PData; + atimes = SPGMR_CONTENT(S)->ATimes; + psolve = SPGMR_CONTENT(S)->Psolve; + zeroguess = &(SPGMR_CONTENT(S)->zeroguess); + nli = &(SPGMR_CONTENT(S)->numiters); + res_norm = &(SPGMR_CONTENT(S)->resnorm); + cv = SPGMR_CONTENT(S)->cv; + Xv = SPGMR_CONTENT(S)->Xv; + + /* Initialize counters and convergence flag */ + *nli = 0; + converged = SUNFALSE; + + /* Set booleantype flags for internal solver options */ + preOnLeft = ( (SPGMR_CONTENT(S)->pretype == SUN_PREC_LEFT) || + (SPGMR_CONTENT(S)->pretype == SUN_PREC_BOTH) ); + preOnRight = ( (SPGMR_CONTENT(S)->pretype == SUN_PREC_RIGHT) || + (SPGMR_CONTENT(S)->pretype == SUN_PREC_BOTH) ); + scale1 = (s1 != NULL); + scale2 = (s2 != NULL); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + if (SPGMR_CONTENT(S)->print_level && SPGMR_CONTENT(S)->info_file + && (SPGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + fprintf(SPGMR_CONTENT(S)->info_file, "SUNLINSOL_SPGMR:\n"); +#endif + + /* Check if Atimes function has been set */ + if (atimes == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + /* If preconditioning, check if psolve has been set */ + if ((preOnLeft || preOnRight) && psolve == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0 */ + if (*zeroguess) { + N_VScale(ONE, b, vtemp); + } else { + ier = atimes(A_data, x, vtemp); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); + } + N_VScale(ONE, vtemp, V[0]); + + /* Apply left preconditioner and left scaling to V[0] = r_0 */ + if (preOnLeft) { + ier = psolve(P_data, V[0], vtemp, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } else { + N_VScale(ONE, V[0], vtemp); + } + + if (scale1) { + N_VProd(s1, vtemp, V[0]); + } else { + N_VScale(ONE, vtemp, V[0]); + } + + /* Set r_norm = beta to L2 norm of V[0] = s1 P1_inv r_0, and + return if small */ + *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0])); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print initial residual */ + if (SPGMR_CONTENT(S)->print_level && SPGMR_CONTENT(S)->info_file + && (SPGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPGMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) 0, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPGMR", "initial-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (r_norm <= delta) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); + } + + /* Initialize rho to avoid compiler warning message */ + rho = beta; + + /* Set xcor = 0 */ + N_VConst(ZERO, xcor); + + /* Begin outer iterations: up to (max_restarts + 1) attempts */ + for (ntries=0; ntries<=max_restarts; ntries++) { + + /* Initialize the Hessenberg matrix Hes and Givens rotation + product. Normalize the initial vector V[0] */ + for (i=0; i<=l_max; i++) + for (j=0; j= SUNDIALS_LOGGING_INFO + /* print current iteration number and the residual */ + if (SPGMR_CONTENT(S)->print_level && SPGMR_CONTENT(S)->info_file + && (SPGMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPGMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) *nli, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPGMR", "iterate-residual", + "nli = %li, resnorm = %.16g", (long int) *nli, *res_norm); +#endif + + if (rho <= delta) { converged = SUNTRUE; break; } + + /* Normalize V[l+1] with norm value from the Gram-Schmidt routine */ + N_VScale(ONE/Hes[l_plus_1][l], V[l_plus_1], V[l_plus_1]); + } + + /* Inner loop is done. Compute the new correction vector xcor */ + + /* Construct g, then solve for y */ + yg[0] = r_norm; + for (i=1; i<=krydim; i++) yg[i]=ZERO; + if (SUNQRsol(krydim, Hes, givens, yg) != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_QRSOL_FAIL; + return(LASTFLAG(S)); + } + + /* Add correction vector V_l y to xcor */ + cv[0] = ONE; + Xv[0] = xcor; + + for (k=0; k0; i--) { + yg[i] = s_product*givens[2*i-2]; + s_product *= givens[2*i-1]; + } + yg[0] = s_product; + + /* Scale r_norm and yg */ + r_norm *= s_product; + for (i=0; i<=krydim; i++) + yg[i] *= r_norm; + r_norm = SUNRabs(r_norm); + + /* Multiply yg by V_(krydim+1) to get last residual vector; restart */ + for (k=0; k<=krydim; k++) { + cv[k] = yg[k]; + Xv[k] = V[k]; + } + ier = N_VLinearCombination(krydim+1, cv, Xv, V[0]); + if (ier != SUNLS_SUCCESS) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_VECTOROP_ERR; + return(SUNLS_VECTOROP_ERR); + } + + } + + /* Failed to converge, even after allowed restarts. + If the residual norm was reduced below its initial value, compute + and return x anyway. Otherwise return failure flag. */ + if (rho < beta) { + + /* Apply right scaling and right precond.: vtemp = P2_inv s2_inv xcor */ + if (scale2) N_VDiv(xcor, s2, xcor); + if (preOnRight) { + ier = psolve(P_data, xcor, vtemp, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } else { + N_VScale(ONE, xcor, vtemp); + } + + /* Add vtemp to initial x to get final solution x, and return */ + if (*zeroguess) + N_VScale(ONE, vtemp, x); + else + N_VLinearSum(ONE, x, ONE, vtemp, x); + + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_RES_REDUCED; + return(LASTFLAG(S)); + } + + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_CONV_FAIL; + return(LASTFLAG(S)); +} + + +int SUNLinSolNumIters_SPGMR(SUNLinearSolver S) +{ + /* return the stored 'numiters' value */ + if (S == NULL) return(-1); + return (SPGMR_CONTENT(S)->numiters); +} + + +realtype SUNLinSolResNorm_SPGMR(SUNLinearSolver S) +{ + /* return the stored 'resnorm' value */ + if (S == NULL) return(-ONE); + return (SPGMR_CONTENT(S)->resnorm); +} + + +N_Vector SUNLinSolResid_SPGMR(SUNLinearSolver S) +{ + /* return the stored 'vtemp' vector */ + return (SPGMR_CONTENT(S)->vtemp); +} + + +sunindextype SUNLinSolLastFlag_SPGMR(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return (LASTFLAG(S)); +} + + +int SUNLinSolSpace_SPGMR(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + int maxl; + sunindextype liw1, lrw1; + maxl = SPGMR_CONTENT(S)->maxl; + if (SPGMR_CONTENT(S)->vtemp->ops->nvspace) + N_VSpace(SPGMR_CONTENT(S)->vtemp, &lrw1, &liw1); + else + lrw1 = liw1 = 0; + *lenrwLS = lrw1*(maxl + 5) + maxl*(maxl + 5) + 2; + *leniwLS = liw1*(maxl + 5); + return(SUNLS_SUCCESS); +} + + +int SUNLinSolFree_SPGMR(SUNLinearSolver S) +{ + int k; + + if (S == NULL) return(SUNLS_SUCCESS); + + if (S->content) { + /* delete items from within the content structure */ + if (SPGMR_CONTENT(S)->xcor) { + N_VDestroy(SPGMR_CONTENT(S)->xcor); + SPGMR_CONTENT(S)->xcor = NULL; + } + if (SPGMR_CONTENT(S)->vtemp) { + N_VDestroy(SPGMR_CONTENT(S)->vtemp); + SPGMR_CONTENT(S)->vtemp = NULL; + } + if (SPGMR_CONTENT(S)->V) { + N_VDestroyVectorArray(SPGMR_CONTENT(S)->V, + SPGMR_CONTENT(S)->maxl+1); + SPGMR_CONTENT(S)->V = NULL; + } + if (SPGMR_CONTENT(S)->Hes) { + for (k=0; k<=SPGMR_CONTENT(S)->maxl; k++) + if (SPGMR_CONTENT(S)->Hes[k]) { + free(SPGMR_CONTENT(S)->Hes[k]); + SPGMR_CONTENT(S)->Hes[k] = NULL; + } + free(SPGMR_CONTENT(S)->Hes); + SPGMR_CONTENT(S)->Hes = NULL; + } + if (SPGMR_CONTENT(S)->givens) { + free(SPGMR_CONTENT(S)->givens); + SPGMR_CONTENT(S)->givens = NULL; + } + if (SPGMR_CONTENT(S)->yg) { + free(SPGMR_CONTENT(S)->yg); + SPGMR_CONTENT(S)->yg = NULL; + } + if (SPGMR_CONTENT(S)->cv) { + free(SPGMR_CONTENT(S)->cv); + SPGMR_CONTENT(S)->cv = NULL; + } + if (SPGMR_CONTENT(S)->Xv) { + free(SPGMR_CONTENT(S)->Xv); + SPGMR_CONTENT(S)->Xv = NULL; + } + free(S->content); S->content = NULL; + } + if (S->ops) { free(S->ops); S->ops = NULL; } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetInfoFile_SPGMR(SUNLinearSolver S, + FILE* info_file) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + SPGMR_CONTENT(S)->info_file = info_file; + + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetPrintLevel_SPGMR(SUNLinearSolver S, + int print_level) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUNLS_ILL_INPUT); + + SPGMR_CONTENT(S)->print_level = print_level; + + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c b/src/lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c new file mode 100644 index 0000000..2bc1758 --- /dev/null +++ b/src/lib/sunlinsol/sptfqmr/sunlinsol_sptfqmr.c @@ -0,0 +1,938 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Daniel Reynolds @ SMU + * Based on sundials_sptfqmr.c code, written by Aaron Collier @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the SPTFQMR implementation of + * the SUNLINSOL package. + * -----------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* + * ----------------------------------------------------------------- + * SPTFQMR solver structure accessibility macros: + * ----------------------------------------------------------------- + */ + +#define SPTFQMR_CONTENT(S) ( (SUNLinearSolverContent_SPTFQMR)(S->content) ) +#define LASTFLAG(S) ( SPTFQMR_CONTENT(S)->last_flag ) + +/* + * ----------------------------------------------------------------- + * exported functions + * ----------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------------------- + * Function to create a new SPTFQMR linear solver + */ + +SUNLinearSolver SUNLinSol_SPTFQMR(N_Vector y, int pretype, int maxl, SUNContext sunctx) +{ + SUNLinearSolver S; + SUNLinearSolverContent_SPTFQMR content; + + /* check for legal pretype and maxl values; if illegal use defaults */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) + pretype = SUN_PREC_NONE; + if (maxl <= 0) + maxl = SUNSPTFQMR_MAXL_DEFAULT; + + /* check that the supplied N_Vector supports all requisite operations */ + if ( (y->ops->nvclone == NULL) || (y->ops->nvdestroy == NULL) || + (y->ops->nvlinearsum == NULL) || (y->ops->nvconst == NULL) || + (y->ops->nvprod == NULL) || (y->ops->nvdiv == NULL) || + (y->ops->nvscale == NULL) || (y->ops->nvdotprod == NULL) ) + return(NULL); + + /* Create linear solver */ + S = NULL; + S = SUNLinSolNewEmpty(sunctx); + if (S == NULL) return(NULL); + + /* Attach operations */ + S->ops->gettype = SUNLinSolGetType_SPTFQMR; + S->ops->getid = SUNLinSolGetID_SPTFQMR; + S->ops->setatimes = SUNLinSolSetATimes_SPTFQMR; + S->ops->setpreconditioner = SUNLinSolSetPreconditioner_SPTFQMR; + S->ops->setscalingvectors = SUNLinSolSetScalingVectors_SPTFQMR; + S->ops->setzeroguess = SUNLinSolSetZeroGuess_SPTFQMR; + S->ops->initialize = SUNLinSolInitialize_SPTFQMR; + S->ops->setup = SUNLinSolSetup_SPTFQMR; + S->ops->solve = SUNLinSolSolve_SPTFQMR; + S->ops->numiters = SUNLinSolNumIters_SPTFQMR; + S->ops->resnorm = SUNLinSolResNorm_SPTFQMR; + S->ops->resid = SUNLinSolResid_SPTFQMR; + S->ops->lastflag = SUNLinSolLastFlag_SPTFQMR; + S->ops->space = SUNLinSolSpace_SPTFQMR; + S->ops->free = SUNLinSolFree_SPTFQMR; + + /* Create content */ + content = NULL; + content = (SUNLinearSolverContent_SPTFQMR) malloc(sizeof *content); + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; + + /* Fill content */ + content->last_flag = 0; + content->maxl = maxl; + content->pretype = pretype; + content->zeroguess = SUNFALSE; + content->numiters = 0; + content->resnorm = ZERO; + content->r_star = NULL; + content->q = NULL; + content->d = NULL; + content->v = NULL; + content->p = NULL; + content->r = NULL; + content->u = NULL; + content->vtemp1 = NULL; + content->vtemp2 = NULL; + content->vtemp3 = NULL; + content->s1 = NULL; + content->s2 = NULL; + content->ATimes = NULL; + content->ATData = NULL; + content->Psetup = NULL; + content->Psolve = NULL; + content->PData = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Allocate content */ + content->r_star = N_VClone(y); + if (content->r_star == NULL) { SUNLinSolFree(S); return(NULL); } + + content->q = N_VClone(y); + if (content->q == NULL) { SUNLinSolFree(S); return(NULL); } + + content->d = N_VClone(y); + if (content->d == NULL) { SUNLinSolFree(S); return(NULL); } + + content->v = N_VClone(y); + if (content->v == NULL) { SUNLinSolFree(S); return(NULL); } + + content->p = N_VClone(y); + if (content->p == NULL) { SUNLinSolFree(S); return(NULL); } + + content->r = N_VCloneVectorArray(2, y); + if (content->r == NULL) { SUNLinSolFree(S); return(NULL); } + + content->u = N_VClone(y); + if (content->u == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp1 = N_VClone(y); + if (content->vtemp1 == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp2 = N_VClone(y); + if (content->vtemp2 == NULL) { SUNLinSolFree(S); return(NULL); } + + content->vtemp3 = N_VClone(y); + if (content->vtemp3 == NULL) { SUNLinSolFree(S); return(NULL); } + + return(S); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the type of preconditioning for SPTFQMR to use + */ + +int SUNLinSol_SPTFQMRSetPrecType(SUNLinearSolver S, int pretype) +{ + /* Check for legal pretype */ + if ((pretype != SUN_PREC_NONE) && (pretype != SUN_PREC_LEFT) && + (pretype != SUN_PREC_RIGHT) && (pretype != SUN_PREC_BOTH)) { + return(SUNLS_ILL_INPUT); + } + + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Set pretype */ + SPTFQMR_CONTENT(S)->pretype = pretype; + return(SUNLS_SUCCESS); +} + + +/* ---------------------------------------------------------------------------- + * Function to set the maximum number of iterations for SPTFQMR to use + */ + +int SUNLinSol_SPTFQMRSetMaxl(SUNLinearSolver S, int maxl) +{ + /* Check for non-NULL SUNLinearSolver */ + if (S == NULL) return(SUNLS_MEM_NULL); + + /* Check for legal pretype */ + if (maxl <= 0) + maxl = SUNSPTFQMR_MAXL_DEFAULT; + + /* Set pretype */ + SPTFQMR_CONTENT(S)->maxl = maxl; + return(SUNLS_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * implementation of linear solver operations + * ----------------------------------------------------------------- + */ + +SUNLinearSolver_Type SUNLinSolGetType_SPTFQMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_ITERATIVE); +} + + +SUNLinearSolver_ID SUNLinSolGetID_SPTFQMR(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_SPTFQMR); +} + + +int SUNLinSolInitialize_SPTFQMR(SUNLinearSolver S) +{ + SUNLinearSolverContent_SPTFQMR content; + + /* set shortcut to SPTFQMR memory structure */ + if (S == NULL) return(SUNLS_MEM_NULL); + content = SPTFQMR_CONTENT(S); + + /* ensure valid options */ + if (content->maxl <= 0) + content->maxl = SUNSPTFQMR_MAXL_DEFAULT; + + if (content->ATimes == NULL) { + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + if ( (content->pretype != SUN_PREC_LEFT) && + (content->pretype != SUN_PREC_RIGHT) && + (content->pretype != SUN_PREC_BOTH) ) + content->pretype = SUN_PREC_NONE; + + if ((content->pretype != SUN_PREC_NONE) && (content->Psolve == NULL)) { + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* no additional memory to allocate */ + + /* return with success */ + content->last_flag = SUNLS_SUCCESS; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetATimes_SPTFQMR(SUNLinearSolver S, void* ATData, + SUNATimesFn ATimes) +{ + /* set function pointers to integrator-supplied ATimes routine + and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPTFQMR_CONTENT(S)->ATimes = ATimes; + SPTFQMR_CONTENT(S)->ATData = ATData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetPreconditioner_SPTFQMR(SUNLinearSolver S, void* PData, + SUNPSetupFn Psetup, SUNPSolveFn Psolve) +{ + /* set function pointers to integrator-supplied Psetup and PSolve + routines and data, and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPTFQMR_CONTENT(S)->Psetup = Psetup; + SPTFQMR_CONTENT(S)->Psolve = Psolve; + SPTFQMR_CONTENT(S)->PData = PData; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetScalingVectors_SPTFQMR(SUNLinearSolver S, + N_Vector s1, + N_Vector s2) +{ + /* set N_Vector pointers to integrator-supplied scaling vectors, + and return with success */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPTFQMR_CONTENT(S)->s1 = s1; + SPTFQMR_CONTENT(S)->s2 = s2; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetZeroGuess_SPTFQMR(SUNLinearSolver S, booleantype onoff) +{ + /* set flag indicating a zero initial guess */ + if (S == NULL) return(SUNLS_MEM_NULL); + SPTFQMR_CONTENT(S)->zeroguess = onoff; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); +} + + +int SUNLinSolSetup_SPTFQMR(SUNLinearSolver S, SUNMatrix A) +{ + int ier; + SUNPSetupFn Psetup; + void* PData; + + /* Set shortcuts to SPTFQMR memory structures */ + if (S == NULL) return(SUNLS_MEM_NULL); + Psetup = SPTFQMR_CONTENT(S)->Psetup; + PData = SPTFQMR_CONTENT(S)->PData; + + /* no solver-specific setup is required, but if user-supplied + Psetup routine exists, call that here */ + if (Psetup != NULL) { + ier = Psetup(PData); + if (ier != 0) { + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSET_FAIL_UNREC : SUNLS_PSET_FAIL_REC; + return(LASTFLAG(S)); + } + } + + /* return with success */ + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSolve_SPTFQMR(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype delta) +{ + /* local data and shortcut variables */ + realtype alpha, tau, eta, beta, c, sigma, v_bar, omega; + realtype rho[2]; + realtype r_init_norm, r_curr_norm; + realtype temp_val; + booleantype preOnLeft, preOnRight, scale_x, scale_b, converged, b_ok; + booleantype *zeroguess; + int n, m, ier, l_max; + void *A_data, *P_data; + SUNATimesFn atimes; + SUNPSolveFn psolve; + realtype *res_norm; + int *nli; + N_Vector sx, sb, r_star, q, d, v, p, *r, u, vtemp1, vtemp2, vtemp3; + + /* local variables for fused vector operations */ + realtype cv[3]; + N_Vector Xv[3]; + + /* Make local shorcuts to solver variables. */ + if (S == NULL) return(SUNLS_MEM_NULL); + l_max = SPTFQMR_CONTENT(S)->maxl; + r_star = SPTFQMR_CONTENT(S)->r_star; + q = SPTFQMR_CONTENT(S)->q; + d = SPTFQMR_CONTENT(S)->d; + v = SPTFQMR_CONTENT(S)->v; + p = SPTFQMR_CONTENT(S)->p; + r = SPTFQMR_CONTENT(S)->r; + u = SPTFQMR_CONTENT(S)->u; + vtemp1 = SPTFQMR_CONTENT(S)->vtemp1; + vtemp2 = SPTFQMR_CONTENT(S)->vtemp2; + vtemp3 = SPTFQMR_CONTENT(S)->vtemp3; + sb = SPTFQMR_CONTENT(S)->s1; + sx = SPTFQMR_CONTENT(S)->s2; + A_data = SPTFQMR_CONTENT(S)->ATData; + P_data = SPTFQMR_CONTENT(S)->PData; + atimes = SPTFQMR_CONTENT(S)->ATimes; + psolve = SPTFQMR_CONTENT(S)->Psolve; + zeroguess = &(SPTFQMR_CONTENT(S)->zeroguess); + nli = &(SPTFQMR_CONTENT(S)->numiters); + res_norm = &(SPTFQMR_CONTENT(S)->resnorm); + + /* Initialize counters and convergence flag */ + temp_val = r_curr_norm = -ONE; + *nli = 0; + converged = SUNFALSE; + b_ok = SUNFALSE; + + /* set booleantype flags for internal solver options */ + preOnLeft = ( (SPTFQMR_CONTENT(S)->pretype == SUN_PREC_LEFT) || + (SPTFQMR_CONTENT(S)->pretype == SUN_PREC_BOTH) ); + preOnRight = ( (SPTFQMR_CONTENT(S)->pretype == SUN_PREC_RIGHT) || + (SPTFQMR_CONTENT(S)->pretype == SUN_PREC_BOTH) ); + scale_x = (sx != NULL); + scale_b = (sb != NULL); + + /* Check for unsupported use case */ + if (preOnRight && !(*zeroguess)) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ILL_INPUT; + return(SUNLS_ILL_INPUT); + } + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + if (SPTFQMR_CONTENT(S)->print_level && SPTFQMR_CONTENT(S)->info_file + && (SPTFQMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + fprintf(SPTFQMR_CONTENT(S)->info_file, "SUNLINSOL_SPTFQMR:\n"); +#endif + + /* Check if Atimes function has been set */ + if (atimes == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_ATIMES_NULL; + return(LASTFLAG(S)); + } + + /* If preconditioning, check if psolve has been set */ + if ((preOnLeft || preOnRight) && psolve == NULL) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_PSOLVE_NULL; + return(LASTFLAG(S)); + } + + /* Set r_star to initial (unscaled) residual r_star = r_0 = b - A*x_0 */ + /* NOTE: if x == 0 then just set residual to b and continue */ + if (*zeroguess) { + N_VScale(ONE, b, r_star); + } else { + ier = atimes(A_data, x, r_star); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + N_VLinearSum(ONE, b, -ONE, r_star, r_star); + } + + /* Apply left preconditioner and b-scaling to r_star (or really just r_0) */ + if (preOnLeft) { + ier = psolve(P_data, r_star, vtemp1, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, r_star, vtemp1); + if (scale_b) N_VProd(sb, vtemp1, r_star); + else N_VScale(ONE, vtemp1, r_star); + + /* Initialize rho[0] */ + /* NOTE: initialized here to reduce number of computations - avoid need + to compute r_star^T*r_star twice, and avoid needlessly squaring + values */ + rho[0] = N_VDotProd(r_star, r_star); + + /* Compute norm of initial residual (r_0) to see if we really need + to do anything */ + *res_norm = r_init_norm = SUNRsqrt(rho[0]); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print initial residual */ + if (SPTFQMR_CONTENT(S)->print_level && SPTFQMR_CONTENT(S)->info_file + && (SPTFQMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPTFQMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) 0, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPFTQMR", "initial-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + if (r_init_norm <= delta) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_SUCCESS; + return(LASTFLAG(S)); + } + + /* Set v = A*r_0 (preconditioned and scaled) */ + if (scale_x) N_VDiv(r_star, sx, vtemp1); + else N_VScale(ONE, r_star, vtemp1); + if (preOnRight) { + N_VScale(ONE, vtemp1, v); + ier = psolve(P_data, v, vtemp1, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + ier = atimes(A_data, vtemp1, v); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + if (preOnLeft) { + ier = psolve(P_data, v, vtemp1, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, v, vtemp1); + if (scale_b) N_VProd(sb, vtemp1, v); + else N_VScale(ONE, vtemp1, v); + + /* Initialize remaining variables */ + N_VScale(ONE, r_star, r[0]); + N_VScale(ONE, r_star, u); + N_VScale(ONE, r_star, p); + N_VConst(ZERO, d); + + /* Set x = sx x if non-zero guess */ + if (scale_x && !(*zeroguess)) N_VProd(sx, x, x); + + tau = r_init_norm; + v_bar = eta = ZERO; + + /* START outer loop */ + for (n = 0; n < l_max; ++n) { + + /* Increment linear iteration counter */ + (*nli)++; + + /* sigma = r_star^T*v */ + sigma = N_VDotProd(r_star, v); + + /* alpha = rho[0]/sigma */ + alpha = rho[0]/sigma; + + /* q = u-alpha*v */ + N_VLinearSum(ONE, u, -alpha, v, q); + + /* r[1] = r[0]-alpha*A*(u+q) */ + N_VLinearSum(ONE, u, ONE, q, r[1]); + if (scale_x) N_VDiv(r[1], sx, r[1]); + if (preOnRight) { + N_VScale(ONE, r[1], vtemp1); + ier = psolve(P_data, vtemp1, r[1], delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + ier = atimes(A_data, r[1], vtemp1); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + if (preOnLeft) { + ier = psolve(P_data, vtemp1, r[1], delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, vtemp1, r[1]); + if (scale_b) N_VProd(sb, r[1], vtemp1); + else N_VScale(ONE, r[1], vtemp1); + N_VLinearSum(ONE, r[0], -alpha, vtemp1, r[1]); + + /* START inner loop */ + for (m = 0; m < 2; ++m) { + + /* d = [*]+(v_bar^2*eta/alpha)*d */ + /* NOTES: + * (1) [*] = u if m == 0, and q if m == 1 + * (2) using temp_val reduces the number of required computations + * if the inner loop is executed twice + */ + if (m == 0) { + temp_val = SUNRsqrt(N_VDotProd(r[1], r[1])); + omega = SUNRsqrt(SUNRsqrt(N_VDotProd(r[0], r[0]))*temp_val); + N_VLinearSum(ONE, u, SUNSQR(v_bar)*eta/alpha, d, d); + } + else { + omega = temp_val; + N_VLinearSum(ONE, q, SUNSQR(v_bar)*eta/alpha, d, d); + } + + /* v_bar = omega/tau */ + v_bar = omega/tau; + + /* c = (1+v_bar^2)^(-1/2) */ + c = ONE / SUNRsqrt(ONE+SUNSQR(v_bar)); + + /* tau = tau*v_bar*c */ + tau = tau*v_bar*c; + + /* eta = c^2*alpha */ + eta = SUNSQR(c)*alpha; + + /* x = x+eta*d */ + if (n == 0 && m == 0 && *zeroguess) + N_VScale(eta, d, x); + else + N_VLinearSum(ONE, x, eta, d, x); + + /* Check for convergence... */ + /* NOTE: just use approximation to norm of residual, if possible */ + *res_norm = r_curr_norm = tau*SUNRsqrt(m+1); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the residual */ + if (SPTFQMR_CONTENT(S)->print_level && SPTFQMR_CONTENT(S)->info_file + && (SPTFQMR_CONTENT(S)->info_file != S->sunctx->logger->info_fp)) + { + fprintf(SPTFQMR_CONTENT(S)->info_file, + SUNLS_MSG_RESIDUAL, + (long int) *nli, *res_norm); + } + SUNLogger_QueueMsg(S->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNLinSolSolve_SPTFQMR", "iterate-residual", + "nli = %li, resnorm = %.16g", (long int) 0, *res_norm); +#endif + + /* Exit inner loop if iteration has converged based upon approximation + to norm of current residual */ + if (r_curr_norm <= delta) { + converged = SUNTRUE; + break; + } + + /* Decide if actual norm of residual vector should be computed */ + /* NOTES: + * (1) if r_curr_norm > delta, then check if actual residual norm + * is OK (recall we first compute an approximation) + * (2) if r_curr_norm >= r_init_norm and m == 1 and n == l_max, then + * compute actual residual norm to see if the iteration can be + * saved + * (3) the scaled and preconditioned right-hand side of the given + * linear system (denoted by b) is only computed once, and the + * result is stored in vtemp3 so it can be reused - reduces the + * number of psovles if using left preconditioning + */ + if ((r_curr_norm > delta) || + (r_curr_norm >= r_init_norm && m == 1 && n == l_max)) { + + /* Compute norm of residual ||b-A*x||_2 (preconditioned and scaled) */ + if (scale_x) N_VDiv(x, sx, vtemp1); + else N_VScale(ONE, x, vtemp1); + if (preOnRight) { + ier = psolve(P_data, vtemp1, vtemp2, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_UNREC; + return(LASTFLAG(S)); + } + N_VScale(ONE, vtemp2, vtemp1); + } + ier = atimes(A_data, vtemp1, vtemp2); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + if (preOnLeft) { + ier = psolve(P_data, vtemp2, vtemp1, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, vtemp2, vtemp1); + if (scale_b) N_VProd(sb, vtemp1, vtemp2); + else N_VScale(ONE, vtemp1, vtemp2); + /* Only precondition and scale b once (result saved for reuse) */ + if (!b_ok) { + b_ok = SUNTRUE; + if (preOnLeft) { + ier = psolve(P_data, b, vtemp3, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, b, vtemp3); + if (scale_b) N_VProd(sb, vtemp3, vtemp3); + } + N_VLinearSum(ONE, vtemp3, -ONE, vtemp2, vtemp1); + *res_norm = r_curr_norm = SUNRsqrt(N_VDotProd(vtemp1, vtemp1)); + + /* Exit inner loop if inequality condition is satisfied + (meaning exit if we have converged) */ + if (r_curr_norm <= delta) { + converged = SUNTRUE; + break; + } + + } + + } /* END inner loop */ + + /* If converged, then exit outer loop as well */ + if (converged == SUNTRUE) break; + + /* rho[1] = r_star^T*r_[1] */ + rho[1] = N_VDotProd(r_star, r[1]); + + /* beta = rho[1]/rho[0] */ + beta = rho[1]/rho[0]; + + /* u = r[1]+beta*q */ + N_VLinearSum(ONE, r[1], beta, q, u); + + /* p = u+beta*(q+beta*p) = beta*beta*p + beta*q + u */ + cv[0] = SUNSQR(beta); + Xv[0] = p; + + cv[1] = beta; + Xv[1] = q; + + cv[2] = ONE; + Xv[2] = u; + + ier = N_VLinearCombination(3, cv, Xv, p); + if (ier != SUNLS_SUCCESS) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_VECTOROP_ERR; + return(SUNLS_VECTOROP_ERR); + } + + /* v = A*p */ + if (scale_x) N_VDiv(p, sx, vtemp1); + else N_VScale(ONE, p, vtemp1); + if (preOnRight) { + N_VScale(ONE, vtemp1, v); + ier = psolve(P_data, v, vtemp1, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + ier = atimes(A_data, vtemp1, v); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; + return(LASTFLAG(S)); + } + if (preOnLeft) { + ier = psolve(P_data, v, vtemp1, delta, SUN_PREC_LEFT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; + return(LASTFLAG(S)); + } + } + else N_VScale(ONE, v, vtemp1); + if (scale_b) N_VProd(sb, vtemp1, v); + else N_VScale(ONE, vtemp1, v); + + /* Shift variable values */ + /* NOTE: reduces storage requirements */ + N_VScale(ONE, r[1], r[0]); + rho[0] = rho[1]; + + } /* END outer loop */ + + /* Determine return value */ + /* If iteration converged or residual was reduced, then return current iterate (x) */ + if ((converged == SUNTRUE) || (r_curr_norm < r_init_norm)) { + if (scale_x) N_VDiv(x, sx, x); + if (preOnRight) { + ier = psolve(P_data, x, vtemp1, delta, SUN_PREC_RIGHT); + if (ier != 0) { + *zeroguess = SUNFALSE; + LASTFLAG(S) = (ier < 0) ? + SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_UNREC; + return(LASTFLAG(S)); + } + N_VScale(ONE, vtemp1, x); + } + + *zeroguess = SUNFALSE; + if (converged == SUNTRUE) + LASTFLAG(S) = SUNLS_SUCCESS; + else + LASTFLAG(S) = SUNLS_RES_REDUCED; + return(LASTFLAG(S)); + } + /* Otherwise, return error code */ + else { + *zeroguess = SUNFALSE; + LASTFLAG(S) = SUNLS_CONV_FAIL; + return(LASTFLAG(S)); + } +} + + +int SUNLinSolNumIters_SPTFQMR(SUNLinearSolver S) +{ + /* return the stored 'numiters' value */ + if (S == NULL) return(-1); + return (SPTFQMR_CONTENT(S)->numiters); +} + + +realtype SUNLinSolResNorm_SPTFQMR(SUNLinearSolver S) +{ + /* return the stored 'resnorm' value */ + if (S == NULL) return(-ONE); + return (SPTFQMR_CONTENT(S)->resnorm); +} + + +N_Vector SUNLinSolResid_SPTFQMR(SUNLinearSolver S) +{ + /* return the stored 'vtemp1' vector */ + return (SPTFQMR_CONTENT(S)->vtemp1); +} + + +sunindextype SUNLinSolLastFlag_SPTFQMR(SUNLinearSolver S) +{ + /* return the stored 'last_flag' value */ + if (S == NULL) return(-1); + return (LASTFLAG(S)); +} + + +int SUNLinSolSpace_SPTFQMR(SUNLinearSolver S, + long int *lenrwLS, + long int *leniwLS) +{ + sunindextype liw1, lrw1; + if (SPTFQMR_CONTENT(S)->vtemp1->ops->nvspace) + N_VSpace(SPTFQMR_CONTENT(S)->vtemp1, &lrw1, &liw1); + else + lrw1 = liw1 = 0; + *lenrwLS = lrw1*11; + *leniwLS = liw1*11; + return(SUNLS_SUCCESS); +} + +int SUNLinSolFree_SPTFQMR(SUNLinearSolver S) +{ + if (S == NULL) return(SUNLS_SUCCESS); + + if (S->content) { + /* delete items from within the content structure */ + if (SPTFQMR_CONTENT(S)->r_star) { + N_VDestroy(SPTFQMR_CONTENT(S)->r_star); + SPTFQMR_CONTENT(S)->r_star = NULL; + } + if (SPTFQMR_CONTENT(S)->q) { + N_VDestroy(SPTFQMR_CONTENT(S)->q); + SPTFQMR_CONTENT(S)->q = NULL; + } + if (SPTFQMR_CONTENT(S)->d) { + N_VDestroy(SPTFQMR_CONTENT(S)->d); + SPTFQMR_CONTENT(S)->d = NULL; + } + if (SPTFQMR_CONTENT(S)->v) { + N_VDestroy(SPTFQMR_CONTENT(S)->v); + SPTFQMR_CONTENT(S)->v = NULL; + } + if (SPTFQMR_CONTENT(S)->p) { + N_VDestroy(SPTFQMR_CONTENT(S)->p); + SPTFQMR_CONTENT(S)->p = NULL; + } + if (SPTFQMR_CONTENT(S)->r) { + N_VDestroyVectorArray(SPTFQMR_CONTENT(S)->r, 2); + SPTFQMR_CONTENT(S)->r = NULL; + } + if (SPTFQMR_CONTENT(S)->u) { + N_VDestroy(SPTFQMR_CONTENT(S)->u); + SPTFQMR_CONTENT(S)->u = NULL; + } + if (SPTFQMR_CONTENT(S)->vtemp1) { + N_VDestroy(SPTFQMR_CONTENT(S)->vtemp1); + SPTFQMR_CONTENT(S)->vtemp1 = NULL; + } + if (SPTFQMR_CONTENT(S)->vtemp2) { + N_VDestroy(SPTFQMR_CONTENT(S)->vtemp2); + SPTFQMR_CONTENT(S)->vtemp2 = NULL; + } + if (SPTFQMR_CONTENT(S)->vtemp3) { + N_VDestroy(SPTFQMR_CONTENT(S)->vtemp3); + SPTFQMR_CONTENT(S)->vtemp3 = NULL; + } + free(S->content); S->content = NULL; + } + if (S->ops) { free(S->ops); S->ops = NULL; } + free(S); S = NULL; + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetInfoFile_SPTFQMR(SUNLinearSolver S, + FILE* info_file) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + SPTFQMR_CONTENT(S)->info_file = info_file; + + return(SUNLS_SUCCESS); +} + + +int SUNLinSolSetPrintLevel_SPTFQMR(SUNLinearSolver S, + int print_level) +{ + /* check that the linear solver is non-null */ + if (S == NULL) + return(SUNLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUNLS_ILL_INPUT); + + SPTFQMR_CONTENT(S)->print_level = print_level; + + return(SUNLS_SUCCESS); +} diff --git a/src/lib/sunmatrix/band/fsunmatrix_band.c b/src/lib/sunmatrix/band/fsunmatrix_band.c deleted file mode 100644 index b189c1f..0000000 --- a/src/lib/sunmatrix/band/fsunmatrix_band.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_band.h) contains the - * implementation needed for the Fortran initialization of band - * vector operations. - * ----------------------------------------------------------------- - */ - -#include -#include - -#include "fsunmatrix_band.h" - -/* Define global matrix variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -/* Fortran callable interfaces */ - -void FSUNBANDMAT_INIT(int *code, long int *N, long int *mu, - long int *ml, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (F2C_CVODE_matrix) SUNMatDestroy(F2C_CVODE_matrix); - F2C_CVODE_matrix = NULL; - F2C_CVODE_matrix = SUNBandMatrix((sunindextype)(*N), - (sunindextype)(*mu), - (sunindextype)(*ml)); - if (F2C_CVODE_matrix == NULL) *ier = -1; - break; - case FCMIX_IDA: - if (F2C_IDA_matrix) SUNMatDestroy(F2C_IDA_matrix); - F2C_IDA_matrix = NULL; - F2C_IDA_matrix = SUNBandMatrix((sunindextype)(*N), - (sunindextype)(*mu), - (sunindextype)(*ml)); - if (F2C_IDA_matrix == NULL) *ier = -1; - break; - case FCMIX_KINSOL: - if (F2C_KINSOL_matrix) SUNMatDestroy(F2C_KINSOL_matrix); - F2C_KINSOL_matrix = NULL; - F2C_KINSOL_matrix = SUNBandMatrix((sunindextype)(*N), - (sunindextype)(*mu), - (sunindextype)(*ml)); - if (F2C_KINSOL_matrix == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - if (F2C_ARKODE_matrix) SUNMatDestroy(F2C_ARKODE_matrix); - F2C_ARKODE_matrix = NULL; - F2C_ARKODE_matrix = SUNBandMatrix((sunindextype)(*N), - (sunindextype)(*mu), - (sunindextype)(*ml)); - if (F2C_ARKODE_matrix == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - - -void FSUNBANDMASSMAT_INIT(long int *N, long int *mu, - long int *ml, int *ier) -{ - *ier = 0; - if (F2C_ARKODE_mass_matrix) SUNMatDestroy(F2C_ARKODE_mass_matrix); - F2C_ARKODE_mass_matrix = NULL; - F2C_ARKODE_mass_matrix = SUNBandMatrix((sunindextype)(*N), - (sunindextype)(*mu), - (sunindextype)(*ml)); - if (F2C_ARKODE_mass_matrix == NULL) *ier = -1; -} diff --git a/src/lib/sunmatrix/band/fsunmatrix_band.h b/src/lib/sunmatrix/band/fsunmatrix_band.h deleted file mode 100644 index b9276c3..0000000 --- a/src/lib/sunmatrix/band/fsunmatrix_band.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_band.c) contains the - * definitions needed for the initialization of band - * matrix operations in Fortran. - * ----------------------------------------------------------------- - */ - -#ifndef _FSUNMATRIX_BAND_H -#define _FSUNMATRIX_BAND_H - -#include -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FSUNBANDMAT_INIT SUNDIALS_F77_FUNC(fsunbandmatinit, FSUNBANDMATINIT) -#define FSUNBANDMASSMAT_INIT SUNDIALS_F77_FUNC(fsunbandmassmatinit, FSUNBANDMASSMATINIT) -#else -#define FSUNBANDMAT_INIT fsunbandmatinit_ -#define FSUNBANDMASSMAT_INIT fsunbandmassmatinit_ -#endif - - -/* Declarations of global variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -/* - * Prototypes of exported functions - * - * FSUNBANDMAT_INIT - initializes band matrix operations for main problem - * FSUNBANDMASSMAT_INIT - initializes band matrix operations for mass matrix solve - */ - -void FSUNBANDMAT_INIT(int *code, long int *N, long int *mu, long int *ml, int *ier); -void FSUNBANDMASSMAT_INIT(long int *N, long int *mu, long int *ml, int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/sunmatrix/band/sunmatrix_band.c b/src/lib/sunmatrix/band/sunmatrix_band.c index e254df8..eaed16a 100644 --- a/src/lib/sunmatrix/band/sunmatrix_band.c +++ b/src/lib/sunmatrix/band/sunmatrix_band.c @@ -2,11 +2,11 @@ * ----------------------------------------------------------------- * Programmer(s): Daniel Reynolds @ SMU * David Gardner @ LLNL - * Based on code sundials_band.c by: Alan C. Hindmarsh and + * Based on code sundials_band.c by: Alan C. Hindmarsh and * Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -15,10 +15,10 @@ * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------- - * This is the implementation file for the band implementation of + * This is the implementation file for the band implementation of * the SUNMATRIX package. * ----------------------------------------------------------------- - */ + */ #include #include @@ -43,12 +43,13 @@ static int SMScaleAddNew_Band(realtype c, SUNMatrix A, SUNMatrix B); */ /* ---------------------------------------------------------------------------- - * Function to create a new band matrix with default storage upper bandwidth + * Function to create a new band matrix with default storage upper bandwidth */ -SUNMatrix SUNBandMatrix(sunindextype N, sunindextype mu, sunindextype ml) +SUNMatrix SUNBandMatrix(sunindextype N, sunindextype mu, + sunindextype ml, SUNContext sunctx) { - return (SUNBandMatrixStorage(N, mu, ml, mu+ml)); + return (SUNBandMatrixStorage(N, mu, ml, mu+ml, sunctx)); } /* ---------------------------------------------------------------------------- @@ -56,7 +57,8 @@ SUNMatrix SUNBandMatrix(sunindextype N, sunindextype mu, sunindextype ml) */ SUNMatrix SUNBandMatrixStorage(sunindextype N, sunindextype mu, - sunindextype ml, sunindextype smu) + sunindextype ml, sunindextype smu, + SUNContext sunctx) { SUNMatrix A; SUNMatrixContent_Band content; @@ -67,7 +69,7 @@ SUNMatrix SUNBandMatrixStorage(sunindextype N, sunindextype mu, /* Create an empty matrix object */ A = NULL; - A = SUNMatNewEmpty(); + A = SUNMatNewEmpty(sunctx); if (A == NULL) return(NULL); /* Attach operations */ @@ -113,14 +115,14 @@ SUNMatrix SUNBandMatrixStorage(sunindextype N, sunindextype mu, } /* ---------------------------------------------------------------------------- - * Function to print the band matrix + * Function to print the band matrix */ - + void SUNBandMatrix_Print(SUNMatrix A, FILE* outfile) { sunindextype i, j, start, finish; - /* should not be called unless A is a band matrix; + /* should not be called unless A is a band matrix; otherwise return immediately */ if (SUNMatGetID(A) != SUNMATRIX_BAND) return; @@ -199,6 +201,14 @@ sunindextype SUNBandMatrix_LDim(SUNMatrix A) return SUNMAT_ILL_INPUT; } +sunindextype SUNBandMatrix_LData(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_BAND) + return SM_LDATA_B(A); + else + return SUNMAT_ILL_INPUT; +} + realtype* SUNBandMatrix_Data(SUNMatrix A) { if (SUNMatGetID(A) == SUNMATRIX_BAND) @@ -237,7 +247,7 @@ SUNMatrix_ID SUNMatGetID_Band(SUNMatrix A) SUNMatrix SUNMatClone_Band(SUNMatrix A) { SUNMatrix B = SUNBandMatrixStorage(SM_COLUMNS_B(A), SM_UBAND_B(A), - SM_LBAND_B(A), SM_SUBAND_B(A)); + SM_LBAND_B(A), SM_SUBAND_B(A), A->sunctx); return(B); } @@ -309,9 +319,9 @@ int SUNMatCopy_Band(SUNMatrix A, SUNMatrix B) SM_CONTENT_B(B)->data = (realtype *) realloc(SM_CONTENT_B(B)->data, SM_COLUMNS_B(B) * colSize*sizeof(realtype)); for (j=0; jcols[j] = SM_CONTENT_B(B)->data + j * colSize; + SM_CONTENT_B(B)->cols[j] = SM_CONTENT_B(B)->data + j * colSize; } - + /* Perform operation */ if (SUNMatZero_Band(B) != SUNMAT_SUCCESS) return SUNMAT_OPERATION_FAIL; @@ -328,7 +338,7 @@ int SUNMatScaleAddI_Band(realtype c, SUNMatrix A) { sunindextype i, j; realtype *A_colj; - + /* Verify that A is a band matrix */ if (SUNMatGetID(A) != SUNMATRIX_BAND) return SUNMAT_ILL_INPUT; @@ -357,7 +367,7 @@ int SUNMatScaleAdd_Band(realtype c, SUNMatrix A, SUNMatrix B) (SM_LBAND_B(B) > SM_LBAND_B(A)) ) { return SMScaleAddNew_Band(c,A,B); } - + /* Otherwise, perform operation in-place */ for (j=0; jsunctx); /* scale/add c*A into new matrix */ for (j=0; jcontent = C->content; C->content = NULL; SUNMatDestroy_Band(C); - + return SUNMAT_SUCCESS; } diff --git a/src/lib/sunmatrix/cusparse/cusparse_kernels.cuh b/src/lib/sunmatrix/cusparse/cusparse_kernels.cuh new file mode 100644 index 0000000..f46e678 --- /dev/null +++ b/src/lib/sunmatrix/cusparse/cusparse_kernels.cuh @@ -0,0 +1,172 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file is for the cuSPARSE implementation of the + * SUNMATRIX module. + * ----------------------------------------------------------------- + */ + + +#ifndef _SUNCUSPARSE_MATRIX_KERNELS_CUH_ +#define _SUNCUSPARSE_MATRIX_KERNELS_CUH_ + +#include +#include + +namespace sundials +{ +namespace sunmatrix_cusparse +{ + +template +__global__ void +scaleAddIKernelCSR(I m, T c, T* A, const I* rowptr, const I* colind) +{ + // REQUIRES THE DIAGONAL TO BE PRESENT! + + // Each thread loops over one row of the matrix so memory accesses by a thread are stride-1. + // If there aren't enough threads to cover all rows, then some threads will be reused for + // more than one row. + for (I row = blockIdx.x*blockDim.x + threadIdx.x; + row < m; + row += blockDim.x * gridDim.x) + { + I tmp = rowptr[row]; + I rownnz = rowptr[row+1] - tmp; + I idx = tmp; + for (I j = 0; j < rownnz; j++) + { + if (colind[idx+j] == row) A[idx+j] = c*A[idx+j] + 1.0; + else A[idx+j] = c*A[idx+j]; + } + } +} + +template +__global__ void +scaleAddIKernelBCSR(I m, I nblocks, I blocknnz, T c, T* A, const I* rowptr, const I* colind) +{ + // REQUIRES THE DIAGONAL TO BE PRESENT! + + // Ideally each thread block will be in charge of one block of the matrix. + for (I block = blockIdx.x; + block < nblocks; + block += gridDim.x) + { + // Each thread loops over one row of the matrix so memory accesses by a thread are stride-1. + // If there aren't enough threads to cover all rows, then some threads will be reused for + // more than one row. + for (I row = threadIdx.x; + row < m; + row += blockDim.x) + { + I tmp = rowptr[row]; + I rownnz = rowptr[row+1] - tmp; + I idxl = tmp; + I idxg = block*blocknnz + tmp; + for (I j = 0; j < rownnz; j++) + { + if (colind[idxl+j] == row) A[idxg+j] = c*A[idxg+j] + 1.0; + else A[idxg+j] = c*A[idxg+j]; + } + } + } +} + +template +__global__ void +scaleAddKernelCSR(I nnz, T c, T* A, const T* B) +{ + // REQUIRES A AND B TO HAVE THE SAME SPARSITY PATTERN + for (I i = blockIdx.x * blockDim.x + threadIdx.x; + i < nnz; + i += blockDim.x * gridDim.x) + { + A[i] = c*A[i] + B[i]; + } +} + +template +__global__ void +matvecBCSR(I m, I nblocks, I blocknnz, const T* A, const I* rowptr, const I* colind, const T* x, T* y) +{ + // Zero out result vector + for (I i = blockIdx.x * blockDim.x + threadIdx.x; + i < nblocks*blocknnz; + i += blockDim.x * gridDim.x) + { + y[i] = 0.0; + } + + __syncthreads(); + + // Ideally each thread block will be in charge of one block of the matrix. + for (I block = blockIdx.x; + block < nblocks; + block += gridDim.x) + { + // Each thread loops over one row of the matrix so memory accesses by a thread are stride-1. + // If there aren't enough threads to cover all rows, then some threads will be reused for + // more than one row. + for (I row = threadIdx.x; + row < m; + row += blockDim.x) + { + I tmp = rowptr[row]; + I rownnz = rowptr[row+1] - tmp; // number of nnz in this row + I idxl = tmp; // local (to this block) starting nonzero index + I idxg = block*blocknnz + tmp; // global (overall matrix) starting nonzero index + I rowg = block*m+row; // global (overall matrix) row + I colg = block*m; // global (overall matrix) starting column + for (I j = 0; j < rownnz; j++) + { + y[rowg] += A[idxg+j] * x[ colg+colind[idxl+j] ]; + } + } + } +} + +// kernels for debugging +#ifdef SUNDIALS_DEBUG + +template +__global__ void +print_kernel(I m, I nnz, I blocknnz, T* A, const I* rowptr, const I* colind) +{ + for (I i = blockIdx.x * blockDim.x + threadIdx.x; + i < nnz; + i += blockDim.x * gridDim.x) + { + printf("A[%d] = %f\n", i, A[i]); + } + for (I i = blockIdx.x * blockDim.x + threadIdx.x; + i < m+1; + i += blockDim.x * gridDim.x) + { + printf("rowptr[%d] = %d\n", i, rowptr[i]); + } + for (I i = blockIdx.x * blockDim.x + threadIdx.x; + i < blocknnz; + i += blockDim.x * gridDim.x) + { + printf("colind[%d] = %d\n", i, colind[i]); + } +} + +#endif + +} // namespace sunmatrix_cusparse +} // namespace sundials + +#endif \ No newline at end of file diff --git a/src/lib/sunmatrix/cusparse/sunmatrix_cusparse.cu b/src/lib/sunmatrix/cusparse/sunmatrix_cusparse.cu new file mode 100644 index 0000000..dc6e6a9 --- /dev/null +++ b/src/lib/sunmatrix/cusparse/sunmatrix_cusparse.cu @@ -0,0 +1,1265 @@ +/* + * ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the header file is for the cuSPARSE implementation of the + * SUNMATRIX module. + * ----------------------------------------------------------------- + */ + +#include +#include + +#include +#include + +#include "sundials_cuda.h" +#include "sundials_debug.h" +#include "cusparse_kernels.cuh" + + +/* Use the namespace for the kernels */ +using namespace sundials::cuda; +using namespace sundials::sunmatrix_cusparse; + +/* Constants */ +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* Private function prototypes */ +static booleantype SMCompatible_cuSparse(SUNMatrix, SUNMatrix); +static SUNMatrix SUNMatrix_cuSparse_NewEmpty(SUNContext sunctx); +#if CUDART_VERSION >= 11000 +static cusparseStatus_t CreateSpMatDescr(SUNMatrix, cusparseSpMatDescr_t*); +#endif + +#if CUDART_VERSION >= 12000 +#define SPMV_ALG CUSPARSE_SPMV_CSR_ALG1 +#else +#define SPMV_ALG CUSPARSE_MV_ALG_DEFAULT +#endif + +/* Macros for handling the different function names based on precision */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define cusparseXcsrmv cusparseDcsrmv +#define CUDA_R_XF CUDA_R_64F +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define cusparseXcsrmv cusparseScsrmv +#define CUDA_R_XF CUDA_R_32F +#endif + +/* Content accessor macros */ +#define SMCU_CONTENT(A) ( (SUNMatrix_Content_cuSparse)(A->content) ) +#define SMCU_ROWS(A) ( SMCU_CONTENT(A)->M ) +#define SMCU_COLUMNS(A) ( SMCU_CONTENT(A)->N ) +#define SMCU_NNZ(A) ( SMCU_CONTENT(A)->NNZ ) +#define SMCU_NBLOCKS(A) ( SMCU_CONTENT(A)->nblocks ) +#define SMCU_BLOCKROWS(A) ( SMCU_CONTENT(A)->blockrows ) +#define SMCU_BLOCKCOLS(A) ( SMCU_CONTENT(A)->blockcols ) +#define SMCU_BLOCKNNZ(A) ( SMCU_CONTENT(A)->blocknnz ) +#define SMCU_NP(A) ( SMCU_CONTENT(A)->NP ) +#define SMCU_SPARSETYPE(A) ( SMCU_CONTENT(A)->sparse_type ) +#define SMCU_OWNMATD(A) ( SMCU_CONTENT(A)->own_matd ) +#define SMCU_DATA(A) ( SMCU_CONTENT(A)->data ) +#define SMCU_DATAp(A) ( (realtype*)SMCU_CONTENT(A)->data->ptr ) +#define SMCU_INDEXVALS(A) ( SMCU_CONTENT(A)->colind ) +#define SMCU_INDEXPTRS(A) ( SMCU_CONTENT(A)->rowptrs ) +#define SMCU_INDEXVALSp(A) ( (int*) SMCU_CONTENT(A)->colind->ptr ) +#define SMCU_INDEXPTRSp(A) ( (int*) SMCU_CONTENT(A)->rowptrs->ptr ) +#define SMCU_MEMHELP(A) ( SMCU_CONTENT(A)->mem_helper ) +#define SMCU_MATDESCR(A) ( SMCU_CONTENT(A)->mat_descr ) +#define SMCU_CUSPHANDLE(A) ( SMCU_CONTENT(A)->cusp_handle ) +#define SMCU_FIXEDPATTERN(A)( SMCU_CONTENT(A)->fixed_pattern ) +#define SMCU_EXECPOLICY(A) ( SMCU_CONTENT(A)->exec_policy ) + + +/* ------------------------------------------------------------------ + * Default execution policy definition. + * + * This policy tries to help us leverage the structure of the matrix. + * It will choose block sizes which are a multiple of the warp size, + * and it will choose a grid size to such that all work elements are + * covered. + * ------------------------------------------------------------------ */ + +class SUNCuSparseMatrixExecPolicy : public ExecPolicy +{ +public: + SUNCuSparseMatrixExecPolicy(const cudaStream_t stream = 0) + : ExecPolicy(stream) + {} + + SUNCuSparseMatrixExecPolicy(const SUNCuSparseMatrixExecPolicy& ex) + : ExecPolicy(ex.stream_) + {} + + virtual size_t gridSize(size_t numWorkElements, size_t blockDim = 0) const + { + return(numWorkElements + blockDim - 1)/blockDim; + } + + virtual size_t blockSize(size_t numWorkElements = 0, size_t gridDim = 0) const + { + return(max_block_size(WARP_SIZE*(numWorkElements + WARP_SIZE - 1)/WARP_SIZE)); + } + + virtual const cudaStream_t* stream() const + { + return(&stream_); + } + + virtual ExecPolicy* clone() const + { + return(static_cast(new SUNCuSparseMatrixExecPolicy(*this))); + } + + static size_t max_block_size(int val) + { + return((val > MAX_BLOCK_SIZE) ? MAX_BLOCK_SIZE : val ); + } +}; + +SUNCuSparseMatrixExecPolicy DEFAULT_EXEC_POLICY; + +/* ------------------------------------------------------------------ + * Constructors. + * ------------------------------------------------------------------ */ + +SUNMatrix SUNMatrix_cuSparse_NewCSR(int M, int N, int NNZ, cusparseHandle_t cusp, SUNContext sunctx) +{ + SUNMemory d_colind, d_rowptr, d_values; + int alloc_fail = 0; + + /* return with NULL matrix on illegal input */ + if ( (M <= 0) || (N <= 0) || (NNZ < 0) ) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_NewCSR_cuSparse: illegal value(s) for M, N, or NNZ\n"); + return(NULL); + } + + SUNMatrix A = SUNMatrix_cuSparse_NewEmpty(sunctx); + if (A == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_NewCSR_cuSparse: SUNMatrix_cuSparse_NewEmpty returned NULL\n"); + return(NULL); + } + + SMCU_MEMHELP(A) = SUNMemoryHelper_Cuda(sunctx); + if (SMCU_MEMHELP(A) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_NewCSR_cuSparse: SUNMemoryHelper_Cuda returned NULL\n"); + SUNMatDestroy(A); + return(NULL); + } + + /* Allocate device memory for the matrix */ + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_colind, + sizeof(int)*NNZ, SUNMEMTYPE_DEVICE, + nullptr); + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_rowptr, + sizeof(int)*(M+1), SUNMEMTYPE_DEVICE, + nullptr); + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_values, + sizeof(realtype)*NNZ, SUNMEMTYPE_DEVICE, + nullptr); + if (alloc_fail) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + SUNMatDestroy(A); + return(NULL); + } + + /* Choose sensible defaults */ + cusparseStatus_t cusparse_status = CUSPARSE_STATUS_SUCCESS; + cusparseMatDescr_t mat_descr; + cusparse_status = cusparseCreateMatDescr(&mat_descr); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + SUNMatDestroy(A); + return(NULL); + } + + cusparse_status = cusparseSetMatType(mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + cusparse_status = cusparseSetMatIndexBase(mat_descr, CUSPARSE_INDEX_BASE_ZERO); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + cudaStream_t stream; + if (!SUNDIALS_CUSPARSE_VERIFY(cusparseGetStream(cusp, &stream))) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + /* Fill the content */ + SMCU_CONTENT(A)->M = M; + SMCU_CONTENT(A)->N = N; + SMCU_CONTENT(A)->NNZ = NNZ; + SMCU_CONTENT(A)->nblocks = 1; + SMCU_CONTENT(A)->blockrows = M; + SMCU_CONTENT(A)->blockcols = N; + SMCU_CONTENT(A)->blocknnz = NNZ; + SMCU_CONTENT(A)->own_matd = SUNTRUE; + SMCU_CONTENT(A)->matvec_issetup = SUNFALSE; + SMCU_CONTENT(A)->fixed_pattern = SUNFALSE; + SMCU_CONTENT(A)->sparse_type = SUNMAT_CUSPARSE_CSR; + SMCU_CONTENT(A)->colind = d_colind; + SMCU_CONTENT(A)->rowptrs = d_rowptr; + SMCU_CONTENT(A)->data = d_values; + SMCU_CONTENT(A)->mat_descr = mat_descr; + SMCU_CONTENT(A)->cusp_handle = cusp; + SMCU_CONTENT(A)->exec_policy = DEFAULT_EXEC_POLICY.clone_new_stream(stream); + +#if CUDART_VERSION >= 11000 + cusparseSpMatDescr_t spmat_descr; + if (!SUNDIALS_CUSPARSE_VERIFY(CreateSpMatDescr(A, &spmat_descr))) + { + SUNMatDestroy(A); + return(NULL); + } + SMCU_CONTENT(A)->spmat_descr = spmat_descr; + SMCU_CONTENT(A)->dBufferMem = NULL; + SMCU_CONTENT(A)->bufferSize = 0; + SMCU_CONTENT(A)->vecX = NULL; + SMCU_CONTENT(A)->vecY = NULL; +#endif + + return A; +} + + +SUNMatrix SUNMatrix_cuSparse_MakeCSR(cusparseMatDescr_t mat_descr, int M, int N, int NNZ, + int *rowptrs , int *colind , realtype *data, + cusparseHandle_t cusp, SUNContext sunctx) +{ + /* return with NULL matrix on illegal input */ + if ( (M <= 0) || (N <= 0) || (NNZ < 0) ) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_MakeCSR_cuSparse: illegal value(s) for M, N, or NNZ\n"); + return(NULL); + } + + if ( (rowptrs == NULL) || (colind == NULL) || (data == NULL) ) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_MakeCSR_cuSparse: rowptrs, colind, or data is NULL\n"); + return(NULL); + } + + if (cusparseGetMatIndexBase(mat_descr) != CUSPARSE_INDEX_BASE_ZERO) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_MakeCSR_cuSparse: the cusparseMatDescr_t must have index base CUSPARSE_INDEX_BASE_ZERO\n"); + return(NULL); + } + + SUNMatrix A = SUNMatrix_cuSparse_NewEmpty(sunctx); + if (A == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_MakeCSR_cuSparse: SUNMatrix_cuSparse_NewEmpty returned NULL\n"); + return(NULL); + } + + SMCU_MEMHELP(A) = SUNMemoryHelper_Cuda(sunctx); + if (SMCU_MEMHELP(A) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_NewCSR_cuSparse: SUNMemoryHelper_Cuda returned NULL\n"); + SUNMatDestroy(A); + return(NULL); + } + + cudaStream_t stream; + if (!SUNDIALS_CUSPARSE_VERIFY(cusparseGetStream(cusp, &stream))) + { + SUNMatDestroy(A); + return(NULL); + } + + /* Fill content */ + SMCU_CONTENT(A)->M = M; + SMCU_CONTENT(A)->N = N; + SMCU_CONTENT(A)->NNZ = NNZ; + SMCU_CONTENT(A)->nblocks = 1; + SMCU_CONTENT(A)->blockrows = M; + SMCU_CONTENT(A)->blockcols = N; + SMCU_CONTENT(A)->blocknnz = NNZ; + SMCU_CONTENT(A)->own_matd = SUNFALSE; + SMCU_CONTENT(A)->matvec_issetup = SUNFALSE; + SMCU_CONTENT(A)->fixed_pattern = SUNFALSE; + SMCU_CONTENT(A)->sparse_type = SUNMAT_CUSPARSE_CSR; + SMCU_CONTENT(A)->colind = SUNMemoryHelper_Wrap(colind, SUNMEMTYPE_DEVICE); + SMCU_CONTENT(A)->rowptrs = SUNMemoryHelper_Wrap(rowptrs, SUNMEMTYPE_DEVICE); + SMCU_CONTENT(A)->data = SUNMemoryHelper_Wrap(data, SUNMEMTYPE_DEVICE); + SMCU_CONTENT(A)->mat_descr = mat_descr; + SMCU_CONTENT(A)->cusp_handle = cusp; + + SMCU_CONTENT(A)->exec_policy = DEFAULT_EXEC_POLICY.clone_new_stream(stream); + + if (SMCU_CONTENT(A)->colind == NULL || + SMCU_CONTENT(A)->rowptrs == NULL || + SMCU_CONTENT(A)->data == NULL) + { + SUNMatDestroy(A); + return(NULL); + } + +#if CUDART_VERSION >= 11000 + cusparseSpMatDescr_t spmat_descr; + if (!SUNDIALS_CUSPARSE_VERIFY(CreateSpMatDescr(A, &spmat_descr))) + { + SUNMatDestroy(A); + return(NULL); + } + SMCU_CONTENT(A)->spmat_descr = spmat_descr; + SMCU_CONTENT(A)->dBufferMem = NULL; + SMCU_CONTENT(A)->bufferSize = 0; + SMCU_CONTENT(A)->vecX = NULL; + SMCU_CONTENT(A)->vecY = NULL; +#endif + + return(A); +} + + +SUNMatrix SUNMatrix_cuSparse_NewBlockCSR(int nblocks, int blockrows, int blockcols, int blocknnz, cusparseHandle_t cusp, SUNContext sunctx) +{ + SUNMemory d_colind, d_rowptr, d_values; + int M, N, NNZ; + int alloc_fail = 0; + + /* Return with NULL matrix on illegal input */ + if (blockrows != blockcols) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_NewBlockCSR: matrix must be square for the BCSR format\n"); + return(NULL); + } + + M = nblocks * blockrows; + N = M; + NNZ = nblocks * blocknnz; + + /* Return with NULL matrix on illegal input */ + if ( (M <= 0) || (N <= 0) || (NNZ < 0) ) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_NewBlockCSR: illegal value(s) for M, N, or NNZ\n"); + return(NULL); + } + + /* Allocate the SUNMatrix object */ + SUNMatrix A = SUNMatrix_cuSparse_NewEmpty(sunctx); + if (A == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_NewBlockCSR: SUNMatrix_cuSparse_NewEmpty returned NULL\n"); + return(NULL); + } + + SMCU_MEMHELP(A) = SUNMemoryHelper_Cuda(sunctx); + if (SMCU_MEMHELP(A) == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_NewCSR_cuSparse: SUNMemoryHelper_Cuda returned NULL\n"); + SUNMatDestroy(A); + return(NULL); + } + + /* Allocate device memory for the matrix */ + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_colind, + sizeof(int)*blocknnz, SUNMEMTYPE_DEVICE, + nullptr); + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_rowptr, + sizeof(int)*(blockrows + 1), + SUNMEMTYPE_DEVICE, nullptr); + alloc_fail += SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &d_values, + sizeof(realtype)*blocknnz*nblocks, + SUNMEMTYPE_DEVICE, nullptr); + if (alloc_fail) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + SUNMatDestroy(A); + return(NULL); + } + + /* Choose sensible defaults */ + cusparseStatus_t cusparse_status = CUSPARSE_STATUS_SUCCESS; + cusparseMatDescr_t mat_descr; + cusparse_status = cusparseCreateMatDescr(&mat_descr); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + SUNMatDestroy(A); + return(NULL); + } + + cusparse_status = cusparseSetMatType(mat_descr, CUSPARSE_MATRIX_TYPE_GENERAL); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + cusparse_status = cusparseSetMatIndexBase(mat_descr, CUSPARSE_INDEX_BASE_ZERO); + if (!SUNDIALS_CUSPARSE_VERIFY(cusparse_status)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + cudaStream_t stream; + if (!SUNDIALS_CUSPARSE_VERIFY(cusparseGetStream(cusp, &stream))) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_colind, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_rowptr, nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), d_values, nullptr); + cusparseDestroyMatDescr(mat_descr); + SUNMatDestroy(A); + return(NULL); + } + + /* Fill the content */ + SMCU_CONTENT(A)->M = M; + SMCU_CONTENT(A)->N = N; + SMCU_CONTENT(A)->NNZ = NNZ; + SMCU_CONTENT(A)->nblocks = nblocks; + SMCU_CONTENT(A)->blockrows = blockrows; + SMCU_CONTENT(A)->blockcols = blockrows; + SMCU_CONTENT(A)->blocknnz = blocknnz; + SMCU_CONTENT(A)->own_matd = SUNTRUE; + SMCU_CONTENT(A)->matvec_issetup = SUNFALSE; + SMCU_CONTENT(A)->cusp_handle = cusp; + SMCU_CONTENT(A)->fixed_pattern = SUNFALSE; + SMCU_CONTENT(A)->sparse_type = SUNMAT_CUSPARSE_BCSR; + SMCU_CONTENT(A)->colind = d_colind; + SMCU_CONTENT(A)->rowptrs = d_rowptr; + SMCU_CONTENT(A)->data = d_values; + SMCU_CONTENT(A)->mat_descr = mat_descr; + SMCU_CONTENT(A)->exec_policy = DEFAULT_EXEC_POLICY.clone_new_stream(stream); + +#if CUDART_VERSION >= 11000 + cusparseSpMatDescr_t spmat_descr; + if (!SUNDIALS_CUSPARSE_VERIFY(CreateSpMatDescr(A, &spmat_descr))) + { + SUNMatDestroy(A); + return(NULL); + } + SMCU_CONTENT(A)->spmat_descr = spmat_descr; + SMCU_CONTENT(A)->dBufferMem = NULL; + SMCU_CONTENT(A)->bufferSize = 0; + SMCU_CONTENT(A)->vecX = NULL; + SMCU_CONTENT(A)->vecY = NULL; +#endif + + return(A); +} + +/* ------------------------------------------------------------------ + * Implementation specific routines. + * ------------------------------------------------------------------ */ + +int SUNMatrix_cuSparse_SparseType(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_SPARSETYPE(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_Rows(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_ROWS(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_Columns(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_COLUMNS(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_NNZ(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_NNZ(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int* SUNMatrix_cuSparse_IndexPointers(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_INDEXPTRSp(A)); + else + return(NULL); +} + +int* SUNMatrix_cuSparse_IndexValues(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_INDEXVALSp(A)); + else + return(NULL); +} + +realtype* SUNMatrix_cuSparse_Data(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_DATAp(A)); + else + return(NULL); +} + +int SUNMatrix_cuSparse_NumBlocks(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_NBLOCKS(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_BlockRows(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_BLOCKROWS(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_BlockColumns(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_BLOCKCOLS(A)); + else + return(SUNMAT_ILL_INPUT); +} + +int SUNMatrix_cuSparse_BlockNNZ(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_BLOCKNNZ(A)); + else + return(SUNMAT_ILL_INPUT); +} + +realtype* SUNMatrix_cuSparse_BlockData(SUNMatrix A, int blockidx) +{ + realtype *matdata; + int offset; + + if (SUNMatGetID(A) != SUNMATRIX_CUSPARSE) + return(NULL); + + if (blockidx >= SMCU_NBLOCKS(A)) + return(NULL); + + matdata = SMCU_DATAp(A); + offset = SMCU_BLOCKNNZ(A)*blockidx; + + return(&matdata[offset]); +} + +cusparseMatDescr_t SUNMatrix_cuSparse_MatDescr(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_CUSPARSE) + return(SMCU_MATDESCR(A)); + else + return(NULL); +} + +int SUNMatrix_cuSparse_SetFixedPattern(SUNMatrix A, booleantype yesno) +{ + if (SUNMatGetID(A) != SUNMATRIX_CUSPARSE) + return(SUNMAT_ILL_INPUT); + + SMCU_FIXEDPATTERN(A) = yesno; + + return(SUNMAT_SUCCESS); +} + + +int SUNMatrix_cuSparse_SetKernelExecPolicy(SUNMatrix A, SUNCudaExecPolicy* exec_policy) +{ + if (SUNMatGetID(A) != SUNMATRIX_CUSPARSE) + return(SUNMAT_ILL_INPUT); + + /* Reset to the default policy if the new one is NULL */ + delete SMCU_EXECPOLICY(A); + if (exec_policy) + SMCU_EXECPOLICY(A) = exec_policy->clone(); + else + SMCU_EXECPOLICY(A) = DEFAULT_EXEC_POLICY.clone_new_stream(*SMCU_EXECPOLICY(A)->stream()); + + return(SUNMAT_SUCCESS); +} + + +int SUNMatrix_cuSparse_CopyToDevice(SUNMatrix dA, realtype* h_data, + int* h_idxptrs, int* h_idxvals) +{ + int retval; + SUNMemory _h_data, _h_idxptrs, _h_idxvals; + const cudaStream_t* stream; + int nidxvals, nidxptrs; + + if (SUNMatGetID(dA) != SUNMATRIX_CUSPARSE) + return(SUNMAT_ILL_INPUT); + + stream = SMCU_EXECPOLICY(dA)->stream(); + + if (h_data != NULL) + { + _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + SMCU_DATA(dA), + _h_data, + SMCU_NNZ(dA)*sizeof(realtype), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_data, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + switch(SMCU_SPARSETYPE(dA)) + { + case SUNMAT_CUSPARSE_CSR: + nidxptrs = SMCU_ROWS(dA)+1; + nidxvals = SMCU_NNZ(dA); + break; + case SUNMAT_CUSPARSE_BCSR: + nidxptrs = SMCU_BLOCKROWS(dA)+1; + nidxvals = SMCU_BLOCKNNZ(dA); + break; + default: + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_CopyToDevice: unrecognized sparse type\n"); + return(SUNMAT_ILL_INPUT); + } + + if (h_idxptrs != NULL) + { + _h_idxptrs = SUNMemoryHelper_Wrap(h_idxptrs, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + SMCU_INDEXPTRS(dA), + _h_idxptrs, + nidxptrs*sizeof(int), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_idxptrs, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + if (h_idxvals != NULL) + { + _h_idxvals = SUNMemoryHelper_Wrap(h_idxvals, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + SMCU_INDEXVALS(dA), + _h_idxvals, + nidxvals*sizeof(int), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_idxvals, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + return(SUNMAT_SUCCESS); +} + + +int SUNMatrix_cuSparse_CopyFromDevice(SUNMatrix dA, realtype* h_data, + int* h_idxptrs, int* h_idxvals) +{ + int retval; + SUNMemory _h_data, _h_idxptrs, _h_idxvals; + const cudaStream_t* stream; + int nidxvals, nidxptrs; + + if (SUNMatGetID(dA) != SUNMATRIX_CUSPARSE) + return(SUNMAT_ILL_INPUT); + + stream = SMCU_EXECPOLICY(dA)->stream(); + + if (h_data != NULL) + { + _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + _h_data, + SMCU_DATA(dA), + SMCU_NNZ(dA)*sizeof(realtype), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_data, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + + switch(SMCU_SPARSETYPE(dA)) + { + case SUNMAT_CUSPARSE_CSR: + nidxptrs = SMCU_ROWS(dA)+1; + nidxvals = SMCU_NNZ(dA); + case SUNMAT_CUSPARSE_BCSR: + nidxptrs = SMCU_BLOCKROWS(dA)+1; + nidxvals = SMCU_BLOCKNNZ(dA); + } + + if (h_idxptrs != NULL) + { + _h_idxptrs = SUNMemoryHelper_Wrap(h_idxptrs, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + _h_idxptrs, + SMCU_INDEXPTRS(dA), + nidxptrs*sizeof(int), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_idxptrs, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + if (h_idxvals != NULL) + { + _h_idxvals = SUNMemoryHelper_Wrap(h_idxvals, SUNMEMTYPE_HOST); + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(dA), + _h_idxvals, + SMCU_INDEXVALS(dA), + nidxvals*sizeof(int), + (void*) stream); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(dA), _h_idxvals, nullptr); + if (retval != 0) return(SUNMAT_OPERATION_FAIL); + } + + + return(SUNMAT_SUCCESS); +} + +/* + * ----------------------------------------------------------------- + * implementation of matrix operations + * ----------------------------------------------------------------- + */ + + +SUNMatrix_ID SUNMatGetID_cuSparse(SUNMatrix A) +{ + return(SUNMATRIX_CUSPARSE); +} + +/* Returns a new matrix allocated to have the same structure as A, + but it does not copy any nonzeros, column vals, or row pointers. */ +SUNMatrix SUNMatClone_cuSparse(SUNMatrix A) +{ + SUNMatrix B; + + switch (SMCU_SPARSETYPE(A)) + { + case SUNMAT_CUSPARSE_CSR: + B = SUNMatrix_cuSparse_NewCSR(SMCU_ROWS(A), SMCU_COLUMNS(A), SMCU_NNZ(A), + SMCU_CUSPHANDLE(A), A->sunctx); + break; + case SUNMAT_CUSPARSE_BCSR: + B = SUNMatrix_cuSparse_NewBlockCSR(SMCU_NBLOCKS(A), SMCU_BLOCKROWS(A), SMCU_BLOCKCOLS(A), + SMCU_BLOCKNNZ(A), SMCU_CUSPHANDLE(A), A->sunctx); + break; + default: + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatClone_cuSparse: sparse type not recognized\n"); + B = NULL; + } + + SMCU_FIXEDPATTERN(B) = SMCU_FIXEDPATTERN(A); + delete SMCU_EXECPOLICY(B); + SMCU_EXECPOLICY(B) = SMCU_EXECPOLICY(A)->clone(); + + return(B); +} + + +/* Deallocates the SUNMatrix object and all data it owns */ +void SUNMatDestroy_cuSparse(SUNMatrix A) +{ + if (A == NULL) return; + + /* free content */ + if (A->content != NULL) + { + if (SMCU_MEMHELP(A)) + { + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), SMCU_DATA(A), nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), SMCU_INDEXPTRS(A), nullptr); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), SMCU_INDEXVALS(A), nullptr); + } + else + { + SUNDIALS_DEBUG_PRINT("WARNING in SUNMatDestroy_cuSparse: mem_helper was NULL when trying to dealloc data, this could result in a memory leak\n"); + } + + if (SMCU_OWNMATD(A)) + { + /* free cusparseMatDescr_t */ + SUNDIALS_CUSPARSE_VERIFY( cusparseDestroyMatDescr(SMCU_MATDESCR(A)) ); + } + +#if CUDART_VERSION >= 11000 + SUNDIALS_CUSPARSE_VERIFY( cusparseDestroyDnVec(SMCU_CONTENT(A)->vecX) ); + SUNDIALS_CUSPARSE_VERIFY( cusparseDestroyDnVec(SMCU_CONTENT(A)->vecY) ); + SUNDIALS_CUSPARSE_VERIFY( cusparseDestroySpMat(SMCU_CONTENT(A)->spmat_descr) ); + SUNMemoryHelper_Dealloc(SMCU_MEMHELP(A), SMCU_CONTENT(A)->dBufferMem, + nullptr); +#endif + + if (SMCU_EXECPOLICY(A)) + { + delete SMCU_EXECPOLICY(A); + SMCU_EXECPOLICY(A) = NULL; + } + + SUNMemoryHelper_Destroy(SMCU_MEMHELP(A)); + + /* free content struct */ + free(A->content); + A->content = NULL; + } + + /* free ops and matrix */ + if (A->ops) { free(A->ops); A->ops = NULL; } + free(A); A = NULL; + + return; +} + + +/* Performs A_ij = 0 */ +int SUNMatZero_cuSparse(SUNMatrix A) +{ + cudaError_t cuerr; + cudaStream_t stream; + + stream = *SMCU_EXECPOLICY(A)->stream(); + + /* set all data to zero */ + cuerr = cudaMemsetAsync(SMCU_DATAp(A), 0, SMCU_NNZ(A)*sizeof(realtype), stream); + if (!SUNDIALS_CUDA_VERIFY(cuerr)) return(SUNMAT_OPERATION_FAIL); + + /* set all rowptrs to zero unless the sparsity pattern is fixed */ + if (!SMCU_FIXEDPATTERN(A)) + { + cuerr = cudaMemsetAsync(SMCU_INDEXPTRSp(A), 0, + (SMCU_BLOCKROWS(A)+1)*sizeof(int), + stream); + if (!SUNDIALS_CUDA_VERIFY(cuerr)) return(SUNMAT_OPERATION_FAIL); + + /* set all colind to zero */ + cuerr = cudaMemsetAsync(SMCU_INDEXVALSp(A), 0, + SMCU_BLOCKNNZ(A)*sizeof(int), + stream); + if (!SUNDIALS_CUDA_VERIFY(cuerr)) return(SUNMAT_OPERATION_FAIL); + } + + return(SUNMAT_SUCCESS); +} + + +/* Copies the nonzeros, column vals, and row pointers into dst */ +int SUNMatCopy_cuSparse(SUNMatrix src, SUNMatrix dst) +{ + int retval; + const cudaStream_t* stream; + + /* Verify that src and dst are compatible */ + if (!SMCompatible_cuSparse(src, dst)) + return(SUNMAT_ILL_INPUT); + + stream = SMCU_EXECPOLICY(src)->stream(); + + /* Ensure that dst is allocated with at least as + much memory as we have nonzeros in src */ + if (SMCU_NNZ(dst) < SMCU_NNZ(src)) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatCopy_cuSparse: the destination matrix has less nonzeros than the source\n"); + return(SUNMAT_ILL_INPUT); + } + + /* Zero out dst so that copy works correctly */ + if (SUNMatZero_cuSparse(dst) != SUNMAT_SUCCESS) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatCopy_cuSparse: SUNMatZero_cuSparse failed\n"); + return(SUNMAT_OPERATION_FAIL); + } + + /* Copy the data over */ + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(src), + SMCU_DATA(dst), + SMCU_DATA(src), + SMCU_NNZ(src)*sizeof(realtype), + (void*) stream); + if (retval) return(SUNMAT_OPERATION_FAIL); + + /* Copy the row pointers over */ + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(src), + SMCU_INDEXPTRS(dst), + SMCU_INDEXPTRS(src), + (SMCU_BLOCKROWS(src)+1)*sizeof(int), + (void*) stream); + if (retval) return(SUNMAT_OPERATION_FAIL); + + /* Copy the column indices over */ + retval = SUNMemoryHelper_CopyAsync(SMCU_MEMHELP(src), + SMCU_INDEXVALS(dst), + SMCU_INDEXVALS(src), + SMCU_BLOCKNNZ(src)*sizeof(int), + (void*) stream); + if (retval) return(SUNMAT_OPERATION_FAIL); + + return(SUNMAT_SUCCESS); +} + + +/* Performs A = cA + I. Requires the diagonal to be allocated already. */ +int SUNMatScaleAddI_cuSparse(realtype c, SUNMatrix A) +{ + unsigned threadsPerBlock, gridSize; + cudaStream_t stream = *SMCU_EXECPOLICY(A)->stream(); + + switch (SMCU_SPARSETYPE(A)) + { + case SUNMAT_CUSPARSE_CSR: + /* Choose the grid size to be the number of rows in the matrix, + and then choose threadsPerBlock to be a multiple of the warp size + that results in enough threads to have one per 2 columns. */ + threadsPerBlock = SMCU_EXECPOLICY(A)->blockSize(SMCU_COLUMNS(A)/2); + gridSize = SMCU_EXECPOLICY(A)->gridSize(SMCU_ROWS(A)*SMCU_COLUMNS(A)/2, threadsPerBlock); + scaleAddIKernelCSR + <<>>(SMCU_ROWS(A), + c, + SMCU_DATAp(A), + SMCU_INDEXPTRSp(A), + SMCU_INDEXVALSp(A)); + break; + case SUNMAT_CUSPARSE_BCSR: + /* Choose the grid size to be the number of blocks in the matrix, + and then choose threadsPerBlock to be a multiple of the warp size + that results in enough threads to have one per row of the block. */ + threadsPerBlock = SMCU_EXECPOLICY(A)->blockSize(SMCU_BLOCKROWS(A)); + gridSize = SMCU_EXECPOLICY(A)->gridSize(SMCU_NBLOCKS(A)*SMCU_BLOCKROWS(A), threadsPerBlock); + scaleAddIKernelBCSR + <<>>(SMCU_BLOCKROWS(A), + SMCU_NBLOCKS(A), + SMCU_BLOCKNNZ(A), + c, + SMCU_DATAp(A), + SMCU_INDEXPTRSp(A), + SMCU_INDEXVALSp(A)); + break; + default: + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatScaleAddI_cuSparse: sparse type not recognized\n"); + return(SUNMAT_ILL_INPUT); + } + +#ifdef SUNDIALS_DEBUG_CUDA_LASTERROR + cudaDeviceSynchronize(); + if (!SUNDIALS_CUDA_VERIFY(cudaGetLastError())) return(SUNMAT_OPERATION_FAIL); +#endif + + return(SUNMAT_SUCCESS); +} + + +/* Performs A = cA + B */ +int SUNMatScaleAdd_cuSparse(realtype c, SUNMatrix A, SUNMatrix B) +{ + cudaStream_t stream; + unsigned threadsPerBlock, gridSize; + + if (!SMCompatible_cuSparse(A, B)) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatScaleAdd_cuSparse: SUNMatScaleAdd_cuSparse failed\n"); + return(SUNMAT_ILL_INPUT); + } + + stream = *SMCU_EXECPOLICY(A)->stream(); + + switch (SMCU_SPARSETYPE(A)) + { + case SUNMAT_CUSPARSE_CSR: + /* Choose the grid size to be the number of rows in the matrix, + and then choose threadsPerBlock to be a multiple of the warp size + that results in enough threads to have one per 2 columns. */ + threadsPerBlock = SMCU_EXECPOLICY(A)->blockSize(SMCU_COLUMNS(A)/2); + gridSize = SMCU_EXECPOLICY(A)->gridSize(SMCU_ROWS(A)*SMCU_COLUMNS(A)/2, threadsPerBlock); + scaleAddKernelCSR + <<>>(SMCU_NNZ(A), + c, + SMCU_DATAp(A), + SMCU_DATAp(B)); + break; + case SUNMAT_CUSPARSE_BCSR: + /* Choose the grid size to be the number of blocks in the matrix, + and then choose threadsPerBlock to be a multiple of the warp size + that results in enough threads to have one per row of the block. */ + threadsPerBlock = SMCU_EXECPOLICY(A)->blockSize(SMCU_BLOCKROWS(A)); + gridSize = SMCU_EXECPOLICY(A)->gridSize(SMCU_NBLOCKS(A)*SMCU_BLOCKROWS(A), threadsPerBlock); + scaleAddKernelCSR + <<>>(SMCU_NNZ(A), + c, + SMCU_DATAp(A), + SMCU_DATAp(B)); + break; + default: + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatScaleAdd_cuSparse: sparse type not recognized\n"); + return(SUNMAT_ILL_INPUT); + } + +#ifdef SUNDIALS_DEBUG_CUDA_LASTERROR + cudaDeviceSynchronize(); + if (!SUNDIALS_CUDA_VERIFY(cudaGetLastError())) return(SUNMAT_OPERATION_FAIL); +#endif + + return(SUNMAT_SUCCESS); +} + +/* Setup buffers needed for Matvec */ +int SUNMatMatvecSetup_cuSparse(SUNMatrix A) +{ +#if CUDART_VERSION >= 11000 + realtype placeholder[1]; + const realtype one = ONE; + + /* Check if setup has already been done */ + if (!(SMCU_CONTENT(A)->matvec_issetup)) + { + SUNDIALS_CUSPARSE_VERIFY( cusparseCreateDnVec(&SMCU_CONTENT(A)->vecX, + SMCU_COLUMNS(A), + placeholder, CUDA_R_XF) ); + SUNDIALS_CUSPARSE_VERIFY( cusparseCreateDnVec(&SMCU_CONTENT(A)->vecY, + SMCU_ROWS(A), + placeholder, CUDA_R_XF) ); + + SUNDIALS_CUSPARSE_VERIFY( + cusparseSpMV_bufferSize(SMCU_CUSPHANDLE(A), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, SMCU_CONTENT(A)->spmat_descr, + SMCU_CONTENT(A)->vecX, &one, SMCU_CONTENT(A)->vecY, + CUDA_R_XF, SPMV_ALG, + &SMCU_CONTENT(A)->bufferSize) ); + + if ( SUNMemoryHelper_Alloc(SMCU_MEMHELP(A), &SMCU_CONTENT(A)->dBufferMem, + SMCU_CONTENT(A)->bufferSize, SUNMEMTYPE_DEVICE, + nullptr) ) + return(SUNMAT_OPERATION_FAIL); + } +#endif + SMCU_CONTENT(A)->matvec_issetup = SUNTRUE; + return(SUNMAT_SUCCESS); +} + +/* Perform y = Ax */ +int SUNMatMatvec_cuSparse(SUNMatrix A, N_Vector x, N_Vector y) +{ + /* Verify that the dimensions of A, x, and y agree */ + if ( (SMCU_COLUMNS(A) != N_VGetLength(x)) || + (SMCU_ROWS(A) != N_VGetLength(y)) ) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatMatvec_cuSparse: dimensions do not agree\n"); + return(SUNMAT_ILL_INPUT); + } + + realtype *d_xdata = N_VGetDeviceArrayPointer(x); + realtype *d_ydata = N_VGetDeviceArrayPointer(y); + + if (SMCU_SPARSETYPE(A) == SUNMAT_CUSPARSE_CSR) + { + const realtype one = ONE; + + /* Zero result vector */ + N_VConst(ZERO, y); + +#if CUDART_VERSION >= 11000 + { + /* Setup matvec if it has not been done yet */ + if (!SMCU_CONTENT(A)->matvec_issetup && SUNMatMatvecSetup_cuSparse(A)) + { + return(SUNMAT_OPERATION_FAIL); + } + + SUNDIALS_CUSPARSE_VERIFY( cusparseDnVecSetValues(SMCU_CONTENT(A)->vecX, + d_xdata) ); + SUNDIALS_CUSPARSE_VERIFY( cusparseDnVecSetValues(SMCU_CONTENT(A)->vecY, + d_ydata) ); + + SUNDIALS_CUSPARSE_VERIFY( cusparseSpMV(SMCU_CUSPHANDLE(A), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one, SMCU_CONTENT(A)->spmat_descr, + SMCU_CONTENT(A)->vecX, &one, + SMCU_CONTENT(A)->vecY, CUDA_R_XF, + SPMV_ALG, + SMCU_CONTENT(A)->dBufferMem->ptr) ); + } +#else + SUNDIALS_CUSPARSE_VERIFY( + cusparseXcsrmv(SMCU_CUSPHANDLE(A), CUSPARSE_OPERATION_NON_TRANSPOSE, + SMCU_ROWS(A), SMCU_COLUMNS(A), SMCU_NNZ(A), + &one, SMCU_MATDESCR(A), SMCU_DATAp(A), SMCU_INDEXPTRSp(A), + SMCU_INDEXVALSp(A), d_xdata, &one, d_ydata) ); +#endif + } + else if (SMCU_SPARSETYPE(A) == SUNMAT_CUSPARSE_BCSR) + { + cudaStream_t stream; + unsigned gridSize, threadsPerBlock; + + stream = *SMCU_EXECPOLICY(A)->stream(); + + /* Choose the grid size to be the number of blocks in the matrix, + and then choose threadsPerBlock to be a multiple of the warp size + that results in enough threads to have one per row of the block. */ + threadsPerBlock = SMCU_EXECPOLICY(A)->blockSize(SMCU_COLUMNS(A)/2); + gridSize = SMCU_EXECPOLICY(A)->gridSize(SMCU_ROWS(A)*SMCU_COLUMNS(A)/2, threadsPerBlock); + matvecBCSR + <<>>(SMCU_BLOCKROWS(A), + SMCU_NBLOCKS(A), + SMCU_BLOCKNNZ(A), + SMCU_DATAp(A), + SMCU_INDEXPTRSp(A), + SMCU_INDEXVALSp(A), + d_xdata, + d_ydata); + } + else + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatMatvec_cuSparse: sparse type not recognized\n"); + return(SUNMAT_ILL_INPUT); + } + +#ifdef SUNDIALS_DEBUG_CUDA_LASTERROR + cudaDeviceSynchronize(); + if (!SUNDIALS_CUDA_VERIFY(cudaGetLastError())) return(SUNMAT_OPERATION_FAIL); +#endif + + return(SUNMAT_SUCCESS); +} + + +/* + * ================================================================= + * private functions + * ================================================================= + */ + + +/* ----------------------------------------------------------------- + * Function to check compatibility of two sparse SUNMatrix objects + */ +static booleantype SMCompatible_cuSparse(SUNMatrix A, SUNMatrix B) +{ + /* both matrices must be sparse */ + if ( (SUNMatGetID(A) != SUNMATRIX_CUSPARSE) || + (SUNMatGetID(B) != SUNMATRIX_CUSPARSE) ) + return(SUNFALSE); + + /* both matrices must have the same shape and sparsity type */ + if (SMCU_ROWS(A) != SMCU_ROWS(B)) + return(SUNFALSE); + if (SMCU_COLUMNS(A) != SMCU_COLUMNS(B)) + return(SUNFALSE); + if (SMCU_SPARSETYPE(A) != SMCU_SPARSETYPE(B)) + return(SUNFALSE); + + return(SUNTRUE); +} + +/* ----------------------------------------------------------------- + * Function to create empty SUNMatrix with ops attached and + * the content structure allocated. + */ +SUNMatrix SUNMatrix_cuSparse_NewEmpty(SUNContext sunctx) +{ + /* Create an empty matrix object */ + SUNMatrix A = NULL; + A = SUNMatNewEmpty(sunctx); + if (A == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_NewEmpty: SUNMatNewEmpty failed\n"); + return(NULL); + } + + /* Attach operations */ + A->ops->getid = SUNMatGetID_cuSparse; + A->ops->clone = SUNMatClone_cuSparse; + A->ops->destroy = SUNMatDestroy_cuSparse; + A->ops->zero = SUNMatZero_cuSparse; + A->ops->copy = SUNMatCopy_cuSparse; + A->ops->scaleadd = SUNMatScaleAdd_cuSparse; + A->ops->scaleaddi = SUNMatScaleAddI_cuSparse; + A->ops->matvecsetup = SUNMatMatvecSetup_cuSparse; + A->ops->matvec = SUNMatMatvec_cuSparse; + + /* Create content */ + SUNMatrix_Content_cuSparse content = NULL; + content = (SUNMatrix_Content_cuSparse) malloc(sizeof *content); + if (content == NULL) + { + SUNDIALS_DEBUG_PRINT("ERROR in SUNMatrix_cuSparse_NewEmpty: failed to malloc content\n"); + SUNMatDestroy(A); + return(NULL); + } + + /* Attach content */ + A->content = content; + content->mem_helper = NULL; + + return(A); +} + +#if CUDART_VERSION >= 11000 +cusparseStatus_t CreateSpMatDescr(SUNMatrix A, cusparseSpMatDescr_t *spmat_descr) +{ + /* CUDA 11 introduced the "Generic API" and removed the cusparseXcsrmv that + works on the old cusparseMatDescr_t and raw data arrays. However, + cuSolverSp stuff requires the cusparseMatDescr_t still. So, we have to + create this cusparseSpMatDescr_t *and* the cusparseMatDescr_t. */ + return(cusparseCreateCsr(spmat_descr, SMCU_ROWS(A), SMCU_COLUMNS(A), + SMCU_NNZ(A), SMCU_INDEXPTRSp(A), + SMCU_INDEXVALSp(A), SMCU_DATAp(A), + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, CUDA_R_XF)); +} +#endif diff --git a/src/lib/sunmatrix/dense/fsunmatrix_dense.c b/src/lib/sunmatrix/dense/fsunmatrix_dense.c deleted file mode 100644 index 7a352b1..0000000 --- a/src/lib/sunmatrix/dense/fsunmatrix_dense.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_dense.h) contains the - * implementation needed for the Fortran initialization of dense - * vector operations. - * ----------------------------------------------------------------- - */ - -#include -#include - -#include "fsunmatrix_dense.h" - -/* Define global matrix variables */ - -SUNMatrix F2C_CVODE_matrix; -SUNMatrix F2C_IDA_matrix; -SUNMatrix F2C_KINSOL_matrix; -SUNMatrix F2C_ARKODE_matrix; -SUNMatrix F2C_ARKODE_mass_matrix; - -/* Fortran callable interfaces */ - -void FSUNDENSEMAT_INIT(int *code, long int *M, long int *N, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (F2C_CVODE_matrix) SUNMatDestroy(F2C_CVODE_matrix); - F2C_CVODE_matrix = NULL; - F2C_CVODE_matrix = SUNDenseMatrix((sunindextype)(*M), - (sunindextype)(*N)); - if (F2C_CVODE_matrix == NULL) *ier = -1; - break; - case FCMIX_IDA: - if (F2C_IDA_matrix) SUNMatDestroy(F2C_IDA_matrix); - F2C_IDA_matrix = NULL; - F2C_IDA_matrix = SUNDenseMatrix((sunindextype)(*M), - (sunindextype)(*N)); - if (F2C_IDA_matrix == NULL) *ier = -1; - break; - case FCMIX_KINSOL: - if (F2C_KINSOL_matrix) SUNMatDestroy(F2C_KINSOL_matrix); - F2C_KINSOL_matrix = NULL; - F2C_KINSOL_matrix = SUNDenseMatrix((sunindextype)(*M), - (sunindextype)(*N)); - if (F2C_KINSOL_matrix == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - if (F2C_ARKODE_matrix) SUNMatDestroy(F2C_ARKODE_matrix); - F2C_ARKODE_matrix = NULL; - F2C_ARKODE_matrix = SUNDenseMatrix((sunindextype)(*M), - (sunindextype)(*N)); - if (F2C_ARKODE_matrix == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - - -void FSUNDENSEMASSMAT_INIT(long int *M, long int *N, int *ier) -{ - *ier = 0; - if (F2C_ARKODE_mass_matrix) SUNMatDestroy(F2C_ARKODE_mass_matrix); - F2C_ARKODE_mass_matrix = NULL; - F2C_ARKODE_mass_matrix = SUNDenseMatrix((sunindextype)(*M), - (sunindextype)(*N)); - if (F2C_ARKODE_mass_matrix == NULL) *ier = -1; -} diff --git a/src/lib/sunmatrix/dense/fsunmatrix_dense.h b/src/lib/sunmatrix/dense/fsunmatrix_dense.h deleted file mode 100644 index cf952b4..0000000 --- a/src/lib/sunmatrix/dense/fsunmatrix_dense.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_dense.c) contains the - * definitions needed for the initialization of dense - * matrix operations in Fortran. - * ----------------------------------------------------------------- - */ - -#ifndef _FSUNMATRIX_DENSE_H -#define _FSUNMATRIX_DENSE_H - -#include -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FSUNDENSEMAT_INIT SUNDIALS_F77_FUNC(fsundensematinit, FSUNDENSEMATINIT) -#define FSUNDENSEMASSMAT_INIT SUNDIALS_F77_FUNC(fsundensemassmatinit, FSUNDENSEMASSMATINIT) -#else -#define FSUNDENSEMAT_INIT fsundensematinit_ -#define FSUNDENSEMASSMAT_INIT fsundensemassmatinit_ -#endif - - -/* Declarations of global variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -/* - * Prototypes of exported functions - * - * FSUNDENSEMAT_INIT - initializes dense matrix operations for main problem - * FSUNDENSEMASSMAT_INIT - initializes dense matrix operations for mass matrix solver - */ - -void FSUNDENSEMAT_INIT(int *code, long int *M, long int *N, int *ier); -void FSUNDENSEMASSMAT_INIT(long int *M, long int *N, int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/sunmatrix/dense/sunmatrix_dense.c b/src/lib/sunmatrix/dense/sunmatrix_dense.c index bc842b1..c3c56c1 100644 --- a/src/lib/sunmatrix/dense/sunmatrix_dense.c +++ b/src/lib/sunmatrix/dense/sunmatrix_dense.c @@ -5,7 +5,7 @@ * Alan C. Hindmarsh and Radu Serban @ LLNL * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -20,18 +20,15 @@ #include #include - -#include #include +#include #define ZERO RCONST(0.0) #define ONE RCONST(1.0) - /* Private function prototypes */ -static booleantype SMCompatible_Dense(SUNMatrix A, SUNMatrix B); -static booleantype SMCompatible2_Dense(SUNMatrix A, N_Vector x, N_Vector y); - +static booleantype compatibleMatrices(SUNMatrix A, SUNMatrix B); +static booleantype compatibleMatrixAndVectors(SUNMatrix A, N_Vector x, N_Vector y); /* * ----------------------------------------------------------------- @@ -43,19 +40,21 @@ static booleantype SMCompatible2_Dense(SUNMatrix A, N_Vector x, N_Vector y); * Function to create a new dense matrix */ -SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N) +SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N, SUNContext sunctx) { SUNMatrix A; SUNMatrixContent_Dense content; sunindextype j; /* return with NULL matrix on illegal dimension input */ - if ( (M <= 0) || (N <= 0) ) return(NULL); + if ((M <= 0) || (N <= 0)) + return (NULL); /* Create an empty matrix object */ A = NULL; - A = SUNMatNewEmpty(); - if (A == NULL) return(NULL); + A = SUNMatNewEmpty(sunctx); + if (A == NULL) + return (NULL); /* Attach operations */ A->ops->getid = SUNMatGetID_Dense; @@ -70,8 +69,11 @@ SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N) /* Create content */ content = NULL; - content = (SUNMatrixContent_Dense) malloc(sizeof *content); - if (content == NULL) { SUNMatDestroy(A); return(NULL); } + content = (SUNMatrixContent_Dense)malloc(sizeof *content); + if (content == NULL) { + SUNMatDestroy(A); + return (NULL); + } /* Attach content */ A->content = content; @@ -79,54 +81,59 @@ SUNMatrix SUNDenseMatrix(sunindextype M, sunindextype N) /* Fill content */ content->M = M; content->N = N; - content->ldata = M*N; + content->ldata = M * N; content->data = NULL; content->cols = NULL; /* Allocate content */ - content->data = (realtype *) calloc(M * N, sizeof(realtype)); - if (content->data == NULL) { SUNMatDestroy(A); return(NULL); } + content->data = (realtype*)calloc(M * N, sizeof(realtype)); + if (content->data == NULL) { + SUNMatDestroy(A); + return (NULL); + } - content->cols = (realtype **) malloc(N * sizeof(realtype *)); - if (content->cols == NULL) { SUNMatDestroy(A); return(NULL); } - for (j=0; jcols[j] = content->data + j * M; + content->cols = (realtype**)malloc(N * sizeof(realtype*)); + if (content->cols == NULL) { + SUNMatDestroy(A); + return (NULL); + } + for (j = 0; j < N; j++) + content->cols[j] = content->data + j * M; - return(A); + return (A); } - /* ---------------------------------------------------------------------------- - * Function to print the dense matrix + * Function to print the dense matrix */ - + void SUNDenseMatrix_Print(SUNMatrix A, FILE* outfile) { sunindextype i, j; - - /* should not be called unless A is a dense matrix; + + /* should not be called unless A is a dense matrix; otherwise return immediately */ if (SUNMatGetID(A) != SUNMATRIX_DENSE) return; /* perform operation */ - fprintf(outfile,"\n"); - for (i=0; isunctx); + return (B); } void SUNMatDestroy_Dense(SUNMatrix A) { - if (A == NULL) return; + if (A == NULL) + return; /* free content */ if (A->content != NULL) { @@ -219,8 +223,12 @@ void SUNMatDestroy_Dense(SUNMatrix A) } /* free ops and matrix */ - if (A->ops) { free(A->ops); A->ops = NULL; } - free(A); A = NULL; + if (A->ops) { + free(A->ops); + A->ops = NULL; + } + free(A); + A = NULL; return; } @@ -228,12 +236,13 @@ void SUNMatDestroy_Dense(SUNMatrix A) int SUNMatZero_Dense(SUNMatrix A) { sunindextype i; - realtype *Adata; + realtype* Adata; - /* Perform operation */ + /* Perform operation A_ij = 0 */ Adata = SM_DATA_D(A); - for (i=0; iops->nvgetarraypointer || !y->ops->nvgetarraypointer) { return SUNFALSE; + } - /* Optimally we would verify that the dimensions of A, x and y agree, - but since there is no generic 'length' routine for N_Vectors we cannot */ + /* Check that the dimensions agree */ + if ((N_VGetLength(x) != SM_COLUMNS_D(A)) || (N_VGetLength(y) != SM_ROWS_D(A))) { + return SUNFALSE; + } return SUNTRUE; } - diff --git a/src/lib/sunmatrix/magmadense/dense_cuda_kernels.cuh b/src/lib/sunmatrix/magmadense/dense_cuda_kernels.cuh new file mode 100644 index 0000000..0dbc3cd --- /dev/null +++ b/src/lib/sunmatrix/magmadense/dense_cuda_kernels.cuh @@ -0,0 +1,113 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the dense matrix CUDA kernels + * for the SUNMATRIX package based on MAGMA. + * -----------------------------------------------------------------*/ + +#ifndef _SUNGPUDENSE_MATRIX_KERNELS_CUH_ +#define _SUNGPUDENSE_MATRIX_KERNELS_CUH_ + +#include +#include + +namespace sundials +{ +namespace sunmatrix_gpudense +{ +namespace cuda +{ + + template + __device__ __forceinline__ void + block_col_row(I nblocks, I m, I n, Lambda&& fn) + { + for (I block = blockIdx.x*blockDim.x + threadIdx.x; + block < nblocks; + block += blockDim.x*gridDim.x) + { + for (I col = blockIdx.y*blockDim.y + threadIdx.y; + col < n; + col += blockDim.y*gridDim.y) + { + for (I row = blockIdx.z*blockDim.z + threadIdx.z; + row < m; + row += blockDim.z*gridDim.z) + { + fn(block*m*n+(col*m + row), row, col); + } + } + } + } + + template + __global__ void + getBlockPointers(I m, I n, I nblocks, T* A, T** Ablocks, + T* x, T** xblocks, T* y, T** yblocks) + { + for (I block = blockIdx.x*blockDim.x + threadIdx.x; + block < nblocks; + block += blockDim.x*gridDim.x) + { + Ablocks[block] = &A[block*m*n]; + xblocks[block] = &x[block*n]; + yblocks[block] = &y[block*m]; + }; + } + + template + __global__ void + zeroKernel(I m, I n, I nblocks, T* A) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + A[kij] = 0.0; + }); + } + + template + __global__ void + copyKernel(I m, I n, I nblocks, const T* A, T* B) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + B[kij] = A[kij]; + }); + } + + template + __global__ void + scaleAddIKernel(I m, I n, I nblocks, T c, T* A) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + if (row == col) A[kij] = c*A[kij] + 1.0; + else A[kij] = c*A[kij]; + }); + } + + template + __global__ void + scaleAddKernel(I m, I n, I nblocks, T c, T* A, const T* B) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + A[kij] = c*A[kij] + B[kij]; + }); + } + +} // namespace cuda +} // namespace sunmatrix_gpudense +} // namespace sundials + +#endif diff --git a/src/lib/sunmatrix/magmadense/dense_hip_kernels.hip.hpp b/src/lib/sunmatrix/magmadense/dense_hip_kernels.hip.hpp new file mode 100644 index 0000000..0f29191 --- /dev/null +++ b/src/lib/sunmatrix/magmadense/dense_hip_kernels.hip.hpp @@ -0,0 +1,112 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the dense matrix HIP kernels + * for the SUNMATRIX package based on MAGMA. + * -----------------------------------------------------------------*/ + +#ifndef _SUNGPUDENSE_MATRIX_KERNELS_HIP +#define _SUNGPUDENSE_MATRIX_KERNELS_HIP + +#include + +namespace sundials +{ +namespace sunmatrix_gpudense +{ +namespace hip +{ + + template + __device__ __forceinline__ void + block_col_row(I nblocks, I m, I n, Lambda&& fn) + { + for (I block = blockIdx.x*blockDim.x + threadIdx.x; + block < nblocks; + block += blockDim.x*gridDim.x) + { + for (I col = blockIdx.y*blockDim.y + threadIdx.y; + col < n; + col += blockDim.y*gridDim.y) + { + for (I row = blockIdx.z*blockDim.z + threadIdx.z; + row < m; + row += blockDim.z*gridDim.z) + { + fn(block*m*n+(col*m + row), row, col); + } + } + } + } + + template + __global__ void + getBlockPointers(I m, I n, I nblocks, T* A, T** Ablocks, + T* x, T** xblocks, T* y, T** yblocks) + { + for (I block = blockIdx.x*blockDim.x + threadIdx.x; + block < nblocks; + block += blockDim.x*gridDim.x) + { + Ablocks[block] = &A[block*m*n]; + xblocks[block] = &x[block*n]; + yblocks[block] = &y[block*m]; + }; + } + + template + __global__ void + zeroKernel(I m, I n, I nblocks, T* A) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + A[kij] = 0.0; + }); + } + + template + __global__ void + copyKernel(I m, I n, I nblocks, const T* A, T* B) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + B[kij] = A[kij]; + }); + } + + template + __global__ void + scaleAddIKernel(I m, I n, I nblocks, T c, T* A) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + if (row == col) A[kij] = c*A[kij] + 1.0; + else A[kij] = c*A[kij]; + }); + } + + template + __global__ void + scaleAddKernel(I m, I n, I nblocks, T c, T* A, const T* B) + { + block_col_row(nblocks, m, n, + [=] __device__ (I kij, I row, I col) { + A[kij] = c*A[kij] + B[kij]; + }); + } + +} // namespace cuda +} // namespace sunmatrix_gpudense +} // namespace sundials + +#endif diff --git a/src/lib/sunmatrix/magmadense/sunmatrix_magmadense.cpp b/src/lib/sunmatrix/magmadense/sunmatrix_magmadense.cpp new file mode 100644 index 0000000..c6ea578 --- /dev/null +++ b/src/lib/sunmatrix/magmadense/sunmatrix_magmadense.cpp @@ -0,0 +1,653 @@ +/* ----------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------- + * This is the implementation file for the dense implementation of + * the SUNMATRIX package based on MAGMA. + * -----------------------------------------------------------------*/ + +#include +#include + +#if defined(SUNDIALS_MAGMA_BACKENDS_CUDA) + +#include "sundials_cuda.h" +#include "dense_cuda_kernels.cuh" +using namespace sundials::sunmatrix_gpudense::cuda; +#define SUNDIALS_HIP_OR_CUDA(a,b) b + +#elif defined(SUNDIALS_MAGMA_BACKENDS_HIP) + +#include "sundials_hip.h" +#include "dense_hip_kernels.hip.hpp" +using namespace sundials::sunmatrix_gpudense::hip; +#define SUNDIALS_HIP_OR_CUDA(a,b) a + +#endif + +/* Content accessor macro */ +#define SMLD_CONTENT(A) ( (SUNMatrixContent_MagmaDense) (A->content) ) + +/* Constants */ +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +/* Macros for magma operations based on precision */ +#if defined(SUNDIALS_DOUBLE_PRECISION) +#define xgemv(q,...) magma_dgemv(__VA_ARGS__, q) +#define xgemv_batched(q,...) magmablas_dgemv_batched(__VA_ARGS__, q) +#define magma_xset_pointer(q,...) magma_dset_pointer(__VA_ARGS__, q) +#define xprint(q,...) magma_dprint_gpu(__VA_ARGS__, q) +#elif defined(SUNDIALS_SINGLE_PRECISION) +#define xgemv(q,...) magma_sgemv(__VA_ARGS__, q) +#define xgemv_batched(q,...) magmablas_sgemv_batched(__VA_ARGS__, q) +#define magma_xset_pointer(q,...) magma_sset_pointer(__VA_ARGS__, q) +#define xprint(q,...) magma_sprint_gpu(__VA_ARGS__, q) +#else +#error unsupported precision +#endif + +/* Private function prototypes */ +static booleantype SMCompatible_MagmaDense(SUNMatrix A, SUNMatrix B); +static booleantype SMCompatible2_MagmaDense(SUNMatrix A, N_Vector x, N_Vector y); + +/* + * ---------------------------------------------------------------------------- + * Implementation specific routines + * ---------------------------------------------------------------------------- + */ + +/* + * Constructor functions + */ + +SUNMatrix SUNMatrix_MagmaDense(sunindextype M, sunindextype N, SUNMemoryType memtype, + SUNMemoryHelper memhelper, + void* queue, SUNContext sunctx) +{ + return(SUNMatrix_MagmaDenseBlock(1, M, N, memtype, memhelper, queue, sunctx)); +} + +SUNMatrix SUNMatrix_MagmaDenseBlock(sunindextype nblocks, sunindextype M, sunindextype N, + SUNMemoryType memtype, SUNMemoryHelper memhelper, + void* queue, SUNContext sunctx) +{ + SUNMatrix Amat; + SUNMatrixContent_MagmaDense A; + int retval; + + /* Return with NULL matrix on illegal dimension input */ + if ( (M <= 0) || (N <= 0) || (nblocks <= 0)) + return(NULL); + + /* Check for valid memory type options */ + if ((memtype != SUNMEMTYPE_UVM) && (memtype != SUNMEMTYPE_DEVICE)) + return(NULL); + + /* Check for valid memory helper */ + if (memhelper == NULL) + return(NULL); + + /* First thing we do is initialize magma */ + retval = magma_init(); + if (retval != MAGMA_SUCCESS) return(NULL); + + /* Create an empty matrix object */ + Amat = NULL; + Amat = SUNMatNewEmpty(sunctx); + if (Amat == NULL) return(NULL); + + /* Attach operations */ + Amat->ops->getid = SUNMatGetID_MagmaDense; + Amat->ops->clone = SUNMatClone_MagmaDense; + Amat->ops->destroy = SUNMatDestroy_MagmaDense; + Amat->ops->zero = SUNMatZero_MagmaDense; + Amat->ops->copy = SUNMatCopy_MagmaDense; + Amat->ops->scaleadd = SUNMatScaleAdd_MagmaDense; + Amat->ops->scaleaddi = SUNMatScaleAddI_MagmaDense; + Amat->ops->matvecsetup = SUNMatMatvecSetup_MagmaDense; + Amat->ops->matvec = SUNMatMatvec_MagmaDense; + Amat->ops->space = SUNMatSpace_MagmaDense; + + /* Create content */ + A = NULL; + A = (SUNMatrixContent_MagmaDense) malloc(sizeof(*A)); + if (A == NULL) { SUNMatDestroy(Amat); return(NULL); } + + /* Attach content */ + Amat->content = A; + + /* Fill content */ + A->M = M; + A->N = N; + A->nblocks = nblocks; + A->ldata = M*N*nblocks; + A->data = NULL; + A->blocks = NULL; + A->xblocks = NULL; + A->yblocks = NULL; + A->memhelp = memhelper; + A->q = NULL; + + magma_getdevice(&A->device_id); + SUNDIALS_HIP_OR_CUDA( + magma_queue_create_from_hip(A->device_id, (hipStream_t) queue, NULL, NULL, &A->q);, + magma_queue_create_from_cuda(A->device_id, (cudaStream_t) queue, NULL, NULL, &A->q); ) + + /* Allocate data */ + retval = SUNMemoryHelper_Alloc(A->memhelp, &A->data, + sizeof(realtype) * A->ldata, memtype, nullptr); + if (retval) { SUNMatDestroy(Amat); return(NULL); } + + if (A->nblocks > 1) + { + /* Allocate array of pointers to block data */ + retval = SUNMemoryHelper_Alloc(A->memhelp, &A->blocks, + sizeof(realtype*) * A->nblocks, memtype, + nullptr); + if (retval) { SUNMatDestroy(Amat); return(NULL); } + + /* Initialize array of pointers to block data */ + magma_xset_pointer(A->q, (realtype**)A->blocks->ptr, (realtype*)A->data->ptr, + A->M, 0, 0, A->M*A->N, A->nblocks); + } + + return(Amat); +} + +/* + * Accessor functions + */ + +sunindextype SUNMatrix_MagmaDense_Rows(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->M * A->nblocks); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_Columns(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->N * A->nblocks); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_BlockRows(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->M); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_BlockColumns(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->N); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_NumBlocks(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->nblocks); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_LData(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return(A->ldata); + else + return(SUNMAT_ILL_INPUT); +} + +sunindextype SUNMatrix_MagmaDense_BlockLData(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return A->M * A->N; + else + return SUNMAT_ILL_INPUT; +} + +realtype* SUNMatrix_MagmaDense_Data(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return((realtype*) A->data->ptr); + else + return(NULL); +} + +realtype** SUNMatrix_MagmaDense_BlockData(SUNMatrix Amat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (SUNMatGetID(Amat) == SUNMATRIX_MAGMADENSE) + return((realtype**) A->blocks->ptr); + else + return(NULL); +} + +extern realtype* SUNMatrix_MagmaDense_Block(SUNMatrix Amat, sunindextype k); + +extern realtype* SUNMatrix_MagmaDense_Column(SUNMatrix Amat, sunindextype j); + +extern realtype* SUNMatrix_MagmaDense_BlockColumn(SUNMatrix Amat, sunindextype k, sunindextype j); + +/* + * Utility functions + */ + +void SUNMatrix_MagmaDense_Print(SUNMatrix Amat) +{ + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return; + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + for (sunindextype k = 0; k < A->nblocks; k++) + xprint(A->q, A->M, A->N, SUNMatrix_MagmaDense_Block(Amat,k), A->M); +} + +int SUNMatrix_MagmaDense_CopyToDevice(SUNMatrix Amat, realtype* h_data) +{ + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + int retval = 0; + SUNMemory _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + SUNDIALS_HIP_OR_CUDA( hipStream_t stream = magma_queue_get_hip_stream(A->q);, + cudaStream_t stream = magma_queue_get_cuda_stream(A->q); ) + + retval = SUNMemoryHelper_CopyAsync(A->memhelp, + A->data, + _h_data, + sizeof(realtype) * A->ldata, + (void*) &stream); + magma_queue_sync(A->q); /* sync with respect to host, but only this stream */ + + SUNMemoryHelper_Dealloc(A->memhelp, _h_data, nullptr); + return(retval == 0 ? SUNMAT_SUCCESS : SUNMAT_MEM_FAIL); +} + +int SUNMatrix_MagmaDense_CopyFromDevice(SUNMatrix Amat, realtype* h_data) +{ + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + int retval = 0; + SUNMemory _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + SUNDIALS_HIP_OR_CUDA( hipStream_t stream = magma_queue_get_hip_stream(A->q);, + cudaStream_t stream = magma_queue_get_cuda_stream(A->q); ) + + retval = SUNMemoryHelper_CopyAsync(A->memhelp, + _h_data, + A->data, + sizeof(realtype) * A->ldata, + (void*) &stream); + magma_queue_sync(A->q); /* sync with respect to host, but only this stream */ + + SUNMemoryHelper_Dealloc(A->memhelp, _h_data, nullptr); + return(retval == 0 ? SUNMAT_SUCCESS : SUNMAT_MEM_FAIL); +} + +/* + * ----------------------------------------------------------------- + * Implementation of generic SUNMatrix operations. + * ----------------------------------------------------------------- + */ + +SUNMatrix SUNMatClone_MagmaDense(SUNMatrix Amat) +{ + if (Amat == NULL) return(NULL); + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(NULL); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + SUNMatrix B = NULL; + SUNDIALS_HIP_OR_CUDA( hipStream_t stream = magma_queue_get_hip_stream(A->q);, + cudaStream_t stream = magma_queue_get_cuda_stream(A->q); ) + + if (A->nblocks > 1) + B = SUNMatrix_MagmaDenseBlock(A->nblocks, A->M, A->N, A->data->type, + A->memhelp, stream, Amat->sunctx); + else + B = SUNMatrix_MagmaDense(A->M, A->N, A->data->type, A->memhelp, stream, + Amat->sunctx); + + return(B); +} + +void SUNMatDestroy_MagmaDense(SUNMatrix Amat) +{ + if (Amat == NULL) return; + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return; + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + /* Sync before destroying */ + magma_queue_sync(A->q); + + /* Free content */ + if (A) + { + /* Free data array(s) */ + if (A->data) SUNMemoryHelper_Dealloc(A->memhelp, A->data, nullptr); + if (A->blocks) SUNMemoryHelper_Dealloc(A->memhelp, A->blocks, nullptr); + if (A->xblocks) SUNMemoryHelper_Dealloc(A->memhelp, A->xblocks, nullptr); + if (A->yblocks) SUNMemoryHelper_Dealloc(A->memhelp, A->yblocks, nullptr); + magma_queue_destroy(A->q); + /* Free content struct */ + free(A); + Amat->content = NULL; + } + + /* Free ops */ + if (Amat->ops) + { + free(Amat->ops); + Amat->ops = NULL; + } + + /* Free matrix */ + free(Amat); + Amat = NULL; + + /* Call magma_finalize, but note that magma_finalize does + nothing until it has been called the same number of times + as magma_init */ + magma_finalize(); + + return; +} + +int SUNMatZero_MagmaDense(SUNMatrix Amat) +{ + if (Amat == NULL) return(SUNMAT_ILL_INPUT); + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + /* Zero out matrix */ + SUNDIALS_LAUNCH_KERNEL(SUNDIALS_KERNEL_NAME(zeroKernel), + dim3(std::min(A->nblocks,INT_MAX),1,1), + SUNDIALS_HIP_OR_CUDA( dim3(1,16,16), dim3(1,16,32) ), /* We choose slightly larger thread blocks when using HIP since the warps are larger */ + 0, + SUNDIALS_HIP_OR_CUDA( magma_queue_get_hip_stream(A->q), magma_queue_get_cuda_stream(A->q) ), + A->M, + A->N, + A->nblocks, + (realtype*) A->data->ptr + ); + + return(SUNMAT_SUCCESS); +} + +int SUNMatCopy_MagmaDense(SUNMatrix Amat, SUNMatrix Bmat) +{ + if ((Amat == NULL) || (Bmat == NULL)) return(SUNMAT_ILL_INPUT); + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + SUNMatrixContent_MagmaDense B = SMLD_CONTENT(Bmat); + + /* Verify that A and B are compatible */ + if (!SMCompatible_MagmaDense(Amat, Bmat)) + return SUNMAT_ILL_INPUT; + + /* Copy A into B */ + SUNDIALS_LAUNCH_KERNEL(SUNDIALS_KERNEL_NAME(copyKernel), + dim3(std::min(A->nblocks,INT_MAX),1,1), + SUNDIALS_HIP_OR_CUDA( dim3(1,16,16), dim3(1,16,32) ), + 0, + SUNDIALS_HIP_OR_CUDA( magma_queue_get_hip_stream(A->q), magma_queue_get_cuda_stream(A->q) ), + A->M, + A->N, + A->nblocks, + (const realtype*) A->data->ptr, + (realtype*) B->data->ptr + ); + + return(SUNMAT_SUCCESS); +} + +int SUNMatScaleAddI_MagmaDense(realtype c, SUNMatrix Amat) +{ + if (Amat == NULL) return(SUNMAT_ILL_INPUT); + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + + SUNDIALS_LAUNCH_KERNEL(SUNDIALS_KERNEL_NAME(scaleAddIKernel), + dim3(std::min(A->nblocks,INT_MAX),1,1), + SUNDIALS_HIP_OR_CUDA( dim3(1,16,16), dim3(1,16,32) ), + 0, + SUNDIALS_HIP_OR_CUDA( magma_queue_get_hip_stream(A->q), magma_queue_get_cuda_stream(A->q) ), + A->M, + A->N, + A->nblocks, + c, + (realtype*) A->data->ptr + ); + + return(SUNMAT_SUCCESS); +} + +int SUNMatScaleAdd_MagmaDense(realtype c, SUNMatrix Amat, SUNMatrix Bmat) +{ + if ((Amat == NULL) || (Bmat == NULL)) return(SUNMAT_ILL_INPUT); + + if ((SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) || + !SMCompatible_MagmaDense(Amat, Bmat)) + return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + SUNMatrixContent_MagmaDense B = SMLD_CONTENT(Bmat); + + SUNDIALS_LAUNCH_KERNEL(SUNDIALS_KERNEL_NAME(scaleAddKernel), + dim3(std::min(A->nblocks,INT_MAX),1,1), + SUNDIALS_HIP_OR_CUDA( dim3(1,16,16), dim3(1,16,32) ), + 0, + SUNDIALS_HIP_OR_CUDA( magma_queue_get_hip_stream(A->q), magma_queue_get_cuda_stream(A->q) ), + A->M, + A->N, + A->nblocks, + c, + (realtype*) A->data->ptr, + (const realtype*) B->data->ptr + ); + + return(SUNMAT_SUCCESS); +} + +int SUNMatMatvecSetup_MagmaDense(SUNMatrix Amat) +{ + int retval = 0; + + if (Amat == NULL) return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (A->nblocks > 1) + { + /* Allocate array of pointers to blocks on device */ + if (A->xblocks == NULL) + retval = SUNMemoryHelper_Alloc(A->memhelp, &A->xblocks, + sizeof(realtype*) * A->nblocks, + A->data->type, nullptr); + if (retval) return(SUNMAT_MEM_FAIL); + + if (A->yblocks == NULL) + retval = SUNMemoryHelper_Alloc(A->memhelp, &A->yblocks, + sizeof(realtype*) * A->nblocks, + A->data->type, nullptr); + if (retval) return(SUNMAT_MEM_FAIL); + } + + return(SUNMAT_SUCCESS); +} + +int SUNMatMatvec_MagmaDense(SUNMatrix Amat, N_Vector x, N_Vector y) +{ + if ((Amat == NULL) || (x == NULL) || (y == NULL)) return(SUNMAT_ILL_INPUT); + + if ((SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) || + !SMCompatible2_MagmaDense(Amat, x, y)) + return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + if (A->nblocks > 1) + { + /* First, we need to create an array of pointers to the matrix and vector blocks */ + SUNDIALS_LAUNCH_KERNEL(SUNDIALS_KERNEL_NAME(getBlockPointers), + A->nblocks, + 256, + 0, + SUNDIALS_HIP_OR_CUDA( magma_queue_get_hip_stream(A->q), magma_queue_get_cuda_stream(A->q) ), + A->M, + A->N, + A->nblocks, + (realtype*)A->data->ptr, + (realtype**)A->blocks->ptr, + (realtype*)N_VGetDeviceArrayPointer(x), + (realtype**)A->xblocks->ptr, + (realtype*)N_VGetDeviceArrayPointer(y), + (realtype**)A->yblocks->ptr + ); + + /* Now we can use a batched gemv to do y = alpha*A*x + beta*y where A is block diagonal */ + xgemv_batched( + A->q, /* queue/stream to execute in */ + MagmaNoTrans, /* use A not A^T */ + A->M, /* number of rows for a block */ + A->N, /* number of cols for a block */ + ONE, /* alpha */ + (realtype**)A->blocks->ptr, + A->M, /* leading dimension of A */ + (realtype**)A->xblocks->ptr, + 1, /* increment (stride) of xblocks */ + ZERO, /* beta */ + (realtype**)A->yblocks->ptr, + 1, /* increment (stride) of yblocks */ + A->nblocks /* number of blocks */ + ); + } + else + { + /* Now we can use gemv to do y = alpha*A*x + beta*y */ + xgemv( + A->q, /* queue/stream to execute in */ + MagmaNoTrans, /* use A not A^T */ + A->M, /* number of rows */ + A->N, /* number of cols */ + ONE, /* alpha */ + (const realtype*)A->data->ptr, + A->M, /* leading dimension of A */ + (const realtype*)N_VGetDeviceArrayPointer(x), + 1, /* increment for x data */ + ZERO, /* beta */ + (realtype*)N_VGetDeviceArrayPointer(y), + 1 /* increment for y data */ + ); + } + + return(SUNMAT_SUCCESS); +} + +int SUNMatSpace_MagmaDense(SUNMatrix Amat, long int *lenrw, long int *leniw) +{ + if (Amat == NULL) return(SUNMAT_ILL_INPUT); + + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) return(SUNMAT_ILL_INPUT); + + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + *lenrw = A->ldata; + *leniw = 4; + + return(SUNMAT_SUCCESS); +} + + +/* + * ----------------------------------------------------------------- + * Private functions + * ----------------------------------------------------------------- + */ + +static booleantype SMCompatible_MagmaDense(SUNMatrix Amat, SUNMatrix Bmat) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + SUNMatrixContent_MagmaDense B = SMLD_CONTENT(Bmat); + + /* Both matrices must be SUNMATRIX_MAGMADENSE */ + if (SUNMatGetID(Amat) != SUNMATRIX_MAGMADENSE) + return(SUNFALSE); + if (SUNMatGetID(Bmat) != SUNMATRIX_MAGMADENSE) + return(SUNFALSE); + + /* Both matrices must have the same shape */ + if (A->M != B->M) + return(SUNFALSE); + if (A->N != B->N) + return(SUNFALSE); + if (A->nblocks != B->nblocks) + return(SUNFALSE); + + return(SUNTRUE); +} + +static booleantype SMCompatible2_MagmaDense(SUNMatrix Amat, N_Vector x, N_Vector y) +{ + SUNMatrixContent_MagmaDense A = SMLD_CONTENT(Amat); + + /* Vectors must implement N_VGetDeviceArrayPointer */ + if (x->ops->nvgetdevicearraypointer == NULL || + y->ops->nvgetdevicearraypointer == NULL) + return(SUNFALSE); + + /* Inner dimensions must agree */ + if (A->N*A->nblocks != N_VGetLength(x)) + return(SUNFALSE); + + /* Outer dimensions must agree */ + if (A->M*A->nblocks != N_VGetLength(y)) + return(SUNFALSE); + + return(SUNTRUE); +} diff --git a/src/lib/sunmatrix/onemkldense/sunmatrix_onemkldense.cpp b/src/lib/sunmatrix/onemkldense/sunmatrix_onemkldense.cpp new file mode 100644 index 0000000..d8c1c34 --- /dev/null +++ b/src/lib/sunmatrix/onemkldense/sunmatrix_onemkldense.cpp @@ -0,0 +1,771 @@ +/* --------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * --------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * --------------------------------------------------------------------------- + * This is the implementation file for the dense implementation of the + * SUNMATRIX class using the Intel oneAPI Math Kernel Library (oneMKL). + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include +#include + +// SUNDIALS public headers +#include +#include + +// SUNDIALS private headers +#include "sundials_debug.h" +#include "sundials_sycl.h" + +// Check for a valid precision +#if defined(SUNDIALS_EXTENDED_PRECISION) +#error "oneMLK unsupported precision" +#endif + +// Constants +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) + +// Content accessor macros +#define MAT_CONTENT(A) ((SUNMatrixContent_OneMklDense) (A->content)) +#define MAT_LAST_FLAG(A) (MAT_CONTENT(A)->last_flag) +#define MAT_BLOCK_ROWS(A) (MAT_CONTENT(A)->block_rows) +#define MAT_BLOCK_COLS(A) (MAT_CONTENT(A)->block_cols) +#define MAT_ROWS(A) (MAT_CONTENT(A)->rows) +#define MAT_COLS(A) (MAT_CONTENT(A)->cols) +#define MAT_NBLOCKS(A) (MAT_CONTENT(A)->num_blocks) +#define MAT_LDATA(A) (MAT_CONTENT(A)->ldata) +#define MAT_DATA(A) (MAT_CONTENT(A)->data) +#define MAT_DATAp(A) ((realtype*) MAT_CONTENT(A)->data->ptr) +#define MAT_BLOCKS(A) (MAT_CONTENT(A)->blocks) +#define MAT_BLOCKSp(A) ((realtype**) MAT_CONTENT(A)->blocks->ptr) +#define MAT_EXECPOLICY(A) (MAT_CONTENT(A)->exec_policy) +#define MAT_MEMTYPE(A) (MAT_CONTENT(A)->mem_type) +#define MAT_MEMHELPER(A) (MAT_CONTENT(A)->mem_helper) +#define MAT_QUEUE(A) (MAT_CONTENT(A)->queue) + +// Private function prototypes +static booleantype Compatible_AB(SUNMatrix A, SUNMatrix B); +static booleantype Compatible_Axy(SUNMatrix A, N_Vector x, N_Vector y); + +// Kernel launch parameters +static int GetKernelParameters(SUNMatrix A, booleantype reduction, + size_t& nthreads_total, + size_t& nthreads_per_block); + + +/* -------------------------------------------------------------------------- + * Constructors + * -------------------------------------------------------------------------- */ + + +SUNMatrix SUNMatrix_OneMklDense(sunindextype M, sunindextype N, + SUNMemoryType mem_type, + SUNMemoryHelper mem_helper, + sycl::queue* queue, SUNContext sunctx) +{ + return SUNMatrix_OneMklDenseBlock(1, M, N, mem_type, mem_helper, queue, + sunctx); +} + + +SUNMatrix SUNMatrix_OneMklDenseBlock(sunindextype num_blocks, sunindextype M, + sunindextype N, SUNMemoryType mem_type, + SUNMemoryHelper mem_helper, + sycl::queue* queue, SUNContext sunctx) +{ + int retval; + + // Check inputs + if ( (M <= 0) || (N <= 0) || (num_blocks <= 0) || (!mem_helper) || + ((mem_type != SUNMEMTYPE_UVM) && (mem_type != SUNMEMTYPE_DEVICE))) + { + SUNDIALS_DEBUG_ERROR("Illegal input\n"); + return NULL; + } + + // Create an empty matrix object + SUNMatrix A = SUNMatNewEmpty(sunctx); + if (!A) + { + SUNDIALS_DEBUG_ERROR("SUNMatNewEmpty returned NULL\n"); + return NULL; + } + + // Attach operations + A->ops->getid = SUNMatGetID_OneMklDense; + A->ops->clone = SUNMatClone_OneMklDense; + A->ops->destroy = SUNMatDestroy_OneMklDense; + A->ops->zero = SUNMatZero_OneMklDense; + A->ops->copy = SUNMatCopy_OneMklDense; + A->ops->scaleadd = SUNMatScaleAdd_OneMklDense; + A->ops->scaleaddi = SUNMatScaleAddI_OneMklDense; + A->ops->matvec = SUNMatMatvec_OneMklDense; + A->ops->space = SUNMatSpace_OneMklDense; + + // Create content + A->content = (SUNMatrixContent_OneMklDense) malloc(sizeof(_SUNMatrixContent_OneMklDense)); + if (!(A->content)) + { + SUNDIALS_DEBUG_ERROR("Content allocation failed\n"); + SUNMatDestroy(A); + return NULL; + } + + // Fill content + MAT_CONTENT(A)->block_rows = M; + MAT_CONTENT(A)->block_cols = N; + MAT_CONTENT(A)->rows = num_blocks * M; + MAT_CONTENT(A)->cols = num_blocks * N; + MAT_CONTENT(A)->num_blocks = num_blocks; + MAT_CONTENT(A)->ldata = M * N * num_blocks; + MAT_CONTENT(A)->data = NULL; + MAT_CONTENT(A)->blocks = NULL; + MAT_CONTENT(A)->mem_type = mem_type; + MAT_CONTENT(A)->mem_helper = mem_helper; + MAT_CONTENT(A)->exec_policy = new sundials::sycl::ThreadDirectExecPolicy(SYCL_BLOCKDIM(queue)); + MAT_CONTENT(A)->queue = queue; + + // Allocate data + retval = SUNMemoryHelper_Alloc(MAT_MEMHELPER(A), &(MAT_DATA(A)), + sizeof(realtype) * MAT_LDATA(A), mem_type, + queue); + if (retval) + { + SUNDIALS_DEBUG_ERROR("SUNMemory allocation failed\n"); + SUNMatDestroy(A); + return NULL; + } + + if (MAT_NBLOCKS(A) > 1) + { + // Allocate array of pointers to block data + retval = SUNMemoryHelper_Alloc(MAT_MEMHELPER(A), &(MAT_BLOCKS(A)), + sizeof(realtype*) * MAT_NBLOCKS(A), mem_type, + queue); + if (retval) + { + SUNDIALS_DEBUG_ERROR("SUNMemory allocation failed\n"); + SUNMatDestroy(A); + return NULL; + } + + size_t nthreads_total, nthreads_per_block; + + if (GetKernelParameters(A, SUNFALSE, nthreads_total, nthreads_per_block)) + { + SUNDIALS_DEBUG_ERROR("GetKernelParameters returned nonzero\n"); + SUNMatDestroy(A); + return NULL; + } + + realtype* Adata = MAT_DATAp(A); + realtype** Ablocks = MAT_BLOCKSp(A); + + // Initialize array of pointers to block data + SYCL_FOR(queue, nthreads_total, nthreads_per_block, item, + GRID_STRIDE_XLOOP(item, i, num_blocks) + { + Ablocks[i] = Adata + i * M * N; + }); + } + + return A; +} + + +/* -------------------------------------------------------------------------- + * Accessor functions + * -------------------------------------------------------------------------- */ + + +sunindextype SUNMatrix_OneMklDense_Rows(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_ROWS(A); + else + return SUNMAT_ILL_INPUT; +} + + +sunindextype SUNMatrix_OneMklDense_Columns(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_COLS(A); + else + return SUNMAT_ILL_INPUT; +} + + +sunindextype SUNMatrix_OneMklDense_NumBlocks(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_NBLOCKS(A); + else + return SUNMAT_ILL_INPUT; +} + + +sunindextype SUNMatrix_OneMklDense_BlockRows(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_BLOCK_ROWS(A); + else + return SUNMAT_ILL_INPUT; +} + + +sunindextype SUNMatrix_OneMklDense_BlockColumns(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_BLOCK_COLS(A); + else + return SUNMAT_ILL_INPUT; +} + + +sunindextype SUNMatrix_OneMklDense_LData(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_LDATA(A); + else + return SUNMAT_ILL_INPUT; +} + + +realtype* SUNMatrix_OneMklDense_Data(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_DATAp(A); + else + return NULL; +} + + +sunindextype SUNMatrix_OneMklDense_BlockLData(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_BLOCK_ROWS(A) * MAT_BLOCK_COLS(A); + else + return SUNMAT_ILL_INPUT; +} + + +realtype** SUNMatrix_OneMklDense_BlockData(SUNMatrix A) +{ + if (SUNMatGetID(A) == SUNMATRIX_ONEMKLDENSE) + return MAT_BLOCKSp(A); + else + return NULL; +} + + +/* Functions that return pointers to the start of a block, column, or block + column. These are defined as inline functions in sunmatrix_onemkldense.h, so + we just mark them as extern here. */ + +extern realtype* SUNMatrix_OneMklDense_Block(SUNMatrix A, sunindextype k); + +extern realtype* SUNMatrix_OneMklDense_Column(SUNMatrix A, sunindextype j); + +extern realtype* SUNMatrix_OneMklDense_BlockColumn(SUNMatrix A, sunindextype k, + sunindextype j); + + +/* -------------------------------------------------------------------------- + * Utility functions + * -------------------------------------------------------------------------- */ + + +int SUNMatrix_OneMklDense_CopyToDevice(SUNMatrix A, realtype* h_data) +{ + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Illegal input\n"); + return SUNMAT_ILL_INPUT; + } + + // Wrap the input pointer + SUNMemory _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + if (!_h_data) + { + SUNDIALS_DEBUG_ERROR("SUNMemory wrap failed\n"); + return SUNMAT_ILL_INPUT; + } + + // Copy the data + int copy_fail = SUNMemoryHelper_CopyAsync(MAT_MEMHELPER(A), + MAT_DATA(A), + _h_data, + sizeof(realtype) * MAT_LDATA(A), + MAT_QUEUE(A)); + + // Sync with respect to host, but only this queue + MAT_QUEUE(A)->wait_and_throw(); + + int retval = SUNMemoryHelper_Dealloc(MAT_MEMHELPER(A), _h_data, MAT_QUEUE(A)); + if (retval) + { + SUNDIALS_DEBUG_ERROR("SUNMemory dealloc failed\n"); + return SUNMAT_MEM_FAIL; + } + + return (copy_fail ? SUNMAT_MEM_FAIL : SUNMAT_SUCCESS); +} + + +int SUNMatrix_OneMklDense_CopyFromDevice(SUNMatrix A, realtype* h_data) +{ + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Illegal input\n"); + return SUNMAT_ILL_INPUT; + } + + SUNMemory _h_data = SUNMemoryHelper_Wrap(h_data, SUNMEMTYPE_HOST); + if (!_h_data) + { + SUNDIALS_DEBUG_ERROR("SUNMemory wrap failed\n"); + return SUNMAT_MEM_FAIL; + } + + int copy_fail = SUNMemoryHelper_CopyAsync(MAT_MEMHELPER(A), + _h_data, + MAT_DATA(A), + sizeof(realtype) * MAT_LDATA(A), + MAT_QUEUE(A)); + + // Sync with respect to host, but only this queue + MAT_QUEUE(A)->wait_and_throw(); + + int retval = SUNMemoryHelper_Dealloc(MAT_MEMHELPER(A), _h_data, MAT_QUEUE(A)); + if (retval) + { + SUNDIALS_DEBUG_ERROR("SUNMemory dealloc failed\n"); + return SUNMAT_MEM_FAIL; + } + + return (copy_fail ? SUNMAT_MEM_FAIL : SUNMAT_SUCCESS); +} + + +/* -------------------------------------------------------------------------- + * Implementation of generic SUNMatrix operations. + * -------------------------------------------------------------------------- */ + + +SUNMatrix SUNMatClone_OneMklDense(SUNMatrix A) +{ + if (!A) + { + SUNDIALS_DEBUG_ERROR("Input matrix is NULL\n"); + return NULL; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return NULL; + } + + SUNMatrix B = SUNMatrix_OneMklDenseBlock(MAT_NBLOCKS(A), + MAT_BLOCK_ROWS(A), + MAT_BLOCK_COLS(A), + MAT_DATA(A)->type, + MAT_MEMHELPER(A), + MAT_QUEUE(A), + A->sunctx); + + if (!B) + { + SUNDIALS_DEBUG_ERROR("Output matrix is NULL\n"); + return NULL; + } + + return B; +} + + +void SUNMatDestroy_OneMklDense(SUNMatrix A) +{ + if (!A) + { + SUNDIALS_DEBUG_ERROR("Input matrix is NULL\n"); + return; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return; + } + + // Free content + if (A->content) + { + // Free data array(s) + if (MAT_DATA(A)) SUNMemoryHelper_Dealloc(MAT_MEMHELPER(A), MAT_DATA(A), + MAT_QUEUE(A)); + if (MAT_BLOCKS(A)) SUNMemoryHelper_Dealloc(MAT_MEMHELPER(A), MAT_BLOCKS(A), + MAT_QUEUE(A)); + + // Free content struct + free(A->content); + A->content = NULL; + } + + // Free matrix + SUNMatFreeEmpty(A); + A = NULL; + + return; +} + + +int SUNMatZero_OneMklDense(SUNMatrix A) +{ + if (!A) + { + SUNDIALS_DEBUG_ERROR("Input matrix is NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return SUNMAT_ILL_INPUT; + } + + const sunindextype ldata = MAT_LDATA(A); + realtype *Adata = MAT_DATAp(A); + sycl::queue *Q = MAT_QUEUE(A); + size_t nthreads_total, nthreads_per_block; + + if (GetKernelParameters(A, SUNFALSE, nthreads_total, nthreads_per_block)) + { + SUNDIALS_DEBUG_ERROR("GetKernelParameters returned nonzero\n"); + return SUNMAT_MEM_FAIL; + } + + // Zero out matrix + SYCL_FOR(Q, nthreads_total, nthreads_per_block, item, + GRID_STRIDE_XLOOP(item, i, ldata) + { + Adata[i] = ZERO; + }); + + return SUNMAT_SUCCESS; +} + + +int SUNMatCopy_OneMklDense(SUNMatrix A, SUNMatrix B) +{ + if (!A || !B) + { + SUNDIALS_DEBUG_ERROR("An input matrix is NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return SUNMAT_ILL_INPUT; + } + + // Verify that A and B are compatible + if (!Compatible_AB(A, B)) + { + SUNDIALS_DEBUG_ERROR("Input matrices are incompatible\n"); + return SUNMAT_ILL_INPUT; + } + + const sunindextype ldata = MAT_LDATA(A); + realtype *Adata = MAT_DATAp(A); + realtype *Bdata = MAT_DATAp(B); + sycl::queue *Q = MAT_QUEUE(A); + size_t nthreads_total, nthreads_per_block; + + if (GetKernelParameters(A, SUNFALSE, nthreads_total, nthreads_per_block)) + { + SUNDIALS_DEBUG_ERROR("GetKernelParameters returned nonzero\n"); + return SUNMAT_MEM_FAIL; + } + + // Copy A into B + SYCL_FOR(Q, nthreads_total, nthreads_per_block, item, + GRID_STRIDE_XLOOP(item, i, ldata) + { + Bdata[i] = Adata[i]; + }); + + return SUNMAT_SUCCESS; +} + + +int SUNMatScaleAddI_OneMklDense(realtype c, SUNMatrix A) +{ + if (!A) + { + SUNDIALS_DEBUG_ERROR("Input matrix is NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return SUNMAT_ILL_INPUT; + } + + const size_t M = static_cast(MAT_BLOCK_ROWS(A)); + const size_t N = static_cast(MAT_BLOCK_COLS(A)); + const size_t B = static_cast(MAT_NBLOCKS(A)); + realtype* Adata = MAT_DATAp(A); + sycl::queue* Q = MAT_QUEUE(A); + + // Compute A = c * A + I + Q->submit([&](sycl::handler& h) + { + h.parallel_for(sycl::range{M, N, B}, [=](sycl::id<3> idx) + { + sunindextype i = idx[0]; + sunindextype j = idx[1]; + sunindextype k = idx[2]; + + // Index into 1D data array + sunindextype tid = k * M * N + j * M + i; + + if (i == j) + { + Adata[tid] = c * Adata[tid] + ONE; + } + else + { + Adata[tid] = c * Adata[tid]; + } + }); + }); + + return SUNMAT_SUCCESS; +} + + +int SUNMatScaleAdd_OneMklDense(realtype c, SUNMatrix A, SUNMatrix B) +{ + if (!A || !B) + { + SUNDIALS_DEBUG_ERROR("An input matrix is NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (!Compatible_AB(A, B)) + { + SUNDIALS_DEBUG_ERROR("Input matrices are incompatible\n"); + return SUNMAT_ILL_INPUT; + } + + const sunindextype ldata = MAT_LDATA(A); + realtype *Adata = MAT_DATAp(A); + realtype *Bdata = MAT_DATAp(B); + sycl::queue *Q = MAT_QUEUE(A); + size_t nthreads_total, nthreads_per_block; + + if (GetKernelParameters(A, SUNFALSE, nthreads_total, nthreads_per_block)) + { + SUNDIALS_DEBUG_ERROR("GetKernelParameters returned nonzero\n"); + return SUNMAT_MEM_FAIL; + } + + // Compute A = c * A + B + SYCL_FOR(Q, nthreads_total, nthreads_per_block, item, + GRID_STRIDE_XLOOP(item, i, ldata) + { + Adata[i] = c * Adata[i] + Bdata[i]; + }); + + return SUNMAT_SUCCESS; +} + + +int SUNMatMatvec_OneMklDense(SUNMatrix A, N_Vector x, N_Vector y) +{ + if (!A || !x || !y) + { + SUNDIALS_DEBUG_ERROR("Input matrix or vectors are NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (!Compatible_Axy(A, x, y)) + { + SUNDIALS_DEBUG_ERROR("Input matrix and vectors are incompatible\n"); + return SUNMAT_ILL_INPUT; + } + + if (MAT_NBLOCKS(A) > 1) + { + sycl::queue* Q = MAT_QUEUE(A); + sunindextype M = MAT_BLOCK_ROWS(A); + sunindextype N = MAT_BLOCK_COLS(A); + + // TODO(DJG): Replace with batched function + for (sunindextype i = 0; i < MAT_NBLOCKS(A); i++) + { + const realtype* Adata = MAT_DATAp(A) + i * M * N; + const realtype* xdata = N_VGetDeviceArrayPointer(x) + i * N; + realtype* ydata = N_VGetDeviceArrayPointer(y) + i * M; + + // Copmute y = a * A * x + b * y + oneapi::mkl::blas::gemv(*Q, oneapi::mkl::transpose::N, M, N, ONE, Adata, + M, xdata, 1, ZERO, ydata, 1); + } + } + else + { + sycl::queue* Q = MAT_QUEUE(A); + sunindextype M = MAT_ROWS(A); + sunindextype N = MAT_COLS(A); + const realtype* Adata = MAT_DATAp(A); + const realtype* xdata = N_VGetDeviceArrayPointer(x); + realtype* ydata = N_VGetDeviceArrayPointer(y); + + // Copmute y = a * A * x + b * y + oneapi::mkl::blas::gemv(*Q, oneapi::mkl::transpose::N, M, N, ONE, Adata, M, + xdata, 1, ZERO, ydata, 1); + } + + return SUNMAT_SUCCESS; +} + + +int SUNMatSpace_OneMklDense(SUNMatrix A, long int *lenrw, long int *leniw) +{ + if (!A) + { + SUNDIALS_DEBUG_ERROR("Input matrix is NULL\n"); + return SUNMAT_ILL_INPUT; + } + + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Invalid matrix ID\n"); + return SUNMAT_ILL_INPUT; + } + + *lenrw = MAT_LDATA(A); + *leniw = 4; + + return SUNMAT_SUCCESS; +} + + +/* -------------------------------------------------------------------------- + * Private functions + * -------------------------------------------------------------------------- */ + + +// Get the kernel launch parameters +static int GetKernelParameters(SUNMatrix A, booleantype reduction, + size_t& nthreads_total, + size_t& nthreads_per_block) +{ + if (!MAT_EXECPOLICY(A)) + { + SUNDIALS_DEBUG_ERROR("The execution policy is NULL\n"); + return -1; + } + + /* Get the number of threads per block and total number threads */ + nthreads_per_block = MAT_EXECPOLICY(A)->blockSize(); + nthreads_total = nthreads_per_block * + MAT_EXECPOLICY(A)->gridSize(MAT_LDATA(A)); + + if (nthreads_per_block == 0) + { + SUNDIALS_DEBUG_ERROR("The number of threads per block must be > 0\n"); + return -1; + } + + if (nthreads_total == 0) + { + SUNDIALS_DEBUG_ERROR("The total number of threads must be > 0\n"); + return -1; + } + + return 0; +} + + +static booleantype Compatible_AB(SUNMatrix A, SUNMatrix B) +{ + // Both matrices must have the SUNMATRIEX_MKLDENSE ID + if (SUNMatGetID(A) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Illegal matrix ID\n"); + return SUNFALSE; + } + + if (SUNMatGetID(B) != SUNMATRIX_ONEMKLDENSE) + { + SUNDIALS_DEBUG_ERROR("Illegal matrix ID\n"); + return SUNFALSE; + } + + // Both matrices must have the same shape + if (MAT_BLOCK_ROWS(A) != MAT_BLOCK_ROWS(B)) + { + SUNDIALS_DEBUG_ERROR("Number of block rows do not match\n"); + return SUNFALSE; + } + + if (MAT_BLOCK_COLS(A) != MAT_BLOCK_COLS(B)) + { + SUNDIALS_DEBUG_ERROR("Number of block columns do not match\n"); + return SUNFALSE; + } + + if (MAT_NBLOCKS(A) != MAT_NBLOCKS(B)) + { + SUNDIALS_DEBUG_ERROR("Number of blocks do not match\n"); + return SUNFALSE; + } + + return SUNTRUE; +} + + +static booleantype Compatible_Axy(SUNMatrix A, N_Vector x, N_Vector y) +{ + // Vectors must implement N_VGetDeviceArrayPointer + if (!(x->ops->nvgetdevicearraypointer) || !(y->ops->nvgetdevicearraypointer)) + { + SUNDIALS_DEBUG_ERROR("Vectors do not have GetDeviceArrayPointer\n"); + return SUNFALSE; + } + + // Inner dimensions must agree + if (MAT_COLS(A) != N_VGetLength(x)) + { + SUNDIALS_DEBUG_ERROR("Number of columns != input vectors length\n"); + return SUNFALSE; + } + + // Outer dimensions must agree + if (MAT_ROWS(A) != N_VGetLength(y)) + { + SUNDIALS_DEBUG_ERROR("Number of rows != output vector length\n"); + return SUNFALSE; + } + + return SUNTRUE; +} diff --git a/src/lib/sunmatrix/sparse/fsunmatrix_sparse.c b/src/lib/sunmatrix/sparse/fsunmatrix_sparse.c deleted file mode 100644 index 1746e6e..0000000 --- a/src/lib/sunmatrix/sparse/fsunmatrix_sparse.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_sparse.h) contains the - * implementation needed for the Fortran initialization of sparse - * vector operations. - * ----------------------------------------------------------------- - */ - -#include -#include - -#include "fsunmatrix_sparse.h" - -/* Define global matrix variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -/* Fortran callable interfaces */ - -void FSUNSPARSEMAT_INIT(int *code, long int *M, long int *N, - long int *NNZ, int *sparsetype, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (F2C_CVODE_matrix) SUNMatDestroy(F2C_CVODE_matrix); - F2C_CVODE_matrix = NULL; - F2C_CVODE_matrix = SUNSparseMatrix((sunindextype)(*M), - (sunindextype)(*N), - (sunindextype)(*NNZ), - *sparsetype); - if (F2C_CVODE_matrix == NULL) *ier = -1; - break; - case FCMIX_IDA: - if (F2C_IDA_matrix) SUNMatDestroy(F2C_IDA_matrix); - F2C_IDA_matrix = NULL; - F2C_IDA_matrix = SUNSparseMatrix((sunindextype)(*M), - (sunindextype)(*N), - (sunindextype)(*NNZ), - *sparsetype); - if (F2C_IDA_matrix == NULL) *ier = -1; - break; - case FCMIX_KINSOL: - if (F2C_KINSOL_matrix) SUNMatDestroy(F2C_KINSOL_matrix); - F2C_KINSOL_matrix = NULL; - F2C_KINSOL_matrix = SUNSparseMatrix((sunindextype)(*M), - (sunindextype)(*N), - (sunindextype)(*NNZ), - *sparsetype); - if (F2C_KINSOL_matrix == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - if (F2C_ARKODE_matrix) SUNMatDestroy(F2C_ARKODE_matrix); - F2C_ARKODE_matrix = NULL; - F2C_ARKODE_matrix = SUNSparseMatrix((sunindextype)(*M), - (sunindextype)(*N), - (sunindextype)(*NNZ), - *sparsetype); - if (F2C_ARKODE_matrix == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - -void FSUNSPARSEMASSMAT_INIT(long int *M, long int *N, long int *NNZ, - int *sparsetype, int *ier) -{ - *ier = 0; - if (F2C_ARKODE_mass_matrix) SUNMatDestroy(F2C_ARKODE_mass_matrix); - F2C_ARKODE_mass_matrix = NULL; - F2C_ARKODE_mass_matrix = SUNSparseMatrix((sunindextype)(*M), - (sunindextype)(*N), - (sunindextype)(*NNZ), - *sparsetype); - if (F2C_ARKODE_mass_matrix == NULL) *ier = -1; -} diff --git a/src/lib/sunmatrix/sparse/fsunmatrix_sparse.h b/src/lib/sunmatrix/sparse/fsunmatrix_sparse.h deleted file mode 100644 index 1fbcde6..0000000 --- a/src/lib/sunmatrix/sparse/fsunmatrix_sparse.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * ----------------------------------------------------------------- - * Programmer(s): Daniel Reynolds @ SMU - * ----------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------- - * This file (companion of fsunmatrix_sparse.c) contains the - * definitions needed for the initialization of sparse - * matrix operations in Fortran. - * ----------------------------------------------------------------- - */ - -#ifndef _FSUNMATRIX_SPARSE_H -#define _FSUNMATRIX_SPARSE_H - -#include -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FSUNSPARSEMAT_INIT SUNDIALS_F77_FUNC(fsunsparsematinit, FSUNSPARSEMATINIT) -#define FSUNSPARSEMASSMAT_INIT SUNDIALS_F77_FUNC(fsunsparsemassmatinit, FSUNSPARSEMASSMATINIT) -#else -#define FSUNSPARSEMAT_INIT fsunsparsematinit_ -#define FSUNSPARSEMASSMAT_INIT fsunsparsemassmatinit_ -#endif - - -/* Declarations of global variables */ - -extern SUNMatrix F2C_CVODE_matrix; -extern SUNMatrix F2C_IDA_matrix; -extern SUNMatrix F2C_KINSOL_matrix; -extern SUNMatrix F2C_ARKODE_matrix; -extern SUNMatrix F2C_ARKODE_mass_matrix; - -/* - * Prototypes of exported functions - * - * FSUNSPARSEMAT_INIT - initializes sparse matrix operations for main problem - * FSUNSPARSEMASSMAT_INIT - initializes sparse matrix operations for mass matrix solve - */ - -void FSUNSPARSEMAT_INIT(int *code, long int *M, long int *N, - long int *NNZ, int *sparsetype, int *ier); - -void FSUNSPARSEMASSMAT_INIT(long int *M, long int *N, - long int *NNZ, int *sparsetype, int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/sunmatrix/sparse/sunmatrix_sparse.c b/src/lib/sunmatrix/sparse/sunmatrix_sparse.c index 1a0d62d..1d1a8aa 100644 --- a/src/lib/sunmatrix/sparse/sunmatrix_sparse.c +++ b/src/lib/sunmatrix/sparse/sunmatrix_sparse.c @@ -6,7 +6,7 @@ * Slaven Peles @ LLNL, and Daniel R. Reynolds @ SMU * ----------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -33,8 +33,9 @@ /* Private function prototypes */ static booleantype SMCompatible_Sparse(SUNMatrix A, SUNMatrix B); static booleantype SMCompatible2_Sparse(SUNMatrix A, N_Vector x, N_Vector y); -int Matvec_SparseCSC(SUNMatrix A, N_Vector x, N_Vector y); -int Matvec_SparseCSR(SUNMatrix A, N_Vector x, N_Vector y); +static int Matvec_SparseCSC(SUNMatrix A, N_Vector x, N_Vector y); +static int Matvec_SparseCSR(SUNMatrix A, N_Vector x, N_Vector y); +static int format_convert(const SUNMatrix A, SUNMatrix B); /* * ----------------------------------------------------------------- @@ -53,7 +54,8 @@ int Matvec_SparseCSR(SUNMatrix A, N_Vector x, N_Vector y); */ SUNMatrix SUNSparseMatrix(sunindextype M, sunindextype N, - sunindextype NNZ, int sparsetype) + sunindextype NNZ, int sparsetype, + SUNContext sunctx) { SUNMatrix A; SUNMatrixContent_Sparse content; @@ -64,7 +66,7 @@ SUNMatrix SUNSparseMatrix(sunindextype M, sunindextype N, /* Create an empty matrix object */ A = NULL; - A = SUNMatNewEmpty(); + A = SUNMatNewEmpty(sunctx); if (A == NULL) return(NULL); /* Attach operations */ @@ -128,13 +130,15 @@ SUNMatrix SUNSparseMatrix(sunindextype M, sunindextype N, + /* ---------------------------------------------------------------------------- * Function to create a new sparse matrix from an existing dense matrix * by copying all nonzero values into the sparse matrix structure. Returns NULL * if the request for matrix storage cannot be satisfied. */ -SUNMatrix SUNSparseFromDenseMatrix(SUNMatrix Ad, realtype droptol, int sparsetype) +SUNMatrix SUNSparseFromDenseMatrix(SUNMatrix Ad, realtype droptol, + int sparsetype) { sunindextype i, j, nnz; sunindextype M, N; @@ -159,7 +163,8 @@ SUNMatrix SUNSparseFromDenseMatrix(SUNMatrix Ad, realtype droptol, int sparsetyp nnz += (SUNRabs(SM_ELEMENT_D(Ad,i,j)) > droptol); /* allocate sparse matrix */ - As = SUNSparseMatrix(M, N, nnz, sparsetype); + As = NULL; + As = SUNSparseMatrix(M, N, nnz, sparsetype, Ad->sunctx); if (As == NULL) return NULL; /* copy nonzeros from Ad into As, based on CSR/CSC type */ @@ -223,7 +228,7 @@ SUNMatrix SUNSparseFromBandMatrix(SUNMatrix Ad, realtype droptol, int sparsetype nnz += (SUNRabs(SM_ELEMENT_B(Ad,i,j)) > droptol); /* allocate sparse matrix */ - As = SUNSparseMatrix(M, N, nnz, sparsetype); + As = SUNSparseMatrix(M, N, nnz, sparsetype, Ad->sunctx); if (As == NULL) return NULL; /* copy nonzeros from Ad into As, based on CSR/CSC type */ @@ -256,6 +261,36 @@ SUNMatrix SUNSparseFromBandMatrix(SUNMatrix Ad, realtype droptol, int sparsetype } +/* ---------------------------------------------------------------------------- + * Function to create a new CSR matrix from a CSC matrix. + */ +int SUNSparseMatrix_ToCSR(const SUNMatrix A, SUNMatrix* Bout) +{ + if (A == NULL) return(SUNMAT_ILL_INPUT); + if (SM_SPARSETYPE_S(A) != CSC_MAT) return(SUNMAT_ILL_INPUT); + + *Bout = SUNSparseMatrix(SM_ROWS_S(A), SM_COLUMNS_S(A), SM_NNZ_S(A), CSR_MAT, A->sunctx); + if (*Bout == NULL) return(SUNMAT_MEM_FAIL); + + return format_convert(A, *Bout); +} + + +/* ---------------------------------------------------------------------------- + * Function to create a new CSC matrix from a CSR matrix. + */ +int SUNSparseMatrix_ToCSC(const SUNMatrix A, SUNMatrix* Bout) +{ + if (A == NULL) return(SUNMAT_ILL_INPUT); + if (SM_SPARSETYPE_S(A) != CSR_MAT) return(SUNMAT_ILL_INPUT); + + *Bout = SUNSparseMatrix(SM_ROWS_S(A), SM_COLUMNS_S(A), SM_NNZ_S(A), CSC_MAT, A->sunctx); + if (*Bout == NULL) return(SUNMAT_MEM_FAIL); + + return format_convert(A, *Bout); +} + + /* ---------------------------------------------------------------------------- * Function to reallocate internal sparse matrix storage arrays so that the * resulting sparse matrix holds indexptrs[NP] nonzeros. Returns 0 on success @@ -441,7 +476,7 @@ SUNMatrix_ID SUNMatGetID_Sparse(SUNMatrix A) SUNMatrix SUNMatClone_Sparse(SUNMatrix A) { SUNMatrix B = SUNSparseMatrix(SM_ROWS_S(A), SM_COLUMNS_S(A), - SM_NNZ_S(A), SM_SPARSETYPE_S(A)); + SM_NNZ_S(A), SM_SPARSETYPE_S(A), A->sunctx); return(B); } @@ -668,7 +703,7 @@ int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) /* create new matrix for sum */ C = SUNSparseMatrix(SM_ROWS_S(A), SM_COLUMNS_S(A), Ap[N] + newvals, - SM_SPARSETYPE_S(A)); + SM_SPARSETYPE_S(A), A->sunctx); /* access data from CSR structures (return if failure) */ Cp = Ci = NULL; @@ -892,7 +927,7 @@ int SUNMatScaleAdd_Sparse(realtype c, SUNMatrix A, SUNMatrix B) /* create new matrix for sum */ C = SUNSparseMatrix(SM_ROWS_S(A), SM_COLUMNS_S(A), - Ap[N] + newvals, SM_SPARSETYPE_S(A)); + Ap[N] + newvals, SM_SPARSETYPE_S(A), A->sunctx); /* access data from CSR structures (return if failure) */ Cp = Ci = NULL; @@ -1130,3 +1165,72 @@ int Matvec_SparseCSR(SUNMatrix A, N_Vector x, N_Vector y) } +/* ----------------------------------------------------------------- + * Copies A into a matrix B in the opposite format of A. + * Returns 0 if successful, nonzero if unsuccessful. + */ +int format_convert(const SUNMatrix A, SUNMatrix B) +{ + realtype *Ax, *Bx; + sunindextype *Ap, *Aj; + sunindextype *Bp, *Bi; + sunindextype n_row, n_col, nnz; + sunindextype n, col, csum, row, last; + + if (SM_SPARSETYPE_S(A) == SM_SPARSETYPE_S(B)) + return SUNMatCopy_Sparse(A, B); + + Ap = SM_INDEXPTRS_S(A); + Aj = SM_INDEXVALS_S(A); + Ax = SM_DATA_S(A); + + n_row = (SM_SPARSETYPE_S(A) == CSR_MAT) ? SM_ROWS_S(A) : SM_COLUMNS_S(A); + n_col = (SM_SPARSETYPE_S(A) == CSR_MAT) ? SM_COLUMNS_S(A) : SM_ROWS_S(A); + + Bp = SM_INDEXPTRS_S(B); + Bi = SM_INDEXVALS_S(B); + Bx = SM_DATA_S(B); + + nnz = Ap[n_row]; + + SUNMatZero_Sparse(B); + + /* compute number of non-zero entries per column (if CSR) or per row (if CSC) of A */ + for (n = 0; n < nnz; n++) + { + Bp[Aj[n]]++; + } + + /* cumualtive sum the nnz per column to get Bp[] */ + for (col = 0, csum = 0; col < n_col; col++) + { + sunindextype temp = Bp[col]; + Bp[col] = csum; + csum += temp; + } + Bp[n_col] = nnz; + + for (row = 0; row < n_row; row++) + { + sunindextype jj; + for (jj = Ap[row]; jj < Ap[row+1]; jj++) + { + sunindextype col = Aj[jj]; + sunindextype dest = Bp[col]; + + Bi[dest] = row; + Bx[dest] = Ax[jj]; + + Bp[col]++; + } + } + + for (col = 0, last = 0; col <= n_col; col++) + { + sunindextype temp = Bp[col]; + Bp[col] = last; + last = temp; + } + + return 0; +} diff --git a/src/lib/sunnonlinsol/fixedpoint/sunnonlinsol_fixedpoint.c b/src/lib/sunnonlinsol/fixedpoint/sunnonlinsol_fixedpoint.c new file mode 100644 index 0000000..e79ee0c --- /dev/null +++ b/src/lib/sunnonlinsol/fixedpoint/sunnonlinsol_fixedpoint.c @@ -0,0 +1,769 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2022, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This is the implementation file for the SUNNonlinearSolver module + * implementation of the Anderson-accelerated Fixed-Point method. + * ---------------------------------------------------------------------------*/ + +#include +#include +#include + +#include +#include +#include + +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + +/* Internal utility routines */ +static int AndersonAccelerate(SUNNonlinearSolver NLS, N_Vector gval, N_Vector x, + N_Vector xold, int iter); + +static int AllocateContent(SUNNonlinearSolver NLS, N_Vector tmpl); +static void FreeContent(SUNNonlinearSolver NLS); + +/* Content structure accessibility macros */ +#define FP_CONTENT(S) ( (SUNNonlinearSolverContent_FixedPoint)(S->content) ) + +/* Constant macros */ +#define ONE RCONST(1.0) +#define ZERO RCONST(0.0) + +/*============================================================================== + Constructor to create a new fixed point solver + ============================================================================*/ + +SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m, SUNContext sunctx) +{ + SUNNonlinearSolver NLS; + SUNNonlinearSolverContent_FixedPoint content; + int retval; + + /* Check that the supplied N_Vector is non-NULL */ + if (y == NULL) return(NULL); + + /* Check that the supplied N_Vector supports all required operations */ + if ( (y->ops->nvclone == NULL) || + (y->ops->nvdestroy == NULL) || + (y->ops->nvscale == NULL) || + (y->ops->nvlinearsum == NULL) || + (y->ops->nvdotprod == NULL) ) + return(NULL); + + /* Create nonlinear linear solver */ + NLS = SUNNonlinSolNewEmpty(sunctx); + if (NLS == NULL) return(NULL); + + /* Attach operations */ + NLS->ops->gettype = SUNNonlinSolGetType_FixedPoint; + NLS->ops->initialize = SUNNonlinSolInitialize_FixedPoint; + NLS->ops->solve = SUNNonlinSolSolve_FixedPoint; + NLS->ops->free = SUNNonlinSolFree_FixedPoint; + NLS->ops->setsysfn = SUNNonlinSolSetSysFn_FixedPoint; + NLS->ops->setctestfn = SUNNonlinSolSetConvTestFn_FixedPoint; + NLS->ops->setmaxiters = SUNNonlinSolSetMaxIters_FixedPoint; + NLS->ops->getnumiters = SUNNonlinSolGetNumIters_FixedPoint; + NLS->ops->getcuriter = SUNNonlinSolGetCurIter_FixedPoint; + NLS->ops->getnumconvfails = SUNNonlinSolGetNumConvFails_FixedPoint; + + /* Create nonlinear solver content structure */ + content = NULL; + content = (SUNNonlinearSolverContent_FixedPoint) malloc(sizeof *content); + if (content == NULL) { SUNNonlinSolFree(NLS); return(NULL); } + + /* Initialize all components of content to 0/NULL */ + memset(content, 0, sizeof(struct _SUNNonlinearSolverContent_FixedPoint)); + + /* Attach content */ + NLS->content = content; + + /* Fill general content */ + content->Sys = NULL; + content->CTest = NULL; + content->m = m; + content->damping = SUNFALSE; + content->beta = ONE; + content->curiter = 0; + content->maxiters = 3; + content->niters = 0; + content->nconvfails = 0; + content->ctest_data = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif + + /* Fill allocatable content */ + retval = AllocateContent(NLS, y); + if (retval != SUN_NLS_SUCCESS) { SUNNonlinSolFree(NLS); return(NULL); } + + return(NLS); +} + + +/*============================================================================== + Constructor wrapper to create a new fixed point solver for sensitivity solvers + ============================================================================*/ + +SUNNonlinearSolver SUNNonlinSol_FixedPointSens(int count, N_Vector y, int m, + SUNContext sunctx) +{ + SUNNonlinearSolver NLS; + N_Vector w; + + /* create sensitivity vector wrapper */ + w = N_VNew_SensWrapper(count, y); + + /* create nonlinear solver using sensitivity vector wrapper */ + NLS = SUNNonlinSol_FixedPoint(w, m, sunctx); + + /* free sensitivity vector wrapper */ + N_VDestroy(w); + + /* return NLS object */ + return(NLS); +} + + +/*============================================================================== + GetType, Initialize, Setup, Solve, and Free operations + ============================================================================*/ + +SUNNonlinearSolver_Type SUNNonlinSolGetType_FixedPoint(SUNNonlinearSolver NLS) +{ + return(SUNNONLINEARSOLVER_FIXEDPOINT); +} + + +int SUNNonlinSolInitialize_FixedPoint(SUNNonlinearSolver NLS) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) return(SUN_NLS_MEM_NULL); + + /* check that all required function pointers have been set */ + if ( (FP_CONTENT(NLS)->Sys == NULL) || (FP_CONTENT(NLS)->CTest == NULL) ) + return(SUN_NLS_MEM_NULL); + + /* reset the total number of iterations and convergence failures */ + FP_CONTENT(NLS)->niters = 0; + FP_CONTENT(NLS)->nconvfails = 0; + + return(SUN_NLS_SUCCESS); +} + + +/*----------------------------------------------------------------------------- + SUNNonlinSolSolve_FixedPoint: Performs the fixed-point solve g(y) = y + + Successful solve return code: + SUN_NLS_SUCCESS = 0 + + Recoverable failure return codes (positive): + SUN_NLS_CONV_RECVR + *_RHSFUNC_RECVR (ODEs) or *_RES_RECVR (DAEs) + + Unrecoverable failure return codes (negative): + *_MEM_NULL + *_RHSFUNC_FAIL (ODEs) or *_RES_FAIL (DAEs) + + Note that return values beginning with * are package specific values returned + by the Sys function provided to the nonlinear solver. + ---------------------------------------------------------------------------*/ +int SUNNonlinSolSolve_FixedPoint(SUNNonlinearSolver NLS, N_Vector y0, + N_Vector ycor, N_Vector w, realtype tol, + booleantype callSetup, void* mem) +{ + /* local variables */ + int retval; + N_Vector yprev, gy, delta; + + /* check that the inputs are non-null */ + if ( (NLS == NULL) || + (y0 == NULL) || + (ycor == NULL) || + (w == NULL) || + (mem == NULL) ) + return(SUN_NLS_MEM_NULL); + + /* check that all required function pointers have been set */ + if ( (FP_CONTENT(NLS)->Sys == NULL) || (FP_CONTENT(NLS)->CTest == NULL) ) + return(SUN_NLS_MEM_NULL); + + /* check that all required function pointers have been set */ + if ( (FP_CONTENT(NLS)->Sys == NULL) || (FP_CONTENT(NLS)->CTest == NULL) ) + return(SUN_NLS_MEM_NULL); + + /* set local shortcut variables */ + yprev = FP_CONTENT(NLS)->yprev; + gy = FP_CONTENT(NLS)->gy; + delta = FP_CONTENT(NLS)->delta; + + /* initialize iteration and convergence fail counters for this solve */ + FP_CONTENT(NLS)->niters = 0; + FP_CONTENT(NLS)->nconvfails = 0; + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the nonlinear residual */ + if (FP_CONTENT(NLS)->print_level && FP_CONTENT(NLS)->info_file + && (FP_CONTENT(NLS)->info_file != NLS->sunctx->logger->info_fp)) + { + fprintf(FP_CONTENT(NLS)->info_file, + "SUNNONLINSOL_FIXEDPOINT (nni=%ld):\n", + (long int) FP_CONTENT(NLS)->niters); + } + SUNLogger_QueueMsg(NLS->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNNonlinSolSolve_FixedPoint", "begin-iteration", + "iter = %ld, nni = %ld", (long int) 0, FP_CONTENT(NLS)->niters); +#endif + + /* Looping point for attempts at solution of the nonlinear system: + Evaluate fixed-point function (store in gy). + Performs the accelerated fixed-point iteration. + Performs stopping tests. */ + for( FP_CONTENT(NLS)->curiter = 0; + FP_CONTENT(NLS)->curiter < FP_CONTENT(NLS)->maxiters; + FP_CONTENT(NLS)->curiter++ ) { + + /* update previous solution guess */ + N_VScale(ONE, ycor, yprev); + + /* compute fixed-point iteration function, store in gy */ + retval = FP_CONTENT(NLS)->Sys(ycor, gy, mem); + if (retval != SUN_NLS_SUCCESS) break; + + /* perform fixed point update, based on choice of acceleration or not */ + if (FP_CONTENT(NLS)->m == 0) { /* basic fixed-point solver */ + N_VScale(ONE, gy, ycor); + } else { /* Anderson-accelerated solver */ + retval = AndersonAccelerate(NLS, gy, ycor, yprev, FP_CONTENT(NLS)->curiter); + } + + /* increment nonlinear solver iteration counter */ + FP_CONTENT(NLS)->niters++; + + /* compute change in solution, and call the convergence test function */ + N_VLinearSum(ONE, ycor, -ONE, yprev, delta); + + /* test for convergence */ + retval = FP_CONTENT(NLS)->CTest(NLS, ycor, delta, tol, w, + FP_CONTENT(NLS)->ctest_data); + +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the nonlinear residual */ + if (FP_CONTENT(NLS)->print_level && FP_CONTENT(NLS)->info_file + && (FP_CONTENT(NLS)->info_file != NLS->sunctx->logger->info_fp)) + { + fprintf(FP_CONTENT(NLS)->info_file, + "SUNNONLINSOL_FIXEDPOINT (nni=%ld):\n", + (long int) FP_CONTENT(NLS)->niters); + } + SUNLogger_QueueMsg(NLS->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNNonlinSolSolve_FixedPoint", "end-of-iterate", + "iter = %ld, nni = %ld, wrmsnorm = %.16g", (long int) FP_CONTENT(NLS)->curiter, + FP_CONTENT(NLS)->niters, N_VWrmsNorm(delta, w)); +#endif + + /* return if successful */ + if (retval == SUN_NLS_SUCCESS) return(SUN_NLS_SUCCESS); + + /* check if the iterations should continue; otherwise increment the + convergence failure count and return error flag */ + if (retval != SUN_NLS_CONTINUE) { + FP_CONTENT(NLS)->nconvfails++; + return(retval); + } + + } + + /* if we've reached this point, then we exhausted the iteration limit; + increment the convergence failure count and return */ + FP_CONTENT(NLS)->nconvfails++; + return(SUN_NLS_CONV_RECVR); +} + + +int SUNNonlinSolFree_FixedPoint(SUNNonlinearSolver NLS) +{ + /* return if NLS is already free */ + if (NLS == NULL) return(SUN_NLS_SUCCESS); + + /* free items from content structure, then the structure itself */ + if (NLS->content) { + FreeContent(NLS); + free(NLS->content); + NLS->content = NULL; + } + + /* free the ops structure */ + if (NLS->ops) { + free(NLS->ops); + NLS->ops = NULL; + } + + /* free the overall NLS structure */ + free(NLS); + + return(SUN_NLS_SUCCESS); +} + + +/*============================================================================== + Set functions + ============================================================================*/ + +int SUNNonlinSolSetSysFn_FixedPoint(SUNNonlinearSolver NLS, SUNNonlinSolSysFn SysFn) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check that the nonlinear system function is non-null */ + if (SysFn == NULL) + return(SUN_NLS_ILL_INPUT); + + FP_CONTENT(NLS)->Sys = SysFn; + return(SUN_NLS_SUCCESS); +} + +int SUNNonlinSolSetConvTestFn_FixedPoint(SUNNonlinearSolver NLS, + SUNNonlinSolConvTestFn CTestFn, + void* ctest_data) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check that the convergence test function is non-null */ + if (CTestFn == NULL) + return(SUN_NLS_ILL_INPUT); + + FP_CONTENT(NLS)->CTest = CTestFn; + + /* attach convergence test data */ + FP_CONTENT(NLS)->ctest_data = ctest_data; + + return(SUN_NLS_SUCCESS); +} + +int SUNNonlinSolSetMaxIters_FixedPoint(SUNNonlinearSolver NLS, int maxiters) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check that maxiters is a vaild */ + if (maxiters < 1) + return(SUN_NLS_ILL_INPUT); + + FP_CONTENT(NLS)->maxiters = maxiters; + return(SUN_NLS_SUCCESS); +} + +int SUNNonlinSolSetDamping_FixedPoint(SUNNonlinearSolver NLS, realtype beta) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check that beta is a vaild */ + if (beta <= ZERO) + return(SUN_NLS_ILL_INPUT); + + if (beta < ONE) { + /* enable damping */ + FP_CONTENT(NLS)->beta = beta; + FP_CONTENT(NLS)->damping = SUNTRUE; + } else { + /* disable damping */ + FP_CONTENT(NLS)->beta = ONE; + FP_CONTENT(NLS)->damping = SUNFALSE; + } + + return(SUN_NLS_SUCCESS); +} + + +/*============================================================================== + Get functions + ============================================================================*/ + +int SUNNonlinSolGetNumIters_FixedPoint(SUNNonlinearSolver NLS, long int *niters) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* return number of nonlinear iterations in the last solve */ + *niters = FP_CONTENT(NLS)->niters; + return(SUN_NLS_SUCCESS); +} + + +int SUNNonlinSolGetCurIter_FixedPoint(SUNNonlinearSolver NLS, int *iter) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* return the current nonlinear solver iteration count */ + *iter = FP_CONTENT(NLS)->curiter; + return(SUN_NLS_SUCCESS); +} + + +int SUNNonlinSolGetNumConvFails_FixedPoint(SUNNonlinearSolver NLS, long int *nconvfails) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* return the total number of nonlinear convergence failures */ + *nconvfails = FP_CONTENT(NLS)->nconvfails; + return(SUN_NLS_SUCCESS); +} + + +int SUNNonlinSolGetSysFn_FixedPoint(SUNNonlinearSolver NLS, SUNNonlinSolSysFn *SysFn) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* return the nonlinear system defining function */ + *SysFn = FP_CONTENT(NLS)->Sys; + return(SUN_NLS_SUCCESS); +} + + +/*============================================================================= + Utility routines + ===========================================================================*/ + +/*--------------------------------------------------------------- + AndersonAccelerate + + This routine computes the Anderson-accelerated fixed point + iterate. Upon entry, the predicted solution is held in xold; + this array is never changed throughout this routine. + + The result of the routine is held in x. + + Possible return values: + SUN_NLS_MEM_NULL --> a required item was missing from memory + SUN_NLS_SUCCESS --> successful completion + -------------------------------------------------------------*/ +static int AndersonAccelerate(SUNNonlinearSolver NLS, N_Vector gval, + N_Vector x, N_Vector xold, int iter) +{ + /* local variables */ + int nvec, retval, i_pt, i, j, lAA, maa, *ipt_map; + realtype a, b, rtemp, c, s, beta, onembeta, *cvals, *R, *gamma; + N_Vector fv, vtemp, gold, fold, *df, *dg, *Q, *Xvecs; + booleantype damping; + + /* local shortcut variables */ + vtemp = x; /* use result as temporary vector */ + ipt_map = FP_CONTENT(NLS)->imap; + maa = FP_CONTENT(NLS)->m; + gold = FP_CONTENT(NLS)->gold; + fold = FP_CONTENT(NLS)->fold; + df = FP_CONTENT(NLS)->df; + dg = FP_CONTENT(NLS)->dg; + Q = FP_CONTENT(NLS)->q; + cvals = FP_CONTENT(NLS)->cvals; + Xvecs = FP_CONTENT(NLS)->Xvecs; + R = FP_CONTENT(NLS)->R; + gamma = FP_CONTENT(NLS)->gamma; + fv = FP_CONTENT(NLS)->delta; + damping = FP_CONTENT(NLS)->damping; + beta = FP_CONTENT(NLS)->beta; + + /* reset ipt_map, i_pt */ + for (i = 0; i < maa; i++) ipt_map[i]=0; + i_pt = iter-1 - ((iter-1)/maa)*maa; + + /* update dg[i_pt], df[i_pt], fv, gold and fold*/ + N_VLinearSum(ONE, gval, -ONE, xold, fv); + if (iter > 0) { + N_VLinearSum(ONE, gval, -ONE, gold, dg[i_pt]); /* dg_new = gval - gold */ + N_VLinearSum(ONE, fv, -ONE, fold, df[i_pt]); /* df_new = fv - fold */ + } + N_VScale(ONE, gval, gold); + N_VScale(ONE, fv, fold); + + /* on first iteration, just do basic fixed-point update */ + if (iter == 0) { + N_VScale(ONE, gval, x); + return(SUN_NLS_SUCCESS); + } + + /* update data structures based on current iteration index */ + + if (iter == 1) { /* second iteration */ + + R[0] = SUNRsqrt( N_VDotProd(df[i_pt], df[i_pt]) ); + N_VScale(ONE/R[0], df[i_pt], Q[i_pt]); + ipt_map[0] = 0; + + } else if (iter <= maa) { /* another iteration before we've reached maa */ + + N_VScale(ONE, df[i_pt], vtemp); + for (j = 0; j < iter-1; j++) { + ipt_map[j] = j; + R[(iter-1)*maa+j] = N_VDotProd(Q[j], vtemp); + N_VLinearSum(ONE, vtemp, -R[(iter-1)*maa+j], Q[j], vtemp); + } + R[(iter-1)*maa+iter-1] = SUNRsqrt( N_VDotProd(vtemp, vtemp) ); + if (R[(iter-1)*maa+iter-1] == ZERO) { + N_VScale(ZERO, vtemp, Q[i_pt]); + } else { + N_VScale((ONE/R[(iter-1)*maa+iter-1]), vtemp, Q[i_pt]); + } + ipt_map[iter-1] = iter-1; + + } else { /* we've filled the acceleration subspace, so start recycling */ + + /* delete left-most column vector from QR factorization */ + for (i = 0; i < maa-1; i++) { + a = R[(i+1)*maa + i]; + b = R[(i+1)*maa + i+1]; + rtemp = SUNRsqrt(a*a + b*b); + c = a / rtemp; + s = b / rtemp; + R[(i+1)*maa + i] = rtemp; + R[(i+1)*maa + i+1] = ZERO; + if (i < maa-1) { + for (j = i+2; j < maa; j++) { + a = R[j*maa + i]; + b = R[j*maa + i+1]; + rtemp = c * a + s * b; + R[j*maa + i+1] = -s*a + c*b; + R[j*maa + i] = rtemp; + } + } + N_VLinearSum(c, Q[i], s, Q[i+1], vtemp); + N_VLinearSum(-s, Q[i], c, Q[i+1], Q[i+1]); + N_VScale(ONE, vtemp, Q[i]); + } + + /* ahift R to the left by one */ + for (i = 1; i < maa; i++) + for (j = 0; j < maa-1; j++) + R[(i-1)*maa + j] = R[i*maa + j]; + + /* add the new df vector */ + N_VScale(ONE, df[i_pt], vtemp); + for (j = 0; j < maa-1; j++) { + R[(maa-1)*maa+j] = N_VDotProd(Q[j], vtemp); + N_VLinearSum(ONE, vtemp, -R[(maa-1)*maa+j], Q[j], vtemp); + } + R[(maa-1)*maa+maa-1] = SUNRsqrt( N_VDotProd(vtemp, vtemp) ); + N_VScale((ONE/R[(maa-1)*maa+maa-1]), vtemp, Q[maa-1]); + + /* update the iteration map */ + j = 0; + for (i = i_pt+1; i < maa; i++) + ipt_map[j++] = i; + for (i = 0; i < i_pt+1; i++) + ipt_map[j++] = i; + } + + /* solve least squares problem and update solution */ + lAA = iter; + if (maa < iter) lAA = maa; + retval = N_VDotProdMulti(lAA, fv, Q, gamma); + if (retval != 0) return(SUN_NLS_VECTOROP_ERR); + + /* set arrays for fused vector operation */ + cvals[0] = ONE; + Xvecs[0] = gval; + nvec = 1; + for (i = lAA-1; i > -1; i--) { + for (j = i+1; j < lAA; j++) + gamma[i] -= R[j*maa+i]*gamma[j]; + if (gamma[i] == ZERO) { + gamma[i] = ZERO; + } else { + gamma[i] /= R[i*maa+i]; + } + cvals[nvec] = -gamma[i]; + Xvecs[nvec] = dg[ipt_map[i]]; + nvec += 1; + } + + /* if enabled, apply damping */ + if (damping) { + onembeta = (ONE - beta); + cvals[nvec] = -onembeta; + Xvecs[nvec] = fv; + nvec += 1; + for (i = lAA - 1; i > -1; i--) { + cvals[nvec] = onembeta * gamma[i]; + Xvecs[nvec] = df[ipt_map[i]]; + nvec += 1; + } + } + + /* update solution */ + retval = N_VLinearCombination(nvec, cvals, Xvecs, x); + if (retval != 0) return(SUN_NLS_VECTOROP_ERR); + + return(SUN_NLS_SUCCESS); +} + +static int AllocateContent(SUNNonlinearSolver NLS, N_Vector y) +{ + int m = FP_CONTENT(NLS)->m; + + FP_CONTENT(NLS)->yprev = N_VClone(y); + if (FP_CONTENT(NLS)->yprev == NULL) { FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->gy = N_VClone(y); + if (FP_CONTENT(NLS)->gy == NULL) { FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->delta = N_VClone(y); + if (FP_CONTENT(NLS)->delta == NULL) { FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + /* Allocate all m-dependent content */ + if (m > 0) { + + FP_CONTENT(NLS)->fold = N_VClone(y); + if (FP_CONTENT(NLS)->fold == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->gold = N_VClone(y); + if (FP_CONTENT(NLS)->gold == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->imap = (int *) malloc(m * sizeof(int)); + if (FP_CONTENT(NLS)->imap == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->R = (realtype *) malloc((m*m) * sizeof(realtype)); + if (FP_CONTENT(NLS)->R == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->gamma = (realtype *) malloc(m * sizeof(realtype)); + if (FP_CONTENT(NLS)->gamma == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->cvals = (realtype *) malloc(2*(m+1) * sizeof(realtype)); + if (FP_CONTENT(NLS)->cvals == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->df = N_VCloneVectorArray(m, y); + if (FP_CONTENT(NLS)->df == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->dg = N_VCloneVectorArray(m, y); + if (FP_CONTENT(NLS)->dg == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->q = N_VCloneVectorArray(m, y); + if (FP_CONTENT(NLS)->q == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + + FP_CONTENT(NLS)->Xvecs = (N_Vector *) malloc(2*(m+1) * sizeof(N_Vector)); + if (FP_CONTENT(NLS)->Xvecs == NULL) { + FreeContent(NLS); return(SUN_NLS_MEM_FAIL); } + } + + return(SUN_NLS_SUCCESS); +} + +static void FreeContent(SUNNonlinearSolver NLS) +{ + if (FP_CONTENT(NLS)->yprev) { + N_VDestroy(FP_CONTENT(NLS)->yprev); + FP_CONTENT(NLS)->yprev = NULL; } + + if (FP_CONTENT(NLS)->gy) { + N_VDestroy(FP_CONTENT(NLS)->gy); + FP_CONTENT(NLS)->gy = NULL; } + + if (FP_CONTENT(NLS)->fold) { + N_VDestroy(FP_CONTENT(NLS)->fold); + FP_CONTENT(NLS)->fold = NULL; } + + if (FP_CONTENT(NLS)->gold) { + N_VDestroy(FP_CONTENT(NLS)->gold); + FP_CONTENT(NLS)->gold = NULL; } + + if (FP_CONTENT(NLS)->delta) { + N_VDestroy(FP_CONTENT(NLS)->delta); + FP_CONTENT(NLS)->delta = NULL; } + + if (FP_CONTENT(NLS)->imap) { + free(FP_CONTENT(NLS)->imap); + FP_CONTENT(NLS)->imap = NULL; } + + if (FP_CONTENT(NLS)->R) { + free(FP_CONTENT(NLS)->R); + FP_CONTENT(NLS)->R = NULL; } + + if (FP_CONTENT(NLS)->gamma) { + free(FP_CONTENT(NLS)->gamma); + FP_CONTENT(NLS)->gamma = NULL; } + + if (FP_CONTENT(NLS)->cvals) { + free(FP_CONTENT(NLS)->cvals); + FP_CONTENT(NLS)->cvals = NULL; } + + if (FP_CONTENT(NLS)->df) { + N_VDestroyVectorArray(FP_CONTENT(NLS)->df, FP_CONTENT(NLS)->m); + FP_CONTENT(NLS)->df = NULL; } + + if (FP_CONTENT(NLS)->dg) { + N_VDestroyVectorArray(FP_CONTENT(NLS)->dg, FP_CONTENT(NLS)->m); + FP_CONTENT(NLS)->dg = NULL; } + + if (FP_CONTENT(NLS)->q) { + N_VDestroyVectorArray(FP_CONTENT(NLS)->q, FP_CONTENT(NLS)->m); + FP_CONTENT(NLS)->q = NULL; } + + if (FP_CONTENT(NLS)->Xvecs) { + free(FP_CONTENT(NLS)->Xvecs); + FP_CONTENT(NLS)->Xvecs = NULL; } + + return; +} + +int SUNNonlinSolSetInfoFile_FixedPoint(SUNNonlinearSolver NLS, + FILE* info_file) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + FP_CONTENT(NLS)->info_file = info_file; + + return(SUN_NLS_SUCCESS); +} + +int SUNNonlinSolSetPrintLevel_FixedPoint(SUNNonlinearSolver NLS, + int print_level) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUN_NLS_ILL_INPUT); + + FP_CONTENT(NLS)->print_level = print_level; + + return(SUN_NLS_SUCCESS); +} diff --git a/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.c b/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.c deleted file mode 100644 index dfa9909..0000000 --- a/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.c +++ /dev/null @@ -1,95 +0,0 @@ -/* ----------------------------------------------------------------------------- - * Programmer(s): David J. Gardner @ LLNL - * ----------------------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------------------- - * This file contains the implementation of functions needed for initialization - * of the SUNNonlinearSolver Newton moudule operations in Fortran. - * ---------------------------------------------------------------------------*/ - -#include -#include - -#include "fsunnonlinsol_newton.h" - -/* Define global nonlinsol variables */ - -SUNNonlinearSolver F2C_CVODE_nonlinsol; -SUNNonlinearSolver F2C_IDA_nonlinsol; -SUNNonlinearSolver F2C_ARKODE_nonlinsol; - -/* Declarations of external global variables */ - -extern N_Vector F2C_CVODE_vec; -extern N_Vector F2C_IDA_vec; -extern N_Vector F2C_ARKODE_vec; - -/* Fortran callable interfaces */ - -void FSUNNEWTON_INIT(int *code, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (F2C_CVODE_nonlinsol) SUNNonlinSolFree(F2C_CVODE_nonlinsol); - F2C_CVODE_nonlinsol = NULL; - F2C_CVODE_nonlinsol = SUNNonlinSol_Newton(F2C_CVODE_vec); - if (F2C_CVODE_nonlinsol == NULL) *ier = -1; - break; - case FCMIX_IDA: - if (F2C_IDA_nonlinsol) SUNNonlinSolFree(F2C_IDA_nonlinsol); - F2C_IDA_nonlinsol = NULL; - F2C_IDA_nonlinsol = SUNNonlinSol_Newton(F2C_IDA_vec); - if (F2C_IDA_nonlinsol == NULL) *ier = -1; - break; - case FCMIX_ARKODE: - if (F2C_ARKODE_nonlinsol) SUNNonlinSolFree(F2C_ARKODE_nonlinsol); - F2C_ARKODE_nonlinsol = NULL; - F2C_ARKODE_nonlinsol = SUNNonlinSol_Newton(F2C_ARKODE_vec); - if (F2C_ARKODE_nonlinsol == NULL) *ier = -1; - break; - default: - *ier = -1; - } -} - - -void FSUNNEWTON_SETMAXITERS(int *code, int *maxiters, int *ier) -{ - *ier = 0; - - switch(*code) { - case FCMIX_CVODE: - if (!F2C_CVODE_nonlinsol) { - *ier = -1; - return; - } - *ier = SUNNonlinSolSetMaxIters(F2C_CVODE_nonlinsol, *maxiters); - break; - case FCMIX_IDA: - if (!F2C_IDA_nonlinsol) { - *ier = -1; - return; - } - *ier = SUNNonlinSolSetMaxIters(F2C_IDA_nonlinsol, *maxiters); - break; - case FCMIX_ARKODE: - if (!F2C_ARKODE_nonlinsol) { - *ier = -1; - return; - } - *ier = SUNNonlinSolSetMaxIters(F2C_ARKODE_nonlinsol, *maxiters); - break; - default: - *ier = -1; - } -} diff --git a/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.h b/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.h deleted file mode 100644 index 289a525..0000000 --- a/src/lib/sunnonlinsol/newton/fsunnonlinsol_newton.h +++ /dev/null @@ -1,56 +0,0 @@ -/* ----------------------------------------------------------------------------- - * Programmer(s): David J. Gardner @ LLNL - * ----------------------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * ----------------------------------------------------------------------------- - * This file contains the definitions needed for initialization of the - * SUNNonlinearSolver Newton moudule operations in Fortran. - * ---------------------------------------------------------------------------*/ - -#ifndef _FSUNNONLINSOL_NEWTON_H -#define _FSUNNONLINSOL_NEWTON_H - -#include /* FCMIX_* solver IDs */ -#include - -#ifdef __cplusplus /* wrapper to enable C++ usage */ -extern "C" { -#endif - -#if defined(SUNDIALS_F77_FUNC) -#define FSUNNEWTON_INIT SUNDIALS_F77_FUNC(fsunnewtoninit, FSUNNEWTONINIT) -#define FSUNNEWTON_SETMAXITERS SUNDIALS_F77_FUNC(fsunnewtonsetmaxiters, FSUNNEWTONSETMAXITERS) -#else -#define FSUNNEWTON_INIT fsunnewtoninit_ -#define FSUNNEWTON_SETMAXITERS fsunnewtonsetmaxiters_ -#endif - -/* Declarations of global variables */ - -extern SUNNonlinearSolver F2C_CVODE_nonlinsol; -extern SUNNonlinearSolver F2C_IDA_nonlinsol; -extern SUNNonlinearSolver F2C_ARKODE_nonlinsol; - -/* ----------------------------------------------------------------------------- - * Prototypes of exported functions - * - * FSUNNEWTON_INIT - initializes Newton nonlinear solver for main problem - * FSUNNEWTON_SETMAXITERS - sets the maximum number of nonlinear iterations - * ---------------------------------------------------------------------------*/ - -void FSUNNEWTON_INIT(int *code, int *ier); -void FSUNNEWTON_SETMAXITERS(int *code, int *maxiters, int *ier); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/lib/sunnonlinsol/newton/sunnonlinsol_newton.c b/src/lib/sunnonlinsol/newton/sunnonlinsol_newton.c index d645dcb..401754d 100644 --- a/src/lib/sunnonlinsol/newton/sunnonlinsol_newton.c +++ b/src/lib/sunnonlinsol/newton/sunnonlinsol_newton.c @@ -2,7 +2,7 @@ * Programmer(s): David J. Gardner @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start - * Copyright (c) 2002-2019, Lawrence Livermore National Security + * Copyright (c) 2002-2022, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * @@ -23,6 +23,9 @@ #include #include +#include "sundials_context_impl.h" +#include "sundials_logger_impl.h" + /* Content structure accessibility macros */ #define NEWTON_CONTENT(S) ( (SUNNonlinearSolverContent_Newton)(S->content) ) @@ -34,7 +37,7 @@ Constructor to create a new Newton solver ============================================================================*/ -SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y) +SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y, SUNContext sunctx) { SUNNonlinearSolver NLS; SUNNonlinearSolverContent_Newton content; @@ -50,8 +53,7 @@ SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y) return(NULL); /* Create an empty nonlinear linear solver object */ - NLS = NULL; - NLS = SUNNonlinSolNewEmpty(); + NLS = SUNNonlinSolNewEmpty(sunctx); if (NLS == NULL) return(NULL); /* Attach operations */ @@ -80,16 +82,21 @@ SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y) NLS->content = content; /* Fill general content */ - content->Sys = NULL; - content->LSetup = NULL; - content->LSolve = NULL; - content->CTest = NULL; - content->jcur = SUNFALSE; - content->curiter = 0; - content->maxiters = 3; - content->niters = 0; - content->nconvfails = 0; - content->ctest_data = NULL; + content->Sys = NULL; + content->LSetup = NULL; + content->LSolve = NULL; + content->CTest = NULL; + content->jcur = SUNFALSE; + content->curiter = 0; + content->maxiters = 3; + content->niters = 0; + content->nconvfails = 0; + content->ctest_data = NULL; + content->print_level = 0; + content->info_file = stdout; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + content->info_file = (sunctx->logger->info_fp) ? sunctx->logger->info_fp : stdout; +#endif /* Fill allocatable content */ content->delta = N_VClone(y); @@ -103,7 +110,8 @@ SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y) Constructor wrapper to create a new Newton solver for sensitivity solvers ============================================================================*/ -SUNNonlinearSolver SUNNonlinSol_NewtonSens(int count, N_Vector y) +SUNNonlinearSolver SUNNonlinSol_NewtonSens(int count, N_Vector y, + SUNContext sunctx) { SUNNonlinearSolver NLS; N_Vector w; @@ -112,7 +120,7 @@ SUNNonlinearSolver SUNNonlinSol_NewtonSens(int count, N_Vector y) w = N_VNew_SensWrapper(count, y); /* create nonlinear solver using sensitivity vector wrapper */ - NLS = SUNNonlinSol_Newton(w); + NLS = SUNNonlinSol_Newton(w, sunctx); /* free sensitivity vector wrapper */ N_VDestroy(w); @@ -208,6 +216,10 @@ int SUNNonlinSolSolve_Newton(SUNNonlinearSolver NLS, /* assume the Jacobian is good */ jbad = SUNFALSE; + /* initialize iteration and convergence fail counters for this solve */ + NEWTON_CONTENT(NLS)->niters = 0; + NEWTON_CONTENT(NLS)->nconvfails = 0; + /* looping point for attempts at solution of the nonlinear system: Evaluate the nonlinear residual function (store in delta) Setup the linear solver if necessary @@ -226,9 +238,23 @@ int SUNNonlinSolSolve_Newton(SUNNonlinearSolver NLS, if (retval != SUN_NLS_SUCCESS) break; } - /* initialize counter curiter */ + /* initialize current iteration counter for this solve attempt */ NEWTON_CONTENT(NLS)->curiter = 0; +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the nonlinear residual */ + if (NEWTON_CONTENT(NLS)->print_level && NEWTON_CONTENT(NLS)->info_file + && (NEWTON_CONTENT(NLS)->info_file != NLS->sunctx->logger->info_fp)) + { + fprintf(NEWTON_CONTENT(NLS)->info_file, + "SUNNONLINSOL_NEWTON (nni=%ld):\n", + (long int) NEWTON_CONTENT(NLS)->niters); + } + SUNLogger_QueueMsg(NLS->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNNonlinSolSolve_Newton", "begin-iteration", + "iter = %ld, nni = %ld", (long int) 0, NEWTON_CONTENT(NLS)->niters); +#endif + /* looping point for Newton iteration. Break out on any error. */ for(;;) { @@ -249,6 +275,21 @@ int SUNNonlinSolSolve_Newton(SUNNonlinearSolver NLS, retval = NEWTON_CONTENT(NLS)->CTest(NLS, ycor, delta, tol, w, NEWTON_CONTENT(NLS)->ctest_data); +#if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_INFO + /* print current iteration number and the nonlinear residual */ + if (NEWTON_CONTENT(NLS)->print_level && NEWTON_CONTENT(NLS)->info_file + && (NEWTON_CONTENT(NLS)->info_file != NLS->sunctx->logger->info_fp)) + { + fprintf(NEWTON_CONTENT(NLS)->info_file, + "SUNNONLINSOL_NEWTON (nni=%ld):\n", + (long int) NEWTON_CONTENT(NLS)->niters); + } + SUNLogger_QueueMsg(NLS->sunctx->logger, SUN_LOGLEVEL_INFO, + "SUNNonlinSolSolve_Newton", "end-of-iterate", + "iter = %ld, nni = %ld, wrmsnorm = %.16g", NEWTON_CONTENT(NLS)->curiter, + NEWTON_CONTENT(NLS)->niters, N_VWrmsNorm(delta, w)); +#endif + /* if successful update Jacobian status and return */ if (retval == SUN_NLS_SUCCESS) { NEWTON_CONTENT(NLS)->jcur = SUNFALSE; @@ -418,7 +459,7 @@ int SUNNonlinSolGetNumIters_Newton(SUNNonlinearSolver NLS, long int *niters) if (NLS == NULL) return(SUN_NLS_MEM_NULL); - /* return the total number of nonlinear iterations */ + /* return the number of nonlinear iterations in the last solve */ *niters = NEWTON_CONTENT(NLS)->niters; return(SUN_NLS_SUCCESS); } @@ -458,3 +499,31 @@ int SUNNonlinSolGetSysFn_Newton(SUNNonlinearSolver NLS, SUNNonlinSolSysFn *SysFn *SysFn = NEWTON_CONTENT(NLS)->Sys; return(SUN_NLS_SUCCESS); } + +int SUNNonlinSolSetInfoFile_Newton(SUNNonlinearSolver NLS, + FILE* info_file) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + NEWTON_CONTENT(NLS)->info_file = info_file; + + return(SUN_NLS_SUCCESS); +} + +int SUNNonlinSolSetPrintLevel_Newton(SUNNonlinearSolver NLS, + int print_level) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return(SUN_NLS_MEM_NULL); + + /* check for valid print level */ + if (print_level < 0 || print_level > 1) + return(SUN_NLS_ILL_INPUT); + + NEWTON_CONTENT(NLS)->print_level = print_level; + + return(SUN_NLS_SUCCESS); +} diff --git a/src/r2sundials.cpp b/src/r2sundials.cpp index 5eded7b..a549dfc 100644 --- a/src/r2sundials.cpp +++ b/src/r2sundials.cpp @@ -121,7 +121,8 @@ //' return(CV_SUCCESS); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' # For ease of use in C++, we convert param to a numeric vector instead of a list. //' pv=c(a=p$a) //' # new call to r2cvodes() with XPtr pointer ptr_exp. @@ -153,7 +154,8 @@ //' return(CV_SUCCESS); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' //' # root function //' ptr_ball_root=cppXPtr(code=' @@ -164,7 +166,8 @@ //' return(0); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' //' # event handler function //' ptr_ball_event=cppXPtr(code=' @@ -190,7 +193,8 @@ //' } //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' //' # ODE solving and plotting //' res_ball <- r2sundials::r2cvodes(yv, ti, ptr_ball, param=pv, nroot=2L, @@ -231,7 +235,8 @@ //' return(CV_SUCCESS); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' # pointer to sparse jacobian function //' ptr_rob_jacsp=cppXPtr(code=' //' int spjac_rob(double t, const vec &y, const vec &ydot, uvec &ir, uvec &pj, vec &v, int n, int nz, @@ -266,7 +271,8 @@ //' return(0); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' # pointer to sensitivity rhs function //' ptr_rob_sens1=cppXPtr(code=' //' int sens_rob1(int Ns, double t, const vec &y, const vec &ydot, int iS, const vec &yS, vec &ySdot, @@ -294,7 +300,8 @@ //' return(CV_SUCCESS); //' } //' ', depends=c("RcppArmadillo","r2sundials","rmumps"), -//' includes="using namespace arma;\n#include ", cacheDir="lib", verbose=FALSE) +//' includes=c("// [[Rcpp::plugins(cpp14)]]", "using namespace arma;", "#include "), +//' cacheDir="lib", verbose=FALSE) //' # Note that we don't use psens param for sensitivity calculations as we provide our own fsens1. //' res_rob <- r2sundials::r2cvodes(yv, ti, ptr_rob, param=pv, nz=8, fjac=ptr_rob_jacsp, Ns=3, //' fsens1=ptr_rob_sens1) @@ -316,6 +323,7 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ //long clk_tck = CLOCKS_PER_SEC; //clock_t t1, t2; //t1 = clock(); + SUNContext sunctx; Sunmem mem; UserData udata; realtype t; @@ -327,7 +335,7 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ mat res, mroots, msens_init, msens; vec ti; cube asens; - Function rf_event(Environment::global_env()["ls"]); // just a placeholder + Function rf_event(Environment::namespace_env("base")["ls"]); // just a placeholder rsunEventFn user_event_fn=NULL; if (integrator.size() == 0) @@ -358,8 +366,11 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ asens.set_size(neq, nti, Ns); std::vector ySv(Ns); + /* Create the SUNDIALS context that all SUNDIALS objects require */ + check_retval(SUNContext_Create(NULL, &sunctx)); + mem.add((void **) &sunctx, (funfree) SUNContext_Free); /* Create serial vector of length neq from yv (initial conditions)*/ - getmem(nv_y, N_VNew_Serial(neq)); + getmem(nv_y, N_VNew_Serial(neq, sunctx)); mem.add((void **) &nv_y, (funfree) N_VDestroy); vec yvec(NV_DATA_S(nv_y), neq, false), ynew; // vec proxy for y and y after event treatment by user function // copy init values @@ -370,7 +381,7 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ //Rcout << "yvec.memptr()=" << yvec.memptr() << "\n"; // create the solver memory and specify the Backward Differentiation Formula - getmem(cvode_mem, CVodeCreate(integrator[0])); + getmem(cvode_mem, CVodeCreate(integrator[0], sunctx)); mem.add((void **) &cvode_mem, (funfreep) CVodeFree); check_retval(CVodeSetErrHandlerFn(cvode_mem, rsunerr, NULL)); // Set cvode_mem and put different solver components @@ -398,7 +409,7 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ if (ita != 0. && ita != 1. && ita != 2.) stop("cvode: values in constraints must be 0, ±1 or ±2, instead %g found", it); } - getmem(nv_constraints, N_VNewEmpty_Serial(neq)); + getmem(nv_constraints, N_VNewEmpty_Serial(neq, sunctx)); mem.add((void **) &nv_constraints, (funfree) N_VDestroy); NV_DATA_S(nv_constraints) = (realtype *) constraints.begin(); check_retval(CVodeSetConstraints(cvode_mem, nv_constraints)); @@ -408,18 +419,18 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ if (nz > 0) { if (fjac.isNULL()) stop("cvode: fjac must not be NULL if nz > 0 (%d)", nz); - getmem(A, SUNSparseMatrix(neq, neq, nz, CSC_MAT)); + getmem(A, SUNSparseMatrix(neq, neq, nz, CSC_MAT, sunctx)); mem.add((void **) &A, (funfree) SUNMatDestroy); - getmem(LS, SUNLinSol_RMUMPS(nv_y, A, rmumps_perm[0])); + getmem(LS, SUNLinSol_RMUMPS(nv_y, A, rmumps_perm[0], sunctx)); mem.add((void **) &LS, (funfree) SUNLinSolFree); check_retval(CVodeSetLinearSolver(cvode_mem, LS, A)); check_retval(CVodeSetJacFn(cvode_mem, spjacwrap)); //jacsps)); // } else { // dense SUNMatrix for use in linear solves - getmem(A, SUNDenseMatrix(neq, neq)); + getmem(A, SUNDenseMatrix(neq, neq, sunctx)); mem.add((void **) &A, (funfree) SUNMatDestroy); // dense SUNLinearSolver object - getmem(LS, SUNLinSol_Dense(nv_y, A)); + getmem(LS, SUNLinSol_Dense(nv_y, A, sunctx)); mem.add((void **) &LS, (funfree) SUNLinSolFree); check_retval(CVodeSetLinearSolver(cvode_mem, LS, A)); if (!fjac.isNULL()) @@ -487,7 +498,7 @@ const int Ns=0, NumericVector psens=NumericVector::create(), NumericVector sens_ else msens_init=zeros(neq, Ns); //msens_init.print("msens_init"); - yS = N_VCloneVectorArray_Serial(Ns, nv_y); + yS = N_VCloneVectorArray(Ns, nv_y); mem.add((void **) &yS, (funfree1) N_VDestroyVectorArray, (int) Ns); for (int is=0; is < Ns; is++) { ySv[is]=vec(NV_DATA_S(yS[is]), neq, false); // vec proxies of yS "matrix" diff --git a/src/sunlinsol_rmumps.cpp b/src/sunlinsol_rmumps.cpp index f900d43..d99edc7 100644 --- a/src/sunlinsol_rmumps.cpp +++ b/src/sunlinsol_rmumps.cpp @@ -1,13 +1,14 @@ #include +#include +#include #include // exported functions // Function to create a new RMUMPS linear solver -SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_RMUMPS(N_Vector y, SUNMatrix A, int permutation=RMUMPS_PERM_AUTO) { +SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_RMUMPS(N_Vector y, SUNMatrix A, int permutation, SUNContext sunctx) { //Rcout << "call SUNLinSol_RMUMPS\n"; //Rcout << "permutation=" << permutation << "\n"; SUNLinearSolver S; - SUNLinearSolver_Ops ops; SUNLinearSolverContent_RMUMPS content; // Check compatibility with supplied SUNMatrix and N_Vector @@ -22,44 +23,48 @@ SUNDIALS_EXPORT SUNLinearSolver SUNLinSol_RMUMPS(N_Vector y, SUNMatrix A, int p int n = NV_LENGTH_S(y), nz=SM_NNZ_S(A); // Create linear solver - S = NULL; - S = (SUNLinearSolver) malloc(sizeof *S); + S = SUNLinSolNewEmpty(sunctx); if (S == NULL) return(NULL); - // Create linear solver operation structure - ops = NULL; - ops = (SUNLinearSolver_Ops) malloc(sizeof(struct _generic_SUNLinearSolver_Ops)); - if (ops == NULL) { free(S); return(NULL); } - // Attach operations - ops->gettype = SUNLinSolGetType_RMUMPS; - ops->initialize = SUNLinSolInitialize_RMUMPS; - ops->setup = SUNLinSolSetup_RMUMPS; - ops->solve = SUNLinSolSolve_RMUMPS; - ops->lastflag = NULL; - ops->space = NULL; - ops->free = SUNLinSolFree_RMUMPS; - ops->setatimes = NULL; - ops->setpreconditioner = NULL; - ops->setscalingvectors = NULL; - ops->numiters = NULL; - ops->resnorm = NULL; - ops->resid = NULL; - + S->ops->gettype = SUNLinSolGetType_RMUMPS; + S->ops->getid = NULL; + S->ops->setatimes = NULL; + S->ops->setpreconditioner = NULL; + S->ops->setscalingvectors = NULL; + S->ops->setzeroguess = NULL; + S->ops->initialize = SUNLinSolInitialize_RMUMPS; + S->ops->setup = SUNLinSolSetup_RMUMPS; + S->ops->solve = SUNLinSolSolve_RMUMPS; + S->ops->numiters = NULL; + S->ops->resnorm = NULL; + S->ops->resid = NULL; + S->ops->lastflag = NULL; + S->ops->space = NULL; + S->ops->free = SUNLinSolFree_RMUMPS; + // Create content content = NULL; content = (SUNLinearSolverContent_RMUMPS) malloc(sizeof(struct _SUNLinearSolverContent_RMUMPS)); - if (content == NULL) { free(ops); free(S); return(NULL); } + if (content == NULL) { SUNLinSolFree(S); return(NULL); } + + /* Attach content */ + S->content = content; // Fill content content->last_flag = 0; if (SUNSparseMatrix_SparseType(A) != CSC_MAT) { + SUNLinSolFree(S); stop("SUNLinSol_RMUMPS: wrong sparse matrix type, expected CSC_MAT"); } - if (n != SM_COLUMNS_S(A)) + if (n != SM_COLUMNS_S(A)) { + SUNLinSolFree(S); stop("SUNLinSol_RMUMPS: ncol(A) (%d) and length(y) (%d) don't concord", SM_COLUMNS_S(A), n); - if (SM_COLUMNS_S(A) != SM_ROWS_S(A)) + } + if (SM_COLUMNS_S(A) != SM_ROWS_S(A)) { + SUNLinSolFree(S); stop("SUNLinSol_RMUMPS: matrix is supposed to be square, instead got %dx%d", SM_ROWS_S(A), SM_COLUMNS_S(A)); + } // build jcp array from irp and pc #if defined(SUNDIALS_INT32_T) ivec ir(SM_INDEXVALS_S(A), nz, false); @@ -88,10 +93,6 @@ print(asl["j"]); print(asl["v"]); print(asl["nrow"]); */ - // Attach content and ops - S->content = content; - S->ops = ops; - return(S); }