diff --git a/.ci/ci.jl b/.ci/ci.jl
index ff1b4624..23ead768 100644
--- a/.ci/ci.jl
+++ b/.ci/ci.jl
@@ -8,7 +8,8 @@ Pkg.activate(@__DIR__)
if ARGS[1] == "full"
- pkgs = ["MadNLPHSL","MadNLPPardiso","MadNLPMumps","MadNLPKrylov"]
+ pkgs = ["MadNLPHSL","MadNLPPardiso","MadNLPMumps"]
+ # ,"MadNLPKrylov"] # Krylov has been discontinued since the introduction of iterative refinement on the full space.
elseif ARGS[1] == "basic"
pkgs = ["MadNLPMumps","MadNLPKrylov"]
elseif ARGS[1] == "cuda"
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index d77d3a0c..f8fd014e 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -9,3 +9,4 @@ jobs:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
+ ssh: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index c502bfe7..a8bb17ea 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@latest
with:
- version: '1.6'
+ version: '1.9'
- name: Install dependencies
run: julia --project=docs/ docs/install.jl
- name: Build and deploy
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9bdf107b..0c1896f7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,12 +7,16 @@ on:
pull_request:
types: [opened, synchronize, reopened]
+permissions:
+ actions: write
+ contents: read
+
jobs:
test-github:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- julia-version: ['1.6','^1.7']
+ julia-version: ['1.10']
julia-arch: [x64]
os: [ubuntu-latest,macos-latest,windows-latest]
steps:
@@ -20,19 +24,21 @@ jobs:
- uses: julia-actions/setup-julia@latest
with:
version: ${{ matrix.julia-version }}
+ - uses: julia-actions/cache@v1
- run: julia --color=yes --project=.ci .ci/ci.jl basic
test-moonshot:
env:
- JULIA_DEPOT_PATH: /scratch/sshin/github-actions/julia_depot_madnlp
+ JULIA_DEPOT_PATH: /home/sshin/action-runners/MadNLP/julia-depot/
runs-on: self-hosted
strategy:
matrix:
- julia-version: ['1.6','^1.7']
+ julia-version: ['1.10']
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@latest
with:
version: ${{ matrix.julia-version }}
+ - uses: julia-actions/cache@v1
- run: julia --color=yes --project=.ci .ci/ci.jl full
- uses: julia-actions/julia-processcoverage@v1
with:
@@ -43,15 +49,15 @@ jobs:
test-moonshot-cuda:
env:
CUDA_VISIBLE_DEVICES: 1
- JULIA_DEPOT_PATH: /scratch/sshin/github-actions/julia_depot_madnlp
- JULIA_CUDA_USE_BINARYBUILDER: true
+ JULIA_DEPOT_PATH: /home/sshin/action-runners/MadNLP/julia-depot/
runs-on: self-hosted
strategy:
matrix:
- julia-version: ['^1.7']
+ julia-version: ['1.10']
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@latest
with:
version: ${{ matrix.julia-version }}
+ - uses: julia-actions/cache@v1
- run: julia --color=yes --project=.ci .ci/ci.jl cuda
diff --git a/CITATION.bib b/CITATION.bib
index 68f3852f..0a60cca3 100644
--- a/CITATION.bib
+++ b/CITATION.bib
@@ -1,3 +1,9 @@
+@article{shin2023accelerating,
+ title={Accelerating optimal power flow with {GPU}s: {SIMD} abstraction of nonlinear programs and condensed-space interior-point methods},
+ author={Shin, Sungho and Pacaud, Fran{\c{c}}ois and Anitescu, Mihai},
+ journal={arXiv preprint arXiv:2307.16830},
+ year={2023}
+}
@article{shin2020graph,
title={Graph-Based Modeling and Decomposition of Energy Infrastructures},
author={Shin, Sungho and Coffrin, Carleton and Sundar, Kaarthik and Zavala, Victor M},
diff --git a/NEWS.md b/NEWS.md
index b99e717f..ed0df74c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,68 @@
# Release notes
+## v0.8.2 (May 15th, 2024)
+### Bug Fixes
+- [MOI] fix eval_constraint_jacobian_product (#337)
+- [MOI] Add support for nonlinear problems without Hessian (#322)
+- [MadNLPGPU] Bug fix for empty Hessian (#326)
+
+### New features
+- [LinearSolvers] Add support for LDL factorization in CHOLMOD (#321)
+- Add an ordering for cuDSS (#317)
+
+### Performance and maintenance
+- Reexport MadNLP from MadNLP/libs (#325)
+- Use GH Action Julia cache (#339)
+- [MadNLPMumps] SArrays removed (#328)
+
+## v0.8.1 (Apr 9th, 2024)
+### Bug Fixes
+- fix optional arguments in get_index_constraints (#316)
+- Add synchronization for cuDSS (#314)
+
+### Performance and maintenance
+- Introduce linear solvers with version info (#315)
+- Add logos to documentation (#310)
+
+## v0.8.0 (Mar 7th, 2024)
+### Bug Fixes
+- fix non-deterministic behavior by forcing instantiations (#284)
+
+### New features
+- LDL factorization improvement (#300)
+- add support for MOI.ScalarNonlinearFunction (#280)
+- [Linear Solver] Added undocumented Cholesky solver (#292)
+- Add support for CUDSS.jl (#296)
+- [MOI] Add support for MOI.Interval{Float64} (#295)
+
+### Performance and maintenance
+- Update README, OPTIONS, CITATION, and documentation (#304)
+- MOI interface moved to ext (#268)
+- Improve kkt creation on GPUs (#299)
+- CompatHelper: add new compat entry for Metis at version 1 for package MadNLPGPU, (keep existing compat) (#298)
+- CompatHelper: add new compat entry for CUDSS at version 0.1 for package MadNLPGPU, (keep existing compat) (#297)
+- update documentation (#293)
+- [Algorithm] Improve LBFGS performance (#290)
+- [API] Expose the options for iterative refinements and quasi-Newton (#287)
+- [README] Fix Options.md typo (#291)
+- [Options] Deactivate scaling if `nlp_scaling=false` (#289)
+- [API] Simplify arguments of create_kkt_system (#286)
+- [Tests] Remove test nlp_009_010 from MINLPTests tests (#288)
+- Simplify API of SparseCallback and DenseCallback (#285)
+- [MadNLPHSL] Use HSL.jl (#277)
+- add support for CUDA.jl v5 (#283)
+- simplify implementation of kernels (#281)
+- add proper testing for KKT systems (#278)
+
+
+## v0.7.0 (June 12th, 2023)
+
+### Performance and maintenance
+- CompatHelper: bump compat for NLPModels to 0.20, (keep existing compat) (#252)
+- CompatHelper: bump compat for NLPModels to 0.20 for package MadNLPTests, (keep existing compat) (#253)
+- [MadNLPGPU] Migrate to KernelAbstractions 0.9 (#258)
+- [MadNLPTests] Remove ADNLPModels in deps (#259)
+- Update README.md for JuMP documentation (#260)
+- Update for julia v1.9 (#261)
## v0.6.0 (March 23rd, 2023)
diff --git a/OPTIONS.md b/OPTIONS.md
deleted file mode 100644
index d89a6c77..00000000
--- a/OPTIONS.md
+++ /dev/null
@@ -1,227 +0,0 @@
-## MadNLP Options
-### Interior Point Solver Options
-- `linear_solver::Module = DefaultLinearSolver`:\
- Linear solver used for solving primal-dual system. Valid values are: {`MadNLPUmfpack`, `MadNLPMumps`, `MadNLPPardisoMKL`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa77`, `MadNLPMa86`, `MadNLPMa97`, `MadNLPPardiso`, `MadNLPSchur`, `MadNLPSchwarz`, `MadNLPLapackCPU`, `MadNLPLapackGPU`} (some may require using extension packages). The selected solver should be properly built in the build procedure. See [README.md](https://github.com/sshin23/MadNLP.jl) file.
-- `iterator::Module = Richardson `\
- Iterator used for iterative refinement. Valid values are: {`MadNLPRichardson`,`MadNLPKrylov`}.
- - `Richardson` uses [Richardson iteration](https://en.wikipedia.org/wiki/Modified_Richardson_iteration)
- - `Krylov` uses [restarted Generalized Minimal Residual](https://en.wikipedia.org/wiki/Generalized_minimal_residual_method) method implemented in [IterativeSolvers.jl](https://github.com/JuliaMath/IterativeSolvers.jl).
-- `blas_num_threads::Int = 1`\
- Number of threads used for BLAS routines. Valid range is ``[1,\infty)``.
-- `disable_garbage_collector::Bool = false `\
- If `true`, Julia garbage collector is temporarily disabled while solving the problem, and then enabled back once the solution is complete.
-- `rethrow_error::Bool = true `\
- If `false`, any internal error thrown by `MadNLP` and interruption exception (triggered by the user via `^C`) is caught, and not rethrown. If an error is caught, the solver terminates with an error message.
-- `print_level::LogLevels = INFO`\
- `stdout` print level. Any message with level less than `print_level` is not printed on `stdout`. Valid values are: `MadNLP`.{`TRACE`, `DEBUG`, `INFO`, `NOTICE`, `WARN`, `ERROR`}.
-- `output_file::String = INFO`\
- If not `""`, the output log is teed to the file at the path specified in `output_file`.
-- `file_print_level::LogLevels = TRACE`\
- File print level; any message with level less than `file_print_level` is not printed on the file specified in `output_file`. Valid values are: `MadNLP`.{`TRACE`, `DEBUG`, `INFO`, `NOTICE`, `WARN`, `ERROR`}.
-- `tol::Float64 = 1e-8`\
- Termination tolerance. The solver terminates if the scaled primal, dual, complementary infeasibility is less than `tol`. Valid range is ``(0,\infty)``.
-- `acceptable_tol::Float64 = 1e-6`\
- Acceptable tolerance. The solver terminates if the scaled primal, dual, complementary infeasibility is less than `acceptable_tol`, for `acceptable_iter` consecutive interior point iteration steps.
-- `acceptable_iter::Int = 15`\
- Acceptable iteration tolerance. Valid rage is ``[1,\infty)``.
-- `diverging_iterates_tol::Float64 = 1e20`\
- Diverging iteration tolerance. The solver terminates with exit symbol `:Diverging_Iterates` if the NLP error is greater than `diverging_iterates_tol`.
-- `max_iter::Int = 3000`\
- Maximum number of interior point iterations. The solver terminates with exit symbol `:Maximum_Iterations_Exceeded` if the interior point iteration count exceeds `max_iter`.
-- `max_wall_time::Float64 = 1e6`\
- Maximum wall time for interior point solver. The solver terminates with exit symbol `:Maximum_WallTime_Exceeded` if the total solver wall time exceeds `max_wall_time`.
-- `fixed_variable_treatment::FixedVariableTreatments = MAKE_PARAMETER`\
- Valid values are: `MadNLP`.{`RELAX_BOUND`,`MAKE_PARAMETER`}.
-- `jacobian_constant::Bool = false`\
- If `true`, constraint Jacobian is only evaluated once and reused.
-- `hessian_constant::Bool = false`\
- If `true`, Lagrangian Hessian is only evaluated once and reused.
-- `reduced_system::Bool = true`\
- If `true`, the primal-dual system is formulated as in Greif (2014).
-- `inertia_correction_method::InertiaCorrectionMethods = INERTIA_AUTO`\
- Valid values are: `MadNLP`.{`INERTIA_AUTO`,`INERTIA_BASED`, `INERTIA_FREE`}.
- - `INERTIA_BASED` uses the strategy in Ipopt.
- - `INERTIA_FREE` uses the strategy in Chiang (2016).
- - `INERTIA_AUTO` uses `INERTIA_BASED` if inertia information is available and uses `INERTIA_FREE` otherwise.
-- `s_max::Float64 = 100.`
-- `kappa_d::Float64 = 1e-5`
-- `constr_mult_init_max::Float64 = 1e3`
-- `bound_push::Float64 = 1e-2`
-- `bound_fac::Float64 = 1e-2`
-- `nlp_scaling_max_gradient::Float64 = 100.`
-- `inertia_free_tol::Float64 = 0.`
-- `min_hessian_perturbation::Float64 = 1e-20`
-- `first_hessian_perturbation::Float64 = 1e-4`
-- `max_hessian_perturbation::Float64 = 1e20`
-- `perturb_inc_fact_first::Float64 = 1e2`
-- `perturb_inc_fact::Float64 = 8.`
-- `perturb_dec_fact::Float64 = 1/3`
-- `jacobian_regularization_exponent::Float64 = 1/4`
-- `jacobian_regularization_value::Float64 = 1e-8`
-- `soft_resto_pderror_reduction_factor::Float64 = 0.9999`
-- `required_infeasibility_reduction::Float64 = 0.9`
-- `obj_max_inc::Float64 = 5.`
-- `kappha_soc::Float64 = 0.99`
-- `max_soc::Int = 4`
-- `alpha_min_frac::Float64 = 0.05`
-- `s_theta::Float64 = 1.1`
-- `s_phi::Float64 = 2.3`
-- `eta_phi::Float64 = 1e-4`
-- `kappa_soc::Float64 = 0.99`
-- `gamma_theta::Float64 = 1e-5`
-- `gamma_phi::Float64 = 1e-5`
-- `delta::Float64 = 1`
-- `kappa_sigma::Float64 = 1e10`
-- `barrier_tol_factor::Float64 = 10.`
-- `rho::Float64 = 1000.`
-- `mu_init::Float64 = 1e-1`
-- `mu_min::Float64 = 1e-9`
-- `mu_superlinear_decrease_power::Float64 = 1.5`
-- `tau_min::Float64 = 0.99`
-- `mu_linear_decrease_factor::Float64 = .2`
-
-### Linear Solver Options
-Linear solver options are specific to the linear solver chosen at `linear_solver` option. Irrelevant options are ignored and a warning message is printed.
-#### Ma27 (requires `MadNLPHSL`)
-- `ma27_pivtol::Float64 = 1e-8`
-- `ma27_pivtolmax::Float64 = 1e-4`
-- `ma27_liw_init_factor::Float64 = 5.`
-- `ma27_la_init_factor::Float64 = 5.`
-- `ma27_meminc_factor::Float64 = 2.`
-
-#### Ma57 (requires `MadNLPHSL`)
-- `ma57_pivtol::Float64 = 1e-8`
-- `ma57_pivtolmax::Float64 = 1e-4`
-- `ma57_pre_alloc::Float64 = 1.05`
-- `ma57_pivot_order::Int = 5`
-- `ma57_automatic_scaling::Bool = false`
-- `ma57_block_size::Int = 16`
-- `ma57_node_amalgamation::Int = 16`
-- `ma57_small_pivot_flag::Int = 0`
-
-#### Ma77 (requires `MadNLPHSL`)
-- `ma77_buffer_lpage::Int = 4096`
-- `ma77_buffer_npage::Int = 1600`
-- `ma77_file_size::Int = 2097152`
-- `ma77_maxstore::Int = 0`
-- `ma77_nemin::Int = 8`
-- `ma77_order::Ma77.Ordering = Ma77.METIS`
-- `ma77_print_level::Int = -1`
-- `ma77_small::Float64 = 1e-20`
-- `ma77_static::Float64 = 0.`
-- `ma77_u::Float64 = 1e-8`
-- `ma77_umax::Float64 = 1e-4`
-
-#### Ma86 (requires `MadNLPHSL`)
-- `ma86_num_threads::Int = 1`
-- `ma86_print_level::Float64 = -1`
-- `ma86_nemin::Int = 32`
-- `ma86_order::Ma86.Ordering = Ma86.METIS`
-- `ma86_scaling::Ma86.Scaling = Ma86.SCALING_NONE`
-- `ma86_small::Float64 = 1e-20`
-- `ma86_static::Float64 = 0.`
-- `ma86_u::Float64 = 1e-8`
-- `ma86_umax::Float64 = 1e-4`
-
-#### Ma97 (requires `MadNLPHSL`)
-- `ma97_num_threads::Int = 1`
-- `ma97_print_level::Int = -1`
-- `ma97_nemin::Int = 8`
-- `ma97_order::Ma97.Ordering = Ma97.METIS`
-- `ma97_scaling::Ma97.Scaling = Ma97.SCALING_NONE`
-- `ma97_small::Float64 = 1e-20`
-- `ma97_u::Float64 = 1e-8`
-- `ma97_umax::Float64 = 1e-4`
-
-#### Mumps (requires `MadNLPMumps`)
-- `mumps_dep_tol::Float64 = 0.`
-- `mumps_mem_percent::Int = 1000`
-- `mumps_permuting_scaling::Int = 7`
-- `mumps_pivot_order::Int = 7`
-- `mumps_pivtol::Float64 = 1e-6`
-- `mumps_pivtolmax::Float64 = .1`
-- `mumps_scaling::Int = 77`
-
-#### Umfpack (requires `MadNLPUmfpack`)
-- `umfpack_pivtol::Float64 = 1e-4`
-- `umfpack_pivtolmax::Float64 = 1e-1`
-- `umfpack_sym_pivtol::Float64 = 1e-3`
-- `umfpack_block_size::Float64 = 16`
-- `umfpack_strategy::Float64 = 2.`
-
-#### Pardiso (requires `MadNLPPardiso`)
-- `pardiso_matching_strategy::Pardiso.MatchingStrategy = COMPLETE2x2`
-- `pardiso_max_inner_refinement_steps::Int = 1`
-- `pardiso_msglvl::Int = 0`
-- `pardiso_order::Int = 2`
-
-#### PardisoMKL
-- `pardisomkl_num_threads::Int = 1`
-- `pardiso_matching_strategy::PardisoMKL.MatchingStrategy = COMPLETE2x2`
-- `pardisomkl_max_iterative_refinement_steps::Int = 1`
-- `pardisomkl_msglvl::Int = 0`
-- `pardisomkl_order::Int = 2`
-
-#### LapackGPU (requires `MadNLPGPU`)
-- `lapackgpu_algorithm::LapackGPU.Algorithms = BUNCHKAUFMAN`
-
-#### LapackCPU
-- `lapackcpu_algorithm::LapackCPU.Algorithms = BUNCHKAUFMAN`
-
-#### Schur (requires `MadNLPGraphs`)
-- `schur_subproblem_solver::Module = DefaultLinearSolver` \
- Linear solver used for solving subproblem. Valid values are: {`MadNLPUmfpack`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa97`, `MadNLPMumps`}.
-- `schur_dense_solver::Module = DefaultDenseSolver` \
- Linear solver used for solving Schur complement system
-- `schur_custom_partition::Bool = false` \
- If `false`, Schur solver automatically detects the partition using `Metis`. If `true`, the partition information given in `schur_part` is used. `schur_num_parts` and `schur_part` should be properly set by the user. When using with `Plasmo`, `schur_num_parts` and `schur_part` are automatically set by the `Plasmo` interface.
-- `schur_num_parts::Int = 2` \
- Number of parts (excluding the parent node). Valid range is ``[1,\infty)``
-- `schur_part::Vector{Int} = Int[]` \
- Custom partition information in a vector form. The parent node should be labeled as `0`. Only valid if `schur_custom_partition` is `true`.
-
-#### Schwarz (requires `MadNLPGraphs`)
-- `schwarz_subproblem_solver::Module = DefaultSubproblemSolver` \
- Linear solver used for solving subproblem. Valid values are: {`MadNLPUmfpack`, `MadNLPPardisoMKL`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa77`, `MadNLPMa86`, `MadNLPMa97`, `MadNLPPardiso`}.
-- `schwarz_custom_partition::Bool = false` \
- If `false`, Schwarz solver automatically detects the partition using `Metis`. If `true`, the partition information given in `schur_part` is used. `schur_num_parts` and `schur_part` should be properly set by the user. When using with `Plasmo`, `schur_num_parts` and `schur_part` are automatically set by the `Plasmo` interface.
-- `schwarz_num_parts::Int = 2` \
- Number of parts. Valid range is ``[1,\infty)``
-- `schwarz_part::Vector{Int} = Int[]` \
- Custom partition information in a vector form. Only valid if `schwar_custom_partition` is `true`.
-- `schwarz_num_parts_upper::Int = 0` \
- Number of parts in upper level partition. If `schwarz_num_parts_upper!=0`, a bilevel partitioning scheme is used. Valid range is ``[1,\infty)``
-- `schwarz_part_upper::Vector{Int} = Int[]` \
- Custom partition for the upper level partition.
-- `schwarz_fully_improve_subproblem_solver::Bool = true` \
- If `true`, the subproblem solvers are fully improved when the linear solver is initialized.
-- `schwarz_max_expand_factor::Int = 4` \
- The size of overlap is fully saturated when the `improve!` is called `schwarz_max_expand_factor-1` times. Valid range is ``[2,\infty)``.
-
-### Iterator Options
-#### Richardson
-- `richardson_max_iter::Int = 10` \
- Maximum number of Richardson iteration steps. Valid range is ``[1,\infty)``.
-- `richardson_tol::Float64 = 1e-10` \
- Convergence tolerance of Richardson iteration. Valid range is ``(0,\infty)``.
-- `richardson_acceptable_tol::Float64 = 1e-5` \
- Acceptable convergence tolerance of Richardson iteration. If the Richardson iteration counter exceeds `richardson_max_iter` without satisfying the convergence criteria set with `richardson_tol`, the Richardson solver checks whether the acceptable convergence criteria set with `richardson_acceptable_tol` is satisfied; if the acceptable convergence criteria is satisfied, the computed step is used; otherwise, the augmented system is treated to be singular. Valid range is ``(0,\infty)``.
-
-#### Krylov (requires `MadNLPIterative`)
-- `krylov_max_iter::Int = 10` \
- Maximum number of Krylov iteration steps. Valid range is ``[1,\infty)``.
-- `krylov_tol::Float64 = 1e-10` \
- Convergence tolerance of Krylov iteration. Valid range is ``(0,\infty)``.
-- `krylov_acceptable_tol::Float64 = 1e-5` \
- Acceptable convergence tolerance of Krylov iteration. If the Krylov iteration counter exceeds `krylov_max_iter` without satisfying the convergence criteria set with `krylov_tol`, the Krylov solver checks whether the acceptable convergence criteria set with `krylov_acceptable_tol` is satisfied; if the acceptable convergence criteria is satisfied, the computed step is used; otherwise, the augmented system is treated to be singular. Valid range is ``(0,\infty)``.
-- `krylov_restart::Int = 5` \
- Maximum Krylov iteration before restarting. Valid range is ``[1,\infty)``.
-
-### Reference
-[Bunch, 1977]: J R Bunch and L Kaufman, Some stable methods for calculating inertia and solving symmetric linear systems, Mathematics of Computation 31:137 (1977), 163-179.
-
-[Greif, 2014]: Greif, Chen, Erin Moulding, and Dominique Orban. "Bounds on eigenvalues of matrices arising from interior-point methods." SIAM Journal on Optimization 24.1 (2014): 49-83.
-
-[Wächter, 2006]: Wächter, Andreas, and Lorenz T. Biegler. "On the implementation of an interior-point filter line-search algorithm for large-scale nonlinear programming." Mathematical programming 106.1 (2006): 25-57.
-
-[Chiang, 2016]: Chiang, Nai-Yuan, and Victor M. Zavala. "An inertia-free filter line-search algorithm for large-scale nonlinear programming." Computational Optimization and Applications 64.2 (2016): 327-354.
diff --git a/Project.toml b/Project.toml
index 9c65576d..65817575 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,14 +1,11 @@
name = "MadNLP"
uuid = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
-authors = ["Sungho Shin"]
-repo = "https://github.com/sshin23/MadNLP.jl.git"
-version = "0.6.0"
+version = "0.8.4"
[deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+LDLFactorizations = "40e66cde-538c-5869-a4ad-c39174c6795b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@@ -16,19 +13,27 @@ SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
+[weakdeps]
+MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
+
+[extensions]
+MadNLPMOI = "MathOptInterface"
+
[compat]
MINLPTests = "~0.5"
-MadNLPTests = "~0.3"
+MadNLPTests = "0.5"
MathOptInterface = "1"
-NLPModels = "~0.17.2, 0.18, 0.19"
+NLPModels = "~0.17.2, 0.18, 0.19, 0.20, 0.21"
SolverCore = "~0.3"
-julia = "1.6"
+LDLFactorizations = "0.10"
+julia = "1.9"
[extras]
MINLPTests = "ee0a3090-8ee9-5cdb-b8cb-8eeba3165522"
MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
+Quadmath = "be4d8f0f-7fa4-5f49-b795-2f01399ab2dd"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Test", "MadNLPTests", "MINLPTests", "Random"]
+test = ["Test", "MadNLPTests", "MINLPTests", "Random", "MathOptInterface", "Quadmath"]
diff --git a/README.md b/README.md
index d5a23012..3ecb9f68 100644
--- a/README.md
+++ b/README.md
@@ -1,92 +1,90 @@
-![Logo](logo-full.svg)
+![logo](https://github.com/MadNLP/MadNLP.jl/blob/master/logo-full.svg)
+
+*A [nonlinear programming](https://en.wikipedia.org/wiki/Nonlinear_programming) solver based on the filter line-search [interior point method](https://en.wikipedia.org/wiki/Interior-point_method) (as in [Ipopt](https://github.com/coin-or/Ipopt)) that can handle/exploit diverse classes of data structures, either on [host](https://en.wikipedia.org/wiki/Central_processing_unit) or [device](https://en.wikipedia.org/wiki/Graphics_processing_unit) memory.*
+
---
-| **Documentation** | **Build Status** | **Coverage** | **DOI** |
-|:-----------------:|:----------------:|:----------------:|:----------------:|
-| [![doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://madnlp.github.io/MadNLP.jl/dev) | [![build](https://github.com/MadNLP/MadNLP.jl/workflows/build/badge.svg?branch=dev%2Fgithub_actions)](https://github.com/MadNLP/MadNLP.jl/actions?query=workflow%3Abuild) | [![codecov](https://codecov.io/gh/MadNLP/MadNLP.jl/branch/master/graph/badge.svg?token=MBxH2AAu8Z)](https://codecov.io/gh/MadNLP/MadNLP.jl) | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5825776.svg)](https://doi.org/10.5281/zenodo.5825776) |
-MadNLP is a [nonlinear programming](https://en.wikipedia.org/wiki/Nonlinear_programming) (NLP) solver, purely implemented in [Julia](https://julialang.org/). MadNLP implements a filter line-search algorithm, as that used in [Ipopt](https://github.com/coin-or/Ipopt). MadNLP seeks to streamline the development of modeling and algorithmic paradigms in order to exploit structures and to make efficient use of high-performance computers.
+| **License** | **Documentation** | **Build Status** | **Coverage** | **DOI** |
+|:-----------------:|:-----------------:|:----------------:|:----------------:|:----------------:|
+| [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/MadNLP/MadNLP.jl/blob/master/LICENSE) | [![doc](https://img.shields.io/badge/docs-stable-blue.svg)](https://madnlp.github.io/MadNLP.jl/stable) [![doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://madnlp.github.io/MadNLP.jl/dev) | [![build](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml/badge.svg)](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml) | [![codecov](https://codecov.io/gh/MadNLP/MadNLP.jl/branch/master/graph/badge.svg?token=MBxH2AAu8Z)](https://codecov.io/gh/MadNLP/MadNLP.jl) | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5825776.svg)](https://doi.org/10.5281/zenodo.5825776) |
## Installation
+
```julia
pkg> add MadNLP
```
+
Optionally, various extension packages can be installed together:
```julia
-pkg> add MadNLPHSL, MadNLPPardiso, MadNLPMumps, MadNLPGPU, MadNLPGraph, MadNLPKrylov
+pkg> add MadNLPHSL, MadNLPPardiso, MadNLPMumps, MadNLPGPU
```
-These packages are stored in the `lib` subdirectory within the main MadNLP repository. Some extension packages may require additional dependencies or specific hardware. For the instructions for the build procedure, see the following links: [MadNLPHSL](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPHSL), [MadNLPPardiso](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPHSL), [MadNLPGPU](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPGPU).
+These packages are stored in the `lib` subdirectory within the main MadNLP repository. Some extension packages may require additional dependencies or specific hardware. For build instructions, see the following links:
+
+ * [MadNLPHSL](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPHSL)
+ * [MadNLPMumps](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPMumps)
+ * [MadNLPPardiso](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPPardiso)
+ * [MadNLPGPU](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPGPU)
## Usage
+
### Interfaces
+
MadNLP is interfaced with modeling packages:
+
- [JuMP](https://github.com/jump-dev/JuMP.jl)
-- [Plasmo](https://github.com/zavalab/Plasmo.jl)
- [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl).
-Users can pass various options to MadNLP also through the modeling packages. The interface-specific syntaxes are shown below. To see the list of MadNLP solver options, check the [OPTIONS.md](https://github.com/MadNLP/MadNLP/blob/master/OPTIONS.md) file.
+
+Users can also pass various options to MadNLP through the modeling packages. The interface-specific syntax is shown below. To see the list of MadNLP solver options, check the [documentation](https://madnlp.github.io/MadNLP.jl/dev/options/).
#### JuMP interface
+
```julia
using MadNLP, JuMP
-
model = Model(()->MadNLP.Optimizer(print_level=MadNLP.INFO, max_iter=100))
@variable(model, x, start = 0.0)
@variable(model, y, start = 0.0)
@NLobjective(model, Min, (1 - x)^2 + 100 * (y - x^2)^2)
-
optimize!(model)
-
```
#### NLPModels interface
+
```julia
using MadNLP, CUTEst
model = CUTEstModel("PRIMALC1")
madnlp(model, print_level=MadNLP.WARN, max_wall_time=3600)
```
-#### Plasmo interface (requires extension `MadNLPGraph`)
-```julia
-using MadNLP, MadNLPGraph, Plasmo
-
-graph = OptiGraph()
-@optinode(graph,n1)
-@optinode(graph,n2)
-@variable(n1,0 <= x <= 2)
-@variable(n1,0 <= y <= 3)
-@constraint(n1,x+y <= 4)
-@objective(n1,Min,x)
-@variable(n2,x)
-@NLnodeconstraint(n2,exp(x) >= 2)
-@linkconstraint(graph,n1[:x] == n2[:x])
-
-MadNLP.optimize!(graph; print_level=MadNLP.DEBUG, max_iter=100)
-
-```
-
### Linear Solvers
+
MadNLP is interfaced with non-Julia sparse/dense linear solvers:
- [Umfpack](https://people.engr.tamu.edu/davis/suitesparse.html)
-- [MKL-Pardiso](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/sparse-solver-routines/intel-mkl-pardiso-parallel-direct-sparse-solver-interface.html)
-- [MKL-Lapack](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/lapack-routines.html)
+- [Lapack](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/lapack-routines.html)
- [HSL solvers](http://www.hsl.rl.ac.uk/ipopt/) (requires extension)
- [Pardiso](https://www.pardiso-project.org/) (requires extension)
+- [Pardiso-MKL](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/sparse-solver-routines/intel-mkl-pardiso-parallel-direct-sparse-solver-interface.html) (requires extension)
- [Mumps](http://mumps.enseeiht.fr/) (requires extension)
- [cuSOLVER](https://docs.nvidia.com/cuda/cusolver/index.html) (requires extension)
+- [cuDSS](https://docs.nvidia.com/cuda/cudss/index.html) (requires extension)
Each linear solver in MadNLP is a Julia type, and the `linear_solver` option should be specified by the actual type. Note that the linear solvers are always exported to `Main`.
-#### Built-in Solvers: Umfpack, PardisoMKL, LapackCPU
+#### Built-in Solvers: Umfpack, LapackCPU
+
```julia
using MadNLP, JuMP
# ...
model = Model(()->MadNLP.Optimizer(linear_solver=UmfpackSolver)) # default
+model = Model(()->MadNLP.Optimizer(linear_solver=LDLSolver)) # works only for convex problems
+model = Model(()->MadNLP.Optimizer(linear_solver=CHOLMODSolver)) # works only for convex problems
model = Model(()->MadNLP.Optimizer(linear_solver=LapackCPUSolver))
```
#### HSL (requires extension `MadNLPHSL`)
+
```julia
-using MadNLP, MadNLPHSL, JuMP
+using MadNLPHSL, JuMP
# ...
model = Model(()->MadNLP.Optimizer(linear_solver=Ma27Solver))
model = Model(()->MadNLP.Optimizer(linear_solver=Ma57Solver))
@@ -96,43 +94,45 @@ model = Model(()->MadNLP.Optimizer(linear_solver=Ma97Solver))
```
#### Mumps (requires extension `MadNLPMumps`)
+
```julia
-using MadNLP, MadNLPMumps, JuMP
+using MadNLPMumps, JuMP
# ...
model = Model(()->MadNLP.Optimizer(linear_solver=MumpsSolver))
```
#### Pardiso (requires extension `MadNLPPardiso`)
+
```julia
-using MadNLP, MadNLPPardiso, JuMP
+using MadNLPPardiso, JuMP
# ...
model = Model(()->MadNLP.Optimizer(linear_solver=PardisoSolver))
model = Model(()->MadNLP.Optimizer(linear_solver=PardisoMKLSolver))
```
-#### LapackGPU (requires extension `MadNLPGPU`)
-```julia
-using MadNLP, MadNLPGPU, JuMP
-# ...
-model = Model(()->MadNLP.Optimizer(linear_solver=LapackGPUSolver))
-```
-
+#### CUDA (requires extension `MadNLPGPU`)
-#### Schur and Schwarz (requires extension `MadNLPGraph`)
```julia
-using MadNLP, MadNLPGraph, JuMP
+using MadNLPGPU, JuMP
# ...
-model = Model(()->MadNLP.Optimizer(linear_solver=MadNLPSchwarz))
-model = Model(()->MadNLP.Optimizer(linear_solver=MadNLPSchur))
-```
-The solvers in `MadNLPGraph` (`Schur` and `Schwarz`) use multi-thread parallelism; thus, Julia session should be started with `-t` flag.
-```sh
-julia -t 16 # to use 16 threads
+model = Model(()->MadNLP.Optimizer(linear_solver=LapackGPUSolver)) # for dense problems
+model = Model(()->MadNLP.Optimizer(linear_solver=CUDSSSolver)) # for sparse problems
+model = Model(()->MadNLP.Optimizer(linear_solver=CuCholeskySolver)) # for sparse problems
+model = Model(()->MadNLP.Optimizer(linear_solver=GLUSolver)) # for sparse problems
+model = Model(()->MadNLP.Optimizer(linear_solver=RFSolver)) # for sparse problems
```
## Citing MadNLP.jl
+
If you use MadNLP.jl in your research, we would greatly appreciate your citing it.
+
```bibtex
+@article{shin2023accelerating,
+ title={Accelerating optimal power flow with {GPU}s: {SIMD} abstraction of nonlinear programs and condensed-space interior-point methods},
+ author={Shin, Sungho and Pacaud, Fran{\c{c}}ois and Anitescu, Mihai},
+ journal={arXiv preprint arXiv:2307.16830},
+ year={2023}
+}
@article{shin2020graph,
title={Graph-Based Modeling and Decomposition of Energy Infrastructures},
author={Shin, Sungho and Coffrin, Carleton and Sundar, Kaarthik and Zavala, Victor M},
@@ -141,5 +141,6 @@ If you use MadNLP.jl in your research, we would greatly appreciate your citing i
}
```
-## Bug reports and support
-Please report issues and feature requests via the [Github issue tracker](https://github.com/MadNLP/MadNLP.jl/issues).
+## Supporting MadNLP.jl
+- Please report issues and feature requests via the [GitHub issue tracker](https://github.com/MadNLP/MadNLP.jl/issues).
+- Questions are welcome on the [GitHub discussion forum](https://github.com/MadNLP/MadNLP.jl/discussions).
diff --git a/benchmark/.Project.toml b/benchmark/.Project.toml
deleted file mode 100644
index 5490797b..00000000
--- a/benchmark/.Project.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[deps]
-CUTEst = "1b53aba6-35b6-5f92-a507-53c67d53f819"
-Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
-MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
-NLPModelsIpopt = "f4238b75-b362-5c4c-b852-0801c9a21d71"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655"
-JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
\ No newline at end of file
diff --git a/benchmark/README.md b/benchmark/README.md
deleted file mode 100644
index d7a88628..00000000
--- a/benchmark/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-Running benchmark requires pacakge ArgParse. First install ArgParse with:
-```
-julia -e 'import Pkg; Pkg.add("ArgParse")'
-```
-
-To get a help for running benchmark, run:
-```
-julia runbenchmark.jl --help
-```
diff --git a/benchmark/benchmark-cutest.jl b/benchmark/benchmark-cutest.jl
deleted file mode 100644
index b3b79ec5..00000000
--- a/benchmark/benchmark-cutest.jl
+++ /dev/null
@@ -1,111 +0,0 @@
-include("config.jl")
-Pkg.add(PackageSpec(name="CUTEst",rev="main")) # will be removed once the new CUTEst version is released
-
-@everywhere using CUTEst
-
-if SOLVER == "master" || SOLVER == "current"
- @everywhere begin
- using MadNLP, MadNLPHSL
- solver = nlp -> madnlp(nlp,linear_solver=MadNLPMa57,max_wall_time=900., print_level=PRINT_LEVEL)
- function get_status(code::MadNLP.Status)
- if code == MadNLP.SOLVE_SUCCEEDED
- return 1
- elseif code == MadNLP.SOLVED_TO_ACCEPTABLE_LEVEL
- return 2
- else
- return 3
- end
- end
- end
-elseif SOLVER == "ipopt"
- @everywhere begin
- solver = nlp -> ipopt(nlp,linear_solver="ma57",max_cpu_time=900., print_level=PRINT_LEVEL)
- using NLPModelsIpopt
- function get_status(code::Symbol)
- if code == :first_order
- return 1
- elseif code == :acceptable
- return 2
- else
- return 3
- end
- end
- end
-elseif SOLVER == "knitro"
- # TODO
-else
- error("Proper SOLVER should be given")
-end
-
-
-@everywhere function decodemodel(name)
- println("Decoding $name")
- finalize(CUTEstModel(name))
-end
-
-@everywhere function evalmodel(name,solver;gcoff=false)
- println("Solving $name")
- nlp = CUTEstModel(name; decode=false)
- try
- gcoff && GC.enable(false);
- mem = @allocated begin
- t = @elapsed begin
- retval = solver(nlp)
- end
- end
- gcoff && GC.enable(true);
- finalize(nlp)
- return (status=get_status(retval.status),time=t,mem=mem,iter=retval.iter)
- catch e
- finalize(nlp)
- throw(e)
- end
-end
-
-function benchmark(solver,probs;warm_up_probs = [], decode = false)
- println("Warming up (forcing JIT compile)")
- decode && broadcast(decodemodel,warm_up_probs)
- r = [remotecall.(prob->evalmodel(prob,solver;gcoff=GCOFF),i,warm_up_probs) for i in procs() if i!= 1]
- fetch.(r)
-
- println("Decoding problems")
- decode && broadcast(decodemodel,probs)
-
- println("Solving problems")
- retvals = pmap(prob->evalmodel(prob,solver;gcoff=GCOFF),probs)
- status = [retval.status for retval in retvals]
- time = [retval.time for retval in retvals]
- mem = [retval.mem for retval in retvals]
- iter = [retval.iter for retval in retvals]
- status,time,mem,iter
-end
-
-exclude = [
- # MadNLP running into error
- # Ipopt running into error
- "EG3", # lfact blows up
- # Problems that are hopelessly large
- "TAX213322","TAXR213322","TAX53322","TAXR53322",
- "YATP1LS","YATP2LS","YATP1CLS","YATP2CLS",
- "CYCLOOCT","CYCLOOCF",
- "LIPPERT1",
- "GAUSSELM",
- "BA-L52LS","BA-L73LS","BA-L21LS"
-]
-
-if QUICK
- probs = readdlm("cutest-quick-names.csv")[:]
-else
- probs = CUTEst.select()
-end
-
-filter!(e->!(e in exclude),probs)
-
-status,time,mem,iter = benchmark(solver,probs;warm_up_probs = ["EIGMINA"], decode = DECODE)
-
-f = !QUICK ? "" : "-quick"
-writedlm("name-cutest$f.csv",probs,',')
-writedlm("status-cutest-$(SOLVER)$f.csv",status,',')
-writedlm("time-cutest-$(SOLVER)$f.csv",time,',')
-writedlm("mem-cutest-$(SOLVER)$f.csv",mem,',')
-writedlm("iter-cutest-$(SOLVER)$f.csv",iter,',')
diff --git a/benchmark/benchmark-power.jl b/benchmark/benchmark-power.jl
deleted file mode 100644
index 645223d3..00000000
--- a/benchmark/benchmark-power.jl
+++ /dev/null
@@ -1,129 +0,0 @@
-include("config.jl")
-
-@everywhere begin
- if haskey(ENV, "PGLIB_PATH")
- const PGLIB_PATH = ENV["PGLIB_PATH"]
- else
- error("Unable to find path to PGLIB benchmark.\n"*
- "Please set environment variable `PGLIB_PATH` to run benchmark with PowerModels.jl")
- end
-
- using PowerModels, MathOptInterface, JuMP
- const MOI = MathOptInterface
-
- PowerModels.silence()
-
- function evalmodel(prob,solver;gcoff=false)
- case,type = prob
- pm = instantiate_model(joinpath(PGLIB_PATH,case),type,PowerModels.build_opf)
- println("Solving $(get_name(pm))")
- gcoff && GC.enable(false);
- retval = solver(pm)
- gcoff && GC.enable(true);
- return retval
- end
-
- function get_status(code::MOI.TerminationStatusCode)
- if code == MOI.LOCALLY_SOLVED
- return 1
- elseif code == MOI.ALMOST_OPTIMAL
- return 2
- else
- return 3
- end
- end
-
- get_name(pm) = "$(pm.data["name"])-$(typeof(pm))"
-end
-
-if SOLVER == "master" || SOLVER == "current"
- @everywhere begin
- using MadNLP, MadNLPHSL
- solver = pm -> begin
- set_optimizer(pm.model,()->
- MadNLP.Optimizer(linear_solver=MadNLPMa57,max_wall_time=900.,tol=1e-6, print_level=PRINT_LEVEL))
- mem=@allocated begin
- t=@elapsed begin
- optimize_model!(pm)
- end
- end
- return get_status(termination_status(pm.model)),t,mem,barrier_iterations(pm.model)
- end
- end
-elseif SOLVER == "ipopt"
- @everywhere begin
- using Ipopt
-
- const ITER = [-1]
- function ipopt_callback(
- prob::IpoptProblem,alg_mod::Cint,iter_count::Cint,obj_value::Float64,
- inf_pr::Float64,inf_du::Float64,mu::Float64,d_norm::Float64,
- regularization_size::Float64,alpha_du::Float64,alpha_pr::Float64,ls_trials::Cint)
-
- ITER[] += 1
- return true
- end
-
- solver = pm -> begin
- ITER[] = 0
- set_optimizer(pm.model,()->
- Ipopt.Optimizer(linear_solver="ma57",max_cpu_time=900.,tol=1e-6, print_level=PRINT_LEVEL))
- MOI.set(pm.model, Ipopt.CallbackFunction(), ipopt_callback)
- mem=@allocated begin
- t=@elapsed begin
- optimize_model!(pm)
- end
- end
- return get_status(termination_status(pm.model)),t,mem,ITER[]
- end
- end
-elseif SOLVER == "knitro"
- # TODO
-else
- error("Proper SOLVER should be given")
-end
-
-
-function benchmark(solver,probs;warm_up_probs = [])
- println("Warming up (forcing JIT compile)")
- warm_up_pms = [
- instantiate_model(joinpath(PGLIB_PATH,case),type,PowerModels.build_opf)
- for (case,type) in warm_up_probs]
- println(get_name.(warm_up_pms))
- rs = [remotecall.(solver,i,warm_up_pms) for i in procs() if i!= 1]
- ws = [wait.(r) for r in rs]
- fs= [fetch.(r) for r in rs]
-
- println("Solving problems")
- retvals = pmap(prob->evalmodel(prob,solver;gcoff=GCOFF),probs)
-
- status = [status for (status,time,mem,iter) in retvals]
- time = [time for (status,time,mem,iter) in retvals]
- mem = [mem for (status,time,mem,iter) in retvals]
- iter = [iter for (status,time,mem,iter) in retvals]
-
- return status,time,mem,iter
-end
-
-if QUICK
- cases = filter!(e->(occursin("pglib_opf_case",e) && occursin("pegase",e)),readdir(PGLIB_PATH))
- types = [ACPPowerModel, ACRPowerModel]
-else
- cases = filter!(e->occursin("pglib_opf_case",e),readdir(PGLIB_PATH))
- types = [ACPPowerModel, ACRPowerModel, ACTPowerModel,
- DCPPowerModel, DCMPPowerModel, NFAPowerModel,
- DCPLLPowerModel,LPACCPowerModel, SOCWRPowerModel,
- QCRMPowerModel,QCLSPowerModel]
-end
-probs = [(case,type) for case in cases for type in types]
-name = ["$case-$type" for case in cases for type in types]
-
-status,time,mem,iter = benchmark(solver,probs;warm_up_probs = [
- ("pglib_opf_case1888_rte.m",ACPPowerModel)
-])
-
-writedlm("name-power.csv",name,',')
-writedlm("status-power-$(SOLVER).csv",status)
-writedlm("time-power-$(SOLVER).csv",time)
-writedlm("mem-power-$(SOLVER).csv",mem)
-writedlm("iter-power-$(SOLVER).csv",iter)
diff --git a/benchmark/config.jl b/benchmark/config.jl
deleted file mode 100644
index 58c3acf1..00000000
--- a/benchmark/config.jl
+++ /dev/null
@@ -1,38 +0,0 @@
-using Pkg, Distributed, DelimitedFiles
-
-const NP = ARGS[1]
-const SOLVER = ARGS[2]
-const VERBOSE = ARGS[3] == "true"
-const QUICK = ARGS[4] == "true"
-const GCOFF = ARGS[5] == "true"
-const DECODE = ARGS[6] == "true"
-
-addprocs(parse(Int,NP),exeflags="--project=.")
-Pkg.instantiate()
-
-if SOLVER == "master"
- Pkg.add(PackageSpec(name="MadNLP",rev="master"))
- Pkg.add(PackageSpec(name="MadNLPHSL",rev="master"))
- Pkg.build("MadNLPHSL")
-elseif SOLVER == "current"
- Pkg.develop(path=joinpath(@__DIR__,".."))
- Pkg.develop(path=joinpath(@__DIR__,"..","lib","MadNLPHSL"))
- Pkg.build("MadNLPHSL")
-elseif SOLVER == "ipopt"
-elseif SOLVER == "knitro"
-else
- error("Proper ARGS should be given")
-end
-
-# Set verbose option
-if SOLVER == "ipopt"
- const PRINT_LEVEL = VERBOSE ? 5 : 0
-elseif SOLVER == "knitro"
- const PRINT_LEVEL = VERBOSE ? 3 : 0
-else
- using MadNLP
- const PRINT_LEVEL = VERBOSE ? MadNLP.INFO : MadNLP.ERROR
-end
-
-# Set quick option
-
diff --git a/benchmark/cutest-quick-names.csv b/benchmark/cutest-quick-names.csv
deleted file mode 100644
index 33b05671..00000000
--- a/benchmark/cutest-quick-names.csv
+++ /dev/null
@@ -1,229 +0,0 @@
-PRIMALC1
-DIXMAANI
-HIER13
-LUKVLI7
-GAUSS2
-LUKSAN13LS
-CHARDIS1
-A5NSDSIL
-QPCBOEI1
-POLAK4
-DUAL2
-EXPFITA
-VAREIGVL
-MPC2
-BLOWEYA
-DECONVB
-MSS1
-POWELLBC
-ACOPP57
-WALL50
-FBRAIN2
-ACOPP300
-AUG2D
-HS106
-GMNCASE2
-LUKVLE8
-READING2
-MAXLIKA
-CHEBYQAD
-HYDROELM
-GULFNE
-CLEUVEN4
-HAIFAL
-JUDGENE
-DITTERT
-TRIGON1NE
-OBSTCLAE
-READING6
-SBRYBND
-ARGLINC
-CVXQP2
-TABLE8
-NINENEW
-STEENBRA
-BA-L1SP
-EXPFITNE
-LUKSAN17
-DUALC5
-STCQP1
-DEGENQP
-DEGDIAG
-LEUVEN7
-DALLASM
-READING8
-HS101
-GENROSEBNE
-EIGENALS
-READING7
-OET3
-CHANDHEQ
-YATP1LS
-OSCIPATH
-SEMICN2U
-MODBEALENE
-JANNSON4
-DTOC1NC
-KSS
-TABLE1
-DRCAV2LQ
-MGH17SLS
-BRAINPC2
-PROBPENL
-MGH17S
-DIAGPQB
-DEMBO7
-HS119
-PORTSNQP
-YATP1CNE
-THURBER
-VESUVIOU
-TAX213322
-NGONE
-MRIBASIS
-EXPFITC
-FBRAINNE
-LINVERSENE
-HYDCAR6LS
-GMNCASE4
-ZAMB2-11
-ALJAZZAF
-HIMMELBK
-WOODSNE
-LUKVLI10
-CHANNEL
-ORBIT2
-EIGENA2
-ACOPP118
-CHNRSNBM
-LHAIFAM
-NASH
-CYCLIC3LS
-CYCLIC3
-BA-L49LS
-HS99
-CATENA
-CHWIRUT1
-OPTPRLOC
-HYDROELL
-BIGBANK
-OSORIO
-SPINLS
-MNISTS0
-ANTWERP
-PORTFL4
-PDE1
-CURLY20
-DEGENLPA
-LUBRIFC
-MANCINONE
-DALE
-HATFLDC
-INTEGREQ
-NET1
-LUKSAN12
-UBH5
-AGG
-WATSONNE
-TAX13322
-PRIMAL1
-10FOLDTR
-QPCBLEND
-CYCLOOCT
-FIVE20B
-HUESTIS
-TWIRISM1
-DMN37142
-LIARWHDNE
-COOLHANS
-MSS3
-BDRY2
-TRO5X5
-MSS2
-TAX53322
-CORE1
-LINSPANH
-ZAMB2-9
-KSIP
-CHAINWOONE
-DEGTRIDL
-LINCONT
-TWIRIBG1
-POWER
-DMN37143
-PRIMAL3
-EIGENBCO
-TRIMLOSS
-SPANHYD
-OPTCNTRL
-ROSEPETAL
-SANTALS
-PRIMALC8
-SPECANNE
-READING5
-EXPQUAD
-ARGLCLE
-CHNRSBNE
-MODBEALE
-EIGENC
-ARGTRIG
-STATIC3
-CRESC132
-CHANDHEU
-KISSING2
-EXPLIN
-GILBERT
-GPP
-LUKVLI9
-RES
-LCH
-MUONSINE
-BA-L73
-TRO21X5
-SSEBNLN
-ELATTAR
-TWOD
-PRIMAL4
-COATING
-WALL100
-MSQRTA
-PRIMAL2
-ODNAMUR
-GENROSENE
-TRIGON2NE
-CHARDIS0
-SPMSQRT
-QING
-SMMPSF
-NUFFIELD
-GOFFIN
-ELEC
-BA-L16LS
-SYNPOP24
-ZIGZAG
-SSEBLIN
-BA-L1
-FCCU
-CONT5-QP
-QPNBAND
-AIRPORT
-FEEDLOC
-KISSING
-FERRISDC
-MAKELA4
-VANDANIUMS
-AVION2
-BROWNALE
-ROSEPETAL2
-DEGENQPC
-DRUGDISE
-QINGNE
-BA-L52LS
-JANNSON3
-NONMSQRTNE
-DECONVC
-BA-L52
-WALL10
-MODEL
-OPTMASS
-ORTHREGF
diff --git a/benchmark/plot.jl b/benchmark/plot.jl
deleted file mode 100644
index 1b24c8e6..00000000
--- a/benchmark/plot.jl
+++ /dev/null
@@ -1,84 +0,0 @@
-using Plots, DelimitedFiles
-
-const CLASSES = filter(e-> e in ["cutest","power"], ARGS)
-const SOLVERS = filter(e-> e in ["current","master","ipopt","knitro"], ARGS)
-
-const LABELS = Dict(
- "current" => "MadNLP (dev)",
- "master" => "MadNLP (master)",
- "ipopt" => "Ipopt",
- "knitro" => "Knitro",
-)
-
-for class in CLASSES
-
- time = Dict()
- status = Dict()
- mem = Dict()
- iter = Dict()
-
- name = readdlm("name-$class.csv")[:]
- for solver in SOLVERS
- status[solver] = readdlm("status-$class-$solver.csv")[:]
- time[solver] = readdlm("time-$class-$solver.csv")[:]
- mem[solver] = readdlm("mem-$class-$solver.csv")[:]
- iter[solver] = readdlm("iter-$class-$solver.csv")[:]
- end
-
- for (str,metric) in [("time",time),("iterations",iter),("memory",mem)]
- relmetric = deepcopy(metric)
-
- for i=1:length(name)
- top = []
- topstatus = 3
- for solver in SOLVERS
- if status[solver][i] < topstatus
- empty!(top)
- push!(top,solver)
- topstatus = status[solver][i]
- elseif status[solver][i] == topstatus
- push!(top,solver)
- end
- end
-
- fastest = Inf
- for solver in top
- fastest = min(fastest,metric[solver][i])
- end
-
- for solver in SOLVERS
- if status[solver][i] == 3
- relmetric[solver][i] = NaN
- else
- relmetric[solver][i] = log2(relmetric[solver][i]/fastest)
- end
- end
- end
-
- for solver in SOLVERS
- filter!(a->!isnan(a),relmetric[solver])
- end
-
- p = plot(;
- ylim=(0,1),
- xlim=(
- minimum(minimum(relmetric[solver]) for solver in SOLVERS),
- maximum(maximum(relmetric[solver]) for solver in SOLVERS)
- ),
- xlabel="Not More Than 2ˣ-Times Worse Than Best Solver ($str)",
- ylabel="Fraction of Problems Solved",
- framestyle=:box,
- legend=:bottomright)
-
- for solver in SOLVERS
- y = [0:length(relmetric[solver]);length(relmetric[solver])]/length(name)
- push!(relmetric[solver],maximum(maximum(relmetric[s]) for s in SOLVERS))
- push!(relmetric[solver],0)
- plot!(p,sort(relmetric[solver]),y;
- qqline=:none,
- linetype=:steppost,
- label=LABELS[solver])
- end
- savefig(p,"$str-$class.pdf")
- end
-end
diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl
deleted file mode 100644
index 09b94412..00000000
--- a/benchmark/runbenchmarks.jl
+++ /dev/null
@@ -1,74 +0,0 @@
-try
- using ArgParse
-catch e
- println("Package ArgParse is required, but not installed. Install now? [y/n]")
- if readline() == "y"
- import Pkg
- Pkg.add("ArgParse")
- using ArgParse
- else
- exit()
- end
-end
-
-function parse_commandline()
- s = ArgParseSettings()
-
- @add_arg_table s begin
- "--nprocs", "-p"
- help = "number of worker processors"
- arg_type = Int
- default = 1
- "--verbose", "-v"
- help = "print the solver outputs"
- action = :store_true
- "--gcoff", "-g"
- help = "turn of the julia garbage collector"
- action = :store_true
- "--quick", "-q"
- help = "run tests with reduced number of instances"
- action = :store_true
- "--decode", "-d"
- help = "decode the cutest instances"
- action = :store_true
- "testsets"
- help = "testsets for benchmark (separated by comma). possible values: cutest, power"
- required = true
- "solvers"
- help = "solvers to benchmark (separated by comma). possible values: current, master, ipopt"
- required = true
- end
-
- return parse_args(s)
-end
-
-function main()
- pargs = parse_commandline()
-
- CLASSES = split(pargs["testsets"],",")
- SOLVERS = split(pargs["solvers"],",")
-
- # sanity check
- issubset(CLASSES,["cutest","power"]) || error("argument testsets is incorrect")
- issubset(SOLVERS,["current","master","ipopt","knitro"]) || error("argument solvers is incorrect")
-
- PROJECT_PATH = dirname(@__FILE__)
-
- cp(
- joinpath(PROJECT_PATH, ".Project.toml"),
- joinpath(PROJECT_PATH, "Project.toml"),
- force=true
- )
-
- for class in CLASSES
- for solver in SOLVERS
- launch_script = joinpath(PROJECT_PATH, "benchmark-$class.jl")
- run(`julia --project=$PROJECT_PATH $launch_script $(pargs["nprocs"]) $solver $(pargs["verbose"]) $(pargs["quick"]) $(pargs["gcoff"]) $(pargs["decode"])`)
- end
- end
-
- run(Cmd([["julia","--project=$PROJECT_PATH","plot.jl"];String.(CLASSES);String.(SOLVERS)]))
-
-end
-
-main()
diff --git a/benchmark/smaller-set.jl b/benchmark/smaller-set.jl
deleted file mode 100644
index 5f7365b2..00000000
--- a/benchmark/smaller-set.jl
+++ /dev/null
@@ -1,33 +0,0 @@
-using CUTEst, DelimitedFiles
-
-probs= CUTEst.select()
-
-
-similar(a,b;atol=10,rtol=0.5) = (abs(a-b) < atol) || (abs(a-b)/max(a,b) < rtol)
-
-function is_similar_exists(bin,nlp)
- for (name,nvar,ncon,nnzh,nnzj) in bin
- if similar(nvar,nlp.meta.nvar) && similar(ncon,nlp.meta.ncon) && similar(nnzh,nlp.meta.nnzh) && similar(nnzj,nlp.meta.nnzj)
- return true
- end
- end
- return false
-end
-
-bin = Tuple{String,Int,Int,Int,Int}[]
-
-
-for i=1:length(probs)
- prob = probs[i]
- nlp = CUTEstModel(prob)
- finalize(nlp)
-
- if is_similar_exists(bin,nlp)
- println("#$cnt Skipping $(nlp.meta.name)")
- else
- println("#$cnt Putting $(nlp.meta.name) in the bin")
- push!(bin, (nlp.meta.name,nlp.meta.nvar,nlp.meta.ncon,nlp.meta.nnzh,nlp.meta.nnzj))
- end
-end
-
-writedlm("cutest-quick-names.csv",[name for (name,nvar,ncon,nnzh,nnzj) in bin],',')
diff --git a/docs/install.jl b/docs/install.jl
index bc80ecf9..44a55c90 100644
--- a/docs/install.jl
+++ b/docs/install.jl
@@ -2,7 +2,7 @@ using Pkg
MADNLP_DIR = pwd()
+Pkg.develop(path=joinpath(MADNLP_DIR, "lib", "MadNLPTests"))
Pkg.develop(path=MADNLP_DIR)
-Pkg.develop(PackageSpec(path=joinpath(dirname(@__FILE__), "../lib/MadNLPTests/")))
Pkg.instantiate()
diff --git a/docs/make.jl b/docs/make.jl
index e47ac5c6..3f10590d 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -4,24 +4,27 @@ using MadNLP
makedocs(
sitename = "MadNLP.jl",
format = Documenter.HTML(
+ assets = ["assets/favicon.ico"],
prettyurls = Base.get(ENV, "CI", nothing) == "true",
mathengine = Documenter.KaTeX()
),
modules = [MadNLP],
repo = "https://github.com/MadNLP/MadNLP.jl/blob/{commit}{path}#{line}",
- strict = true,
checkdocs = :exports,
+ clean=true,
pages = [
"Home" => "index.md",
"Installation" => "installation.md",
"Quickstart" => "quickstart.md",
"Options" => "options.md",
"Manual" => [
- "Solver" => "man/solver.md",
+ "IPM solver" => "man/solver.md",
"KKT systems" => "man/kkt.md",
"Linear Solvers" => "man/linear_solvers.md",
],
"API Reference" => [
+ "IPM solver" => "lib/ipm.md",
+ "Callback wrappers" => "lib/callbacks.md",
"KKT systems" => "lib/kkt.md",
"Linear Solvers" => "lib/linear_solvers.md",
]
diff --git a/docs/src/logo-full.svg b/docs/src/assets/logo-full.svg
similarity index 100%
rename from docs/src/logo-full.svg
rename to docs/src/assets/logo-full.svg
diff --git a/docs/src/assets/logo.ico b/docs/src/assets/logo.ico
new file mode 100644
index 00000000..89b48aff
Binary files /dev/null and b/docs/src/assets/logo.ico differ
diff --git a/docs/src/assets/logo.svg b/docs/src/assets/logo.svg
new file mode 100644
index 00000000..c8eb6f59
--- /dev/null
+++ b/docs/src/assets/logo.svg
@@ -0,0 +1,123 @@
+
+
+
diff --git a/docs/src/index.md b/docs/src/index.md
index 12e077ae..c47ac88c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,4 +1,4 @@
-![Logo](logo-full.svg)
+![Logo](assets/logo-full.svg)
MadNLP is an open-source nonlinear programming solver,
purely implemented in Julia. MadNLP implements a filter line-search
@@ -14,8 +14,9 @@ MadNLP targets the resolution of constrained nonlinear problems,
formulating as
```math
\begin{aligned}
- \min_{x_\ell \leq x \leq x_u} \; & f(x) \\
- \text{subject to} \quad & g_\ell \leq g(x) \leq g_u
+ \min_{x} \; & f(x) \\
+ \text{subject to} \quad & g_\ell \leq g(x) \leq g_u \\
+ & x_\ell \leq x \leq x_u
\end{aligned}
```
where $$x \in \mathbb{R}^n$$ is the decision variable, $$f: \mathbb{R}^n \to \mathbb{R}$$
@@ -43,8 +44,10 @@ a slack variables $$s \in \mathbb{R}^m$$ to rewrite all the inequality
constraints as equality constraints:
```math
\begin{aligned}
- \min_{x_\ell \leq x \leq x_u, s} \; & f(x) \\
- \text{subject to} \quad & g(x) - s = 0 , \quad g_\ell \leq s \leq g_u
+ \min_{x, s} \; & f(x) \\
+ \text{subject to} \quad & g(x) - s = 0 \\
+ & g_\ell \leq s \leq g_u \\
+ & x_\ell \leq x \leq x_u
\end{aligned}
```
@@ -92,6 +95,12 @@ For that reason we recommend using efficient sparse-linear solvers
If you use MadNLP.jl in your research, we would greatly appreciate your citing it.
```bibtex
+@article{shin2023accelerating,
+ title={Accelerating optimal power flow with {GPU}s: {SIMD} abstraction of nonlinear programs and condensed-space interior-point methods},
+ author={Shin, Sungho and Pacaud, Fran{\c{c}}ois and Anitescu, Mihai},
+ journal={arXiv preprint arXiv:2307.16830},
+ year={2023}
+}
@article{shin2020graph,
title={Graph-Based Modeling and Decomposition of Energy Infrastructures},
author={Shin, Sungho and Coffrin, Carleton and Sundar, Kaarthik and Zavala, Victor M},
diff --git a/docs/src/installation.md b/docs/src/installation.md
index 40e9400d..a6952b7f 100644
--- a/docs/src/installation.md
+++ b/docs/src/installation.md
@@ -16,28 +16,17 @@ In addition to Lapack and Umfpack, the user can install the following extensions
use a specialized linear solver.
## HSL linear solver
-If the user has access to HSL, we recommend using this set of linear
-solver inside the interior-point algorithm.
-
-To build MadNLP with HSL linear solvers (Ma27, Ma57, Ma77, Ma86, Ma97), the
-source codes need to be obtained by the user from
- under Coin-HSL Full (Stable). The source
-codes are distributed as a tarball file `coinhsl-*.tar.gz`. Once
-uncompressed, the absolute path to the extracted source code should be specified as:
+Obtain a license and download HSL_jll.jl from https://licences.stfc.ac.uk/product/julia-hsl. There are two versions available: LBT and OpenBLAS. LBT is the recommended option for Julia >= v1.9. Install the downloaded package into your current environment using:
```julia
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl"
+import Pkg
+Pkg.develop(path = "/full/path/to/HSL_jll.jl")
```
If the user has already compiled the HSL solver library, one can
-simply provide a path to the compiled shared library (in this case, the source code is
-not compiled and the provided shared library is directly used):
-```julia
-julia> ENV["MADNLP_HSL_LIBRARY_PATH"] = "/usr/lib/libcoinhsl.so"
+simply override the path to the artifact by editing `~/.julia/artifacts/Overrides.toml`:
```
-
-Once the environment variable set, build `MadNLPHSL` with
-```julia
-pkg> build MadNLPHSL
+# replace HSL_jll artifact /usr/local/lib/libhsl.so
+ecece3e2c69a413a0e935cf52e03a3ad5492e137 = "/usr/local"
```
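+
+To check that the override is picked up, one can query the library path
+exposed by the JLL module. This is a hedged sketch: the `libhsl_path`
+variable follows the usual JLL naming convention and is assumed here rather
+than taken from the HSL_jll documentation.
+```julia
+using HSL_jll
+# Should print the overridden location, e.g. /usr/local/lib/libhsl.so
+println(HSL_jll.libhsl_path)
+```
+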
## Mumps linear solver
@@ -52,7 +41,7 @@ pkg> add MadNLPMumps
## Pardiso linear solver
To use Pardiso, the user needs to obtain the Pardiso shared libraries from
-, provide the absolute path to the shared library:
+, provide the absolute path to the shared library:
```
julia> ENV["MADNLP_PARDISO_LIBRARY_PATH"] = "/usr/lib/libpardiso600-GNU800-X86-64.so"
```
diff --git a/docs/src/lib/callbacks.md b/docs/src/lib/callbacks.md
new file mode 100644
index 00000000..65dbc11e
--- /dev/null
+++ b/docs/src/lib/callbacks.md
@@ -0,0 +1,54 @@
+```@meta
+CurrentModule = MadNLP
+```
+
+# Callbacks
+
+In MadNLP, a nonlinear program is implemented with a given `AbstractNLPModel`.
+The model may have a form unsuitable for the interior-point algorithm.
+For that reason, MadNLP wraps the `AbstractNLPModel` internally
+using custom data structures, encoded as an `AbstractCallback`.
+Depending on the setting, the `AbstractNLPModel` is wrapped
+as a [`DenseCallback`](@ref) or, alternatively, as a [`SparseCallback`](@ref).
+
+```@docs
+AbstractCallback
+DenseCallback
+SparseCallback
+
+```
+
+The function [`create_callback`](@ref) instantiates an `AbstractCallback`
+from a given `NLPModel`:
+```@docs
+create_callback
+
+```
+
+Internally, an [`AbstractCallback`](@ref) reformulates the inequality
+constraints as equality constraints by introducing additional slack variables.
+The fixed variables are reformulated as parameters (using [`MakeParameter`](@ref))
+or are relaxed (using [`RelaxBound`](@ref)). The equality constraints can
+be kept as is with [`EnforceEquality`](@ref) (the default option) or relaxed
+into inequality constraints with [`RelaxEquality`](@ref). In the latter case,
+MadNLP solves a relaxation of the original problem.
+
+```@docs
+AbstractFixedVariableTreatment
+MakeParameter
+RelaxBound
+
+AbstractEqualityTreatment
+EnforceEquality
+RelaxEquality
+```
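+
+As an illustration, the sketch below builds a callback with explicit
+reformulation strategies. This is a hedged example, not taken verbatim from
+the MadNLP sources: the keyword names `fixed_variable_treatment` and
+`equality_treatment` are assumed to match the types documented above, and
+ADNLPModels.jl is used only to build a toy model.
+```julia
+using MadNLP, ADNLPModels
+
+# Toy model; x2 is fixed by equal lower and upper bounds.
+nlp = ADNLPModel(
+    x -> (x[1] - 1.0)^2 + x[2]^2,  # objective
+    [0.0, 0.5],                    # initial point
+    [-1.0, 0.5], [1.0, 0.5],       # variable bounds (x2 fixed at 0.5)
+)
+
+# Wrap the model for the sparse code path, treating fixed variables
+# as parameters and keeping equality constraints as they are.
+cb = MadNLP.create_callback(
+    MadNLP.SparseCallback, nlp;
+    fixed_variable_treatment = MadNLP.MakeParameter,
+    equality_treatment = MadNLP.EnforceEquality,
+)
+```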
+
+MadNLP has to keep in memory all the indices associated with the equality
+and inequality constraints. Similarly, MadNLP has to keep track
+of the indices of the bounded variables and the fixed variables. MadNLP
+provides a utility [`get_index_constraints`](@ref) to build all the indices required
+by MadNLP. Each index vector is encoded as a `Vector{Int}`.
+```@docs
+get_index_constraints
+
+```
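+
+A hedged usage sketch: the call below assumes the method taking an
+`AbstractNLPModel` directly, and the field names `ind_ineq` and `ind_fixed`
+are illustrative; refer to the docstring above for the exact interface.
+```julia
+ind_cons = MadNLP.get_index_constraints(nlp)  # `nlp` as in the sketch above
+# The result gathers the index vectors, each one a Vector{Int}, e.g.:
+ind_cons.ind_ineq   # indices of the inequality constraints
+ind_cons.ind_fixed  # indices of the fixed variables
+```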
diff --git a/docs/src/lib/ipm.md b/docs/src/lib/ipm.md
new file mode 100644
index 00000000..92b2ace2
--- /dev/null
+++ b/docs/src/lib/ipm.md
@@ -0,0 +1,23 @@
+```@meta
+CurrentModule = MadNLP
+```
+
+# MadNLP solver
+
+MadNLP takes as input a nonlinear program encoded as
+an `AbstractNLPModel` and solves it with an interior-point method.
+The main entry point is the function `madnlp`:
+```@docs
+madnlp
+MadNLPExecutionStats
+```
+
+In detail, the function [`madnlp`](@ref) builds a `MadNLPSolver` storing all
+the structures required by the solution algorithm. Once the
+`MadNLPSolver` is instantiated, the function `solve!` is called to solve the
+nonlinear program with MadNLP's interior-point algorithm.
+
+```@docs
+MadNLPSolver
+
+```
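+
+For example (a minimal sketch, assuming `nlp` is an `AbstractNLPModel`):
+```julia
+# One-shot call: build a MadNLPSolver internally and run the algorithm.
+results = madnlp(nlp; print_level=MadNLP.INFO)
+
+# Equivalent two-step call, keeping a handle on the solver.
+solver = MadNLP.MadNLPSolver(nlp; print_level=MadNLP.INFO)
+results = MadNLP.solve!(solver)
+```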
diff --git a/docs/src/lib/kkt.md b/docs/src/lib/kkt.md
index 6d261a57..4a2e2883 100644
--- a/docs/src/lib/kkt.md
+++ b/docs/src/lib/kkt.md
@@ -28,6 +28,8 @@ AbstractCondensedKKTSystem
Each `AbstractKKTSystem` follows the interface described below:
```@docs
+create_kkt_system
+
num_variables
get_kkt
get_jacobian
@@ -39,23 +41,27 @@ compress_hessian!
compress_jacobian!
jtprod!
regularize_diagonal!
-set_jacobian_scaling!
is_inertia_correct
-is_reduced
nnz_jacobian
```
+## Sparse KKT systems
+
By default, MadNLP stores a `AbstractReducedKKTSystem` in sparse format,
as implemented by `SparseKKTSystem`:
```@docs
SparseKKTSystem
```
-The user has the choice to store the KKT system as a sparse `AbstractUnreducedKKTSystem`:
+Alternatively, the user can store the KKT system as a `SparseUnreducedKKTSystem`
+or as a `SparseCondensedKKTSystem`:
```@docs
SparseUnreducedKKTSystem
+SparseCondensedKKTSystem
```
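+
+The KKT system is selected with the `kkt_system` option (a short sketch,
+assuming `nlp` is an `AbstractNLPModel`):
+```julia
+results = madnlp(nlp; kkt_system=MadNLP.SparseCondensedKKTSystem)
+```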
+## Dense KKT systems
+
MadNLP also provides two structures to store the KKT system
in a dense matrix. Although less efficient than their sparse counterparts,
these two structures allow storing the KKT system efficiently when the
@@ -66,10 +72,8 @@ DenseCondensedKKTSystem
```
-
## AbstractKKTVector
-Each instance of `AbstractKKTVector` implements
-the following interface.
+Each instance of `AbstractKKTVector` implements the following interface.
```@docs
AbstractKKTVector
@@ -85,10 +89,9 @@ dual_ub
```
-By default, MadNLP provides two different `AbstractKKTVector`.
+By default, MadNLP provides one implementation of an `AbstractKKTVector`.
```@docs
-ReducedKKTVector
UnreducedKKTVector
```
diff --git a/docs/src/man/kkt.md b/docs/src/man/kkt.md
index 2253b890..5aac432e 100644
--- a/docs/src/man/kkt.md
+++ b/docs/src/man/kkt.md
@@ -2,6 +2,7 @@
CurrentModule = MadNLP
```
```@setup kkt_example
+using LinearAlgebra
using SparseArrays
using NLPModels
using MadNLP
@@ -50,7 +51,7 @@ The Newton step associated to the KKT equations writes
A & -I & 0& 0 & 0 \\
V & 0 & 0 & X & 0 \\
0 & W & 0 & 0 & S
-\end{pmatrix}}^{K_{1}}
+\end{pmatrix}}^{K_{3}}
\begin{pmatrix}
\Delta x \\
\Delta s \\
@@ -63,7 +64,7 @@ The Newton step associated to the KKT equations writes
F_1 \\ F_2 \\ F_3 \\ F_4 \\ F_5
\end{pmatrix}
```
-The matrix $$K_1$$ is unsymmetric, but we can obtain an equivalent symmetric
+The matrix $$K_3$$ is unsymmetric, but we can obtain an equivalent symmetric
system by eliminating the two last rows:
```math
\overline{
@@ -86,48 +87,52 @@ with $$\Sigma_x = X^{-1} v$$ and $$\Sigma_s = S^{-1} w$$.
The matrix $$K_2$$, being symmetric, has a structure more favorable
for a direct linear solver.
-In MadNLP, the matrix $$K_1$$ is encoded as an [`AbstractUnreducedKKTSystem`](@ref)
+In MadNLP, the matrix $$K_3$$ is encoded as an [`AbstractUnreducedKKTSystem`](@ref)
and the matrix $$K_2$$ is encoded as an [`AbstractReducedKKTSystem`](@ref).
## Assembling a KKT system, step by step
-We note that both $$K_1$$ and $$K_2$$ depend on the Hessian
+We note that both $$K_3$$ and $$K_2$$ depend on the Hessian
of the Lagrangian $$W$$, the Jacobian $$A$$ and the
diagonal matrices $$\Sigma_x = X^{-1}V$$ and $$\Sigma_s = S^{-1}W$$.
Hence, we have to update the KKT system at each iteration
of the interior-point algorithm.
-In what follows, we illustrate the inner working of any `AbstractKKTSystem`
-by using the KKT system used by default inside MadNLP: [`SparseKKTSystem`](@ref).
+By default, MadNLP stores the KKT system as a [`SparseKKTSystem`](@ref).
+The KKT system takes as input a [`SparseCallback`](@ref) wrapping
+a given `NLPModel` `nlp`. We instantiate the callback `cb` with
+the function [`create_callback`](@ref):
+```@example kkt_example
+cb = MadNLP.create_callback(
+ MadNLP.SparseCallback,
+ nlp,
+)
+ind_cons = MadNLP.get_index_constraints(nlp)
+
+```
### Initializing a KKT system
The size of the KKT system depends directly on the problem's characteristics
(number of variables, number of equality and inequality constraints).
A [`SparseKKTSystem`](@ref) stores the Hessian and the Jacobian in sparse
-(COO) format. Depending on how we parameterize the system,
-it can output either a sparse matrix or a dense matrix (according to the linear solver
-we are employing under the hood).
-
-For instance, we can parameterize a sparse KKT system as
+(COO) format. The KKT matrix can be factorized using either a
+dense or a sparse linear solver. Here we use the solver provided
+by Lapack:
```@example kkt_example
-T = Float64
-VT = Vector{T}
-MT = SparseMatrixCSC{T, Int}
-QN = MadNLP.ExactHessian{T, VT}
-kkt = MadNLP.SparseKKTSystem{T, VT, MT, QN}(nlp)
-kkt.aug_com
-
+linear_solver = LapackCPUSolver
```
-and a dense KKT system as
+
+We can instantiate a `SparseKKTSystem` using
+the function [`create_kkt_system`](@ref):
```@example kkt_example
-T = Float64
-VT = Vector{T}
-MT = Matrix{T}
-QN = MadNLP.ExactHessian{T, VT}
-kkt = MadNLP.SparseKKTSystem{T, VT, MT, QN}(nlp)
-kkt.aug_com
+kkt = MadNLP.create_kkt_system(
+ MadNLP.SparseKKTSystem,
+ cb,
+ ind_cons,
+ linear_solver,
+)
```
@@ -163,13 +168,13 @@ Then, one can update the vector `hess_values` by using NLPModels.jl:
```@example kkt_example
n = NLPModels.get_nvar(nlp)
m = NLPModels.get_ncon(nlp)
-x = NLPModels.get_x0(nlp)
-l = zeros(m)
+x = NLPModels.get_x0(nlp) # primal variables
+l = zeros(m) # dual variables
NLPModels.hess_coord!(nlp, x, l, hess_values)
```
-Eventually, a post-processing step can be applied to refresh all the values internally:
+Finally, a post-processing step is applied to refresh all the values internally:
```@example kkt_example
MadNLP.compress_hessian!(kkt)
@@ -198,42 +203,80 @@ MadNLP.compress_jacobian!(kkt)
```
-!!! note
- On the contrary to `compress_hessian!`, `compress_jacobian!` is not
- doing nothing by default. Instead, the post-processing step scales the values
- of the Jacobian row by row, applying the scaling of the constraints
- as computed initially by MadNLP.
-
#### Updating the values of the diagonal matrices
-Once the Hessian and the Jacobian updated, it remains
-to udpate the values of the diagonal matrix $$\Sigma_x = X^{-1} V$$
-and $$\Sigma_s = S^{-1} W$$. In the KKT's interface, this amounts
-to call the [`regularize_diagonal!`](@ref) function:
+Once the Hessian and the Jacobian are updated, the algorithm
+can apply primal and dual regularization terms to the diagonal
+of the KKT system to improve the numerical behavior of the linear solver.
+This operation is implemented by the [`regularize_diagonal!`](@ref) function:
```@example kkt_example
-pr_values = ones(n + m)
-du_values = zeros(m)
+pr_value = 1.0
+du_value = 0.0
-MadNLP.regularize_diagonal!(kkt, pr_values, du_values)
+MadNLP.regularize_diagonal!(kkt, pr_value, du_value)
```
-where `pr_values` stores the diagonal values for the primal
-terms (accounting both for $$\Sigma_x$$ and $$\Sigma_s$$) and `du_values`
-stores the diagonal values for the dual terms (mostly used during
-feasibility restoration).
### Assembling the KKT matrix
Once the values are updated, one can assemble the resulting KKT matrix.
This translates to
```@example kkt_example
MadNLP.build_kkt!(kkt)
-kkt_matrix
```
By doing so, the values stored inside `kkt` will be transferred
-to the KKT matrix `kkt_matrix` (as returned by the function [`get_kkt`](@ref)).
+to the KKT matrix `kkt_matrix` (as returned by the function [`get_kkt`](@ref)):
+```@example kkt_example
+kkt_matrix
+```
-In details, a [`SparseKKTSystem`](@ref) stores internally the KKT system's values using
+Internally, a [`SparseKKTSystem`](@ref) stores the KKT system in
a sparse COO format. When [`build_kkt!`](@ref) is called, the sparse COO matrix
-is transferred to `SparseMatrixCSC` (if `MT = SparseMatrixCSC`) or a `Matrix`
-(if `MT = Matrix`), or any format suitable for factorizing the KKT system
-inside a [linear solver](linear_solvers.md).
+is transferred to a `SparseMatrixCSC` if the linear solver is sparse,
+or to a `Matrix` if the linear solver is dense.
+
+!!! note
+    The KKT system stores only the lower-triangular part of the KKT matrix,
+    as the matrix is symmetric.
+
+
+## Solution of the KKT system
+Now that the KKT system is assembled in a matrix ``K`` (here stored in `kkt_matrix`), we want
+to solve a linear system ``K x = b``, for instance to evaluate the
+next descent direction. To do so, we use the linear solver stored
+internally inside `kkt` (here an instance of `LapackCPUSolver`).
+
+We start by factorizing the KKT matrix ``K``:
+```@example kkt_example
+MadNLP.factorize!(kkt.linear_solver)
+
+```
+By default, MadNLP uses an LBL factorization to decompose the symmetric
+indefinite KKT matrix.
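+
+If the linear solver supports inertia computation, the inertia of ``K`` can be
+queried after the factorization through the linear solver interface (a sketch;
+`inertia` is assumed to return the number of positive, zero, and negative
+eigenvalues):
+```julia
+if MadNLP.is_inertia(kkt.linear_solver)
+    npos, nzero, nneg = MadNLP.inertia(kkt.linear_solver)
+end
+```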
+
+Once the KKT matrix has been factorized, we can compute the solution of the linear
+system with a backsolve. The function takes as input an [`AbstractKKTVector`](@ref),
+an object used to perform algebraic manipulations with an [`AbstractKKTSystem`](@ref).
+We start by instantiating two [`UnreducedKKTVector`](@ref)s (encoding respectively
+the right-hand side and the solution):
+```@example kkt_example
+b = MadNLP.UnreducedKKTVector(kkt)
+fill!(MadNLP.full(b), 1.0)
+x = copy(b)
+
+```
+The right-hand side encodes a vector of ones:
+```@example kkt_example
+MadNLP.full(b)
+```
+We solve the system ``K x = b`` using the [`solve!`](@ref) function:
+```@example kkt_example
+MadNLP.solve!(kkt, x)
+MadNLP.full(x)
+```
+We verify that the solution is correct by multiplying it on the left
+with the KKT system, using `mul!`:
+```@example kkt_example
+mul!(b, kkt, x) # overwrite b!
+MadNLP.full(b)
+```
+We recover a vector of ones, which was the initial right-hand side.
diff --git a/docs/src/man/linear_solvers.md b/docs/src/man/linear_solvers.md
index cefccf95..eb59f8fb 100644
--- a/docs/src/man/linear_solvers.md
+++ b/docs/src/man/linear_solvers.md
@@ -9,11 +9,18 @@ using MadNLPTests
# Build nonlinear model
nlp = MadNLPTests.HS15Model()
# Build KKT
-T = Float64
-VT = Vector{T}
-MT = Matrix{T}
-QN = MadNLP.ExactHessian{T, VT}
-kkt = MadNLP.SparseKKTSystem{T, VT, MT, QN}(nlp)
+cb = MadNLP.create_callback(
+ MadNLP.SparseCallback,
+ nlp,
+)
+ind_cons = MadNLP.get_index_constraints(nlp)
+linear_solver = LapackCPUSolver
+kkt = MadNLP.create_kkt_system(
+ MadNLP.SparseKKTSystem,
+ cb,
+ ind_cons,
+ linear_solver,
+)
n = NLPModels.get_nvar(nlp)
m = NLPModels.get_ncon(nlp)
@@ -65,8 +72,8 @@ a multiple of the identity to it: $$K_r = K + \alpha I$$.
## Factorization algorithm
In nonlinear programming, it is common
-to employ a Bunch-Kaufman factorization (or LDL factorization)
-to factorize the matrix $$K$$, as this algorithm returns the inertia
+to employ an LBL factorization to decompose the symmetric indefinite
+matrix $$K$$, as this algorithm returns the inertia
of the matrix directly as a result of the factorization.
!!! note
@@ -94,7 +101,7 @@ linear_solver = LapackCPUSolver(K)
The instance `linear_solver` does not copy the matrix $$K$$ and
instead keeps a reference to it.
```@example linear_solver_example
-linear_solver.dense === K
+linear_solver.A === K
```
That way every time we re-assemble the matrix $$K$$ in `kkt`,
the values are directly updated inside `linear_solver`.
diff --git a/docs/src/options.md b/docs/src/options.md
index 2077bfb7..2fff23f1 100644
--- a/docs/src/options.md
+++ b/docs/src/options.md
@@ -8,11 +8,20 @@ Depth=3
```
---
+## Primary options
+These options are used to set the values for other options. The default values are inferred from the NLP model.
+- `tol::Float64`\
+  Termination tolerance. The default value is `1e-8` for double precision. The solver terminates if the scaled primal, dual, and complementarity infeasibilities are less than `tol`. Valid range is ``(0,\infty)``.
+- `callback::Type`\
+  Valid values are: `MadNLP`.{`DenseCallback`,`SparseCallback`}.
+- `kkt_system::Type`\
+  The type of KKT system. Valid values are `MadNLP`.{`SparseKKTSystem`,`SparseUnreducedKKTSystem`,`SparseCondensedKKTSystem`,`DenseKKTSystem`,`DenseCondensedKKTSystem`}.
+- `linear_solver::Type`\
+  Linear solver used for solving the primal-dual system. Valid values are: {`MadNLP.UmfpackSolver`,`MadNLP.LDLSolver`,`MadNLP.CHOLMODSolver`, `MadNLP.MumpsSolver`, `MadNLP.PardisoSolver`, `MadNLP.PardisoMKLSolver`, `MadNLP.Ma27Solver`, `MadNLP.Ma57Solver`, `MadNLP.Ma77Solver`, `MadNLP.Ma86Solver`, `MadNLP.Ma97Solver`, `MadNLP.LapackCPUSolver`, `MadNLPGPU.LapackGPUSolver`,`MadNLPGPU.RFSolver`,`MadNLPGPU.GLUSolver`,`MadNLPGPU.CuCholeskySolver`,`MadNLPGPU.CUDSSSolver`} (some may require extension packages). The selected solver should be properly built in the build procedure. See the [README.md](https://github.com/sshin23/MadNLP.jl) file.
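+
+For example, the primary options can be passed directly to `madnlp` as
+keyword arguments (a sketch, assuming `nlp` is an `AbstractNLPModel`):
+```julia
+results = madnlp(
+    nlp;
+    tol=1e-8,
+    kkt_system=MadNLP.SparseKKTSystem,
+    linear_solver=MadNLP.UmfpackSolver,
+)
+```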
+
## General options
-- `linear_solver::Module = DefaultLinearSolver`:\
- Linear solver used for solving primal-dual system. Valid values are: {`MadNLPUmfpack`, `MadNLPMumps`, `MadNLPPardisoMKL`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa77`, `MadNLPMa86`, `MadNLPMa97`, `MadNLPPardiso`, `MadNLPSchur`, `MadNLPSchwarz`, `MadNLPLapackCPU`, `MadNLPLapackGPU`} (some may require using extension packages). The selected solver should be properly built in the build procedure. See [README.md](https://github.com/sshin23/MadNLP.jl) file.
-- `iterator::Module = Richardson `\
+- `iterator::Type = RichardsonIterator`\
Iterator used for iterative refinement. Valid values are: {`MadNLPRichardson`,`MadNLPKrylov`}.
- `Richardson` uses [Richardson iteration](https://en.wikipedia.org/wiki/Modified_Richardson_iteration)
- `Krylov` uses [restarted Generalized Minimal Residual](https://en.wikipedia.org/wiki/Generalized_minimal_residual_method) method implemented in [IterativeSolvers.jl](https://github.com/JuliaMath/IterativeSolvers.jl).
@@ -40,8 +49,6 @@ Depth=3
- `max_iter::Int = 3000`\
Maximum number of interior point iterations. The solver terminates with exit symbol `:Maximum_Iterations_Exceeded` if the interior point iteration count exceeds `max_iter`.
-- `tol::Float64 = 1e-8`\
- Termination tolerance. The solver terminates if the scaled primal, dual, complementary infeasibility is less than `tol`. Valid range is ``(0,\infty)``.
- `acceptable_tol::Float64 = 1e-6`\
Acceptable tolerance. The solver terminates if the scaled primal, dual, complementary infeasibility is less than `acceptable_tol`, for `acceptable_iter` consecutive interior point iteration steps.
- `acceptable_iter::Int = 15`\
@@ -54,25 +61,20 @@ Depth=3
---
-## Nonlinear options
-- `nlp_scaling::Bool = true`: \
- If `true`, MadNLP scales the nonlinear problem during the resolution.
-- `nlp_scaling_max_gradient::Float64 = 100.`
-- `fixed_variable_treatment::FixedVariableTreatments = MAKE_PARAMETER`\
- Valid values are: `MadNLP`.{`RELAX_BOUND`,`MAKE_PARAMETER`}.
+## NLP options
+- `kappa_d::Float64 = 1e-5`
+- `fixed_variable_treatment::Type = MakeParameter`\
+  Valid values are: `MadNLP`.{`RelaxBound`,`MakeParameter`}.
+- `equality_treatment::Type = EnforceEquality`\
+  Valid values are: `MadNLP`.{`RelaxEquality`,`EnforceEquality`}.
- `jacobian_constant::Bool = false`\
If `true`, constraint Jacobian is only evaluated once and reused.
- `hessian_constant::Bool = false`\
If `true`, Lagrangian Hessian is only evaluated once and reused.
-- `constr_mult_init_max::Float64 = 1e3`
- `bound_push::Float64 = 1e-2`
- `bound_fac::Float64 = 1e-2`
-- `kappa_d::Float64 = 1e-5`
-
-
----
-## Inertia options
-
+- `hessian_approximation::Type = ExactHessian`
+- `quasi_newton_options::QuasiNewtonOptions = QuasiNewtonOptions()`
- `inertia_correction_method::InertiaCorrectionMethods = INERTIA_AUTO`\
Valid values are: `MadNLP`.{`INERTIA_AUTO`,`INERTIA_BASED`, `INERTIA_FREE`}.
- `INERTIA_BASED` uses the strategy in Ipopt.
@@ -80,6 +82,14 @@ Depth=3
- `INERTIA_AUTO` uses `INERTIA_BASED` if inertia information is available and uses `INERTIA_FREE` otherwise.
- `inertia_free_tol::Float64 = 0.`
+---
+## Initialization options
+- `dual_initialized::Bool = false`
+- `dual_initialization_method::Type`\
+  Defaults to `DualInitializeSetZero` if `kkt_system <: MadNLP.SparseCondensedKKTSystem`, and to `DualInitializeLeastSquares` otherwise.
+- `constr_mult_init_max::Float64 = 1e3`
+- `nlp_scaling::Bool = true`\
+ If `true`, MadNLP scales the nonlinear problem during the resolution.
+- `nlp_scaling_max_gradient::Float64 = 100.`
---
## Hessian perturbation options
@@ -220,36 +230,6 @@ is printed.
#### LapackCPU
- `lapackcpu_algorithm::LapackCPU.Algorithms = BUNCHKAUFMAN`
-#### Schur (requires `MadNLPGraphs`)
-- `schur_subproblem_solver::Module = DefaultLinearSolver` \
- Linear solver used for solving subproblem. Valid values are: {`MadNLPUmfpack`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa97`, `MadNLPMumps`}.
-- `schur_dense_solver::Module = DefaultDenseSolver` \
- Linear solver used for solving Schur complement system
-- `schur_custom_partition::Bool = false` \
- If `false`, Schur solver automatically detects the partition using `Metis`. If `true`, the partition information given in `schur_part` is used. `schur_num_parts` and `schur_part` should be properly set by the user. When using with `Plasmo`, `schur_num_parts` and `schur_part` are automatically set by the `Plasmo` interface.
-- `schur_num_parts::Int = 2` \
- Number of parts (excluding the parent node). Valid range is ``[1,\infty)``
-- `schur_part::Vector{Int} = Int[]` \
- Custom partition information in a vector form. The parent node should be labeled as `0`. Only valid if `schur_custom_partition` is `true`.
-
-#### Schwarz (requires `MadNLPGraphs`)
-- `schwarz_subproblem_solver::Module = DefaultSubproblemSolver` \
- Linear solver used for solving subproblem. Valid values are: {`MadNLPUmfpack`, `MadNLPPardisoMKL`, `MadNLPMa27`, `MadNLPMa57`, `MadNLPMa77`, `MadNLPMa86`, `MadNLPMa97`, `MadNLPPardiso`}.
-- `schwarz_custom_partition::Bool = false` \
- If `false`, Schwarz solver automatically detects the partition using `Metis`. If `true`, the partition information given in `schur_part` is used. `schur_num_parts` and `schur_part` should be properly set by the user. When using with `Plasmo`, `schur_num_parts` and `schur_part` are automatically set by the `Plasmo` interface.
-- `schwarz_num_parts::Int = 2` \
- Number of parts. Valid range is ``[1,\infty)``
-- `schwarz_part::Vector{Int} = Int[]` \
- Custom partition information in a vector form. Only valid if `schwar_custom_partition` is `true`.
-- `schwarz_num_parts_upper::Int = 0` \
- Number of parts in upper level partition. If `schwarz_num_parts_upper!=0`, a bilevel partitioning scheme is used. Valid range is ``[1,\infty)``
-- `schwarz_part_upper::Vector{Int} = Int[]` \
- Custom partition for the upper level partition.
-- `schwarz_fully_improve_subproblem_solver::Bool = true` \
- If `true`, the subproblem solvers are fully improved when the linear solver is initialized.
-- `schwarz_max_expand_factor::Int = 4` \
- The size of overlap is fully saturated when the `improve!` is called `schwarz_max_expand_factor-1` times. Valid range is ``[2,\infty)``.
-
### Iterator Options
#### Richardson
- `richardson_max_iter::Int = 10` \
diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md
index b9a454ad..fcb6f9a7 100644
--- a/docs/src/quickstart.md
+++ b/docs/src/quickstart.md
@@ -1,5 +1,8 @@
# Quickstart
+```@meta
+CurrentModule = MadNLP
+```
```@setup quickstart
using NLPModels
@@ -43,9 +46,11 @@ model = Model()
@variable(model, x1 <= 0.5)
@variable(model, x2)
-@NLobjective(model, Min, 100.0 * (x2 - x1^2)^2 + (1.0 - x1)^2)
-@NLconstraint(model, x1 * x2 >= 1.0)
-@NLconstraint(model, x1 + x2^2 >= 0.0)
+@objective(model, Min, 100.0 * (x2 - x1^2)^2 + (1.0 - x1)^2)
+@constraint(model, x1 * x2 >= 1.0)
+@constraint(model, x1 + x2^2 >= 0.0)
+
+println(model)
```
@@ -190,16 +195,17 @@ a new MadNLP instance and solve it:
```@example quickstart
x0 = zeros(2) # initial position
nlp = HS15Model(x0)
-ips = MadNLP.MadNLPSolver(nlp; print_level=MadNLP.INFO)
-MadNLP.solve!(ips)
+solver = MadNLP.MadNLPSolver(nlp; print_level=MadNLP.INFO)
+results = MadNLP.solve!(solver)
```
MadNLP converges in 19 iterations to a (local) optimal solution.
-We can query the primal and the dual solutions respectively by
+MadNLP returns a [`MadNLPExecutionStats`](@ref) storing all the
+results. We can query the primal and the dual solutions respectively by
```@example quickstart
-ips.x
+results.solution
```
and
```@example quickstart
-ips.y
+results.multipliers
```
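+
+The objective value at the solution is stored in the `objective` field of the
+returned [`MadNLPExecutionStats`](@ref):
+```@example quickstart
+results.objective
+```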
diff --git a/src/Interfaces/MOI_interface.jl b/ext/MadNLPMOI/MadNLPMOI.jl
similarity index 62%
rename from src/Interfaces/MOI_interface.jl
rename to ext/MadNLPMOI/MadNLPMOI.jl
index 66b022b2..aada2d4e 100644
--- a/src/Interfaces/MOI_interface.jl
+++ b/ext/MadNLPMOI/MadNLPMOI.jl
@@ -1,17 +1,27 @@
-# MadNLP.jl
-# Modified from Ipopt.jl (https://github.com/jump-dev/Ipopt.jl)
+module MadNLPMOI
+
+import MadNLP, MathOptInterface, NLPModels
+
+const MOI = MathOptInterface
+const MOIU = MathOptInterface.Utilities
include("utils.jl")
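+
+# Parameters are encoded as `MOI.VariableIndex`es whose value starts at this
+# large offset, so that `_is_parameter` can cheaply distinguish a parameter
+# from a decision variable.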
+const _PARAMETER_OFFSET = 0x00f0000000000000
+
+_is_parameter(x::MOI.VariableIndex) = x.value >= _PARAMETER_OFFSET
+
+_is_parameter(term::MOI.ScalarAffineTerm) = _is_parameter(term.variable)
+
"""
Optimizer()
Create a new MadNLP optimizer.
"""
mutable struct Optimizer <: MOI.AbstractOptimizer
- solver::Union{Nothing,MadNLPSolver}
- nlp::Union{Nothing,AbstractNLPModel}
- result::Union{Nothing,MadNLPExecutionStats{Float64}}
+ solver::Union{Nothing,MadNLP.MadNLPSolver}
+ nlp::Union{Nothing,NLPModels.AbstractNLPModel}
+ result::Union{Nothing,MadNLP.MadNLPExecutionStats{Float64}}
name::String
invalid_model::Bool
@@ -21,7 +31,9 @@ mutable struct Optimizer <: MOI.AbstractOptimizer
solve_iterations::Int
sense::MOI.OptimizationSense
+ parameters::Dict{MOI.VariableIndex,MOI.Nonlinear.ParameterIndex}
variables::MOI.Utilities.VariablesContainer{Float64}
+ list_of_variable_indices::Vector{MOI.VariableIndex}
variable_primal_start::Vector{Union{Nothing,Float64}}
mult_x_L::Vector{Union{Nothing,Float64}}
mult_x_U::Vector{Union{Nothing,Float64}}
@@ -30,9 +42,10 @@ mutable struct Optimizer <: MOI.AbstractOptimizer
nlp_dual_start::Union{Nothing,Vector{Float64}}
qp_data::QPBlockData{Float64}
+ nlp_model::Union{Nothing,MOI.Nonlinear.Model}
end
-function Optimizer(; kwargs...)
+function MadNLP.Optimizer(; kwargs...)
option_dict = Dict{Symbol, Any}()
for (name, value) in kwargs
option_dict[name] = value
@@ -48,25 +61,33 @@ function Optimizer(; kwargs...)
NaN,
0,
MOI.FEASIBILITY_SENSE,
+        Dict{MOI.VariableIndex,MOI.Nonlinear.ParameterIndex}(),
MOI.Utilities.VariablesContainer{Float64}(),
+ MOI.VariableIndex[],
Union{Nothing,Float64}[],
Union{Nothing,Float64}[],
Union{Nothing,Float64}[],
MOI.NLPBlockData([], _EmptyNLPEvaluator(), false),
nothing,
QPBlockData{Float64}(),
+ nothing,
)
end
-const _SETS =
- Union{MOI.GreaterThan{Float64},MOI.LessThan{Float64},MOI.EqualTo{Float64}}
+const _SETS = Union{
+ MOI.GreaterThan{Float64},
+ MOI.LessThan{Float64},
+ MOI.EqualTo{Float64},
+ MOI.Interval{Float64},
+}
const _FUNCTIONS = Union{
MOI.ScalarAffineFunction{Float64},
MOI.ScalarQuadraticFunction{Float64},
+ MOI.ScalarNonlinearFunction,
}
-MOI.get(::Optimizer, ::MOI.SolverVersion) = "3.14.4"
+MOI.get(::Optimizer, ::MOI.SolverVersion) = MadNLP.version()
### _EmptyNLPEvaluator
@@ -78,19 +99,34 @@ MOI.eval_constraint(::_EmptyNLPEvaluator, g, x) = nothing
MOI.jacobian_structure(::_EmptyNLPEvaluator) = Tuple{Int64,Int64}[]
MOI.hessian_lagrangian_structure(::_EmptyNLPEvaluator) = Tuple{Int64,Int64}[]
MOI.eval_constraint_jacobian(::_EmptyNLPEvaluator, J, x) = nothing
+MOI.eval_constraint_jacobian_transpose_product(::_EmptyNLPEvaluator, Jtv, x, v) = nothing
MOI.eval_hessian_lagrangian(::_EmptyNLPEvaluator, H, x, σ, μ) = nothing
function MOI.empty!(model::Optimizer)
model.solver = nothing
+ model.nlp = nothing
+ model.result = nothing
model.invalid_model = false
+ model.solve_time = NaN
+ model.solve_iterations = 0
model.sense = MOI.FEASIBILITY_SENSE
+ empty!(model.parameters)
MOI.empty!(model.variables)
+ empty!(model.list_of_variable_indices)
empty!(model.variable_primal_start)
empty!(model.mult_x_L)
empty!(model.mult_x_U)
model.nlp_data = MOI.NLPBlockData([], _EmptyNLPEvaluator(), false)
model.nlp_dual_start = nothing
model.qp_data = QPBlockData{Float64}()
+ model.nlp_model = nothing
+ # Delete options if they are problem dependent.
+ if haskey(model.options, :jacobian_constant)
+ delete!(model.options, :jacobian_constant)
+ end
+ if haskey(model.options, :hessian_approximation)
+ delete!(model.options, :hessian_approximation)
+ end
return
end
@@ -109,8 +145,85 @@ function MOI.copy_to(model::Optimizer, src::MOI.ModelLike)
return MOI.Utilities.default_copy_to(model, src)
end
+function _init_nlp_model(model)
+ if model.nlp_model === nothing
+ if !(model.nlp_data.evaluator isa _EmptyNLPEvaluator)
+ error("Cannot mix the new and legacy nonlinear APIs")
+ end
+ model.nlp_model = MOI.Nonlinear.Model()
+ end
+ return
+end
+
MOI.get(::Optimizer, ::MOI.SolverName) = "MadNLP"
+function MOI.supports_add_constrained_variable(
+ ::Optimizer,
+ ::Type{MOI.Parameter{Float64}},
+)
+ return true
+end
+
+function MOI.add_constrained_variable(
+ model::Optimizer,
+ set::MOI.Parameter{Float64},
+)
+    model.solver = nothing
+ if model.nlp_model === nothing
+ model.nlp_model = MOI.Nonlinear.Model()
+ end
+ p = MOI.VariableIndex(_PARAMETER_OFFSET + length(model.parameters))
+ push!(model.list_of_variable_indices, p)
+ model.parameters[p] =
+ MOI.Nonlinear.add_parameter(model.nlp_model, set.value)
+ ci = MOI.ConstraintIndex{MOI.VariableIndex,typeof(set)}(p.value)
+ return p, ci
+end
+
+function MOI.set(
+ model::Optimizer,
+ ::MOI.ConstraintSet,
+ ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{Float64}},
+ set::MOI.Parameter{Float64},
+)
+ p = model.parameters[MOI.VariableIndex(ci.value)]
+ model.nlp_model[p] = set.value
+ return
+end
+
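+# `_replace_parameters` walks a MOI function and substitutes each parameter
+# `VariableIndex` with its `MOI.Nonlinear.ParameterIndex`. The generic
+# fallback below leaves non-parametric functions untouched.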
+_replace_parameters(model::Optimizer, f) = f
+
+function _replace_parameters(model::Optimizer, f::MOI.VariableIndex)
+ if _is_parameter(f)
+ return model.parameters[f]
+ end
+ return f
+end
+
+function _replace_parameters(model::Optimizer, f::MOI.ScalarAffineFunction)
+ if any(_is_parameter, f.terms)
+ g = convert(MOI.ScalarNonlinearFunction, f)
+ return _replace_parameters(model, g)
+ end
+ return f
+end
+
+function _replace_parameters(model::Optimizer, f::MOI.ScalarQuadraticFunction)
+ if any(_is_parameter, f.affine_terms) ||
+ any(_is_parameter, f.quadratic_terms)
+ g = convert(MOI.ScalarNonlinearFunction, f)
+ return _replace_parameters(model, g)
+ end
+ return f
+end
+
+function _replace_parameters(model::Optimizer, f::MOI.ScalarNonlinearFunction)
+ for (i, arg) in enumerate(f.args)
+ f.args[i] = _replace_parameters(model, arg)
+ end
+ return f
+end
+
function MOI.supports_constraint(
::Optimizer,
::Type{<:Union{MOI.VariableIndex,_FUNCTIONS}},
@@ -159,12 +272,12 @@ function MOI.set(model::Optimizer, ::MOI.TimeLimitSec, value::Real)
end
function MOI.set(model::Optimizer, ::MOI.TimeLimitSec, ::Nothing)
- delete!(model.options, "max_cpu_time")
+ delete!(model.options, :max_cpu_time)
return
end
function MOI.get(model::Optimizer, ::MOI.TimeLimitSec)
- return get(model.options, "max_cpu_time", nothing)
+ return get(model.options, :max_cpu_time, nothing)
end
### MOI.RawOptimizerAttribute
@@ -198,18 +311,24 @@ function MOI.add_variable(model::Optimizer)
push!(model.mult_x_L, nothing)
push!(model.mult_x_U, nothing)
model.solver = nothing
- return MOI.add_variable(model.variables)
+ x = MOI.add_variable(model.variables)
+ push!(model.list_of_variable_indices, x)
+ return x
end
function MOI.is_valid(model::Optimizer, x::MOI.VariableIndex)
+ if _is_parameter(x)
+ return haskey(model.parameters, x)
+ end
return MOI.is_valid(model.variables, x)
end
-function MOI.get(
- model::Optimizer,
- attr::Union{MOI.NumberOfVariables,MOI.ListOfVariableIndices},
-)
- return MOI.get(model.variables, attr)
+function MOI.get(model::Optimizer, ::MOI.ListOfVariableIndices)
+ return model.list_of_variable_indices
+end
+
+function MOI.get(model::Optimizer, ::MOI.NumberOfVariables)
+ return length(model.list_of_variable_indices)
end
function MOI.is_valid(
@@ -328,6 +447,73 @@ function MOI.set(
return
end
+### ScalarNonlinearFunction
+
+function MOI.is_valid(
+ model::Optimizer,
+ ci::MOI.ConstraintIndex{MOI.ScalarNonlinearFunction,<:_SETS},
+)
+ if model.nlp_model === nothing
+ return false
+ end
+ index = MOI.Nonlinear.ConstraintIndex(ci.value)
+ return MOI.is_valid(model.nlp_model, index)
+end
+
+function MOI.add_constraint(
+ model::Optimizer,
+ f::MOI.ScalarNonlinearFunction,
+ s::_SETS,
+)
+ if model.nlp_model === nothing
+ model.nlp_model = MOI.Nonlinear.Model()
+ end
+ if !isempty(model.parameters)
+ _replace_parameters(model, f)
+ end
+ index = MOI.Nonlinear.add_constraint(model.nlp_model, f, s)
+ model.solver = nothing
+ return MOI.ConstraintIndex{typeof(f),typeof(s)}(index.value)
+end
+
+function MOI.set(
+ model::Optimizer,
+ attr::MOI.ObjectiveFunction{MOI.ScalarNonlinearFunction},
+ func::MOI.ScalarNonlinearFunction,
+)
+ if model.nlp_model === nothing
+ model.nlp_model = MOI.Nonlinear.Model()
+ end
+ if !isempty(model.parameters)
+ _replace_parameters(model, func)
+ end
+ MOI.Nonlinear.set_objective(model.nlp_model, func)
+ model.solver = nothing
+ return
+end
+
+### UserDefinedFunction
+
+MOI.supports(model::Optimizer, ::MOI.UserDefinedFunction) = true
+
+function MOI.set(model::Optimizer, attr::MOI.UserDefinedFunction, args)
+ _init_nlp_model(model)
+ MOI.Nonlinear.register_operator(
+ model.nlp_model,
+ attr.name,
+ attr.arity,
+ args...,
+ )
+ return
+end
+
+### ListOfSupportedNonlinearOperators
+
+function MOI.get(model::Optimizer, attr::MOI.ListOfSupportedNonlinearOperators)
+ _init_nlp_model(model)
+ return MOI.get(model.nlp_model, attr)
+end
+
### MOI.VariablePrimalStart
function MOI.supports(
@@ -344,6 +530,9 @@ function MOI.set(
vi::MOI.VariableIndex,
value::Union{Real,Nothing},
)
+ if _is_parameter(vi)
+ return # Do nothing
+ end
MOI.throw_if_not_valid(model, vi)
model.variable_primal_start[column(vi)] = value
# No need to reset model.solver, because this gets handled in optimize!.
@@ -355,7 +544,7 @@ end
_dual_start(::Optimizer, ::Nothing, ::Int = 1) = 0.0
function _dual_start(model::Optimizer, value::Real, scale::Int = 1)
- return value * scale
+ return _dual_multiplier(model) * value * scale
end
function MOI.supports(
@@ -504,6 +693,9 @@ function MOI.set(
func::F,
) where {F<:Union{MOI.VariableIndex,<:_FUNCTIONS}}
MOI.set(model.qp_data, attr, func)
+ if model.nlp_model !== nothing
+ MOI.Nonlinear.set_objective(model.nlp_model, nothing)
+ end
model.solver = nothing
return
end
@@ -556,11 +748,18 @@ end
function MOI.eval_constraint_jacobian(model::Optimizer, values, x)
offset = MOI.eval_constraint_jacobian(model.qp_data, values, x)
- nlp_values = view(values, (offset+1):length(values))
+ nlp_values = view(values, offset:length(values))
MOI.eval_constraint_jacobian(model.nlp_data.evaluator, nlp_values, x)
return
end
+function MOI.eval_constraint_jacobian_transpose_product(model::Optimizer, Jtv, x, v)
+ MOI.eval_constraint_jacobian_transpose_product(model.nlp_data.evaluator, Jtv, x, v)
+ # Evaluate QPBlockData after NLPEvaluator to ensure that Jtv is not reset.
+ MOI.eval_constraint_jacobian_transpose_product(model.qp_data, Jtv, x, v)
+ return
+end
+
### Eval_H_CB
function MOI.hessian_lagrangian_structure(model::Optimizer)
@@ -571,38 +770,43 @@ end
function MOI.eval_hessian_lagrangian(model::Optimizer, H, x, σ, μ)
offset = MOI.eval_hessian_lagrangian(model.qp_data, H, x, σ, μ)
- H_nlp = view(H, (offset+1):length(H))
+ H_nlp = view(H, offset:length(H))
μ_nlp = view(μ, (length(model.qp_data)+1):length(μ))
MOI.eval_hessian_lagrangian(model.nlp_data.evaluator, H_nlp, x, σ, μ_nlp)
return
end
### NLPModels wrapper
-struct MOIModel{T} <: AbstractNLPModel{T,Vector{T}}
- meta::NLPModelMeta{T, Vector{T}}
+struct MOIModel{T} <: NLPModels.AbstractNLPModel{T,Vector{T}}
+ meta::NLPModels.NLPModelMeta{T, Vector{T}}
model::Optimizer
counters::NLPModels.Counters
end
-obj(nlp::MOIModel,x::Vector{Float64}) = MOI.eval_objective(nlp.model,x)
+NLPModels.obj(nlp::MOIModel, x::AbstractVector{Float64}) = MOI.eval_objective(nlp.model,x)
+
+function NLPModels.grad!(nlp::MOIModel, x::AbstractVector{Float64}, g::AbstractVector{Float64})
+ MOI.eval_objective_gradient(nlp.model, g, x)
+end
-function grad!(nlp::MOIModel,x::Vector{Float64},f::Vector{Float64})
- MOI.eval_objective_gradient(nlp.model,f,x)
+function NLPModels.cons!(nlp::MOIModel, x::AbstractVector{Float64}, c::AbstractVector{Float64})
+ MOI.eval_constraint(nlp.model, c, x)
end
-function cons!(nlp::MOIModel,x::Vector{Float64},c::Vector{Float64})
- MOI.eval_constraint(nlp.model,c,x)
+function NLPModels.jac_coord!(nlp::MOIModel, x::AbstractVector{Float64}, jac::AbstractVector{Float64})
+ MOI.eval_constraint_jacobian(nlp.model, jac, x)
end
-function jac_coord!(nlp::MOIModel,x::Vector{Float64},jac::Vector{Float64})
- MOI.eval_constraint_jacobian(nlp.model,jac,x)
+function NLPModels.jtprod!(nlp::MOIModel, x::AbstractVector{Float64}, v::Vector{Float64}, Jtv::AbstractVector{Float64})
+ MOI.eval_constraint_jacobian_transpose_product(nlp.model, Jtv, x, v)
end
-function hess_coord!(nlp::MOIModel,x::Vector{Float64},l::Vector{Float64},hess::Vector{Float64}; obj_weight::Float64=1.)
- MOI.eval_hessian_lagrangian(nlp.model,hess,x,obj_weight,l)
+function NLPModels.hess_coord!(nlp::MOIModel, x::AbstractVector{Float64}, l::AbstractVector{Float64}, hess::AbstractVector{Float64}; obj_weight::Float64=1.0)
+ MOI.eval_hessian_lagrangian(nlp.model, hess, x, obj_weight, l)
end
-function hess_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{T}) where T
+function NLPModels.hess_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{T}) where T
+ @assert length(I) == length(J) == length(MOI.hessian_lagrangian_structure(nlp.model))
cnt = 1
for (row, col) in MOI.hessian_lagrangian_structure(nlp.model)
I[cnt], J[cnt] = row, col
@@ -610,7 +814,8 @@ function hess_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{
end
end
-function jac_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{T}) where T
+function NLPModels.jac_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{T}) where T
+ @assert length(I) == length(J) == length(MOI.jacobian_structure(nlp.model))
cnt = 1
for (row, col) in MOI.jacobian_structure(nlp.model)
I[cnt], J[cnt] = row, col
@@ -618,14 +823,40 @@ function jac_structure!(nlp::MOIModel, I::AbstractVector{T}, J::AbstractVector{T
end
end
-### MOI.optimize!
+### MOIModel constructor
function MOIModel(model::Optimizer)
- :Hess in MOI.features_available(model.nlp_data.evaluator) || error("Hessian information is needed.")
- MOI.initialize(model.nlp_data.evaluator, [:Grad,:Hess,:Jac])
+ # Check model is nonempty.
+ vars = MOI.get(model.variables, MOI.ListOfVariableIndices())
+ if isempty(vars)
+ model.invalid_model = true
+ return
+ end
+ # Create NLP backend.
+ if model.nlp_model !== nothing
+ backend = MOI.Nonlinear.SparseReverseMode()
+ evaluator = MOI.Nonlinear.Evaluator(model.nlp_model, backend, vars)
+ model.nlp_data = MOI.NLPBlockData(evaluator)
+ end
+ # Check model's structure.
+ has_quadratic_constraints =
+ any(isequal(_kFunctionTypeScalarQuadratic), model.qp_data.function_type)
+ has_nlp_constraints = !isempty(model.nlp_data.constraint_bounds)
+ has_nlp_objective = model.nlp_data.has_objective
+ has_hessian = :Hess in MOI.features_available(model.nlp_data.evaluator)
+ is_nlp = has_nlp_constraints || has_nlp_objective
+ # Initialize evaluator using model's structure.
+ init_feat = [:Grad]
+ if has_hessian
+ push!(init_feat, :Hess)
+ end
+ if has_nlp_constraints
+ push!(init_feat, :Jac)
+ end
+ MOI.initialize(model.nlp_data.evaluator, init_feat)
# Initial variable
nvar = length(model.variables.lower)
- x0 = Vector{Float64}(undef,nvar)
+ x0 = zeros(nvar)
for i in 1:length(model.variable_primal_start)
x0[i] = if model.variable_primal_start[i] !== nothing
model.variable_primal_start[i]
@@ -633,7 +864,6 @@ function MOIModel(model::Optimizer)
clamp(0.0, model.variables.lower[i], model.variables.upper[i])
end
end
-
# Constraints bounds
g_L, g_U = copy(model.qp_data.g_L), copy(model.qp_data.g_U)
for bound in model.nlp_data.constraint_bounds
@@ -644,12 +874,16 @@ function MOIModel(model::Optimizer)
# Sparsity
jacobian_sparsity = MOI.jacobian_structure(model)
- hessian_sparsity = MOI.hessian_lagrangian_structure(model)
+ hessian_sparsity = if has_hessian
+ MOI.hessian_lagrangian_structure(model)
+ else
+ Tuple{Int,Int}[]
+ end
nnzh = length(hessian_sparsity)
nnzj = length(jacobian_sparsity)
# Dual multipliers
- y0 = Vector{Float64}(undef,ncon)
+ y0 = zeros(ncon)
for (i, start) in enumerate(model.qp_data.mult_g)
y0[i] = _dual_start(model, start, -1)
end
@@ -661,14 +895,17 @@ function MOIModel(model::Optimizer)
y0[offset+i] = _dual_start(model, start, -1)
end
end
+ # TODO: initial bounds' multipliers.
-
- # TODO
- model.options[:jacobian_constant], model.options[:hessian_constant] = false, false
- model.options[:dual_initialized] = !iszero(y0)
+ if !has_nlp_constraints && !has_quadratic_constraints
+ model.options[:jacobian_constant] = true
+ end
+ if !has_hessian
+ model.options[:hessian_approximation] = MadNLP.CompactLBFGS
+ end
return MOIModel(
- NLPModelMeta(
+ NLPModels.NLPModelMeta(
nvar,
x0 = x0,
lvar = model.variables.lower,
@@ -681,45 +918,59 @@ function MOIModel(model::Optimizer)
nnzh = nnzh,
minimize = model.sense == MOI.MIN_SENSE
),
- model,NLPModels.Counters())
+ model,
+ NLPModels.Counters(),
+ )
+end
+
+function copy_parameters(model::Optimizer)
+ if model.nlp_model === nothing
+ return
+ end
+ empty!(model.qp_data.parameters)
+ for (p, index) in model.parameters
+ model.qp_data.parameters[p.value] = model.nlp_model[index]
+ end
+ return
end
function MOI.optimize!(model::Optimizer)
model.nlp = MOIModel(model)
+ if model.invalid_model
+ return
+ end
+ copy_parameters(model)
if model.silent
model.options[:print_level] = MadNLP.ERROR
end
- model.solver = MadNLPSolver(model.nlp; model.options...)
- model.result = solve!(model.solver)
+ model.solver = MadNLP.MadNLPSolver(model.nlp; model.options...)
+ model.result = MadNLP.solve!(model.solver)
model.solve_time = model.solver.cnt.total_time
model.solve_iterations = model.solver.cnt.k
return
end
# From Ipopt/src/Interfaces/IpReturnCodes_inc.h
-const _STATUS_CODES = Dict{Status,MOI.TerminationStatusCode}(
- SOLVE_SUCCEEDED => MOI.LOCALLY_SOLVED,
- SOLVED_TO_ACCEPTABLE_LEVEL => MOI.ALMOST_LOCALLY_SOLVED,
- SEARCH_DIRECTION_BECOMES_TOO_SMALL => MOI.SLOW_PROGRESS,
- DIVERGING_ITERATES => MOI.INFEASIBLE_OR_UNBOUNDED,
- INFEASIBLE_PROBLEM_DETECTED => MOI.LOCALLY_INFEASIBLE,
- MAXIMUM_ITERATIONS_EXCEEDED => MOI.ITERATION_LIMIT,
- MAXIMUM_WALLTIME_EXCEEDED => MOI.TIME_LIMIT,
- INITIAL => MOI.OPTIMIZE_NOT_CALLED,
- # REGULAR
- # RESTORE
- # ROBUST
- RESTORATION_FAILED => MOI.NUMERICAL_ERROR,
- INVALID_NUMBER_DETECTED => MOI.INVALID_MODEL,
- ERROR_IN_STEP_COMPUTATION => MOI.NUMERICAL_ERROR,
- NOT_ENOUGH_DEGREES_OF_FREEDOM => MOI.INVALID_MODEL,
- USER_REQUESTED_STOP => MOI.INTERRUPTED,
- INTERNAL_ERROR => MOI.OTHER_ERROR,
- INVALID_NUMBER_OBJECTIVE => MOI.INVALID_MODEL,
- INVALID_NUMBER_GRADIENT => MOI.INVALID_MODEL,
- INVALID_NUMBER_CONSTRAINTS => MOI.INVALID_MODEL,
- INVALID_NUMBER_JACOBIAN => MOI.INVALID_MODEL,
- INVALID_NUMBER_HESSIAN_LAGRANGIAN => MOI.INVALID_MODEL,
+const _STATUS_CODES = Dict{MadNLP.Status,MOI.TerminationStatusCode}(
+ MadNLP.SOLVE_SUCCEEDED => MOI.LOCALLY_SOLVED,
+ MadNLP.SOLVED_TO_ACCEPTABLE_LEVEL => MOI.ALMOST_LOCALLY_SOLVED,
+ MadNLP.SEARCH_DIRECTION_BECOMES_TOO_SMALL => MOI.SLOW_PROGRESS,
+ MadNLP.DIVERGING_ITERATES => MOI.INFEASIBLE_OR_UNBOUNDED,
+ MadNLP.INFEASIBLE_PROBLEM_DETECTED => MOI.LOCALLY_INFEASIBLE,
+ MadNLP.MAXIMUM_ITERATIONS_EXCEEDED => MOI.ITERATION_LIMIT,
+ MadNLP.MAXIMUM_WALLTIME_EXCEEDED => MOI.TIME_LIMIT,
+ MadNLP.INITIAL => MOI.OPTIMIZE_NOT_CALLED,
+ MadNLP.RESTORATION_FAILED => MOI.NUMERICAL_ERROR,
+ MadNLP.INVALID_NUMBER_DETECTED => MOI.INVALID_MODEL,
+ MadNLP.ERROR_IN_STEP_COMPUTATION => MOI.NUMERICAL_ERROR,
+ MadNLP.NOT_ENOUGH_DEGREES_OF_FREEDOM => MOI.INVALID_MODEL,
+ MadNLP.USER_REQUESTED_STOP => MOI.INTERRUPTED,
+ MadNLP.INTERNAL_ERROR => MOI.OTHER_ERROR,
+ MadNLP.INVALID_NUMBER_OBJECTIVE => MOI.INVALID_MODEL,
+ MadNLP.INVALID_NUMBER_GRADIENT => MOI.INVALID_MODEL,
+ MadNLP.INVALID_NUMBER_CONSTRAINTS => MOI.INVALID_MODEL,
+ MadNLP.INVALID_NUMBER_JACOBIAN => MOI.INVALID_MODEL,
+ MadNLP.INVALID_NUMBER_HESSIAN_LAGRANGIAN => MOI.INVALID_MODEL,
)
### MOI.ResultCount
@@ -729,6 +980,16 @@ function MOI.get(model::Optimizer, ::MOI.ResultCount)
return (model.solver !== nothing) ? 1 : 0
end
+### MOI.TerminationStatus
+
+function MOI.get(model::Optimizer, ::MOI.TerminationStatus)
+ if model.invalid_model
+ return MOI.INVALID_MODEL
+ elseif model.solver === nothing
+ return MOI.OPTIMIZE_NOT_CALLED
+ end
+ return get(_STATUS_CODES, model.result.status, MOI.OTHER_ERROR)
+end
### MOI.RawStatusString
@@ -738,23 +999,9 @@ function MOI.get(model::Optimizer, ::MOI.RawStatusString)
elseif model.solver === nothing
return "Optimize not called"
end
- return get(
- STATUS_OUTPUT_DICT,
- model.result.status,
- "Unknown result status: $(model.result.status)",
- )
+ return MadNLP.get_status_output(model.result.status, model.result.options)
end
-### MOI.TerminationStatus
-#
-function MOI.get(model::Optimizer, ::MOI.TerminationStatus)
- if model.invalid_model
- return MOI.INVALID_MODEL
- elseif model.solver === nothing
- return MOI.OPTIMIZE_NOT_CALLED
- end
- return get(_STATUS_CODES, model.result.status, MOI.OTHER_ERROR)
-end
### MOI.PrimalStatus
@@ -763,11 +1010,11 @@ function MOI.get(model::Optimizer, attr::MOI.PrimalStatus)
return MOI.NO_SOLUTION
end
status = model.result.status
- if status == SOLVE_SUCCEEDED
+ if status == MadNLP.SOLVE_SUCCEEDED
return MOI.FEASIBLE_POINT
- elseif status == SOLVED_TO_ACCEPTABLE_LEVEL
+ elseif status == MadNLP.SOLVED_TO_ACCEPTABLE_LEVEL
return MOI.NEARLY_FEASIBLE_POINT
- elseif status == INFEASIBLE_PROBLEM_DETECTED
+ elseif status == MadNLP.INFEASIBLE_PROBLEM_DETECTED
return MOI.INFEASIBLE_POINT
else
return MOI.UNKNOWN_RESULT_STATUS
@@ -781,11 +1028,11 @@ function MOI.get(model::Optimizer, attr::MOI.DualStatus)
return MOI.NO_SOLUTION
end
status = model.result.status
- if status == SOLVE_SUCCEEDED
+ if status == MadNLP.SOLVE_SUCCEEDED
return MOI.FEASIBLE_POINT
- elseif status == SOLVED_TO_ACCEPTABLE_LEVEL
+ elseif status == MadNLP.SOLVED_TO_ACCEPTABLE_LEVEL
return MOI.NEARLY_FEASIBLE_POINT
- elseif status == INFEASIBLE_PROBLEM_DETECTED
+ elseif status == MadNLP.INFEASIBLE_PROBLEM_DETECTED
return MOI.INFEASIBLE_POINT
else
return MOI.UNKNOWN_RESULT_STATUS
@@ -813,11 +1060,24 @@ function MOI.get(
)
MOI.check_result_index_bounds(model, attr)
MOI.throw_if_not_valid(model, vi)
+ if _is_parameter(vi)
+ p = model.parameters[vi]
+ return model.nlp_model[p]
+ end
return model.result.solution[vi.value]
end
### MOI.ConstraintPrimal
+row(model::Optimizer, ci::MOI.ConstraintIndex{<:_FUNCTIONS}) = ci.value
+
+function row(
+ model::Optimizer,
+ ci::MOI.ConstraintIndex{MOI.ScalarNonlinearFunction},
+)
+ return length(model.qp_data) + ci.value
+end
+
function MOI.get(
model::Optimizer,
attr::MOI.ConstraintPrimal,
@@ -825,7 +1085,7 @@ function MOI.get(
)
MOI.check_result_index_bounds(model, attr)
MOI.throw_if_not_valid(model, ci)
- return model.result.constraints[ci.value]
+ return model.result.constraints[row(model, ci)]
end
function MOI.get(
@@ -839,7 +1099,7 @@ function MOI.get(
end
### MOI.ConstraintDual
-_dual_multiplier(model::Optimizer) = model.sense == MOI.MIN_SENSE ? 1.0 : -1.0
+_dual_multiplier(model::Optimizer) = 1.0
function MOI.get(
model::Optimizer,
@@ -860,7 +1120,7 @@ function MOI.get(
MOI.check_result_index_bounds(model, attr)
MOI.throw_if_not_valid(model, ci)
rc = model.result.multipliers_L[ci.value] - model.result.multipliers_U[ci.value]
- return min(0.0, rc)
+ return min(0.0, rc * _dual_multiplier(model))
end
function MOI.get(
@@ -871,7 +1131,7 @@ function MOI.get(
MOI.check_result_index_bounds(model, attr)
MOI.throw_if_not_valid(model, ci)
rc = model.result.multipliers_L[ci.value] - model.result.multipliers_U[ci.value]
- return max(0.0, rc)
+ return max(0.0, rc * _dual_multiplier(model))
end
function MOI.get(
@@ -885,11 +1145,22 @@ function MOI.get(
return rc
end
+function MOI.get(
+ model::Optimizer,
+ attr::MOI.ConstraintDual,
+ ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Interval{Float64}},
+)
+ MOI.check_result_index_bounds(model, attr)
+ MOI.throw_if_not_valid(model, ci)
+ rc = model.result.multipliers_L[ci.value] - model.result.multipliers_U[ci.value]
+ return rc
+end
+
### MOI.NLPBlockDual
function MOI.get(model::Optimizer, attr::MOI.NLPBlockDual)
MOI.check_result_index_bounds(model, attr)
- s = -1.0
+ s = -_dual_multiplier(model)
offset = length(model.qp_data)
return s .* model.result.multipliers[(offset+1):end]
end
@@ -897,3 +1168,4 @@ end
### MOI.BarrierIterations
MOI.get(model::Optimizer,::MOI.BarrierIterations) = model.solve_iterations
+end # module
diff --git a/ext/MadNLPMOI/utils.jl b/ext/MadNLPMOI/utils.jl
new file mode 100644
index 00000000..0990afbe
--- /dev/null
+++ b/ext/MadNLPMOI/utils.jl
@@ -0,0 +1,556 @@
+# Copyright (c) 2013: Iain Dunning, Miles Lubin, and contributors
+#
+# Use of this source code is governed by an MIT-style license that can be found
+# in the LICENSE.md file or at https://opensource.org/licenses/MIT.
+
+# !!! warning
+#
+# The contents of this file are experimental.
+#
+# Until this message is removed, breaking changes to the functions and
+# types, including their deletion, may be introduced in any minor or patch
+# release of MadNLP.
+
+@enum(
+ _FunctionType,
+ _kFunctionTypeVariableIndex,
+ _kFunctionTypeScalarAffine,
+ _kFunctionTypeScalarQuadratic,
+)
+
+function _function_type_to_set(::Type{T}, k::_FunctionType) where {T}
+ if k == _kFunctionTypeVariableIndex
+ return MOI.VariableIndex
+ elseif k == _kFunctionTypeScalarAffine
+ return MOI.ScalarAffineFunction{T}
+ else
+ @assert k == _kFunctionTypeScalarQuadratic
+ return MOI.ScalarQuadraticFunction{T}
+ end
+end
+
+_function_info(::MOI.VariableIndex) = _kFunctionTypeVariableIndex
+_function_info(::MOI.ScalarAffineFunction) = _kFunctionTypeScalarAffine
+_function_info(::MOI.ScalarQuadraticFunction) = _kFunctionTypeScalarQuadratic
+
+@enum(
+ _BoundType,
+ _kBoundTypeLessThan,
+ _kBoundTypeGreaterThan,
+ _kBoundTypeEqualTo,
+ _kBoundTypeInterval,
+)
+
+_set_info(s::MOI.LessThan) = _kBoundTypeLessThan, -Inf, s.upper
+_set_info(s::MOI.GreaterThan) = _kBoundTypeGreaterThan, s.lower, Inf
+_set_info(s::MOI.EqualTo) = _kBoundTypeEqualTo, s.value, s.value
+_set_info(s::MOI.Interval) = _kBoundTypeInterval, s.lower, s.upper
+
+function _bound_type_to_set(::Type{T}, k::_BoundType) where {T}
+ if k == _kBoundTypeEqualTo
+ return MOI.EqualTo{T}
+ elseif k == _kBoundTypeLessThan
+ return MOI.LessThan{T}
+ elseif k == _kBoundTypeGreaterThan
+ return MOI.GreaterThan{T}
+ else
+ @assert k == _kBoundTypeInterval
+ return MOI.Interval{T}
+ end
+end
+
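+# QPBlockData stores the affine and quadratic parts of the model (objective and
+# constraints) in MOI form, together with their bounds, dual starts, and
+# parameter values, so that they can be evaluated alongside the nonlinear block.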
+mutable struct QPBlockData{T}
+ objective::Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}}
+ objective_function_type::_FunctionType
+ constraints::Vector{
+ Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
+ }
+ g_L::Vector{T}
+ g_U::Vector{T}
+ mult_g::Vector{Union{Nothing,T}}
+ function_type::Vector{_FunctionType}
+ bound_type::Vector{_BoundType}
+ parameters::Dict{Int64,T}
+
+ function QPBlockData{T}() where {T}
+ return new(
+ zero(MOI.ScalarQuadraticFunction{T}),
+ _kFunctionTypeScalarAffine,
+ Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}}[],
+ T[],
+ T[],
+ Union{Nothing,T}[],
+ _FunctionType[],
+ _BoundType[],
+ Dict{Int64,T}(),
+ )
+ end
+end
+
+function _value(variable::MOI.VariableIndex, x::Vector, p::Dict)
+ if _is_parameter(variable)
+ return p[variable.value]
+ else
+ return x[variable.value]
+ end
+end
+
+function eval_function(
+ f::MOI.ScalarQuadraticFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+)::T where {T}
+ y = f.constant
+ for term in f.affine_terms
+ y += term.coefficient * _value(term.variable, x, p)
+ end
+ for term in f.quadratic_terms
+ v1 = _value(term.variable_1, x, p)
+ v2 = _value(term.variable_2, x, p)
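+        # MOI represents the quadratic part as 1/2 x' Q x: a diagonal term
+        # (variable_1 == variable_2) contributes c * v1 * v2 / 2, while an
+        # off-diagonal pair is stored once and contributes c * v1 * v2.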
+ if term.variable_1 == term.variable_2
+ y += term.coefficient * v1 * v2 / 2
+ else
+ y += term.coefficient * v1 * v2
+ end
+ end
+ return y
+end
+
+function eval_function(
+ f::MOI.ScalarAffineFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+)::T where {T}
+ y = f.constant
+ for term in f.terms
+ y += term.coefficient * _value(term.variable, x, p)
+ end
+ return y
+end
+
+function eval_dense_gradient(
+ ∇f::Vector{T},
+ f::MOI.ScalarQuadraticFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+ adj::T,
+)::Nothing where {T}
+ for term in f.affine_terms
+ if !_is_parameter(term.variable)
+ ∇f[term.variable.value] += term.coefficient * adj
+ end
+ end
+ for term in f.quadratic_terms
+ if !_is_parameter(term.variable_1)
+ v = _value(term.variable_2, x, p)
+ ∇f[term.variable_1.value] += term.coefficient * v * adj
+ end
+ if term.variable_1 != term.variable_2 && !_is_parameter(term.variable_2)
+ v = _value(term.variable_1, x, p)
+ ∇f[term.variable_2.value] += term.coefficient * v * adj
+ end
+ end
+ return
+end
+
+function eval_dense_gradient(
+ ∇f::Vector{T},
+ f::MOI.ScalarAffineFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+ adj::T,
+)::Nothing where {T}
+ for term in f.terms
+ if !_is_parameter(term.variable)
+ ∇f[term.variable.value] += term.coefficient * adj
+ end
+ end
+ return
+end
+
+function append_sparse_gradient_structure!(
+ f::MOI.ScalarQuadraticFunction,
+ J,
+ row,
+)
+ for term in f.affine_terms
+ if !_is_parameter(term.variable)
+ push!(J, (row, term.variable.value))
+ end
+ end
+ for term in f.quadratic_terms
+ if !_is_parameter(term.variable_1)
+ push!(J, (row, term.variable_1.value))
+ end
+ if term.variable_1 != term.variable_2 && !_is_parameter(term.variable_2)
+ push!(J, (row, term.variable_2.value))
+ end
+ end
+ return
+end
+
+function append_sparse_gradient_structure!(f::MOI.ScalarAffineFunction, J, row)
+ for term in f.terms
+ if !_is_parameter(term.variable)
+ push!(J, (row, term.variable.value))
+ end
+ end
+ return
+end
+
+function eval_sparse_gradient(
+ ∇f::AbstractVector{T},
+ f::MOI.ScalarQuadraticFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+)::Int where {T}
+ i = 0
+ for term in f.affine_terms
+ if !_is_parameter(term.variable)
+ i += 1
+ ∇f[i] = term.coefficient
+ end
+ end
+ for term in f.quadratic_terms
+ if !_is_parameter(term.variable_1)
+ v = _value(term.variable_2, x, p)
+ i += 1
+ ∇f[i] = term.coefficient * v
+ end
+ if term.variable_1 != term.variable_2 && !_is_parameter(term.variable_2)
+ v = _value(term.variable_1, x, p)
+ i += 1
+ ∇f[i] = term.coefficient * v
+ end
+ end
+ return i
+end
+
+function eval_sparse_gradient(
+ ∇f::AbstractVector{T},
+ f::MOI.ScalarAffineFunction{T},
+ x::Vector{T},
+ p::Dict{Int64,T},
+)::Int where {T}
+ i = 0
+ for term in f.terms
+ if !_is_parameter(term.variable)
+ i += 1
+ ∇f[i] = term.coefficient
+ end
+ end
+ return i
+end
+
+function append_sparse_hessian_structure!(f::MOI.ScalarQuadraticFunction, H)
+ for term in f.quadratic_terms
+ if _is_parameter(term.variable_1) || _is_parameter(term.variable_2)
+ continue
+ end
+ push!(H, (term.variable_1.value, term.variable_2.value))
+ end
+ return
+end
+
+append_sparse_hessian_structure!(::MOI.ScalarAffineFunction, H) = nothing
+
+function eval_sparse_hessian(
+ ∇²f::AbstractVector{T},
+ f::MOI.ScalarQuadraticFunction{T},
+ σ::T,
+)::Int where {T}
+ i = 0
+ for term in f.quadratic_terms
+ if _is_parameter(term.variable_1) || _is_parameter(term.variable_2)
+ continue
+ end
+ i += 1
+ ∇²f[i] = term.coefficient * σ
+ end
+ return i
+end
+
+function eval_sparse_hessian(
+ ∇²f::AbstractVector{T},
+ f::MOI.ScalarAffineFunction{T},
+ σ::T,
+)::Int where {T}
+ return 0
+end
+
+Base.length(block::QPBlockData) = length(block.bound_type)
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ObjectiveFunction{F},
+ f::F,
+) where {T,F<:Union{MOI.VariableIndex,MOI.ScalarAffineFunction{T}}}
+ block.objective = convert(MOI.ScalarAffineFunction{T}, f)
+ block.objective_function_type = _function_info(f)
+ return
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ObjectiveFunction{MOI.ScalarQuadraticFunction{T}},
+ f::MOI.ScalarQuadraticFunction{T},
+) where {T}
+ block.objective = f
+ block.objective_function_type = _function_info(f)
+ return
+end
+
+function MOI.get(block::QPBlockData{T}, ::MOI.ObjectiveFunctionType) where {T}
+ return _function_type_to_set(T, block.objective_function_type)
+end
+
+function MOI.get(block::QPBlockData{T}, ::MOI.ObjectiveFunction{F}) where {T,F}
+ return convert(F, block.objective)
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.ListOfConstraintTypesPresent,
+) where {T}
+ constraints = Set{Tuple{Type,Type}}()
+ for i in 1:length(block)
+ F = _function_type_to_set(T, block.function_type[i])
+ S = _bound_type_to_set(T, block.bound_type[i])
+ push!(constraints, (F, S))
+ end
+ return collect(constraints)
+end
+
+function MOI.is_valid(
+ block::QPBlockData{T},
+ ci::MOI.ConstraintIndex{F,S},
+) where {
+ T,
+ F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
+ S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T},MOI.Interval{T}},
+}
+ return 1 <= ci.value <= length(block)
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.ListOfConstraintIndices{F,S},
+) where {
+ T,
+ F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
+ S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T},MOI.Interval{T}},
+}
+ ret = MOI.ConstraintIndex{F,S}[]
+ for i in 1:length(block)
+ if _bound_type_to_set(T, block.bound_type[i]) != S
+ continue
+ elseif _function_type_to_set(T, block.function_type[i]) != F
+ continue
+ end
+ push!(ret, MOI.ConstraintIndex{F,S}(i))
+ end
+ return ret
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.NumberOfConstraints{F,S},
+) where {
+ T,
+ F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
+ S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T},MOI.Interval{T}},
+}
+ return length(MOI.get(block, MOI.ListOfConstraintIndices{F,S}()))
+end
+
+function MOI.add_constraint(
+ block::QPBlockData{T},
+ f::Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
+ set::Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T},MOI.Interval{T}},
+) where {T}
+ push!(block.constraints, f)
+ bound_type, l, u = _set_info(set)
+ push!(block.g_L, l)
+ push!(block.g_U, u)
+ push!(block.mult_g, nothing)
+ push!(block.bound_type, bound_type)
+ push!(block.function_type, _function_info(f))
+ return MOI.ConstraintIndex{typeof(f),typeof(set)}(length(block.bound_type))
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.ConstraintFunction,
+ c::MOI.ConstraintIndex{F,S},
+) where {T,F,S}
+ return convert(F, block.constraints[c.value])
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.ConstraintSet,
+ c::MOI.ConstraintIndex{F,S},
+) where {T,F,S}
+ row = c.value
+ if block.bound_type[row] == _kBoundTypeEqualTo
+ return MOI.EqualTo(block.g_L[row])
+ elseif block.bound_type[row] == _kBoundTypeLessThan
+ return MOI.LessThan(block.g_U[row])
+ elseif block.bound_type[row] == _kBoundTypeGreaterThan
+ return MOI.GreaterThan(block.g_L[row])
+ else
+ @assert block.bound_type[row] == _kBoundTypeInterval
+ return MOI.Interval(block.g_L[row], block.g_U[row])
+ end
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ConstraintSet,
+ c::MOI.ConstraintIndex{F,MOI.LessThan{T}},
+ set::MOI.LessThan{T},
+) where {T,F}
+ row = c.value
+ block.g_U[row] = set.upper
+ return
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ConstraintSet,
+ c::MOI.ConstraintIndex{F,MOI.GreaterThan{T}},
+ set::MOI.GreaterThan{T},
+) where {T,F}
+ row = c.value
+ block.g_L[row] = set.lower
+ return
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ConstraintSet,
+ c::MOI.ConstraintIndex{F,MOI.EqualTo{T}},
+ set::MOI.EqualTo{T},
+) where {T,F}
+ row = c.value
+ block.g_L[row] = set.value
+ block.g_U[row] = set.value
+ return
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ConstraintSet,
+ c::MOI.ConstraintIndex{F,MOI.Interval{T}},
+ set::MOI.Interval{T},
+) where {T,F}
+ row = c.value
+ block.g_L[row] = set.lower
+ block.g_U[row] = set.upper
+ return
+end
+
+function MOI.get(
+ block::QPBlockData{T},
+ ::MOI.ConstraintDualStart,
+ c::MOI.ConstraintIndex{F,S},
+) where {T,F,S}
+ return block.mult_g[c.value]
+end
+
+function MOI.set(
+ block::QPBlockData{T},
+ ::MOI.ConstraintDualStart,
+ c::MOI.ConstraintIndex{F,S},
+ value,
+) where {T,F,S}
+ block.mult_g[c.value] = value
+ return
+end
+
+function MOI.eval_objective(
+ block::QPBlockData{T},
+ x::AbstractVector{T},
+) where {T}
+ return eval_function(block.objective, x, block.parameters)
+end
+
+function MOI.eval_objective_gradient(
+ block::QPBlockData{T},
+ ∇f::AbstractVector{T},
+ x::AbstractVector{T},
+) where {T}
+ ∇f .= zero(T)
+ eval_dense_gradient(∇f, block.objective, x, block.parameters, one(T))
+ return
+end
+
+function MOI.eval_constraint(
+ block::QPBlockData{T},
+ g::AbstractVector{T},
+ x::AbstractVector{T},
+) where {T}
+ for (i, constraint) in enumerate(block.constraints)
+ g[i] = eval_function(constraint, x, block.parameters)
+ end
+ return
+end
+
+function MOI.jacobian_structure(block::QPBlockData)
+ J = Tuple{Int,Int}[]
+ for (row, constraint) in enumerate(block.constraints)
+ append_sparse_gradient_structure!(constraint, J, row)
+ end
+ return J
+end
+
+function MOI.eval_constraint_jacobian(
+ block::QPBlockData{T},
+ J::AbstractVector{T},
+ x::AbstractVector{T},
+) where {T}
+ i = 1
+ fill!(J, zero(T))
+ for constraint in block.constraints
+ ∇f = view(J, i:length(J))
+ i += eval_sparse_gradient(∇f, constraint, x, block.parameters)
+ end
+ return i
+end
+
+function MOI.eval_constraint_jacobian_transpose_product(
+ block::QPBlockData{T},
+ Jtv::AbstractVector{T},
+ x::AbstractVector{T},
+ v::AbstractVector{T},
+) where {T}
+ for (i, constraint) in enumerate(block.constraints)
+ eval_dense_gradient(Jtv, constraint, x, block.parameters, v[i])
+ end
+ return Jtv
+end
+
+function MOI.hessian_lagrangian_structure(block::QPBlockData)
+ H = Tuple{Int,Int}[]
+ append_sparse_hessian_structure!(block.objective, H)
+ for constraint in block.constraints
+ append_sparse_hessian_structure!(constraint, H)
+ end
+ return H
+end
+
+function MOI.eval_hessian_lagrangian(
+ block::QPBlockData{T},
+ H::AbstractVector{T},
+ x::AbstractVector{T},
+ σ::T,
+ μ::AbstractVector{T},
+) where {T}
+ i = 1
+ i += eval_sparse_hessian(H, block.objective, σ)
+ for (row, constraint) in enumerate(block.constraints)
+ ∇²f = view(H, i:length(H))
+ i += eval_sparse_hessian(∇²f, constraint, μ[row])
+ end
+ return i
+end
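+
+# Typical evaluation sequence, as driven by the NLP solver (illustrative
+# sketch only; `block` is a populated QPBlockData, `x` a primal iterate,
+# `σ` the objective weight, and `μ` the constraint multipliers):
+#
+#     f = MOI.eval_objective(block, x)
+#     MOI.eval_constraint(block, g, x)
+#     J = zeros(length(MOI.jacobian_structure(block)))
+#     MOI.eval_constraint_jacobian(block, J, x)
+#     H = zeros(length(MOI.hessian_lagrangian_structure(block)))
+#     MOI.eval_hessian_lagrangian(block, H, x, σ, μ)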
diff --git a/lib/MadNLPGPU/Project.toml b/lib/MadNLPGPU/Project.toml
index cca9a570..c8155014 100644
--- a/lib/MadNLPGPU/Project.toml
+++ b/lib/MadNLPGPU/Project.toml
@@ -1,26 +1,34 @@
name = "MadNLPGPU"
uuid = "d72a61cc-809d-412f-99be-fd81f4b8a598"
-authors = ["Sungho Shin "]
-version = "0.5.0"
+version = "0.7.3"
[deps]
+AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
+CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
+CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
+MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
+Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[compat]
-CUDA = "~4"
-CUDAKernels = "0.4"
-KernelAbstractions = "0.8"
-MadNLP = "0.6"
-MadNLPTests = "~0.3"
+AMD = "0.5"
+CUDA = "5"
+CUDSS = "0.3.1"
+CUSOLVERRF = "0.2"
+KernelAbstractions = "0.9"
+MadNLP = "0.7, 0.8"
+MadNLPTests = "0.5"
+Metis = "1"
julia = "1.7"
[extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Test", "MadNLPTests"]
+test = ["Test", "MadNLPTests", "CUDA"]
diff --git a/lib/MadNLPGPU/src/KKT/dense.jl b/lib/MadNLPGPU/src/KKT/dense.jl
new file mode 100644
index 00000000..efae5525
--- /dev/null
+++ b/lib/MadNLPGPU/src/KKT/dense.jl
@@ -0,0 +1,235 @@
+#=
+ GPU wrappers for DenseKKTSystem/DenseCondensedKKTSystem
+=#
+
+#=
+ MadNLP.diag!
+=#
+
+@kernel function _copy_diag_kernel!(dest, src)
+ i = @index(Global)
+ @inbounds dest[i] = src[i, i]
+end
+
+function MadNLP.diag!(dest::CuVector{T}, src::CuMatrix{T}) where {T}
+ @assert length(dest) == size(src, 1)
+ _copy_diag_kernel!(CUDABackend())(dest, src, ndrange = length(dest))
+ synchronize(CUDABackend())
+ return
+end
+
+#=
+ MadNLP.diag_add!
+=#
+
+@kernel function _add_diagonal_kernel!(dest, src1, src2)
+ i = @index(Global)
+ @inbounds dest[i, i] = src1[i] + src2[i]
+end
+
+function MadNLP.diag_add!(dest::CuMatrix, src1::CuVector, src2::CuVector)
+ _add_diagonal_kernel!(CUDABackend())(dest, src1, src2, ndrange = size(dest, 1))
+ synchronize(CUDABackend())
+ return
+end
+
+#=
+ MadNLP._set_diag!
+=#
+
+@kernel function _set_diag_kernel!(A, inds, a)
+ i = @index(Global)
+ @inbounds begin
+ index = inds[i]
+ A[index, index] = a
+ end
+end
+
+function MadNLP._set_diag!(A::CuMatrix, inds, a)
+ if !isempty(inds)
+ _set_diag_kernel!(CUDABackend())(A, inds, a; ndrange = length(inds))
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+#=
+ MadNLP._build_dense_kkt_system!
+=#
+
+@kernel function _build_dense_kkt_system_kernel!(
+ dest,
+ hess,
+ jac,
+ pr_diag,
+ du_diag,
+ diag_hess,
+ ind_ineq,
+ n,
+ m,
+ ns,
+)
+ i, j = @index(Global, NTuple)
+ @inbounds if (i <= n)
+ # Transfer Hessian
+ if (i == j)
+ dest[i, i] = pr_diag[i] + diag_hess[i]
+ else
+ dest[i, j] = hess[i, j]
+ end
+ elseif i <= n + ns
+ # Transfer slack diagonal
+ dest[i, i] = pr_diag[i]
+ # Transfer Jacobian wrt slack
+ js = i - n
+ is = ind_ineq[js]
+ dest[is+n+ns, is+n] = -1
+ dest[is+n, is+n+ns] = -1
+ elseif i <= n + ns + m
+ # Transfer Jacobian wrt variable x
+ i_ = i - n - ns
+ dest[i, j] = jac[i_, j]
+ dest[j, i] = jac[i_, j]
+ # Transfer dual regularization
+ dest[i, i] = du_diag[i_]
+ end
+end
+
+function MadNLP._build_dense_kkt_system!(
+ dest::CuMatrix,
+ hess::CuMatrix,
+ jac::CuMatrix,
+ pr_diag::CuVector,
+ du_diag::CuVector,
+ diag_hess::CuVector,
+ ind_ineq,
+ n,
+ m,
+ ns,
+)
+ ind_ineq_gpu = ind_ineq |> CuArray
+ ndrange = (n + m + ns, n)
+ _build_dense_kkt_system_kernel!(CUDABackend())(
+ dest,
+ hess,
+ jac,
+ pr_diag,
+ du_diag,
+ diag_hess,
+ ind_ineq_gpu,
+ n,
+ m,
+ ns,
+ ndrange = ndrange,
+ )
+ synchronize(CUDABackend())
+ return
+end
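+
+# Schematically, the kernel assembles the augmented dense KKT matrix
+#
+#     [ W + Σₓ   0    Aᵀ ]   variables  (rows 1:n)
+#     [ 0        Σₛ  -I  ]   slacks     (rows n+1:n+ns)
+#     [ A       -I    Δ  ]   duals      (rows n+ns+1:n+ns+m)
+#
+# where Σ = pr_diag, Δ = du_diag, and the -I blocks couple each slack with
+# its inequality row (mapped through ind_ineq); one work-item per (i, j) pair.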
+
+#=
+ MadNLP._build_ineq_jac!
+=#
+
+@kernel function _build_jacobian_condensed_kernel!(dest, jac, diag_buffer, ind_ineq, m_ineq)
+ i, j = @index(Global, NTuple)
+ @inbounds is = ind_ineq[i]
+ @inbounds dest[i, j] = jac[is, j] * sqrt(diag_buffer[i])
+end
+
+function MadNLP._build_ineq_jac!(
+ dest::CuMatrix,
+ jac::CuMatrix,
+ diag_buffer::CuVector,
+ ind_ineq::AbstractVector,
+ n,
+ m_ineq,
+)
+ (m_ineq == 0) && return # nothing to do if no ineq. constraints
+ ind_ineq_gpu = ind_ineq |> CuArray
+ ndrange = (m_ineq, n)
+ _build_jacobian_condensed_kernel!(CUDABackend())(
+ dest,
+ jac,
+ diag_buffer,
+ ind_ineq_gpu,
+ m_ineq,
+ ndrange = ndrange,
+ )
+ synchronize(CUDABackend())
+ return
+end
+
+#=
+ MadNLP._build_condensed_kkt_system!
+=#
+
+@kernel function _build_condensed_kkt_system_kernel!(
+ dest,
+ hess,
+ jac,
+ pr_diag,
+ du_diag,
+ ind_eq,
+ n,
+ m_eq,
+)
+ i, j = @index(Global, NTuple)
+
+ # Transfer Hessian
+ @inbounds if i <= n
+ if i == j
+ dest[i, i] += pr_diag[i] + hess[i, i]
+ else
+ dest[i, j] += hess[i, j]
+ end
+ elseif i <= n + m_eq
+ i_ = i - n
+ is = ind_eq[i_]
+ # Jacobian / equality
+ dest[i_+n, j] = jac[is, j]
+ dest[j, i_+n] = jac[is, j]
+ # Transfer dual regularization
+ dest[i_+n, i_+n] = du_diag[is]
+ end
+end
+
+function MadNLP._build_condensed_kkt_system!(
+ dest::CuMatrix,
+ hess::CuMatrix,
+ jac::CuMatrix,
+ pr_diag::CuVector,
+ du_diag::CuVector,
+ ind_eq::AbstractVector,
+ n,
+ m_eq,
+)
+ ind_eq_gpu = ind_eq |> CuArray
+ ndrange = (n + m_eq, n)
+ _build_condensed_kkt_system_kernel!(CUDABackend())(
+ dest,
+ hess,
+ jac,
+ pr_diag,
+ du_diag,
+ ind_eq_gpu,
+ n,
+ m_eq,
+ ndrange = ndrange,
+ )
+ synchronize(CUDABackend())
+ return
+end
+
+if VERSION < v"1.10"
+ function MadNLP.mul_hess_blk!(
+ wx::CuVector{T},
+ kkt::Union{MadNLP.DenseKKTSystem,MadNLP.DenseCondensedKKTSystem},
+ t,
+ ) where {T}
+ n = size(kkt.hess, 1)
+ CUDA.CUBLAS.symv!('L', one(T), kkt.hess, @view(t[1:n]), zero(T), @view(wx[1:n]))
+ fill!(@view(wx[n+1:end]), 0)
+ return wx .+= t .* kkt.pr_diag
+ end
+end
+
diff --git a/lib/MadNLPGPU/src/KKT/sparse.jl b/lib/MadNLPGPU/src/KKT/sparse.jl
new file mode 100644
index 00000000..47d229cc
--- /dev/null
+++ b/lib/MadNLPGPU/src/KKT/sparse.jl
@@ -0,0 +1,617 @@
+#=
+ GPU wrappers for SparseCondensedKKTSystem.
+=#
+
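+# mul! applies the *uncondensed* KKT operator: schematically, with
+# w = (wx, ws, wz) and x = (xx, xs, xz),
+#
+#     wx = alpha * (H xx + Jᵀ xz) + beta * wx
+#     ws = -alpha * xz             (plus regularization via _kktmul!)
+#     wz = alpha * (J xx - xs) + beta * wz
+#
+# H is stored as a lower triangle, so it is applied as hess_com + hess_comᵀ
+# with the doubled diagonal removed through diag_map_to / diag_map_fr.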
+function MadNLP.mul!(
+ w::MadNLP.AbstractKKTVector{T,VT},
+ kkt::MadNLP.SparseCondensedKKTSystem,
+ x::MadNLP.AbstractKKTVector,
+ alpha = one(T),
+ beta = zero(T),
+) where {T,VT<:CuVector{T}}
+ n = size(kkt.hess_com, 1)
+ m = size(kkt.jt_csc, 2)
+
+ # Decompose results
+ xx = view(MadNLP.full(x), 1:n)
+ xs = view(MadNLP.full(x), n+1:n+m)
+ xz = view(MadNLP.full(x), n+m+1:n+2*m)
+
+ # Decompose buffers
+ wx = view(MadNLP.full(w), 1:n)
+ ws = view(MadNLP.full(w), n+1:n+m)
+ wz = view(MadNLP.full(w), n+m+1:n+2*m)
+
+ MadNLP.mul!(wx, kkt.hess_com, xx, alpha, beta)
+ MadNLP.mul!(wx, kkt.hess_com', xx, alpha, one(T))
+    MadNLP.mul!(wx, kkt.jt_csc, xz, alpha, one(T))  # accumulate on top of the Hessian contribution
+ if !isempty(kkt.ext.diag_map_to)
+ _diag_operation_kernel!(CUDABackend())(
+ wx,
+ kkt.hess_com.nzVal,
+ xx,
+ alpha,
+ kkt.ext.diag_map_to,
+ kkt.ext.diag_map_fr;
+ ndrange = length(kkt.ext.diag_map_to),
+ )
+ synchronize(CUDABackend())
+ end
+
+    MadNLP.mul!(wz, kkt.jt_csc', xx, alpha, beta)
+ MadNLP.axpy!(-alpha, xz, ws)
+ MadNLP.axpy!(-alpha, xs, wz)
+ return MadNLP._kktmul!(
+ w,
+ x,
+ kkt.reg,
+ kkt.du_diag,
+ kkt.l_lower,
+ kkt.u_lower,
+ kkt.l_diag,
+ kkt.u_diag,
+ alpha,
+ beta,
+ )
+end
+
+function MadNLP.mul_hess_blk!(
+ wx::VT,
+ kkt::Union{MadNLP.SparseKKTSystem,MadNLP.SparseCondensedKKTSystem},
+ t,
+) where {T,VT<:CuVector{T}}
+ n = size(kkt.hess_com, 1)
+ wxx = @view(wx[1:n])
+ tx = @view(t[1:n])
+
+ MadNLP.mul!(wxx, kkt.hess_com, tx, one(T), zero(T))
+ MadNLP.mul!(wxx, kkt.hess_com', tx, one(T), one(T))
+ if !isempty(kkt.ext.diag_map_to)
+ _diag_operation_kernel!(CUDABackend())(
+ wxx,
+ kkt.hess_com.nzVal,
+ tx,
+ one(T),
+ kkt.ext.diag_map_to,
+ kkt.ext.diag_map_fr;
+ ndrange = length(kkt.ext.diag_map_to),
+ )
+ synchronize(CUDABackend())
+ end
+
+ fill!(@view(wx[n+1:end]), 0)
+ wx .+= t .* kkt.pr_diag
+ return
+end
+
+function MadNLP.get_tril_to_full(csc::CUSPARSE.CuSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+ cscind = MadNLP.SparseMatrixCSC{Int,Ti}(
+ Symmetric(
+ MadNLP.SparseMatrixCSC{Int,Ti}(
+ size(csc)...,
+ Array(csc.colPtr),
+ Array(csc.rowVal),
+ collect(1:MadNLP.nnz(csc)),
+ ),
+ :L,
+ ),
+ )
+ return CUSPARSE.CuSparseMatrixCSC{Tv,Ti}(
+ CuArray(cscind.colptr),
+ CuArray(cscind.rowval),
+ CuVector{Tv}(undef, MadNLP.nnz(cscind)),
+ size(csc),
+ ),
+ view(csc.nzVal, CuArray(cscind.nzval))
+end
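+
+# `get_tril_to_full` returns the full (symmetric) sparsity pattern built from
+# the lower-triangular input, together with a view mapping each nonzero of the
+# full pattern back to its lower-triangular source, so that
+# `copyto!(full.nzVal, tril_to_full_view)` refreshes the numerical values
+# without rebuilding the symbolic pattern (see RFSolver/GLUSolver below).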
+
+function MadNLP.get_sparse_condensed_ext(
+ ::Type{VT},
+ hess_com,
+ jptr,
+ jt_map,
+ hess_map,
+) where {T,VT<:CuVector{T}}
+ hess_com_ptr = map((i, j) -> (i, j), hess_map, 1:length(hess_map))
+    if length(hess_com_ptr) > 0 # sort! errors on empty device arrays
+ sort!(hess_com_ptr)
+ end
+
+ jt_csc_ptr = map((i, j) -> (i, j), jt_map, 1:length(jt_map))
+    if length(jt_csc_ptr) > 0 # sort! errors on empty device arrays
+ sort!(jt_csc_ptr)
+ end
+
+ by = (i, j) -> i[1] != j[1]
+ jptrptr = MadNLP.getptr(jptr, by = by)
+ hess_com_ptrptr = MadNLP.getptr(hess_com_ptr, by = by)
+ jt_csc_ptrptr = MadNLP.getptr(jt_csc_ptr, by = by)
+
+ diag_map_to, diag_map_fr = get_diagonal_mapping(hess_com.colPtr, hess_com.rowVal)
+
+ return (
+ jptrptr = jptrptr,
+ hess_com_ptr = hess_com_ptr,
+ hess_com_ptrptr = hess_com_ptrptr,
+ jt_csc_ptr = jt_csc_ptr,
+ jt_csc_ptrptr = jt_csc_ptrptr,
+ diag_map_to = diag_map_to,
+ diag_map_fr = diag_map_fr,
+ )
+end
+
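+# Locates the diagonal entries of the lower-triangular `hess_com`: returns
+# `(cols, ptrs)` where `ptrs[k]` is the position in `nzVal` of the diagonal
+# entry of column `cols[k]`. Since `mul!` accumulates both `hess_com` and
+# `hess_com'`, the diagonal would be counted twice; `_diag_operation_kernel!`
+# uses this mapping to subtract the duplicate contribution.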
+function get_diagonal_mapping(colptr, rowval)
+ nnz = length(rowval)
+ if nnz == 0
+ return similar(colptr, 0), similar(colptr, 0)
+ end
+ inds1 = findall(
+ map(
+ (x, y) -> ((x <= nnz) && (x != y)),
+ @view(colptr[1:end-1]),
+ @view(colptr[2:end])
+ ),
+ )
+ if length(inds1) == 0
+        return similar(colptr, 0), similar(colptr, 0)
+ end
+ ptrs = colptr[inds1]
+ rows = rowval[ptrs]
+ inds2 = findall(inds1 .== rows)
+ if length(inds2) == 0
+ return similar(rows, 0), similar(ptrs, 0)
+ end
+
+ return rows[inds2], ptrs[inds2]
+end
+
+function MadNLP._sym_length(Jt::CUSPARSE.CuSparseMatrixCSC)
+ return mapreduce(
+ (x, y) -> begin
+ z = x - y
+ div(z^2 + z, 2)
+ end,
+ +,
+ @view(Jt.colPtr[2:end]),
+ @view(Jt.colPtr[1:end-1])
+ )
+end
+
+function MadNLP._first_and_last_col(sym2::CuVector, ptr2)
+ CUDA.@allowscalar begin
+ first = sym2[1][2]
+ last = sym2[ptr2[end]][2]
+ end
+ return (first, last)
+end
+
+MadNLP.nzval(H::CUSPARSE.CuSparseMatrixCSC) = H.nzVal
+
+function MadNLP._get_sparse_csc(dims, colptr::CuVector, rowval, nzval)
+ return CUSPARSE.CuSparseMatrixCSC(colptr, rowval, nzval, dims)
+end
+
+function getij(idx, n)
+ j = ceil(Int, ((2n + 1) - sqrt((2n + 1)^2 - 8 * idx)) / 2)
+ i = idx - div((j - 1) * (2n - j), 2)
+ return (i, j)
+end
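+
+# `getij` inverts the column-major packed ordering of the lower triangle:
+# entry (i, j) with i >= j is stored at idx = (j - 1) * (2n - j) ÷ 2 + i.
+# A minimal CPU sanity check (illustrative only, not part of the API):
+#
+#     n = 4
+#     idx = 0
+#     for j in 1:n, i in j:n
+#         idx += 1
+#         @assert getij(idx, n) == (i, j)
+#     end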
+
+
+
+#=
+ MadNLP._set_colptr!
+=#
+
+@kernel function _set_colptr_kernel!(colptr, @Const(sym2), @Const(ptr2), @Const(guide))
+ idx = @index(Global)
+ @inbounds begin
+ i = ptr2[idx+1]
+
+ (~, prevcol) = sym2[i-1]
+ (row, col) = sym2[i]
+ g = guide[i]
+ for j in prevcol+1:col
+ colptr[j] = g
+ end
+ end
+end
+
+function MadNLP._set_colptr!(colptr::CuVector, ptr2, sym2, guide)
+    if length(ptr2) > 1 # an empty kernel launch would error
+ _set_colptr_kernel!(CUDABackend())(
+ colptr,
+ sym2,
+ ptr2,
+ guide;
+ ndrange = length(ptr2) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+
+#=
+ MadNLP.tril_to_full!
+=#
+
+@kernel function _tril_to_full_kernel!(dense)
+ idx = @index(Global)
+ n = size(dense, 1)
+ i, j = getij(idx, n)
+ @inbounds dense[j, i] = dense[i, j]
+end
+
+function MadNLP.tril_to_full!(dense::CuMatrix{T}) where {T}
+ n = size(dense, 1)
+ _tril_to_full_kernel!(CUDABackend())(dense; ndrange = div(n^2 + n, 2))
+ synchronize(CUDABackend())
+ return
+end
+
+#=
+ MadNLP.force_lower_triangular!
+=#
+
+@kernel function _force_lower_triangular_kernel!(I, J)
+ i = @index(Global)
+
+ @inbounds if J[i] > I[i]
+ tmp = J[i]
+ J[i] = I[i]
+ I[i] = tmp
+ end
+end
+
+function MadNLP.force_lower_triangular!(I::CuVector{T}, J) where {T}
+ if !isempty(I)
+ _force_lower_triangular_kernel!(CUDABackend())(I, J; ndrange = length(I))
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+#=
+ MadNLP.coo_to_csc
+=#
+
+@kernel function _set_coo_to_colptr_kernel!(colptr, @Const(coord))
+ index = @index(Global)
+
+ @inbounds begin
+ if index == 1
+ ((i2, j2), k2) = coord[index]
+ for k in 1:j2
+ colptr[k] = 1
+ end
+ if index == length(coord)
+ ip1 = index + 1
+ for k in j2+1:length(colptr)
+ colptr[k] = ip1
+ end
+ end
+ else
+ ((i1, j1), k1) = coord[index-1]
+ ((i2, j2), k2) = coord[index]
+ if j1 != j2
+ for k in j1+1:j2
+ colptr[k] = index
+ end
+ end
+ if index == length(coord)
+ ip1 = index + 1
+ for k in j2+1:length(colptr)
+ colptr[k] = ip1
+ end
+ end
+ end
+ end
+end
+
+@kernel function _set_coo_to_csc_map_kernel!(cscmap, @Const(mapptr), @Const(coord))
+ index = @index(Global)
+ @inbounds for l in mapptr[index]:mapptr[index+1]-1
+ ((i, j), k) = coord[l]
+ cscmap[k] = index
+ end
+end
+
+function MadNLP.coo_to_csc(
+ coo::MadNLP.SparseMatrixCOO{T,I,VT,VI},
+) where {T,I,VT<:CuArray,VI<:CuArray}
+ coord = map((i, j, k) -> ((i, j), k), coo.I, coo.J, 1:length(coo.I))
+ if length(coord) > 0
+ sort!(coord, lt = (((i, j), k), ((n, m), l)) -> (j, i) < (m, n))
+ end
+
+ mapptr = MadNLP.getptr(coord; by = ((x1, x2), (y1, y2)) -> x1 != y1)
+
+ colptr = similar(coo.I, size(coo, 2) + 1)
+
+ coord_csc = coord[@view(mapptr[1:end-1])]
+
+ if length(coord_csc) > 0
+ _set_coo_to_colptr_kernel!(CUDABackend())(
+ colptr,
+ coord_csc,
+ ndrange = length(coord_csc),
+ )
+ synchronize(CUDABackend())
+ else
+ fill!(colptr, one(Int))
+ end
+
+ rowval = map(x -> x[1][1], coord_csc)
+ nzval = similar(rowval, T)
+
+ csc = CUSPARSE.CuSparseMatrixCSC(colptr, rowval, nzval, size(coo))
+
+ cscmap = similar(coo.I, Int)
+ if length(mapptr) > 1
+ _set_coo_to_csc_map_kernel!(CUDABackend())(
+ cscmap,
+ mapptr,
+ coord,
+ ndrange = length(mapptr) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+
+ return csc, cscmap
+end
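+
+# Illustrative sketch of the returned mapping (host notation): with
+# I = [1, 2, 1] and J = [1, 1, 1], the duplicate coordinate (1, 1) is
+# compressed into a single CSC nonzero, and `cscmap[k]` gives, for each COO
+# entry `k`, the CSC nonzero it accumulates into; here cscmap == [1, 2, 1].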
+
+#=
+ MadNLP.build_condensed_aug_coord!
+=#
+
+@kernel function _transfer_hessian_kernel!(y, @Const(ptr), @Const(x))
+ index = @index(Global)
+ @inbounds i, j = ptr[index]
+ @inbounds y[i] += x[j]
+end
+
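+# Accumulates the JᵀΣJ term of the condensed matrix: each entry
+# `(i, (j, k, l))` of `ptr` adds `s[j] * x[k] * x[l]` (the diagonal weight of
+# constraint j times two nonzeros of column j of Jᵀ) into destination nonzero
+# `i`; `ptrptr` groups the entries by destination so that each work-item owns
+# one output and no atomics are needed.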
+@kernel function _transfer_jtsj_kernel!(y, @Const(ptr), @Const(ptrptr), @Const(x), @Const(s))
+ index = @index(Global)
+ @inbounds for index2 in ptrptr[index]:ptrptr[index+1]-1
+ i, (j, k, l) = ptr[index2]
+ y[i] += s[j] * x[k] * x[l]
+ end
+end
+
+function MadNLP.build_condensed_aug_coord!(
+ kkt::MadNLP.AbstractCondensedKKTSystem{T,VT,MT},
+) where {T,VT,MT<:CUSPARSE.CuSparseMatrixCSC{T}}
+ fill!(kkt.aug_com.nzVal, zero(T))
+ if length(kkt.hptr) > 0
+ _transfer_hessian_kernel!(CUDABackend())(
+ kkt.aug_com.nzVal,
+ kkt.hptr,
+ kkt.hess_com.nzVal;
+ ndrange = length(kkt.hptr),
+ )
+ synchronize(CUDABackend())
+ end
+ if length(kkt.dptr) > 0
+ _transfer_hessian_kernel!(CUDABackend())(
+ kkt.aug_com.nzVal,
+ kkt.dptr,
+ kkt.pr_diag;
+ ndrange = length(kkt.dptr),
+ )
+ synchronize(CUDABackend())
+ end
+    if length(kkt.ext.jptrptr) > 1 # an empty kernel launch would error
+ _transfer_jtsj_kernel!(CUDABackend())(
+ kkt.aug_com.nzVal,
+ kkt.jptr,
+ kkt.ext.jptrptr,
+ kkt.jt_csc.nzVal,
+ kkt.diag_buffer;
+ ndrange = length(kkt.ext.jptrptr) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+@kernel function _diag_operation_kernel!(
+ y,
+ @Const(A),
+ @Const(x),
+ @Const(alpha),
+ @Const(idx_to),
+ @Const(idx_fr)
+)
+ i = @index(Global)
+ @inbounds begin
+ to = idx_to[i]
+ fr = idx_fr[i]
+ y[to] -= alpha * A[fr] * x[to]
+ end
+end
+
+#=
+ MadNLP.compress_hessian! / MadNLP.compress_jacobian!
+=#
+
+@kernel function _transfer_to_csc_kernel!(y, @Const(ptr), @Const(ptrptr), @Const(x))
+ index = @index(Global)
+ @inbounds for index2 in ptrptr[index]:ptrptr[index+1]-1
+ i, j = ptr[index2]
+ y[i] += x[j]
+ end
+end
+
+function MadNLP.compress_hessian!(
+ kkt::MadNLP.AbstractSparseKKTSystem{T,VT,MT},
+) where {T,VT,MT<:CUSPARSE.CuSparseMatrixCSC{T,Int32}}
+ fill!(kkt.hess_com.nzVal, zero(T))
+ if length(kkt.ext.hess_com_ptrptr) > 1
+ _transfer_to_csc_kernel!(CUDABackend())(
+ kkt.hess_com.nzVal,
+ kkt.ext.hess_com_ptr,
+ kkt.ext.hess_com_ptrptr,
+ kkt.hess_raw.V;
+ ndrange = length(kkt.ext.hess_com_ptrptr) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+function MadNLP.compress_jacobian!(
+ kkt::MadNLP.SparseCondensedKKTSystem{T,VT,MT},
+) where {T,VT,MT<:CUDA.CUSOLVER.CuSparseMatrixCSC{T,Int32}}
+ fill!(kkt.jt_csc.nzVal, zero(T))
+    if length(kkt.ext.jt_csc_ptrptr) > 1 # an empty kernel launch would error
+ _transfer_to_csc_kernel!(CUDABackend())(
+ kkt.jt_csc.nzVal,
+ kkt.ext.jt_csc_ptr,
+ kkt.ext.jt_csc_ptrptr,
+ kkt.jt_coo.V;
+ ndrange = length(kkt.ext.jt_csc_ptrptr) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+#=
+ MadNLP._set_con_scale_sparse!
+=#
+
+@kernel function _set_con_scale_sparse_kernel!(
+ con_scale,
+ @Const(ptr),
+ @Const(inds),
+ @Const(jac_I),
+ @Const(jac_buffer)
+)
+ index = @index(Global)
+
+ @inbounds begin
+ rng = ptr[index]:ptr[index+1]-1
+
+ for k in rng
+ (row, i) = inds[k]
+ con_scale[row] = max(con_scale[row], abs(jac_buffer[i]))
+ end
+ end
+end
+
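+# Computes `con_scale[r] = max_i |jac_buffer[i]|` over the entries of each
+# Jacobian row r, entirely on the device: entries are sorted by row, grouped
+# with `getptr`, and each work-item reduces one row group, so no atomic
+# operations are required.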
+function MadNLP._set_con_scale_sparse!(
+ con_scale::VT,
+ jac_I,
+ jac_buffer,
+) where {T,VT<:CuVector{T}}
+ inds = map((i, j) -> (i, j), jac_I, 1:length(jac_I))
+ if !isempty(inds)
+ sort!(inds)
+ end
+ ptr = MadNLP.getptr(inds; by = ((x1, x2), (y1, y2)) -> x1 != y1)
+ if length(ptr) > 1
+ _set_con_scale_sparse_kernel!(CUDABackend())(
+ con_scale,
+ ptr,
+ inds,
+ jac_I,
+ jac_buffer;
+ ndrange = length(ptr) - 1,
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+#=
+ MadNLP._build_condensed_aug_symbolic_hess
+=#
+
+@kernel function _build_condensed_aug_symbolic_hess_kernel!(
+ sym,
+ sym2,
+ @Const(colptr),
+ @Const(rowval)
+)
+ i = @index(Global)
+ @inbounds for j in colptr[i]:colptr[i+1]-1
+ c = rowval[j]
+ sym[j] = (0, j, 0)
+ sym2[j] = (c, i)
+ end
+end
+
+function MadNLP._build_condensed_aug_symbolic_hess(
+ H::CUSPARSE.CuSparseMatrixCSC{Tv,Ti},
+ sym,
+ sym2,
+) where {Tv,Ti}
+ if size(H, 2) > 0
+ _build_condensed_aug_symbolic_hess_kernel!(CUDABackend())(
+ sym,
+ sym2,
+ H.colPtr,
+ H.rowVal;
+ ndrange = size(H, 2),
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
+#=
+ MadNLP._build_condensed_aug_symbolic_jt
+=#
+
+@kernel function _build_condensed_aug_symbolic_jt_kernel!(
+ sym,
+ sym2,
+ @Const(colptr),
+ @Const(rowval),
+ @Const(offsets)
+)
+ i = @index(Global)
+ @inbounds begin
+ cnt = if i == 1
+ 0
+ else
+ offsets[i-1]
+ end
+ for j in colptr[i]:colptr[i+1]-1
+ c1 = rowval[j]
+ for k in j:colptr[i+1]-1
+ c2 = rowval[k]
+ cnt += 1
+ sym[cnt] = (i, j, k)
+ sym2[cnt] = (c2, c1)
+ end
+ end
+ end
+end
+
+function MadNLP._build_condensed_aug_symbolic_jt(
+ Jt::CUSPARSE.CuSparseMatrixCSC{Tv,Ti},
+ sym,
+ sym2,
+) where {Tv,Ti}
+ if size(Jt, 2) > 0
+ _offsets = map(
+ (i, j) -> div((j - i)^2 + (j - i), 2),
+ @view(Jt.colPtr[1:end-1]),
+ @view(Jt.colPtr[2:end])
+ )
+ offsets = cumsum(_offsets)
+ _build_condensed_aug_symbolic_jt_kernel!(CUDABackend())(
+ sym,
+ sym2,
+ Jt.colPtr,
+ Jt.rowVal,
+ offsets;
+ ndrange = size(Jt, 2),
+ )
+ synchronize(CUDABackend())
+ end
+ return
+end
+
diff --git a/lib/MadNLPGPU/src/LinearSolvers/cudss.jl b/lib/MadNLPGPU/src/LinearSolvers/cudss.jl
new file mode 100644
index 00000000..07e2fa4c
--- /dev/null
+++ b/lib/MadNLPGPU/src/LinearSolvers/cudss.jl
@@ -0,0 +1,119 @@
+import CUDSS
+
+@kwdef mutable struct CudssSolverOptions <: MadNLP.AbstractOptions
+ # Use LDLᵀ by default in CUDSS as Cholesky can lead to undefined behavior.
+ cudss_algorithm::MadNLP.LinearFactorization = MadNLP.LDL
+ ordering::ORDERING = DEFAULT_ORDERING
+ perm::Vector{Cint} = Cint[]
+ ir::Int = 0
+ hybrid::Bool = false
+end
+
+mutable struct CUDSSSolver{T} <: MadNLP.AbstractLinearSolver{T}
+ inner::Union{Nothing, CUDSS.CudssSolver}
+ tril::CUSPARSE.CuSparseMatrixCSC{T}
+ x_gpu::CUDA.CuVector{T}
+ b_gpu::CUDA.CuVector{T}
+
+ opt::CudssSolverOptions
+ logger::MadNLP.MadNLPLogger
+end
+
+function CUDSSSolver(
+ csc::CUSPARSE.CuSparseMatrixCSC{T};
+ opt=CudssSolverOptions(),
+ logger=MadNLP.MadNLPLogger(),
+) where T
+ n, m = size(csc)
+ @assert n == m
+
+ view = 'U'
+    structure = if opt.cudss_algorithm == MadNLP.LU
+        "G"
+    elseif opt.cudss_algorithm == MadNLP.CHOLESKY
+        "SPD"
+    elseif opt.cudss_algorithm == MadNLP.LDL
+        "S"
+    else
+        error("The factorization $(opt.cudss_algorithm) is not supported by CUDSSSolver.")
+    end
+
+ matrix = CUDSS.CudssMatrix(
+ CUSPARSE.CuSparseMatrixCSR(csc.colPtr, csc.rowVal, csc.nzVal, csc.dims),
+ structure,
+ view
+ )
+
+ # TODO: pass config options here.
+ config = CUDSS.CudssConfig()
+ data = CUDSS.CudssData()
+ solver = CUDSS.CudssSolver(matrix, config, data)
+
+ if opt.ordering != DEFAULT_ORDERING
+ if opt.ordering == METIS_ORDERING
+ A = SparseMatrixCSC(csc)
+ A = A + A' - LinearAlgebra.Diagonal(A)
+ G = Metis.graph(A, check_hermitian=false)
+ opt.perm, _ = Metis.permutation(G)
+ elseif opt.ordering == AMD_ORDERING
+ A = SparseMatrixCSC(csc)
+ opt.perm = AMD.amd(A)
+ elseif opt.ordering == USER_ORDERING
+ (!isempty(opt.perm) && isperm(opt.perm)) || error("The vector opt.perm is not a valid permutation.")
+ else
+ error("The ordering $(opt.ordering) is not supported.")
+ end
+ CUDSS.cudss_set(solver, "user_perm", opt.perm)
+ end
+ (opt.ir > 0) && CUDSS.cudss_set(solver, "ir_n_steps", opt.ir)
+ opt.hybrid && CUDSS.cudss_set(solver, "hybrid_mode", 1)
+
+ x_gpu = CUDA.zeros(T, n)
+ b_gpu = CUDA.zeros(T, n)
+
+ CUDSS.cudss("analysis", solver, x_gpu, b_gpu)
+
+ return CUDSSSolver(
+ solver, csc,
+ # full, tril_to_full_view,
+ x_gpu, b_gpu,
+ opt, logger
+ )
+end
+
+function MadNLP.factorize!(M::CUDSSSolver)
+ # copyto!(M.full.nzVal, M.tril_to_full_view)
+ CUDSS.cudss_set(M.inner.matrix, nonzeros(M.tril))
+ CUDSS.cudss("factorization", M.inner, M.x_gpu, M.b_gpu)
+ synchronize(CUDABackend())
+ return M
+end
+
+function MadNLP.solve!(M::CUDSSSolver{T}, x) where T
+ CUDSS.cudss("solve", M.inner, M.x_gpu, x)
+ synchronize(CUDABackend())
+ copyto!(x, M.x_gpu)
+ return x
+end
+
+MadNLP.input_type(::Type{CUDSSSolver}) = :csc
+MadNLP.default_options(::Type{CUDSSSolver}) = CudssSolverOptions()
+MadNLP.is_inertia(M::CUDSSSolver) = (M.opt.cudss_algorithm ∈ (MadNLP.CHOLESKY, MadNLP.LDL))
+function MadNLP.inertia(M::CUDSSSolver)
+ n = size(M.tril, 1)
+ if M.opt.cudss_algorithm == MadNLP.CHOLESKY
+ info = CUDSS.cudss_get(M.inner, "info")
+ if info == 0
+ return (n, 0, 0)
+ else
+ return (0, n, 0)
+ end
+ elseif M.opt.cudss_algorithm == MadNLP.LDL
+ # N.B.: cuDSS does not always return the correct inertia.
+ (k, l) = CUDSS.cudss_get(M.inner, "inertia")
+ k = min(n, k) # TODO: add safeguard for inertia
+ return (k, n - k - l, l)
+ end
+end
+MadNLP.improve!(M::CUDSSSolver) = false
+MadNLP.is_supported(::Type{CUDSSSolver},::Type{Float32}) = true
+MadNLP.is_supported(::Type{CUDSSSolver},::Type{Float64}) = true
+MadNLP.introduce(M::CUDSSSolver) = "cuDSS v$(CUDSS.version())"
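+
+# Hypothetical usage sketch (assumes a CUDA device and an NLP model stored in
+# GPU arrays; the exact keyword names follow MadNLP's option-passing
+# convention):
+#
+#     using MadNLP, MadNLPGPU
+#     results = madnlp(nlp; linear_solver = CUDSSSolver,
+#                      cudss_algorithm = MadNLP.LDL)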
diff --git a/lib/MadNLPGPU/src/LinearSolvers/cusolverrf.jl b/lib/MadNLPGPU/src/LinearSolvers/cusolverrf.jl
new file mode 100644
index 00000000..74d10526
--- /dev/null
+++ b/lib/MadNLPGPU/src/LinearSolvers/cusolverrf.jl
@@ -0,0 +1,323 @@
+# MIT License
+
+# Copyright (c) 2020 Exanauts
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+const CuSubVector{T} = SubArray{
+ T,
+ 1,
+ CUDA.CuArray{T,1,CUDA.Mem.DeviceBuffer},
+ Tuple{CUDA.CuArray{Int64,1,CUDA.Mem.DeviceBuffer}},
+ false,
+}
+
+#=
+ cusolverRF
+=#
+
+@kwdef mutable struct RFSolverOptions <: MadNLP.AbstractOptions
+ rf_symbolic_analysis::Symbol = :klu
+ rf_fast_mode::Bool = true
+ rf_pivot_tol::Float64 = 1e-14
+ rf_boost::Float64 = 1e-14
+ rf_factorization_algo::CUSOLVER.cusolverRfFactorization_t =
+ CUSOLVER.CUSOLVERRF_FACTORIZATION_ALG0
+ rf_triangular_solve_algo::CUSOLVER.cusolverRfTriangularSolve_t =
+ CUSOLVER.CUSOLVERRF_TRIANGULAR_SOLVE_ALG1
+end
+
+mutable struct RFSolver{T} <: MadNLP.AbstractLinearSolver{T}
+ inner::Union{Nothing,CUSOLVERRF.RFLowLevel}
+
+ tril::CUSPARSE.CuSparseMatrixCSC{T}
+ full::CUSPARSE.CuSparseMatrixCSR{T}
+ tril_to_full_view::CuSubVector{T}
+ buffer::CUDA.CuVector{T}
+
+ opt::RFSolverOptions
+ logger::MadNLP.MadNLPLogger
+end
+
+function RFSolver(
+ csc::CUSPARSE.CuSparseMatrixCSC;
+ opt = RFSolverOptions(),
+ logger = MadNLP.MadNLPLogger(),
+)
+ n, m = size(csc)
+ @assert n == m
+
+ full, tril_to_full_view = MadNLP.get_tril_to_full(csc)
+
+ full = CUSPARSE.CuSparseMatrixCSR(full.colPtr, full.rowVal, full.nzVal, full.dims)
+
+ return RFSolver(
+ nothing,
+ csc,
+ full,
+ tril_to_full_view,
+ similar(csc.nzVal, 1),
+ opt,
+ logger,
+ )
+end
+
+function MadNLP.factorize!(M::RFSolver)
+ copyto!(M.full.nzVal, M.tril_to_full_view)
+    if isnothing(M.inner)
+ sym_lu = CUSOLVERRF.klu_symbolic_analysis(M.full)
+ M.inner = CUSOLVERRF.RFLowLevel(
+ sym_lu;
+ fast_mode = M.opt.rf_fast_mode,
+ factorization_algo = M.opt.rf_factorization_algo,
+ triangular_algo = M.opt.rf_triangular_solve_algo,
+ # nboost=M.opt.rf_boost,
+ # nzero=M.opt.rf_pivot_tol,
+ )
+ end
+ CUSOLVERRF.rf_refactor!(M.inner, M.full)
+ return M
+end
+
+function MadNLP.solve!(M::RFSolver{T}, x) where {T}
+ CUSOLVERRF.rf_solve!(M.inner, x)
+    # dummy copy + device synchronization, so the asynchronous solve does not distort MadNLP's timings
+ copyto!(M.buffer, M.buffer)
+ synchronize(CUDABackend())
+ # -----------------------------------------------------
+ return x
+end
+
+MadNLP.input_type(::Type{RFSolver}) = :csc
+MadNLP.default_options(::Type{RFSolver}) = RFSolverOptions()
+MadNLP.is_inertia(M::RFSolver) = false
+MadNLP.improve!(M::RFSolver) = false
+MadNLP.is_supported(::Type{RFSolver}, ::Type{Float32}) = true
+MadNLP.is_supported(::Type{RFSolver}, ::Type{Float64}) = true
+MadNLP.introduce(M::RFSolver) = "cuSolverRF"
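+
+# Note: the symbolic analysis (KLU, performed on the host) runs only on the
+# first call to factorize!; later calls reuse it and only refactorize
+# numerically on the device, which is where cusolverRF pays off for the
+# repeated KKT factorizations of an interior-point iteration.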
+
+#=
+ GLU
+=#
+
+@kwdef mutable struct GLUSolverOptions <: MadNLP.AbstractOptions
+ glu_symbolic_analysis::Symbol = :klu
+end
+
+mutable struct GLUSolver{T} <: MadNLP.AbstractLinearSolver{T}
+ inner::Union{Nothing,CUSOLVERRF.GLULowLevel}
+
+ tril::CUSPARSE.CuSparseMatrixCSC{T}
+ full::CUSPARSE.CuSparseMatrixCSR{T}
+ tril_to_full_view::CuSubVector{T}
+ buffer::CUDA.CuVector{T}
+
+ opt::GLUSolverOptions
+ logger::MadNLP.MadNLPLogger
+end
+
+function GLUSolver(
+ csc::CUSPARSE.CuSparseMatrixCSC;
+ opt = GLUSolverOptions(),
+ logger = MadNLP.MadNLPLogger(),
+)
+ n, m = size(csc)
+ @assert n == m
+
+ full, tril_to_full_view = MadNLP.get_tril_to_full(csc)
+
+ full = CUSPARSE.CuSparseMatrixCSR(full.colPtr, full.rowVal, full.nzVal, full.dims)
+
+ return GLUSolver(
+ nothing,
+ csc,
+ full,
+ tril_to_full_view,
+ similar(csc.nzVal, 1),
+ opt,
+ logger,
+ )
+end
+
+function MadNLP.factorize!(M::GLUSolver)
+ copyto!(M.full.nzVal, M.tril_to_full_view)
+    if isnothing(M.inner)
+ sym_lu = CUSOLVERRF.klu_symbolic_analysis(M.full)
+ M.inner = CUSOLVERRF.GLULowLevel(sym_lu)
+ end
+ CUSOLVERRF.glu_refactor!(M.inner, M.full)
+ return M
+end
+
+function MadNLP.solve!(M::GLUSolver{T}, x) where {T}
+ CUSOLVERRF.glu_solve!(M.inner, x)
+    # dummy copy + device synchronization, so the asynchronous solve does not distort MadNLP's timings
+ copyto!(M.buffer, M.buffer)
+ synchronize(CUDABackend())
+ # -----------------------------------------------------
+ return x
+end
+
+MadNLP.input_type(::Type{GLUSolver}) = :csc
+MadNLP.default_options(::Type{GLUSolver}) = GLUSolverOptions()
+MadNLP.is_inertia(M::GLUSolver) = false
+MadNLP.improve!(M::GLUSolver) = false
+MadNLP.is_supported(::Type{GLUSolver}, ::Type{Float32}) = true
+MadNLP.is_supported(::Type{GLUSolver}, ::Type{Float64}) = true
+MadNLP.introduce(M::GLUSolver) = "GLU"
+
+#=
+ Undocumented Cholesky Solver
+=#
+
+@enum ORDERING begin
+ DEFAULT_ORDERING = 0
+ METIS_ORDERING = 1
+ AMD_ORDERING = 2
+ USER_ORDERING = 3
+end
+@kwdef mutable struct CuCholeskySolverOptions <: MadNLP.AbstractOptions
+ ordering::ORDERING = METIS_ORDERING
+end
+
+mutable struct CuCholeskySolver{T} <: MadNLP.AbstractLinearSolver{T}
+ inner::Union{Nothing,CUSOLVER.SparseCholesky}
+
+ tril::CUSPARSE.CuSparseMatrixCSC{T}
+ full::CUSPARSE.CuSparseMatrixCSR{T}
+ tril_to_full_view::CuSubVector{T}
+ buffer::CUDA.CuVector{T}
+
+ fullp::CUSPARSE.CuSparseMatrixCSR{T}
+ p::CUDA.CuVector{Int}
+ pnzval::CUDA.CuVector{Int}
+ rhs::CUDA.CuVector{T}
+
+ singularity::Bool
+
+ opt::CuCholeskySolverOptions
+ logger::MadNLP.MadNLPLogger
+end
+
+function CuCholeskySolver(
+ csc::CUSPARSE.CuSparseMatrixCSC;
+ opt = CuCholeskySolverOptions(),
+ logger = MadNLP.MadNLPLogger(),
+)
+ n, m = size(csc)
+ @assert n == m
+
+ full, tril_to_full_view = MadNLP.get_tril_to_full(csc)
+ buffer = similar(csc.nzVal, 1)
+
+ full = CUSPARSE.CuSparseMatrixCSR(full.colPtr, full.rowVal, full.nzVal, full.dims)
+
+ full_cpu = SparseMatrixCSC(full)
+ full_cpu_order = SparseMatrixCSC(
+ n,
+ m,
+ full_cpu.colptr,
+ full_cpu.rowval,
+ Array(1:length(full_cpu.nzval)),
+ )
+
+ if opt.ordering == AMD_ORDERING
+ p = AMD.amd(full_cpu_order)
+ else
+ g = Metis.graph(full_cpu_order; check_hermitian = false)
+ p, ~ = Metis.permutation(g)
+ end
+
+ full_cpu_reorder = full_cpu_order[p, p]
+ pnzval = full_cpu_reorder.nzval
+
+ fullp = CUSPARSE.CuSparseMatrixCSR(
+ CuArray(full_cpu_reorder.colptr),
+ CuArray(full_cpu_reorder.rowval),
+ similar(full.nzVal),
+ (n, m),
+ )
+
+ rhs = similar(csc.nzVal, n)
+
+ return CuCholeskySolver(
+ nothing,
+ csc,
+ full,
+ tril_to_full_view,
+ buffer,
+ fullp,
+ CuArray{Int}(p),
+ CuArray{Int}(pnzval),
+ rhs,
+ false,
+ opt,
+ logger,
+ )
+end
+
+function MadNLP.factorize!(M::CuCholeskySolver)
+ copyto!(M.full.nzVal, M.tril_to_full_view)
+ _copy_from_map_kernel!(CUDABackend())(
+ M.fullp.nzVal,
+ M.full.nzVal,
+ M.pnzval;
+ ndrange = length(M.pnzval),
+ )
+ synchronize(CUDABackend())
+    if isnothing(M.inner)
+ M.inner = CUSOLVER.SparseCholesky(M.fullp)
+ CUSOLVER.spcholesky_buffer(M.inner, M.fullp)
+ end
+ try
+ CUSOLVER.spcholesky_factorise(
+ M.inner,
+ M.fullp,
+ eltype(M.fullp.nzVal) == Float32 ? 1e-6 : 1e-12,
+ )
+ M.singularity = false
+ catch e
+ M.singularity = true
+ end
+
+ return M
+end
+
+function MadNLP.solve!(M::CuCholeskySolver{T}, x) where {T}
+ _copy_from_map_kernel!(CUDABackend())(M.rhs, x, M.p; ndrange = length(M.p))
+ synchronize(CUDABackend())
+ CUSOLVER.spcholesky_solve(M.inner, M.rhs, x)
+ _copy_to_map_kernel!(CUDABackend())(M.rhs, M.p, x; ndrange = length(M.p))
+ synchronize(CUDABackend())
+ copyto!(x, M.rhs)
+ return x
+end
+
+function MadNLP.inertia(M::CuCholeskySolver{T}) where {T}
+ return !(M.singularity) ? (size(M.fullp, 1), 0, 0) : (size(M.fullp, 1) - 2, 1, 1)
+end
+
+MadNLP.input_type(::Type{CuCholeskySolver}) = :csc
+MadNLP.default_options(::Type{CuCholeskySolver}) = CuCholeskySolverOptions()
+MadNLP.is_inertia(M::CuCholeskySolver) = true
+MadNLP.improve!(M::CuCholeskySolver) = false
+MadNLP.is_supported(::Type{CuCholeskySolver}, ::Type{Float32}) = true
+MadNLP.is_supported(::Type{CuCholeskySolver}, ::Type{Float64}) = true
+MadNLP.introduce(M::CuCholeskySolver) = "cuSolverCholesky"
+
diff --git a/lib/MadNLPGPU/src/LinearSolvers/lapackgpu.jl b/lib/MadNLPGPU/src/LinearSolvers/lapackgpu.jl
new file mode 100644
index 00000000..43ddd69c
--- /dev/null
+++ b/lib/MadNLPGPU/src/LinearSolvers/lapackgpu.jl
@@ -0,0 +1,419 @@
+mutable struct LapackGPUSolver{T} <: AbstractLinearSolver{T}
+ A::AbstractMatrix{T}
+ fact::CuMatrix{T}
+ rhs::CuVector{T}
+ work::CuVector{T}
+ lwork::Any
+ work_host::Vector{T}
+ lwork_host::Any
+ info::CuVector{Int32}
+ etc::Dict{Symbol,Any} # throw some algorithm-specific things here
+ opt::LapackOptions
+ logger::MadNLPLogger
+end
+
+function LapackGPUSolver(
+ A::MT;
+ option_dict::Dict{Symbol,Any} = Dict{Symbol,Any}(),
+ opt = LapackOptions(),
+ logger = MadNLPLogger(),
+ kwargs...,
+) where {T,MT<:AbstractMatrix{T}}
+ set_options!(opt, option_dict, kwargs...)
+ fact = CuMatrix{T}(undef, size(A))
+ rhs = CuVector{T}(undef, size(A, 1))
+ work = CuVector{T}(undef, 1)
+ lwork = Int32[1]
+ work_host = Vector{T}(undef, 1)
+ lwork_host = Int32[1]
+ info = CuVector{Int32}(undef, 1)
+ etc = Dict{Symbol,Any}()
+
+ return LapackGPUSolver{T}(
+ A,
+ fact,
+ rhs,
+ work,
+ lwork,
+ work_host,
+ lwork_host,
+ info,
+ etc,
+ opt,
+ logger,
+ )
+end
+
+function factorize!(M::LapackGPUSolver)
+ if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
+ factorize_bunchkaufman!(M)
+ elseif M.opt.lapack_algorithm == MadNLP.LU
+ factorize_lu!(M)
+ elseif M.opt.lapack_algorithm == MadNLP.QR
+ factorize_qr!(M)
+ elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
+ factorize_cholesky!(M)
+ else
+        error("Invalid lapack_algorithm")
+ end
+end
+function solve!(M::LapackGPUSolver, x)
+ if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
+ solve_bunchkaufman!(M, x)
+ elseif M.opt.lapack_algorithm == MadNLP.LU
+ solve_lu!(M, x)
+ elseif M.opt.lapack_algorithm == MadNLP.QR
+ solve_qr!(M, x)
+ elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
+ solve_cholesky!(M, x)
+ else
+        error("Invalid lapack_algorithm")
+ end
+end
+
+improve!(M::LapackGPUSolver) = false
+introduce(M::LapackGPUSolver) = "Lapack-GPU ($(M.opt.lapack_algorithm))"
+
+for (
+ sytrf,
+ sytrf_buffer,
+ getrf,
+ getrf_buffer,
+ getrs,
+ geqrf,
+ geqrf_buffer,
+ ormqr,
+ ormqr_buffer,
+ trsm,
+ potrf,
+ potrf_buffer,
+ potrs,
+ typ,
+ cutyp,
+) in (
+ (
+ :cusolverDnDsytrf,
+ :cusolverDnDsytrf_bufferSize,
+ :cusolverDnDgetrf,
+ :cusolverDnDgetrf_bufferSize,
+ :cusolverDnDgetrs,
+ :cusolverDnDgeqrf,
+ :cusolverDnDgeqrf_bufferSize,
+ :cusolverDnDormqr,
+ :cusolverDnDormqr_bufferSize,
+ :cublasDtrsm_v2,
+ :cusolverDnDpotrf,
+ :cusolverDnDpotrf_bufferSize,
+ :cusolverDnDpotrs,
+ Float64,
+ CUDA.R_64F,
+ ),
+ (
+ :cusolverDnSsytrf,
+ :cusolverDnSsytrf_bufferSize,
+ :cusolverDnSgetrf,
+ :cusolverDnSgetrf_bufferSize,
+ :cusolverDnSgetrs,
+ :cusolverDnSgeqrf,
+ :cusolverDnSgeqrf_bufferSize,
+ :cusolverDnSormqr,
+ :cusolverDnSormqr_bufferSize,
+ :cublasStrsm_v2,
+ :cusolverDnSpotrf,
+ :cusolverDnSpotrf_bufferSize,
+ :cusolverDnSpotrs,
+ Float32,
+ CUDA.R_32F,
+ ),
+)
+ @eval begin
+ function factorize_bunchkaufman!(M::LapackGPUSolver{$typ})
+ haskey(M.etc, :ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef, size(M.A, 1)))
+ haskey(M.etc, :ipiv64) ||
+ (M.etc[:ipiv64] = CuVector{Int64}(undef, length(M.etc[:ipiv])))
+
+ transfer!(M.fact, M.A)
+ CUSOLVER.$sytrf_buffer(
+ dense_handle(),
+ Int32(size(M.fact, 1)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.lwork,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+ CUSOLVER.$sytrf(
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact, 1)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.etc[:ipiv],
+ M.work,
+ M.lwork[],
+ M.info,
+ )
+ return M
+ end
+
+ function solve_bunchkaufman!(M::LapackGPUSolver{$typ}, x)
+ copyto!(M.etc[:ipiv64], M.etc[:ipiv])
+ copyto!(M.rhs, x)
+ ccall(
+ (:cusolverDnXsytrs_bufferSize, libcusolver),
+ cusolverStatus_t,
+ (
+ cusolverDnHandle_t,
+ cublasFillMode_t,
+ Int64,
+ Int64,
+ cudaDataType,
+ CuPtr{Cdouble},
+ Int64,
+ CuPtr{Int64},
+ cudaDataType,
+ CuPtr{Cdouble},
+ Int64,
+ Ptr{Int64},
+ Ptr{Int64},
+ ),
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ size(M.fact, 1),
+ 1,
+ $cutyp,
+ M.fact,
+ size(M.fact, 2),
+ M.etc[:ipiv64],
+ $cutyp,
+ M.rhs,
+ length(M.rhs),
+ M.lwork,
+ M.lwork_host,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+            length(M.work_host) < M.lwork_host[] && resize!(M.work_host, Int(M.lwork_host[]))
+ ccall(
+ (:cusolverDnXsytrs, libcusolver),
+ cusolverStatus_t,
+ (
+ cusolverDnHandle_t,
+ cublasFillMode_t,
+ Int64,
+ Int64,
+ cudaDataType,
+ CuPtr{Cdouble},
+ Int64,
+ CuPtr{Int64},
+ cudaDataType,
+ CuPtr{Cdouble},
+ Int64,
+ CuPtr{Cdouble},
+ Int64,
+ Ptr{Cdouble},
+ Int64,
+ CuPtr{Int64},
+ ),
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ size(M.fact, 1),
+ 1,
+ $cutyp,
+ M.fact,
+ size(M.fact, 2),
+ M.etc[:ipiv64],
+ $cutyp,
+ M.rhs,
+ length(M.rhs),
+ M.work,
+ M.lwork[],
+ M.work_host,
+ M.lwork_host[],
+ M.info,
+ )
+ copyto!(x, M.rhs)
+
+ return x
+ end
+
+ function factorize_lu!(M::LapackGPUSolver{$typ})
+ haskey(M.etc, :ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef, size(M.A, 1)))
+ transfer!(M.fact, M.A)
+ tril_to_full!(M.fact)
+ CUSOLVER.$getrf_buffer(
+ dense_handle(),
+ Int32(size(M.fact, 1)),
+ Int32(size(M.fact, 2)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.lwork,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+ CUSOLVER.$getrf(
+ dense_handle(),
+ Int32(size(M.fact, 1)),
+ Int32(size(M.fact, 2)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.work,
+ M.etc[:ipiv],
+ M.info,
+ )
+ return M
+ end
+
+ function solve_lu!(M::LapackGPUSolver{$typ}, x)
+ copyto!(M.rhs, x)
+ CUSOLVER.$getrs(
+ dense_handle(),
+ CUBLAS_OP_N,
+ Int32(size(M.fact, 1)),
+ Int32(1),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.etc[:ipiv],
+ M.rhs,
+ Int32(length(M.rhs)),
+ M.info,
+ )
+ copyto!(x, M.rhs)
+ return x
+ end
+
+ function factorize_qr!(M::LapackGPUSolver{$typ})
+ haskey(M.etc, :tau) || (M.etc[:tau] = CuVector{$typ}(undef, size(M.A, 1)))
+ haskey(M.etc, :one) || (M.etc[:one] = ones($typ, 1))
+ transfer!(M.fact, M.A)
+ tril_to_full!(M.fact)
+ CUSOLVER.$geqrf_buffer(
+ dense_handle(),
+ Int32(size(M.fact, 1)),
+ Int32(size(M.fact, 2)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.lwork,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+ CUSOLVER.$geqrf(
+ dense_handle(),
+ Int32(size(M.fact, 1)),
+ Int32(size(M.fact, 2)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.etc[:tau],
+ M.work,
+ M.lwork[],
+ M.info,
+ )
+ return M
+ end
+
+ function solve_qr!(M::LapackGPUSolver{$typ}, x)
+ copyto!(M.rhs, x)
+ CUSOLVER.$ormqr_buffer(
+ dense_handle(),
+ CUBLAS_SIDE_LEFT,
+ CUBLAS_OP_T,
+ Int32(size(M.fact, 1)),
+ Int32(1),
+ Int32(length(M.etc[:tau])),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.etc[:tau],
+ M.rhs,
+ Int32(length(M.rhs)),
+ M.lwork,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+ CUSOLVER.$ormqr(
+ dense_handle(),
+ CUBLAS_SIDE_LEFT,
+ CUBLAS_OP_T,
+ Int32(size(M.fact, 1)),
+ Int32(1),
+ Int32(length(M.etc[:tau])),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.etc[:tau],
+ M.rhs,
+ Int32(length(M.rhs)),
+ M.work,
+ M.lwork[],
+ M.info,
+ )
+ CUBLAS.$trsm(
+ handle(),
+ CUBLAS_SIDE_LEFT,
+ CUBLAS_FILL_MODE_UPPER,
+ CUBLAS_OP_N,
+ CUBLAS_DIAG_NON_UNIT,
+ Int32(size(M.fact, 1)),
+ Int32(1),
+ M.etc[:one],
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.rhs,
+ Int32(length(M.rhs)),
+ )
+ copyto!(x, M.rhs)
+ return x
+ end
+
+ function factorize_cholesky!(M::LapackGPUSolver{$typ})
+ transfer!(M.fact, M.A)
+ CUSOLVER.$potrf_buffer(
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact, 1)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.lwork,
+ )
+ length(M.work) < M.lwork[] && resize!(M.work, Int(M.lwork[]))
+ CUSOLVER.$potrf(
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact, 1)),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.work,
+ M.lwork[],
+ M.info,
+ )
+ return M
+ end
+
+ function solve_cholesky!(M::LapackGPUSolver{$typ}, x)
+ copyto!(M.rhs, x)
+ CUSOLVER.$potrs(
+ dense_handle(),
+ CUBLAS_FILL_MODE_LOWER,
+ Int32(size(M.fact, 1)),
+ Int32(1),
+ M.fact,
+ Int32(size(M.fact, 2)),
+ M.rhs,
+ Int32(length(M.rhs)),
+ M.info,
+ )
+ copyto!(x, M.rhs)
+ return x
+ end
+ end
+end
+
+is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::LapackGPUSolver) for BUNCHKAUFMAN
+function inertia(M::LapackGPUSolver)
+ if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
+ inertia(M.etc[:fact_cpu], M.etc[:ipiv_cpu], M.etc[:info_cpu][])
+ elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
+ sum(M.info) == 0 ? (size(M.fact, 1), 0, 0) : (0, size(M.fact, 1), 0)
+ else
+        error("Invalid lapack_algorithm")
+ end
+end
+
+input_type(::Type{LapackGPUSolver}) = :dense
+MadNLP.default_options(::Type{LapackGPUSolver}) = LapackOptions()
+is_supported(::Type{LapackGPUSolver}, ::Type{Float32}) = true
+is_supported(::Type{LapackGPUSolver}, ::Type{Float64}) = true
+
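+# Hypothetical usage sketch (dense KKT systems on the GPU; assumes a model
+# with dense callbacks):
+#
+#     using MadNLP, MadNLPGPU
+#     results = madnlp(nlp; linear_solver = LapackGPUSolver,
+#                      lapack_algorithm = MadNLP.CHOLESKY)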
diff --git a/lib/MadNLPGPU/src/MadNLPGPU.jl b/lib/MadNLPGPU/src/MadNLPGPU.jl
index 3c328c5b..bf2c2188 100644
--- a/lib/MadNLPGPU/src/MadNLPGPU.jl
+++ b/lib/MadNLPGPU/src/MadNLPGPU.jl
@@ -1,16 +1,19 @@
module MadNLPGPU
import LinearAlgebra
+import SparseArrays: SparseMatrixCSC, nonzeros
+import LinearAlgebra: Symmetric
# CUDA
-import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
+import CUDA: CUDA, CUSPARSE, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F,
+ has_cuda, @allowscalar, runtime_version, CUDABackend
import .CUSOLVER:
libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T
+import CUSOLVERRF
# Kernels
-import KernelAbstractions: @kernel, @index, wait, Event
-import CUDAKernels: CUDADevice
+import KernelAbstractions: @kernel, @index, synchronize, @Const
import MadNLP: NLPModels
import MadNLP
@@ -21,18 +24,42 @@ import MadNLP:
introduce, factorize!, solve!, improve!, is_inertia, inertia, tril_to_full!,
LapackOptions, input_type, is_supported, default_options, symul!
-symul!(y, A, x::CuVector{T}, α = 1., β = 0.) where T = CUBLAS.symv!('L', T(α), A, x, T(β), y)
-MadNLP._ger!(alpha::Number, x::CuVector{T}, y::CuVector{T}, A::CuMatrix{T}) where T = CUBLAS.ger!(alpha, x, y, A)
+# AMD and Metis
+import AMD, Metis
+include("utils.jl")
+include("KKT/dense.jl")
+include("KKT/sparse.jl")
+include("LinearSolvers/lapackgpu.jl")
+include("LinearSolvers/cusolverrf.jl")
+include("LinearSolvers/cudss.jl")
-include("kernels.jl")
-include("callbacks.jl")
+# option preset
+function MadNLP.MadNLPOptions(
+ nlp::AbstractNLPModel{T,VT};
+ dense_callback = MadNLP.is_dense_callback(nlp),
+ callback = dense_callback ? MadNLP.DenseCallback : MadNLP.SparseCallback,
+ kkt_system = dense_callback ? MadNLP.DenseCondensedKKTSystem : MadNLP.SparseCondensedKKTSystem,
+ linear_solver = dense_callback ? LapackGPUSolver : CUDSSSolver,
+ tol = MadNLP.get_tolerance(T,kkt_system),
+ ) where {T, VT <: CuVector{T}}
-export CuMadNLPSolver
+ return MadNLP.MadNLPOptions(
+ tol = tol,
+ callback = callback,
+ kkt_system = kkt_system,
+ linear_solver = linear_solver,
+ )
+end
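+
+# With this preset, a plain `madnlp(nlp)` call on a model whose arrays live on
+# the GPU automatically selects dense callbacks + LapackGPUSolver for dense
+# models, and sparse callbacks + SparseCondensedKKTSystem + CUDSSSolver
+# otherwise.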
-include("interface.jl")
-include("lapackgpu.jl")
+export LapackGPUSolver, CuCholeskySolver, RFSolver
-export LapackGPUSolver
+# re-export MadNLP, including deprecated names
+for name in names(MadNLP, all=true)
+ if Base.isexported(MadNLP, name)
+ @eval using MadNLP: $(name)
+ @eval export $(name)
+ end
+end
end # module
diff --git a/lib/MadNLPGPU/src/callbacks.jl b/lib/MadNLPGPU/src/callbacks.jl
deleted file mode 100644
index 54ff09ec..00000000
--- a/lib/MadNLPGPU/src/callbacks.jl
+++ /dev/null
@@ -1,68 +0,0 @@
-import MadNLP: variable
-import CUDA.CUBLAS: axpy!
-
-function _init_buffer_bfgs!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT, QN}, n, m) where {T, VT, MT, QN}
- haskey(kkt.etc, :x_gh) || (kkt.etc[:x_gh] = zeros(T, n))
- haskey(kkt.etc, :j_gh) || (kkt.etc[:j_gh] = zeros(T, n))
- haskey(kkt.etc, :j_gd) || (kkt.etc[:j_gd] = VT(undef, n))
- return
-end
-
-function MadNLP.eval_lag_hess_wrapper!(
- solver::MadNLP.MadNLPSolver,
- kkt::MadNLP.AbstractKKTSystem{T, VT, MT, QN},
- x::MadNLP.PrimalVector{T},
- l::Vector{T};
- is_resto=false,
-) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}, QN<:MadNLP.AbstractQuasiNewton{T, VT}}
- nlp = solver.nlp
- cnt = solver.cnt
- MadNLP.@trace(solver.logger, "Update BFGS matrices.")
-
- qn = kkt.quasi_newton
- Bk = kkt.hess
- sk, yk = qn.sk, qn.yk
- n = length(qn.sk)
- m = size(kkt.jac, 1)
-
- # Load the buffers to transfer data between the host and the device.
- _init_buffer_bfgs!(kkt, n, m)
- x_g = get(kkt.etc, :x_gh, nothing)
- j_g = get(kkt.etc, :j_gh, nothing) # on host
- j_d = get(kkt.etc, :j_gd, nothing) # on device
- # Init buffers.
- copyto!(x_g, qn.last_x)
- fill!(j_d, zero(T))
- fill!(j_g, zero(T))
-
- if cnt.obj_grad_cnt >= 2
- # Build sk = x+ - x
- copyto!(sk, 1, variable(solver.x), 1, n) # sₖ = x₊
- axpy!(n, -one(T), qn.last_x, sk) # sₖ = x₊ - x
-
- # Build yk = ∇L+ - ∇L
- copyto!(yk, 1, variable(solver.f), 1, n) # yₖ = ∇f₊
- axpy!(n, -one(T), qn.last_g, yk) # yₖ = ∇f₊ - ∇f
- if m > 0
- MadNLP.jtprod!(solver.jacl, kkt, l)
- copyto!(j_d, 1, solver.jacl, 1, n)
- yk .+= j_d # yₖ += J₊ᵀ l₊
- NLPModels.jtprod!(nlp, x_g, l, j_g)
- copyto!(qn.last_jv, j_g)
- axpy!(n, -one(T), qn.last_jv, yk) # yₖ += J₊ᵀ l₊ - Jᵀ l₊
- end
-
- if cnt.obj_grad_cnt == 2
- MadNLP.init!(qn, Bk, sk, yk)
- end
- MadNLP.update!(qn, Bk, sk, yk)
- end
-
- # Backup data for next step
- copyto!(qn.last_x, 1, variable(solver.x), 1, n)
- copyto!(qn.last_g, 1, variable(solver.f), 1, n)
-
- MadNLP.compress_hessian!(kkt)
- return MadNLP.get_hessian(kkt)
-end
-
diff --git a/lib/MadNLPGPU/src/interface.jl b/lib/MadNLPGPU/src/interface.jl
deleted file mode 100644
index 65450c7a..00000000
--- a/lib/MadNLPGPU/src/interface.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-function CuMadNLPSolver(nlp::AbstractNLPModel{T}; kwargs...) where T
- opt_ipm, opt_linear_solver, logger = MadNLP.load_options(; linear_solver=LapackGPUSolver, kwargs...)
-
- @assert is_supported(opt_ipm.linear_solver, T)
- MT = CuMatrix{T}
- VT = CuVector{T}
- # Determine Hessian approximation
- QN = if opt_ipm.hessian_approximation == MadNLP.DENSE_BFGS
- MadNLP.BFGS{T, VT}
- elseif opt_ipm.hessian_approximation == MadNLP.DENSE_DAMPED_BFGS
- MadNLP.DampedBFGS{T, VT}
- else
- MadNLP.ExactHessian{T, VT}
- end
- KKTSystem = if (opt_ipm.kkt_system == MadNLP.SPARSE_KKT_SYSTEM) || (opt_ipm.kkt_system == MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM)
- error("Sparse KKT system are currently not supported on CUDA GPU.\n" *
- "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
- elseif opt_ipm.kkt_system == MadNLP.DENSE_KKT_SYSTEM
- MadNLP.DenseKKTSystem{T, VT, MT, QN}
- elseif opt_ipm.kkt_system == MadNLP.DENSE_CONDENSED_KKT_SYSTEM
- MadNLP.DenseCondensedKKTSystem{T, VT, MT, QN}
- end
- return MadNLP.MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger)
-end
diff --git a/lib/MadNLPGPU/src/kernels.jl b/lib/MadNLPGPU/src/kernels.jl
deleted file mode 100644
index b278a1f0..00000000
--- a/lib/MadNLPGPU/src/kernels.jl
+++ /dev/null
@@ -1,307 +0,0 @@
-#=
- MadNLP utils
-=#
-
-@kernel function _copy_diag!(dest, src)
- i = @index(Global)
- dest[i] = src[i, i]
-end
-
-function MadNLP.diag!(dest::CuVector{T}, src::CuMatrix{T}) where T
- @assert length(dest) == size(src, 1)
- ev = _copy_diag!(CUDADevice())(dest, src, ndrange=length(dest))
- wait(ev)
-end
-
-@kernel function _add_diagonal!(dest, src1, src2)
- i = @index(Global)
- dest[i, i] = src1[i] + src2[i]
-end
-
-function MadNLP.diag_add!(dest::CuMatrix, src1::CuVector, src2::CuVector)
- ev = _add_diagonal!(CUDADevice())(dest, src1, src2, ndrange=size(dest, 1))
- wait(ev)
-end
-
-#=
- Contiguous views do not dispatch to the correct copyto! kernel
- in CUDA.jl. To avoid fallback to the (slow) implementation in Julia Base,
- we overload the copyto! operator locally
-=#
-_copyto!(dest::CuArray, src::Array) = copyto!(dest, src)
-function _copyto!(dest::CuArray, src::SubArray)
- @assert src.stride1 == 1 # src array should be one-strided
- n = length(dest)
- offset = src.offset1
- p_src = parent(src)
- copyto!(dest, 1, p_src, offset+1, n)
-end
-
-#=
- MadNLP kernels
-=#
-
-# Overload MadNLP.is_valid to avoid fallback to default is_valid, slow on GPU
-MadNLP.is_valid(src::CuArray) = true
-
-# Constraint scaling
-function MadNLP.scale_constraints!(
- nlp::AbstractNLPModel,
- con_scale::AbstractVector,
- jac::CuMatrix;
- max_gradient=1e-8,
-)
- # Compute reduction on the GPU with built-in CUDA.jl function
- d_con_scale = maximum(abs, jac, dims=2)
- copyto!(con_scale, d_con_scale)
- con_scale .= min.(1.0, max_gradient ./ con_scale)
-end
-
-@kernel function _treat_fixed_variable_kernell!(dest, ind_fixed)
- k, j = @index(Global, NTuple)
- i = ind_fixed[k]
-
- if i == j
- dest[i, i] = 1.0
- else
- dest[i, j] = 0.0
- dest[j, i] = 0.0
- end
-end
-
-function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:CuMatrix{T}}
- length(kkt.ind_fixed) == 0 && return
- aug = kkt.aug_com
- d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU
- ndrange = (length(d_ind_fixed), size(aug, 1))
- ev = _treat_fixed_variable_kernell!(CUDADevice())(aug, d_ind_fixed, ndrange=ndrange)
- wait(ev)
-end
-
-
-#=
- AbstractDenseKKTSystem
-=#
-
-function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.AbstractDenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- # Load buffers
- m = size(kkt.jac, 1)
- nx = size(kkt.jac, 2)
- ns = length(kkt.ind_ineq)
- haskey(kkt.etc, :jac_w1) || (kkt.etc[:jac_w1] = CuVector{T}(undef, m))
- haskey(kkt.etc, :jac_w2) || (kkt.etc[:jac_w2] = CuVector{T}(undef, nx))
- haskey(kkt.etc, :jac_w3) || (kkt.etc[:jac_w3] = CuVector{T}(undef, ns))
-
- d_x = kkt.etc[:jac_w1]::VT
- d_yx = kkt.etc[:jac_w2]::VT
- d_ys = kkt.etc[:jac_w3]::VT
-
- # x and y can be host arrays. Copy them on the device to avoid side effect.
- _copyto!(d_x, x)
-
- # / x
- LinearAlgebra.mul!(d_yx, kkt.jac', d_x)
- copyto!(parent(y), 1, d_yx, 1, nx)
-
- # / s
- d_ys .= -d_x[kkt.ind_ineq] .* kkt.constraint_scaling[kkt.ind_ineq]
- copyto!(parent(y), nx+1, d_ys, 1, ns)
- return
-end
-
-function MadNLP.set_aug_diagonal!(kkt::MadNLP.AbstractDenseKKTSystem{T, VT, MT}, solver::MadNLP.MadNLPSolver) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- haskey(kkt.etc, :pr_diag_host) || (kkt.etc[:pr_diag_host] = Vector{T}(undef, length(kkt.pr_diag)))
- pr_diag_h = kkt.etc[:pr_diag_host]::Vector{T}
- x = MadNLP.full(solver.x)
- zl = MadNLP.full(solver.zl)
- zu = MadNLP.full(solver.zu)
- xl = MadNLP.full(solver.xl)
- xu = MadNLP.full(solver.xu)
- # Broadcast is not working as MadNLP array are allocated on the CPU,
- # whereas pr_diag is allocated on the GPU
- pr_diag_h .= zl./(x.-xl) .+ zu./(xu.-x)
- copyto!(kkt.pr_diag, pr_diag_h)
- fill!(kkt.du_diag, 0.0)
-end
-
-#=
- DenseKKTSystem kernels
-=#
-
-function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- # Load buffers
- haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1)))
- haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1)))
-
- d_x = kkt.etc[:hess_w1]::VT
- d_y = kkt.etc[:hess_w2]::VT
-
- # x and y can be host arrays. Copy them on the device to avoid side effect.
- copyto!(d_x, x)
- symul!(d_y, kkt.aug_com, d_x)
- copyto!(y, d_y)
-end
-function LinearAlgebra.mul!(y::MadNLP.ReducedKKTVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::MadNLP.ReducedKKTVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- LinearAlgebra.mul!(MadNLP.full(y), kkt, MadNLP.full(x))
-end
-
-@kernel function _build_dense_kkt_system_kernel!(
- dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, con_scale, n, m, ns
-)
- i, j = @index(Global, NTuple)
- if (i <= n)
- # Transfer Hessian
- if (i == j)
- dest[i, i] = pr_diag[i] + diag_hess[i]
- else
- dest[i, j] = hess[i, j]
- end
- elseif i <= n + ns
- # Transfer slack diagonal
- dest[i, i] = pr_diag[i]
- # Transfer Jacobian wrt slack
- js = i - n
- is = ind_ineq[js]
- dest[is + n + ns, is + n] = - con_scale[is]
- dest[is + n, is + n + ns] = - con_scale[is]
- elseif i <= n + ns + m
- # Transfer Jacobian wrt variable x
- i_ = i - n - ns
- dest[i, j] = jac[i_, j]
- dest[j, i] = jac[i_, j]
- # Transfer dual regularization
- dest[i, i] = du_diag[i_]
- end
-end
-
-function MadNLP._build_dense_kkt_system!(
- dest::CuMatrix, hess::CuMatrix, jac::CuMatrix,
- pr_diag::CuVector, du_diag::CuVector, diag_hess::CuVector, ind_ineq, con_scale, n, m, ns
-)
- ind_ineq_gpu = ind_ineq |> CuArray
- ndrange = (n+m+ns, n)
- ev = _build_dense_kkt_system_kernel!(CUDADevice())(
- dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq_gpu, con_scale, n, m, ns,
- ndrange=ndrange
- )
- wait(ev)
-end
-
-
-#=
- DenseCondensedKKTSystem
-=#
-function MadNLP.get_slack_regularization(kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- n, ns = MadNLP.num_variables(kkt), kkt.n_ineq
- return view(kkt.pr_diag, n+1:n+ns) |> Array
-end
-function MadNLP.get_scaling_inequalities(kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- return kkt.constraint_scaling[kkt.ind_ineq] |> Array
-end
-
-@kernel function _build_jacobian_condensed_kernel!(
- dest, jac, pr_diag, ind_ineq, con_scale, n, m_ineq,
-)
- i, j = @index(Global, NTuple)
- is = ind_ineq[i]
- @inbounds dest[i, j] = jac[is, j] * sqrt(pr_diag[n+i]) / con_scale[is]
-end
-
-function MadNLP._build_ineq_jac!(
- dest::CuMatrix, jac::CuMatrix, pr_diag::CuVector,
- ind_ineq::AbstractVector, ind_fixed::AbstractVector, con_scale::CuVector, n, m_ineq,
-)
- (m_ineq == 0) && return # nothing to do if no ineq. constraints
- ind_ineq_gpu = ind_ineq |> CuArray
- ndrange = (m_ineq, n)
- ev = _build_jacobian_condensed_kernel!(CUDADevice())(
- dest, jac, pr_diag, ind_ineq_gpu, con_scale, n, m_ineq,
- ndrange=ndrange, dependencies=Event(CUDADevice()),
- )
- wait(ev)
- # need to zero the fixed components
- dest[:, ind_fixed] .= 0.0
- return
-end
-
-@kernel function _build_condensed_kkt_system_kernel!(
- dest, hess, jac, pr_diag, du_diag, ind_eq, n, m_eq,
-)
- i, j = @index(Global, NTuple)
-
- # Transfer Hessian
- if i <= n
- if i == j
- @inbounds dest[i, i] += pr_diag[i] + hess[i, i]
- else
- @inbounds dest[i, j] += hess[i, j]
- end
- elseif i <= n + m_eq
- i_ = i - n
- @inbounds is = ind_eq[i_]
- # Jacobian / equality
- @inbounds dest[i_ + n, j] = jac[is, j]
- @inbounds dest[j, i_ + n] = jac[is, j]
- # Transfer dual regularization
- @inbounds dest[i_ + n, i_ + n] = du_diag[is]
- end
-end
-
-function MadNLP._build_condensed_kkt_system!(
- dest::CuMatrix, hess::CuMatrix, jac::CuMatrix,
- pr_diag::CuVector, du_diag::CuVector, ind_eq::AbstractVector, n, m_eq,
-)
- ind_eq_gpu = ind_eq |> CuArray
- ndrange = (n + m_eq, n)
- ev = _build_condensed_kkt_system_kernel!(CUDADevice())(
- dest, hess, jac, pr_diag, du_diag, ind_eq_gpu, n, m_eq,
- ndrange=ndrange, dependencies=Event(CUDADevice()),
- )
- wait(ev)
-end
-
-function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- if length(y) == length(x) == size(kkt.aug_com, 1)
- # Load buffers
- haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1)))
- haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1)))
-
- d_x = kkt.etc[:hess_w1]::VT
- d_y = kkt.etc[:hess_w2]::VT
-
- # Call parent() as CUDA does not dispatch on proper copyto! when passed a view
- copyto!(d_x, 1, parent(x), 1, length(x))
- symul!(d_y, kkt.aug_com, d_x)
- copyto!(y, d_y)
- else
- # Load buffers
- haskey(kkt.etc, :hess_w3) || (kkt.etc[:hess_w3] = CuVector{T}(undef, length(x)))
- haskey(kkt.etc, :hess_w4) || (kkt.etc[:hess_w4] = CuVector{T}(undef, length(y)))
-
- d_x = kkt.etc[:hess_w3]::VT
- d_y = kkt.etc[:hess_w4]::VT
-
- # Call parent() as CUDA does not dispatch on proper copyto! when passed a view
- copyto!(d_x, 1, parent(x), 1, length(x))
- MadNLP._mul_expanded!(d_y, kkt, d_x)
- copyto!(y, d_y)
- end
-end
-function LinearAlgebra.mul!(y::MadNLP.ReducedKKTVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::MadNLP.ReducedKKTVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- LinearAlgebra.mul!(MadNLP.full(y), kkt, MadNLP.full(x))
-end
-
-function MadNLP.jprod_ineq!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
- # Create buffers
- haskey(kkt.etc, :jac_ineq_w1) || (kkt.etc[:jac_ineq_w1] = CuVector{T}(undef, kkt.n_ineq))
- haskey(kkt.etc, :jac_ineq_w2) || (kkt.etc[:jac_ineq_w2] = CuVector{T}(undef, size(kkt.jac_ineq, 2)))
-
- y_d = kkt.etc[:jac_ineq_w1]::VT
- x_d = kkt.etc[:jac_ineq_w2]::VT
-
- # Call parent() as CUDA does not dispatch on proper copyto! when passed a view
- copyto!(x_d, 1, parent(x), 1, length(x))
- LinearAlgebra.mul!(y_d, kkt.jac_ineq, x_d)
- copyto!(parent(y), 1, y_d, 1, length(y))
-end
-
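
The removed `mul!` and `jprod_ineq!` methods above all follow one pattern: stage the operands in preallocated device buffers, run the BLAS kernel, and copy the result back, going through `parent()` because CUDA.jl does not dispatch to the fast `copyto!` when handed a view. A minimal, self-contained sketch of that staging pattern (assuming CUDA.jl; the data here is illustrative):

```julia
using CUDA, LinearAlgebra

A   = CUDA.rand(Float64, 4, 4)
x   = view(rand(Float64, 4), 1:4)          # a SubArray, as in the methods above
d_x = CuVector{Float64}(undef, 4)          # reusable device buffers
d_y = CuVector{Float64}(undef, 4)

copyto!(d_x, 1, parent(x), 1, length(x))   # copy via parent() to hit the fast path
mul!(d_y, A, d_x)                          # device-side matrix-vector product
y = Array(d_y)                             # bring the result back to the host
```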
diff --git a/lib/MadNLPGPU/src/lapackgpu.jl b/lib/MadNLPGPU/src/lapackgpu.jl
deleted file mode 100644
index ebea76a4..00000000
--- a/lib/MadNLPGPU/src/lapackgpu.jl
+++ /dev/null
@@ -1,218 +0,0 @@
-mutable struct LapackGPUSolver{T} <: AbstractLinearSolver{T}
- dense::AbstractMatrix{T}
- fact::CuMatrix{T}
- rhs::CuVector{T}
- work::CuVector{T}
- lwork
- work_host::Vector{T}
- lwork_host
- info::CuVector{Int32}
- etc::Dict{Symbol,Any} # throw some algorithm-specific things here
- opt::LapackOptions
- logger::MadNLPLogger
-end
-
-
-function LapackGPUSolver(
- dense::MT;
- option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
- opt=LapackOptions(),logger=MadNLPLogger(),
- kwargs...) where {T,MT <: AbstractMatrix{T}}
-
- set_options!(opt,option_dict,kwargs...)
- fact = CuMatrix{T}(undef,size(dense))
- rhs = CuVector{T}(undef,size(dense,1))
- work = CuVector{T}(undef, 1)
- lwork = Int32[1]
- work_host = Vector{T}(undef, 1)
- lwork_host = Int32[1]
- info = CuVector{Int32}(undef,1)
- etc = Dict{Symbol,Any}()
-
-
- return LapackGPUSolver{T}(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger)
-end
-
-function factorize!(M::LapackGPUSolver)
- if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
- factorize_bunchkaufman!(M)
- elseif M.opt.lapack_algorithm == MadNLP.LU
- factorize_lu!(M)
- elseif M.opt.lapack_algorithm == MadNLP.QR
- factorize_qr!(M)
- elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
- factorize_cholesky!(M)
- else
- error(LOGGER,"Invalid lapack_algorithm")
- end
-end
-function solve!(M::LapackGPUSolver,x)
- if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
- solve_bunchkaufman!(M,x)
- elseif M.opt.lapack_algorithm == MadNLP.LU
- solve_lu!(M,x)
- elseif M.opt.lapack_algorithm == MadNLP.QR
- solve_qr!(M,x)
- elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
- solve_cholesky!(M,x)
- else
- error(LOGGER,"Invalid lapack_algorithm")
- end
-end
-
-improve!(M::LapackGPUSolver) = false
-introduce(M::LapackGPUSolver) = "Lapack-GPU ($(M.opt.lapack_algorithm))"
-
-for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_buffer,trsm,potrf,potrf_buffer,potrs,typ,cutyp) in (
- (
- :cusolverDnDsytrf, :cusolverDnDsytrf_bufferSize,
- :cusolverDnDgetrf, :cusolverDnDgetrf_bufferSize, :cusolverDnDgetrs,
- :cusolverDnDgeqrf, :cusolverDnDgeqrf_bufferSize,
- :cusolverDnDormqr, :cusolverDnDormqr_bufferSize,
- :cublasDtrsm_v2,
- :cusolverDnDpotrf, :cusolverDnDpotrf_bufferSize,
- :cusolverDnDpotrs,
- Float64, CUDA.R_64F
- ),
- (
- :cusolverDnSsytrf, :cusolverDnSsytrf_bufferSize,
- :cusolverDnSgetrf, :cusolverDnSgetrf_bufferSize, :cusolverDnSgetrs,
- :cusolverDnSgeqrf, :cusolverDnSgeqrf_bufferSize,
- :cusolverDnSormqr, :cusolverDnSormqr_bufferSize,
- :cublasStrsm_v2,
- :cusolverDnSpotrf, :cusolverDnSpotrf_bufferSize,
- :cusolverDnSpotrs,
- Float32, CUDA.R_32F
- ),
- )
- @eval begin
- function factorize_bunchkaufman!(M::LapackGPUSolver{$typ})
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
- haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv])))
-
- copyto!(M.fact,M.dense)
- CUSOLVER.$sytrf_buffer(
- dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.$sytrf(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
- M.etc[:ipiv],M.work,M.lwork[],M.info)
- return M
- end
-
- function solve_bunchkaufman!(M::LapackGPUSolver{$typ},x)
-
- copyto!(M.etc[:ipiv64],M.etc[:ipiv])
- copyto!(M.rhs,x)
- ccall((:cusolverDnXsytrs_bufferSize, libcusolver), cusolverStatus_t,
- (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
- CuPtr{Cdouble}, Int64, Ptr{Int64}, Ptr{Int64}),
- dense_handle(), CUBLAS_FILL_MODE_LOWER,
- size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
- M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.lwork,M.lwork_host)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- length(M.work_host) < M.lwork_host[] && resize!(work_host,Int(M.lwork_host[]))
- ccall((:cusolverDnXsytrs, libcusolver), cusolverStatus_t,
- (cusolverDnHandle_t, cublasFillMode_t, Int64, Int64, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Int64}, cudaDataType,
- CuPtr{Cdouble}, Int64, CuPtr{Cdouble}, Int64, Ptr{Cdouble}, Int64,
- CuPtr{Int64}),
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- size(M.fact,1),1,$cutyp,M.fact,size(M.fact,2),
- M.etc[:ipiv64],$cutyp,M.rhs,length(M.rhs),M.work,M.lwork[],M.work_host,M.lwork_host[],M.info)
- copyto!(x,M.rhs)
-
- return x
- end
-
- function factorize_lu!(M::LapackGPUSolver{$typ})
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
- tril_to_full!(M.dense)
- copyto!(M.fact,M.dense)
- CUSOLVER.$getrf_buffer(
- dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
- M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.$getrf(
- dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),
- M.fact,Int32(size(M.fact,2)),M.work,M.etc[:ipiv],M.info)
- return M
- end
-
- function solve_lu!(M::LapackGPUSolver{$typ},x)
- copyto!(M.rhs,x)
- CUSOLVER.$getrs(
- dense_handle(),CUBLAS_OP_N,
- Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
- M.etc[:ipiv],M.rhs,Int32(length(M.rhs)),M.info)
- copyto!(x,M.rhs)
- return x
- end
-
- function factorize_qr!(M::LapackGPUSolver{$typ})
- haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{$typ}(undef,size(M.dense,1)))
- haskey(M.etc,:one) || (M.etc[:one] = ones($typ,1))
- tril_to_full!(M.dense)
- copyto!(M.fact,M.dense)
- CUSOLVER.$geqrf_buffer(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.$geqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info)
- return M
- end
-
- function solve_qr!(M::LapackGPUSolver{$typ},x)
- copyto!(M.rhs,x)
- CUSOLVER.$ormqr_buffer(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
- Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.$ormqr(dense_handle(),CUBLAS_SIDE_LEFT,CUBLAS_OP_T,
- Int32(size(M.fact,1)),Int32(1),Int32(length(M.etc[:tau])),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.rhs,Int32(length(M.rhs)),M.work,M.lwork[],M.info)
- CUBLAS.$trsm(handle(),CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,
- Int32(size(M.fact,1)),Int32(1),M.etc[:one],M.fact,Int32(size(M.fact,2)),M.rhs,Int32(length(M.rhs)))
- copyto!(x,M.rhs)
- return x
- end
-
- function factorize_cholesky!(M::LapackGPUSolver{$typ})
- copyto!(M.fact,M.dense)
- CUSOLVER.$potrf_buffer(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork)
- length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[]))
- CUSOLVER.$potrf(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),
- M.work,M.lwork[],M.info)
- return M
- end
-
- function solve_cholesky!(M::LapackGPUSolver{$typ},x)
- copyto!(M.rhs,x)
- CUSOLVER.$potrs(
- dense_handle(),CUBLAS_FILL_MODE_LOWER,
- Int32(size(M.fact,1)),Int32(1),M.fact,Int32(size(M.fact,2)),
- M.rhs,Int32(length(M.rhs)),M.info)
- copyto!(x,M.rhs)
- return x
- end
- end
-end
-
-is_inertia(M::LapackGPUSolver) = M.opt.lapack_algorithm == MadNLP.CHOLESKY # TODO: implement inertia(M::LapackGPUSolver) for BUNCHKAUFMAN
-function inertia(M::LapackGPUSolver)
- if M.opt.lapack_algorithm == MadNLP.BUNCHKAUFMAN
- inertia(M.etc[:fact_cpu],M.etc[:ipiv_cpu],M.etc[:info_cpu][])
- elseif M.opt.lapack_algorithm == MadNLP.CHOLESKY
- sum(M.info) == 0 ? (size(M.fact,1),0,0) : (0,size(M.fact,1),0)
- else
- error(LOGGER,"Invalid lapackcpu_algorithm")
- end
-end
-
-input_type(::Type{LapackGPUSolver}) = :dense
-MadNLP.default_options(::Type{LapackGPUSolver}) = LapackOptions()
-is_supported(::Type{LapackGPUSolver},::Type{Float32}) = true
-is_supported(::Type{LapackGPUSolver},::Type{Float64}) = true
-
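
The wrapper deleted above drove cuSOLVER by hand: query the workspace size with the `*_bufferSize` routine, grow `M.work` as needed, then run the factorization and its companion solve. For orientation, the same factor/solve pair for the Cholesky case can be sketched through CUDA.jl's `LinearAlgebra` integration, which issues the `potrf`/`potrs` calls internally (a sketch only, not the removed wrapper):

```julia
using CUDA, LinearAlgebra

n  = 8
Bh = rand(Float64, n, n)
A  = CuMatrix(Bh * Bh' + n * I)     # positive-definite matrix, built on the host
b  = CUDA.rand(Float64, n)

F = cholesky(Hermitian(A, :L))      # cusolverDn potrf under the hood
x = F \ b                           # cusolverDn potrs
```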
diff --git a/lib/MadNLPGPU/src/utils.jl b/lib/MadNLPGPU/src/utils.jl
new file mode 100644
index 00000000..780fc834
--- /dev/null
+++ b/lib/MadNLPGPU/src/utils.jl
@@ -0,0 +1,71 @@
+function MadNLP._madnlp_unsafe_wrap(vec::VT, n, shift=1) where {T, VT <: CuVector{T}}
+ return view(vec,shift:shift+n-1)
+end
+
+# Local transfer! function to move data on the device.
+transfer!(x::AbstractArray, y::AbstractArray) = copyto!(x, y)
+
+#=
+ copyto!
+=#
+
+@kernel function _copy_to_map_kernel!(y, p, x)
+ i = @index(Global)
+ @inbounds y[p[i]] = x[i]
+end
+
+@kernel function _copy_from_map_kernel!(y, x, p)
+ i = @index(Global)
+ @inbounds y[i] = x[p[i]]
+end
+
+#=
+ SparseMatrixCSC to CuSparseMatrixCSC
+=#
+
+function CUSPARSE.CuSparseMatrixCSC{Tv,Ti}(A::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+ return CUSPARSE.CuSparseMatrixCSC{Tv,Ti}(
+ CuArray(A.colptr),
+ CuArray(A.rowval),
+ CuArray(A.nzval),
+ size(A),
+ )
+end
+
+#=
+ SparseMatrixCOO to CuSparseMatrixCSC
+=#
+
+function MadNLP.transfer!(
+ dest::CUSPARSE.CuSparseMatrixCSC,
+ src::MadNLP.SparseMatrixCOO,
+ map,
+)
+ return copyto!(view(dest.nzVal, map), src.V)
+end
+
+#=
+ CuSparseMatrixCSC to CuMatrix
+=#
+
+@kernel function _csc_to_dense_kernel!(y, @Const(colptr), @Const(rowval), @Const(nzval))
+ col = @index(Global)
+ @inbounds for ptr in colptr[col]:colptr[col+1]-1
+ row = rowval[ptr]
+ y[row, col] = nzval[ptr]
+ end
+end
+
+function transfer!(y::CuMatrix{T}, x::CUSPARSE.CuSparseMatrixCSC{T}) where {T}
+ n = size(y, 2)
+ fill!(y, zero(T))
+ _csc_to_dense_kernel!(CUDABackend())(y, x.colPtr, x.rowVal, x.nzVal, ndrange = n)
+ synchronize(CUDABackend())
+ return
+end
+
+# BLAS operations
+symul!(y, A, x::CuVector{T}, α = 1., β = 0.) where T = CUBLAS.symv!('L', T(α), A, x, T(β), y)
+
+MadNLP._ger!(alpha::Number, x::CuVector{T}, y::CuVector{T}, A::CuMatrix{T}) where T = CUBLAS.ger!(alpha, x, y, A)
+
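
The kernels in this new file use the current KernelAbstractions API: instantiate the kernel with `CUDABackend()`, launch it with an `ndrange`, and block with `synchronize`, replacing the `CUDADevice()`/`Event`/`wait` sequence in the GPU code deleted above. A self-contained sketch of the same launch sequence (the kernel is illustrative, not part of the package):

```julia
using CUDA
using KernelAbstractions: @kernel, @index, @Const, synchronize

@kernel function _scale_kernel!(y, @Const(x), α)
    i = @index(Global)
    @inbounds y[i] = α * x[i]
end

x = CUDA.rand(Float64, 16)
y = similar(x)
_scale_kernel!(CUDABackend())(y, x, 2.0; ndrange = length(x))  # one work-item per entry
synchronize(CUDABackend())                                     # wait for completion
```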
diff --git a/lib/MadNLPGPU/test/densekkt_gpu.jl b/lib/MadNLPGPU/test/densekkt_gpu.jl
index 571eee83..7ea3ff55 100644
--- a/lib/MadNLPGPU/test/densekkt_gpu.jl
+++ b/lib/MadNLPGPU/test/densekkt_gpu.jl
@@ -3,37 +3,37 @@ using CUDA
using MadNLPTests
function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed)
-
- opt_kkt = if (KKTSystem == MadNLP.DenseKKTSystem)
- MadNLP.DENSE_KKT_SYSTEM
- elseif (KKTSystem == MadNLP.DenseCondensedKKTSystem)
- MadNLP.DENSE_CONDENSED_KKT_SYSTEM
- end
-
- for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+ for (T,tol,atol) in [
+ (Float32,1e-3,1e-1),
+ (Float64,1e-8,1e-6)
+ ]
madnlp_options = Dict{Symbol, Any}(
- :kkt_system=>opt_kkt,
+ :callback=>MadNLP.DenseCallback,
+ :kkt_system=>KKTSystem,
:linear_solver=>LapackGPUSolver,
:print_level=>MadNLP.ERROR,
:tol=>tol
)
- nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed)
+ # Host evaluator
+ nlph = MadNLPTests.DenseDummyQP(zeros(T,n); m=m, fixed_variables=ind_fixed)
+ # Device evaluator
+ nlpd = MadNLPTests.DenseDummyQP(CUDA.zeros(T,n); m=m, fixed_variables=CuArray(ind_fixed))
# Solve on CPU
- h_solver = MadNLP.MadNLPSolver(nlp; madnlp_options...)
+ h_solver = MadNLPSolver(nlph; madnlp_options...)
results_cpu = MadNLP.solve!(h_solver)
# Solve on GPU
- d_solver = MadNLPGPU.CuMadNLPSolver(nlp; madnlp_options...)
+ d_solver = MadNLPSolver(nlpd; madnlp_options...)
results_gpu = MadNLP.solve!(d_solver)
- @test isa(d_solver.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}})
+ @test isa(d_solver.kkt, KKTSystem{T})
        # Check that both results match exactly
@test h_solver.cnt.k == d_solver.cnt.k
@test results_cpu.objective ≈ results_gpu.objective
- @test results_cpu.solution ≈ results_gpu.solution atol=atol
- @test results_cpu.multipliers ≈ results_gpu.multipliers atol=atol
+ @test results_cpu.solution ≈ Array(results_gpu.solution) atol=atol
+ @test results_cpu.multipliers ≈ Array(results_gpu.multipliers) atol=atol
end
end
@@ -44,31 +44,33 @@ end
@testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
_compare_gpu_with_cpu(kkt_system, n, m, Int[])
end
- @testset "Fixed variables" begin
- n, m = 20, 0 # warning: setting m >= 1 does not work in inertia free mode
+ @testset "Fixed variables" for (n,m) in [(10, 0), (10, 5), (50, 10)]
_compare_gpu_with_cpu(kkt_system, n, m, Int[1, 2])
end
end
@testset "MadNLP: $QN + $KKT" for QN in [
- MadNLP.DENSE_BFGS,
- MadNLP.DENSE_DAMPED_BFGS,
+ MadNLP.BFGS,
+ MadNLP.DampedBFGS,
], KKT in [
- MadNLP.DENSE_KKT_SYSTEM,
- MadNLP.DENSE_CONDENSED_KKT_SYSTEM,
+ MadNLP.DenseKKTSystem,
+ MadNLP.DenseCondensedKKTSystem,
]
@testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
- nlp = MadNLPTests.DenseDummyQP{Float64}(; n=n, m=m)
- solver_exact = MadNLP.MadNLPSolver(
+ nlp = MadNLPTests.DenseDummyQP(zeros(Float64, n); m=m)
+ solver_exact = MadNLPSolver(
nlp;
+ callback=MadNLP.DenseCallback,
print_level=MadNLP.ERROR,
kkt_system=KKT,
linear_solver=LapackGPUSolver,
)
results_ref = MadNLP.solve!(solver_exact)
- solver_qn = MadNLPGPU.CuMadNLPSolver(
+ nlp = MadNLPTests.DenseDummyQP(CUDA.zeros(Float64, n); m=m)
+ solver_qn = MadNLPSolver(
nlp;
+ callback=MadNLP.DenseCallback,
print_level=MadNLP.ERROR,
kkt_system=KKT,
hessian_approximation=QN,
@@ -78,7 +80,7 @@ end
@test results_qn.status == MadNLP.SOLVE_SUCCEEDED
@test results_qn.objective ≈ results_ref.objective atol=1e-6
- @test results_qn.solution ≈ results_ref.solution atol=1e-6
+ @test Array(results_qn.solution) ≈ Array(results_ref.solution) atol=1e-6
@test solver_qn.cnt.lag_hess_cnt == 0
end
end
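
As the updated test shows, the backend is now selected by the storage type of the initial iterate handed to the evaluator: a host `Array` yields a CPU solve, a `CuArray` a GPU solve, with no separate `CuMadNLPSolver` constructor. Condensed from the test above:

```julia
using CUDA, MadNLP, MadNLPGPU, MadNLPTests

nlp = MadNLPTests.DenseDummyQP(CUDA.zeros(Float64, 10); m=5)  # device evaluator
solver = MadNLPSolver(
    nlp;
    callback=MadNLP.DenseCallback,
    kkt_system=MadNLP.DenseKKTSystem,
    linear_solver=LapackGPUSolver,
    print_level=MadNLP.ERROR,
)
results = MadNLP.solve!(solver)
sol = Array(results.solution)  # copy the device solution back to the host
```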
diff --git a/lib/MadNLPGPU/test/madnlpgpu_test.jl b/lib/MadNLPGPU/test/madnlpgpu_test.jl
new file mode 100644
index 00000000..a5ae04e0
--- /dev/null
+++ b/lib/MadNLPGPU/test/madnlpgpu_test.jl
@@ -0,0 +1,108 @@
+testset = [
+ # Temporarily commented out since LapackGPUSolver does not currently support sparse callbacks
+ [
+ "CUDSS",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.CUDSSSolver,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "CUDSS-AMD",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.CUDSSSolver,
+ print_level=MadNLP.ERROR,
+ ordering=MadNLPGPU.AMD_ORDERING,
+ ),
+ [],
+ ],
+ [
+ "CUDSS-METIS",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.CUDSSSolver,
+ print_level=MadNLP.ERROR,
+ ordering=MadNLPGPU.METIS_ORDERING,
+ ),
+ [],
+ ],
+ [
+ "CUDSS-HYBRID",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.CUDSSSolver,
+ print_level=MadNLP.ERROR,
+ hybrid=true,
+ ir=1,
+ ),
+ [],
+ ],
+ [
+ "CUSOLVERRF",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.RFSolver,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "CUSOLVER-CHOLESKY",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.CuCholeskySolver,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "GLU",
+ ()->MadNLP.Optimizer(
+ linear_solver=MadNLPGPU.GLUSolver,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "LapackGPU-BUNCHKAUFMAN",
+ ()->MadNLP.Optimizer(
+ linear_solver=LapackGPUSolver,
+ lapack_algorithm=MadNLP.BUNCHKAUFMAN,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "LapackGPU-LU",
+ ()->MadNLP.Optimizer(
+ linear_solver=LapackGPUSolver,
+ lapack_algorithm=MadNLP.LU,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "LapackGPU-QR",
+ ()->MadNLP.Optimizer(
+ linear_solver=LapackGPUSolver,
+ lapack_algorithm=MadNLP.QR,
+ print_level=MadNLP.ERROR
+ ),
+ [],
+ ],
+ [
+ "LapackGPU-CHOLESKY",
+ ()->MadNLP.Optimizer(
+ linear_solver=LapackGPUSolver,
+ lapack_algorithm=MadNLP.CHOLESKY,
+ print_level=MadNLP.ERROR
+ ),
+ ["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"], # KKT system not PD
+ ],
+]
+
+@testset "MadNLPGPU test" begin
+ MadNLPTests.test_linear_solver(LapackGPUSolver,Float32)
+ MadNLPTests.test_linear_solver(LapackGPUSolver,Float64)
+ # Test LapackGPU wrapper
+ for (name,optimizer_constructor,exclude) in testset
+ test_madnlp(name,optimizer_constructor,exclude; Arr=CuArray)
+ end
+end
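
Each entry in `testset` pairs a label, a `MadNLP.Optimizer` constructor, and a list of excluded instances for `test_madnlp`. Outside the test harness, the same constructors plug into any MathOptInterface front end; for instance with JuMP (a usage sketch; the model is illustrative):

```julia
using JuMP, MadNLP, MadNLPGPU

model = Model(() -> MadNLP.Optimizer(
    linear_solver=MadNLPGPU.CUDSSSolver,
    print_level=MadNLP.ERROR,
))
@variable(model, x >= 0, start = 0.5)
@variable(model, y >= 0, start = 0.5)
@NLobjective(model, Min, (x - 1)^2 + 100 * (y - x^2)^2)
optimize!(model)
@show value(x) value(y)
```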
diff --git a/lib/MadNLPGPU/test/runtests.jl b/lib/MadNLPGPU/test/runtests.jl
index 07956b05..4e957a90 100644
--- a/lib/MadNLPGPU/test/runtests.jl
+++ b/lib/MadNLPGPU/test/runtests.jl
@@ -1,50 +1,9 @@
-using Test, MadNLP, MadNLPGPU, MadNLPTests
+using Test, CUDA, MadNLP, MadNLPGPU, MadNLPTests
-testset = [
- [
- "LapackGPU-BUNCHKAUFMAN",
- ()->MadNLP.Optimizer(
- linear_solver=LapackGPUSolver,
- lapackgpu_algorithm=MadNLP.BUNCHKAUFMAN,
- print_level=MadNLP.ERROR),
- [],
- ],
- [
- "LapackGPU-LU",
- ()->MadNLP.Optimizer(
- linear_solver=LapackGPUSolver,
- lapackgpu_algorithm=MadNLP.LU,
- print_level=MadNLP.ERROR),
- [],
- ],
- [
- "LapackGPU-QR",
- ()->MadNLP.Optimizer(
- linear_solver=LapackGPUSolver,
- lapackgpu_algorithm=MadNLP.QR,
- print_level=MadNLP.ERROR),
- [],
- ],
- [
- "LapackGPU-CHOLESKY",
- ()->MadNLP.Optimizer(
- linear_solver=LapackGPUSolver,
- lapackgpu_algorithm=MadNLP.CHOLESKY,
- print_level=MadNLP.ERROR),
- ["infeasible", "lootsma", "eigmina"],
- ],
-]
-@testset "MadNLPGPU test" begin
-
- MadNLPTests.test_linear_solver(LapackGPUSolver,Float32)
- MadNLPTests.test_linear_solver(LapackGPUSolver,Float64)
+# Test DenseKKTSystem on GPU
- # Test LapackGPU wrapper
- for (name,optimizer_constructor,exclude) in testset
- test_madnlp(name,optimizer_constructor,exclude)
- end
+@testset "MadNLPGPU test" begin
+ include("madnlpgpu_test.jl")
+ include("densekkt_gpu.jl")
end
-
-# Test DenseKKTSystem on GPU
-include("densekkt_gpu.jl")
diff --git a/lib/MadNLPHSL/Artifacts.toml b/lib/MadNLPHSL/Artifacts.toml
deleted file mode 100644
index 92f699e4..00000000
--- a/lib/MadNLPHSL/Artifacts.toml
+++ /dev/null
@@ -1,667 +0,0 @@
-# METIS
-[[METIS]]
-arch = "i686"
-git-tree-sha1 = "3bb8df8f130cfa2f754c62195e8d45e185b70f4a"
-libc = "musl"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "afe4bf4900c5b3e0ab71205f83ccb758e0a2a73111183d04215e2993e6ec5838"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.i686-linux-musl.tar.gz"
-[[METIS]]
-arch = "aarch64"
-git-tree-sha1 = "c6799eb86a9f06fca3523408ebdaffea902f69b5"
-libc = "musl"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "ffc12804776b476f76a4005baacd894c337aa448ea0410496f4c9e882442a66c"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.aarch64-linux-musl.tar.gz"
-[[METIS]]
-arch = "i686"
-git-tree-sha1 = "45da1c3e85c743be740b119e9c454882d55c1057"
-os = "windows"
-
-[[METIS.download]]
-sha256 = "7b86213cd2833d8b7320c8a9f3cec53ddf78b72e156696e570df7e1e1748bb90"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.i686-w64-mingw32.tar.gz"
-[[METIS]]
-arch = "x86_64"
-git-tree-sha1 = "c79c065dd80a73c7c89d1e9f60a1a14f8849be1e"
-os = "windows"
-
-[[METIS.download]]
-sha256 = "8939ec73d1d27aa5d8aadf5cbaf2bad9445188fedab4c20036a634ef8f56315d"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.x86_64-w64-mingw32.tar.gz"
-[[METIS]]
-arch = "armv7l"
-git-tree-sha1 = "36e3bdd501217db399c5e04d800ae5c36edd6cdb"
-libc = "glibc"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "74f0dd9679196d6c4b0513e517aa28fc1619079493c110e6ec3d994e57df8516"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.armv7l-linux-gnueabihf.tar.gz"
-[[METIS]]
-arch = "armv7l"
-git-tree-sha1 = "dececfd251630954aafc8df205041b00861a1f6f"
-libc = "musl"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "8844baa88bd72f80d71a5d29d64bbcce3b20f61a06b5888e761e468ded10fd08"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.armv7l-linux-musleabihf.tar.gz"
-[[METIS]]
-arch = "i686"
-git-tree-sha1 = "46161f54d73be3f0e17d89c397a2142276b0cef5"
-libc = "glibc"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "abeb28331b13f8047aa3c0fb8bc51b607e8f7039c72c095c0d14d1c911c6d51b"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.i686-linux-gnu.tar.gz"
-[[METIS]]
-arch = "x86_64"
-git-tree-sha1 = "820382e6c2f6674e357232bdf4aab3b53d4b1fcd"
-libc = "glibc"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "b470e6ac780e1266a8d703abcf68b8a3f746cdcc737bdf1558f0780a6549d9d4"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.x86_64-linux-gnu.tar.gz"
-[[METIS]]
-arch = "powerpc64le"
-git-tree-sha1 = "8efbbc522c6d5cd487b67177a83850480c62cda2"
-libc = "glibc"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "76fb068c5f594b9b6489f8d661dfbab27fd408e63f0e3422d5ea01007fea08b2"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.powerpc64le-linux-gnu.tar.gz"
-[[METIS]]
-arch = "x86_64"
-git-tree-sha1 = "d5c6243ccf8d1bc26ac624fe7dba239df828e73f"
-os = "macos"
-
-[[METIS.download]]
-sha256 = "aa155bac90b8929dccc94556c44f7b29bcb943af7da9f2fae462530107bee5fd"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.x86_64-apple-darwin14.tar.gz"
-[[METIS]]
-arch = "aarch64"
-git-tree-sha1 = "632e487bfe44fe18d705e8db2f2953eaf30ca5de"
-libc = "glibc"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "41e127ee4644905ddb2c6ba3e9d53ebedad09ac53380bb107041f443cae46814"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.aarch64-linux-gnu.tar.gz"
-[[METIS]]
-arch = "x86_64"
-git-tree-sha1 = "bfe48e2bc7b0e92735799dddbf451a92e33f1dc4"
-os = "freebsd"
-
-[[METIS.download]]
-sha256 = "40d6ff6085476383c99ac110174b6d49a0ea970b898991af6c731b0c08c7ed7d"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.x86_64-unknown-freebsd11.1.tar.gz"
-[[METIS]]
-arch = "x86_64"
-git-tree-sha1 = "0bd5bc8040cd3ccae4f0e47de110b41a5b044056"
-libc = "musl"
-os = "linux"
-
-[[METIS.download]]
-sha256 = "0ded976613ba2ddd499a041807dd61707aacb1630060fe6b9315896ff385bf8e"
-url = "https://github.com/JuliaBinaryWrappers/METIS_jll.jl/releases/download/METIS-v4.0.3+3/METIS.v4.0.3.x86_64-linux-musl.tar.gz"
-
-
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "a68a049a1bf490184600ddf88bc55e4b8583ddee"
-libgfortran_version = "5.0.0"
-os = "macos"
-
- [[OpenBLAS32.download]]
- sha256 = "bcce01615c17c53a143a466d19717c0ab684583ec808c45e85ab88fe4232f6e7"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-apple-darwin-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "cc47493e00e6bd65d2eaa01dbffe9e1dd591cd37"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "e77be41509489f80e7ffbcf3faf8bbd6a51243bf459fda40662faf8b193156fa"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-gnu-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "d76a7b0ee6f803d27a0b01f45ca0eb32273c4ec4"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "1f1655645d4758223b8131a8e24a1eb0a19e11caf827e0823bd61df63fababd8"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-gnu-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "76ac31ac93f4e41c398033712851a19381694300"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "3e9b8f7ce68aea6a49afe3720920623fe29a06fd0ebacf28720c76f1b9f4247a"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-gnu-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "78c873be1367ab4b7d8bfb338644b740c41b13a7"
-libc = "musl"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "3d4eefbdc85032430f05740e9831736155de449b3cf60c7972c2d84d8790c545"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-musl-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "306dcc80dd49270be4828ebf0bade9b9bad9b352"
-libc = "musl"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "9b2e3074b7e105676ecad18a85ba8f2abcd2a3a2eb5f2f20c4108d00fe2e30ae"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-musl-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "aarch64"
-git-tree-sha1 = "d51b369c9fdcbcd7dcb9a941b9c3859ed6a5d46a"
-libc = "musl"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "5c8f01370032c20f579162fc33e31d759c9e7acef906e771140c3317513fff00"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.aarch64-linux-musl-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "6efe72fa303c990d591c3f5a3ced7867d91c87af"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "14d33f5f280e1e89dce17c48a5cca638f944c3c5fc2e45edd2e1c6e6761c398b"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-gnueabihf-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "09ffd709268cc5020c68472e759123bfc87bdcd2"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "b891d86fe65dcd2c10e2d99f2cd45d9b25773e95825f37a4cb776d63d98ba972"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-gnueabihf-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "c615ae8bfc1c2182362b78a10419a5095e31dbfc"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "df51a8923154f96a4a8d97101aa86f86b48e94a4c0486825fe4c4ae04507941e"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-gnueabihf-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "3f0ceb70f9ee4721dde3e5942de892d3b0266e21"
-libc = "musl"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "85ae39562e69f3a312acbf376999c26a0176206922ab75308efe125fe0605b14"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-musleabihf-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "15c89ac17b0dfe0d8ed7c7cb2a55d73f8246d97d"
-libc = "musl"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "ac7ae8e6c8421e17e9a35b854c6cdd2e72a73b8c2bbca18175d82a3a30f986a5"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-musleabihf-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "e1de93faefcd5f7f05cc2244ede6dfc1c150a5c1"
-libc = "musl"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "05df15c304eb1cf17acd5a28587edb784df452d79d01244288bb552b6c0fe4b9"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv6l-linux-musleabihf-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "d0b149d43a18f72be7fa96c8ebb08fff5c6d3a50"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "9e1458f0eb7d9e2d5fb0259d0f2c8b5251e7e445f86ed055ec2465d4678e3425"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-gnueabihf-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "f9a50f31ac3524b0c0326c64b644b216f9ecc170"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "e412f094c67de0f7a342465c0ba7741e0eb808f0da170f5c70c6750a40d27a38"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-gnueabihf-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "e64d8f39c9f6b905ead8fc4146ec5d594715b164"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "4e8a0fc2dac3b32613f78c655ef67287ccda5c5c2233d26a4333c425fcc19154"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-gnueabihf-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "c0eeea855700c0d79790db0f9efd98d3e520fa60"
-libc = "musl"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "76b93fd23828fb14f497ce8f0819bf12394cebaad12da00ed2ab2aee962bb6e2"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-musleabihf-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "4b0cc79cc2246db74bc185847f6c2059f13b7488"
-libc = "musl"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "f1332d398a33ad3686fb8a8f8bd83f6d40122e9e750dc470a71065e4669924cb"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-musleabihf-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "c4efa623943c159336e93ee17536b227cac0e1df"
-libc = "musl"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "9b251e4b220df62bcc74b9bcf15377af8a08b0d1e847c5d57b4186859afe653d"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.armv7l-linux-musleabihf-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "3eeeec91e659f6ff4a0193099587f38ce564ce55"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "ab4a1b62728265d1417c2a947bf011942808863204302b1586ea591fb54b5cea"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-gnu-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "dee1f015fba8c4dbd024650e2831e148209c066c"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "32cf4148341d4dc2cb7ad6541713bc6165c58a12cc036f4e9a663727e3b48012"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-gnu-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "d6654c1a6b59c18c1645c0bfef226c2625ede38f"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "b10d282c807329280843a3ab55c123395613f0de3ea00f1710d3e40c9a0d7dc7"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-gnu-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "82c66f0af69784be66bad11c4937dfca1093ba9c"
-libc = "musl"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "ed06d381ac00c3c6890ffd4de0e681825b5320e4cd32e7a1ae7539ca8bb76728"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-musl-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "106e06b02d8f9534ff8376fbb719a4258c1cf911"
-libc = "musl"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "e568e92f636275497fdbd3dc6a234afc617099271776e9ae791a567c284d0a76"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-musl-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "4006236da1cf27da0f42817fce9592b3868272e8"
-libc = "musl"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "2680fb754523dc1f983fc3fffe3066674eb12e6cf371c21ef8306226f7b9db29"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-linux-musl-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "2b54484e7aa1a11c889d3c28bdb7c162a75eba00"
-libgfortran_version = "3.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "4efa2de62e09fd62f709055115814a49a04c690d366210a5e53fd118ae3a0edc"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-w64-mingw32-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "fee6dc4a65bf6ae05ab2830bcd3de6c66ff7f6d1"
-libgfortran_version = "4.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "baccc902f3e0a57276f6682d8cb491059678c71811f0564e28264a69f9c2cf0e"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-w64-mingw32-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "i686"
-git-tree-sha1 = "83d52f15226fd6f94451e3957c2ba99ebbe3ba5c"
-libgfortran_version = "5.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "af6a48c6c3a8b5c095a7af23ce122a3d5e6861fa1def0779fe4aa63e125f5549"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.i686-w64-mingw32-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "powerpc64le"
-git-tree-sha1 = "e89eb724d8323145816983deb23343b8bcdc43d4"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "a2fb59d7eb66798f00ebf188465016193ad0bc89d6d7f1c150c20f757126a684"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.powerpc64le-linux-gnu-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "powerpc64le"
-git-tree-sha1 = "ec7003254493c0976b7600612e5e6a46d549baae"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "e9a6144c5771e438d5d447d25743651fcc97ee88966bacf54921f94448ac9625"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.powerpc64le-linux-gnu-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "powerpc64le"
-git-tree-sha1 = "2b3cb787a8bec8833e263f08f44732e8c811f68c"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "1d419e0690db24dd5146a5fe1c9967acc76f0c2a47d11879ea3f0ca8320e8542"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.powerpc64le-linux-gnu-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "74208dfd993c64ab5d9e980a66122f47d87741d2"
-libgfortran_version = "3.0.0"
-os = "macos"
-
- [[OpenBLAS32.download]]
- sha256 = "c28e72ce9333852d1ce99f51ada4ce17fa6a5ded72f39d8f089b7b5706b0bb35"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-apple-darwin-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "44a6a955857d6c9528380281c8ae2ba9f5d35479"
-libgfortran_version = "4.0.0"
-os = "macos"
-
- [[OpenBLAS32.download]]
- sha256 = "4ffa2494be1ab5422b546e5e2763445b3ffae8a7ccecaa7082982087b0555406"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-apple-darwin-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "7eeba3da309fa4ef906ef44ed54ff4a525e8bbb7"
-libgfortran_version = "5.0.0"
-os = "macos"
-
- [[OpenBLAS32.download]]
- sha256 = "402d8e5b10fd60a9008a574ea547d2df85579b277baefeaca67830704491b125"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-apple-darwin-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "e9fe5e94aaaf74b0887b2d6ab71c304ba6a12fe8"
-libc = "glibc"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "71427d0ea17e1507c961fee78eb9b87dd0bb297f9c118af2f7e86af7a921773e"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-gnu-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "a7ee97c9579eca581364592cfb5cc69917dad67e"
-libc = "glibc"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "a193baae5a3f3764b25d8dc4be920c5e88b83db2af8c58c5e82c37237e24771f"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-gnu-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "3a4a848f317187b00e3bd14f1f01d314dea276aa"
-libc = "glibc"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "4d6ab17d0b1c51d17c4b1890a59d4f72edbf7f9e6dd6b5a37e2a2df5eb55c477"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-gnu-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "7bd6b209099e251b424a251117b8b2dfdca27d2a"
-libc = "musl"
-libgfortran_version = "3.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "c6996798e76f77a539457a1380e91015487c7358b502077fe74beafbe736d752"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-musl-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "1d067cd563f248cf4a144fb8c51964f974c20424"
-libc = "musl"
-libgfortran_version = "4.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "156d4ebdccc7f271fecb1bd58b8be62eb938b57123f812ac68219ebe110923a4"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-musl-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "f6ee8313bbb68b68326bc0e347e61b734ba422a0"
-libc = "musl"
-libgfortran_version = "5.0.0"
-os = "linux"
-
- [[OpenBLAS32.download]]
- sha256 = "e472eaed8d8c4f9752a41e9a05d571fa681e13426a5ac20b8280a697d021f4d4"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-linux-musl-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "bbaf9d77a94f2a1aa94ff4c3f1adeb4b192824fe"
-libgfortran_version = "3.0.0"
-os = "freebsd"
-
- [[OpenBLAS32.download]]
- sha256 = "eb306ef0b05cbab95e73455ba0c22b912b6b6e09e3204cba41739aaded616bc3"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-unknown-freebsd-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "fad138f920e4ca8f2500b10506d2aa932c63b1a6"
-libgfortran_version = "4.0.0"
-os = "freebsd"
-
- [[OpenBLAS32.download]]
- sha256 = "f74b6849fdc0f0677fc838e9b12fe6fe528f1badc3cfd3da1cde7eddc1c5d4b9"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-unknown-freebsd-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "22b96a054056ba56b8a23a193ed32627cf0a86a1"
-libgfortran_version = "5.0.0"
-os = "freebsd"
-
- [[OpenBLAS32.download]]
- sha256 = "cabb445276eea54252986d38943527e3673798132f728b7585c614a00321e740"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-unknown-freebsd-libgfortran5.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "76a336456d732ad6c81382fd7d3d4e7d2a704d71"
-libgfortran_version = "3.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "23313edd4ec55a6fe4ee02558a2e7f2be3e6e5fd2e61b72f4b285242d85486d6"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-w64-mingw32-libgfortran3.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "720b1e4b97f41280b92f32fee53b003c34104dc7"
-libgfortran_version = "4.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "08e91b6b274fc770722308f69352ce79412668343de61e00bb533c9d2ccfdc64"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-w64-mingw32-libgfortran4.tar.gz"
-[[OpenBLAS32]]
-arch = "x86_64"
-git-tree-sha1 = "4643110eab7822d8ab54d0d51589c6925b087323"
-libgfortran_version = "5.0.0"
-os = "windows"
-
- [[OpenBLAS32.download]]
- sha256 = "499fd4686fcf399113eadc4a6e23149147952a4a13fa57231ab72d4c73b1acb6"
- url = "https://github.com/JuliaBinaryWrappers/OpenBLAS32_jll.jl/releases/download/OpenBLAS32-v0.3.13+4/OpenBLAS32.v0.3.13.x86_64-w64-mingw32-libgfortran5.tar.gz"
-
-[[MKL]]
-arch = "x86_64"
-git-tree-sha1 = "c284c144ceaa0b7ab2811b3726c4efc008a13bf0"
-lazy = true
-libc = "glibc"
-os = "linux"
-
- [[MKL.download]]
- sha256 = "28fafe3bf0a5d9025dba311b2a152fac5da1fc4050659cd0cb423591eb96f3ca"
- url = "https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2022.0.0+0/MKL.v2022.0.0.x86_64-linux-gnu.tar.gz"
-[[MKL]]
-arch = "i686"
-git-tree-sha1 = "169de1a9101def0abb5bd1e42838dfbd84c800c6"
-lazy = true
-libc = "glibc"
-os = "linux"
-
- [[MKL.download]]
- sha256 = "db2e00483b159ecdcdccbeec8a36c269b7907c3e2dd629e07e6c8eaf69d6d8d7"
- url = "https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2022.0.0+0/MKL.v2022.0.0.i686-linux-gnu.tar.gz"
-[[MKL]]
-arch = "x86_64"
-git-tree-sha1 = "5be07e58a7aedaa7b3028bed3cb71e2f05c76c2d"
-lazy = true
-os = "macos"
-
- [[MKL.download]]
- sha256 = "b60065229efa1f795630ab7c0033d81213a8507bd616cc03f611c02d7040c17a"
- url = "https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2022.0.0+0/MKL.v2022.0.0.x86_64-apple-darwin.tar.gz"
-[[MKL]]
-arch = "i686"
-git-tree-sha1 = "ee1f9da32eace02b0d1042d03186111e83474966"
-lazy = true
-os = "windows"
-
- [[MKL.download]]
- sha256 = "d549e44f06276963c85210362d5b4b5505cc93e085e24018ddd1671069506d7c"
- url = "https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2022.0.0+0/MKL.v2022.0.0.i686-w64-mingw32.tar.gz"
-[[MKL]]
-arch = "x86_64"
-git-tree-sha1 = "6523dce43888d76b70c34c4c68ce2ee4e02970e7"
-lazy = true
-os = "windows"
-
- [[MKL.download]]
- sha256 = "61e0605b102c9f434abcb1798e25005eac32e284a2bf120740d09589827fb986"
- url = "https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2022.0.0+0/MKL.v2022.0.0.x86_64-w64-mingw32.tar.gz"
-
-
-[[IntelOpenMP]]
-arch = "x86_64"
-git-tree-sha1 = "e76af028a823f7e7c18226c8079d03035c2e4c46"
-os = "macos"
-
- [[IntelOpenMP.download]]
- sha256 = "a6b58de42a450fa0253bbd417572e8f1549ebe5aaddae3bf088b2e29007de1bd"
- url = "https://github.com/JuliaBinaryWrappers/IntelOpenMP_jll.jl/releases/download/IntelOpenMP-v2018.0.3+2/IntelOpenMP.v2018.0.3.x86_64-apple-darwin.tar.gz"
-[[IntelOpenMP]]
-arch = "i686"
-git-tree-sha1 = "2abeb9a6b565e7cb08542447d6c05b28c638f4a1"
-libc = "glibc"
-os = "linux"
-
- [[IntelOpenMP.download]]
- sha256 = "9e5659180ce6f80e771f436a840de8355f998fe3cae8dbfc1f9634c9dabca7ae"
- url = "https://github.com/JuliaBinaryWrappers/IntelOpenMP_jll.jl/releases/download/IntelOpenMP-v2018.0.3+2/IntelOpenMP.v2018.0.3.i686-linux-gnu.tar.gz"
-[[IntelOpenMP]]
-arch = "i686"
-git-tree-sha1 = "1aef321e2cf79fc656be96b261396342d8f6d017"
-os = "windows"
-
- [[IntelOpenMP.download]]
- sha256 = "f9999c7d9ccc222f90db23513dc0b28d19bd9b92015d24b16640378e2a753b5b"
- url = "https://github.com/JuliaBinaryWrappers/IntelOpenMP_jll.jl/releases/download/IntelOpenMP-v2018.0.3+2/IntelOpenMP.v2018.0.3.i686-w64-mingw32.tar.gz"
-[[IntelOpenMP]]
-arch = "x86_64"
-git-tree-sha1 = "947793e42b663bacd09f00d96aa96a47095f3b1c"
-libc = "glibc"
-os = "linux"
-
- [[IntelOpenMP.download]]
- sha256 = "f40e3d5f625eb5ba907c9b73fd8d4adf5e74dadbc0022f5be2185e93228d0703"
- url = "https://github.com/JuliaBinaryWrappers/IntelOpenMP_jll.jl/releases/download/IntelOpenMP-v2018.0.3+2/IntelOpenMP.v2018.0.3.x86_64-linux-gnu.tar.gz"
-[[IntelOpenMP]]
-arch = "x86_64"
-git-tree-sha1 = "e32fbb6cb3c81caf8ba4aecdf54a6fc215996b78"
-os = "windows"
-
- [[IntelOpenMP.download]]
- sha256 = "684f9554587017d5487a9121ff10c173b4e232842f462fe25ce4fb279e84a5e7"
- url = "https://github.com/JuliaBinaryWrappers/IntelOpenMP_jll.jl/releases/download/IntelOpenMP-v2018.0.3+2/IntelOpenMP.v2018.0.3.x86_64-w64-mingw32.tar.gz"
diff --git a/lib/MadNLPHSL/Project.toml b/lib/MadNLPHSL/Project.toml
index 9b5a8b07..bdd5b657 100644
--- a/lib/MadNLPHSL/Project.toml
+++ b/lib/MadNLPHSL/Project.toml
@@ -1,23 +1,21 @@
name = "MadNLPHSL"
uuid = "7fb6135f-58fe-4112-84ca-653cf5be0c77"
-authors = ["Sungho Shin "]
-version = "0.3.1"
+version = "0.5.2"
[deps]
+HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-BinaryProvider = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[compat]
-MadNLP = "0.5, 0.6"
-MadNLPTests = "~0.3"
-BinaryProvider = "0.5"
+HSL = "0.4.2"
+MadNLP = "0.5,0.6,0.7,0.8"
+MadNLPTests = "0.5"
julia = "1.6"
[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Test","MadNLPTests"]
+test = ["Test", "MadNLPTests"]
diff --git a/lib/MadNLPHSL/README.md b/lib/MadNLPHSL/README.md
index 2fb8cec1..8596ff58 100644
--- a/lib/MadNLPHSL/README.md
+++ b/lib/MadNLPHSL/README.md
@@ -2,40 +2,21 @@
```julia
pkg> add MadNLPHSL
```
+Obtain a license and download HSL_jll.jl from https://licences.stfc.ac.uk/product/libhsl.
-## Build
+There are two versions of HSL_jll.jl available: LBT and OpenBLAS.
+LBT is the recommended option for Julia ≥ v1.9.
-To build MadNLP with HSL linear solvers (Ma27, Ma57, Ma77, Ma86, Ma97), the source codes need to be obtained by the user from under Coin-HSL Full (Stable). The source codes are distribted as a tarball file `coinhsl-*.tar.gz`. The absolute path to the extracted source code or the complied library should be provided to the user. If the user has an already compiled HSL sovler library, one can simply provide a path to that shared library.In this case, the source code is not compiled and the provided shared library is directly used.
-```julia
-# at least one of the following should be given
-julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl"
-julia> ENV["MADNLP_MA27_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
-julia> ENV["MADNLP_MA57_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
-julia> ENV["MADNLP_MA77_SOURCE_PATH"] = "/opt/hsl_ma77-6.3.0"
-julia> ENV["MADNLP_MA86_SOURCE_PATH"] = "/opt/hsl_ma86-1.7.2"
-julia> ENV["MADNLP_MA97_SOURCE_PATH"] = "/opt/hsl_ma97-2.7.1"
+Install the downloaded `HSL_jll.jl` package into your current environment using:
-julia> ENV["MADNLP_HSL_LIBRARY_PATH"] = "/usr/lib/libcoinhsl.so"
-julia> ENV["MADNLP_MA27_LIBRARY_PATH"] = "/usr/lib/libma27.so"
-julia> ENV["MADNLP_MA57_LIBRARY_PATH"] = "/usr/lib/libma57.so"
-julia> ENV["MADNLP_MA77_LIBRARY_PATH"] = "/usr/lib/libma77.so"
-julia> ENV["MADNLP_MA86_LIBRARY_PATH"] = "/usr/lib/libma86.so"
-julia> ENV["MADNLP_MA97_LIBRARY_PATH"] = "/usr/lib/libma97.so"
-# optionally, one can specify
-julia> ENV["MADNLP_HSL_BLAS"] = "mkl" # default is "openblas"
-```
-After obtaining the source code or the libarary, run
```julia
-pkg> build MadNLPHSL
+import Pkg
+Pkg.develop(path = "/full/path/to/HSL_jll.jl")
```
-If HSL is built from the source code, the build process requires a Fortran compiler. If they are not installed, do:
-```julia
-shell> sudo apt install gfortran # Ubuntu, Debian
-shell> brew cask install gfortran # MacOS
+Alternatively, one can use a custom-compiled HSL library by overriding the `HSL_jll.jl` artifact.
+This can be done by editing `~/.julia/artifacts/Overrides.toml`:
```
-The compiler can be customized by:
-```julia
-julia> ENV["MADNLP_FC"] = "/usr/local/bin/gfortran" # default is "gfortran"
+# replace the HSL_jll.jl artifact with the local library /usr/local/lib/libhsl.so
+ecece3e2c69a413a0e935cf52e03a3ad5492e137 = "/usr/local"
```
-
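
Once `HSL_jll.jl` is installed, the solvers exported by this package (`Ma27Solver`, `Ma57Solver`, `Ma77Solver`, `Ma86Solver`, `Ma97Solver`) can be passed to MadNLP directly. A minimal sketch, assuming `nlp` is any NLPModels-compatible problem:

```julia
using MadNLP, MadNLPHSL

solver = MadNLPSolver(nlp; linear_solver=Ma57Solver, print_level=MadNLP.ERROR)
results = MadNLP.solve!(solver)
```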
diff --git a/lib/MadNLPHSL/deps/build.jl b/lib/MadNLPHSL/deps/build.jl
deleted file mode 100644
index 50862e9c..00000000
--- a/lib/MadNLPHSL/deps/build.jl
+++ /dev/null
@@ -1,147 +0,0 @@
-using Pkg.Artifacts, BinaryProvider
-
-if haskey(ENV,"MADNLP_HSL_BLAS") && ENV["MADNLP_HSL_BLAS"]=="mkl"
- blasvendor = :mkl
-else
- blasvendor = :openblas
- end
-
-const verbose = "--verbose" in ARGS
-const prefix = Prefix(@__DIR__)
-const so = BinaryProvider.platform_dlext()
-const rpath = `-Wl,-rpath,`
-const whole_archive= Sys.isapple() ? `-Wl,-all_load` : `-Wl,--whole-archive`
-const no_whole_archive = Sys.isapple() ? `-Wl,-noall_load` : `-Wl,--no-whole-archive`
-const libdir = mkpath(joinpath(@__DIR__, "lib"))
-const FC = haskey(ENV,"MADNLP_FC") ? ENV["MADNLP_FC"] : `gfortran`
-const libmetis_dir = joinpath(artifact"METIS", "lib")
-const with_metis = `-L$libmetis_dir $rpath$libmetis_dir -lmetis`
-if blasvendor == :mkl
- const libblas_dir = joinpath(artifact"MKL","lib")
- const libopenmp_dir = joinpath(artifact"IntelOpenMP","lib")
- const with_blas = `-L$libblas_dir $rpath$libblas_dir -lmkl_rt -L$libopenmp_dir $rpath$libopenmp_dir -liomp5`
-else
- const libblas_dir = joinpath(artifact"OpenBLAS32","lib")
- const with_blas = `-L$libblas_dir $rpath$libblas_dir -lopenblas`
-end
-
-const supported_library = [
- (:libhsl, "MADNLP_HSL_LIBRARY_PATH", "MADNLP_HSL_SOURCE_PATH")
- (:libma27, "MADNLP_MA27_LIBRARY_PATH", "MADNLP_MA27_SOURCE_PATH")
- (:libma57, "MADNLP_MA57_LIBRARY_PATH", "MADNLP_MA57_SOURCE_PATH")
- (:libma77, "MADNLP_MA77_LIBRARY_PATH", "MADNLP_MA77_SOURCE_PATH")
- (:libma86, "MADNLP_MA86_LIBRARY_PATH", "MADNLP_MA86_SOURCE_PATH")
- (:libma97, "MADNLP_MA97_LIBRARY_PATH", "MADNLP_MA97_SOURCE_PATH")
-]
-
-const targets_dict = Dict(
- :libhsl=> [
- "deps.f",
- "deps90.f90",
- "ma27d.f",
- "ma57d.f",
- "hsl_ma77d.f90",
- "hsl_ma86d.f90",
- "hsl_ma97d.f90",
- "hsl_mc68i_ciface.f90",
- "hsl_ma77d_ciface.f90",
- "hsl_ma86d_ciface.f90",
- "hsl_ma97d_ciface.f90",
- ],
- :libma27 => [
- "deps.f",
- "ma27d.f",
- "ma27s.f",
- ],
- :libma57 => [
- "sdeps.f", "ddeps.f",
- "ma57d.f", "ma57s.f",
- ],
- :libma77 => [
- "common.f", "common90.f90",
- "ddeps90.f90", "sdeps90.f90",
- "hsl_ma77d.f90", "hsl_ma77s.f90",
- "hsl_ma77d_ciface.f90", "hsl_ma77s_ciface.f90",
- ],
- :libma86 => [
- "common.f", "common90.f90",
- "sdeps90.f90",
- "hsl_ma86d.f90", "hsl_ma86s.f90",
- "hsl_ma86d_ciface.f90", "hsl_ma86s_ciface.f90",
- "hsl_mc68i_ciface.f90",
- ],
- :libma97 => [
- "common.f", "common90.f90",
- "sdeps90.f90", "ddeps90.f90",
- "hsl_ma97d.f90", "hsl_ma97s.f90",
- "hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
- ]
-)
-
-rm(libdir;recursive=true,force=true)
-mkpath(libdir)
-isvalid(cmd::Cmd)=(try run(cmd) catch e return false end; return true)
-
-
-# HSL
-attempted = Tuple{Symbol,Product}[]
-
-for (lib, envlib, envsrc) in supported_library
- if haskey(ENV,envlib)
- push!(attempted, (lib,FileProduct(ENV[envlib], lib)))
- elseif haskey(ENV,envsrc) && isvalid(`$FC --version`)
- @info "Compiling $lib"
- source_path = ENV[envsrc]
- targets = targets_dict[lib]
-
- copied_path = joinpath(@__DIR__, "src", string(lib))
- mkpath(joinpath(@__DIR__,"src"))
- cp(source_path, copied_path; force = true)
- cd(copied_path)
-
- list = []
- for (root, dir, files) in walkdir(copied_path)
- for file in files
- if file in targets
- @info "$file source code detected."
- push!(list, (root, dir, file))
- end
- end
- end
-
- succeeded = []
-
- for target in targets
- for (root, dir, file) in list
- if file == target
- name, ext = splitext(relpath(joinpath(root,file),copied_path))
- isvalid(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`)
- push!(succeeded, (name, ext))
- end
- end
- end
-
-
- cmd = `$FC -o$(libdir)/$lib.$so -shared -fPIC -O3 -fopenmp`
- append!(cmd.exec, ["$name.o" for (name,ext) in succeeded])
- append!(cmd.exec, with_metis.exec)
- append!(cmd.exec, with_blas.exec)
-
- run(cmd)
- cd("$(@__DIR__)")
- push!(attempted, (lib,FileProduct(prefix,joinpath(libdir,"$lib.$so"), lib)))
- end
-end
-
-# write deps.jl
-succeeded = Product[]
-for (lib, product) in attempted
- if satisfied(product)
- @info "Building $lib succeeded."
- push!(succeeded, product)
- else
- @error "Building $lib failed."
- end
-end
-
-write_deps_file(joinpath(@__DIR__, "deps.jl"), succeeded, verbose=verbose)
diff --git a/lib/MadNLPHSL/src/MadNLPHSL.jl b/lib/MadNLPHSL/src/MadNLPHSL.jl
index 72512f39..8bfd265b 100644
--- a/lib/MadNLPHSL/src/MadNLPHSL.jl
+++ b/lib/MadNLPHSL/src/MadNLPHSL.jl
@@ -1,64 +1,32 @@
module MadNLPHSL
-import Libdl: dlopen, RTLD_DEEPBIND
-import MadNLP: @kwdef, MadNLPLogger, @debug, @warn, @error,
+import MadNLP: MadNLP, @kwdef, MadNLPLogger, @debug, @warn, @error,
AbstractOptions, AbstractLinearSolver, set_options!, SparseMatrixCSC, SubVector,
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, findIJ, nnz,
get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap,
is_supported, default_options
-include(joinpath("..","deps","deps.jl"))
+import HSL
+import HSL: mc68_control, mc68_info, ma77_control, ma77_info, ma86_control, ma86_info, ma97_control, ma97_info
-include("common.jl")
-include("mc68.jl")
-
-if @isdefined(libhsl)
- @isdefined(libma27) || const libma27 = libhsl
- @isdefined(libma57) || const libma57 = libhsl
- @isdefined(libma77) || const libma77 = libhsl
- @isdefined(libma86) || const libma86 = libhsl
- @isdefined(libma97) || const libma97 = libhsl
-end
-
-if @isdefined(libma27)
- include("ma27.jl")
- export Ma27Solver
-end
-
-if @isdefined(libma57)
- include("ma57.jl")
- export Ma57Solver
-end
+import LinearAlgebra
-if @isdefined(libma77)
- include("ma77.jl")
- export Ma77Solver
-end
-
-if @isdefined(libma86)
- include("ma86.jl")
- export Ma86Solver
-end
-
-if @isdefined(libma97)
- include("ma97.jl")
- export Ma97Solver
-end
-
-function __init__()
- check_deps()
- try
- @isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
- @isdefined(libma27) && dlopen(libma27,RTLD_DEEPBIND)
- @isdefined(libma77) && dlopen(libma57,RTLD_DEEPBIND)
- @isdefined(libma77) && dlopen(libma77,RTLD_DEEPBIND)
- @isdefined(libma86) && dlopen(libma77,RTLD_DEEPBIND)
- @isdefined(libma97) && dlopen(libma97,RTLD_DEEPBIND)
- catch e
- println("HSL shared library cannot be loaded")
+include("common.jl")
+include("ma27.jl")
+include("ma57.jl")
+include("ma77.jl")
+include("ma86.jl")
+include("ma97.jl")
+
+export Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver
+
+# re-export MadNLP, including deprecated names
+for name in names(MadNLP, all=true)
+ if Base.isexported(MadNLP, name)
+ @eval using MadNLP: $(name)
+ @eval export $(name)
end
end
-
end # module
diff --git a/lib/MadNLPHSL/src/ma27.jl b/lib/MadNLPHSL/src/ma27.jl
index 92539efa..9011100c 100644
--- a/lib/MadNLPHSL/src/ma27.jl
+++ b/lib/MadNLPHSL/src/ma27.jl
@@ -38,8 +38,8 @@ end
for (fa, fb, fc, typ) in [
- (:ma27ad_,:ma27bd_,:ma27cd_,Float64),
- (:ma27a_,:ma27b_,:ma27c_,Float32)
+ (:ma27ad, :ma27bd, :ma27cd, Float64),
+ (:ma27a , :ma27b , :ma27c , Float32)
]
@eval begin
ma27a!(
@@ -47,44 +47,21 @@ for (fa, fb, fc, typ) in [
iw::Vector{Cint},liw::Cint,ikeep::Vector{Cint},iw1::Vector{Cint},
nsteps::Vector{Cint},iflag::Cint,icntl::Vector{Cint},cntl::Vector{$typ},
info::Vector{Cint},ops::$typ
- ) = ccall(
- ($(string(fa)),libma27),
- Nothing,
- (Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ref{Cint},Ptr{Cint},Ptr{$typ},
- Ptr{Cint},Ref{$typ}),
- n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops
- )
+ ) = HSL.$fa(n,nz,I,J,iw,liw,ikeep,iw1,nsteps,iflag,icntl,cntl,info,ops)
ma27b!(
n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},
a::Vector{$typ},la::Cint,iw::Vector{Cint},liw::Cint,
ikeep::Vector{Cint},nsteps::Vector{Cint},maxfrt::Vector{Cint},iw1::Vector{Cint},
icntl::Vector{Cint},cntl::Vector{$typ},info::Vector{Cint}
- ) = ccall(
- ($(string(fb)),libma27),
- Nothing,
- (Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{$typ},Ref{Cint},Ptr{Cint},Ref{Cint},
- Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ptr{$typ},Ptr{Cint}),
- n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info
- )
+ ) = HSL.$fb(n,nz,I,J,a,la,iw,liw,ikeep,nsteps,maxfrt,iw1,icntl,cntl,info)
ma27c!(
n::Cint,a::Vector{$typ},la::Cint,iw::Vector{Cint},
liw::Cint,w::Vector{$typ},maxfrt::Vector{Cint},rhs::Vector{$typ},
iw1::Vector{Cint},nsteps::Vector{Cint},icntl::Vector{Cint},
info::Vector{Cint}
- ) = ccall(
- ($(string(fc)),libma27),
- Nothing,
- (Ref{Cint},Ptr{$typ},Ref{Cint},Ptr{Cint},
- Ref{Cint},Ptr{$typ},Ptr{Cint},Ptr{$typ},
- Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
- n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info
- )
+ ) = HSL.$fc(n,a,la,iw,liw,w,maxfrt,rhs,iw1,nsteps,icntl,info)
end
end
@@ -156,8 +133,11 @@ end
is_inertia(::Ma27Solver) = true
function inertia(M::Ma27Solver)
- rank = M.info[1]==3 ? M.info[2] : rank = M.csc.n
- return (rank-M.info[15],M.csc.n-rank,M.info[15])
+ dim = M.csc.n
+ rank = (Int(M.info[1])==3) ? Int(M.info[2]) : dim
+ neg = Int(M.info[15])
+
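+    # Inertia is reported as (positive, zero, negative) eigenvalue counts.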
+ return (rank-neg,dim-rank,neg)
end
function improve!(M::Ma27Solver)
@@ -170,7 +150,7 @@ function improve!(M::Ma27Solver)
return true
end
-introduce(::Ma27Solver)="ma27"
+introduce(::Ma27Solver)="ma27 v$(HSL.MA27_version())"
input_type(::Type{Ma27Solver}) = :csc
default_options(::Type{Ma27Solver}) = Ma27Options()
is_supported(::Type{Ma27Solver},::Type{Float32}) = true
diff --git a/lib/MadNLPHSL/src/ma57.jl b/lib/MadNLPHSL/src/ma57.jl
index b6278087..5496c1da 100644
--- a/lib/MadNLPHSL/src/ma57.jl
+++ b/lib/MadNLPHSL/src/ma57.jl
@@ -43,44 +43,27 @@ end
for (fa,fb,fc,typ) in (
- (:ma57ad_, :ma57bd_, :ma57cd_, Float64),
- (:ma57a_, :ma57b_, :ma57c_, Float32)
+ (:ma57ad, :ma57bd, :ma57cd, Float64),
+ (:ma57a , :ma57b , :ma57c , Float32)
)
@eval begin
ma57ad!(n::Cint,nz::Cint,I::Vector{Cint},J::Vector{Cint},lkeep::Cint,
keep::Vector{Cint},iwork::Vector{Cint},icntl::Vector{Cint},
- info::Vector{Cint},rinfo::Vector{$typ}) = ccall(
- ($(string(fa)),libma57),
- Nothing,
- (Ref{Cint},Ref{Cint},Ptr{Cint},Ptr{Cint},Ref{Cint},
- Ptr{Cint},Ptr{Cint},Ptr{Cint},
- Ptr{Cint},Ptr{$typ}),
- n,nz,I,J,lkeep,keep,iwork,icntl,info,rinfo)
+ info::Vector{Cint},rinfo::Vector{$typ}
+ ) = HSL.$fa(n,nz,I,J,lkeep,keep,iwork,icntl,info,rinfo)
ma57bd!(n::Cint,nz::Cint,V::Vector{$typ},fact::Vector{$typ},
lfact::Cint,ifact::Vector{Cint},lifact::Cint,lkeep::Cint,
keep::Vector{Cint},iwork::Vector{Cint},icntl::Vector{Cint},cntl::Vector{$typ},
- info::Vector{Cint},rinfo::Vector{$typ}) = ccall(
- ($(string(fb)),libma57),
- Nothing,
- (Ref{Cint},Ref{Cint},Ptr{$typ},Ptr{$typ},
- Ref{Cint},Ptr{Cint},Ref{Cint},Ref{Cint},
- Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{$typ},
- Ptr{Cint},Ptr{$typ}),
- n,nz,V,fact,lfact,ifact,lifact,lkeep,keep,iwork,icntl,cntl,info,rinfo)
+ info::Vector{Cint},rinfo::Vector{$typ}
+ ) = HSL.$fb(n,nz,V,fact,lfact,ifact,lifact,lkeep,keep,iwork,icntl,cntl,info,rinfo)
ma57cd!(job::Cint,n::Cint,fact::Vector{$typ},lfact::Cint,
ifact::Vector{Cint},lifact::Cint,nrhs::Cint,rhs::Vector{$typ},
lrhs::Cint,work::Vector{$typ},lwork::Cint,iwork::Vector{Cint},
- icntl::Vector{Cint},info::Vector{Cint}) = ccall(
- ($(string(fc)),libma57),
- Nothing,
- (Ref{Cint},Ref{Cint},Ptr{$typ},Ref{Cint},
- Ptr{Cint},Ref{Cint},Ref{Cint},Ptr{$typ},
- Ref{Cint},Ptr{$typ},Ref{Cint},Ptr{Cint},
- Ptr{Cint},Ptr{Cint}),
- job,n,fact,lfact,ifact,lifact,nrhs,rhs,lrhs,work,lwork,iwork,icntl,info)
+ icntl::Vector{Cint},info::Vector{Cint}
+ ) = HSL.$fc(job,n,fact,lfact,ifact,lifact,nrhs,rhs,lrhs,work,lwork,iwork,icntl,info)
end
end
@@ -169,7 +152,7 @@ function improve!(M::Ma57Solver)
return true
end
-introduce(::Ma57Solver)="ma57"
+introduce(::Ma57Solver)="ma57 v$(HSL.MA57_version())"
input_type(::Type{Ma57Solver}) = :csc
default_options(::Type{Ma57Solver}) = Ma57Options()
is_supported(::Type{Ma57Solver},::Type{Float32}) = true
diff --git a/lib/MadNLPHSL/src/ma77.jl b/lib/MadNLPHSL/src/ma77.jl
index e383fc76..6cdf2a2d 100644
--- a/lib/MadNLPHSL/src/ma77.jl
+++ b/lib/MadNLPHSL/src/ma77.jl
@@ -12,144 +12,16 @@
ma77_umax::Float64 = 1e-4
end
-@kwdef mutable struct Ma77Control{T}
- f_arrays::Cint = 0
- print_level::Cint = 0
- unit_diagnostics::Cint = 0
- unit_error::Cint = 0
- unit_warning::Cint = 0
- bits::Cint = 0
-
- buffer_lpage_1::Cint = 0
- buffer_lpage_2::Cint = 0
- buffer_npage_1::Cint = 0
- buffer_npage_2::Cint = 0
-
- file_size::Clong = 0
- maxstore::Clong = 0
-
- storage_1::Clong = 0
- storage_2::Clong = 0
- storage_3::Clong = 0
-
- nemin::Cint = 0
- maxit::Cint = 0
- infnorm::Cint = 0
- thresh::T = 0.
- nb54::Cint = 0
- action::Cint = 0
- multiplier::T = 0.
- nb64::Cint = 0
- nbi::Cint = 0
- small::T = 0.
- static_::T = 0.
- storage_indef::Clong = 0
- u::T = 0.
- umin::T = 0.
- consist_tol::T = 0.
-
- ispare_1::Cint = 0
- ispare_2::Cint = 0
- ispare_3::Cint = 0
- ispare_4::Cint = 0
- ispare_5::Cint = 0
-
- lspare_1::Clong = 0
- lspare_2::Clong = 0
- lspare_3::Clong = 0
- lspare_4::Clong = 0
- lspare_5::Clong = 0
-
- rspare_1::T = 0.
- rspare_2::T = 0.
- rspare_3::T = 0.
- rspare_4::T = 0.
- rspare_5::T = 0.
-end
-
-@kwdef mutable struct Ma77Info{T}
- detlog::T = 0.
- detsign::Cint = 0
- flag::Cint = 0
- iostat::Cint = 0
- matrix_dup::Cint = 0
- matrix_rank::Cint = 0
- matrix_outrange::Cint = 0
- maxdepth::Cint = 0
- maxfront::Cint = 0
- minstore::Clong = 0
- ndelay::Cint = 0
- nfactor::Clong = 0
- nflops::Clong = 0
- niter::Cint = 0
- nsup::Cint = 0
- num_neg::Cint = 0
- num_nothresh::Cint = 0
- num_perturbed::Cint = 0
- ntwo::Cint = 0
-
- stat_1::Cint = 0
- stat_2::Cint = 0
- stat_3::Cint = 0
- stat_4::Cint = 0
-
- nio_read_1::Clong = 0 # 2
- nio_read_2::Clong = 0 # 2
-
- nio_write_1::Clong = 0 # 2
- nio_write_2::Clong = 0 # 2
-
- nwd_read_1::Clong = 0 # 2
- nwd_read_2::Clong = 0 # 2
-
- nwd_write_1::Clong = 0 # 2
- nwd_write_2::Clong = 0 # 2
-
- num_file_1::Cint = 0 # 4
- num_file_2::Cint = 0 # 4
- num_file_3::Cint = 0 # 4
- num_file_4::Cint = 0 # 4
-
- storage_1::Clong = 0 # 4
- storage_2::Clong = 0 # 4
- storage_3::Clong = 0 # 4
- storage_4::Clong = 0 # 4
-
- tree_nodes::Cint = 0
- unit_restart::Cint = 0
- unused::Cint = 0
- usmall::T = 0.
-
-
- ispare_1::Cint = 0
- ispare_2::Cint = 0
- ispare_3::Cint = 0
- ispare_4::Cint = 0
- ispare_5::Cint = 0
-
- lspare_1::Clong = 0
- lspare_2::Clong = 0
- lspare_3::Clong = 0
- lspare_4::Clong = 0
- lspare_5::Clong = 0
-
- rspare_1::T = 0.
- rspare_2::T = 0.
- rspare_3::T = 0.
- rspare_4::T = 0.
- rspare_5::T = 0.
-end
-
mutable struct Ma77Solver{T} <: AbstractLinearSolver{T}
tril::SparseMatrixCSC{T,Int32}
full::SparseMatrixCSC{T,Int32}
tril_to_full_view::SubVector{T}
- control::Ma77Control{T}
- info::Ma77Info{T}
+ control::ma77_control{T}
+ info::ma77_info{T}
- mc68_control::Mc68Control
- mc68_info::Mc68Info
+ mc68_control::mc68_control
+ mc68_info::mc68_info
order::Vector{Int32}
keep::Vector{Ptr{Nothing}}
@@ -167,79 +39,43 @@ for (fdefault, fanalyse, ffactor, fsolve, ffinalise, fopen, finputv, finputr, ty
:ma77_open_s, :ma77_input_vars_s, :ma77_input_reals_s, Float32),
]
@eval begin
- ma77_default_control(control::Ma77Control{$typ}) = ccall(
- ($(string(fdefault)),libma77),
- Cvoid,
- (Ref{Ma77Control{$typ}},),
- control
- )
+ ma77_default_control(control::ma77_control{$typ}
+ ) = HSL.$fdefault(control)
+
ma77_open(
n::Cint,fname1::String,fname2::String,fname3::String,fname4::String,
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ}
- ) = ccall(
- ($(string(fopen)),libma77),
- Cvoid,
- (Cint,Ptr{Cchar},Ptr{Cchar},Ptr{Cchar},Ptr{Cchar},
- Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}}),
- n,fname1,fname2,fname3,fname4,keep,control,info
- )
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ}
+ ) = HSL.$fopen(n,fname1,fname2,fname3,fname4,keep,control,info)
+
ma77_input_vars(
idx::Cint,nvar::Cint,list::Vector{Cint},
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ}
- ) = ccall(
- ($(string(finputv)),libma77),
- Cvoid,
- (Cint,Cint,Ptr{Cint},
- Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}}
- ),
- idx,nvar,list,keep,control,info)
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ}
+ ) = HSL.$finputv(idx,nvar,list,keep,control,info)
+
ma77_input_reals(
idx::Cint,length::Cint,reals::Vector{$typ},
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ}
- ) = ccall(
- ($(string(finputr)),libma77),
- Cvoid,
- (Cint,Cint,Ptr{$typ},
- Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}}),
- idx,length,reals,keep,control,info
- )
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ}
+ ) = HSL.$finputr(idx,length,reals,keep,control,info)
+
ma77_analyse(
order::Vector{Cint},
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ}
- ) = ccall(
- ($(string(fanalyse)),libma77),
- Cvoid,
- (Ptr{Cint},Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}}),
- order,keep,control,info
- )
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ}
+ ) = HSL.$fanalyse(order,keep,control,info)
+
ma77_factor(
- posdef::Cint,keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ},
+ posdef::Cint,keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ},
scale::Ptr{Nothing}
- ) = ccall(
- ($(string(ffactor)),libma77),
- Cvoid,
- (Cint,Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}},Ptr{Nothing}),
- posdef,keep,control,info,scale
- )
+ ) = HSL.$ffactor(posdef,keep,control,info,scale)
+
ma77_solve(
job::Cint,nrhs::Cint,lx::Cint,x::Vector{$typ},
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ},
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ},
scale::Ptr{Nothing}
- ) = ccall(
- ($(string(fsolve)),libma77),
- Cvoid,
- (Cint,Cint,Cint,Ptr{$typ},
- Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}},Ptr{Nothing}),
- job,nrhs,lx,x,keep,control,info,scale
- );
+ ) = HSL.$fsolve(job,nrhs,lx,x,keep,control,info,scale)
+
ma77_finalize(
- keep::Vector{Ptr{Cvoid}},control::Ma77Control{$typ},info::Ma77Info{$typ}
- ) = ccall(
- ($(string(ffinalise)),libma77),
- Cvoid,
- (Ptr{Ptr{Cvoid}},Ref{Ma77Control{$typ}},Ref{Ma77Info{$typ}}),
- keep,control,info
- )
+ keep::Vector{Ptr{Cvoid}},control::ma77_control{$typ},info::ma77_info{$typ}
+ ) = HSL.$ffinalise(keep,control,info)
end
end
@@ -250,17 +86,18 @@ function Ma77Solver(
full,tril_to_full_view = get_tril_to_full(csc)
order = Vector{Int32}(undef,csc.n)
- mc68_info = Mc68Info()
- mc68_control = get_mc68_default_control()
+ mc68info = mc68_info()
+ mc68control = mc68_control()
+ HSL.mc68_default_control_i(mc68control)
keep = [C_NULL]
- mc68_control.f_array_in=1
- mc68_control.f_array_out=1
- mc68_order_i(Int32(opt.ma77_order),Int32(csc.n),csc.colptr,csc.rowval,order,mc68_control,mc68_info)
+ mc68control.f_array_in=1
+ mc68control.f_array_out=1
+ HSL.mc68_order_i(Int32(opt.ma77_order),Int32(csc.n),csc.colptr,csc.rowval,order,mc68control,mc68info)
- info=Ma77Info{T}()
- control=Ma77Control{T}()
+ info=ma77_info{T}()
+ control=ma77_control{T}()
ma77_default_control(control)
control.f_arrays = 1
control.bits = 32
@@ -268,23 +105,19 @@ function Ma77Solver(
control.maxstore = opt.ma77_maxstore
control.print_level = -1
- control.buffer_lpage_1=opt.ma77_buffer_lpage
- control.buffer_lpage_2=opt.ma77_buffer_lpage
- control.buffer_npage_1=opt.ma77_buffer_npage
- control.buffer_npage_2=opt.ma77_buffer_npage
+ control.buffer_lpage = (opt.ma77_buffer_lpage, opt.ma77_buffer_lpage)
+ control.buffer_npage = (opt.ma77_buffer_npage, opt.ma77_buffer_npage)
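+    # In HSL.jl's ma77_control, buffer_lpage and buffer_npage are NTuple{2}
+    # fields, so both page settings are assigned at once (the previous bindings
+    # exposed them as separate _1/_2 fields).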
control.nemin = opt.ma77_nemin
control.small = opt.ma77_small
control.static_ = opt.ma77_static
control.u = opt.ma77_u
- isfile(".ma77_int") && rm(".ma77_int")
- isfile(".ma77_real") && rm(".ma77_real")
- isfile(".ma77_work") && rm(".ma77_work")
- isfile(".ma77_delay") && rm(".ma77_delay")
-
- ma77_open(Int32(full.n),".ma77_int", ".ma77_real", ".ma77_work", ".ma77_delay",
- keep,control,info)
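+    # Write the out-of-core files to unique temporary paths instead of fixed
+    # dotfiles in the working directory, so independent runs cannot clobber
+    # each other's scratch files.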
+ ma77_open(
+ Int32(full.n),
+ tempname(cleanup=false), tempname(cleanup=false), tempname(cleanup=false), tempname(cleanup=false),
+ keep,control,info
+ )
info.flag < 0 && throw(SymbolicException())
@@ -304,7 +137,7 @@ function Ma77Solver(
info.flag<0 && throw(SymbolicException())
M = Ma77Solver{T}(csc,full,tril_to_full_view,
- control,info,mc68_control,mc68_info,order,keep,opt,logger)
+ control,info,mc68control,mc68info,order,keep,opt,logger)
finalizer(finalize,M)
return M
end
@@ -338,7 +171,10 @@ function inertia(M::Ma77Solver)
return (M.info.matrix_rank-M.info.num_neg,M.full.n-M.info.matrix_rank,M.info.num_neg)
end
-finalize(M::Ma77Solver{T}) where T = ma77_finalize(M.keep,M.control,M.info)
+function finalize(M::Ma77Solver{T}) where T
+ ma77_finalize(M.keep,M.control,M.info)
+end
function improve!(M::Ma77Solver)
if M.control.u == M.opt.ma77_umax
@@ -350,7 +186,7 @@ function improve!(M::Ma77Solver)
return true
end
-introduce(::Ma77Solver)="ma77"
+introduce(::Ma77Solver)="ma77 v$(HSL.HSL_MA77_version())"
input_type(::Type{Ma77Solver}) = :csc
default_options(::Type{Ma77Solver}) = Ma77Options()
is_supported(::Type{Ma77Solver},::Type{Float32}) = true
diff --git a/lib/MadNLPHSL/src/ma86.jl b/lib/MadNLPHSL/src/ma86.jl
index 27057b9d..46663bdb 100644
--- a/lib/MadNLPHSL/src/ma86.jl
+++ b/lib/MadNLPHSL/src/ma86.jl
@@ -10,51 +10,14 @@
ma86_umax::Float64 = 1e-4
end
-@kwdef mutable struct Ma86Control{T}
- f_arrays::Int32 = 0
- diagnostics_level::Int32 = 0
- unit_diagnostics::Int32 = 0
- unit_error::Int32 = 0
- unit_warning::Int32 = 0
- nemin::Int32 = 0
- nb::Int32 = 0
- action::Int32 = 0
- nbi::Int32 = 0
- pool_size::Int32 = 0
- small::T = 0.
- static::T = 0.
- u::T = 0.
- umin::T = 0.
- scaling::Int32 = 0
-end
-
-@kwdef mutable struct Ma86Info{T}
- detlog::T = 0.
- detsign::Int32 = 0
- flag::Int32 = 0
- matrix_rank::Int32 = 0
- maxdepth::Int32 = 0
- num_delay::Int32 = 0
- num_factor::Clong = 0
- num_flops::Clong = 0
- num_neg::Int32 = 0
- num_nodes::Int32 = 0
- num_nothresh::Int32 = 0
- num_perturbed::Int32 = 0
- num_two::Int32 = 0
- pool_size::Int32 = 0
- stat::Int32 = 0
- usmall::T = 0.
-end
-
mutable struct Ma86Solver{T} <: AbstractLinearSolver{T}
csc::SparseMatrixCSC{T,Int32}
- control::Ma86Control{T}
- info::Ma86Info{T}
+ control::ma86_control{T}
+ info::ma86_info{T}
- mc68_control::Mc68Control
- mc68_info::Mc68Info
+ mc68_control::mc68_control
+ mc68_info::mc68_info
order::Vector{Int32}
keep::Vector{Ptr{Nothing}}
@@ -72,93 +35,68 @@ for (fdefault, fanalyse, ffactor, fsolve, ffinalise, typ) in [
]
@eval begin
ma86_default_control(
- control::Ma86Control{$typ}
- ) = ccall(
- ($(string(fdefault)),libma86),
- Nothing,
- (Ref{Ma86Control{$typ}},),
- control
- )
+ control::ma86_control{$typ}
+ ) = HSL.$fdefault(control)
+
ma86_analyse(
n::Cint,colptr::Vector{Cint},rowval::Vector{Cint},
order::Vector{Cint},keep::Vector{Ptr{Nothing}},
- control::Ma86Control{$typ},info::Ma86Info{$typ}
- ) = ccall(
- ($(string(fanalyse)),libma86),
- Nothing,
- (Cint,Ptr{Cint},Ptr{Cint},Ptr{$typ},
- Ptr{Ptr{Nothing}},Ref{Ma86Control{$typ}},Ref{Ma86Info{$typ}}),
- n,colptr,rowval,order,keep,control,info
- )
+ control::ma86_control{$typ},info::ma86_info{$typ}
+ ) = HSL.$fanalyse(n,colptr,rowval,order,keep,control,info)
+
ma86_factor(
n::Cint,colptr::Vector{Cint},rowval::Vector{Cint},
nzval::Vector{$typ},order::Vector{Cint},
- keep::Vector{Ptr{Nothing}},control::Ma86Control,info::Ma86Info,
+ keep::Vector{Ptr{Nothing}},control::ma86_control,info::ma86_info,
scale::Ptr{Nothing}
- ) = ccall(
- ($(string(ffactor)),libma86),
- Nothing,
- (Cint,Ptr{Cint},Ptr{Cint},Ptr{$typ},Ptr{Cint},
- Ptr{Ptr{Nothing}},Ref{Ma86Control},Ref{Ma86Info},Ptr{Nothing}),
- n,colptr,rowval,nzval,order,keep,control,info,scale
- )
+ ) = HSL.$ffactor(n,colptr,rowval,nzval,order,keep,control,info,scale)
+
ma86_solve(
job::Cint,nrhs::Cint,n::Cint,rhs::Vector{$typ},
order::Vector{Cint},keep::Vector{Ptr{Nothing}},
- control::Ma86Control,info::Ma86Info,scale::Ptr{Nothing}
- ) = ccall(
- ($(string(fsolve)),libma86),
- Nothing,
- (Cint,Cint,Cint,Ptr{$typ},Ptr{Cint},Ptr{Ptr{Nothing}},
- Ref{Ma86Control},Ref{Ma86Info},Ptr{Nothing}),
- job,nrhs,n,rhs,order,keep,control,info,scale
- )
+ control::ma86_control,info::ma86_info,scale::Ptr{Nothing}
+ ) = HSL.$fsolve(job,nrhs,n,rhs,order,keep,control,info,scale)
+
ma86_finalize(
- keep::Vector{Ptr{Nothing}},control::Ma86Control{$typ}
- ) = ccall(
- ($(string(ffinalise)),libma86),
- Nothing,
- (Ptr{Ptr{Nothing}},Ref{Ma86Control{$typ}}),
- keep,control
- )
+ keep::Vector{Ptr{Nothing}},control::ma86_control{$typ}
+ ) = HSL.$ffinalise(keep,control)
end
end
-ma86_set_num_threads(n) = ccall((:omp_set_num_threads_,libma86),
- Cvoid,
- (Ref{Int32},),
- Int32(n))
+ma86_set_num_threads(n) = HSL.omp_set_num_threads(n)
function Ma86Solver(
csc::SparseMatrixCSC{T,Int32};
opt=Ma86Options(),logger=MadNLPLogger(),
) where T
+
ma86_set_num_threads(opt.ma86_num_threads)
order = Vector{Int32}(undef,csc.n)
- info=Ma86Info{T}()
- control=Ma86Control{T}()
- mc68_info = Mc68Info()
- mc68_control = get_mc68_default_control()
+ info = ma86_info{T}()
+ control = ma86_control{T}()
+ mc68info = mc68_info()
+ mc68control = mc68_control()
+ HSL.mc68_default_control_i(mc68control)
keep = [C_NULL]
- mc68_control.f_array_in=1
- mc68_control.f_array_out=1
- mc68_order_i(Int32(opt.ma86_order),Int32(csc.n),csc.colptr,csc.rowval,order,mc68_control,mc68_info)
+ mc68control.f_array_in=1
+ mc68control.f_array_out=1
+ HSL.mc68_order_i(Int32(opt.ma86_order),Int32(csc.n),csc.colptr,csc.rowval,order,mc68control,mc68info)
ma86_default_control(control)
control.diagnostics_level = Int32(opt.ma86_print_level)
control.f_arrays = 1
control.nemin = opt.ma86_nemin
- control.small = opt.ma86_small
+ control.small_ = opt.ma86_small
control.u = opt.ma86_u
control.scaling = Int32(opt.ma86_scaling)
ma86_analyse(Int32(csc.n),csc.colptr,csc.rowval,order,keep,control,info)
info.flag<0 && throw(SymbolicException())
- M = Ma86Solver{T}(csc,control,info,mc68_control,mc68_info,order,keep,opt,logger)
+ M = Ma86Solver{T}(csc,control,info,mc68control,mc68info,order,keep,opt,logger)
finalizer(finalize,M)
return M
@@ -189,7 +127,7 @@ function improve!(M::Ma86Solver)
@debug(M.logger,"improved quality: pivtol = $(M.control.u)")
return true
end
-introduce(::Ma86Solver)="ma86"
+introduce(::Ma86Solver)="ma86 v$(HSL.HSL_MA86_version())"
input_type(::Type{Ma86Solver}) = :csc
default_options(::Type{Ma86Solver}) = Ma86Options()
is_supported(::Type{Ma86Solver},::Type{Float32}) = true
diff --git a/lib/MadNLPHSL/src/ma97.jl b/lib/MadNLPHSL/src/ma97.jl
index f261d898..f45a0041 100644
--- a/lib/MadNLPHSL/src/ma97.jl
+++ b/lib/MadNLPHSL/src/ma97.jl
@@ -9,57 +9,13 @@
ma97_umax::Float64 = 1e-4
end
-@kwdef mutable struct Ma97Control{T}
- f_arrays::Cint = 0
- action::Cint = 0
- nemin::Cint = 0
- multiplier::T = 0.
- ordering::Cint = 0
- print_level::Cint = 0
- scaling::Cint = 0
- small::T = 0
- u::T = 0
- unit_diagnostics::Cint = 0
- unit_error::Cint = 0
- unit_warning::Cint = 0
- factor_min::Clong = 0
- solve_blas3::Cint = 0
- solve_min::Clong = 0
- solve_mf::Cint = 0
- consist_tol::T = 0
- ispare::Vector{Cint}
- rspare::Vector{T}
-end
-
-@kwdef mutable struct Ma97Info{T}
- flag::Cint = 0
- flag68::Cint = 0
- flag77::Cint = 0
- matrix_dup::Cint = 0
- matrix_rank::Cint = 0
- matrix_outrange::Cint = 0
- matrix_missing_diag::Cint = 0
- maxdepth::Cint = 0
- maxfront::Cint = 0
- num_delay::Cint = 0
- num_factor::Clong = 0
- num_flops::Clong = 0
- num_neg::Cint = 0
- num_sup::Cint = 0
- num_two::Cint = 0
- ordering::Cint = 0
- stat::Cint = 0
- ispare::Vector{Cint}
- rspare::Vector{T}
-end
-
mutable struct Ma97Solver{T} <:AbstractLinearSolver{T}
n::Int32
csc::SparseMatrixCSC{T,Int32}
- control::Ma97Control{T}
- info::Ma97Info{T}
+ control::ma97_control{T}
+ info::ma97_info{T}
akeep::Vector{Ptr{Nothing}}
fkeep::Vector{Ptr{Nothing}}
@@ -76,61 +32,34 @@ for (fdefault, fanalyse, ffactor, fsolve, ffinalise, typ) in [
]
@eval begin
ma97_default_control(
- control::Ma97Control{$typ}
- ) = ccall(
- ($(string(fdefault)), libma97),
- Nothing,
- (Ref{Ma97Control{$typ}},),
- control
- )
+ control::ma97_control{$typ}
+ ) = HSL.$fdefault(control)
+
ma97_analyse(
check::Cint,n::Cint,ptr::Vector{Cint},row::Vector{Cint},
val::Ptr{Nothing},akeep::Vector{Ptr{Nothing}},
- control::Ma97Control{$typ},info::Ma97Info{$typ},
+ control::ma97_control{$typ},info::ma97_info{$typ},
order::Ptr{Nothing}
- ) = ccall(
- ($(string(fanalyse)),libma97),
- Nothing,
- (Cint,Cint,Ptr{Cint},Ptr{Cint},Ptr{$typ},
- Ptr{Ptr{Nothing}},Ref{Ma97Control{$typ}},Ref{Ma97Info{$typ}},Ptr{Cint}),
- check,n,ptr,row,val,akeep,control,info,order
- )
+ ) = HSL.$fanalyse(check,n,ptr,row,val,akeep,control,info,order)
+
ma97_factor(
matrix_type::Cint,ptr::Ptr{Nothing},row::Ptr{Nothing},
val::Vector{$typ},akeep::Vector{Ptr{Nothing}},fkeep::Vector{Ptr{Nothing}},
- control::Ma97Control,info::Ma97Info,scale::Ptr{Nothing}
- ) = ccall(
- ($(string(ffactor)),libma97),
- Nothing,
- (Cint,Ptr{Cint},Ptr{Cint},Ptr{$typ},Ptr{Ptr{Nothing}},
- Ptr{Ptr{Nothing}},Ref{Ma97Control},Ref{Ma97Info},Ptr{$typ}),
- matrix_type,ptr,row,val,akeep,fkeep,control,info,scale
- )
+ control::ma97_control,info::ma97_info,scale::Ptr{Nothing}
+ ) = HSL.$ffactor(matrix_type,ptr,row,val,akeep,fkeep,control,info,scale)
+
ma97_solve(
job::Cint,nrhs::Cint,x::Vector{$typ},ldx::Cint,
akeep::Vector{Ptr{Nothing}},fkeep::Vector{Ptr{Nothing}},
- control::Ma97Control,info::Ma97Info
- ) = ccall(
- ($(string(fsolve)),libma97),
- Nothing,
- (Cint,Cint,Ptr{$typ},Cint,Ptr{Ptr{Nothing}},
- Ptr{Ptr{Nothing}},Ref{Ma97Control},Ref{Ma97Info}),
- job,nrhs,x,ldx,akeep,fkeep,control,info
- )
+ control::ma97_control,info::ma97_info
+ ) = HSL.$fsolve(job,nrhs,x,ldx,akeep,fkeep,control,info)
+
ma97_finalize(
::Type{$typ},akeep::Vector{Ptr{Nothing}},fkeep::Vector{Ptr{Nothing}}
- )=ccall(
- ($(string(ffinalise)),libma97),
- Nothing,
- (Ptr{Ptr{Nothing}},Ptr{Ptr{Nothing}}),
- akeep,fkeep
- )
+ ) = HSL.$ffinalise(akeep,fkeep)
end
end
-ma97_set_num_threads(n) = ccall((:omp_set_num_threads_,libma97),
- Cvoid,
- (Ref{Cint},),
- Cint(n))
+ma97_set_num_threads(n) = HSL.omp_set_num_threads(n)
function Ma97Solver(
@@ -142,8 +71,8 @@ function Ma97Solver(
n = Int32(csc.n)
- info = Ma97Info{T}(ispare=zeros(Int32,5),rspare=zeros(T,10))
- control=Ma97Control{T}(ispare=zeros(Int32,5),rspare=zeros(T,10))
+ info = ma97_info{T}()
+ control=ma97_control{T}()
ma97_default_control(control)
control.print_level = opt.ma97_print_level
@@ -188,7 +117,7 @@ function improve!(M::Ma97Solver)
@debug(M.logger,"improved quality: pivtol = $(M.control.u)")
return true
end
-introduce(::Ma97Solver)="ma97"
+introduce(::Ma97Solver)="ma97 v$(HSL.HSL_MA97_version())"
input_type(::Type{Ma97Solver}) = :csc
default_options(::Type{Ma97Solver}) = Ma97Options()
is_supported(::Type{Ma97Solver},::Type{Float32}) = true
diff --git a/lib/MadNLPHSL/src/mc68.jl b/lib/MadNLPHSL/src/mc68.jl
deleted file mode 100644
index a5d2b75e..00000000
--- a/lib/MadNLPHSL/src/mc68.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-# MadNLP.jl
-# Created by Sungho Shin (sungho.shin@wisc.edu)
-
-@kwdef mutable struct Mc68Control
- f_array_in::Cint = 0
- f_array_out::Cint = 0
- min_l_workspace::Clong = 0
- lp::Cint = 0
- wp::Cint = 0
- mp::Cint = 0
- nemin::Cint = 0
- print_level::Cint = 0
- row_full_thresh::Cint = 0
- row_search::Cint = 0
-end
-
-@kwdef mutable struct Mc68Info
- flag::Cint = 0
- iostat::Cint = 0
- stat::Cint = 0
- out_range::Cint = 0
- duplicate::Cint = 0
- n_compressions::Cint = 0
- n_zero_eigs::Cint = 0
- l_workspace::Clong = 0
- zb01_info::Cint = 0
- n_dense_rows::Cint = 0
-end
-
-function get_mc68_default_control()
- control = Mc68Control(0,0,0,0,0,0,0,0,0,0)
- mc68_default_control_i(control)
- return control
-end
-
-mc68_default_control_i(control::Mc68Control) = ccall((:mc68_default_control_i,libhsl),
- Nothing,
- (Ref{Mc68Control},),
- control)
-
-mc68_order_i(ord::Int32,n::Int32,ptr::Array{Int32,1},row::Array{Int32,1},
- perm::Array{Int32,1},control::Mc68Control,info::Mc68Info) = ccall(
- (:mc68_order_i,libhsl),
- Nothing,
- (Cint,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint},Ref{Mc68Control},Ref{Mc68Info}),
- ord,n,ptr,row,perm,control,info)
-
diff --git a/lib/MadNLPHSL/test/runtests.jl b/lib/MadNLPHSL/test/runtests.jl
index b7776e4d..23d1c116 100644
--- a/lib/MadNLPHSL/test/runtests.jl
+++ b/lib/MadNLPHSL/test/runtests.jl
@@ -1,4 +1,4 @@
-using Test, MadNLP, MadNLPHSL, MadNLPTests
+using Test, MadNLP, MadNLPHSL, MadNLPTests, HSL
testset = [
[
@@ -39,13 +39,13 @@ testset = [
]
@testset "MadNLPHSL test" begin
- for hsl_solver in [Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver]
- # MadNLPTests.test_linear_solver(hsl_solver,Float32)
- MadNLPTests.test_linear_solver(hsl_solver,Float64)
- end
- for (name,optimizer_constructor,exclude) in testset
- test_madnlp(name,optimizer_constructor,exclude)
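+    # The HSL routines ship as proprietary binaries; run the suite only when a
+    # functional libHSL is available.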
+ if LIBHSL_isfunctional()
+ for hsl_solver in [Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver]
+ # MadNLPTests.test_linear_solver(hsl_solver,Float32)
+ MadNLPTests.test_linear_solver(hsl_solver,Float64)
+ end
+ for (name,optimizer_constructor,exclude) in testset
+ test_madnlp(name,optimizer_constructor,exclude)
+ end
end
end
-
-
diff --git a/lib/MadNLPKrylov/Project.toml b/lib/MadNLPKrylov/Project.toml
index 6cd9c30e..4b683a80 100644
--- a/lib/MadNLPKrylov/Project.toml
+++ b/lib/MadNLPKrylov/Project.toml
@@ -1,7 +1,6 @@
name = "MadNLPKrylov"
uuid = "1888cb03-ce40-4a36-8d45-ae8231a0e17c"
-authors = ["Sungho Shin "]
-version = "0.3.1"
+version = "0.3.3"
[deps]
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
@@ -9,9 +8,9 @@ MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
[compat]
IterativeSolvers = "~0.9"
-MadNLP = "0.5, 0.6"
+MadNLP = "0.5, 0.6, 0.7, 0.8"
julia = "1.6"
-MadNLPTests = "~0.3"
+MadNLPTests = "0.5"
[extras]
MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
diff --git a/lib/MadNLPMumps/Project.toml b/lib/MadNLPMumps/Project.toml
index bf832597..053b049b 100644
--- a/lib/MadNLPMumps/Project.toml
+++ b/lib/MadNLPMumps/Project.toml
@@ -1,18 +1,18 @@
name = "MadNLPMumps"
uuid = "3b83494e-c0a4-4895-918b-9157a7a085a1"
-authors = ["Sungho Shin "]
-version = "0.3.1"
+version = "0.4.1"
[deps]
MUMPS_seq_jll = "d7ed1dd3-d0ae-5e8e-bfb4-87a502085b8d"
+OpenBLAS32_jll = "656ef2d0-ae68-5445-9ca0-591084a874a2"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[compat]
-MUMPS_seq_jll = "~5.2.1, ~5.3.5"
-MadNLP = "0.5, 0.6"
-MadNLPTests = "~0.3"
-StaticArrays = "1"
+MUMPS_seq_jll = "~5.3, ~500.600"
+OpenBLAS32_jll = "0.3"
+MadNLP = "0.5, 0.6, 0.7, 0.8"
+MadNLPTests = "0.5"
julia = "1.6"
[extras]
diff --git a/lib/MadNLPMumps/src/MadNLPMumps.jl b/lib/MadNLPMumps/src/MadNLPMumps.jl
index 652ff55b..3719280a 100644
--- a/lib/MadNLPMumps/src/MadNLPMumps.jl
+++ b/lib/MadNLPMumps/src/MadNLPMumps.jl
@@ -1,17 +1,29 @@
module MadNLPMumps
-import StaticArrays: SVector, setindex
import MUMPS_seq_jll
import MadNLP:
- parsefile, dlopen,
+ MadNLP, parsefile, dlopen,
@kwdef, MadNLPLogger, @debug, @warn, @error,
SparseMatrixCSC, SubVector,
SymbolicException,FactorizationException,SolveException,InertiaException,
AbstractOptions, AbstractLinearSolver, set_options!, input_type, default_options,
introduce, factorize!, solve!, improve!, is_inertia, is_supported, inertia, findIJ, nnz
+import LinearAlgebra, OpenBLAS32_jll
+
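+# MUMPS requires an LP64 (32-bit integer) BLAS. On Julia >= 1.9, the hook below
+# forwards BLAS calls to OpenBLAS32_jll via libblastrampoline whenever no LP64
+# backend has been loaded yet.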
+function __init__()
+ if VERSION ≥ v"1.9"
+ config = LinearAlgebra.BLAS.lbt_get_config()
+ if !any(lib -> lib.interface == :lp64, config.loaded_libs)
+ LinearAlgebra.BLAS.lbt_forward(OpenBLAS32_jll.libopenblas_path)
+ end
+ end
+end
const version = parsefile(joinpath(dirname(pathof(MUMPS_seq_jll)),"..","Project.toml"))["version"]
+setindex(tup,a,n) = (tup[1:n-1]...,a,tup[n+1:end]...)
+tzeros(n) = tuple((0 for i=1:n)...)
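+# Tuple helpers replacing the former StaticArrays dependency: `setindex` mimics
+# StaticArrays.setindex for plain NTuples, and `tzeros` builds an all-zero NTuple.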
+
@kwdef mutable struct MumpsOptions <: AbstractOptions
mumps_dep_tol::Float64 = 0.
mumps_mem_percent::Int = 1000
@@ -22,212 +34,109 @@ const version = parsefile(joinpath(dirname(pathof(MUMPS_seq_jll)),"..","Project.
mumps_scaling::Int = 77
end
-if version == "5.3.5+0"
- @kwdef mutable struct Struc{T}
- sym::Cint = 0
- par::Cint = 0
- job::Cint = 0
-
- comm_fortran::Cint = 0
-
- icntl::SVector{60,Cint} = zeros(60)
- keep::SVector{500,Cint} = zeros(500)
- cntl::SVector{15,T} = zeros(15)
- dkeep::SVector{230,T} = zeros(230)
- keep8::SVector{150,Int64} = zeros(150)
- n::Cint = 0
- nblk::Cint = 0
-
- nz_alloc::Cint = 0
-
- nz::Cint = 0
- nnz::Int64 = 0
- irn::Ptr{Cint} = C_NULL
- jcn::Ptr{Cint} = C_NULL
- a::Ptr{T} = C_NULL
-
- nz_loc::Cint = 0
- nnz_loc::Int64 = 0
- irn_loc::Ptr{Cint} = C_NULL
- jcn_loc::Ptr{Cint} = C_NULL
- a_loc::Ptr{T} = C_NULL ###
-
- nelt::Cint = 0
- eltptr::Ptr{Cint} = C_NULL
- eltvar::Ptr{Cint} = C_NULL
- a_elt::Ptr{T} = C_NULL
-
- blkptr::Ptr{Cint} = C_NULL
- blkvar::Ptr{Cint} = C_NULL
-
- perm_in::Ptr{Cint} = C_NULL
-
- sym_perm::Ptr{Cint} = C_NULL
- uns_perm::Ptr{Cint} = C_NULL
-
- colsca::Ptr{T} = C_NULL
- rowsca::Ptr{T} = C_NULL
- colsca_from_mumps::Cint = 0
- rowsca_from_mumps::Cint = 0
-
- rhs::Ptr{T} = C_NULL
- redrhs::Ptr{T} = C_NULL
- rhs_sparse::Ptr{T} = C_NULL
- sol_loc::Ptr{T} = C_NULL
- rhs_loc::Ptr{T} = C_NULL
-
- irhs_sparse::Ptr{Cint} = C_NULL
- irhs_ptr::Ptr{Cint} = C_NULL
- isol_loc::Ptr{Cint} = C_NULL
- irhs_loc::Ptr{Cint} = C_NULL
-
- nrhs::Cint = 0
- lrhs::Cint = 0
- lredrhs::Cint = 0
- nz_rhs::Cint = 0
- lsol_loc::Cint = 0
- nloc_rhs::Cint = 0
- lrhs_loc::Cint = 0
-
- schur_mloc::Cint = 0
- schur_nloc::Cint = 0
- schur_lld::Cint = 0
-
- mblock::Cint = 0
- nblock::Cint = 0
- nprow::Cint = 0
- npcol::Cint = 0
-
- info::SVector{80,Cint} = zeros(80)
- infog::SVector{80,Cint} = zeros(80)
- rinfo::SVector{40,T} = zeros(40)
- rinfog::SVector{40,T} = zeros(40)
-
- deficiency::Cint = 0
- pivnul_list::Ptr{Cint} = C_NULL
- mapping::Ptr{Cint} = C_NULL
-
- size_schur::Cint = 0
- listvar_schur::Ptr{Cint} = C_NULL
- schur::Ptr{T} = C_NULL ##
-
- instance_number::Cint = 0
- wk_user::Ptr{T} = C_NULL
-
- version_number::SVector{32,Cchar} = zeros(32)
-
- ooc_tmpdir::SVector{256,Cchar} = zeros(256)
- ooc_prefix::SVector{64,Cchar} = zeros(64)
-
- write_problem::SVector{256,Cchar} = zeros(256)
- lwk_user::Cint = 0
-
- save_dir::SVector{256,Cchar} = zeros(256)
- save_prefix::SVector{256,Cchar} = zeros(256)
-
- metis_options::SVector{40,Cint} = zeros(40)
- end
-elseif version == "5.2.1+4"
- @kwdef mutable struct Struc{T}
- sym::Cint = 0
- par::Cint = 0
- job::Cint = 0
-
- comm_fortran::Cint = 0
-
- icntl::SVector{60,Cint} = zeros(60)
- keep::SVector{500,Cint} = zeros(500)
- cntl::SVector{15,T} = zeros(15)
- dkeep::SVector{230,T} = zeros(230)
- keep8::SVector{150,Int64} = zeros(150)
- n::Cint = 0
-
- nz_alloc::Cint = 0
-
- nz::Cint = 0
- nnz::Int64 = 0
- irn::Ptr{Cint} = C_NULL
- jcn::Ptr{Cint} = C_NULL
- a::Ptr{T} = C_NULL
-
- nz_loc::Cint = 0
- nnz_loc::Int64 = 0
- irn_loc::Ptr{Cint} = C_NULL
- jcn_loc::Ptr{Cint} = C_NULL
- a_loc::Ptr{T} = C_NULL ###
-
- nelt::Cint = 0
- eltptr::Ptr{Cint} = C_NULL
- eltvar::Ptr{Cint} = C_NULL
- a_elt::Ptr{T} = C_NULL
-
- perm_in::Ptr{Cint} = C_NULL
-
- sym_perm::Ptr{Cint} = C_NULL
- uns_perm::Ptr{Cint} = C_NULL
-
- colsca::Ptr{T} = C_NULL
- rowsca::Ptr{T} = C_NULL
- colsca_from_mumps::Cint = 0
- rowsca_from_mumps::Cint = 0
-
- rhs::Ptr{T} = C_NULL
- redrhs::Ptr{T} = C_NULL
- rhs_sparse::Ptr{T} = C_NULL
- sol_loc::Ptr{T} = C_NULL
- rhs_loc::Ptr{T} = C_NULL
-
- irhs_sparse::Ptr{Cint} = C_NULL
- irhs_ptr::Ptr{Cint} = C_NULL
- isol_loc::Ptr{Cint} = C_NULL
- irhs_loc::Ptr{Cint} = C_NULL
-
- nrhs::Cint = 0
- lrhs::Cint = 0
- lredrhs::Cint = 0
- nz_rhs::Cint = 0
- lsol_loc::Cint = 0
- nloc_rhs::Cint = 0
- lrhs_loc::Cint = 0
-
- schur_mloc::Cint = 0
- schur_nloc::Cint = 0
- schur_lld::Cint = 0
-
- mblock::Cint = 0
- nblock::Cint = 0
- nprow::Cint = 0
- npcol::Cint = 0
-
- info::SVector{80,Cint} = zeros(80)
- infog::SVector{80,Cint} = zeros(80)
- rinfo::SVector{40,T} = zeros(40)
- rinfog::SVector{40,T} = zeros(40)
-
- deficiency::Cint = 0
- pivnul_list::Ptr{Cint} = C_NULL
- mapping::Ptr{Cint} = C_NULL
-
- size_schur::Cint = 0
- listvar_schur::Ptr{Cint} = C_NULL
- schur::Ptr{T} = C_NULL ##
-
- instance_number::Cint = 0
- wk_user::Ptr{T} = C_NULL
-
- version_number::SVector{32,Cchar} = zeros(32)
-
- ooc_tmpdir::SVector{256,Cchar} = zeros(256)
- ooc_prefix::SVector{64,Cchar} = zeros(64)
-
- write_problem::SVector{256,Cchar} = zeros(256)
- lwk_user::Cint = 0
-
- save_dir::SVector{256,Cchar} = zeros(256)
- save_prefix::SVector{256,Cchar} = zeros(256)
-
- metis_options::SVector{40,Cint} = zeros(40)
- end
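+# The two version-specific Struc definitions are merged into a single layout
+# mirroring the MUMPS C API struct, with plain NTuple fields replacing SVector.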
+@kwdef mutable struct Struc{T}
+ sym::Cint = 0
+ par::Cint = 0
+ job::Cint = 0
+
+ comm_fortran::Cint = 0
+
+ icntl::NTuple{60,Cint} = tzeros(60)
+ keep::NTuple{500,Cint} = tzeros(500)
+ cntl::NTuple{15,T} = tzeros(15)
+ dkeep::NTuple{230,T} = tzeros(230)
+ keep8::NTuple{150,Int64} = tzeros(150)
+ n::Cint = 0
+ nblk::Cint = 0
+
+ nz_alloc::Cint = 0
+
+ nz::Cint = 0
+ nnz::Int64 = 0
+ irn::Ptr{Cint} = C_NULL
+ jcn::Ptr{Cint} = C_NULL
+ a::Ptr{T} = C_NULL
+
+ nz_loc::Cint = 0
+ nnz_loc::Int64 = 0
+ irn_loc::Ptr{Cint} = C_NULL
+ jcn_loc::Ptr{Cint} = C_NULL
+ a_loc::Ptr{T} = C_NULL ###
+
+ nelt::Cint = 0
+ eltptr::Ptr{Cint} = C_NULL
+ eltvar::Ptr{Cint} = C_NULL
+ a_elt::Ptr{T} = C_NULL
+
+ blkptr::Ptr{Cint} = C_NULL
+ blkvar::Ptr{Cint} = C_NULL
+
+ perm_in::Ptr{Cint} = C_NULL
+
+ sym_perm::Ptr{Cint} = C_NULL
+ uns_perm::Ptr{Cint} = C_NULL
+
+ colsca::Ptr{T} = C_NULL
+ rowsca::Ptr{T} = C_NULL
+ colsca_from_mumps::Cint = 0
+ rowsca_from_mumps::Cint = 0
+
+ rhs::Ptr{T} = C_NULL
+ redrhs::Ptr{T} = C_NULL
+ rhs_sparse::Ptr{T} = C_NULL
+ sol_loc::Ptr{T} = C_NULL
+ rhs_loc::Ptr{T} = C_NULL
+
+ irhs_sparse::Ptr{Cint} = C_NULL
+ irhs_ptr::Ptr{Cint} = C_NULL
+ isol_loc::Ptr{Cint} = C_NULL
+ irhs_loc::Ptr{Cint} = C_NULL
+
+ nrhs::Cint = 0
+ lrhs::Cint = 0
+ lredrhs::Cint = 0
+ nz_rhs::Cint = 0
+ lsol_loc::Cint = 0
+ nloc_rhs::Cint = 0
+ lrhs_loc::Cint = 0
+
+ schur_mloc::Cint = 0
+ schur_nloc::Cint = 0
+ schur_lld::Cint = 0
+
+ mblock::Cint = 0
+ nblock::Cint = 0
+ nprow::Cint = 0
+ npcol::Cint = 0
+
+ info::NTuple{80,Cint} = tzeros(80)
+ infog::NTuple{80,Cint} = tzeros(80)
+ rinfo::NTuple{40,T} = tzeros(40)
+ rinfog::NTuple{40,T} = tzeros(40)
+
+ deficiency::Cint = 0
+ pivnul_list::Ptr{Cint} = C_NULL
+ mapping::Ptr{Cint} = C_NULL
+
+ size_schur::Cint = 0
+ listvar_schur::Ptr{Cint} = C_NULL
+ schur::Ptr{T} = C_NULL ##
+
+ instance_number::Cint = 0
+ wk_user::Ptr{T} = C_NULL
+
+ version_number::NTuple{32,Cchar} = tzeros(32)
+
+ ooc_tmpdir::NTuple{256,Cchar} = tzeros(256)
+ ooc_prefix::NTuple{64,Cchar} = tzeros(64)
+
+ write_problem::NTuple{256,Cchar} = tzeros(256)
+ lwk_user::Cint = 0
+
+ save_dir::NTuple{256,Cchar} = tzeros(256)
+ save_prefix::NTuple{256,Cchar} = tzeros(256)
+
+ metis_options::NTuple{40,Cint} = tzeros(40)
end
mutable struct MumpsSolver{T} <: AbstractLinearSolver{T}
@@ -379,4 +288,12 @@ is_supported(::Type{MumpsSolver},::Type{Float64}) = true
export MumpsSolver
+# re-export MadNLP, including deprecated names
+for name in names(MadNLP, all=true)
+ if Base.isexported(MadNLP, name)
+ @eval using MadNLP: $(name)
+ @eval export $(name)
+ end
+end
+
end # module
diff --git a/lib/MadNLPPardiso/Project.toml b/lib/MadNLPPardiso/Project.toml
index c110593c..b43194e7 100644
--- a/lib/MadNLPPardiso/Project.toml
+++ b/lib/MadNLPPardiso/Project.toml
@@ -1,7 +1,6 @@
name = "MadNLPPardiso"
uuid = "312ee924-cb12-49df-b284-7304c3902fc0"
-authors = ["Sungho Shin "]
-version = "0.3.1"
+version = "0.3.4"
[deps]
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
@@ -11,9 +10,9 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
[compat]
-MadNLP = "0.5, 0.6"
+MadNLP = "0.5, 0.6, 0.7, 0.8"
BinaryProvider = "0.5"
-MadNLPTests = "~0.3"
+MadNLPTests = "0.5"
julia = "1.6"
MKL_jll = "~2021,2022"
diff --git a/lib/MadNLPPardiso/src/MadNLPPardiso.jl b/lib/MadNLPPardiso/src/MadNLPPardiso.jl
index 2a50c3b9..32a943ac 100644
--- a/lib/MadNLPPardiso/src/MadNLPPardiso.jl
+++ b/lib/MadNLPPardiso/src/MadNLPPardiso.jl
@@ -23,4 +23,12 @@ end
export PardisoSolver, PardisoMKLSolver
+# re-export MadNLP, including deprecated names
+for name in names(MadNLP, all=true)
+ if Base.isexported(MadNLP, name)
+ @eval using MadNLP: $(name)
+ @eval export $(name)
+ end
+end
+
end # module
diff --git a/lib/MadNLPTests/Project.toml b/lib/MadNLPTests/Project.toml
index ba83ba12..e6b14eb3 100644
--- a/lib/MadNLPTests/Project.toml
+++ b/lib/MadNLPTests/Project.toml
@@ -1,21 +1,20 @@
name = "MadNLPTests"
uuid = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
-authors = ["Sungho Shin "]
-version = "0.3.1"
+version = "0.5.1"
[deps]
-ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
+NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[compat]
-ADNLPModels = "0.3, 0.4, 0.5"
JuMP = "1"
-MadNLP = "0.5, 0.6"
-NLPModels = "~0.17.2, 0.18, 0.19"
+MadNLP = "0.5, 0.6, 0.7, 0.8"
+NLPModels = "~0.17.2, 0.18, 0.19, 0.20, 0.21"
+NLPModelsJuMP = "0.12, 0.13"
julia = "1.6"
diff --git a/lib/MadNLPTests/src/Instances/dummy_qp.jl b/lib/MadNLPTests/src/Instances/dummy_qp.jl
index 80a8b9a3..27894d70 100644
--- a/lib/MadNLPTests/src/Instances/dummy_qp.jl
+++ b/lib/MadNLPTests/src/Instances/dummy_qp.jl
@@ -1,12 +1,18 @@
-struct DenseDummyQP{T} <: NLPModels.AbstractNLPModel{T,Vector{T}}
- meta::NLPModels.NLPModelMeta{T, Vector{T}}
- P::Matrix{T} # primal hessian
- A::Matrix{T} # constraint jacobian
- q::Vector{T}
- hrows::Vector{Int}
- hcols::Vector{Int}
- jrows::Vector{Int}
- jcols::Vector{Int}
+struct DenseDummyQP{
+ T,
+ VT <: AbstractVector{T},
+ MT <: AbstractMatrix{T},
+ VI <: AbstractVector{Int}
+ } <: NLPModels.AbstractNLPModel{T,VT}
+ meta::NLPModels.NLPModelMeta{T, VT}
+ P::MT # primal hessian
+ A::MT # constraint jacobian
+ q::VT
+ buffer::VT
+ hrows::VI
+ hcols::VI
+ jrows::VI
+ jcols::VI
counters::NLPModels.Counters
end
@@ -19,8 +25,9 @@ function NLPModels.hess_structure!(qp::DenseDummyQP, I::AbstractVector{T}, J::Ab
copyto!(J, qp.hcols)
end
-function NLPModels.obj(qp::DenseDummyQP, x::AbstractVector)
- return 0.5 * dot(x, qp.P, x) + dot(qp.q, x)
+function NLPModels.obj(qp::DenseDummyQP{T}, x::AbstractVector{T}) where T
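+    # Compute x'Px through an explicit buffer rather than the three-argument
+    # dot(x, P, x), so the objective also works with generic (e.g. GPU) array
+    # types, for which 3-arg dot may be unsupported or slow.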
+ mul!(qp.buffer, qp.P, x)
+ return 0.5 * dot(x, qp.buffer) + dot(qp.q, x)
end
function NLPModels.grad!(qp::DenseDummyQP, x::AbstractVector, g::AbstractVector)
mul!(g, qp.P, x)
@@ -65,49 +72,59 @@ function MadNLP.hess_dense!(qp::DenseDummyQP{T}, x, l,hess::AbstractMatrix; obj_
copyto!(hess, obj_weight .* qp.P)
end
-function DenseDummyQP{T}(; n=100, m=10, fixed_variables=Int[], equality_cons=[]) where T
+function DenseDummyQP(
+ x0::AbstractVector{T} = zeros(100);
+ m=10, fixed_variables=similar(x0,Int,0), equality_cons=similar(x0,Int,0)
+ ) where {T}
+
+ n = length(x0)
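+    # The array type of x0 (a Vector, or e.g. a GPU vector) determines where all
+    # model data lives: every field below is allocated via similar/copyto!.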
+
if m >= n
error("The number of constraints `m` should be less than the number of variable `n`.")
end
Random.seed!(1)
+ # Generate random values.
+    # N.B.: the matrix P_ must be allocated immediately after the vector q_ to
+    # keep the problem data deterministic: the seed is not honored if `randn`
+    # is called after vectors have been allocated on the device.
+ q_ = randn(n)
+ P_ = randn(n, n)
- # Build QP problem 0.5 * x' * P * x + q' * x
- P = randn(T,n , n)
- P += P' # P is symmetric
- P += T(100.0) * I
-
- q = randn(T,n)
-
- # Build constraints gl <= Ax <= gu
- A = zeros(T,m, n)
- for j in 1:m
- A[j, j] = one(T)
- A[j, j+1] = -one(T)
- end
-
- x0 = zeros(T,n)
- y0 = zeros(T,m)
+ y0 = fill!(similar(x0, m), zero(T))
+ q = copyto!(similar(x0, n), q_)
+ buffer = similar(x0, n)
# Bound constraints
- xu = fill(one(T), n)
- xl = fill(zero(T), n)
- gl = fill(zero(T), m)
- gu = fill(one(T), m)
+ xl = fill!(similar(x0, n), zero(T))
+ xu = fill!(similar(x0, n), one(T))
+ gl = fill!(similar(x0, m), zero(T))
+ gu = fill!(similar(x0, m), one(T))
+
# Update gu to load equality constraints
gu[equality_cons] .= zero(T)
+ xl[fixed_variables] .= @view(xu[fixed_variables])
+
+ # Build QP problem 0.5 * x' * P * x + q' * x
+ P = copyto!(similar(x0, n , n), P_)
+    P = P*P' # P is symmetric positive semidefinite
+ P += T(100.0) * I
+
- xl[fixed_variables] .= xu[fixed_variables]
+ # Build constraints gl <= Ax <= gu
+ A = fill!(similar(x0, m, n), zero(T))
+ A[1:m+1:m^2] .= one(T)
+ A[m+1:m+1:m^2+m] .=-one(T)
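+    # Linear-index broadcasts replace the old scalar loop: they set A[j,j] = 1
+    # and A[j,j+1] = -1 for j = 1:m without scalar indexing on device arrays.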
- hrows = [i for i in 1:n for j in 1:i]
- hcols = [j for i in 1:n for j in 1:i]
nnzh = div(n * (n + 1), 2)
+ hrows = copyto!(similar(x0, Int, nnzh), [i for i in 1:n for j in 1:i])
+ hcols = copyto!(similar(x0, Int, nnzh), [j for i in 1:n for j in 1:i])
- jrows = [j for i in 1:n for j in 1:m]
- jcols = [i for i in 1:n for j in 1:m]
nnzj = n * m
+ jrows = copyto!(similar(x0, Int, nnzj), [j for i in 1:n for j in 1:m])
+ jcols = copyto!(similar(x0, Int, nnzj), [i for i in 1:n for j in 1:m])
- return DenseDummyQP{T}(
+ return DenseDummyQP(
NLPModels.NLPModelMeta(
n,
ncon = m,
@@ -121,9 +138,8 @@ function DenseDummyQP{T}(; n=100, m=10, fixed_variables=Int[], equality_cons=[])
ucon = gu,
minimize = true
),
- P,A,q,hrows,hcols,jrows,jcols,
+ P,A,q,buffer,
+ hrows,hcols,jrows,jcols,
NLPModels.Counters()
)
end
-
-DenseDummyQP(; kwargs...) = DenseDummyQP{Float64}(; kwargs...)
diff --git a/lib/MadNLPTests/src/Instances/hs15.jl b/lib/MadNLPTests/src/Instances/hs15.jl
index eb679449..fd369015 100644
--- a/lib/MadNLPTests/src/Instances/hs15.jl
+++ b/lib/MadNLPTests/src/Instances/hs15.jl
@@ -1,9 +1,9 @@
-struct HS15Model <: NLPModels.AbstractNLPModel{Float64,Vector{Float64}}
- meta::NLPModels.NLPModelMeta{Float64, Vector{Float64}}
+struct HS15Model{T} <: NLPModels.AbstractNLPModel{T,Vector{T}}
+ meta::NLPModels.NLPModelMeta{T, Vector{T}}
counters::NLPModels.Counters
end
-function HS15Model(; x0=zeros(2), y0=zeros(2))
+function HS15Model(;T = Float64, x0=zeros(T,2), y0=zeros(T,2))
return HS15Model(
NLPModels.NLPModelMeta(
2, #nvar
@@ -12,10 +12,10 @@ function HS15Model(; x0=zeros(2), y0=zeros(2))
nnzh = 3,
x0 = x0,
y0 = y0,
- lvar = [-Inf, -Inf],
- uvar = [0.5, Inf],
- lcon = [1.0, 0.0],
- ucon = [Inf, Inf],
+ lvar = T[-Inf, -Inf],
+ uvar = T[0.5, Inf],
+ lcon = T[1.0, 0.0],
+ ucon = T[Inf, Inf],
minimize = true
),
NLPModels.Counters()
diff --git a/lib/MadNLPTests/src/Instances/hs15nohessian.jl b/lib/MadNLPTests/src/Instances/hs15nohessian.jl
new file mode 100644
index 00000000..52a2021f
--- /dev/null
+++ b/lib/MadNLPTests/src/Instances/hs15nohessian.jl
@@ -0,0 +1,65 @@
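+# HS15 variant that declares no Hessian (nnzh = 0) but provides jprod!/jtprod!,
+# presumably to exercise Hessian-free and Hessian-approximation code paths.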
+struct HS15NoHessianModel{T} <: NLPModels.AbstractNLPModel{T,Vector{T}}
+ meta::NLPModels.NLPModelMeta{T, Vector{T}}
+ counters::NLPModels.Counters
+end
+
+function HS15NoHessianModel(;T = Float64, x0=zeros(T,2), y0=zeros(T,2))
+ return HS15NoHessianModel(
+ NLPModels.NLPModelMeta(
+ 2, #nvar
+ ncon = 2,
+ nnzj = 4,
+ nnzh = 0,
+ x0 = x0,
+ y0 = y0,
+ lvar = T[-Inf, -Inf],
+ uvar = T[0.5, Inf],
+ lcon = T[1.0, 0.0],
+ ucon = T[Inf, Inf],
+ minimize = true
+ ),
+ NLPModels.Counters()
+ )
+end
+
+function NLPModels.obj(nlp::HS15NoHessianModel, x::AbstractVector)
+ return 100.0 * (x[2] - x[1]^2)^2 + (1.0 - x[1])^2
+end
+
+function NLPModels.grad!(nlp::HS15NoHessianModel, x::AbstractVector, g::AbstractVector)
+ z = x[2] - x[1]^2
+ g[1] = -400.0 * z * x[1] - 2.0 * (1.0 - x[1])
+ g[2] = 200.0 * z
+ return
+end
+
+function NLPModels.cons!(nlp::HS15NoHessianModel, x::AbstractVector, c::AbstractVector)
+ c[1] = x[1] * x[2]
+ c[2] = x[1] + x[2]^2
+end
+
+function NLPModels.jac_structure!(nlp::HS15NoHessianModel, I::AbstractVector{T}, J::AbstractVector{T}) where T
+ copyto!(I, [1, 1, 2, 2])
+ copyto!(J, [1, 2, 1, 2])
+end
+
+function NLPModels.jac_coord!(nlp::HS15NoHessianModel, x::AbstractVector, J::AbstractVector)
+ J[1] = x[2] # (1, 1)
+ J[2] = x[1] # (1, 2)
+ J[3] = 1.0 # (2, 1)
+ J[4] = 2*x[2] # (2, 2)
+ return J
+end
+
+function NLPModels.jprod!(nlp::HS15NoHessianModel, x::AbstractVector, v::AbstractVector, jv::AbstractVector)
+ jv[1] = x[2] * v[1] + x[1] * v[2]
+ jv[2] = v[1] + 2 * x[2] * v[2]
+ return jv
+end
+
+function NLPModels.jtprod!(nlp::HS15NoHessianModel, x::AbstractVector, v::AbstractVector, jv::AbstractVector)
+ jv[1] = x[2] * v[1] + v[2]
+ jv[2] = x[1] * v[1] + 2 * x[2] * v[2]
+ return jv
+end
+
diff --git a/lib/MadNLPTests/src/Instances/nls.jl b/lib/MadNLPTests/src/Instances/nls.jl
index 3d20116b..8886f21f 100644
--- a/lib/MadNLPTests/src/Instances/nls.jl
+++ b/lib/MadNLPTests/src/Instances/nls.jl
@@ -1,6 +1,93 @@
-F(x) = [x[1] - 1.0; 10 * (x[2] - x[1]^2)]
+struct NLSModel <: NLPModels.AbstractNLSModel{Float64,Vector{Float64}}
+ meta::NLPModels.NLPModelMeta{Float64, Vector{Float64}}
+ nls_meta::NLPModels.NLSMeta{Float64, Vector{Float64}}
+ counters::NLPModels.NLSCounters
+end
function NLSModel()
x0 = [-1.2; 1.0]
- return ADNLSModel(F, x0, 2)
+ return NLSModel(
+ NLPModels.NLPModelMeta(
+ 2, #nvar
+ ncon = 0,
+ nnzj = 0,
+ nnzh = 3,
+ x0 = x0,
+ lvar = zeros(2),
+ uvar = ones(2),
+ minimize = true
+ ),
+ NLPModels.NLSMeta(2, 2; nnzj=3, nnzh=4),
+ NLPModels.NLSCounters()
+ )
+end
+
+function NLPModels.residual!(nls::NLSModel, x, Fx)
+ Fx[1] = x[1] - 1.0
+ Fx[2] = 10.0 * (x[2] - x[1]^2)
+ return Fx
+end
+
+function NLPModels.jac_structure_residual!(
+ nls::NLSModel,
+ rows::AbstractVector{<:Integer},
+ cols::AbstractVector{<:Integer},
+)
+ copyto!(rows, [1, 2, 2])
+ copyto!(cols, [1, 1, 2])
+ return rows, cols
+end
+
+function NLPModels.jac_coord_residual!(nls::NLSModel, x::AbstractVector, vals::AbstractVector)
+ vals[1] = 1.0
+ vals[2] = -20.0 * x[1]
+ vals[3] = 10.0
+ return vals
+end
+
+function NLPModels.jprod_residual!(nls::NLSModel, x::AbstractVector, v::AbstractVector, Jv::AbstractVector)
+ Jv[1] = v[1]
+ Jv[2] = -20.0 * x[1] * v[1] + 10.0 * v[2]
+ return Jv
+end
+
+function NLPModels.jtprod_residual!(nls::NLSModel, x::AbstractVector, v::AbstractVector, Jtv::AbstractVector)
+ Jtv[1] = v[1] - 20.0 * x[1] * v[2]
+ Jtv[2] = 10.0 * v[2]
+ return Jtv
+end
+
+function NLPModels.hess_structure_residual!(
+ nls::NLSModel,
+ rows::AbstractVector{<:Integer},
+ cols::AbstractVector{<:Integer},
+)
+ rows[1] = 1
+ cols[1] = 1
+ return rows, cols
+end
+
+function NLPModels.hess_coord_residual!(
+ nls::NLSModel,
+ x::AbstractVector,
+ v::AbstractVector,
+ vals::AbstractVector,
+)
+ vals[1] = -20.0 * v[2]
+ return vals
end
+
+function NLPModels.hess_structure!(nlp::NLSModel, rows::AbstractVector{T}, cols::AbstractVector{T}) where T
+ copyto!(rows, [1, 2, 2])
+ copyto!(cols, [1, 1, 2])
+ return rows, cols
+end
+
+function NLPModels.hess_coord!(nlp::NLSModel, x, y, H::AbstractVector; obj_weight=1.0)
+ # Objective
+ H[1] = obj_weight * (1.0 - 200.0 * x[2] + 600 * x[1]^2)
+ H[2] = obj_weight * (-200.0 * x[1])
+ H[3] = obj_weight * 100.0
+ return H
+end
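+# N.B.: the entries above are the analytic Hessian of the least-squares
+# objective 0.5*||F(x)||^2 = 0.5*(x[1]-1)^2 + 50*(x[2]-x[1]^2)^2.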
+
diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl
index 0288e9b8..b5012fdf 100644
--- a/lib/MadNLPTests/src/MadNLPTests.jl
+++ b/lib/MadNLPTests/src/MadNLPTests.jl
@@ -9,9 +9,9 @@ import Test: @test, @testset
# Optimization packages
import MadNLP
import NLPModels
-import JuMP: Model, @variable, @constraint, @objective, @NLconstraint , @NLobjective, optimize!,
+import JuMP: Model, @variable, @NLconstraint, @NLobjective, optimize!,
MOI, termination_status, LowerBoundRef, UpperBoundRef, value, dual
-import ADNLPModels: ADNLSModel
+import NLPModelsJuMP
export test_madnlp, solcmp
@@ -21,8 +21,7 @@ function solcmp(x,sol;atol=1e-4,rtol=1e-4)
return (aerr < atol || rerr < rtol)
end
-function test_linear_solver(solver,T; kwargs...)
-
+function test_linear_solver(solver, T; kwargs...)
m = 2
n = 2
row = Int32[1,2,2]
@@ -30,62 +29,132 @@ function test_linear_solver(solver,T; kwargs...)
val = T[1.,.1,2.]
b = T[1.0,3.0]
x = similar(b)
+ sol= [0.8542713567839195, 1.4572864321608041]
- @testset "Linear solver $solver" begin
-
- csc = sparse(row,col,val,m,n)
- sol= [0.8542713567839195, 1.4572864321608041]
- if MadNLP.input_type(solver) == :csc
- opt = MadNLP.default_options(solver)
- M = solver(csc; opt=opt)
- elseif MadNLP.input_type(solver) == :dense
- dense = Array(csc)
- opt = MadNLP.default_options(solver)
- M = solver(dense; opt=opt)
- end
- MadNLP.introduce(M)
- MadNLP.improve!(M)
- MadNLP.factorize!(M)
- if MadNLP.is_inertia(M)
- @test MadNLP.inertia(M) == (2, 0, 0)
- end
- x = MadNLP.solve!(M,copy(b))
- @test solcmp(x,sol)
+ csc = sparse(row,col,val,m,n)
+ if MadNLP.input_type(solver) == :csc
+ opt = MadNLP.default_options(solver)
+ M = solver(csc; opt=opt)
+ elseif MadNLP.input_type(solver) == :dense
+ dense = Array(csc)
+ opt = MadNLP.default_options(solver)
+ M = solver(dense; opt=opt)
+ end
+ MadNLP.introduce(M)
+ MadNLP.improve!(M)
+ MadNLP.factorize!(M)
+ if MadNLP.is_inertia(M)
+ @test MadNLP.inertia(M) == (2, 0, 0)
end
+ x = MadNLP.solve!(M,copy(b))
+ @test solcmp(x,sol)
end
-function test_madnlp(name,optimizer_constructor::Function,exclude)
+function test_kkt_system(kkt, cb)
+ # Getters
+ n = MadNLP.num_variables(kkt)
+ (m, p) = size(kkt)
+ # system should be square
+ @test m == p
+
+ # Interface
+ MadNLP.initialize!(kkt)
+
+ # Update internal structure
+ x0 = NLPModels.get_x0(cb.nlp)
+ y0 = NLPModels.get_y0(cb.nlp)
+ # Update Jacobian manually
+ jac = MadNLP.get_jacobian(kkt)
+ MadNLP._eval_jac_wrapper!(cb, x0, jac)
+ MadNLP.compress_jacobian!(kkt)
+ # Update Hessian manually
+ hess = MadNLP.get_hessian(kkt)
+ MadNLP._eval_lag_hess_wrapper!(cb, x0, y0, hess)
+ MadNLP.compress_hessian!(kkt)
+
+    # N.B.: set non-trivial bound duals to ensure l_lower and u_lower
+    # are positive. Otherwise we run into an issue inside
+    # SparseUnreducedKKTSystem, which symmetrizes the system using the
+    # values in l_lower and u_lower.
+ fill!(kkt.l_lower, 1e-3)
+ fill!(kkt.u_lower, 1e-3)
+
+ # Update diagonal terms manually.
+ MadNLP._set_aug_diagonal!(kkt)
+
+ # Factorization
+ MadNLP.build_kkt!(kkt)
+ MadNLP.factorize!(kkt.linear_solver)
+
+ # Backsolve
+ x = MadNLP.UnreducedKKTVector(kkt)
+ fill!(MadNLP.full(x), 1.0) # fill RHS with 1
+ out1 = MadNLP.solve!(kkt, x)
+ @test out1 === x
+
+ y = copy(x)
+ fill!(MadNLP.full(y), 0.0)
+ out2 = mul!(y, kkt, x)
+ @test out2 === y
+ @test MadNLP.full(y) ≈ ones(length(x))
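+    # x solves K*x = e for the all-ones right-hand side e, so multiplying the
+    # solution back by the KKT matrix must reproduce e.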
+
+ if MadNLP.is_inertia(kkt.linear_solver)
+ ni, mi, pi = MadNLP.inertia(kkt.linear_solver)
+ @test MadNLP.is_inertia_correct(kkt, ni, mi, pi)
+ end
+
+ prim_reg, dual_reg = 1.0, 1.0
+ MadNLP.regularize_diagonal!(kkt, prim_reg, dual_reg)
+
+ return
+end
+
+function test_madnlp(name,optimizer_constructor::Function,exclude; Arr = Array)
@testset "$name" begin
- for f in [infeasible,unbounded,lootsma,eigmina]
- !(string(f) in exclude) && f(optimizer_constructor)
+ for f in [infeasible,unbounded,lootsma,eigmina,lp_examodels_issue75]
+ !(string(f) in exclude) && f(optimizer_constructor; Arr = Arr)
end
end
end
-function infeasible(optimizer_constructor::Function)
+function infeasible(optimizer_constructor::Function; Arr = Array)
@testset "infeasible" begin
m=Model(optimizer_constructor)
@variable(m,x>=1)
- @constraint(m,x==0.)
- @objective(m,Min,x^2)
- optimize!(m)
- @test termination_status(m) == MOI.LOCALLY_INFEASIBLE
+ @NLconstraint(m,x==0.)
+ @NLobjective(m,Min,x^2)
+
+ nlp = SparseWrapperModel(
+ Arr,
+ NLPModelsJuMP.MathOptNLPModel(m)
+ )
+ optimizer = optimizer_constructor()
+ result = MadNLP.madnlp(nlp; optimizer.options...)
+
+ @test result.status == MadNLP.INFEASIBLE_PROBLEM_DETECTED
end
end
-function unbounded(optimizer_constructor::Function)
+function unbounded(optimizer_constructor::Function; Arr = Array)
@testset "unbounded" begin
m=Model(optimizer_constructor)
@variable(m,x,start=1)
- @objective(m,Max,x^2)
- optimize!(m)
- @test termination_status(m) == MOI.INFEASIBLE_OR_UNBOUNDED
+ @NLobjective(m,Max,x^2)
+
+ nlp = SparseWrapperModel(
+ Arr,
+ NLPModelsJuMP.MathOptNLPModel(m)
+ )
+ optimizer = optimizer_constructor()
+ result = MadNLP.madnlp(nlp; optimizer.options...)
+
+ @test result.status == MadNLP.DIVERGING_ITERATES
end
end
-function lootsma(optimizer_constructor::Function)
+function lootsma(optimizer_constructor::Function; Arr = Array)
@testset "lootsma" begin
- m=Model(optimizer_constructor)
+ m=Model()
@variable(m, par == 6.)
@variable(m,0 <= x[i=1:3] <= 5, start = 0.)
l=[
@@ -94,22 +163,45 @@ function lootsma(optimizer_constructor::Function)
]
@NLobjective(m,Min,x[1]^3 + 11. *x[1] - par*sqrt(x[1]) +x[3] )
- optimize!(m)
- @test solcmp(value.(x),[0.07415998565403112,2.9848713863700236,4.0000304145340415])
- @test solcmp(dual.(l),[2.000024518601535,2.0000305441119535])
- @test solcmp(dual.(LowerBoundRef.(x)),[0.,0.,0.])
- @test solcmp(dual.(UpperBoundRef.(x)),[0.,0.,0.])
+ nlp = SparseWrapperModel(
+ Arr,
+ NLPModelsJuMP.MathOptNLPModel(m)
+ )
+
+ optimizer = optimizer_constructor()
+ result = MadNLP.madnlp(nlp; optimizer.options...)
+
+ @test solcmp(
+ Array(result.solution[2:4]),
+ [0.07415998565403112,2.9848713863700236,4.0000304145340415];
+ atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol)
+ )
+ @test solcmp(
+ Array(result.multipliers),
+ [-2.000024518601535,-2.0000305441119535];
+ atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol)
+ )
+ @test solcmp(
+ Array(result.multipliers_L[2:4]),
+ [0.,0.,0.];
+ atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol)
+ )
+ @test solcmp(
+ Array(result.multipliers_U[2:4]),
+ [0.,0.,0.];
+ atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol)
+ )
- @test termination_status(m) == MOI.LOCALLY_SOLVED
+ @test result.status == MadNLP.SOLVE_SUCCEEDED
end
end
-function eigmina(optimizer_constructor::Function)
+function eigmina(optimizer_constructor::Function; Arr = Array)
@testset "eigmina" begin
m=Model(optimizer_constructor)
@variable(m,-1 <= x[1:101] <= 1,start = .1)
- @constraint(m, x[1]*x[1] + x[2]*x[2] + x[3]*x[3] + x[4]*x[4] + x[5]*x[5] + x[6]*x[6] +
+ @NLconstraint(m, x[1]*x[1] + x[2]*x[2] + x[3]*x[3] + x[4]*x[4] + x[5]*x[5] + x[6]*x[6] +
x[7]*x[7] + x[8]*x[8] + x[9]*x[9] + x[10]*x[10] + x[11]*x[11] + x[12]*x[12] +
x[13]*x[13] + x[14]*x[14] + x[15]*x[15] + x[16]*x[16] + x[17]*x[17] + x[18]*x[18] +
x[19]*x[19] + x[20]*x[20] + x[21]*x[21] + x[22]*x[22] + x[23]*x[23] + x[24]*x[24] +
@@ -126,115 +218,144 @@ function eigmina(optimizer_constructor::Function)
x[85]*x[85] + x[86]*x[86] + x[87]*x[87] + x[88]*x[88] + x[89]*x[89] + x[90]*x[90] +
x[91]*x[91] + x[92]*x[92] + x[93]*x[93] + x[94]*x[94] + x[95]*x[95] + x[96]*x[96] +
x[97]*x[97] + x[98]*x[98] + x[99]*x[99] + x[100]*x[100] == 1)
- @constraint(m, x[1]*x[101] - x[1] == 0)
- @constraint(m, x[2]*x[101] - 2*x[2] == 0)
- @constraint(m, x[3]*x[101] - 3*x[3] == 0)
- @constraint(m, x[4]*x[101] - 4*x[4] == 0)
- @constraint(m, x[5]*x[101] - 5*x[5] == 0)
- @constraint(m, x[6]*x[101] - 6*x[6] == 0)
- @constraint(m, x[7]*x[101] - 7*x[7] == 0)
- @constraint(m, x[8]*x[101] - 8*x[8] == 0)
- @constraint(m, x[9]*x[101] - 9*x[9] == 0)
- @constraint(m, x[10]*x[101] - 10*x[10] == 0)
- @constraint(m, x[11]*x[101] - 11*x[11] == 0)
- @constraint(m, x[12]*x[101] - 12*x[12] == 0)
- @constraint(m, x[13]*x[101] - 13*x[13] == 0)
- @constraint(m, x[14]*x[101] - 14*x[14] == 0)
- @constraint(m, x[15]*x[101] - 15*x[15] == 0)
- @constraint(m, x[16]*x[101] - 16*x[16] == 0)
- @constraint(m, x[17]*x[101] - 17*x[17] == 0)
- @constraint(m, x[18]*x[101] - 18*x[18] == 0)
- @constraint(m, x[19]*x[101] - 19*x[19] == 0)
- @constraint(m, x[20]*x[101] - 20*x[20] == 0)
- @constraint(m, x[21]*x[101] - 21*x[21] == 0)
- @constraint(m, x[22]*x[101] - 22*x[22] == 0)
- @constraint(m, x[23]*x[101] - 23*x[23] == 0)
- @constraint(m, x[24]*x[101] - 24*x[24] == 0)
- @constraint(m, x[25]*x[101] - 25*x[25] == 0)
- @constraint(m, x[26]*x[101] - 26*x[26] == 0)
- @constraint(m, x[27]*x[101] - 27*x[27] == 0)
- @constraint(m, x[28]*x[101] - 28*x[28] == 0)
- @constraint(m, x[29]*x[101] - 29*x[29] == 0)
- @constraint(m, x[30]*x[101] - 30*x[30] == 0)
- @constraint(m, x[31]*x[101] - 31*x[31] == 0)
- @constraint(m, x[32]*x[101] - 32*x[32] == 0)
- @constraint(m, x[33]*x[101] - 33*x[33] == 0)
- @constraint(m, x[34]*x[101] - 34*x[34] == 0)
- @constraint(m, x[35]*x[101] - 35*x[35] == 0)
- @constraint(m, x[36]*x[101] - 36*x[36] == 0)
- @constraint(m, x[37]*x[101] - 37*x[37] == 0)
- @constraint(m, x[38]*x[101] - 38*x[38] == 0)
- @constraint(m, x[39]*x[101] - 39*x[39] == 0)
- @constraint(m, x[40]*x[101] - 40*x[40] == 0)
- @constraint(m, x[41]*x[101] - 41*x[41] == 0)
- @constraint(m, x[42]*x[101] - 42*x[42] == 0)
- @constraint(m, x[43]*x[101] - 43*x[43] == 0)
- @constraint(m, x[44]*x[101] - 44*x[44] == 0)
- @constraint(m, x[45]*x[101] - 45*x[45] == 0)
- @constraint(m, x[46]*x[101] - 46*x[46] == 0)
- @constraint(m, x[47]*x[101] - 47*x[47] == 0)
- @constraint(m, x[48]*x[101] - 48*x[48] == 0)
- @constraint(m, x[49]*x[101] - 49*x[49] == 0)
- @constraint(m, x[50]*x[101] - 50*x[50] == 0)
- @constraint(m, x[51]*x[101] - 51*x[51] == 0)
- @constraint(m, x[52]*x[101] - 52*x[52] == 0)
- @constraint(m, x[53]*x[101] - 53*x[53] == 0)
- @constraint(m, x[54]*x[101] - 54*x[54] == 0)
- @constraint(m, x[55]*x[101] - 55*x[55] == 0)
- @constraint(m, x[56]*x[101] - 56*x[56] == 0)
- @constraint(m, x[57]*x[101] - 57*x[57] == 0)
- @constraint(m, x[58]*x[101] - 58*x[58] == 0)
- @constraint(m, x[59]*x[101] - 59*x[59] == 0)
- @constraint(m, x[60]*x[101] - 60*x[60] == 0)
- @constraint(m, x[61]*x[101] - 61*x[61] == 0)
- @constraint(m, x[62]*x[101] - 62*x[62] == 0)
- @constraint(m, x[63]*x[101] - 63*x[63] == 0)
- @constraint(m, x[64]*x[101] - 64*x[64] == 0)
- @constraint(m, x[65]*x[101] - 65*x[65] == 0)
- @constraint(m, x[66]*x[101] - 66*x[66] == 0)
- @constraint(m, x[67]*x[101] - 67*x[67] == 0)
- @constraint(m, x[68]*x[101] - 68*x[68] == 0)
- @constraint(m, x[69]*x[101] - 69*x[69] == 0)
- @constraint(m, x[70]*x[101] - 70*x[70] == 0)
- @constraint(m, x[71]*x[101] - 71*x[71] == 0)
- @constraint(m, x[72]*x[101] - 72*x[72] == 0)
- @constraint(m, x[73]*x[101] - 73*x[73] == 0)
- @constraint(m, x[74]*x[101] - 74*x[74] == 0)
- @constraint(m, x[75]*x[101] - 75*x[75] == 0)
- @constraint(m, x[76]*x[101] - 76*x[76] == 0)
- @constraint(m, x[77]*x[101] - 77*x[77] == 0)
- @constraint(m, x[78]*x[101] - 78*x[78] == 0)
- @constraint(m, x[79]*x[101] - 79*x[79] == 0)
- @constraint(m, x[80]*x[101] - 80*x[80] == 0)
- @constraint(m, x[81]*x[101] - 81*x[81] == 0)
- @constraint(m, x[82]*x[101] - 82*x[82] == 0)
- @constraint(m, x[83]*x[101] - 83*x[83] == 0)
- @constraint(m, x[84]*x[101] - 84*x[84] == 0)
- @constraint(m, x[85]*x[101] - 85*x[85] == 0)
- @constraint(m, x[86]*x[101] - 86*x[86] == 0)
- @constraint(m, x[87]*x[101] - 87*x[87] == 0)
- @constraint(m, x[88]*x[101] - 88*x[88] == 0)
- @constraint(m, x[89]*x[101] - 89*x[89] == 0)
- @constraint(m, x[90]*x[101] - 90*x[90] == 0)
- @constraint(m, x[91]*x[101] - 91*x[91] == 0)
- @constraint(m, x[92]*x[101] - 92*x[92] == 0)
- @constraint(m, x[93]*x[101] - 93*x[93] == 0)
- @constraint(m, x[94]*x[101] - 94*x[94] == 0)
- @constraint(m, x[95]*x[101] - 95*x[95] == 0)
- @constraint(m, x[96]*x[101] - 96*x[96] == 0)
- @constraint(m, x[97]*x[101] - 97*x[97] == 0)
- @constraint(m, x[98]*x[101] - 98*x[98] == 0)
- @constraint(m, x[99]*x[101] - 99*x[99] == 0)
- @constraint(m, x[100]*x[101] - 100*x[100] == 0)
- @objective(m, Min, x[101])
- optimize!(m)
-
- @test termination_status(m) == MOI.LOCALLY_SOLVED
+ @NLconstraint(m, x[1]*x[101] - x[1] == 0)
+ @NLconstraint(m, x[2]*x[101] - 2*x[2] == 0)
+ @NLconstraint(m, x[3]*x[101] - 3*x[3] == 0)
+ @NLconstraint(m, x[4]*x[101] - 4*x[4] == 0)
+ @NLconstraint(m, x[5]*x[101] - 5*x[5] == 0)
+ @NLconstraint(m, x[6]*x[101] - 6*x[6] == 0)
+ @NLconstraint(m, x[7]*x[101] - 7*x[7] == 0)
+ @NLconstraint(m, x[8]*x[101] - 8*x[8] == 0)
+ @NLconstraint(m, x[9]*x[101] - 9*x[9] == 0)
+ @NLconstraint(m, x[10]*x[101] - 10*x[10] == 0)
+ @NLconstraint(m, x[11]*x[101] - 11*x[11] == 0)
+ @NLconstraint(m, x[12]*x[101] - 12*x[12] == 0)
+ @NLconstraint(m, x[13]*x[101] - 13*x[13] == 0)
+ @NLconstraint(m, x[14]*x[101] - 14*x[14] == 0)
+ @NLconstraint(m, x[15]*x[101] - 15*x[15] == 0)
+ @NLconstraint(m, x[16]*x[101] - 16*x[16] == 0)
+ @NLconstraint(m, x[17]*x[101] - 17*x[17] == 0)
+ @NLconstraint(m, x[18]*x[101] - 18*x[18] == 0)
+ @NLconstraint(m, x[19]*x[101] - 19*x[19] == 0)
+ @NLconstraint(m, x[20]*x[101] - 20*x[20] == 0)
+ @NLconstraint(m, x[21]*x[101] - 21*x[21] == 0)
+ @NLconstraint(m, x[22]*x[101] - 22*x[22] == 0)
+ @NLconstraint(m, x[23]*x[101] - 23*x[23] == 0)
+ @NLconstraint(m, x[24]*x[101] - 24*x[24] == 0)
+ @NLconstraint(m, x[25]*x[101] - 25*x[25] == 0)
+ @NLconstraint(m, x[26]*x[101] - 26*x[26] == 0)
+ @NLconstraint(m, x[27]*x[101] - 27*x[27] == 0)
+ @NLconstraint(m, x[28]*x[101] - 28*x[28] == 0)
+ @NLconstraint(m, x[29]*x[101] - 29*x[29] == 0)
+ @NLconstraint(m, x[30]*x[101] - 30*x[30] == 0)
+ @NLconstraint(m, x[31]*x[101] - 31*x[31] == 0)
+ @NLconstraint(m, x[32]*x[101] - 32*x[32] == 0)
+ @NLconstraint(m, x[33]*x[101] - 33*x[33] == 0)
+ @NLconstraint(m, x[34]*x[101] - 34*x[34] == 0)
+ @NLconstraint(m, x[35]*x[101] - 35*x[35] == 0)
+ @NLconstraint(m, x[36]*x[101] - 36*x[36] == 0)
+ @NLconstraint(m, x[37]*x[101] - 37*x[37] == 0)
+ @NLconstraint(m, x[38]*x[101] - 38*x[38] == 0)
+ @NLconstraint(m, x[39]*x[101] - 39*x[39] == 0)
+ @NLconstraint(m, x[40]*x[101] - 40*x[40] == 0)
+ @NLconstraint(m, x[41]*x[101] - 41*x[41] == 0)
+ @NLconstraint(m, x[42]*x[101] - 42*x[42] == 0)
+ @NLconstraint(m, x[43]*x[101] - 43*x[43] == 0)
+ @NLconstraint(m, x[44]*x[101] - 44*x[44] == 0)
+ @NLconstraint(m, x[45]*x[101] - 45*x[45] == 0)
+ @NLconstraint(m, x[46]*x[101] - 46*x[46] == 0)
+ @NLconstraint(m, x[47]*x[101] - 47*x[47] == 0)
+ @NLconstraint(m, x[48]*x[101] - 48*x[48] == 0)
+ @NLconstraint(m, x[49]*x[101] - 49*x[49] == 0)
+ @NLconstraint(m, x[50]*x[101] - 50*x[50] == 0)
+ @NLconstraint(m, x[51]*x[101] - 51*x[51] == 0)
+ @NLconstraint(m, x[52]*x[101] - 52*x[52] == 0)
+ @NLconstraint(m, x[53]*x[101] - 53*x[53] == 0)
+ @NLconstraint(m, x[54]*x[101] - 54*x[54] == 0)
+ @NLconstraint(m, x[55]*x[101] - 55*x[55] == 0)
+ @NLconstraint(m, x[56]*x[101] - 56*x[56] == 0)
+ @NLconstraint(m, x[57]*x[101] - 57*x[57] == 0)
+ @NLconstraint(m, x[58]*x[101] - 58*x[58] == 0)
+ @NLconstraint(m, x[59]*x[101] - 59*x[59] == 0)
+ @NLconstraint(m, x[60]*x[101] - 60*x[60] == 0)
+ @NLconstraint(m, x[61]*x[101] - 61*x[61] == 0)
+ @NLconstraint(m, x[62]*x[101] - 62*x[62] == 0)
+ @NLconstraint(m, x[63]*x[101] - 63*x[63] == 0)
+ @NLconstraint(m, x[64]*x[101] - 64*x[64] == 0)
+ @NLconstraint(m, x[65]*x[101] - 65*x[65] == 0)
+ @NLconstraint(m, x[66]*x[101] - 66*x[66] == 0)
+ @NLconstraint(m, x[67]*x[101] - 67*x[67] == 0)
+ @NLconstraint(m, x[68]*x[101] - 68*x[68] == 0)
+ @NLconstraint(m, x[69]*x[101] - 69*x[69] == 0)
+ @NLconstraint(m, x[70]*x[101] - 70*x[70] == 0)
+ @NLconstraint(m, x[71]*x[101] - 71*x[71] == 0)
+ @NLconstraint(m, x[72]*x[101] - 72*x[72] == 0)
+ @NLconstraint(m, x[73]*x[101] - 73*x[73] == 0)
+ @NLconstraint(m, x[74]*x[101] - 74*x[74] == 0)
+ @NLconstraint(m, x[75]*x[101] - 75*x[75] == 0)
+ @NLconstraint(m, x[76]*x[101] - 76*x[76] == 0)
+ @NLconstraint(m, x[77]*x[101] - 77*x[77] == 0)
+ @NLconstraint(m, x[78]*x[101] - 78*x[78] == 0)
+ @NLconstraint(m, x[79]*x[101] - 79*x[79] == 0)
+ @NLconstraint(m, x[80]*x[101] - 80*x[80] == 0)
+ @NLconstraint(m, x[81]*x[101] - 81*x[81] == 0)
+ @NLconstraint(m, x[82]*x[101] - 82*x[82] == 0)
+ @NLconstraint(m, x[83]*x[101] - 83*x[83] == 0)
+ @NLconstraint(m, x[84]*x[101] - 84*x[84] == 0)
+ @NLconstraint(m, x[85]*x[101] - 85*x[85] == 0)
+ @NLconstraint(m, x[86]*x[101] - 86*x[86] == 0)
+ @NLconstraint(m, x[87]*x[101] - 87*x[87] == 0)
+ @NLconstraint(m, x[88]*x[101] - 88*x[88] == 0)
+ @NLconstraint(m, x[89]*x[101] - 89*x[89] == 0)
+ @NLconstraint(m, x[90]*x[101] - 90*x[90] == 0)
+ @NLconstraint(m, x[91]*x[101] - 91*x[91] == 0)
+ @NLconstraint(m, x[92]*x[101] - 92*x[92] == 0)
+ @NLconstraint(m, x[93]*x[101] - 93*x[93] == 0)
+ @NLconstraint(m, x[94]*x[101] - 94*x[94] == 0)
+ @NLconstraint(m, x[95]*x[101] - 95*x[95] == 0)
+ @NLconstraint(m, x[96]*x[101] - 96*x[96] == 0)
+ @NLconstraint(m, x[97]*x[101] - 97*x[97] == 0)
+ @NLconstraint(m, x[98]*x[101] - 98*x[98] == 0)
+ @NLconstraint(m, x[99]*x[101] - 99*x[99] == 0)
+ @NLconstraint(m, x[100]*x[101] - 100*x[100] == 0)
+ @NLobjective(m, Min, x[101])
+
+ nlp = SparseWrapperModel(
+ Arr,
+ NLPModelsJuMP.MathOptNLPModel(m)
+ )
+ optimizer = optimizer_constructor()
+ result = MadNLP.madnlp(nlp; optimizer.options...)
+
+ @test result.status == MadNLP.SOLVE_SUCCEEDED
+ end
+end
+
+function lp_examodels_issue75(optimizer_constructor::Function; Arr = Array)
+ @testset "lp_examodels_issue75" begin
+
+ m = Model()
+ @variable(m, x >= 0)
+ @variable(m, 0 <= y <= 3)
+ @NLobjective(m, Min, 12x + 20y)
+ @NLconstraint(m, c1, 6x + 8y >= 100)
+ @NLconstraint(m, c2, 7x + 12y >= 120)
+
+ nlp = SparseWrapperModel(
+ Arr,
+ NLPModelsJuMP.MathOptNLPModel(m)
+ )
+ optimizer = optimizer_constructor()
+ result = MadNLP.madnlp(nlp; optimizer.options...)
+
+ @test result.status == MadNLP.SOLVE_SUCCEEDED
end
end
include("Instances/dummy_qp.jl")
include("Instances/hs15.jl")
+include("Instances/hs15nohessian.jl")
include("Instances/nls.jl")
+include("wrapper.jl")
end # module
diff --git a/lib/MadNLPTests/src/wrapper.jl b/lib/MadNLPTests/src/wrapper.jl
new file mode 100644
index 00000000..86098c05
--- /dev/null
+++ b/lib/MadNLPTests/src/wrapper.jl
@@ -0,0 +1,215 @@
+abstract type AbstractWrapperModel{T,VT} <: NLPModels.AbstractNLPModel{T,VT} end
+
+struct DenseWrapperModel{T,VT,T2,VT2,MT2, I <: NLPModels.AbstractNLPModel{T2,VT2}} <: AbstractWrapperModel{T,VT}
+ inner::I
+
+ x::VT2
+ y::VT2
+
+ con::VT2
+ grad::VT2
+ jac::MT2
+ hess::MT2
+
+ meta::NLPModels.NLPModelMeta{T, VT}
+ counters::NLPModels.Counters
+end
+
+
+struct SparseWrapperModel{T,VT,T2,VI2,VT2,I <: NLPModels.AbstractNLPModel{T2,VT2}} <: AbstractWrapperModel{T,VT}
+ inner::I
+
+ jrows::VI2
+ jcols::VI2
+ hrows::VI2
+ hcols::VI2
+
+ x::VT2
+ y::VT2
+
+ con::VT2
+ grad::VT2
+ jac::VT2
+ hess::VT2
+
+ meta::NLPModels.NLPModelMeta{T, VT}
+ counters::NLPModels.Counters
+end
+
+
+"""
+    DenseWrapperModel(Arr, m)
+
+Construct a `DenseWrapperModel` (a subtype of `NLPModels.AbstractNLPModel{T,typeof(Arr(m.meta.x0))}`) from a generic NLP model `m`.
+
+A `DenseWrapperModel` can be used to interface an NLP model evaluated on the CPU with a solver running on a different array type (e.g., GPU arrays): the wrapper converts the model metadata with `Arr` and copies iterates into host buffers for each evaluation.
+"""
+function DenseWrapperModel(Arr, m::NLPModels.AbstractNLPModel)
+ return DenseWrapperModel(
+ m,
+ similar(m.meta.x0, m.meta.nvar),
+ similar(m.meta.x0, m.meta.ncon),
+ similar(m.meta.x0, m.meta.ncon),
+ similar(m.meta.x0, m.meta.nvar),
+ similar(m.meta.x0, m.meta.ncon, m.meta.nvar),
+ similar(m.meta.x0, m.meta.nvar, m.meta.nvar),
+ NLPModels.NLPModelMeta(
+ m.meta.nvar,
+ x0 = Arr(m.meta.x0),
+ lvar = Arr(m.meta.lvar),
+ uvar = Arr(m.meta.uvar),
+ ncon = m.meta.ncon,
+ y0 = Arr(m.meta.y0),
+ lcon = Arr(m.meta.lcon),
+ ucon = Arr(m.meta.ucon),
+ nnzj = m.meta.nnzj,
+ nnzh = m.meta.nnzh,
+ minimize = m.meta.minimize
+ ),
+ NLPModels.Counters()
+ )
+end
+
+"""
+    SparseWrapperModel(Arr, m)
+
+Construct a `SparseWrapperModel` (a subtype of `NLPModels.AbstractNLPModel{T,typeof(Arr(m.meta.x0))}`) from a generic NLP model `m`.
+
+A `SparseWrapperModel` can be used to interface an NLP model evaluated on the CPU with a solver running on a different array type (e.g., GPU arrays): the wrapper converts the model metadata with `Arr` and copies iterates into host buffers for each evaluation.
+"""
+function SparseWrapperModel(Arr, m::NLPModels.AbstractNLPModel)
+ return SparseWrapperModel(
+ m,
+ similar(m.meta.x0, Int, m.meta.nnzj),
+ similar(m.meta.x0, Int, m.meta.nnzj),
+ similar(m.meta.x0, Int, m.meta.nnzh),
+ similar(m.meta.x0, Int, m.meta.nnzh),
+ similar(m.meta.x0, m.meta.nvar),
+ similar(m.meta.x0, m.meta.ncon),
+ similar(m.meta.x0, m.meta.ncon),
+ similar(m.meta.x0, m.meta.nvar),
+ similar(m.meta.x0, m.meta.nnzj),
+ similar(m.meta.x0, m.meta.nnzh),
+ NLPModels.NLPModelMeta(
+ m.meta.nvar,
+ x0 = Arr(m.meta.x0),
+ lvar = Arr(m.meta.lvar),
+ uvar = Arr(m.meta.uvar),
+ ncon = m.meta.ncon,
+ y0 = Arr(m.meta.y0),
+ lcon = Arr(m.meta.lcon),
+ ucon = Arr(m.meta.ucon),
+ nnzj = m.meta.nnzj,
+ nnzh = m.meta.nnzh,
+ minimize = m.meta.minimize
+ ),
+ NLPModels.Counters()
+ )
+end
+
+function NLPModels.obj(
+ m::M,
+ x::V
+ ) where {M <: AbstractWrapperModel, V <: AbstractVector}
+
+ copyto!(m.x, x)
+ return NLPModels.obj(m.inner, m.x)
+end
+function NLPModels.cons!(
+ m::M,
+ x::V,
+ g::V
+ ) where {M <: AbstractWrapperModel, V <: AbstractVector}
+
+ copyto!(m.x, x)
+ NLPModels.cons!(m.inner, m.x, m.con)
+ copyto!(g, m.con)
+ return
+end
+function NLPModels.grad!(
+ m::M,
+ x::V,
+ f::V
+ ) where {M <: AbstractWrapperModel, V <: AbstractVector}
+
+ copyto!(m.x, x)
+ NLPModels.grad!(m.inner, m.x, m.grad)
+ copyto!(f, m.grad)
+ return
+end
+
+function NLPModels.jac_structure!(
+ m::M,
+ rows::V,
+ cols::V
+ ) where {M <: SparseWrapperModel, V <: AbstractVector}
+
+ NLPModels.jac_structure!(m.inner, m.jrows, m.jcols)
+ copyto!(rows, m.jrows)
+ copyto!(cols, m.jcols)
+end
+
+function NLPModels.hess_structure!(
+ m::M,
+ rows::V,
+ cols::V
+ ) where {M <: SparseWrapperModel, V <: AbstractVector}
+
+ NLPModels.hess_structure!(m.inner, m.hrows, m.hcols)
+ copyto!(rows, m.hrows)
+ copyto!(cols, m.hcols)
+end
+function NLPModels.jac_coord!(
+ m::M,
+ x::V,
+ jac::V
+ ) where {M <: SparseWrapperModel, V <: AbstractVector}
+
+ copyto!(m.x, x)
+ NLPModels.jac_coord!(m.inner, m.x, m.jac)
+ copyto!(jac, m.jac)
+ return
+end
+function NLPModels.hess_coord!(
+ m::M,
+ x::AbstractVector,
+ y::AbstractVector,
+ hess::AbstractVector;
+ obj_weight = one(eltype(x))
+ ) where {M <: SparseWrapperModel}
+
+ copyto!(m.x, x)
+ copyto!(m.y, y)
+ NLPModels.hess_coord!(m.inner, m.x, m.y, m.hess; obj_weight=obj_weight)
+ copyto!(hess, m.hess)
+ return
+end
+
+
+
+function MadNLP.jac_dense!(
+ m::Model,
+ x::V,
+ jac::M
+ ) where {Model <: DenseWrapperModel, V <: AbstractVector, M <: AbstractMatrix}
+
+ copyto!(m.x, x)
+ MadNLP.jac_dense!(m.inner, m.x, m.jac)
+ copyto!(jac, m.jac)
+ return
+end
+function MadNLP.hess_dense!(
+ m::Model,
+ x::V,
+ y::V,
+ hess::M;
+ obj_weight = one(eltype(x))
+ ) where {Model <: DenseWrapperModel, V <: AbstractVector, M <: AbstractMatrix}
+
+ copyto!(m.x, x)
+ copyto!(m.y, y)
+ MadNLP.hess_dense!(m.inner, m.x, m.y, m.hess; obj_weight=obj_weight)
+ copyto!(hess, m.hess)
+ return
+end
+
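
For reference, a minimal usage sketch of the wrapper above, mirroring the test pattern earlier in this patch (`Arr = Array` on the CPU; a GPU array type such as `CUDA.CuArray` would exercise the host/device copies):

    using JuMP, NLPModelsJuMP, MadNLP, MadNLPTests

    m = Model()
    @variable(m, x >= 0, start = 1.0)
    @NLobjective(m, Min, (x - 2.0)^2)

    # Wrap the CPU-evaluated model: the wrapper converts the metadata with `Arr`
    # and shuttles iterates through host buffers on every callback.
    nlp = MadNLPTests.SparseWrapperModel(Array, NLPModelsJuMP.MathOptNLPModel(m))
    result = MadNLP.madnlp(nlp; print_level = MadNLP.ERROR)
    result.status == MadNLP.SOLVE_SUCCEEDED
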
diff --git a/src/IPM/IPM.jl b/src/IPM/IPM.jl
index b57ef013..65fa4e74 100644
--- a/src/IPM/IPM.jl
+++ b/src/IPM/IPM.jl
@@ -4,9 +4,22 @@
abstract type AbstractMadNLPSolver{T} end
include("restoration.jl")
+include("inertiacorrector.jl")
+
+mutable struct MadNLPSolver{
+ T,
+ VT <: AbstractVector{T},
+ VI <: AbstractVector{Int},
+ KKTSystem <: AbstractKKTSystem{T},
+ Model <: AbstractNLPModel{T,VT},
+ CB <: AbstractCallback{T},
+ Iterator <: AbstractIterator{T},
+ IC <: AbstractInertiaCorrector,
+ KKTVec <: AbstractKKTVector{T, VT}
+ } <: AbstractMadNLPSolver{T}
-mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: AbstractNLPModel, LinSolver <: AbstractLinearSolver{T}, Iterator <: AbstractIterator{T}, KKTVec <: AbstractKKTVector{T, Vector{T}}} <: AbstractMadNLPSolver{T}
nlp::Model
+ cb::CB
kkt::KKTSystem
opt::MadNLPOptions
@@ -18,58 +31,52 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr
nlb::Int
nub::Int
- x::PrimalVector{T, Vector{T}} # primal (after reformulation)
- y::Vector{T} # dual
- zl::PrimalVector{T, Vector{T}} # dual (after reformulation)
- zu::PrimalVector{T, Vector{T}} # dual (after reformulation)
- xl::PrimalVector{T, Vector{T}} # primal lower bound (after reformulation)
- xu::PrimalVector{T, Vector{T}} # primal upper bound (after reformulation)
+ x::PrimalVector{T, VT, VI} # primal (after reformulation)
+ y::VT # dual
+ zl::PrimalVector{T, VT, VI} # dual (after reformulation)
+ zu::PrimalVector{T, VT, VI} # dual (after reformulation)
+ xl::PrimalVector{T, VT, VI} # primal lower bound (after reformulation)
+ xu::PrimalVector{T, VT, VI} # primal upper bound (after reformulation)
obj_val::T
- f::PrimalVector{T, Vector{T}}
- c::Vector{T}
+ f::PrimalVector{T, VT, VI}
+ c::VT
- jacl::Vector{T}
+ jacl::VT
- d::UnreducedKKTVector{T, Vector{T}}
- p::UnreducedKKTVector{T, Vector{T}}
+ d::KKTVec
+ p::KKTVec
_w1::KKTVec
_w2::KKTVec
-
_w3::KKTVec
_w4::KKTVec
- x_trial::PrimalVector{T, Vector{T}}
- c_trial::Vector{T}
+ x_trial::PrimalVector{T, VT, VI}
+ c_trial::VT
obj_val_trial::T
- c_slk::SubVector{T}
- rhs::Vector{T}
-
- ind_ineq::Vector{Int}
- ind_fixed::Vector{Int}
- ind_llb::Vector{Int}
- ind_uub::Vector{Int}
-
- x_lr::SubVector{T}
- x_ur::SubVector{T}
- xl_r::SubVector{T}
- xu_r::SubVector{T}
- zl_r::SubVector{T}
- zu_r::SubVector{T}
+ c_slk::SubVector{T,VT,VI}
+ rhs::VT
+
+ ind_ineq::VI
+ ind_fixed::VI
+ ind_llb::VI
+ ind_uub::VI
+
+ x_lr::SubVector{T,VT,VI}
+ x_ur::SubVector{T,VT,VI}
+ xl_r::SubVector{T,VT,VI}
+ xu_r::SubVector{T,VT,VI}
+ zl_r::SubVector{T,VT,VI}
+ zu_r::SubVector{T,VT,VI}
+ dx_lr::SubVector{T,VT,VI}
+ dx_ur::SubVector{T,VT,VI}
+ x_trial_lr::SubVector{T,VT,VI}
+ x_trial_ur::SubVector{T,VT,VI}
- dx_lr::SubVector{T}
- dx_ur::SubVector{T}
- x_trial_lr::SubVector{T}
- x_trial_ur::SubVector{T}
-
- linear_solver::LinSolver
iterator::Iterator
- obj_scale::Vector{T}
- con_scale::Vector{T}
- con_jac_scale::Vector{T}
inf_pr::T
inf_du::T
inf_compl::T
@@ -84,108 +91,101 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr
ftype::String
del_w::T
- del_c::T
del_w_last::T
+ del_c::T
filter::Vector{Tuple{T,T}}
+ inertia_corrector::IC
RR::Union{Nothing,RobustRestorer{T}}
status::Status
output::Dict
end
-function MadNLPSolver(nlp::AbstractNLPModel{T}; kwargs...) where T
- opt_ipm, opt_linear_solver, logger = load_options(; kwargs...)
- @assert is_supported(opt_ipm.linear_solver, T)
-
- VT = Vector{T}
- # Determine Hessian approximation
- QN = if opt_ipm.hessian_approximation == DENSE_BFGS
- BFGS{T, VT}
- elseif opt_ipm.hessian_approximation == DENSE_DAMPED_BFGS
- DampedBFGS{T, VT}
- elseif opt_ipm.hessian_approximation == SPARSE_COMPACT_LBFGS
- CompactLBFGS{T, VT, Matrix{T}}
- else
- ExactHessian{T, VT}
- end
- # Determine KKT system
- KKTSystem = if opt_ipm.kkt_system == SPARSE_KKT_SYSTEM
- MT = (input_type(opt_ipm.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T}
- SparseKKTSystem{T, VT, MT, QN}
- elseif opt_ipm.kkt_system == SPARSE_UNREDUCED_KKT_SYSTEM
- MT = (input_type(opt_ipm.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T}
- SparseUnreducedKKTSystem{T, VT, MT, QN}
- elseif opt_ipm.kkt_system == DENSE_KKT_SYSTEM
- MT = Matrix{T}
- DenseKKTSystem{T, VT, MT, QN}
- elseif opt_ipm.kkt_system == DENSE_CONDENSED_KKT_SYSTEM
- MT = Matrix{T}
- DenseCondensedKKTSystem{T, VT, MT, QN}
- end
- return MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger)
-end
+"""
+ MadNLPSolver(nlp::AbstractNLPModel{T, VT}; options...) where {T, VT}
-# Constructor for unregistered KKT systems
-function MadNLPSolver{T, KKTSystem}(nlp::AbstractNLPModel{T}; options...) where {T, KKTSystem}
- opt_ipm, opt_linear_solver, logger = load_options(; options...)
- @assert is_supported(opt_ipm.linear_solver, T)
- return MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger)
-end
+Instantiate a new `MadNLPSolver` associated with the nonlinear program
+`nlp::AbstractNLPModel`. Solver options are passed as keyword arguments.
+
+The constructor allocates all the memory required by the interior-point
+algorithm up front, so the main algorithm remains allocation-free.
+
+"""
+function MadNLPSolver(nlp::AbstractNLPModel{T,VT}; kwargs...) where {T, VT}
+
+ options = load_options(nlp; kwargs...)
+
+ ipm_opt = options.interior_point
+ logger = options.logger
+ @assert is_supported(ipm_opt.linear_solver, T)
-# Inner constructor
-function MadNLPSolver{T,KKTSystem}(
- nlp::AbstractNLPModel,
- opt::MadNLPOptions,
- opt_linear_solver::AbstractOptions;
- logger=MadNLPLogger(),
-) where {T, KKTSystem<:AbstractKKTSystem{T}}
cnt = MadNLPCounters(start_time=time())
+ cb = create_callback(
+ ipm_opt.callback,
+ nlp;
+ fixed_variable_treatment=ipm_opt.fixed_variable_treatment,
+ equality_treatment=ipm_opt.equality_treatment,
+ )
# generic options
- opt.disable_garbage_collector &&
+ ipm_opt.disable_garbage_collector &&
(GC.enable(false); @warn(logger,"Julia garbage collector is temporarily disabled"))
- set_blas_num_threads(opt.blas_num_threads; permanent=true)
-
+ set_blas_num_threads(ipm_opt.blas_num_threads; permanent=true)
@trace(logger,"Initializing variables.")
- ind_cons = get_index_constraints(nlp; fixed_variable_treatment=opt.fixed_variable_treatment)
+
+ ind_cons = get_index_constraints(
+ get_lvar(nlp), get_uvar(nlp),
+ get_lcon(nlp), get_ucon(nlp);
+ fixed_variable_treatment=ipm_opt.fixed_variable_treatment,
+ equality_treatment=ipm_opt.equality_treatment
+ )
+
+ ind_lb = ind_cons.ind_lb
+ ind_ub = ind_cons.ind_ub
+
ns = length(ind_cons.ind_ineq)
nx = get_nvar(nlp)
n = nx+ns
m = get_ncon(nlp)
+ nlb = length(ind_lb)
+ nub = length(ind_ub)
+
+ @trace(logger,"Initializing KKT system.")
+ kkt = create_kkt_system(
+ ipm_opt.kkt_system,
+ cb,
+ ind_cons,
+ ipm_opt.linear_solver;
+ hessian_approximation=ipm_opt.hessian_approximation,
+ opt_linear_solver=options.linear_solver,
+ )
- # Initialize KKT
- kkt = KKTSystem(nlp, ind_cons)
-
- # Primal variable
- x = PrimalVector{T, Vector{T}}(nx, ns)
- variable(x) .= get_x0(nlp)
- # Bounds
- xl = PrimalVector{T, Vector{T}}(nx, ns)
- variable(xl) .= get_lvar(nlp)
- slack(xl) .= view(get_lcon(nlp), ind_cons.ind_ineq)
- xu = PrimalVector{T, Vector{T}}(nx, ns)
- variable(xu) .= get_uvar(nlp)
- slack(xu) .= view(get_ucon(nlp), ind_cons.ind_ineq)
- zl = PrimalVector{T, Vector{T}}(nx, ns)
- zu = PrimalVector{T, Vector{T}}(nx, ns)
- # Gradient
- f = PrimalVector{T, Vector{T}}(nx, ns)
-
- y = copy(get_y0(nlp))
- c = zeros(T, m)
-
- n_jac = nnz_jacobian(kkt)
-
- nlb = length(ind_cons.ind_lb)
- nub = length(ind_cons.ind_ub)
-
- x_trial = PrimalVector{T, Vector{T}}(nx, ns)
- c_trial = Vector{T}(undef, m)
+ @trace(logger,"Initializing iterative solver.")
+ iterator = ipm_opt.iterator(kkt; cnt = cnt, logger = logger, opt = options.iterative_refinement)
+
+ x = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ xl = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ xu = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ zl = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ zu = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ f = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+ x_trial = PrimalVector(VT, nx, ns, ind_lb, ind_ub)
+
+ d = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ p = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ _w1 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ _w2 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ _w3 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ _w4 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+
+ jacl = VT(undef,n)
+ c_trial = VT(undef, m)
+ y = VT(undef, m)
+ c = VT(undef, m)
+ rhs = VT(undef, m)
c_slk = view(c,ind_cons.ind_ineq)
- rhs = (get_lcon(nlp).==get_ucon(nlp)).*get_lcon(nlp)
-
x_lr = view(full(x), ind_cons.ind_lb)
x_ur = view(full(x), ind_cons.ind_ub)
xl_r = view(full(xl), ind_cons.ind_lb)
@@ -194,61 +194,44 @@ function MadNLPSolver{T,KKTSystem}(
zu_r = view(full(zu), ind_cons.ind_ub)
x_trial_lr = view(full(x_trial), ind_cons.ind_lb)
x_trial_ur = view(full(x_trial), ind_cons.ind_ub)
-
- if is_reduced(kkt)
- _w1 = ReducedKKTVector{T,typeof(c)}(n, m)
- _w2 = ReducedKKTVector{T,typeof(c)}(n, m)
- _w3 = ReducedKKTVector{T,typeof(c)}(n, m)
- _w4 = ReducedKKTVector{T,typeof(c)}(n, m)
- else
- _w1 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
- _w2 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
- _w3 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
- _w4 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
- end
-
- jacl = zeros(T,n) # spblas may throw an error if not initialized to zero
-
- d = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
dx_lr = view(d.xp, ind_cons.ind_lb) # TODO
dx_ur = view(d.xp, ind_cons.ind_ub) # TODO
- p = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
-
- obj_scale = T[1.0]
- con_scale = ones(T,m)
- con_jac_scale = ones(T,n_jac)
-
- @trace(logger,"Initializing linear solver.")
- cnt.linear_solver_time =
- @elapsed linear_solver = opt.linear_solver(get_kkt(kkt) ; opt=opt_linear_solver, logger=logger)
-
- n_kkt = size(kkt, 1)
- buffer_vec = similar(full(d), n_kkt)
- @trace(logger,"Initializing iterative solver.")
- iterator = opt.iterator(linear_solver, kkt, buffer_vec)
-
- @trace(logger,"Initializing fixed variable treatment scheme.")
-
- if opt.inertia_correction_method == INERTIA_AUTO
- opt.inertia_correction_method = is_inertia(linear_solver)::Bool ? INERTIA_BASED : INERTIA_FREE
+ inertia_correction_method = if ipm_opt.inertia_correction_method == InertiaAuto
+ is_inertia(kkt.linear_solver)::Bool ? InertiaBased : InertiaFree
+ else
+ ipm_opt.inertia_correction_method
end
+ inertia_corrector = build_inertia_corrector(
+ inertia_correction_method,
+ VT,
+ n, m, nlb, nub, ind_lb, ind_ub
+ )
+
+ cnt.init_time = time() - cnt.start_time
- return MadNLPSolver{T,KKTSystem,typeof(nlp),typeof(linear_solver),typeof(iterator),typeof(_w1)}(
- nlp,kkt,opt,cnt,logger,
- n,m,nlb,nub,x,y,zl,zu,xl,xu,0.,f,c,
+ return MadNLPSolver(
+ nlp, cb, kkt,
+ ipm_opt, cnt, options.logger,
+ n, m, nlb, nub,
+ x, y, zl, zu, xl, xu,
+ zero(T), f, c,
jacl,
d, p,
_w1, _w2, _w3, _w4,
- x_trial,c_trial,0.,c_slk,rhs,
- ind_cons.ind_ineq,ind_cons.ind_fixed,ind_cons.ind_llb,ind_cons.ind_uub,
- x_lr,x_ur,xl_r,xu_r,zl_r,zu_r,dx_lr,dx_ur,x_trial_lr,x_trial_ur,
- linear_solver,iterator,
- obj_scale,con_scale,con_jac_scale,
- 0.,0.,0.,0.,0.,0.,0.,0.,0.," ",0.,0.,0.,
- Vector{T}[],nothing,INITIAL,Dict(),
+ x_trial, c_trial, zero(T), c_slk, rhs,
+ ind_cons.ind_ineq, ind_cons.ind_fixed, ind_cons.ind_llb, ind_cons.ind_uub,
+ x_lr, x_ur, xl_r, xu_r, zl_r, zu_r, dx_lr, dx_ur, x_trial_lr, x_trial_ur,
+ iterator,
+ zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T),
+ " ",
+ zero(T), zero(T), zero(T),
+ Tuple{T, T}[],
+ inertia_corrector, nothing,
+ INITIAL, Dict(),
)
+
end
include("utils.jl")
@@ -257,3 +240,4 @@ include("callbacks.jl")
include("factorization.jl")
include("solver.jl")
+
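
A minimal sketch of the two-step API documented above (`HS15Model` is the small test instance shipped with MadNLPTests; `tol` is a standard MadNLP option):

    using MadNLP, MadNLPTests

    nlp = MadNLPTests.HS15Model()                  # small NLPModels test instance
    solver = MadNLP.MadNLPSolver(nlp; tol = 1e-8)  # allocates all buffers up front
    stats = MadNLP.solve!(solver)                  # the iteration itself allocates nothing
    stats.status == MadNLP.SOLVE_SUCCEEDED
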
diff --git a/src/IPM/callbacks.jl b/src/IPM/callbacks.jl
index 49042105..b594b551 100644
--- a/src/IPM/callbacks.jl
+++ b/src/IPM/callbacks.jl
@@ -4,45 +4,46 @@ function eval_f_wrapper(solver::MadNLPSolver, x::PrimalVector{T}) where T
@trace(solver.logger,"Evaluating objective.")
cnt.eval_function_time += @elapsed begin
sense = (get_minimize(nlp) ? one(T) : -one(T))
- obj_val = sense * obj(nlp, variable(x))
+ obj_val = sense * _eval_f_wrapper(solver.cb, variable(x))
end
cnt.obj_cnt += 1
if cnt.obj_cnt == 1 && !is_valid(obj_val)
throw(InvalidNumberException(:obj))
end
- return obj_val * solver.obj_scale[]
+ return obj_val
end
function eval_grad_f_wrapper!(solver::MadNLPSolver, f::PrimalVector{T}, x::PrimalVector{T}) where T
nlp = solver.nlp
cnt = solver.cnt
@trace(solver.logger,"Evaluating objective gradient.")
- obj_scaling = solver.obj_scale[] * (get_minimize(nlp) ? one(T) : -one(T))
- cnt.eval_function_time += @elapsed grad!(
- nlp,
+ cnt.eval_function_time += @elapsed _eval_grad_f_wrapper!(
+ solver.cb,
variable(x),
variable(f),
)
- _scal!(obj_scaling, full(f))
+ if !get_minimize(nlp)
+ variable(f) .*= -one(T)
+ end
cnt.obj_grad_cnt+=1
+
if cnt.obj_grad_cnt == 1 && !is_valid(full(f))
throw(InvalidNumberException(:grad))
end
return f
end
-function eval_cons_wrapper!(solver::MadNLPSolver, c::Vector{T}, x::PrimalVector{T}) where T
+function eval_cons_wrapper!(solver::MadNLPSolver, c::AbstractVector{T}, x::PrimalVector{T}) where T
nlp = solver.nlp
cnt = solver.cnt
@trace(solver.logger, "Evaluating constraints.")
- cnt.eval_function_time += @elapsed cons!(
- nlp,
+ cnt.eval_function_time += @elapsed _eval_cons_wrapper!(
+ solver.cb,
variable(x),
c,
)
view(c,solver.ind_ineq) .-= slack(x)
c .-= solver.rhs
- c .*= solver.con_scale
cnt.con_cnt+=1
if cnt.con_cnt == 1 && !is_valid(c)
throw(InvalidNumberException(:cons))
@@ -56,11 +57,11 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Prim
ns = length(solver.ind_ineq)
@trace(solver.logger, "Evaluating constraint Jacobian.")
jac = get_jacobian(kkt)
- cnt.eval_function_time += @elapsed jac_coord!(
- nlp,
+ cnt.eval_function_time += @elapsed _eval_jac_wrapper!(
+ solver.cb,
variable(x),
jac,
- )
+ )
compress_jacobian!(kkt)
cnt.con_jac_cnt += 1
if cnt.con_jac_cnt == 1 && !is_valid(jac)
@@ -70,18 +71,16 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Prim
return jac
end
-function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T},l::Vector{T};is_resto=false) where T
+function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T},l::AbstractVector{T};is_resto=false) where T
nlp = solver.nlp
cnt = solver.cnt
@trace(solver.logger,"Evaluating Lagrangian Hessian.")
- dual(solver._w1) .= l .* solver.con_scale
hess = get_hessian(kkt)
- scale = (get_minimize(nlp) ? one(T) : -one(T))
- scale *= (is_resto ? zero(T) : solver.obj_scale[])
- cnt.eval_function_time += @elapsed hess_coord!(
- nlp,
+ scale = (get_minimize(nlp) ? one(T) : -one(T)) * (is_resto ? zero(T) : one(T))
+ cnt.eval_function_time += @elapsed _eval_lag_hess_wrapper!(
+ solver.cb,
variable(x),
- dual(solver._w1),
+ l,
hess;
obj_weight = scale,
)
@@ -99,8 +98,8 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x:
ns = length(solver.ind_ineq)
@trace(solver.logger, "Evaluating constraint Jacobian.")
jac = get_jacobian(kkt)
- cnt.eval_function_time += @elapsed jac_dense!(
- nlp,
+ cnt.eval_function_time += @elapsed _eval_jac_wrapper!(
+ solver.cb,
variable(x),
jac,
)
@@ -117,19 +116,18 @@ function eval_lag_hess_wrapper!(
solver::MadNLPSolver,
kkt::AbstractDenseKKTSystem{T, VT, MT, QN},
x::PrimalVector{T},
- l::Vector{T};
+ l::AbstractVector{T};
is_resto=false,
) where {T, VT, MT, QN<:ExactHessian}
nlp = solver.nlp
cnt = solver.cnt
@trace(solver.logger,"Evaluating Lagrangian Hessian.")
- dual(solver._w1) .= l .* solver.con_scale
hess = get_hessian(kkt)
- scale = is_resto ? zero(T) : get_minimize(nlp) ? solver.obj_scale[] : -solver.obj_scale[]
- cnt.eval_function_time += @elapsed hess_dense!(
- nlp,
+ scale = is_resto ? zero(T) : get_minimize(nlp) ? one(T) : -one(T)
+ cnt.eval_function_time += @elapsed _eval_lag_hess_wrapper!(
+ solver.cb,
variable(x),
- dual(solver._w1),
+ l,
hess;
obj_weight = scale,
)
@@ -145,10 +143,10 @@ function eval_lag_hess_wrapper!(
solver::MadNLPSolver,
kkt::AbstractKKTSystem{T, VT, MT, QN},
x::PrimalVector{T},
- l::Vector{T};
+ l::AbstractVector{T};
is_resto=false,
) where {T, VT, MT<:AbstractMatrix{T}, QN<:AbstractQuasiNewton{T, VT}}
- nlp = solver.nlp
+ cb = solver.cb
cnt = solver.cnt
@trace(solver.logger, "Update BFGS matrices.")
@@ -162,21 +160,22 @@ function eval_lag_hess_wrapper!(
# Build sk = x+ - x
copyto!(sk, 1, variable(solver.x), 1, n) # sₖ = x₊
axpy!(-one(T), qn.last_x, sk) # sₖ = x₊ - x
-
# Build yk = ∇L+ - ∇L
copyto!(yk, 1, variable(solver.f), 1, n) # yₖ = ∇f₊
axpy!(-one(T), qn.last_g, yk) # yₖ = ∇f₊ - ∇f
if m > 0
jtprod!(solver.jacl, kkt, l)
- axpy!(n, one(T), solver.jacl, 1, yk, 1) # yₖ += J₊ᵀ l₊
- NLPModels.jtprod!(nlp, qn.last_x, l, qn.last_jv)
- axpy!(-one(T), qn.last_jv, yk) # yₖ += J₊ᵀ l₊ - Jᵀ l₊
- end
-
- if cnt.obj_grad_cnt == 2
- init!(qn, Bk, sk, yk)
+ yk .+= @view(solver.jacl[1:n]) # yₖ += J₊ᵀ l₊
+ _eval_jtprod_wrapper!(cb, qn.last_x, l, qn.last_jv)
+ axpy!(-one(T), qn.last_jv, yk) # yₖ += J₊ᵀ l₊ - Jᵀ l₊
end
+ # Update quasi-Newton approximation.
update!(qn, Bk, sk, yk)
+ else
+ # Init quasi-Newton approximation
+ g0 = variable(solver.f)
+ f0 = solver.obj_val
+ init!(qn, Bk, g0, f0)
end
# Backup data for next step
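
A toy, self-contained illustration of the curvature pair assembled above, in the unconstrained case where the Jacobian terms drop out:

    using LinearAlgebra

    Q  = [4.0 1.0; 1.0 3.0]           # f(x) = ½xᵀQx, so ∇L = ∇f = Qx
    x  = [1.0, 1.0]; x₊ = [0.5, 0.8]
    sk = x₊ - x                       # sₖ = x₊ - x
    yk = Q*x₊ - Q*x                   # yₖ = ∇f₊ - ∇f (m = 0: no Jᵀl terms)
    @assert dot(sk, yk) > 0           # curvature condition for a well-posed BFGS update
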
diff --git a/src/IPM/factorization.jl b/src/IPM/factorization.jl
index 9681459a..e5c306d7 100644
--- a/src/IPM/factorization.jl
+++ b/src/IPM/factorization.jl
@@ -1,125 +1,64 @@
+function solve_refine_wrapper!(d, solver, p, w)
+ result = false
-function factorize_wrapper!(solver::MadNLPSolver)
- @trace(solver.logger,"Factorization started.")
- build_kkt!(solver.kkt)
- solver.cnt.linear_solver_time += @elapsed factorize!(solver.linear_solver)
-end
-
-function solve_refine_wrapper!(
- solver::MadNLPSolver,
- x::AbstractKKTVector,
- b::AbstractKKTVector,
-)
- cnt = solver.cnt
- @trace(solver.logger,"Iterative solution started.")
- fixed_variable_treatment_vec!(full(b), solver.ind_fixed)
-
- cnt.linear_solver_time += @elapsed begin
- result = solve_refine!(x, solver.iterator, b)
- end
-
- if result == :Solved
- solve_status = true
- else
- if improve!(solver.linear_solver)
- cnt.linear_solver_time += @elapsed begin
- factorize!(solver.linear_solver)
- ret = solve_refine!(x, solver.iterator, b)
- solve_status = (ret == :Solved)
- end
+ solver.cnt.linear_solver_time += @elapsed begin
+ if solve_refine!(d, solver.iterator, p, w)
+ result = true
else
- solve_status = false
+ if improve!(solver.kkt.linear_solver)
+ if solve_refine!(d, solver.iterator, p, w)
+ result = true
+ end
+ end
end
end
- fixed_variable_treatment_vec!(full(x), solver.ind_fixed)
- return solve_status
-end
-
-function solve_refine_wrapper!(
- solver::MadNLPSolver{T,<:DenseCondensedKKTSystem},
- x::AbstractKKTVector,
- b::AbstractKKTVector,
-) where T
- cnt = solver.cnt
- @trace(solver.logger,"Iterative solution started.")
- fixed_variable_treatment_vec!(full(b), solver.ind_fixed)
-
- kkt = solver.kkt
- n = num_variables(kkt)
- n_eq, ns = kkt.n_eq, kkt.n_ineq
- n_condensed = n + n_eq
-
- # load buffers
- b_c = view(full(solver._w1), 1:n_condensed)
- x_c = view(full(solver._w2), 1:n_condensed)
- jv_x = view(full(solver._w3), 1:ns) # for jprod
- jv_t = primal(solver._w4) # for jtprod
- v_c = dual(solver._w4)
-
- Σs = get_slack_regularization(kkt)
- α = get_scaling_inequalities(kkt)
+ return result
+end
- # Decompose right hand side
- bx = view(full(b), 1:n)
- bs = view(full(b), n+1:n+ns)
- by = view(full(b), kkt.ind_eq_shifted)
- bz = view(full(b), kkt.ind_ineq_shifted)
+function factorize_wrapper!(solver::MadNLPSolver)
+ @trace(solver.logger,"Factorization started.")
+ build_kkt!(solver.kkt)
+ solver.cnt.linear_solver_time += @elapsed factorize!(solver.kkt.linear_solver)
+end
- # Decompose results
- xx = view(full(x), 1:n)
- xs = view(full(x), n+1:n+ns)
- xy = view(full(x), kkt.ind_eq_shifted)
- xz = view(full(x), kkt.ind_ineq_shifted)
-
- fill!(v_c, zero(T))
- v_c[kkt.ind_ineq] .= (Σs .* bz .+ α .* bs) ./ α.^2
- jtprod!(jv_t, kkt, v_c)
- # init right-hand-side
- b_c[1:n] .= bx .+ jv_t[1:n]
- b_c[1+n:n+n_eq] .= by
-
- cnt.linear_solver_time += @elapsed (result = solve_refine!(x_c, solver.iterator, b_c))
- solve_status = (result == :Solved)
-
- # Expand solution
- xx .= x_c[1:n]
- xy .= x_c[1+n:end]
- jprod_ineq!(jv_x, kkt, xx)
- xz .= sqrt.(Σs) ./ α .* jv_x .- Σs .* bz ./ α.^2 .- bs ./ α
- xs .= (bs .+ α .* xz) ./ Σs
-
- fixed_variable_treatment_vec!(full(x), solver.ind_fixed)
- return solve_status
+function solve!(kkt::SparseUnreducedKKTSystem, w::AbstractKKTVector)
+ wzl = dual_lb(w)
+ wzu = dual_ub(w)
+ f(x,y) = iszero(y) ? x : x/y
+ wzl .= f.(wzl, kkt.l_lower_aug)
+ wzu .= f.(wzu, kkt.u_lower_aug)
+ solve!(kkt.linear_solver, full(w))
+ wzl .*= .-kkt.l_lower_aug
+ wzu .*= kkt.u_lower_aug
+ return w
end
-# Set V1 = [U₁ U₂] , V2 = [-U₁ U₂]
-function _init_lbfgs_factors!(V1, V2, U, n, p)
- @inbounds for i in 1:n, j in 1:p
- V1[i, j] = U[i, j]
- V2[i, j] = -U[i, j]
- V1[i, j+p] = U[i, j+p]
- V2[i, j+p] = U[i, j+p]
- end
+function solve!(kkt::AbstractReducedKKTSystem, w::AbstractKKTVector)
+ reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag)
+ solve!(kkt.linear_solver, primal_dual(w))
+ finish_aug_solve!(kkt, w)
+ return w
end
-function solve_refine_wrapper!(
- solver::MadNLPSolver{T, <:SparseKKTSystem{T, VT, MT, QN}},
- x::AbstractKKTVector,
- b::AbstractKKTVector,
-) where {T, VT, MT, QN<:CompactLBFGS{T, Vector{T}, Matrix{T}}}
- cnt = solver.cnt
- kkt = solver.kkt
+function solve!(
+ kkt::SparseKKTSystem{T, VT, MT, QN},
+ w::AbstractKKTVector
+ ) where {T, VT, MT, QN<:CompactLBFGS}
+
qn = kkt.quasi_newton
n, p = size(qn)
# Load buffers
xr = qn._w2
- Tk = qn.Tk ; fill!(Tk, zero(T))
- x_ = primal_dual(x)
- b_ = primal_dual(b)
- nn = length(x_)
+ Tk = qn.Tk
+ w_ = primal_dual(w)
+ nn = length(w_)
+
+ fill!(Tk, zero(T))
+ reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag)
+
# Resize arrays with correct dimension
- if size(qn.V1, 2) < 2*p
+ if size(qn.V1) != (nn, 2*p)
qn.V1 = zeros(nn, 2*p)
qn.V2 = zeros(nn, 2*p)
else
@@ -127,35 +66,208 @@ function solve_refine_wrapper!(
fill!(qn.V2, zero(T))
end
- fixed_variable_treatment_vec!(full(b), solver.ind_fixed)
-
# Solve LBFGS system with Sherman-Morrison-Woodbury formula
    # (C + U Vᵀ)⁻¹ = C⁻¹ - C⁻¹ U (I + Vᵀ C⁻¹ U)⁻¹ Vᵀ C⁻¹
# Solve linear system without low-rank part
- cnt.linear_solver_time += @elapsed begin
- result = solve_refine!(x, solver.iterator, b)
- end
+ solve!(kkt.linear_solver, w_)
# Add low-rank correction
if p > 0
_init_lbfgs_factors!(qn.V1, qn.V2, qn.U, n, p)
- cnt.linear_solver_time += @elapsed begin
- multi_solve!(solver.linear_solver, qn.V2) # V2 = C⁻¹ U
- end
+ multi_solve!(kkt.linear_solver, qn.V2) # V2 = C⁻¹ U
Tk[diagind(Tk)] .= one(T) # Tₖ = I
mul!(Tk, qn.V1', qn.V2, one(T), one(T)) # Tₖ = (I + Vᵀ C⁻¹ U)
J1 = qr(Tk) # Tₖ⁻¹
- mul!(xr, qn.V1', x_) # xᵣ = Vᵀ C⁻¹ b
+ mul!(xr, qn.V1', w_) # xᵣ = Vᵀ C⁻¹ b
ldiv!(J1, xr) # xᵣ = (I + Vᵀ C⁻¹ U)⁻¹ Vᵀ C⁻¹ b
- mul!(x_, qn.V2, xr, -one(T), one(T)) # x = x - C⁻¹ U xᵣ
+ mul!(w_, qn.V2, xr, -one(T), one(T)) # x = x - C⁻¹ U xᵣ
+ end
+
+ finish_aug_solve!(kkt, w)
+ return w
+end
+
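
A quick numerical check of the Woodbury identity the low-rank correction above relies on (random data and dense inverses, for illustration only):

    using LinearAlgebra

    n, p = 6, 2
    C = Matrix(4.0I, n, n)                 # stand-in for the factorized KKT matrix
    U = randn(n, 2p); V = randn(n, 2p)
    lhs = inv(C + U*V')
    rhs = inv(C) - inv(C)*U*inv(I + V'*inv(C)*U)*V'*inv(C)
    @assert lhs ≈ rhs
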
+
+function solve!(kkt::SparseCondensedKKTSystem{T}, w::AbstractKKTVector) where T
+
+ (n,m) = size(kkt.jt_csc)
+
+ # Decompose buffers
+ wx = _madnlp_unsafe_wrap(full(w), n)
+ ws = view(full(w), n+1:n+m)
+ wz = view(full(w), n+m+1:n+2*m)
+ Σs = view(kkt.pr_diag, n+1:n+m)
+
+ reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag)
+
+ kkt.buffer .= kkt.diag_buffer .* (wz .+ ws ./ Σs)
+
+ mul!(wx, kkt.jt_csc, kkt.buffer, one(T), one(T))
+ solve!(kkt.linear_solver, wx)
+
+ mul!(kkt.buffer2, kkt.jt_csc', wx) # TODO: investigate why directly using wz here is causing an error
+
+ wz .= .- kkt.buffer .+ kkt.diag_buffer .* kkt.buffer2
+ ws .= (ws .+ wz) ./ Σs
+
+ finish_aug_solve!(kkt, w)
+ return w
+end
+
+function solve!(
+ kkt::DenseCondensedKKTSystem,
+ w::AbstractKKTVector{T},
+ ) where T
+
+ n = num_variables(kkt)
+ n_eq, ns = kkt.n_eq, kkt.n_ineq
+ n_condensed = n + n_eq
+
+ # Decompose rhs
+ wx = view(full(w), 1:n)
+ ws = view(full(w), n+1:n+ns)
+ wy = view(full(w), kkt.ind_eq_shifted)
+ wz = view(full(w), kkt.ind_ineq_shifted)
+
+ x = kkt.pd_buffer
+ xx = view(x, 1:n)
+ xy = view(x, n+1:n+n_eq)
+
+ Σs = get_slack_regularization(kkt)
+
+ reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag)
+
+ fill!(kkt.buffer, zero(T))
+ kkt.buffer[kkt.ind_ineq] .= kkt.diag_buffer .* (wz .+ ws ./ Σs)
+ mul!(xx, kkt.jac', kkt.buffer)
+ xx .+= wx
+ xy .= wy
+ solve!(kkt.linear_solver, x)
+
+ wx .= xx
+ mul!(dual(w), kkt.jac, wx)
+ wy .= xy
+ wz .*= kkt.diag_buffer
+ dual(w) .-= kkt.buffer
+ ws .= (ws .+ wz) ./ Σs
+
+ finish_aug_solve!(kkt, w)
+ return w
+end
+
+function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},SparseUnreducedKKTSystem{T,VT,MT,QN}}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:ExactHessian}
+ mul!(primal(w), Symmetric(kkt.hess_com, :L), primal(x), alpha, beta)
+ mul!(primal(w), kkt.jac_com', dual(x), alpha, one(T))
+ mul!(dual(w), kkt.jac_com, primal(x), alpha, beta)
+ _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta)
+ return w
+end
+
+function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},SparseUnreducedKKTSystem{T,VT,MT,QN}}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:CompactLBFGS}
+ qn = kkt.quasi_newton
+ n, p = size(qn)
+ nn = length(primal_dual(w))
+ # Load buffers (size: 2p)
+ vx = qn._w2
+ # Reset V1 and V2
+ fill!(qn.V1, zero(T))
+ fill!(qn.V2, zero(T))
+ _init_lbfgs_factors!(qn.V1, qn.V2, qn.U, n, p)
+ # Upper-left block is C = ξ I + U Vᵀ
+ mul!(primal(w), Symmetric(kkt.hess_com, :L), primal(x), alpha, beta)
+ mul!(primal(w), kkt.jac_com', dual(x), alpha, one(T))
+ mul!(dual(w), kkt.jac_com, primal(x), alpha, beta)
+ # Add (U Vᵀ) x contribution
+ mul!(vx, qn.V2', primal_dual(x))
+ mul!(primal_dual(w), qn.V1, vx, alpha, one(T))
+
+ _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta)
+end
+
+function mul!(w::AbstractKKTVector{T}, kkt::SparseCondensedKKTSystem, x::AbstractKKTVector, alpha, beta) where T
+ n = size(kkt.hess_com, 1)
+ m = size(kkt.jt_csc, 2)
+
+ # Decompose results
+ xx = view(full(x), 1:n)
+ xs = view(full(x), n+1:n+m)
+ xz = view(full(x), n+m+1:n+2*m)
+
+ # Decompose buffers
+ wx = _madnlp_unsafe_wrap(full(w), n)
+ ws = view(full(w), n+1:n+m)
+ wz = view(full(w), n+m+1:n+2*m)
+
+ mul!(wx, Symmetric(kkt.hess_com, :L), xx, alpha, beta) # TODO: make this symmetric
+
+ mul!(wx, kkt.jt_csc, xz, alpha, one(T))
+ mul!(wz, kkt.jt_csc', xx, alpha, beta)
+ axpy!(-alpha, xs, wz)
+ ws .= beta.*ws .- alpha.* xz
+
+ _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta)
+ return w
+end
+
+function mul!(w::AbstractKKTVector{T}, kkt::AbstractDenseKKTSystem, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where T
+ (m, n) = size(kkt.jac)
+ wx = @view(primal(w)[1:n])
+ ws = @view(primal(w)[n+1:end])
+ wy = dual(w)
+ wz = @view(dual(w)[kkt.ind_ineq])
+
+ xx = @view(primal(x)[1:n])
+ xs = @view(primal(x)[n+1:end])
+ xy = dual(x)
+ xz = @view(dual(x)[kkt.ind_ineq])
+
+ symul!(wx, kkt.hess, xx, alpha, beta)
+ if m > 0 # otherwise, CUDA causes an error
+ mul!(wx, kkt.jac', dual(x), alpha, one(T))
+ mul!(wy, kkt.jac, xx, alpha, beta)
end
+ ws .= beta.*ws .- alpha.* xz
+ wz .-= alpha.* xs
+ _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta)
+ return w
+end
+
+function mul_hess_blk!(wx, kkt::Union{DenseKKTSystem,DenseCondensedKKTSystem}, t)
+ n = size(kkt.hess, 1)
+ mul!(@view(wx[1:n]), Symmetric(kkt.hess, :L), @view(t[1:n]))
+ fill!(@view(wx[n+1:end]), 0)
+ wx .+= t .* kkt.pr_diag
+end
- fixed_variable_treatment_vec!(full(x), solver.ind_fixed)
- solve_status = (result == :Solved)
- return solve_status
+function mul_hess_blk!(wx, kkt::Union{SparseKKTSystem,SparseCondensedKKTSystem}, t)
+ n = size(kkt.hess_com, 1)
+ mul!(@view(wx[1:n]), Symmetric(kkt.hess_com, :L), @view(t[1:n]))
+ fill!(@view(wx[n+1:end]), 0)
+ wx .+= t .* kkt.pr_diag
+end
+function mul_hess_blk!(wx, kkt::SparseUnreducedKKTSystem, t)
+ ind_lb = kkt.ind_lb
+ ind_ub = kkt.ind_ub
+
+ n = size(kkt.hess_com, 1)
+ mul!(@view(wx[1:n]), Symmetric(kkt.hess_com, :L), @view(t[1:n]))
+ fill!(@view(wx[n+1:end]), 0)
+ wx .+= t .* kkt.pr_diag
+ wx[ind_lb] .-= @view(t[ind_lb]) .* (kkt.l_lower ./ kkt.l_diag)
+ wx[ind_ub] .-= @view(t[ind_ub]) .* (kkt.u_lower ./ kkt.u_diag)
+end
+
+# Set V1 = [U₁ U₂] , V2 = [-U₁ U₂]
+function _init_lbfgs_factors!(V1, V2, U, n, p)
+ @inbounds for i in 1:n, j in 1:p
+ V1[i, j] = U[i, j]
+ V2[i, j] = -U[i, j]
+ V1[i, j+p] = U[i, j+p]
+ V2[i, j+p] = U[i, j+p]
+ end
end
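
A small check of the sign convention encoded by `_init_lbfgs_factors!`: with `U = [U₁ U₂]`, the low-rank product becomes `V1*V2' = -U₁U₁ᵀ + U₂U₂ᵀ`:

    using LinearAlgebra

    n, p = 4, 1
    U  = randn(n, 2p)
    V1 = zeros(n, 2p); V2 = zeros(n, 2p)
    for i in 1:n, j in 1:p                 # same loop as above
        V1[i, j]   = U[i, j];    V2[i, j]   = -U[i, j]
        V1[i, j+p] = U[i, j+p];  V2[i, j+p] = U[i, j+p]
    end
    U1, U2 = U[:, 1:p], U[:, p+1:2p]
    @assert V1*V2' ≈ -U1*U1' + U2*U2'
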
diff --git a/src/IPM/inertiacorrector.jl b/src/IPM/inertiacorrector.jl
new file mode 100644
index 00000000..c01077bc
--- /dev/null
+++ b/src/IPM/inertiacorrector.jl
@@ -0,0 +1,33 @@
+abstract type AbstractInertiaCorrector end
+struct InertiaAuto <: AbstractInertiaCorrector end
+struct InertiaBased <: AbstractInertiaCorrector end
+struct InertiaIgnore <: AbstractInertiaCorrector end
+struct InertiaFree{
+ T,
+ VT <: AbstractVector{T},
+ KKTVec <: AbstractKKTVector{T, VT}
+} <: AbstractInertiaCorrector
+ p0::KKTVec
+ d0::KKTVec
+ t::VT
+ wx::VT
+ g::VT
+end
+
+function build_inertia_corrector(::Type{InertiaBased}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT
+ return InertiaBased()
+end
+function build_inertia_corrector(::Type{InertiaIgnore}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT
+ return InertiaIgnore()
+end
+function build_inertia_corrector(::Type{InertiaFree}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT
+ p0 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ d0 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
+ t = VT(undef, n)
+    wx = VT(undef, n)
+ g = VT(undef, n)
+
+ return InertiaFree(
+ p0, d0, t, wx, g
+ )
+end
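
The corrector is resolved once at solver construction (see the `InertiaAuto` branch in `IPM.jl` above). A sketch calling the internal helper directly, with toy sizes and empty bound index sets:

    using MadNLP

    ic = MadNLP.build_inertia_corrector(
        MadNLP.InertiaBased, Vector{Float64},
        3, 1, 0, 0, Int[], Int[],
    )
    @assert ic isa MadNLP.InertiaBased     # stateless marker type
    # InertiaFree instead allocates the p0/d0 KKT vectors and three work arrays.
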
diff --git a/src/IPM/kernels.jl b/src/IPM/kernels.jl
index 5c3a77b0..a89c82b2 100644
--- a/src/IPM/kernels.jl
+++ b/src/IPM/kernels.jl
@@ -1,30 +1,35 @@
# KKT system updates -------------------------------------------------------
# Set diagonal
-function set_aug_diagonal!(kkt::AbstractKKTSystem, solver::MadNLPSolver{T}) where T
+function set_aug_diagonal!(kkt::AbstractKKTSystem{T}, solver::MadNLPSolver{T}) where T
x = full(solver.x)
xl = full(solver.xl)
xu = full(solver.xu)
zl = full(solver.zl)
zu = full(solver.zu)
- @inbounds @simd for i in eachindex(kkt.pr_diag)
- kkt.pr_diag[i] = zl[i] /(x[i] - xl[i])
- kkt.pr_diag[i] += zu[i] /(xu[i] - x[i])
- end
+
+ fill!(kkt.reg, zero(T))
fill!(kkt.du_diag, zero(T))
+ kkt.l_diag .= solver.xl_r .- solver.x_lr
+ kkt.u_diag .= solver.x_ur .- solver.xu_r
+ copyto!(kkt.l_lower, solver.zl_r)
+ copyto!(kkt.u_lower, solver.zu_r)
+
+ _set_aug_diagonal!(kkt)
return
end
-function set_aug_diagonal!(kkt::SparseUnreducedKKTSystem, solver::MadNLPSolver{T}) where T
- fill!(kkt.pr_diag, zero(T))
- fill!(kkt.du_diag, zero(T))
- @inbounds @simd for i in eachindex(kkt.l_lower)
- kkt.l_lower[i] = -sqrt(solver.zl_r[i])
- kkt.l_diag[i] = solver.xl_r[i] - solver.x_lr[i]
- end
- @inbounds @simd for i in eachindex(kkt.u_lower)
- kkt.u_lower[i] = -sqrt(solver.zu_r[i])
- kkt.u_diag[i] = solver.x_ur[i] - solver.xu_r[i]
- end
+
+function _set_aug_diagonal!(kkt::AbstractKKTSystem)
+ copyto!(kkt.pr_diag, kkt.reg)
+ kkt.pr_diag[kkt.ind_lb] .-= kkt.l_lower ./ kkt.l_diag
+ kkt.pr_diag[kkt.ind_ub] .-= kkt.u_lower ./ kkt.u_diag
+ return
+end
+
+function _set_aug_diagonal!(kkt::AbstractUnreducedKKTSystem)
+ copyto!(kkt.pr_diag, kkt.reg)
+ kkt.l_lower_aug .= sqrt.(kkt.l_lower)
+ kkt.u_lower_aug .= sqrt.(kkt.u_lower)
return
end
@@ -35,88 +40,41 @@ function set_aug_RR!(kkt::AbstractKKTSystem, solver::MadNLPSolver, RR::RobustRes
xu = full(solver.xu)
zl = full(solver.zl)
zu = full(solver.zu)
- @inbounds @simd for i in eachindex(kkt.pr_diag)
- kkt.pr_diag[i] = zl[i] / (x[i] - xl[i])
- kkt.pr_diag[i] += zu[i] / (xu[i] - x[i]) + RR.zeta * RR.D_R[i]^2
- end
- @inbounds @simd for i in eachindex(kkt.du_diag)
- kkt.du_diag[i] = -RR.pp[i] /RR.zp[i] - RR.nn[i] /RR.zn[i]
- end
- return
-end
-function set_aug_RR!(kkt::SparseUnreducedKKTSystem, solver::MadNLPSolver, RR::RobustRestorer)
- @inbounds @simd for i in eachindex(kkt.pr_diag)
- kkt.pr_diag[i] = RR.zeta * RR.D_R[i]^2
- end
- @inbounds @simd for i in eachindex(kkt.du_diag)
- kkt.du_diag[i] = -RR.pp[i] / RR.zp[i] - RR.nn[i] / RR.zn[i]
- end
- @inbounds @simd for i in eachindex(kkt.l_lower)
- kkt.l_lower[i] = -sqrt(solver.zl_r[i])
- kkt.l_diag[i] = solver.xl_r[i] - solver.x_lr[i]
- end
- @inbounds @simd for i in eachindex(kkt.u_lower)
- kkt.u_lower[i] = -sqrt(solver.zu_r[i])
- kkt.u_diag[i] = solver.x_ur[i] - solver.xu_r[i]
- end
+ kkt.reg .= RR.zeta .* RR.D_R .^ 2
+ kkt.du_diag .= .- RR.pp ./ RR.zp .- RR.nn ./ RR.zn
+ copyto!(kkt.l_lower, solver.zl_r)
+ copyto!(kkt.u_lower, solver.zu_r)
+ kkt.l_diag .= solver.xl_r .- solver.x_lr
+ kkt.u_diag .= solver.x_ur .- solver.xu_r
+
+ _set_aug_diagonal!(kkt)
+
return
end
+
function set_f_RR!(solver::MadNLPSolver, RR::RobustRestorer)
x = full(solver.x)
- @inbounds @simd for i in eachindex(RR.f_R)
- RR.f_R[i] = RR.zeta * RR.D_R[i]^2 *(x[i]-RR.x_ref[i])
- end
+ RR.f_R .= RR.zeta .* RR.D_R .^ 2 .* (x .- RR.x_ref)
+ return
end
-
# Set RHS
-function set_aug_rhs!(solver::MadNLPSolver, kkt::AbstractKKTSystem, c)
+function set_aug_rhs!(solver::MadNLPSolver, kkt::AbstractKKTSystem, c::AbstractVector)
px = primal(solver.p)
x = primal(solver.x)
f = primal(solver.f)
xl = primal(solver.xl)
xu = primal(solver.xu)
- @inbounds @simd for i in eachindex(px)
- px[i] = -f[i] + solver.mu / (x[i] - xl[i]) - solver.mu / (xu[i] - x[i]) - solver.jacl[i]
- end
- py = dual(solver.p)
- @inbounds @simd for i in eachindex(py)
- py[i] = -c[i]
- end
- return
-end
-function set_aug_rhs!(solver::MadNLPSolver, kkt::SparseUnreducedKKTSystem, c)
- f = primal(solver.f)
- zl = primal(solver.zl)
- zu = primal(solver.zu)
- px = primal(solver.p)
- @inbounds @simd for i in eachindex(px)
- px[i] = -f[i] + zl[i] - zu[i] - solver.jacl[i]
- end
+ zl = full(solver.zl)
+ zu = full(solver.zu)
py = dual(solver.p)
- @inbounds @simd for i in eachindex(py)
- py[i] = -c[i]
- end
pzl = dual_lb(solver.p)
- @inbounds @simd for i in eachindex(pzl)
- pzl[i] = (solver.xl_r[i] - solver.x_lr[i]) * kkt.l_lower[i] + solver.mu / kkt.l_lower[i]
- end
pzu = dual_ub(solver.p)
- @inbounds @simd for i in eachindex(pzu)
- pzu[i] = (solver.xu_r[i] -solver.x_ur[i]) * kkt.u_lower[i] - solver.mu / kkt.u_lower[i]
- end
-# >>>>>>> origin/master
- return
-end
-function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem,c) where T
- fill!(primal(solver._w1), zero(T))
- fill!(dual_lb(solver._w1), zero(T))
- fill!(dual_ub(solver._w1), zero(T))
- wy = dual(solver._w1)
- @inbounds @simd for i in eachindex(wy)
- wy[i] = -c[i]
- end
+ px .= .-f .+ zl .- zu .- solver.jacl
+ py .= .-c
+ pzl .= (solver.xl_r .- solver.x_lr) .* solver.zl_r .+ solver.mu
+ pzu .= (solver.xu_r .- solver.x_ur) .* solver.zu_r .- solver.mu
return
end
@@ -127,70 +85,84 @@ function set_aug_rhs_RR!(
x = full(solver.x)
xl = full(solver.xl)
xu = full(solver.xu)
+ zl = full(solver.zl)
+ zu = full(solver.zu)
px = primal(solver.p)
- @inbounds @simd for i in eachindex(px)
- px[i] = -RR.f_R[i] -solver.jacl[i] + RR.mu_R / (x[i] - xl[i]) - RR.mu_R / (xu[i] - x[i])
- end
py = dual(solver.p)
- @inbounds @simd for i in eachindex(py)
- py[i] = -solver.c[i] + RR.pp[i] - RR.nn[i] + (RR.mu_R-(rho-solver.y[i])*RR.pp[i])/RR.zp[i]-(RR.mu_R-(rho+solver.y[i])*RR.nn[i]) / RR.zn[i]
- end
+ pzl = dual_lb(solver.p)
+ pzu = dual_ub(solver.p)
+
+ mu = RR.mu_R
+
+ px .= .- RR.f_R .+ zl .- zu .- solver.jacl
+ py .= .- solver.c .+ RR.pp .- RR.nn .+
+ (mu .- (rho .- solver.y) .* RR.pp) ./ RR.zp .-
+ (mu .- (rho .+ solver.y) .* RR.nn) ./ RR.zn
+
+ pzl .= (solver.xl_r .- solver.x_lr) .* solver.zl_r .+ mu
+ pzu .= (solver.xu_r .- solver.x_ur) .* solver.zu_r .- mu
+
return
end
-# Finish
-function finish_aug_solve!(solver::MadNLPSolver, kkt::AbstractKKTSystem, mu)
- dlb = dual_lb(solver.d)
- @inbounds @simd for i in eachindex(dlb)
- dlb[i] = (mu-solver.zl_r[i]*solver.dx_lr[i])/(solver.x_lr[i]-solver.xl_r[i])-solver.zl_r[i]
- end
- dub = dual_ub(solver.d)
- @inbounds @simd for i in eachindex(dub)
- dub[i] = (mu+solver.zu_r[i]*solver.dx_ur[i])/(solver.xu_r[i]-solver.x_ur[i])-solver.zu_r[i]
- end
+# solving KKT system
+@inbounds function _kktmul!(
+ w::AbstractKKTVector,
+ x::AbstractKKTVector,
+ reg,
+ du_diag,
+ l_lower,
+ u_lower,
+ l_diag,
+ u_diag,
+ alpha,
+ beta,
+)
+ primal(w) .+= alpha .* reg .* primal(x)
+ dual(w) .+= alpha .* du_diag .* dual(x)
+ w.xp_lr .-= alpha .* dual_lb(x)
+ w.xp_ur .+= alpha .* dual_ub(x)
+ dual_lb(w) .= beta .* dual_lb(w) .+ alpha .* (x.xp_lr .* l_lower .- dual_lb(x) .* l_diag)
+ dual_ub(w) .= beta .* dual_ub(w) .+ alpha .* (x.xp_ur .* u_lower .+ dual_ub(x) .* u_diag)
return
end
-function finish_aug_solve!(solver::MadNLPSolver, kkt::SparseUnreducedKKTSystem, mu)
- dlb = dual_lb(solver.d)
- @inbounds @simd for i in eachindex(dlb)
- dlb[i] = (mu-solver.zl_r[i]*solver.dx_lr[i]) / (solver.x_lr[i]-solver.xl_r[i]) - solver.zl_r[i]
- end
- dub = dual_ub(solver.d)
- @inbounds @simd for i in eachindex(dub)
- dub[i] = (mu+solver.zu_r[i]*solver.dx_ur[i]) / (solver.xu_r[i]-solver.x_ur[i]) - solver.zu_r[i]
- end
+
+@inbounds function reduce_rhs!(
+ xp_lr, wl, l_diag,
+ xp_ur, wu, u_diag,
+)
+ xp_lr .-= wl ./ l_diag
+ xp_ur .-= wu ./ u_diag
return
end
-# Initial
-function set_initial_bounds!(solver::MadNLPSolver{T}) where T
- @inbounds @simd for i in eachindex(solver.xl_r)
- solver.xl_r[i] -= max(one(T),abs(solver.xl_r[i]))*solver.opt.tol
- end
- @inbounds @simd for i in eachindex(solver.xu_r)
- solver.xu_r[i] += max(one(T),abs(solver.xu_r[i]))*solver.opt.tol
- end
-end
-function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem) where T
- f = primal(solver.f)
- zl = primal(solver.zl)
- zu = primal(solver.zu)
- px = primal(solver.p)
- @inbounds @simd for i in eachindex(px)
- px[i] = -f[i] + zl[i] - zu[i]
- end
- fill!(dual(solver.p), zero(T))
+# Finish
+function finish_aug_solve!(kkt::AbstractKKTSystem, d::AbstractKKTVector)
+ dlb = dual_lb(d)
+ dub = dual_ub(d)
+ dlb .= (.-dlb .+ kkt.l_lower .* d.xp_lr) ./ kkt.l_diag
+ dub .= ( dub .- kkt.u_lower .* d.xp_ur) ./ kkt.u_diag
return
end
-function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem) where T
+
+function set_initial_bounds!(xl::AbstractVector{T}, xu::AbstractVector{T}, tol) where T
+    map!(
+        x -> x - max(one(T), abs(x)) * tol,
+        xl, xl
+    )
+    map!(
+        x -> x + max(one(T), abs(x)) * tol,
+        xu, xu
+    )
+end
+
+function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem) where T
f = primal(solver.f)
zl = primal(solver.zl)
zu = primal(solver.zu)
px = primal(solver.p)
- @inbounds @simd for i in eachindex(px)
- px[i] = -f[i] + zl[i] - zu[i]
- end
+ px .= .-f .+ zl .- zu
fill!(dual(solver.p), zero(T))
fill!(dual_lb(solver.p), zero(T))
fill!(dual_ub(solver.p), zero(T))
@@ -198,313 +170,512 @@ function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem
end
# Set ifr
-function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem) where T
- fill!(primal(solver._w1), zero(T))
- wy = dual(solver._w1)
- @inbounds @simd for i in eachindex(wy)
- wy[i] = - solver.c[i]
- end
+function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem, p0::AbstractKKTVector) where T
+ fill!(primal(p0), zero(T))
+ fill!(dual_lb(p0), zero(T))
+ fill!(dual_ub(p0), zero(T))
+ wy = dual(p0)
+ wy .= .- solver.c
return
end
-function set_g_ifr!(solver::MadNLPSolver, g)
+
+function set_g_ifr!(solver::MadNLPSolver, g::AbstractArray)
f = full(solver.f)
x = full(solver.x)
xl = full(solver.xl)
xu = full(solver.xu)
- @inbounds @simd for i in eachindex(g)
- g[i] = f[i] - solver.mu / (x[i]-xl[i]) + solver.mu / (xu[i]-x[i]) + solver.jacl[i]
- end
+ g .= f .- solver.mu ./ (x .- xl) .+ solver.mu ./ (xu .- x) .+ solver.jacl
end
-
# Finish RR
function finish_aug_solve_RR!(dpp, dnn, dzp, dzn, l, dl, pp, nn, zp, zn, mu_R, rho)
- @inbounds @simd for i in eachindex(dpp)
- dpp[i] = (mu_R + pp[i] * dl[i] - (rho - l[i]) * pp[i]) / zp[i]
- dnn[i] = (mu_R - nn[i] * dl[i] - (rho + l[i]) * nn[i]) / zn[i]
- dzp[i] = (mu_R - zp[i] * dpp[i]) / pp[i] - zp[i]
- dzn[i] = (mu_R - zn[i] * dnn[i]) / nn[i] - zn[i]
- end
+ dzp .= rho .- l .- dl .- zp
+ dzn .= rho .+ l .+ dl .- zn
+ dpp .= .- pp .+ mu_R ./zp .- (pp./zp) .* dzp
+ dnn .= .- nn .+ mu_R ./zn .- (nn./zn) .* dzn
return
end
-# Scaling
-function unscale!(solver::AbstractMadNLPSolver)
- x_slk = slack(solver.x)
- solver.obj_val /= solver.obj_scale[]
- @inbounds @simd for i in eachindex(solver.c)
- solver.c[i] /= solver.con_scale[i]
- solver.c[i] += solver.rhs[i]
- end
- @inbounds @simd for i in eachindex(solver.c_slk)
- solver.c_slk[i] += x_slk[i]
- end
-end
-
# Kernel functions ---------------------------------------------------------
-is_valid(val::Real) = !(isnan(val) || isinf(val))
-function is_valid(vec::AbstractArray)
- @inbounds @simd for i=1:length(vec)
- is_valid(vec[i]) || return false
+is_valid(val::R) where R <: Real = !(isnan(val) || isinf(val))
+is_valid(vec::AbstractArray) = isempty(vec) ? true : mapreduce(is_valid, &, vec)
+
+function _get_varphi(x1::T, x2::T, mu::T) where T
+ x = x1 - x2
+ if x < 0
+ return T(Inf)
+ else
+ return -mu * log(x)
end
- return true
end
-is_valid(args...) = all(is_valid(arg) for arg in args)
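+# Barrier merit function: varphi(x) = f(x) - mu*sum(log(x - xl)) - mu*sum(log(xu - x)).
+# _get_varphi returns Inf for a negative slack, so any infeasible trial point
+# evaluates to +Inf and is rejected by the filter line search.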
function get_varphi(obj_val, x_lr, xl_r, xu_r, x_ur, mu)
- varphi = obj_val
- @inbounds @simd for i=1:length(x_lr)
- xll = x_lr[i]-xl_r[i]
- xll < 0 && return Inf
- varphi -= mu*log(xll)
- end
- @inbounds @simd for i=1:length(x_ur)
- xuu = xu_r[i]-x_ur[i]
- xuu < 0 && return Inf
- varphi -= mu*log(xuu)
- end
- return varphi
+    return obj_val + mapreduce(
+        (x1,x2) -> _get_varphi(x1,x2,mu), +, x_lr, xl_r;
+        init = zero(eltype(x_lr))
+    ) + mapreduce(
+        (x1,x2) -> _get_varphi(x1,x2,mu), +, xu_r, x_ur;
+        init = zero(eltype(x_lr))
+    )
end
-@inline get_inf_pr(c) = norm(c, Inf)
+@inline get_inf_pr(c::AbstractVector) = norm(c, Inf)
function get_inf_du(f, zl, zu, jacl, sd)
- inf_du = 0.0
- @inbounds @simd for i=1:length(f)
- inf_du = max(inf_du,abs(f[i]-zl[i]+zu[i]+jacl[i]))
- end
- return inf_du/sd
+ return mapreduce((f,zl,zu,jacl) -> abs(f-zl+zu+jacl), max, f, zl, zu, jacl; init = zero(eltype(f))) / sd
end
function get_inf_compl(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu, sc)
- inf_compl = 0.0
- @inbounds @simd for i=1:length(x_lr)
- inf_compl = max(inf_compl,abs((x_lr[i]-xl_r[i])*zl_r[i]-mu))
- end
- @inbounds @simd for i=1:length(x_ur)
- inf_compl = max(inf_compl,abs((xu_r[i]-x_ur[i])*zu_r[i]-mu))
- end
- return inf_compl/sc
-end
-
-function get_varphi_d(f, x, xl, xu, dx, mu)
- varphi_d = 0.0
- @inbounds @simd for i=1:length(f)
- varphi_d += (f[i] - mu/(x[i]-xl[i]) + mu/(xu[i]-x[i])) * dx[i]
- end
- return varphi_d
+ return max(
+ mapreduce(
+ (x_lr, xl_r, zl_r) -> abs((x_lr-xl_r)*zl_r-mu),
+ max,
+ x_lr, xl_r, zl_r;
+ init = zero(eltype(x_lr))
+ ),
+ mapreduce(
+ (xu_r, x_ur, zu_r) -> abs((xu_r-x_ur)*zu_r-mu),
+ max,
+ xu_r, x_ur, zu_r;
+ init = zero(eltype(x_lr))
+ )
+ ) / sc
+end
+
+function get_varphi_d(
+ f::AbstractVector{T},
+ x::AbstractVector{T},
+ xl::AbstractVector{T},
+ xu::AbstractVector{T},
+ dx::AbstractVector{T},
+ mu,
+) where T
+ return mapreduce(
+ (f,x,xl,xu,dx)-> (f - mu/(x-xl) + mu/(xu-x)) * dx,
+ +,
+ f, x, xl, xu, dx;
+ init = zero(T)
+ )
end
-function get_alpha_max(x, xl, xu, dx, tau)
- alpha_max = 1.0
- @inbounds @simd for i=1:length(x)
- dx[i]<0 && (alpha_max=min(alpha_max,(-x[i]+xl[i])*tau/dx[i]))
- dx[i]>0 && (alpha_max=min(alpha_max,(-x[i]+xu[i])*tau/dx[i]))
- end
- return alpha_max
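+# Fraction-to-the-boundary rule: the largest alpha in (0, 1] keeping
+# x + alpha*dx at relative distance tau from its finite bounds.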
+function get_alpha_max(
+ x::AbstractVector{T},
+ xl::AbstractVector{T},
+ xu::AbstractVector{T},
+ dx::AbstractVector{T},
+ tau,
+) where T
+ return min(
+ mapreduce(
+ (x, xl, dx) -> dx < 0 ? (-x+xl)*tau/dx : T(Inf),
+            min,
+            x, xl, dx,
+ init = one(T)
+ ),
+ mapreduce(
+ (x, xu, dx) -> dx > 0 ? (-x+xu)*tau/dx : T(Inf),
+ min,
+ x, xu, dx,
+ init = one(T)
+ )
+ )
end
-function get_alpha_z(zl_r, zu_r, dzl, dzu, tau)
- alpha_z = 1.0
- @inbounds @simd for i=1:length(zl_r)
- dzl[i] < 0 && (alpha_z=min(alpha_z,-zl_r[i]*tau/dzl[i]))
- end
- @inbounds @simd for i=1:length(zu_r)
- dzu[i] < 0 && (alpha_z=min(alpha_z,-zu_r[i]*tau/dzu[i]))
- end
- return alpha_z
+function get_alpha_z(
+ zl_r::AbstractVector{T},
+ zu_r::AbstractVector{T},
+ dzl::AbstractVector{T},
+ dzu::AbstractVector{T},
+ tau,
+) where T
+ return min(
+ mapreduce(
+ (zl_r, dzl) -> dzl < 0 ? (-zl_r)*tau/dzl : T(Inf),
+ min,
+ zl_r, dzl,
+ init = one(T)
+ ),
+ mapreduce(
+ (zu_r, dzu) -> dzu < 0 ? (-zu_r)*tau/dzu : T(Inf),
+ min,
+ zu_r, dzu,
+ init = one(T)
+ )
+ )
end
-function get_obj_val_R(p, n, D_R, x, x_ref, rho, zeta)
- obj_val_R = 0.
- @inbounds @simd for i=1:length(p)
- obj_val_R += rho*(p[i]+n[i]) .+ zeta/2*D_R[i]^2*(x[i]-x_ref[i])^2
- end
- return obj_val_R
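+# Restoration-phase objective: rho * sum(p + n) + (zeta/2) * sum(D_R^2 (x - x_ref)^2),
+# an l1 penalty on the constraint relaxation plus a proximal term around x_ref.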
+function get_obj_val_R(
+ p::AbstractVector{T},
+ n::AbstractVector{T},
+ D_R::AbstractVector{T},
+ x::AbstractVector{T},
+ x_ref::AbstractVector{T},
+ rho,
+ zeta,
+) where T
+ return mapreduce(
+        (p,n,D_R,x,x_ref) -> rho*(p+n) + zeta/2*D_R^2*(x-x_ref)^2,
+ +,
+ p,n,D_R,x,x_ref;
+ init = zero(T)
+ )
end
@inline get_theta(c) = norm(c, 1)
-function get_theta_R(c, p, n)
- theta_R = 0.0
- @inbounds @simd for i=1:length(c)
- theta_R += abs(c[i]-p[i]+n[i])
- end
- return theta_R
-end
-
-function get_inf_pr_R(c, p, n)
- inf_pr_R = 0.0
- @inbounds @simd for i=1:length(c)
- inf_pr_R = max(inf_pr_R,abs(c[i]-p[i]+n[i]))
- end
- return inf_pr_R
-end
-
-function get_inf_du_R(f_R, l, zl, zu, jacl, zp, zn, rho, sd)
- inf_du_R = 0.0
- @inbounds @simd for i=1:length(zl)
- inf_du_R = max(inf_du_R,abs(f_R[i]-zl[i]+zu[i]+jacl[i]))
- end
- @inbounds @simd for i=1:length(zp)
- inf_du_R = max(inf_du_R,abs(rho-l[i]-zp[i]))
- end
- @inbounds @simd for i=1:length(zn)
- inf_du_R = max(inf_du_R,abs(rho+l[i]-zn[i]))
- end
- return inf_du_R / sd
+function get_theta_R(
+ c::AbstractVector{T},
+ p::AbstractVector{T},
+ n::AbstractVector{T},
+) where T
+ return mapreduce(
+ (c,p,n) -> abs(c-p+n),
+ +,
+ c,p,n;
+ init = zero(T)
+ )
end
-function get_inf_compl_R(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, pp, zp, nn, zn, mu_R, sc)
- inf_compl_R = 0.0
- @inbounds @simd for i=1:length(x_lr)
- inf_compl_R = max(inf_compl_R,abs((x_lr[i]-xl_r[i])*zl_r[i]-mu_R))
- end
- @inbounds @simd for i=1:length(xu_r)
- inf_compl_R = max(inf_compl_R,abs((xu_r[i]-x_ur[i])*zu_r[i]-mu_R))
- end
- @inbounds @simd for i=1:length(pp)
- inf_compl_R = max(inf_compl_R,abs(pp[i]*zp[i]-mu_R))
- end
- @inbounds @simd for i=1:length(nn)
- inf_compl_R = max(inf_compl_R,abs(nn[i]*zn[i]-mu_R))
- end
- return inf_compl_R / sc
+function get_inf_pr_R(
+ c::AbstractVector{T},
+ p::AbstractVector{T},
+ n::AbstractVector{T},
+) where T
+ return mapreduce(
+ (c,p,n) -> abs(c-p+n),
+ max,
+ c,p,n;
+ init = zero(T)
+ )
end
-function get_alpha_max_R(x, xl, xu, dx, pp, dpp, nn, dnn, tau_R)
- alpha_max_R = 1.0
- @inbounds @simd for i=1:length(x)
- dx[i]<0 && (alpha_max_R=min(alpha_max_R,(-x[i]+xl[i])*tau_R/dx[i]))
- dx[i]>0 && (alpha_max_R=min(alpha_max_R,(-x[i]+xu[i])*tau_R/dx[i]))
- end
- @inbounds @simd for i=1:length(pp)
- dpp[i]<0 && (alpha_max_R=min(alpha_max_R,-pp[i]*tau_R/dpp[i]))
- end
- @inbounds @simd for i=1:length(nn)
- dnn[i]<0 && (alpha_max_R=min(alpha_max_R,-nn[i]*tau_R/dnn[i]))
- end
- return alpha_max_R
+function get_inf_du_R(
+ f_R::AbstractVector{T},
+ l::AbstractVector{T},
+ zl::AbstractVector{T},
+ zu::AbstractVector{T},
+ jacl::AbstractVector{T},
+ zp::AbstractVector{T},
+ zn::AbstractVector{T},
+ rho,
+ sd,
+) where T
+ return max(
+ mapreduce(
+ (f_R, zl, zu, jacl) -> abs(f_R-zl+zu+jacl),
+ max,
+ f_R, zl, zu, jacl;
+ init = zero(T)
+ ),
+ mapreduce(
+ (l, zp) -> abs(rho-l-zp),
+ max,
+ l, zp;
+ init = zero(T)
+ ),
+ mapreduce(
+ (l, zn) -> abs(rho+l-zn),
+ max,
+ l, zn;
+ init = zero(T)
+ )
+ ) / sd
+end
+
+function get_inf_compl_R(
+ x_lr::SubVector{T, VT, VI},
+ xl_r,
+ zl_r,
+ xu_r,
+ x_ur,
+ zu_r,
+ pp,
+ zp,
+ nn,
+ zn,
+ mu_R,
+ sc
+) where {T, VT <: AbstractVector{T}, VI}
+ return max(
+ mapreduce(
+ (x_lr, xl_r, zl_r) -> abs((x_lr-xl_r)*zl_r-mu_R),
+ max,
+ x_lr, xl_r, zl_r;
+ init = zero(T)
+ ),
+ mapreduce(
+ (xu_r, x_ur, zu_r) -> abs((xu_r-x_ur)*zu_r-mu_R),
+ max,
+ xu_r, x_ur, zu_r;
+ init = zero(T)
+ ),
+ mapreduce(
+ (pp, zp) -> abs(pp*zp-mu_R),
+ max,
+ pp, zp;
+ init = zero(T)
+ ),
+ mapreduce(
+ (nn, zn) -> abs(nn*zn-mu_R),
+ max,
+ nn, zn;
+ init = zero(T)
+ ),
+ ) / sc
+end
+
+function get_alpha_max_R(
+ x::AbstractVector{T},
+ xl::AbstractVector{T},
+ xu::AbstractVector{T},
+ dx::AbstractVector{T},
+ pp::AbstractVector{T},
+ dpp::AbstractVector{T},
+ nn::AbstractVector{T},
+ dnn::AbstractVector{T},
+ tau_R,
+) where T
+ return min(
+ mapreduce(
+ (x,xl,xu,dx) -> if dx < 0
+ (-x+xl)*tau_R/dx
+ elseif dx > 0
+ (-x+xu)*tau_R/dx
+ else
+ T(Inf)
+ end,
+ min,
+ x,xl,xu,dx;
+ init = one(T)
+ ),
+ mapreduce(
+ (pp, dpp)-> if dpp < 0
+ -pp*tau_R/dpp
+ else
+ T(Inf)
+ end,
+ min,
+ pp, dpp;
+ init = one(T)
+ ),
+ mapreduce(
+ (nn, dnn)-> if dnn < 0
+ -nn*tau_R/dnn
+ else
+ T(Inf)
+ end,
+ min,
+ nn, dnn;
+ init = one(T)
+ )
+ )
end
-function get_alpha_z_R(zl_r, zu_r, dzl, dzu, zp, dzp, zn, dzn, tau_R)
- alpha_z_R = 1.0
- @inbounds @simd for i=1:length(zl_r)
- dzl[i]<0 && (alpha_z_R=min(alpha_z_R,-zl_r[i]*tau_R/dzl[i]))
- end
- @inbounds @simd for i=1:length(zu_r)
- dzu[i]<0 && (alpha_z_R=min(alpha_z_R,-zu_r[i]*tau_R/dzu[i]))
- end
- @inbounds @simd for i=1:length(zp)
- dzp[i]<0 && (alpha_z_R=min(alpha_z_R,-zp[i]*tau_R/dzp[i]))
- end
- @inbounds @simd for i=1:length(zn)
- dzn[i]<0 && (alpha_z_R=min(alpha_z_R,-zn[i]*tau_R/dzn[i]))
- end
- return alpha_z_R
+function get_alpha_z_R(
+ zl_r::SubVector{T, VT, VI},
+ zu_r,
+ dzl,
+ dzu,
+ zp,
+ dzp,
+ zn,
+ dzn,
+ tau_R,
+) where {T, VT <: AbstractVector{T}, VI}
+
+ f(d,z) = d < 0 ? -z*tau_R/d : T(Inf)
+ return min(
+ mapreduce(
+ f,
+ min,
+ dzl, zl_r;
+ init = one(T)
+ ),
+ mapreduce(
+ f,
+ min,
+ dzu, zu_r;
+ init = one(T)
+ ),
+ mapreduce(
+ f,
+ min,
+ dzp, zp;
+ init = one(T)
+ ),
+ mapreduce(
+ f,
+ min,
+ dzn, zn;
+ init = one(T)
+ )
+ )
end
-function get_varphi_R(obj_val, x_lr, xl_r, xu_r, x_ur, pp, nn, mu_R)
+function get_varphi_R(
+ obj_val,
+ x_lr::SubVector{T, VT, VI},
+ xl_r,
+ xu_r,
+ x_ur,
+ pp,
+ nn,
+ mu_R,
+) where {T, VT <: AbstractVector{T}, VI}
-    varphi_R = obj_val
- @inbounds @simd for i=1:length(x_lr)
- xll = x_lr[i]-xl_r[i]
- xll < 0 && return Inf
- varphi_R -= mu_R*log(xll)
- end
- @inbounds @simd for i=1:length(x_ur)
- xuu = xu_r[i]-x_ur[i]
- xuu < 0 && return Inf
- varphi_R -= mu_R*log(xuu)
- end
- @inbounds @simd for i=1:length(pp)
- pp[i] < 0 && return Inf
- varphi_R -= mu_R*log(pp[i])
- end
- @inbounds @simd for i=1:length(pp)
- nn[i] < 0 && return Inf
- varphi_R -= mu_R*log(nn[i])
- end
- return varphi_R
+    # Return -Inf on a violated slack so that the subtraction below yields +Inf
+    # and the trial point is rejected.
+    f1(x) = x < 0 ? -T(Inf) : mu_R*log(x)
+    function f2(x,y)
+        d = x - y
+        d < 0 ? -T(Inf) : mu_R * log(d)
+    end
+
+ return obj_val - +(
+ mapreduce(
+ f2,
+ +,
+ x_lr, xl_r;
+ init = zero(T)
+ ),
+ mapreduce(
+ f2,
+ +,
+ xu_r, x_ur;
+ init = zero(T)
+ ),
+ mapreduce(
+ f1,
+ +,
+ pp;
+ init = zero(T)
+ ),
+ mapreduce(
+ f1,
+ +,
+ nn;
+ init = zero(T)
+ )
+ )
end
-function get_F(c, f, zl, zu, jacl, x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu)
- F = 0.0
- @inbounds @simd for i=1:length(c)
- F = max(F, c[i])
- end
- @inbounds @simd for i=1:length(f)
- F = max(F, f[i]-zl[i]+zu[i]+jacl[i])
- end
- @inbounds @simd for i=1:length(x_lr)
- x_lr[i] >= xl_r[i] || return Inf
- zl_r[i] >= 0 || return Inf
- F = max(F, (x_lr[i]-xl_r[i])*zl_r[i]-mu)
- end
- @inbounds @simd for i=1:length(x_ur)
- xu_r[i] >= x_ur[i] || return Inf
- zu_r[i] >= 0 || return Inf
- F = max(F, (xu_r[i]-xu_r[i])*zu_r[i]-mu)
- end
- return F
+function get_F(
+ c::AbstractVector{T},
+ f,
+ zl,
+ zu,
+ jacl,
+ x_lr,
+ xl_r,
+ zl_r,
+ xu_r,
+ x_ur,
+ zu_r,
+ mu,
+) where T
+ F1 = mapreduce(
+ abs,
+ +,
+ c;
+ init = zero(T)
+ )
+ F2 = mapreduce(
+ (f,zl,zu,jacl) -> abs(f-zl+zu+jacl),
+ +,
+ f,zl,zu,jacl;
+ init = zero(T)
+ )
+ F3 = mapreduce(
+ (x_lr,xl_r,zl_r) -> (x_lr >= xl_r && zl_r >= 0) ? abs((x_lr-xl_r)*zl_r-mu) : T(Inf),
+ +,
+ x_lr,xl_r,zl_r;
+ init = zero(T)
+ )
+ F4 = mapreduce(
+        (xu_r,x_ur,zu_r) -> (xu_r >= x_ur && zu_r >= 0) ? abs((xu_r-x_ur)*zu_r-mu) : T(Inf),
+ +,
+        xu_r,x_ur,zu_r;
+ init = zero(T)
+ )
+ return F1 + F2 + F3 + F4
+end
+
+function get_varphi_d_R(
+ f_R::AbstractVector{T},
+ x::AbstractVector{T},
+ xl::AbstractVector{T},
+ xu::AbstractVector{T},
+ dx::AbstractVector{T},
+ pp::AbstractVector{T},
+ nn::AbstractVector{T},
+ dpp::AbstractVector{T},
+ dnn::AbstractVector{T},
+ mu_R,
+ rho,
+) where T
+ f(x,dx) = (rho - mu_R/x) * dx
+ return +(
+ mapreduce(
+ (f_R, x, xl, xu, dx) -> (f_R - mu_R/(x-xl) + mu_R/(xu-x)) * dx,
+ +,
+ f_R, x, xl, xu, dx;
+ init = zero(T)
+ ),
+ mapreduce(
+ f,
+ +,
+ pp,dpp;
+ init = zero(T)
+ ),
+ mapreduce(
+ f,
+ +,
+ nn,dnn;
+ init = zero(T)
+ ),
+ )
end
-function get_varphi_d_R(f_R, x, xl, xu, dx, pp, nn, dpp, dnn, mu_R, rho)
- varphi_d = 0.0
- @inbounds @simd for i=1:length(x)
- varphi_d += (f_R[i] - mu_R/(x[i]-xl[i]) + mu_R/(xu[i]-x[i])) * dx[i]
- end
- @inbounds @simd for i=1:length(pp)
- varphi_d += (rho - mu_R/pp[i]) * dpp[i]
- end
- @inbounds @simd for i=1:length(nn)
- varphi_d += (rho - mu_R/nn[i]) * dnn[i]
+function _initialize_variables!(x::T, xl, xu, bound_push, bound_fac) where T
+ if xl!=-T(Inf) && xu!=T(Inf)
+ return min(
+ xu-min(bound_push*max(1,abs(xu)), bound_fac*(xu-xl)),
+ max(xl+min(bound_push*max(1,abs(xl)),bound_fac*(xu-xl)),x),
+ )
+ elseif xl!=-T(Inf) && xu==T(Inf)
+ return max(xl+bound_push*max(1,abs(xl)), x)
+ elseif xl==-T(Inf) && xu!=T(Inf)
+ return min(xu-bound_push*max(1,abs(xu)), x)
end
- return varphi_d
+ return x
end
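+# Push the initial point strictly inside its bounds: move at least
+# bound_push*max(1, |bound|) away from each finite bound, capped by bound_fac
+# times the bound gap when the variable is bounded on both sides.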
function initialize_variables!(x, xl, xu, bound_push, bound_fac)
- @inbounds @simd for i=1:length(x)
- if xl[i]!=-Inf && xu[i]!=Inf
- x[i] = min(
- xu[i]-min(bound_push*max(1,abs(xu[i])), bound_fac*(xu[i]-xl[i])),
- max(xl[i]+min(bound_push*max(1,abs(xl[i])),bound_fac*(xu[i]-xl[i])),x[i]),
- )
- elseif xl[i]!=-Inf && xu[i]==Inf
- x[i] = max(xl[i]+bound_push*max(1,abs(xl[i])), x[i])
- elseif xl[i]==-Inf && xu[i]!=Inf
- x[i] = min(xu[i]-bound_push*max(1,abs(xu[i])), x[i])
- end
- end
+ map!((x,l,u) -> _initialize_variables!(x,l,u, bound_push, bound_fac), x, x, xl, xu)
end
-function adjust_boundary!(x_lr::VT, xl_r, x_ur, xu_r, mu) where {T, VT <: AbstractVector{T}}
- adjusted = 0
+function adjust_boundary!(
+ x_lr::AbstractVector{T},
+ xl_r::AbstractVector{T},
+ x_ur::AbstractVector{T},
+ xu_r::AbstractVector{T},
+ mu,
+) where T
c1 = eps(T)*mu
- c2= eps(T)^(3/4)
- @inbounds @simd for i=1:length(xl_r)
- if x_lr[i]-xl_r[i] < c1
- xl_r[i] -= c2*max(1,abs(x_lr[i]))
- adjusted += 1
- end
- end
- @inbounds @simd for i=1:length(xu_r)
- if xu_r[i]-x_ur[i] < c1
- xu_r[i] += c2*max(1, abs(x_ur[i]))
- adjusted += 1
- end
- end
- return adjusted
+ c2 = eps(T)^(3/4)
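+    # If a slack shrinks below c1 = eps(T)*mu, push the corresponding bound out
+    # by c2*max(1, |x|) so the barrier terms remain finite.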
+ map!(
+ (x_lr, xl_r) -> (x_lr-xl_r < c1) ? (xl_r - c2*max(1,abs(x_lr))) : xl_r,
+ xl_r, x_lr, xl_r
+ )
+ map!(
+ (xu_r, x_ur) -> (xu_r-x_ur < c1) ? (xu_r + c2*max(1,abs(x_ur))) : xu_r,
+ xu_r, xu_r, x_ur
+ )
end
-function get_rel_search_norm(x, dx)
- rel_search_norm = 0.0
- @inbounds @simd for i=1:length(x)
- rel_search_norm = max(
- rel_search_norm,
- abs(dx[i]) / (1.0 + abs(x[i])),
- )
- end
- return rel_search_norm
+function get_rel_search_norm(x::AbstractVector{T}, dx::AbstractVector{T}) where T
+    return mapreduce(
+        (x,dx) -> abs(dx) / (one(T) + abs(x)),
+        max,
+        x, dx;
+        init = zero(T)
+    )
end
# Utility functions
@@ -522,7 +693,13 @@ function get_sc(zl_r, zu_r, s_max)
) / s_max
end
-function get_mu(mu, mu_min, mu_linear_decrease_factor, mu_superlinear_decrease_power, tol)
+function get_mu(
+ mu,
+ mu_min,
+ mu_linear_decrease_factor,
+ mu_superlinear_decrease_power,
+ tol,
+)
# Warning: `a * tol` should be strictly less than 100 * mu_min, see issue #242
a = min(99.0 * mu_min / tol, 0.01)
return max(
@@ -534,7 +711,17 @@ end
@inline get_tau(mu, tau_min) = max(tau_min, 1-mu)
-function get_alpha_min(theta, varphi_d, theta_min, gamma_theta, gamma_phi, alpha_min_frac, del, s_theta, s_phi)
+function get_alpha_min(
+ theta,
+ varphi_d,
+ theta_min,
+ gamma_theta,
+ gamma_phi,
+ alpha_min_frac,
+ del,
+ s_theta,
+ s_phi,
+)
if varphi_d<0
if theta<=theta_min
return alpha_min_frac*min(
@@ -584,19 +771,34 @@ function is_barr_obj_rapid_increase(varphi, varphi_trial, obj_max_inc)
return (varphi_trial >= varphi) && (log10(varphi_trial-varphi) > obj_max_inc + max(1.0, log10(abs(varphi))))
end
-function reset_bound_dual!(z, x, mu, kappa_sigma)
- @inbounds @simd for i in eachindex(z)
- z[i] = max(min(z[i], (kappa_sigma*mu)/x[i]), (mu/kappa_sigma)/x[i])
- end
+function reset_bound_dual!(
+ z::AbstractVector{T},
+ x::AbstractVector{T},
+ mu,
+ kappa_sigma,
+) where T
+ map!(
+ (z, x) -> max(min(z, (kappa_sigma*mu)/x), (mu/kappa_sigma)/x),
+ z, z, x
+ )
return
end
-function reset_bound_dual!(z, x1, x2, mu, kappa_sigma)
- @inbounds @simd for i in eachindex(z)
- z[i] = max(min(z[i], (kappa_sigma*mu)/(x1[i]-x2[i])), (mu/kappa_sigma)/(x1[i]-x2[i]))
- end
+
+function reset_bound_dual!(
+ z::AbstractVector{T},
+ x1::AbstractVector{T},
+ x2::AbstractVector{T},
+ mu,
+ kappa_sigma,
+) where T
+ map!(
+ (z,x1,x2) -> max(min(z, (kappa_sigma*mu)/(x1-x2)), (mu/kappa_sigma)/(x1-x2)),
+ z,z,x1,x2
+ )
return
end
+
function get_ftype(filter,theta,theta_trial,varphi,varphi_trial,switching_condition,armijo_condition,
theta_min,obj_max_inc,gamma_theta,gamma_phi,has_constraints)
is_filter_acceptable(filter,theta_trial,varphi_trial) || return " "
@@ -625,31 +827,8 @@ function _get_fixed_variable_index(
return fixed_aug_index
end
-function fixed_variable_treatment_vec!(vec, ind_fixed)
- @inbounds @simd for i in ind_fixed
- vec[i] = 0.0
- end
-end
-
-function fixed_variable_treatment_z!(zl, zu, f, jacl, ind_fixed)
- @inbounds @simd for i in ind_fixed
- z = f[i]+jacl[i]
- if z >= 0.0
- zl[i] = z
- zu[i] = 0.0
- else
- zl[i] = 0.0
- zu[i] = -z
- end
- end
-end
-
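+# Dual-infeasibility perturbation (the kappa_d heuristic, as in Ipopt): shift the
+# primal rhs on variables bounded on one side only, so their multipliers stay bounded.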
function dual_inf_perturbation!(px, ind_llb, ind_uub, mu, kappa_d)
- @inbounds @simd for i in ind_llb
- px[i] -= mu*kappa_d
- end
- @inbounds @simd for i in ind_uub
- px[i] += mu*kappa_d
- end
+ px[ind_llb] .-= mu*kappa_d
+ px[ind_uub] .+= mu*kappa_d
end
diff --git a/src/IPM/restoration.jl b/src/IPM/restoration.jl
index ff765c67..60d583dd 100644
--- a/src/IPM/restoration.jl
+++ b/src/IPM/restoration.jl
@@ -1,24 +1,24 @@
-mutable struct RobustRestorer{T}
+mutable struct RobustRestorer{T, VT}
obj_val_R::T
- f_R::Vector{T}
- x_ref::Vector{T}
+ f_R::VT
+ x_ref::VT
theta_ref::T
- D_R::Vector{T}
+ D_R::VT
obj_val_R_trial::T
- pp::Vector{T}
- nn::Vector{T}
- zp::Vector{T}
- zn::Vector{T}
+ pp::VT
+ nn::VT
+ zp::VT
+ zn::VT
- dpp::Vector{T}
- dnn::Vector{T}
- dzp::Vector{T}
- dzn::Vector{T}
+ dpp::VT
+ dnn::VT
+ dzp::VT
+ dzn::VT
- pp_trial::Vector{T}
- nn_trial::Vector{T}
+ pp_trial::VT
+ nn_trial::VT
inf_pr_R::T
inf_du_R::T
@@ -31,33 +31,41 @@ mutable struct RobustRestorer{T}
filter::Vector{Tuple{T,T}}
end
-function RobustRestorer(solver::AbstractMadNLPSolver{T}) where T
-
- nn = Vector{T}(undef,solver.m)
- zp = Vector{T}(undef,solver.m)
- zn = Vector{T}(undef,solver.m)
- dpp= Vector{T}(undef,solver.m)
- dnn= Vector{T}(undef,solver.m)
- dzp= Vector{T}(undef,solver.m)
- dzn= Vector{T}(undef,solver.m)
- pp_trial = Vector{T}(undef,solver.m)
- nn_trial = Vector{T}(undef,solver.m)
-
- return RobustRestorer{T}(
- 0.,
- primal(solver._w2),
- primal(solver._w1),
- 0.,
- primal(solver._w3),
- 0.,
- dual(solver._w3),
- dual(solver._w4),
- zp, zn,
- dpp, dnn, dzp, dzn,
- dual(solver._w2),
- dual(solver._w1),
- 0.,0.,0.,0.,0.,0.,
- Tuple{T,T}[],
+function RobustRestorer(solver::AbstractMadNLPSolver{T}) where {T}
+
+    f_R = similar(solver.y, solver.n)
+    x_ref = similar(solver.y, solver.n)
+    D_R = similar(solver.y, solver.n)
+    pp = similar(solver.y, solver.m)
+    nn = similar(solver.y, solver.m)
+    zp = similar(solver.y, solver.m)
+    zn = similar(solver.y, solver.m)
+    dpp = similar(solver.y, solver.m)
+    dnn = similar(solver.y, solver.m)
+    dzp = similar(solver.y, solver.m)
+    dzn = similar(solver.y, solver.m)
+    pp_trial = similar(solver.y, solver.m)
+    nn_trial = similar(solver.y, solver.m)
+
+ return RobustRestorer(
+ zero(T),
+ f_R,
+ x_ref,
+ zero(T),
+ D_R,
+ zero(T),
+ pp,
+ nn,
+ zp, zn,
+ dpp, dnn, dzp, dzn,
+ pp_trial,
+ nn_trial,
+ zero(T), zero(T), zero(T), zero(T), zero(T), zero(T),
+ Tuple{T, T}[],
)
end
@@ -68,21 +76,22 @@ function initialize_robust_restorer!(solver::AbstractMadNLPSolver{T}) where T
copyto!(RR.x_ref, full(solver.x))
RR.theta_ref = get_theta(solver.c)
- @inbounds @simd for i in eachindex(RR.D_R)
- RR.D_R[i] = min(one(T), one(T) / abs(RR.x_ref[i]))
- end
+ RR.D_R .= min.(one(T), one(T) ./ abs.(RR.x_ref))
RR.mu_R = max(solver.mu, norm(solver.c, Inf))
RR.tau_R= max(solver.opt.tau_min,1-RR.mu_R)
RR.zeta = sqrt(RR.mu_R)
- @inbounds @simd for i in eachindex(RR.nn)
- RR.nn[i] = (RR.mu_R - solver.opt.rho*solver.c[i])/2 /solver.opt.rho +
- sqrt(((RR.mu_R-solver.opt.rho*solver.c[i])/2 /solver.opt.rho)^2 + RR.mu_R*solver.c[i]/2 /solver.opt.rho)
- RR.pp[i] = solver.c[i] + RR.nn[i]
- RR.zp[i] = RR.mu_R / RR.pp[i]
- RR.zn[i] = RR.mu_R / RR.nn[i]
- end
+ rho = solver.opt.rho
+ mu = RR.mu_R
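+    # Closed-form initialization of the relaxation (p, n) and their duals: nn is
+    # the positive root of the scalar quadratic first-order condition (cf. the
+    # restoration-phase initialization in Ipopt), then pp = c + nn and
+    # zp, zn = mu_R ./ (pp, nn).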
+    RR.nn .=
+        (mu .- rho .* solver.c) ./ 2 ./ rho .+
+        sqrt.(
+            ((mu .- rho .* solver.c) ./ 2 ./ rho).^2 .+ mu .* solver.c ./ 2 ./ rho
+        )
+ RR.pp .= solver.c .+ RR.nn
+ RR.zp .= RR.mu_R ./ RR.pp
+ RR.zn .= RR.mu_R ./ RR.nn
RR.obj_val_R = get_obj_val_R(RR.pp,RR.nn,RR.D_R,full(solver.x),RR.x_ref,solver.opt.rho,RR.zeta)
fill!(RR.f_R, zero(T))
@@ -90,12 +99,11 @@ function initialize_robust_restorer!(solver::AbstractMadNLPSolver{T}) where T
push!(RR.filter, (solver.theta_max,-Inf))
fill!(solver.y, zero(T))
- @inbounds @simd for i in eachindex(solver.zl_r)
- solver.zl_r[i] = min(solver.opt.rho, solver.zl_r[i])
- end
- @inbounds @simd for i in eachindex(solver.zu_r)
- solver.zu_r[i] = min(solver.opt.rho, solver.zu_r[i])
- end
+ solver.zl_r .= min.(solver.opt.rho, solver.zl_r)
+ solver.zu_r .= min.(solver.opt.rho, solver.zu_r)
+ # fill!(solver.zl_r, one(T)) # Experimental
+ # fill!(solver.zu_r, one(T)) # Experimental
+
solver.cnt.t = 0
# misc
diff --git a/src/IPM/solver.jl b/src/IPM/solver.jl
index 23046fcb..4cd65ad3 100644
--- a/src/IPM/solver.jl
+++ b/src/IPM/solver.jl
@@ -1,6 +1,13 @@
+"""
+ madnlp(model::AbstractNLPModel; options...)
+
+Build a [`MadNLPSolver`](@ref) and solve it using
+the interior-point method. Return the solution
+as a [`MadNLPExecutionStats`](@ref).
+
+"""
function madnlp(model::AbstractNLPModel; kwargs...)
solver = MadNLPSolver(model;kwargs...)
- initialize!(solver.kkt)
return solve!(solver)
end
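+
+# Typical usage (a sketch; `model` can be any `AbstractNLPModel`, e.g. built with
+# NLPModelsJuMP or CUTEst.jl; `tol` is a standard MadNLP option):
+#
+#     stats = madnlp(model; tol=1e-8)
+#     stats.status == MadNLP.SOLVE_SUCCEEDED || @warn "solver did not converge"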
@@ -13,57 +20,53 @@ solve!(solver::AbstractMadNLPSolver; kwargs...) = solve!(
function initialize!(solver::AbstractMadNLPSolver{T}) where T
- # initializing slack variables
- @trace(solver.logger,"Initializing slack variables.")
- cons!(solver.nlp,get_x0(solver.nlp),_madnlp_unsafe_wrap(solver.c,get_ncon(solver.nlp)))
- solver.cnt.con_cnt += 1
- copyto!(slack(solver.x), solver.c_slk)
-
- # Initialization
- @trace(solver.logger,"Initializing primal and bound duals.")
- fill!(solver.zl_r, one(T))
- fill!(solver.zu_r, one(T))
- set_initial_bounds!(solver)
- initialize_variables!(
- full(solver.x),
- full(solver.xl),
- full(solver.xu),
- solver.opt.bound_push,solver.opt.bound_fac
+ nlp = solver.nlp
+ opt = solver.opt
+
+ # Initializing variables
+ @trace(solver.logger,"Initializing variables.")
+ initialize!(
+ solver.cb,
+ solver.x,
+ solver.xl,
+ solver.xu,
+ solver.y,
+ solver.rhs,
+ solver.ind_ineq;
+ tol=opt.tol,
+ bound_push=opt.bound_push,
+ bound_fac=opt.bound_fac,
)
+ fill!(solver.jacl, zero(T))
+ fill!(solver.zl_r, one(T))
+ fill!(solver.zu_r, one(T))
- # Automatic scaling (constraints)
- @trace(solver.logger,"Computing constraint scaling.")
- eval_jac_wrapper!(solver, solver.kkt, solver.x)
- compress_jacobian!(solver.kkt)
- if (solver.m > 0) && solver.opt.nlp_scaling
- jac = get_raw_jacobian(solver.kkt)
- scale_constraints!(solver.nlp, solver.con_scale, jac; max_gradient=solver.opt.nlp_scaling_max_gradient)
- set_jacobian_scaling!(solver.kkt, solver.con_scale)
- solver.y ./= solver.con_scale
+ # Initializing scaling factors
+ if opt.nlp_scaling
+ set_scaling!(
+ solver.cb,
+ solver.x,
+ solver.xl,
+ solver.xu,
+ solver.y,
+ solver.rhs,
+ solver.ind_ineq,
+ opt.nlp_scaling_max_gradient
+ )
end
- compress_jacobian!(solver.kkt)
- # Automatic scaling (objective)
+ # Initializing KKT system
+ initialize!(solver.kkt)
+
+ # Initializing jacobian and gradient
+ eval_jac_wrapper!(solver, solver.kkt, solver.x)
eval_grad_f_wrapper!(solver, solver.f,solver.x)
- @trace(solver.logger,"Computing objective scaling.")
- if solver.opt.nlp_scaling
- solver.obj_scale[] = scale_objective(solver.nlp, full(solver.f); max_gradient=solver.opt.nlp_scaling_max_gradient)
- _scal!(solver.obj_scale[], full(solver.f))
- end
- # Initialize dual variables
+
@trace(solver.logger,"Initializing constraint duals.")
if !solver.opt.dual_initialized
- set_initial_rhs!(solver, solver.kkt)
- initialize!(solver.kkt)
- factorize_wrapper!(solver)
- is_solved = solve_refine_wrapper!(solver,solver.d,solver.p)
- if !is_solved || (norm(dual(solver.d), Inf) > solver.opt.constr_mult_init_max)
- fill!(solver.y, zero(T))
- else
- copyto!(solver.y, dual(solver.d))
- end
+ initialize_dual(solver, opt.dual_initialization_method)
end
# Initializing
@@ -76,11 +79,30 @@ function initialize!(solver::AbstractMadNLPSolver{T}) where T
solver.theta_min = 1e-4*max(1,theta)
solver.mu = solver.opt.mu_init
solver.tau = max(solver.opt.tau_min,1-solver.opt.mu_init)
- solver.filter = [(solver.theta_max,-Inf)]
+ push!(solver.filter, (solver.theta_max,-Inf))
return REGULAR
end
+abstract type DualInitializeOptions end
+struct DualInitializeSetZero <: DualInitializeOptions end
+struct DualInitializeLeastSquares <: DualInitializeOptions end
+
+function initialize_dual(solver::MadNLPSolver{T}, ::Type{DualInitializeSetZero}) where T
+ fill!(solver.y, zero(T))
+end
+function initialize_dual(solver::MadNLPSolver{T}, ::Type{DualInitializeLeastSquares}) where T
+ set_initial_rhs!(solver, solver.kkt)
+ factorize_wrapper!(solver)
+ is_solved = solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
+ if !is_solved || (norm(dual(solver.d), Inf) > solver.opt.constr_mult_init_max)
+ fill!(solver.y, zero(T))
+ else
+ copyto!(solver.y, dual(solver.d))
+ end
+end
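+
+# The least-squares variant solves a single augmented KKT system with right-hand
+# side (-∇f + zl - zu, 0) -- see set_initial_rhs! -- and keeps the resulting
+# multipliers only if norm(y, Inf) <= constr_mult_init_max, as in Ipopt.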
function reinitialize!(solver::AbstractMadNLPSolver)
variable(solver.x) .= get_x0(solver.nlp)
@@ -96,7 +118,8 @@ function reinitialize!(solver::AbstractMadNLPSolver)
solver.theta_min=1e-4*max(1,theta)
solver.mu=solver.opt.mu_init
solver.tau=max(solver.opt.tau_min,1-solver.opt.mu_init)
- solver.filter = [(solver.theta_max,-Inf)]
+ empty!(solver.filter)
+ push!(solver.filter, (solver.theta_max,-Inf))
return REGULAR
end
@@ -109,7 +132,7 @@ function solve!(
x = nothing, y = nothing,
zl = nothing, zu = nothing,
kwargs...
-)
+ )
if x != nothing
full(solver.x)[1:get_nvar(nlp)] .= x
@@ -131,7 +154,7 @@ function solve!(
try
if solver.status == INITIAL
- @notice(solver.logger,"This is $(introduce()), running with $(introduce(solver.linear_solver))\n")
+ @notice(solver.logger,"This is $(introduce()), running with $(introduce(solver.kkt.linear_solver))\n")
print_init(solver)
solver.status = initialize!(solver)
else # resolving the problem
@@ -172,10 +195,10 @@ function solve!(
end
finally
solver.cnt.total_time = time() - solver.cnt.start_time
- !(solver.status < SOLVE_SUCCEEDED) && (print_summary_1(solver);print_summary_2(solver))
- # Unscale once the summary has been printed out
- unscale!(solver)
- @notice(solver.logger,"EXIT: $(STATUS_OUTPUT_DICT[solver.status])")
+ if !(solver.status < SOLVE_SUCCEEDED)
+ print_summary(solver)
+ end
+ @notice(solver.logger,"EXIT: $(get_status_output(solver.status, solver.opt))")
solver.opt.disable_garbage_collector &&
(GC.enable(true); @warn(solver.logger,"Julia garbage collector is turned back on"))
finalize(solver.logger)
@@ -193,19 +216,10 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
if (solver.cnt.k!=0 && !solver.opt.jacobian_constant)
eval_jac_wrapper!(solver, solver.kkt, solver.x)
end
+
jtprod!(solver.jacl, solver.kkt, solver.y)
- fixed_variable_treatment_vec!(solver.jacl,solver.ind_fixed)
- fixed_variable_treatment_z!(
- full(solver.zl),
- full(solver.zu),
- full(solver.f),
- solver.jacl,
- solver.ind_fixed,
- )
-
- sd = get_sd(solver.y,solver.zl_r,solver.zu_r,solver.opt.s_max)
- sc = get_sc(solver.zl_r,solver.zu_r,solver.opt.s_max)
-
+ sd = get_sd(solver.y,solver.zl_r,solver.zu_r,T(solver.opt.s_max))
+ sc = get_sc(solver.zl_r,solver.zu_r,T(solver.opt.s_max))
solver.inf_pr = get_inf_pr(solver.c)
solver.inf_du = get_inf_du(
full(solver.f),
@@ -214,7 +228,7 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
solver.jacl,
sd,
)
- solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc)
+ solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc)
inf_compl_mu = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,solver.mu,sc)
print_iter(solver)
@@ -250,20 +264,9 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
set_aug_diagonal!(solver.kkt,solver)
set_aug_rhs!(solver, solver.kkt, solver.c)
- if solver.opt.inertia_correction_method == INERTIA_FREE
- set_aug_rhs_ifr!(solver, solver.kkt)
- end
dual_inf_perturbation!(primal(solver.p),solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d)
- # start inertia conrrection
- @trace(solver.logger,"Solving primal-dual system.")
- if solver.opt.inertia_correction_method == INERTIA_FREE
- inertia_free_reg(solver) || return ROBUST
- elseif solver.opt.inertia_correction_method == INERTIA_BASED
- inertia_based_reg(solver) || return ROBUST
- end
-
- finish_aug_solve!(solver, solver.kkt, solver.mu)
+ inertia_correction!(solver.inertia_corrector, solver) || return ROBUST
# filter start
@trace(solver.logger,"Backtracking line search initiated.")
@@ -290,15 +293,17 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
solver.opt.alpha_min_frac,solver.opt.delta,solver.opt.s_theta,solver.opt.s_phi)
solver.cnt.l = 1
solver.alpha = alpha_max
- varphi_trial= 0.
- theta_trial = 0.
- small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T)
+ varphi_trial= zero(T)
+ theta_trial = zero(T)
+ small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T)
switching_condition = is_switching(varphi_d,solver.alpha,solver.opt.s_phi,solver.opt.delta,2.,solver.opt.s_theta)
armijo_condition = false
+ unsuccessful_iterate = false
+
while true
+
copyto!(full(solver.x_trial), full(solver.x))
axpy!(solver.alpha, primal(solver.d), primal(solver.x_trial))
-
solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial)
eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial)
@@ -312,11 +317,21 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
solver.filter,theta,theta_trial,varphi,varphi_trial,switching_condition,armijo_condition,
solver.theta_min,solver.opt.obj_max_inc,solver.opt.gamma_theta,solver.opt.gamma_phi,
has_constraints(solver))
- solver.ftype in ["f","h"] && (@trace(solver.logger,"Step accepted with type $(solver.ftype)"); break)
- solver.cnt.l==1 && theta_trial>=theta && second_order_correction(
- solver,alpha_max,theta,varphi,theta_trial,varphi_d,switching_condition) && break
+ if solver.ftype in ["f","h"]
+ @trace(solver.logger,"Step accepted with type $(solver.ftype)")
+ break
+ end
+
+ if solver.cnt.l==1 && theta_trial>=theta
+ if second_order_correction(
+ solver,alpha_max,theta,varphi,theta_trial,varphi_d,switching_condition
+ )
+ break
+ end
+ end
+ unsuccessful_iterate = true
solver.alpha /= 2
solver.cnt.l += 1
if solver.alpha < alpha_min
@@ -326,23 +341,54 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
return RESTORE
else
@trace(solver.logger,"Step rejected; proceed with the next trial step.")
- solver.alpha * norm(primal(solver.d)) < eps(T)*10 &&
- return solver.cnt.acceptable_cnt >0 ?
- SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL
+ if solver.alpha * norm(primal(solver.d)) < eps(T)*10
+ if (solver.cnt.restoration_fail_count += 1) >= 4
+ return solver.cnt.acceptable_cnt >0 ?
+ SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL
+ else
+                        # (experimental) rather than giving up right away, give
+                        # MadNLP.jl a second chance to make progress from the
+                        # current iterate, with reset duals and a fresh filter
+
+ fill!(solver.y, zero(T))
+ fill!(solver.zl_r, one(T))
+ fill!(solver.zu_r, one(T))
+ empty!(solver.filter)
+ push!(solver.filter,(solver.theta_max,-Inf))
+ solver.cnt.k+=1
+
+ return REGULAR
+ end
+ end
end
end
+        # This implements the heuristic from Section 3.2 of the Ipopt
+        # implementation paper; only Case I is implemented.
+ if unsuccessful_iterate
+ if (solver.cnt.unsuccessful_iterate += 1) >= 4
+ if solver.theta_max/10 > theta_trial
+ @debug(solver.logger, "restarting filter")
+ solver.theta_max /= 10
+ empty!(solver.filter)
+ push!(solver.filter,(solver.theta_max,-Inf))
+ end
+ solver.cnt.unsuccessful_iterate = 0
+ end
+ else
+ solver.cnt.unsuccessful_iterate = 0
+ end
+
@trace(solver.logger,"Updating primal-dual variables.")
copyto!(full(solver.x), full(solver.x_trial))
copyto!(solver.c, solver.c_trial)
solver.obj_val = solver.obj_val_trial
- adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
- adjusted > 0 &&
- @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound")
+ adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
axpy!(solver.alpha,dual(solver.d),solver.y)
- axpy!(solver.alpha_z, dual_lb(solver.d), solver.zl_r)
- axpy!(solver.alpha_z, dual_ub(solver.d), solver.zu_r)
+
+ solver.zl_r .+= solver.alpha_z .* dual_lb(solver.d)
+ solver.zu_r .+= solver.alpha_z .* dual_ub(solver.d)
reset_bound_dual!(
primal(solver.zl),
primal(solver.x),
@@ -355,6 +401,7 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
primal(solver.x),
solver.mu,solver.opt.kappa_sigma,
)
+
eval_grad_f_wrapper!(solver, solver.f,solver.x)
if !switching_condition || !armijo_condition
@@ -363,11 +410,12 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T
end
solver.cnt.k+=1
- @trace(solver.logger,"Proceeding to the next interior point iteration.")
+ @trace(solver.logger,"Proceeding to the next interior point iteration.")
end
end
-function restore!(solver::AbstractMadNLPSolver)
+
+function restore!(solver::AbstractMadNLPSolver{T}) where T
solver.del_w = 0
# Backup the previous primal iterate
copyto!(primal(solver._w1), full(solver.x))
@@ -388,10 +436,8 @@ function restore!(solver::AbstractMadNLPSolver)
solver.zu_r,
solver.mu,
)
- solver.cnt.t = 0
- solver.alpha_z = 0.0
+ solver.alpha_z = zero(T)
solver.ftype = "R"
-
while true
alpha_max = get_alpha_max(
primal(solver.x),
@@ -407,8 +453,8 @@ function restore!(solver::AbstractMadNLPSolver)
axpy!(solver.alpha, primal(solver.d), full(solver.x))
axpy!(solver.alpha, dual(solver.d), solver.y)
- axpy!(solver.alpha, dual_lb(solver.d), solver.zl_r)
- axpy!(solver.alpha, dual_ub(solver.d), solver.zu_r)
+ solver.zl_r .+= solver.alpha .* dual_lb(solver.d)
+ solver.zu_r .+= solver.alpha .* dual_ub(solver.d)
eval_cons_wrapper!(solver,solver.c,solver.x)
eval_grad_f_wrapper!(solver,solver.f,solver.x)
@@ -438,10 +484,7 @@ function restore!(solver::AbstractMadNLPSolver)
return ROBUST
end
- adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
- adjusted > 0 &&
- @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound")
-
+ adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
F = F_trial
@@ -466,7 +509,7 @@ function restore!(solver::AbstractMadNLPSolver)
sd,
)
- solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc)
+ solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc)
inf_compl_mu = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,solver.mu,sc)
print_iter(solver)
@@ -476,8 +519,9 @@ function restore!(solver::AbstractMadNLPSolver)
dual_inf_perturbation!(primal(solver.p),solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d)
factorize_wrapper!(solver)
- solve_refine_wrapper!(solver,solver.d,solver.p)
- finish_aug_solve!(solver, solver.kkt, solver.mu)
+ solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
solver.ftype = "f"
end
@@ -491,14 +535,6 @@ function robust!(solver::MadNLPSolver{T}) where T
eval_jac_wrapper!(solver, solver.kkt, solver.x)
end
jtprod!(solver.jacl, solver.kkt, solver.y)
- fixed_variable_treatment_vec!(solver.jacl,solver.ind_fixed)
- fixed_variable_treatment_z!(
- full(solver.zl),
- full(solver.zu),
- full(solver.f),
- solver.jacl,
- solver.ind_fixed,
- )
# evaluate termination criteria
@trace(solver.logger,"Evaluating restoration phase termination criteria.")
@@ -512,13 +548,13 @@ function robust!(solver::MadNLPSolver{T}) where T
solver.jacl,
sd,
)
- solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc)
+ solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc)
# Robust restoration phase error
RR.inf_pr_R = get_inf_pr_R(solver.c,RR.pp,RR.nn)
RR.inf_du_R = get_inf_du_R(RR.f_R,solver.y,primal(solver.zl),primal(solver.zu),solver.jacl,RR.zp,RR.zn,solver.opt.rho,sd)
RR.inf_compl_R = get_inf_compl_R(
- solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,0.,sc)
+ solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,zero(T),sc)
inf_compl_mu_R = get_inf_compl_R(
solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,RR.mu_R,sc)
@@ -528,13 +564,12 @@ function robust!(solver::MadNLPSolver{T}) where T
solver.cnt.k>=solver.opt.max_iter && return MAXIMUM_ITERATIONS_EXCEEDED
time()-solver.cnt.start_time>=solver.opt.max_wall_time && return MAXIMUM_WALLTIME_EXCEEDED
-
# update the barrier parameter
@trace(solver.logger,"Updating restoration phase barrier parameter.")
- while RR.mu_R >= solver.opt.mu_min*100 &&
+ while RR.mu_R >= solver.opt.mu_min &&
max(RR.inf_pr_R,RR.inf_du_R,inf_compl_mu_R) <= solver.opt.barrier_tol_factor*RR.mu_R
RR.mu_R = get_mu(RR.mu_R,solver.opt.mu_min,
- solver.opt.mu_linear_decrease_factor,solver.opt.mu_superlinear_decrease_power,solver.opt.tol)
+ solver.opt.mu_linear_decrease_factor,solver.opt.mu_superlinear_decrease_power,solver.opt.tol)
inf_compl_mu_R = get_inf_compl_R(
solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,RR.mu_R,sc)
RR.tau_R= max(solver.opt.tau_min,1-RR.mu_R)
@@ -549,15 +584,18 @@ function robust!(solver::MadNLPSolver{T}) where T
eval_lag_hess_wrapper!(solver, solver.kkt, solver.x, solver.y; is_resto=true)
end
set_aug_RR!(solver.kkt, solver, RR)
- set_aug_rhs_RR!(solver, solver.kkt, RR, solver.opt.rho)
# without inertia correction,
@trace(solver.logger,"Solving restoration phase primal-dual system.")
- factorize_wrapper!(solver)
- solve_refine_wrapper!(solver,solver.d,solver.p)
+ set_aug_rhs_RR!(solver, solver.kkt, RR, solver.opt.rho)
- finish_aug_solve!(solver, solver.kkt, RR.mu_R)
- finish_aug_solve_RR!(RR.dpp,RR.dnn,RR.dzp,RR.dzn,solver.y,dual(solver.d),RR.pp,RR.nn,RR.zp,RR.zn,RR.mu_R,solver.opt.rho)
+ inertia_correction!(solver.inertia_corrector, solver) || return RESTORATION_FAILED
+
+ finish_aug_solve_RR!(
+ RR.dpp,RR.dnn,RR.dzp,RR.dzn,solver.y,dual(solver.d),
+ RR.pp,RR.nn,RR.zp,RR.zn,RR.mu_R,solver.opt.rho
+ )
theta_R = get_theta_R(solver.c,RR.pp,RR.nn)
@@ -587,8 +625,8 @@ function robust!(solver::MadNLPSolver{T}) where T
@trace(solver.logger,"Backtracking line search initiated.")
solver.alpha = alpha_max
solver.cnt.l = 1
- theta_R_trial = 0.
- varphi_R_trial = 0.
+ theta_R_trial = zero(T)
+ varphi_R_trial = zero(T)
small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T)
switching_condition = is_switching(varphi_d_R,solver.alpha,solver.opt.s_phi,solver.opt.delta,theta_R,solver.opt.s_theta)
armijo_condition = false
@@ -608,7 +646,7 @@ function robust!(solver::MadNLPSolver{T}) where T
varphi_R_trial = get_varphi_R(
RR.obj_val_R_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,RR.pp_trial,RR.nn_trial,RR.mu_R)
- armijo_condition = is_armijo(varphi_R_trial,varphi_R,0.,solver.alpha,varphi_d_R) #####
+ armijo_condition = is_armijo(varphi_R_trial,varphi_R,solver.opt.eta_phi,solver.alpha,varphi_d_R)
small_search_norm && break
solver.ftype = get_ftype(
@@ -622,7 +660,23 @@ function robust!(solver::MadNLPSolver{T}) where T
solver.cnt.l += 1
if solver.alpha < alpha_min
@debug(solver.logger,"Restoration phase cannot find an acceptable step at iteration $(solver.cnt.k).")
- return RESTORATION_FAILED
+ if (solver.cnt.restoration_fail_count += 1) >= 4
+ return RESTORATION_FAILED
+ else
+                # (experimental) rather than giving up right away, give MadNLP.jl
+                # a second chance to make progress from the current iterate, with
+                # reset duals and a fresh filter
+
+ fill!(solver.y, zero(T))
+ fill!(solver.zl_r, one(T))
+ fill!(solver.zu_r, one(T))
+ empty!(solver.filter)
+ push!(solver.filter,(solver.theta_max,-Inf))
+
+ solver.cnt.k+=1
+ solver.cnt.t+=1
+ return REGULAR
+ end
else
@trace(solver.logger,"Step rejected; proceed with the next trial step.")
solver.alpha < eps(T)*10 && return solver.cnt.acceptable_cnt >0 ?
@@ -640,11 +694,12 @@ function robust!(solver::MadNLPSolver{T}) where T
set_f_RR!(solver,RR)
axpy!(solver.alpha, dual(solver.d), solver.y)
- axpy!(solver.alpha_z, dual_lb(solver.d),solver.zl_r)
- axpy!(solver.alpha_z, dual_ub(solver.d),solver.zu_r)
axpy!(solver.alpha_z, RR.dzp,RR.zp)
axpy!(solver.alpha_z, RR.dzn,RR.zn)
+ solver.zl_r .+= solver.alpha_z .* dual_lb(solver.d)
+ solver.zu_r .+= solver.alpha_z .* dual_ub(solver.d)
+
reset_bound_dual!(
primal(solver.zl),
primal(solver.x),
@@ -660,9 +715,7 @@ function robust!(solver::MadNLPSolver{T}) where T
reset_bound_dual!(RR.zp,RR.pp,RR.mu_R,solver.opt.kappa_sigma)
reset_bound_dual!(RR.zn,RR.nn,RR.mu_R,solver.opt.kappa_sigma)
- adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
- adjusted > 0 &&
- @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound")
+ adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu)
if !switching_condition || !armijo_condition
@trace(solver.logger,"Augmenting restoration phase filter.")
@@ -680,20 +733,21 @@ function robust!(solver::MadNLPSolver{T}) where T
theta <= solver.opt.required_infeasibility_reduction * RR.theta_ref
@trace(solver.logger,"Going back to the regular phase.")
- solver.zl_r.=1
- solver.zu_r.=1
-
set_initial_rhs!(solver, solver.kkt)
initialize!(solver.kkt)
factorize_wrapper!(solver)
- solve_refine_wrapper!(solver,solver.d,solver.p)
+ solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
if norm(dual(solver.d), Inf)>solver.opt.constr_mult_init_max
- fill!(solver.y, 0.0)
+ fill!(solver.y, zero(T))
else
copyto!(solver.y, dual(solver.d))
end
+
solver.cnt.k+=1
+ solver.cnt.t+=1
return REGULAR
end
@@ -707,91 +761,179 @@ function robust!(solver::MadNLPSolver{T}) where T
end
end
-function inertia_based_reg(solver::MadNLPSolver)
+function second_order_correction(solver::AbstractMadNLPSolver,alpha_max,theta,varphi,
+ theta_trial,varphi_d,switching_condition::Bool)
+ @trace(solver.logger,"Second-order correction started.")
+
+ wx = primal(solver._w1)
+ wy = dual(solver._w1)
+ copyto!(wy, solver.c_trial)
+ axpy!(alpha_max, solver.c, wy)
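+    # Second-order correction right-hand side (cf. Ipopt):
+    # c_soc = alpha_max * c(x_k) + c(x_k + alpha_max * d).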
+
+ theta_soc_old = theta_trial
+ for p=1:solver.opt.max_soc
+ # compute second order correction
+ set_aug_rhs!(solver, solver.kkt, wy)
+ dual_inf_perturbation!(
+ primal(solver.p),
+ solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d,
+ )
+ solve_refine_wrapper!(
+ solver._w1, solver, solver.p, solver._w4
+ )
+ alpha_soc = get_alpha_max(
+ primal(solver.x),
+ primal(solver.xl),
+ primal(solver.xu),
+ wx,solver.tau
+ )
+
+ copyto!(primal(solver.x_trial), primal(solver.x))
+ axpy!(alpha_soc, wx, primal(solver.x_trial))
+ eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial)
+ solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial)
+
+ theta_soc = get_theta(solver.c_trial)
+ varphi_soc= get_varphi(solver.obj_val_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,solver.mu)
+
+ !is_filter_acceptable(solver.filter,theta_soc,varphi_soc) && break
+
+ if theta <=solver.theta_min && switching_condition
+ # Case I
+ if is_armijo(varphi_soc,varphi,solver.opt.eta_phi,solver.alpha,varphi_d)
+ @trace(solver.logger,"Step in second order correction accepted by armijo condition.")
+ solver.ftype = "F"
+ solver.alpha=alpha_soc
+ return true
+ end
+ else
+ # Case II
+ if is_sufficient_progress(theta_soc,theta,solver.opt.gamma_theta,varphi_soc,varphi,solver.opt.gamma_phi,has_constraints(solver))
+ @trace(solver.logger,"Step in second order correction accepted by sufficient progress.")
+ solver.ftype = "H"
+ solver.alpha=alpha_soc
+ return true
+ end
+ end
+
+ theta_soc>solver.opt.kappa_soc*theta_soc_old && break
+ theta_soc_old = theta_soc
+ end
+ @trace(solver.logger,"Second-order correction terminated.")
+
+ return false
+end
+
+function inertia_correction!(
+ inertia_corrector::InertiaBased,
+ solver::MadNLPSolver{T}
+ ) where {T}
+
+ n_trial = 0
+ solver.del_w = del_w_prev = zero(T)
+
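+    # Regularization loop: try del_w = 0 first; on failure pick the initial
+    # perturbation from del_w_last, then increase it geometrically, switching to
+    # the restoration phase once it exceeds max_hessian_perturbation.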
@trace(solver.logger,"Inertia-based regularization started.")
factorize_wrapper!(solver)
- num_pos,num_zero,num_neg = inertia(solver.linear_solver)
- solve_status = num_zero!= 0 ? false : solve_refine_wrapper!(solver,solver.d,solver.p)
- n_trial = 0
- solver.del_w = del_w_prev = 0.0
- while !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) || !solve_status
+ num_pos,num_zero,num_neg = inertia(solver.kkt.linear_solver)
+
+ solve_status = !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) ?
+ false : solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4,
+ )
+
+ while !solve_status
@debug(solver.logger,"Primal-dual perturbed.")
- if solver.del_w == 0.0
- solver.del_w = solver.del_w_last==0. ? solver.opt.first_hessian_perturbation :
+
+ if n_trial == 0
+ solver.del_w = solver.del_w_last==zero(T) ? solver.opt.first_hessian_perturbation :
max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last)
else
- solver.del_w*= solver.del_w_last==0. ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact
- if solver.del_w>solver.opt.max_hessian_perturbation solver.cnt.k+=1
+ solver.del_w*= solver.del_w_last==zero(T) ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact
+ if solver.del_w>solver.opt.max_hessian_perturbation
+ solver.cnt.k+=1
@debug(solver.logger,"Primal regularization is too big. Switching to restoration phase.")
return false
end
end
- solver.del_c = (num_zero == 0 || !solve_status) ?
- solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) : 0.
+ solver.del_c = num_neg == 0 ? zero(T) : solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent)
regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c)
del_w_prev = solver.del_w
factorize_wrapper!(solver)
- num_pos,num_zero,num_neg = inertia(solver.linear_solver)
- solve_status = num_zero!= 0 ? false : solve_refine_wrapper!(solver,solver.d,solver.p)
+ num_pos,num_zero,num_neg = inertia(solver.kkt.linear_solver)
+
+ solve_status = !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) ?
+ false : solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
n_trial += 1
end
- solver.del_w != 0 && (solver.del_w_last = solver.del_w)
+ solver.del_w != 0 && (solver.del_w_last = solver.del_w)
return true
end
-function inertia_free_reg(solver::MadNLPSolver)
+function inertia_correction!(
+ inertia_corrector::InertiaFree,
+ solver::MadNLPSolver{T}
+ ) where T
+
+ n_trial = 0
+ solver.del_w = del_w_prev = zero(T)
@trace(solver.logger,"Inertia-free regularization started.")
dx = primal(solver.d)
- p0 = solver._w1
- d0 = solver._w2
- t = primal(solver._w3)
- n = primal(solver._w2)
- wx= primal(solver._w4)
- g = full(solver.x_trial) # just to avoid new allocation
-
- fill!(dual(solver._w3), 0)
- set_g_ifr!(solver,g)
+ p0 = inertia_corrector.p0
+ d0 = inertia_corrector.d0
+ t = inertia_corrector.t
+ n = primal(d0)
+ wx= inertia_corrector.wx
+ g = inertia_corrector.g
- fixed_variable_treatment_vec!(primal(solver._w1), solver.ind_fixed)
- fixed_variable_treatment_vec!(primal(solver.p), solver.ind_fixed)
- fixed_variable_treatment_vec!(g, solver.ind_fixed)
+ set_g_ifr!(solver,g)
+ set_aug_rhs_ifr!(solver, solver.kkt, p0)
factorize_wrapper!(solver)
- solve_status = (solve_refine_wrapper!(solver,d0,p0) && solve_refine_wrapper!(solver,solver.d,solver.p))
+
+ solve_status = solve_refine_wrapper!(
+ d0, solver, p0, solver._w3,
+ ) && solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4,
+ )
copyto!(t,dx)
axpy!(-1.,n,t)
- mul!(solver._w4, solver.kkt, solver._w3) # prepartation for curv_test
- n_trial = 0
- solver.del_w = del_w_prev = 0.
- while !curv_test(t,n,g,wx,solver.opt.inertia_free_tol) || !solve_status
+ while !curv_test(t,n,g,solver.kkt,wx,solver.opt.inertia_free_tol) || !solve_status
@debug(solver.logger,"Primal-dual perturbed.")
if n_trial == 0
solver.del_w = solver.del_w_last==.0 ? solver.opt.first_hessian_perturbation :
max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last)
else
solver.del_w*= solver.del_w_last==.0 ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact
- if solver.del_w>solver.opt.max_hessian_perturbation solver.cnt.k+=1
+ if solver.del_w>solver.opt.max_hessian_perturbation
+ solver.cnt.k+=1
@debug(solver.logger,"Primal regularization is too big. Switching to restoration phase.")
return false
end
end
- solver.del_c = !solve_status ?
- solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) : 0.
+ solver.del_c = solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent)
regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c)
del_w_prev = solver.del_w
factorize_wrapper!(solver)
- solve_status = (solve_refine_wrapper!(solver,d0,p0) && solve_refine_wrapper!(solver,solver.d,solver.p))
+ solve_status = solve_refine_wrapper!(
+ d0, solver, p0, solver._w3
+ ) && solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
copyto!(t,dx)
axpy!(-1.,n,t)
- mul!(solver._w4, solver.kkt, solver._w3) # prepartation for curv_test
n_trial += 1
end
@@ -799,66 +941,49 @@ function inertia_free_reg(solver::MadNLPSolver)
return true
end
-curv_test(t,n,g,wx,inertia_free_tol) = dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >=0
-
-function second_order_correction(solver::AbstractMadNLPSolver,alpha_max,theta,varphi,
- theta_trial,varphi_d,switching_condition::Bool)
- @trace(solver.logger,"Second-order correction started.")
-
- wx = primal(solver._w1)
- wy = dual(solver._w1)
- copyto!(wy, solver.c_trial)
- axpy!(alpha_max, solver.c, wy)
+function inertia_correction!(
+ inertia_corrector::InertiaIgnore,
+ solver::MadNLPSolver{T}
+ ) where T
- theta_soc_old = theta_trial
- for p=1:solver.opt.max_soc
- # compute second order correction
- set_aug_rhs!(solver, solver.kkt, wy)
- dual_inf_perturbation!(
- primal(solver.p),
- solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d,
- )
- solve_refine_wrapper!(solver,solver._w1,solver.p)
- alpha_soc = get_alpha_max(
- primal(solver.x),
- primal(solver.xl),
- primal(solver.xu),
- wx,solver.tau)
-
- copyto!(primal(solver.x_trial), primal(solver.x))
- axpy!(alpha_soc, wx, primal(solver.x_trial))
- eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial)
- solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial)
+ n_trial = 0
+ solver.del_w = del_w_prev = zero(T)
- theta_soc = get_theta(solver.c_trial)
- varphi_soc= get_varphi(solver.obj_val_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,solver.mu)
+    @trace(solver.logger,"Regularization started (inertia information is ignored).")
- !is_filter_acceptable(solver.filter,theta_soc,varphi_soc) && break
+ factorize_wrapper!(solver)
- if theta <=solver.theta_min && switching_condition
- # Case I
- if is_armijo(varphi_soc,varphi,solver.opt.eta_phi,solver.alpha,varphi_d)
- @trace(solver.logger,"Step in second order correction accepted by armijo condition.")
- solver.ftype = "F"
- solver.alpha=alpha_soc
- return true
- end
+ solve_status = solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4,
+ )
+ while !solve_status
+ @debug(solver.logger,"Primal-dual perturbed.")
+ if n_trial == 0
+ solver.del_w = solver.del_w_last==zero(T) ? solver.opt.first_hessian_perturbation :
+ max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last)
else
- # Case II
- if is_sufficient_progress(theta_soc,theta,solver.opt.gamma_theta,varphi_soc,varphi,solver.opt.gamma_phi,has_constraints(solver))
- @trace(solver.logger,"Step in second order correction accepted by sufficient progress.")
- solver.ftype = "H"
- solver.alpha=alpha_soc
- return true
+ solver.del_w*= solver.del_w_last==zero(T) ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact
+ if solver.del_w>solver.opt.max_hessian_perturbation
+ solver.cnt.k+=1
+ @debug(solver.logger,"Primal regularization is too big. Switching to restoration phase.")
+ return false
end
end
+ solver.del_c = solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent)
+ regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c)
+ del_w_prev = solver.del_w
- theta_soc>solver.opt.kappa_soc*theta_soc_old && break
- theta_soc_old = theta_soc
+ factorize_wrapper!(solver)
+ solve_status = solve_refine_wrapper!(
+ solver.d, solver, solver.p, solver._w4
+ )
+ n_trial += 1
end
- @trace(solver.logger,"Second-order correction terminated.")
-
- return false
+ solver.del_w != 0 && (solver.del_w_last = solver.del_w)
+ return true
end
-
+function curv_test(t,n,g,kkt,wx,inertia_free_tol)
+ mul_hess_blk!(wx, kkt, t)
+    return dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >= 0
+end
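+
+# Inertia-free curvature test: accept the step when
+#     t' W t + max(t' W n - g' n, 0) >= inertia_free_tol * t' t,
+# with t = dx - n; cf. the inertia-free filter line-search of Chiang and Zavala (2016).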
diff --git a/src/IPM/utils.jl b/src/IPM/utils.jl
index 43e04569..834a67af 100644
--- a/src/IPM/utils.jl
+++ b/src/IPM/utils.jl
@@ -1,42 +1,66 @@
-mutable struct MadNLPExecutionStats{T} <: AbstractExecutionStats
+"""
+ MadNLPExecutionStats{T, VT} <: AbstractExecutionStats
+
+Store the results returned by MadNLP once the interior-point
+algorithm has terminated.
+
+"""
+mutable struct MadNLPExecutionStats{T, VT} <: AbstractExecutionStats
+ options::MadNLPOptions
status::Status
- solution::Vector{T}
+ solution::VT
objective::T
- constraints::Vector{T}
+ constraints::VT
dual_feas::T
primal_feas::T
- multipliers::Vector{T}
- multipliers_L::Vector{T}
- multipliers_U::Vector{T}
+ multipliers::VT
+ multipliers_L::VT
+ multipliers_U::VT
iter::Int
- counters::NLPModels.Counters
- elapsed_time::Real
+ counters::MadNLPCounters
end
MadNLPExecutionStats(solver::MadNLPSolver) =MadNLPExecutionStats(
+ solver.opt,
solver.status,
- primal(solver.x),
- solver.obj_val,solver.c,
- solver.inf_du, solver.inf_pr,
- solver.y,
- primal(solver.zl),
- primal(solver.zu),
- solver.cnt.k, get_counters(solver.nlp),solver.cnt.total_time
+ primal(solver.x)[1:get_nvar(solver.nlp)],
+ solver.obj_val / solver.cb.obj_scale[],
+ solver.c ./ solver.cb.con_scale,
+ solver.inf_du,
+ solver.inf_pr,
+ copy(solver.y),
+ primal(solver.zl)[1:get_nvar(solver.nlp)],
+ primal(solver.zu)[1:get_nvar(solver.nlp)],
+ 0,
+ solver.cnt,
)
function update!(stats::MadNLPExecutionStats, solver::MadNLPSolver)
stats.status = solver.status
- stats.objective = solver.obj_val
+ stats.solution .= @view(primal(solver.x)[1:get_nvar(solver.nlp)])
+ stats.multipliers .= solver.y
+ stats.multipliers_L .= @view(primal(solver.zl)[1:get_nvar(solver.nlp)])
+ stats.multipliers_U .= @view(primal(solver.zu)[1:get_nvar(solver.nlp)])
+ # stats.solution .= min.(
+ # max.(
+ # @view(primal(solver.x)[1:get_nvar(solver.nlp)]),
+ # get_lvar(solver.nlp)
+ # ),
+ # get_uvar(solver.nlp)
+ # )
+ stats.objective = solver.obj_val / solver.cb.obj_scale[]
+ stats.constraints .= solver.c ./ solver.cb.con_scale .+ solver.rhs
+ stats.constraints[solver.ind_ineq] .+= slack(solver.x)
stats.dual_feas = solver.inf_du
stats.primal_feas = solver.inf_pr
+ update_z!(solver.cb, stats.multipliers_L, stats.multipliers_U, solver.jacl)
stats.iter = solver.cnt.k
- stats.elapsed_time = solver.cnt.total_time
return stats
end
get_counters(nlp::NLPModels.AbstractNLPModel) = nlp.counters
get_counters(nlp::NLPModels.AbstractNLSModel) = nlp.counters.counters
-getStatus(result::MadNLPExecutionStats) = STATUS_OUTPUT_DICT[result.status]
+getStatus(result::MadNLPExecutionStats) = get_status_output(result.status, result.options)
# Exceptions
struct InvalidNumberException <: Exception
@@ -48,18 +72,16 @@ struct NotEnoughDegreesOfFreedomException <: Exception end
has_constraints(solver) = solver.m != 0
function get_vars_info(solver)
- x_lb = get_lvar(solver.nlp)
- x_ub = get_uvar(solver.nlp)
+ nlp = solver.nlp
+
+ x_lb = get_lvar(nlp)
+ x_ub = get_uvar(nlp)
num_fixed = length(solver.ind_fixed)
- num_var = get_nvar(solver.nlp) - num_fixed
+ num_var = get_nvar(nlp) - num_fixed
num_llb_vars = length(solver.ind_llb)
- num_lu_vars = -num_fixed
- # Number of bounded variables
- for i in 1:get_nvar(solver.nlp)
- if (x_lb[i] > -Inf) && (x_ub[i] < Inf)
- num_lu_vars += 1
- end
- end
+
+ # TODO make this non-allocating
+ num_lu_vars = sum((x_lb .!= -Inf) .& (x_ub .!= Inf)) - num_fixed
num_uub_vars = length(solver.ind_uub)
return (
n_free=num_var,
@@ -71,26 +93,18 @@ function get_vars_info(solver)
end
function get_cons_info(solver)
- g_lb = get_lcon(solver.nlp)
- g_ub = get_ucon(solver.nlp)
- # Classify constraints
- num_eq_cons, num_ineq_cons = 0, 0
- num_ue_cons, num_le_cons, num_lu_cons = 0, 0, 0
- for i in 1:get_ncon(solver.nlp)
- l, u = g_lb[i], g_ub[i]
- if l == u
- num_eq_cons += 1
- elseif l < u
- num_ineq_cons += 1
- if isinf(l) && isfinite(u)
- num_ue_cons += 1
- elseif isfinite(l) && isinf(u)
- num_le_cons +=1
- else isfinite(l) && isfinite(u)
- num_lu_cons += 1
- end
- end
- end
+ nlp = solver.nlp
+
+ g_lb = get_lcon(nlp)
+ g_ub = get_ucon(nlp)
+
+ # TODO make this non-allocating
+ num_eq_cons = sum(g_lb .== g_ub)
+ num_ineq_cons = length(g_lb) - num_eq_cons
+ num_le_cons = sum((g_lb .!= -Inf) .& (g_ub .== Inf))
+ num_ue_cons = sum((g_ub .!= Inf) .& (g_lb .== -Inf))
+ num_lu_cons = num_ineq_cons - num_le_cons - num_ue_cons
+
return (
n_eq=num_eq_cons,
n_ineq=num_ineq_cons,
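The vectorized classification above mirrors the deleted per-constraint loop. A worked example with illustrative bounds:

    g_lb = [0.0, -Inf, 1.0, 1.0]
    g_ub = [0.0,  2.0, Inf, 2.0]
    num_eq   = sum(g_lb .== g_ub)                      # 1: row 1 is an equality
    num_ineq = length(g_lb) - num_eq                   # 3
    num_le   = sum((g_lb .!= -Inf) .& (g_ub .== Inf))  # 1: row 3, lower bound only
    num_ue   = sum((g_ub .!= Inf) .& (g_lb .== -Inf))  # 1: row 2, upper bound only
    num_lu   = num_ineq - num_le - num_ue              # 1: row 4, bounded on both sides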
@@ -124,6 +138,7 @@ function print_init(solver::AbstractMadNLPSolver)
end
function print_iter(solver::AbstractMadNLPSolver;is_resto=false)
+ obj_scale = solver.cb.obj_scale[]
mod(solver.cnt.k,10)==0&& @info(solver.logger,@sprintf(
"iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls"))
if is_resto
@@ -138,7 +153,7 @@ function print_iter(solver::AbstractMadNLPSolver;is_resto=false)
end
@info(solver.logger,@sprintf(
"%4i%s% 10.7e %6.2e %6.2e %5.1f %6.2e %s %6.2e %6.2e%s %i",
- solver.cnt.k,is_resto ? "r" : " ",solver.obj_val/solver.obj_scale[],
+ solver.cnt.k,is_resto ? "r" : " ",solver.obj_val/obj_scale,
inf_pr, inf_du, mu,
solver.cnt.k == 0 ? 0. : norm(primal(solver.d),Inf),
solver.del_w == 0 ? " - " : @sprintf("%5.1f",log(10,solver.del_w)),
@@ -146,23 +161,23 @@ function print_iter(solver::AbstractMadNLPSolver;is_resto=false)
return
end
-function print_summary_1(solver::AbstractMadNLPSolver)
+function print_summary(solver::AbstractMadNLPSolver)
+ # TODO query this from the NLPModels wrapper
+ obj_scale = solver.cb.obj_scale[]
+ solver.cnt.solver_time = solver.cnt.total_time-solver.cnt.linear_solver_time-solver.cnt.eval_function_time
+
@notice(solver.logger,"")
@notice(solver.logger,"Number of Iterations....: $(solver.cnt.k)\n")
@notice(solver.logger," (scaled) (unscaled)")
- @notice(solver.logger,@sprintf("Objective...............: % 1.16e % 1.16e",solver.obj_val,solver.obj_val/solver.obj_scale[]))
- @notice(solver.logger,@sprintf("Dual infeasibility......: %1.16e %1.16e",solver.inf_du,solver.inf_du/solver.obj_scale[]))
+ @notice(solver.logger,@sprintf("Objective...............: % 1.16e % 1.16e",solver.obj_val,solver.obj_val/obj_scale))
+ @notice(solver.logger,@sprintf("Dual infeasibility......: %1.16e %1.16e",solver.inf_du,solver.inf_du/obj_scale))
@notice(solver.logger,@sprintf("Constraint violation....: %1.16e %1.16e",norm(solver.c,Inf),solver.inf_pr))
@notice(solver.logger,@sprintf("Complementarity.........: %1.16e %1.16e",
- solver.inf_compl*solver.obj_scale[],solver.inf_compl))
+ solver.inf_compl*obj_scale,solver.inf_compl))
@notice(solver.logger,@sprintf("Overall NLP error.......: %1.16e %1.16e\n",
- max(solver.inf_du*solver.obj_scale[],norm(solver.c,Inf),solver.inf_compl),
+ max(solver.inf_du*obj_scale,norm(solver.c,Inf),solver.inf_compl),
max(solver.inf_du,solver.inf_pr,solver.inf_compl)))
- return
-end
-function print_summary_2(solver::AbstractMadNLPSolver)
- solver.cnt.solver_time = solver.cnt.total_time-solver.cnt.linear_solver_time-solver.cnt.eval_function_time
@notice(solver.logger,"Number of objective function evaluations = $(solver.cnt.obj_cnt)")
@notice(solver.logger,"Number of objective gradient evaluations = $(solver.cnt.obj_grad_cnt)")
@notice(solver.logger,"Number of constraint evaluations = $(solver.cnt.con_cnt)")
diff --git a/src/Interfaces/interfaces.jl b/src/Interfaces/interfaces.jl
deleted file mode 100644
index 217813c8..00000000
--- a/src/Interfaces/interfaces.jl
+++ /dev/null
@@ -1 +0,0 @@
-include("MOI_interface.jl")
diff --git a/src/Interfaces/utils.jl b/src/Interfaces/utils.jl
deleted file mode 100644
index 70bcfa87..00000000
--- a/src/Interfaces/utils.jl
+++ /dev/null
@@ -1,533 +0,0 @@
-# Copyright (c) 2013: Iain Dunning, Miles Lubin, and contributors
-#
-# Use of this source code is governed by an MIT-style license that can be found
-# in the LICENSE.md file or at https://opensource.org/licenses/MIT.
-
-# !!! warning
-#
-# The contents of this file are experimental.
-#
-# Until this message is removed, breaking changes to the functions and types,
-# including their deletion, may be introduced in any minor or patch release of Ipopt.
-
-@enum(
- _FunctionType,
- _kFunctionTypeVariableIndex,
- _kFunctionTypeScalarAffine,
- _kFunctionTypeScalarQuadratic,
-)
-
-@enum(
- _BoundType,
- _kBoundTypeLessThan,
- _kBoundTypeGreaterThan,
- _kBoundTypeEqualTo,
-)
-
-mutable struct QPBlockData{T}
- objective_type::_FunctionType
- objective_constant::T
- objective_linear_columns::Vector{Int}
- objective_linear_coefficients::Vector{T}
- objective_hessian_structure::Vector{Tuple{Int,Int}}
- objective_hessian_coefficients::Vector{T}
-
- linear_row_ends::Vector{Int}
- linear_jacobian_structure::Vector{Tuple{Int,Int}}
- linear_coefficients::Vector{T}
-
- quadratic_row_ends::Vector{Int}
- hessian_structure::Vector{Tuple{Int,Int}}
- quadratic_coefficients::Vector{T}
-
- g_L::Vector{T}
- g_U::Vector{T}
- mult_g::Vector{Union{Nothing,T}}
- function_type::Vector{_FunctionType}
- bound_type::Vector{_BoundType}
-
- function QPBlockData{T}() where {T}
- return new(
- # Objective coefficients
- _kFunctionTypeScalarAffine,
- zero(T),
- Int[],
- T[],
- Tuple{Int,Int}[],
- T[],
- # Linear constraints
- Int[],
- Tuple{Int,Int}[],
- T[],
- # Affine constraints
- Int[],
- Tuple{Int,Int}[],
- T[],
- # Bounds
- T[],
- T[],
- Union{Nothing,T}[],
- _FunctionType[],
- _BoundType[],
- )
- end
-end
-
-Base.length(block::QPBlockData) = length(block.bound_type)
-
-function _set_objective(block::QPBlockData{T}, f::MOI.VariableIndex) where {T}
- push!(block.objective_linear_columns, f.value)
- push!(block.objective_linear_coefficients, one(T))
- return zero(T)
-end
-
-function _set_objective(
- block::QPBlockData{T},
- f::MOI.ScalarAffineFunction{T},
-) where {T}
- _set_objective(block, f.terms)
- return f.constant
-end
-
-function _set_objective(
- block::QPBlockData{T},
- f::MOI.ScalarQuadraticFunction{T},
-) where {T}
- _set_objective(block, f.affine_terms)
- for term in f.quadratic_terms
- i, j = term.variable_1.value, term.variable_2.value
- push!(block.objective_hessian_structure, (i, j))
- push!(block.objective_hessian_coefficients, term.coefficient)
- end
- return f.constant
-end
-
-function _set_objective(
- block::QPBlockData{T},
- terms::Vector{MOI.ScalarAffineTerm{T}},
-) where {T}
- for term in terms
- push!(block.objective_linear_columns, term.variable.value)
- push!(block.objective_linear_coefficients, term.coefficient)
- end
- return
-end
-
-function MOI.set(
- block::QPBlockData{T},
- ::MOI.ObjectiveFunction{F},
- func::F,
-) where {
- T,
- F<:Union{
- MOI.VariableIndex,
- MOI.ScalarAffineFunction{T},
- MOI.ScalarQuadraticFunction{T},
- },
-}
- empty!(block.objective_hessian_structure)
- empty!(block.objective_hessian_coefficients)
- empty!(block.objective_linear_columns)
- empty!(block.objective_linear_coefficients)
- block.objective_constant = _set_objective(block, func)
- block.objective_type = _function_info(func)
- return
-end
-
-function MOI.get(block::QPBlockData{T}, ::MOI.ObjectiveFunctionType) where {T}
- return _function_type_to_set(T, block.objective_type)
-end
-
-function MOI.get(block::QPBlockData{T}, ::MOI.ObjectiveFunction{F}) where {T,F}
- affine_terms = MOI.ScalarAffineTerm{T}[
- MOI.ScalarAffineTerm(
- block.objective_linear_coefficients[i],
- MOI.VariableIndex(x),
- ) for (i, x) in enumerate(block.objective_linear_columns)
- ]
- quadratic_terms = MOI.ScalarQuadraticTerm{T}[]
- for (i, coef) in enumerate(block.objective_hessian_coefficients)
- r, c = block.objective_hessian_structure[i]
- push!(
- quadratic_terms,
- MOI.ScalarQuadraticTerm(
- coef,
- MOI.VariableIndex(r),
- MOI.VariableIndex(c),
- ),
- )
- end
- obj = MOI.ScalarQuadraticFunction(
- quadratic_terms,
- affine_terms,
- block.objective_constant,
- )
- return convert(F, obj)
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.ListOfConstraintTypesPresent,
-) where {T}
- constraints = Set{Tuple{Type,Type}}()
- for i in 1:length(block)
- F = _function_type_to_set(T, block.function_type[i])
- S = _bound_type_to_set(T, block.bound_type[i])
- push!(constraints, (F, S))
- end
- return collect(constraints)
-end
-
-function MOI.is_valid(
- block::QPBlockData{T},
- ci::MOI.ConstraintIndex{F,S},
-) where {
- T,
- F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
- S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T}},
-}
- return 1 <= ci.value <= length(block)
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.ListOfConstraintIndices{F,S},
-) where {
- T,
- F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
- S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T}},
-}
- ret = MOI.ConstraintIndex{F,S}[]
- for i in 1:length(block)
- if _bound_type_to_set(T, block.bound_type[i]) != S
- continue
- elseif _function_type_to_set(T, block.function_type[i]) != F
- continue
- end
- push!(ret, MOI.ConstraintIndex{F,S}(i))
- end
- return ret
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.NumberOfConstraints{F,S},
-) where {
- T,
- F<:Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
- S<:Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T}},
-}
- return length(MOI.get(block, MOI.ListOfConstraintIndices{F,S}()))
-end
-
-function _bound_type_to_set(::Type{T}, k::_BoundType) where {T}
- if k == _kBoundTypeEqualTo
- return MOI.EqualTo{T}
- elseif k == _kBoundTypeLessThan
- return MOI.LessThan{T}
- else
- @assert k == _kBoundTypeGreaterThan
- return MOI.GreaterThan{T}
- end
-end
-
-function _function_type_to_set(::Type{T}, k::_FunctionType) where {T}
- if k == _kFunctionTypeVariableIndex
- return MOI.VariableIndex
- elseif k == _kFunctionTypeScalarAffine
- return MOI.ScalarAffineFunction{T}
- else
- @assert k == _kFunctionTypeScalarQuadratic
- return MOI.ScalarQuadraticFunction{T}
- end
-end
-
-_function_info(::MOI.VariableIndex) = _kFunctionTypeVariableIndex
-_function_info(::MOI.ScalarAffineFunction) = _kFunctionTypeScalarAffine
-_function_info(::MOI.ScalarQuadraticFunction) = _kFunctionTypeScalarQuadratic
-
-_set_info(s::MOI.LessThan) = _kBoundTypeLessThan, -Inf, s.upper
-_set_info(s::MOI.GreaterThan) = _kBoundTypeGreaterThan, s.lower, Inf
-_set_info(s::MOI.EqualTo) = _kBoundTypeEqualTo, s.value, s.value
-
-function _add_function(
- block::QPBlockData{T},
- f::MOI.ScalarAffineFunction{T},
-) where {T}
- _add_function(block, f.terms)
- push!(block.quadratic_row_ends, length(block.quadratic_coefficients))
- return _kFunctionTypeScalarAffine, f.constant
-end
-
-function _add_function(
- block::QPBlockData{T},
- f::MOI.ScalarQuadraticFunction{T},
-) where {T}
- _add_function(block, f.affine_terms)
- for term in f.quadratic_terms
- i, j = term.variable_1.value, term.variable_2.value
- push!(block.hessian_structure, (i, j))
- push!(block.quadratic_coefficients, term.coefficient)
- end
- push!(block.quadratic_row_ends, length(block.quadratic_coefficients))
- return _kFunctionTypeScalarQuadratic, f.constant
-end
-
-function _add_function(
- block::QPBlockData{T},
- terms::Vector{MOI.ScalarAffineTerm{T}},
-) where {T}
- row = length(block) + 1
- for term in terms
- push!(block.linear_jacobian_structure, (row, term.variable.value))
- push!(block.linear_coefficients, term.coefficient)
- end
- push!(block.linear_row_ends, length(block.linear_jacobian_structure))
- return
-end
-
-function MOI.add_constraint(
- block::QPBlockData{T},
- f::Union{MOI.ScalarAffineFunction{T},MOI.ScalarQuadraticFunction{T}},
- set::Union{MOI.LessThan{T},MOI.GreaterThan{T},MOI.EqualTo{T}},
-) where {T}
- function_type, constant = _add_function(block, f)
- bound_type, l, u = _set_info(set)
- push!(block.g_L, l - constant)
- push!(block.g_U, u - constant)
- push!(block.mult_g, nothing)
- push!(block.bound_type, bound_type)
- push!(block.function_type, function_type)
- return MOI.ConstraintIndex{typeof(f),typeof(set)}(length(block.bound_type))
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.ConstraintFunction,
- c::MOI.ConstraintIndex{F,S},
-) where {T,F,S}
- row = c.value
- offset = row == 1 ? 1 : (block.linear_row_ends[row-1] + 1)
- affine_terms = MOI.ScalarAffineTerm{T}[
- MOI.ScalarAffineTerm(
- block.linear_coefficients[i],
- MOI.VariableIndex(block.linear_jacobian_structure[i][2]),
- ) for i in offset:block.linear_row_ends[row]
- ]
- quadratic_terms = MOI.ScalarQuadraticTerm{T}[]
- offset = row == 1 ? 1 : (block.quadratic_row_ends[row-1] + 1)
- for i in offset:block.quadratic_row_ends[row]
- r, c = block.hessian_structure[i]
- push!(
- quadratic_terms,
- MOI.ScalarQuadraticTerm(
- block.quadratic_coefficients[i],
- MOI.VariableIndex(r),
- MOI.VariableIndex(c),
- ),
- )
- end
- if length(quadratic_terms) == 0
- return MOI.ScalarAffineFunction(affine_terms, zero(T))
- end
- return MOI.ScalarQuadraticFunction(quadratic_terms, affine_terms, zero(T))
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.ConstraintSet,
- c::MOI.ConstraintIndex{F,S},
-) where {T,F,S}
- row = c.value
- if block.bound_type[row] == _kBoundTypeEqualTo
- return MOI.EqualTo(block.g_L[row])
- elseif block.bound_type[row] == _kBoundTypeLessThan
- return MOI.LessThan(block.g_U[row])
- else
- @assert block.bound_type[row] == _kBoundTypeGreaterThan
- return MOI.GreaterThan(block.g_L[row])
- end
-end
-
-function MOI.set(
- block::QPBlockData{T},
- ::MOI.ConstraintSet,
- c::MOI.ConstraintIndex{F,MOI.LessThan{T}},
- set::MOI.LessThan{T},
-) where {T,F}
- row = c.value
- block.g_U[row] = set.upper
- return
-end
-
-function MOI.set(
- block::QPBlockData{T},
- ::MOI.ConstraintSet,
- c::MOI.ConstraintIndex{F,MOI.GreaterThan{T}},
- set::MOI.GreaterThan{T},
-) where {T,F}
- row = c.value
- block.g_L[row] = set.lower
- return
-end
-
-function MOI.set(
- block::QPBlockData{T},
- ::MOI.ConstraintSet,
- c::MOI.ConstraintIndex{F,MOI.EqualTo{T}},
- set::MOI.EqualTo{T},
-) where {T,F}
- row = c.value
- block.g_L[row] = set.value
- block.g_U[row] = set.value
- return
-end
-
-function MOI.get(
- block::QPBlockData{T},
- ::MOI.ConstraintDualStart,
- c::MOI.ConstraintIndex{F,S},
-) where {T,F,S}
- return block.mult_g[c.value]
-end
-
-function MOI.set(
- block::QPBlockData{T},
- ::MOI.ConstraintDualStart,
- c::MOI.ConstraintIndex{F,S},
- value,
-) where {T,F,S}
- block.mult_g[c.value] = value
- return
-end
-
-function MOI.eval_objective(
- block::QPBlockData{T},
- x::AbstractVector{T},
-) where {T}
- y = block.objective_constant
- for (i, c) in enumerate(block.objective_linear_columns)
- y += block.objective_linear_coefficients[i] * x[c]
- end
- for (i, (r, c)) in enumerate(block.objective_hessian_structure)
- if r == c
- y += block.objective_hessian_coefficients[i] * x[r] * x[c] / 2
- else
- y += block.objective_hessian_coefficients[i] * x[r] * x[c]
- end
- end
- return y
-end
-
-function MOI.eval_objective_gradient(
- block::QPBlockData{T},
- g::AbstractVector{T},
- x::AbstractVector{T},
-) where {T}
- g .= zero(T)
- for (i, c) in enumerate(block.objective_linear_columns)
- g[c] += block.objective_linear_coefficients[i]
- end
- for (i, (r, c)) in enumerate(block.objective_hessian_structure)
- g[r] += block.objective_hessian_coefficients[i] * x[c]
- if r != c
- g[c] += block.objective_hessian_coefficients[i] * x[r]
- end
- end
- return
-end
-
-function MOI.eval_constraint(
- block::QPBlockData{T},
- g::AbstractVector{T},
- x::AbstractVector{T},
-) where {T}
- for i in 1:length(g)
- g[i] = zero(T)
- end
- for (i, (r, c)) in enumerate(block.linear_jacobian_structure)
- g[r] += block.linear_coefficients[i] * x[c]
- end
- i = 0
- for row in 1:length(block.quadratic_row_ends)
- while i < block.quadratic_row_ends[row]
- i += 1
- r, c = block.hessian_structure[i]
- if r == c
- g[row] += block.quadratic_coefficients[i] * x[r] * x[c] / 2
- else
- g[row] += block.quadratic_coefficients[i] * x[r] * x[c]
- end
- end
- end
- return
-end
-
-function MOI.jacobian_structure(block::QPBlockData)
- J = copy(block.linear_jacobian_structure)
- i = 0
- for row in 1:length(block.quadratic_row_ends)
- while i < block.quadratic_row_ends[row]
- i += 1
- r, c = block.hessian_structure[i]
- push!(J, (row, r))
- if r != c
- push!(J, (row, c))
- end
- end
- end
- return J
-end
-
-function MOI.eval_constraint_jacobian(
- block::QPBlockData{T},
- J::AbstractVector{T},
- x::AbstractVector{T},
-) where {T}
- nterms = 0
- for coef in block.linear_coefficients
- nterms += 1
- J[nterms] = coef
- end
- i = 0
- for row in 1:length(block.quadratic_row_ends)
- while i < block.quadratic_row_ends[row]
- i += 1
- r, c = block.hessian_structure[i]
- nterms += 1
- J[nterms] = block.quadratic_coefficients[i] * x[c]
- if r != c
- nterms += 1
- J[nterms] = block.quadratic_coefficients[i] * x[r]
- end
- end
- end
- return nterms
-end
-
-function MOI.hessian_lagrangian_structure(block::QPBlockData)
- return vcat(block.objective_hessian_structure, block.hessian_structure)
-end
-
-function MOI.eval_hessian_lagrangian(
- block::QPBlockData{T},
- H::AbstractVector{T},
- ::AbstractVector{T},
- σ::T,
- μ::AbstractVector{T},
-) where {T}
- nterms = 0
- for c in block.objective_hessian_coefficients
- nterms += 1
- H[nterms] = σ * c
- end
- i = 0
- for row in 1:length(block.quadratic_row_ends)
- while i < block.quadratic_row_ends[row]
- i += 1
- nterms += 1
- H[nterms] = μ[row] * block.quadratic_coefficients[i]
- end
- end
- return nterms
-end
diff --git a/src/KKT/Dense/augmented.jl b/src/KKT/Dense/augmented.jl
new file mode 100644
index 00000000..5ecee833
--- /dev/null
+++ b/src/KKT/Dense/augmented.jl
@@ -0,0 +1,162 @@
+
+"""
+ DenseKKTSystem{T, VT, MT, QN, LS, VI} <: AbstractReducedKKTSystem{T, VT, MT, QN}
+
+Implement [`AbstractReducedKKTSystem`](@ref) with dense matrices.
+
+Requires a dense linear solver to factorize the KKT system (otherwise an error is returned).
+
+"""
+struct DenseKKTSystem{
+ T,
+ VT <: AbstractVector{T},
+ MT <: AbstractMatrix{T},
+ QN,
+ LS,
+ VI <: AbstractVector{Int},
+ } <: AbstractReducedKKTSystem{T, VT, MT, QN}
+
+ hess::MT
+ jac::MT
+ quasi_newton::QN
+ reg::VT
+ pr_diag::VT
+ du_diag::VT
+ l_diag::VT
+ u_diag::VT
+ l_lower::VT
+ u_lower::VT
+ diag_hess::VT
+ # KKT system
+ aug_com::MT
+ # Info
+ ind_ineq::VI
+ ind_lb::VI
+ ind_ub::VI
+ # Linear Solver
+ linear_solver::LS
+ # Buffers
+ etc::Dict{Symbol, Any}
+end
+
+function create_kkt_system(
+ ::Type{DenseKKTSystem},
+ cb::AbstractCallback{T,VT},
+ ind_cons,
+ linear_solver::Type;
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+) where {T, VT}
+
+ ind_ineq = ind_cons.ind_ineq
+ ind_lb = ind_cons.ind_lb
+ ind_ub = ind_cons.ind_ub
+
+ n = cb.nvar
+ m = cb.ncon
+ ns = length(ind_ineq)
+ nlb = length(ind_cons.ind_lb)
+ nub = length(ind_cons.ind_ub)
+
+ hess = create_array(cb, n, n)
+ jac = create_array(cb, m, n)
+ aug_com = create_array(cb, n+ns+m, n+ns+m)
+ reg = create_array(cb, n+ns)
+ pr_diag = create_array(cb, n+ns)
+ du_diag = create_array(cb, m)
+ diag_hess = create_array(cb, n)
+
+ l_diag = fill!(VT(undef, nlb), one(T))
+ u_diag = fill!(VT(undef, nub), one(T))
+ l_lower = fill!(VT(undef, nlb), zero(T))
+ u_lower = fill!(VT(undef, nub), zero(T))
+
+ # Init!
+ fill!(aug_com, zero(T))
+ fill!(hess, zero(T))
+ fill!(jac, zero(T))
+ fill!(reg, zero(T))
+ fill!(pr_diag, zero(T))
+ fill!(du_diag, zero(T))
+ fill!(diag_hess, zero(T))
+
+ quasi_newton = create_quasi_newton(hessian_approximation, cb, n)
+ _linear_solver = linear_solver(aug_com; opt = opt_linear_solver)
+
+ return DenseKKTSystem(
+ hess, jac, quasi_newton,
+ reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower,
+ diag_hess, aug_com,
+ ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub,
+ _linear_solver,
+ Dict{Symbol, Any}(),
+ )
+end
+
+num_variables(kkt::DenseKKTSystem) = length(kkt.pr_diag)
+
+function mul!(y::AbstractVector, kkt::DenseKKTSystem, x::AbstractVector)
+ symul!(y, kkt.aug_com, x)
+end
+
+# Special getters for Jacobian
+function get_jacobian(kkt::DenseKKTSystem)
+ n = size(kkt.hess, 1)
+ ns = length(kkt.ind_ineq)
+ return view(kkt.jac, :, 1:n)
+end
+
+function diag_add!(dest::AbstractMatrix, d1::AbstractVector, d2::AbstractVector)
+ n = length(d1)
+ @inbounds for i in 1:n
+ dest[i, i] = d1[i] + d2[i]
+ end
+end
+
+function _build_dense_kkt_system!(dest::VT, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, n, m, ns) where {T, VT <: AbstractMatrix{T}}
+ # Transfer Hessian
+ for i in 1:n, j in 1:i
+ if i == j
+ dest[i, i] = pr_diag[i] + diag_hess[i]
+ else
+ dest[i, j] = hess[i, j]
+ dest[j, i] = hess[j, i]
+ end
+ end
+ # Transfer slack diagonal
+ for i in 1:ns
+ dest[i+n, i+n] = pr_diag[i+n]
+ end
+ # Transfer Jacobian / variables
+ for i in 1:m, j in 1:n
+ dest[i + n + ns, j] = jac[i, j]
+ dest[j, i + n + ns] = jac[i, j]
+ end
+ # Transfer Jacobian / slacks
+ for j in 1:ns
+ is = ind_ineq[j]
+ dest[is + n + ns, j + n] = - one(T)
+ dest[j + n, is + n + ns] = - one(T)
+ end
+ # Transfer dual regularization
+ for i in 1:m
+ dest[i + n + ns, i + n + ns] = du_diag[i]
+ end
+end
+
+function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
+ n = size(kkt.hess, 1)
+ m = size(kkt.jac, 1)
+ ns = length(kkt.ind_ineq)
+
+ _build_dense_kkt_system!(kkt.aug_com, kkt.hess, kkt.jac,
+ kkt.pr_diag, kkt.du_diag, kkt.diag_hess,
+ kkt.ind_ineq,
+ n, m, ns)
+end
+
+function compress_hessian!(kkt::DenseKKTSystem)
+ # Transfer diagonal term for future regularization
+ diag!(kkt.diag_hess, kkt.hess)
+end
+
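To make the assembly above concrete, here is a standalone sketch (plain arrays, not MadNLP API) of the dense augmented matrix that `_build_dense_kkt_system!` fills, with n = 2 variables, ns = 1 slack, and m = 1 inequality constraint:

    using LinearAlgebra

    n, ns, m = 2, 1, 1
    W  = [4.0 1.0; 1.0 3.0]          # Hessian (diag_hess holds its diagonal)
    J  = [1.0 2.0]                   # m-by-n Jacobian
    Σ  = [0.1, 0.1, 0.2]             # pr_diag, length n + ns
    Σd = [0.0]                       # du_diag
    K  = zeros(n + ns + m, n + ns + m)
    K[1:n, 1:n]               .= W .+ Diagonal(Σ[1:n])     # W + Σₓ block
    K[n+1:n+ns, n+1:n+ns]     .= Diagonal(Σ[n+1:end])      # slack diagonal Σₛ
    K[n+ns+1:end, 1:n]        .= J                         # Jacobian / variables
    K[1:n, n+ns+1:end]        .= J'
    K[n+ns+1, n+1] = K[n+1, n+ns+1] = -1.0                 # slack column of the inequality row
    K[n+ns+1:end, n+ns+1:end] .= Diagonal(Σd)              # dual regularization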
diff --git a/src/KKT/Dense/condensed.jl b/src/KKT/Dense/condensed.jl
new file mode 100644
index 00000000..f6ae1a17
--- /dev/null
+++ b/src/KKT/Dense/condensed.jl
@@ -0,0 +1,241 @@
+
+"""
+ DenseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN}
+
+Implement [`AbstractCondensedKKTSystem`](@ref) with dense matrices.
+
+Requires a dense linear solver to factorize the associated KKT system (otherwise an error is returned).
+
+"""
+struct DenseCondensedKKTSystem{
+ T,
+ VT <: AbstractVector{T},
+ MT <: AbstractMatrix{T},
+ QN,
+ LS,
+ VI <: AbstractVector{Int}
+ } <: AbstractCondensedKKTSystem{T, VT, MT, QN}
+
+ hess::MT
+ jac::MT
+ quasi_newton::QN
+ jac_ineq::MT
+
+ reg::VT
+ pr_diag::VT
+ du_diag::VT
+ l_diag::VT
+ u_diag::VT
+ l_lower::VT
+ u_lower::VT
+
+ pd_buffer::VT
+ diag_buffer::VT
+ buffer::VT
+ # KKT system
+ aug_com::MT
+ # Info
+ n_eq::Int
+ ind_eq::VI
+ ind_eq_shifted::VI
+ n_ineq::Int
+ ind_ineq::VI
+ ind_lb::VI
+ ind_ub::VI
+ ind_ineq_shifted::VI
+ # Linear Solver
+ linear_solver::LS
+ # Buffers
+ etc::Dict{Symbol, Any}
+end
+
+function create_kkt_system(
+ ::Type{DenseCondensedKKTSystem},
+ cb::AbstractCallback{T,VT},
+ ind_cons,
+ linear_solver::Type;
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+) where {T, VT}
+
+ n = cb.nvar
+ m = cb.ncon
+ ns = length(ind_cons.ind_ineq)
+ n_eq = m - ns
+ nlb = length(ind_cons.ind_lb)
+ nub = length(ind_cons.ind_ub)
+
+ aug_com = create_array(cb, n+m-ns, n+m-ns)
+ hess = create_array(cb, n, n)
+ jac = create_array(cb, m, n)
+ jac_ineq = create_array(cb, ns, n)
+
+ reg = VT(undef, n+ns)
+ pr_diag = VT(undef, n+ns)
+ du_diag = VT(undef, m)
+ l_diag = fill!(VT(undef, nlb), one(T))
+ u_diag = fill!(VT(undef, nub), one(T))
+ l_lower = fill!(VT(undef, nlb), zero(T))
+ u_lower = fill!(VT(undef, nub), zero(T))
+
+ pd_buffer = VT(undef, n + n_eq)
+ diag_buffer = VT(undef, ns)
+ buffer = VT(undef, m)
+
+ # Init!
+ fill!(aug_com, zero(T))
+ fill!(hess, zero(T))
+ fill!(jac, zero(T))
+ fill!(pr_diag, zero(T))
+ fill!(du_diag, zero(T))
+
+ # Shift indices to avoid additional allocation in views
+ ind_eq_shifted = ind_cons.ind_eq .+ n .+ ns
+ ind_ineq_shifted = ind_cons.ind_ineq .+ n .+ ns
+
+ quasi_newton = create_quasi_newton(hessian_approximation, cb, n)
+ _linear_solver = linear_solver(aug_com; opt = opt_linear_solver)
+
+ return DenseCondensedKKTSystem(
+ hess, jac, quasi_newton, jac_ineq,
+ reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower,
+ pd_buffer, diag_buffer, buffer,
+ aug_com,
+ n_eq, ind_cons.ind_eq, ind_eq_shifted,
+ ns,
+ ind_cons.ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub,
+ ind_ineq_shifted,
+ _linear_solver,
+ Dict{Symbol, Any}(),
+ )
+end
+
+num_variables(kkt::DenseCondensedKKTSystem) = size(kkt.hess, 1)
+
+function get_slack_regularization(kkt::DenseCondensedKKTSystem)
+ n, ns = num_variables(kkt), kkt.n_ineq
+ return view(kkt.pr_diag, n+1:n+ns)
+end
+
+function _build_condensed_kkt_system!(
+ dest::AbstractMatrix, hess::AbstractMatrix, jac::AbstractMatrix,
+ pr_diag::AbstractVector, du_diag::AbstractVector, ind_eq::AbstractVector, n, m_eq,
+)
+ # Transfer Hessian
+ @inbounds for i in 1:n, j in 1:i
+ if i == j
+ dest[i, i] += pr_diag[i] + hess[i, i]
+ else
+ dest[i, j] += hess[i, j]
+ dest[j, i] += hess[j, i]
+ end
+ end
+ # Transfer Jacobian / variables
+ @inbounds for i in 1:m_eq, j in 1:n
+ is = ind_eq[i]
+ dest[i + n, j] = jac[is, j]
+ dest[j, i + n] = jac[is, j]
+ end
+ # Transfer dual regularization
+ @inbounds for i in 1:m_eq
+ is = ind_eq[i]
+ dest[i + n, i + n] = du_diag[is]
+ end
+end
+
+function _build_ineq_jac!(
+ dest::AbstractMatrix, jac::AbstractMatrix, diag_buffer::AbstractVector,
+ ind_ineq::AbstractVector,
+ n, m_ineq,
+)
+ @inbounds for i in 1:m_ineq, j in 1:n
+ is = ind_ineq[i]
+ dest[i, j] = jac[is, j] * sqrt(diag_buffer[i])
+ end
+end
+
+function build_kkt!(kkt::DenseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT}
+ n = size(kkt.hess, 1)
+ ns = kkt.n_ineq
+ n_eq = length(kkt.ind_eq)
+ m = size(kkt.jac, 1)
+
+ fill!(kkt.aug_com, zero(T))
+
+ # Build √Σₛ * J
+ Σs = view(kkt.pr_diag, n+1:n+ns)
+ Σd = @view(kkt.du_diag[kkt.ind_ineq])
+ kkt.diag_buffer .= Σs ./ ( 1 .- Σd .* Σs)
+ _build_ineq_jac!(kkt.jac_ineq, kkt.jac, kkt.diag_buffer, kkt.ind_ineq, n, ns)
+
+ # Select upper-left block
+ W = if n_eq > 0
+ view(kkt.aug_com, 1:n, 1:n) # TODO: does not work on GPU
+ else
+ kkt.aug_com
+ end
+ # Build J' * Σₛ * J
+ mul!(W, kkt.jac_ineq', kkt.jac_ineq)
+
+
+ _build_condensed_kkt_system!(
+ kkt.aug_com, kkt.hess, kkt.jac,
+ kkt.pr_diag, kkt.du_diag,
+ kkt.ind_eq, n, kkt.n_eq,
+ )
+end
+
+# TODO: check how to handle inertia with the condensed form
+function is_inertia_correct(kkt::DenseCondensedKKTSystem, num_pos, num_zero, num_neg)
+ return (num_zero == 0 && num_neg == kkt.n_eq)
+end
+
+# For inertia-free regularization
+function _mul_expanded!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
+ n = size(kkt.hess, 1)
+ ns = kkt.n_ineq
+ m = size(kkt.jac, 1)
+
+ Σx = view(kkt.pr_diag, 1:n)
+ Σs = view(kkt.pr_diag, 1+n:n+ns)
+ Σd = kkt.du_diag
+
+ # Decompose x
+ xx = view(x, 1:n)
+ xs = view(x, 1+n:n+ns)
+ xy = view(x, 1+n+ns:n+ns+m)
+
+ # Decompose y
+ yx = view(y, 1:n)
+ ys = view(y, 1+n:n+ns)
+ yy = view(y, 1+n+ns:n+ns+m)
+
+ # / x (variable)
+ yx .= Σx .* xx
+ symul!(yx, kkt.hess, xx)
+ mul!(yx, kkt.jac', xy, 1.0, 1.0)
+
+ # / s (slack)
+ ys .= Σs .* xs
+ ys .-= xy[kkt.ind_ineq]
+
+ # / y (multiplier)
+ yy .= Σd .* xy
+ mul!(yy, kkt.jac, xx, 1.0, 1.0)
+ yy[kkt.ind_ineq] .-= xs
+ return
+end
+
+function mul!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
+ # TODO: implement properly with AbstractKKTRHS
+ if length(y) == length(x) == size(kkt.aug_com, 1)
+ symul!(y, kkt.aug_com, x)
+ else
+ _mul_expanded!(y, kkt, x)
+ end
+end
+
+function jprod_ineq!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
+ mul!(y, kkt.jac_ineq, x)
+end
+
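The condensation implemented above eliminates the slack and inequality blocks before factorization. A worked sketch of the upper-left block, with illustrative values (not MadNLP API):

    using LinearAlgebra

    W   = [4.0 1.0; 1.0 3.0]        # Hessian + primal regularization
    Ai  = [1.0 0.0; 0.0 1.0]        # inequality Jacobian
    Σs  = [2.0, 0.5]                # slack diagonal
    Σd  = [0.0, 0.0]                # dual regularization on inequality rows
    Σ̃   = Σs ./ (1 .- Σd .* Σs)     # diag_buffer in build_kkt!
    Jsc = sqrt.(Σ̃) .* Ai            # jac_ineq = √Σ̃ * Aᵢ (row scaling)
    K   = W + Jsc' * Jsc            # upper-left block: W + Aᵢᵀ Σ̃ Aᵢ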
diff --git a/src/KKT/Dense/utils.jl b/src/KKT/Dense/utils.jl
new file mode 100644
index 00000000..884d7701
--- /dev/null
+++ b/src/KKT/Dense/utils.jl
@@ -0,0 +1,29 @@
+
+# For templating
+const AbstractDenseKKTSystem{T, VT, MT, QN} = Union{
+ DenseKKTSystem{T, VT, MT, QN},
+ DenseCondensedKKTSystem{T, VT, MT, QN},
+}
+
+#=
+ Generic functions
+=#
+
+function jtprod!(y::AbstractVector, kkt::AbstractDenseKKTSystem, x::AbstractVector)
+ nx = size(kkt.hess, 1)
+ ind_ineq = kkt.ind_ineq
+ ns = length(ind_ineq)
+ yx = view(y, 1:nx)
+ ys = view(y, 1+nx:nx+ns)
+ # / x
+ mul!(yx, kkt.jac', x)
+ # / s
+ ys .= -@view(x[ind_ineq])
+ return
+end
+
+function compress_jacobian!(kkt::AbstractDenseKKTSystem)
+ return
+end
+
+nnz_jacobian(kkt::AbstractDenseKKTSystem) = length(kkt.jac)
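The dense `jtprod!` above splits the transposed-Jacobian product across variables and slacks; a standalone numeric sketch with plain arrays and illustrative sizes:

    J = [1.0 2.0; 3.0 4.0]      # m = 2 constraints, nx = 2 variables
    ind_ineq = [2]              # constraint 2 is an inequality (ns = 1)
    x = [1.0, 1.0]              # multiplier-space input
    y = zeros(2 + 1)            # output: [yx; ys]
    y[1:2] .= J' * x            # variable part
    y[3:3] .= -x[ind_ineq]      # slack part picks out the inequality rows
    y                           # -> [4.0, 6.0, -1.0]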
diff --git a/src/KKT/KKTsystem.jl b/src/KKT/KKTsystem.jl
index 811160b9..e30ffeae 100644
--- a/src/KKT/KKTsystem.jl
+++ b/src/KKT/KKTsystem.jl
@@ -14,12 +14,12 @@ conditions at the current primal-dual iterate ``(x, s, y, z, ν, w)``.
The associated matrix is
```
-[Wₓₓ 0 Aₑ' Aᵢ' -I 0 ] [Δx]
-[ 0 0 0 -I 0 -I ] [Δs]
-[Aₑ 0 0 0 0 0 ] [Δy]
-[Aᵢ -I 0 0 0 0 ] [Δz]
-[V 0 0 0 X 0 ] [Δν]
-[0 W 0 0 0 S ] [Δw]
+[Wₓₓ 0 Aₑ' Aᵢ' V½ 0 ] [Δx]
+[0 0 0 -I 0 W½ ] [Δs]
+[Aₑ 0 0 0 0 0 ] [Δy]
+[Aᵢ -I 0 0 0 0 ] [Δz]
+[V½ 0 0 0 -X 0 ] [Δτ]
+[0 W½ 0 0 0 -S ] [Δρ]
```
with
* ``Wₓₓ``: Hessian of the Lagrangian.
@@ -29,6 +29,8 @@ with
* ``S = diag(s)``
* ``V = diag(ν)``
* ``W = diag(w)``
+* ``Δτ = -V^{-½}Δν``
+* ``Δρ = -W^{-½}Δw``
"""
abstract type AbstractUnreducedKKTSystem{T, VT, MT, QN} <: AbstractKKTSystem{T, VT, MT, QN} end
@@ -42,7 +44,7 @@ the two last rows associated to the bounds' duals ``(ν, w)``.
At a primal-dual iterate ``(x, s, y, z)``, the matrix writes
```
[Wₓₓ + Σₓ 0 Aₑ' Aᵢ'] [Δx]
-[ 0 Σₛ 0 -I ] [Δs]
+[0 Σₛ 0 -I ] [Δs]
[Aₑ 0 0 0 ] [Δy]
[Aᵢ -I 0 0 ] [Δz]
```
@@ -76,11 +78,32 @@ with
"""
abstract type AbstractCondensedKKTSystem{T, VT, MT, QN} <: AbstractKKTSystem{T, VT, MT, QN} end
+
#=
Templates
=#
-"Number of primal variables associated to the KKT system."
+"""
+ create_kkt_system(
+ ::Type{KKT},
+ cb::AbstractCallback,
+ ind_cons::NamedTuple,
+ linear_solver::Type{LinSol};
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+ ) where {KKT<:AbstractKKTSystem, LinSol<:AbstractLinearSolver}
+
+Instantiate a new KKT system of type `KKT`, associated with
+the nonlinear program encoded in the callback `cb`. The
+`NamedTuple` `ind_cons` stores the indices of all the variables and
+constraints in the callback `cb`. In addition, the user should pass
+the linear solver `linear_solver` that will be used to solve the KKT system
+after it has been assembled.
+
+"""
+function create_kkt_system end
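A usage sketch for this constructor; `cb` and `ind_cons` are assumed to come from MadNLP's callback wrapper, and `UmfpackSolver` is one available linear solver — this shows the call shape only, not a stable API:

    # Sketch only — `cb` and `ind_cons` are assumed bindings from the callback layer.
    kkt = create_kkt_system(
        SparseKKTSystem,
        cb,                    # AbstractCallback wrapping the NLP
        ind_cons,              # NamedTuple of variable/constraint indices
        UmfpackSolver;         # any AbstractLinearSolver type
        hessian_approximation = ExactHessian,
    )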
+
+"Number of primal variables (including slacks) associated to the KKT system."
function num_variables end
"""
@@ -145,21 +168,21 @@ in `y`, such that ``y = A' x`` (with ``A`` current Jacobian).
function jtprod! end
"""
- regularize_diagonal!(kkt::AbstractKKTSystem, primal_values::AbstractVector, dual_values::AbstractVector)
+ solve!(kkt::AbstractKKTSystem, w::AbstractKKTVector)
-Regularize the values in the diagonal of the KKT system.
-Called internally inside the interior-point routine.
-"""
-function regularize_diagonal! end
+Solve the KKT system ``K x = w`` with the linear solver stored
+inside `kkt`, storing the result in place in the `AbstractKKTVector` `w`.
"""
- set_jacobian_scaling!(kkt::AbstractKKTSystem, scaling::AbstractVector)
+function solve! end
-Set the scaling of the Jacobian with the vector `scaling` storing
-the scaling for all the constraints in the problem.
+"""
+ regularize_diagonal!(kkt::AbstractKKTSystem, primal_values::Number, dual_values::Number)
+Regularize the values in the diagonal of the KKT system.
+Called internally inside the interior-point routine.
"""
-function set_jacobian_scaling! end
+function regularize_diagonal! end
"""
is_inertia_correct(kkt::AbstractKKTSystem, n::Int, m::Int, p::Int)
@@ -170,10 +193,6 @@ to the KKT system implemented in `kkt`.
"""
function is_inertia_correct end
-# TODO: temporary
-"Return true if KKT system is reduced."
-function is_reduced end
-
"Nonzero in Jacobian"
function nnz_jacobian end
@@ -189,12 +208,15 @@ function hess_dense! end
Generic functions
=#
function initialize!(kkt::AbstractKKTSystem)
+ fill!(kkt.reg, 1.0)
fill!(kkt.pr_diag, 1.0)
fill!(kkt.du_diag, 0.0)
fill!(kkt.hess, 0.0)
+ return
end
function regularize_diagonal!(kkt::AbstractKKTSystem, primal, dual)
+ kkt.reg .+= primal
kkt.pr_diag .+= primal
kkt.du_diag .= .-dual
end
@@ -206,48 +228,20 @@ Base.size(kkt::AbstractKKTSystem, dim::Int) = size(kkt.aug_com, dim)
get_kkt(kkt::AbstractKKTSystem) = kkt.aug_com
get_jacobian(kkt::AbstractKKTSystem) = kkt.jac
get_hessian(kkt::AbstractKKTSystem) = kkt.hess
-get_raw_jacobian(kkt::AbstractKKTSystem) = kkt.jac_raw
-
-# Fix variable treatment
-function treat_fixed_variable!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}}
- length(kkt.ind_fixed) == 0 && return
- aug = kkt.aug_com
-
- fixed_aug_diag = view(aug.nzval, aug.colptr[kkt.ind_fixed])
- fixed_aug_diag .= 1.0
- fixed_aug = view(aug.nzval, kkt.ind_aug_fixed)
- fixed_aug .= 0.0
- return
-end
-function treat_fixed_variable!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}}
- length(kkt.ind_fixed) == 0 && return
- aug = kkt.aug_com
- @inbounds for i in kkt.ind_fixed
- aug[i, :] .= 0.0
- aug[:, i] .= 0.0
- aug[i, i] = 1.0
- end
-end
function is_inertia_correct(kkt::AbstractKKTSystem, num_pos, num_zero, num_neg)
return (num_zero == 0) && (num_pos == num_variables(kkt))
end
-function build_kkt!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}}
- copyto!(kkt.aug_com, kkt.aug_raw)
- treat_fixed_variable!(kkt)
-end
-
-function build_kkt!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}}
- transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map)
- treat_fixed_variable!(kkt)
-end
-
compress_hessian!(kkt::AbstractKKTSystem) = nothing
-
include("rhs.jl")
-include("sparse.jl")
-include("dense.jl")
+include("Dense/augmented.jl")
+include("Dense/condensed.jl")
+include("Dense/utils.jl")
+include("Sparse/unreduced.jl")
+include("Sparse/augmented.jl")
+include("Sparse/condensed.jl")
+include("Sparse/utils.jl")
diff --git a/src/KKT/Sparse/augmented.jl b/src/KKT/Sparse/augmented.jl
new file mode 100644
index 00000000..a54d0464
--- /dev/null
+++ b/src/KKT/Sparse/augmented.jl
@@ -0,0 +1,149 @@
+"""
+ SparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN}
+
+Implement the [`AbstractReducedKKTSystem`](@ref) in sparse COO format.
+
+"""
+struct SparseKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractReducedKKTSystem{T, VT, MT, QN}
+ hess::VT
+ jac_callback::VT
+ jac::VT
+ quasi_newton::QN
+ reg::VT
+ pr_diag::VT
+ du_diag::VT
+ l_diag::VT
+ u_diag::VT
+ l_lower::VT
+ u_lower::VT
+ # Augmented system
+ aug_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ aug_com::MT
+ aug_csc_map::Union{Nothing, VI}
+ # Hessian
+ hess_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ hess_com::MT
+ hess_csc_map::Union{Nothing, VI}
+ # Jacobian
+ jac_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ jac_com::MT
+ jac_csc_map::Union{Nothing, VI}
+ # LinearSolver
+ linear_solver::LS
+ # Info
+ ind_ineq::VI
+ ind_lb::VI
+ ind_ub::VI
+end
+
+# Build KKT system directly from SparseCallback
+function create_kkt_system(
+ ::Type{SparseKKTSystem},
+ cb::SparseCallback{T,VT},
+ ind_cons,
+ linear_solver::Type;
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+) where {T,VT}
+
+ n_slack = length(ind_cons.ind_ineq)
+ # Deduce KKT size.
+
+ n = cb.nvar
+ m = cb.ncon
+ # Evaluate sparsity pattern
+ jac_sparsity_I = create_array(cb, Int32, cb.nnzj)
+ jac_sparsity_J = create_array(cb, Int32, cb.nnzj)
+ _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J)
+
+ quasi_newton = create_quasi_newton(hessian_approximation, cb, n)
+ hess_sparsity_I, hess_sparsity_J = build_hessian_structure(cb, hessian_approximation)
+
+ nlb = length(ind_cons.ind_lb)
+ nub = length(ind_cons.ind_ub)
+
+ force_lower_triangular!(hess_sparsity_I,hess_sparsity_J)
+
+ ind_ineq = ind_cons.ind_ineq
+
+ n_slack = length(ind_ineq)
+ n_jac = length(jac_sparsity_I)
+ n_hess = length(hess_sparsity_I)
+ n_tot = n + n_slack
+
+
+ aug_vec_length = n_tot+m
+ aug_mat_length = n_tot+m+n_hess+n_jac+n_slack
+
+ I = create_array(cb, Int32, aug_mat_length)
+ J = create_array(cb, Int32, aug_mat_length)
+ V = VT(undef, aug_mat_length)
+ fill!(V, 0.0) # Need to initiate V to avoid NaN
+
+ offset = n_tot+n_jac+n_slack+n_hess+m
+
+ I[1:n_tot] .= 1:n_tot
+ I[n_tot+1:n_tot+n_hess] = hess_sparsity_I
+ I[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= (jac_sparsity_I.+n_tot)
+ I[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= ind_ineq .+ n_tot
+ I[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m)
+
+ J[1:n_tot] .= 1:n_tot
+ J[n_tot+1:n_tot+n_hess] = hess_sparsity_J
+ J[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= jac_sparsity_J
+ J[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= (n+1:n+n_slack)
+ J[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m)
+
+ pr_diag = _madnlp_unsafe_wrap(V, n_tot)
+ du_diag = _madnlp_unsafe_wrap(V, m, n_jac+n_slack+n_hess+n_tot+1)
+
+ reg = VT(undef, n_tot)
+ l_diag = VT(undef, nlb)
+ u_diag = VT(undef, nub)
+ l_lower = VT(undef, nlb)
+ u_lower = VT(undef, nub)
+
+ hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1)
+ jac = _madnlp_unsafe_wrap(V, n_jac+n_slack, n_hess+n_tot+1)
+ jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1)
+
+ aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V)
+ jac_raw = SparseMatrixCOO(
+ m, n_tot,
+ Int32[jac_sparsity_I; ind_ineq],
+ Int32[jac_sparsity_J; n+1:n+n_slack],
+ jac,
+ )
+ hess_raw = SparseMatrixCOO(
+ n_tot, n_tot,
+ hess_sparsity_I,
+ hess_sparsity_J,
+ hess,
+ )
+
+ aug_com, aug_csc_map = coo_to_csc(aug_raw)
+ jac_com, jac_csc_map = coo_to_csc(jac_raw)
+ hess_com, hess_csc_map = coo_to_csc(hess_raw)
+
+ _linear_solver = linear_solver(
+ aug_com; opt = opt_linear_solver
+ )
+
+ return SparseKKTSystem(
+ hess, jac_callback, jac, quasi_newton, reg, pr_diag, du_diag,
+ l_diag, u_diag, l_lower, u_lower,
+ aug_raw, aug_com, aug_csc_map,
+ hess_raw, hess_com, hess_csc_map,
+ jac_raw, jac_com, jac_csc_map,
+ _linear_solver,
+ ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub,
+ )
+
+end
+
+num_variables(kkt::SparseKKTSystem) = length(kkt.pr_diag)
+
+function build_kkt!(kkt::SparseKKTSystem)
+ transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map)
+end
+
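The COO buffer `V` above is laid out so that the KKT vectors alias its segments, [pr_diag | hess | jac | slack entries | du_diag], and writing any segment updates `aug_raw` directly. A compact demonstration of that aliasing trick, with `_madnlp_unsafe_wrap` approximated by `view`:

    V = zeros(10)
    pr_diag = view(V, 1:3)   # stands in for _madnlp_unsafe_wrap(V, 3)
    hess    = view(V, 4:6)   # next segment of the same buffer
    pr_diag .= 1.0
    hess    .= 2.0
    V[1:6]                   # -> [1,1,1,2,2,2]: aug_raw's V sees both writes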
diff --git a/src/KKT/Sparse/condensed.jl b/src/KKT/Sparse/condensed.jl
new file mode 100644
index 00000000..b4697b5d
--- /dev/null
+++ b/src/KKT/Sparse/condensed.jl
@@ -0,0 +1,364 @@
+
+"""
+ SparseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN}
+
+Implement the [`AbstractCondensedKKTSystem`](@ref) in sparse COO format.
+
+"""
+struct SparseCondensedKKTSystem{T, VT, MT, QN, LS, VI, VI32, VTu1, VTu2, EXT} <: AbstractCondensedKKTSystem{T, VT, MT, QN}
+ # Hessian
+ hess::VT
+ hess_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ hess_com::MT
+ hess_csc_map::Union{Nothing, VI}
+
+ # Jacobian
+ jac::VT
+ jt_coo::SparseMatrixCOO{T,Int32,VT, VI32}
+ jt_csc::MT
+ jt_csc_map::Union{Nothing, VI}
+
+ quasi_newton::QN
+ reg::VT
+ pr_diag::VT
+ du_diag::VT
+ l_diag::VT
+ u_diag::VT
+ l_lower::VT
+ u_lower::VT
+
+ # buffer
+ buffer::VT
+ buffer2::VT
+
+ # Augmented system
+ aug_com::MT
+
+ # slack diagonal buffer
+ diag_buffer::VT
+ dptr::VTu1
+ hptr::VTu1
+ jptr::VTu2
+
+ # LinearSolver
+ linear_solver::LS
+
+ # Info
+ ind_ineq::VI
+ ind_lb::VI
+ ind_ub::VI
+
+ # extra
+ ext::EXT
+end
+
+function create_kkt_system(
+ ::Type{SparseCondensedKKTSystem},
+ cb::SparseCallback{T,VT},
+ ind_cons,
+ linear_solver::Type;
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+) where {T, VT}
+ ind_ineq = ind_cons.ind_ineq
+ n = cb.nvar
+ m = cb.ncon
+ n_slack = length(ind_ineq)
+
+ if n_slack != m
+ error("SparseCondensedKKTSystem does not support equality constrained NLPs.")
+ end
+
+ # Evaluate sparsity pattern
+ jac_sparsity_I = create_array(cb, Int32, cb.nnzj)
+ jac_sparsity_J = create_array(cb, Int32, cb.nnzj)
+ _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J)
+
+ quasi_newton = create_quasi_newton(hessian_approximation, cb, n)
+ hess_sparsity_I, hess_sparsity_J = build_hessian_structure(cb, hessian_approximation)
+
+ force_lower_triangular!(hess_sparsity_I,hess_sparsity_J)
+
+ n_jac = length(jac_sparsity_I)
+ n_hess = length(hess_sparsity_I)
+ n_tot = n + n_slack
+ nlb = length(ind_cons.ind_lb)
+ nub = length(ind_cons.ind_ub)
+
+
+ reg = VT(undef, n_tot)
+ pr_diag = VT(undef, n_tot)
+ du_diag = VT(undef, m)
+ l_diag = VT(undef, nlb)
+ u_diag = VT(undef, nub)
+ l_lower = VT(undef, nlb)
+ u_lower = VT(undef, nub)
+ buffer = VT(undef, m)
+ buffer2= VT(undef, m)
+ hess = VT(undef, n_hess)
+ jac = VT(undef, n_jac)
+ diag_buffer = VT(undef, m)
+ fill!(jac, zero(T))
+
+ hess_raw = SparseMatrixCOO(n, n, hess_sparsity_I, hess_sparsity_J, hess)
+
+ jt_coo = SparseMatrixCOO(
+ n, m,
+ jac_sparsity_J,
+ jac_sparsity_I,
+ jac,
+ )
+ jt_csc, jt_csc_map = coo_to_csc(jt_coo)
+ hess_com, hess_csc_map = coo_to_csc(hess_raw)
+
+ aug_com, dptr, hptr, jptr = build_condensed_aug_symbolic(
+ hess_com,
+ jt_csc
+ )
+ _linear_solver = linear_solver(aug_com; opt = opt_linear_solver)
+ ext = get_sparse_condensed_ext(VT, hess_com, jptr, jt_csc_map, hess_csc_map)
+ return SparseCondensedKKTSystem(
+ hess, hess_raw, hess_com, hess_csc_map,
+ jac, jt_coo, jt_csc, jt_csc_map,
+ quasi_newton,
+ reg, pr_diag, du_diag,
+ l_diag, u_diag, l_lower, u_lower,
+ buffer, buffer2,
+ aug_com, diag_buffer, dptr, hptr, jptr,
+ _linear_solver,
+ ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub,
+ ext
+ )
+end
+
+get_sparse_condensed_ext(::Type{Vector{T}},args...) where T = nothing
+
+num_variables(kkt::SparseCondensedKKTSystem) = length(kkt.pr_diag)
+function is_inertia_correct(kkt::SparseCondensedKKTSystem, num_pos, num_zero, num_neg)
+ return (num_zero == 0) && (num_pos == size(kkt.aug_com, 1))
+end
+
+Base.size(kkt::SparseCondensedKKTSystem,n::Int) = size(kkt.aug_com,n)
+
+function compress_jacobian!(kkt::SparseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}}
+ ns = length(kkt.ind_ineq)
+ transfer!(kkt.jt_csc, kkt.jt_coo, kkt.jt_csc_map)
+end
+
+function jtprod!(y::AbstractVector, kkt::SparseCondensedKKTSystem, x::AbstractVector)
+ n = size(kkt.hess_com, 1)
+ m = size(kkt.jt_csc, 2)
+
+ mul!(view(y, 1:n), kkt.jt_csc, x)
+ y[size(kkt.jt_csc,1)+1:end] .= -x
+end
+
+function _sym_length(Jt)
+ len = 0
+ for i=1:size(Jt,2)
+ n = Jt.colptr[i+1] - Jt.colptr[i]
+ len += div(n^2 + n, 2)
+ end
+ return len
+end
+
+function _build_condensed_aug_symbolic_hess(H, sym, sym2)
+ for i in 1:size(H,2)
+ for j in H.colptr[i]:H.colptr[i+1]-1
+ c = H.rowval[j]
+ sym[j] = (0,j,0)
+ sym2[j] = (c,i)
+ end
+ end
+end
+
+function _build_condensed_aug_symbolic_jt(Jt, sym, sym2)
+
+ cnt = 0
+ for i in 1:size(Jt,2)
+ for j in Jt.colptr[i]:Jt.colptr[i+1]-1
+ for k in j:Jt.colptr[i+1]-1
+ c1 = Jt.rowval[j]
+ c2 = Jt.rowval[k]
+ sym[cnt+=1] = (i,j,k)
+ sym2[cnt] = (c2,c1)
+ end
+ end
+ end
+end
+
+function getptr(array; by = (x,y)->x != y)
+ bitarray = similar(array, Bool, length(array)+1)
+ fill!(bitarray, true)
+ bitarray[2:end-1] .= by.(@view(array[1:end-1]), @view(array[2:end]))
+ findall(bitarray)
+end
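`getptr` returns the start index of each run of equal consecutive values, plus a final one-past-the-end pointer. A worked example with an illustrative input:

    # Runs 1,1 | 2,2,2 | 5 start at positions 1, 3, 6; 7 closes the last run.
    getptr([1, 1, 2, 2, 2, 5]) == [1, 3, 6, 7]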
+
+nzval(H) = H.nzval
+
+@inbounds function build_condensed_aug_symbolic(H::AbstractSparseMatrix{Tv,Ti}, Jt) where {Tv, Ti}
+ nnzjtsj = _sym_length(Jt)
+
+ sym = similar(nzval(H), Tuple{Int,Int,Int},
+ size(H,2) + nnz(H) + nnzjtsj
+ )
+ sym2 = similar(nzval(H), Tuple{Int,Int},
+ size(H,2) + nnz(H) + nnzjtsj
+ )
+ dptr = similar(nzval(H), Tuple{Ti,Ti},
+ size(H,2)
+ )
+ hptr = similar(nzval(H), Tuple{Ti,Ti},
+ nnz(H)
+ )
+ jptr = similar(nzval(H), Tuple{Ti,Tuple{Ti,Ti,Ti}},
+ nnzjtsj
+ )
+ colptr = fill!(
+ similar(nzval(H), Ti, size(H,1)+1),
+ one(Tv)
+ )
+
+ n = size(H,2)
+
+ map!(
+ i->(-1,i,0),
+ @view(sym[1:n]),
+ 1:size(H,2)
+ )
+ map!(
+ i->(i,i),
+ @view(sym2[1:n]),
+ 1:size(H,2)
+ )
+
+ _build_condensed_aug_symbolic_hess(
+ H,
+ @view(sym[n+1:n+nnz(H)]),
+ @view(sym2[n+1:n+nnz(H)])
+ )
+ _build_condensed_aug_symbolic_jt(
+ Jt,
+ @view(sym[n+nnz(H)+1:n+nnz(H) + nnzjtsj]),
+ @view(sym2[n+nnz(H)+1:n+nnz(H)+nnzjtsj])
+ )
+
+ p = sortperm(sym2; by = ((i,j),) -> (j,i))
+ permute!(sym, p)
+ permute!(sym2, p)
+
+ by(x,y) = x != y
+
+ bitarray = similar(sym2, Bool, length(sym2))
+ fill!(bitarray, true)
+ bitarray[2:end] .= by.(@view(sym2[1:end-1]), @view(sym2[2:end]))
+ guide = cumsum(bitarray)
+
+ b = findall(x->x[1] == -1, sym)
+ dptr = map((x,y)->(Int32(x),Int32(y[2])), @view(guide[b]), @view(sym[b]))
+
+ b = findall(x->x[1] == 0, sym)
+ hptr = map((x,y)->(Int32(x),Int32(y[2])), @view(guide[b]), @view(sym[b]))
+
+ b = findall(x->x[1] != -1 && x[1] != 0, sym)
+ jptr = map((x,y)->(Int32(x),y), @view(guide[b]), @view(sym[b]))
+
+
+ ptr = findall(bitarray)
+ rowval = map(((row,col),)->Int32(row), @view(sym2[ptr]))
+
+ by2(x,y) = x[2] != y[2]
+ bitarray[2:end] .= by2.(@view(sym2[1:end-1]), @view(sym2[2:end]))
+ ptr2 = findall(bitarray)
+
+ first, last = _first_and_last_col(sym2,ptr2)
+
+ fill!(
+ @view(colptr[1:first]),
+ 1
+ )
+
+ _set_colptr!(colptr, ptr2, sym2, guide)
+
+ fill!(
+ @view(colptr[last+1:end]),
+ length(ptr)+1
+ )
+
+ aug_com = _get_sparse_csc(
+ size(H),
+ colptr,
+ rowval,
+ similar(nzval(H), length(ptr))
+ )
+
+ return aug_com, dptr, hptr, jptr
+end
+
+function _get_sparse_csc(dims, colptr, rowval, nzval)
+ SparseMatrixCSC(
+ dims...,
+ colptr,
+ rowval,
+ nzval
+ )
+end
+
+function _first_and_last_col(sym2,ptr2)
+ first= sym2[1][2]
+ last = sym2[ptr2[end]][2]
+ return (first, last)
+end
+
+function _set_colptr!(colptr, ptr2, sym2, guide)
+ for i in @view(ptr2[2:end])
+
+ (~,prevcol) = sym2[i-1]
+ (row,col) = sym2[i]
+
+ fill!(@view(colptr[prevcol+1:col]), guide[i])
+ end
+end
+
+@inbounds function _build_condensed_aug_coord!(aug_com::SparseMatrixCSC{Tv,Ti}, pr_diag, H, Jt, diag_buffer, dptr, hptr, jptr) where {Tv, Ti}
+ fill!(aug_com.nzval, zero(Tv))
+
+ @simd for idx in eachindex(hptr)
+ i,j = hptr[idx]
+ aug_com.nzval[i] += H.nzval[j]
+ end
+
+ @simd for idx in eachindex(dptr)
+ i,j = dptr[idx]
+ aug_com.nzval[i] += pr_diag[j]
+ end
+
+ @simd for idx in eachindex(jptr)
+ (i,(j,k,l)) = jptr[idx]
+ aug_com.nzval[i] += diag_buffer[j] * Jt.nzval[k] * Jt.nzval[l]
+ end
+end
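The numeric pass above is driven entirely by the pointer arrays built symbolically: `hptr` scatters Hessian nonzeros, `dptr` the primal diagonal, and `jptr` the pairwise products that assemble Jᵀ Σ̃ J. A worked reading of one `jptr` entry (indices illustrative):

    # jptr entry (5, (2, 7, 9)) means: nonzeros 7 and 9 of Jt both live in
    # column 2 (inequality row 2), so the numeric pass adds
    #     diag_buffer[2] * Jt.nzval[7] * Jt.nzval[9]
    # to aug_com.nzval[5] — one scalar term of Aᵢᵀ Σ̃ Aᵢ.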
+
+function build_condensed_aug_coord!(kkt::AbstractCondensedKKTSystem{T,VT,MT}) where {T, VT, MT <: SparseMatrixCSC{T}}
+ _build_condensed_aug_coord!(
+ kkt.aug_com, kkt.pr_diag, kkt.hess_com, kkt.jt_csc, kkt.diag_buffer,
+ kkt.dptr, kkt.hptr, kkt.jptr
+ )
+end
+
+function build_kkt!(kkt::SparseCondensedKKTSystem)
+
+ n = size(kkt.hess_com, 1)
+ m = size(kkt.jt_csc, 2)
+
+
+ Σx = view(kkt.pr_diag, 1:n)
+ Σs = view(kkt.pr_diag, n+1:n+m)
+ Σd = kkt.du_diag
+
+ kkt.diag_buffer .= Σs ./ ( 1 .- Σd .* Σs)
+ build_condensed_aug_coord!(kkt)
+end
+
+get_jacobian(kkt::SparseCondensedKKTSystem) = kkt.jac
+
diff --git a/src/KKT/Sparse/unreduced.jl b/src/KKT/Sparse/unreduced.jl
new file mode 100644
index 00000000..8cd48ae1
--- /dev/null
+++ b/src/KKT/Sparse/unreduced.jl
@@ -0,0 +1,184 @@
+
+"""
+ SparseUnreducedKKTSystem{T, VT, MT, QN} <: AbstractUnreducedKKTSystem{T, VT, MT, QN}
+
+Implement the [`AbstractUnreducedKKTSystem`](@ref) in sparse COO format.
+
+"""
+struct SparseUnreducedKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractUnreducedKKTSystem{T, VT, MT, QN}
+ hess::VT
+ jac_callback::VT
+ jac::VT
+ quasi_newton::QN
+ reg::VT
+ pr_diag::VT
+ du_diag::VT
+ l_diag::VT
+ u_diag::VT
+ l_lower::VT
+ u_lower::VT
+ l_lower_aug::VT
+ u_lower_aug::VT
+
+ # Augmented system
+ aug_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ aug_com::MT
+ aug_csc_map::Union{Nothing, VI}
+
+ # Hessian
+ hess_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ hess_com::MT
+ hess_csc_map::Union{Nothing, VI}
+
+ # Jacobian
+ jac_raw::SparseMatrixCOO{T,Int32,VT, VI32}
+ jac_com::MT
+ jac_csc_map::Union{Nothing, VI}
+
+ # LinearSolver
+ linear_solver::LS
+
+ # Info
+ ind_ineq::VI
+ ind_lb::VI
+ ind_ub::VI
+end
+
+function create_kkt_system(
+ ::Type{SparseUnreducedKKTSystem},
+ cb::SparseCallback{T,VT},
+ ind_cons,
+ linear_solver::Type;
+ opt_linear_solver=default_options(linear_solver),
+ hessian_approximation=ExactHessian,
+) where {T, VT}
+ ind_ineq = ind_cons.ind_ineq
+ ind_lb = ind_cons.ind_lb
+ ind_ub = ind_cons.ind_ub
+
+ n_slack = length(ind_ineq)
+ nlb = length(ind_cons.ind_lb)
+ nub = length(ind_cons.ind_ub)
+ # Deduce KKT size.
+ n = cb.nvar
+ m = cb.ncon
+
+ # Quasi-newton
+ quasi_newton = create_quasi_newton(hessian_approximation, cb, n)
+
+ # Evaluate sparsity pattern
+ jac_sparsity_I = create_array(cb, Int32, cb.nnzj)
+ jac_sparsity_J = create_array(cb, Int32, cb.nnzj)
+ _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J)
+
+ hess_sparsity_I = create_array(cb, Int32, cb.nnzh)
+ hess_sparsity_J = create_array(cb, Int32, cb.nnzh)
+ _hess_sparsity_wrapper!(cb,hess_sparsity_I,hess_sparsity_J)
+
+ force_lower_triangular!(hess_sparsity_I,hess_sparsity_J)
+
+ n_slack = length(ind_ineq)
+ n_jac = length(jac_sparsity_I)
+ n_hess = length(hess_sparsity_I)
+ n_tot = n + n_slack
+
+ aug_mat_length = n_tot + m + n_hess + n_jac + n_slack + 2*nlb + 2*nub
+ aug_vec_length = n_tot + m + nlb + nub
+
+ I = create_array(cb, Int32, aug_mat_length)
+ J = create_array(cb, Int32, aug_mat_length)
+ V = zeros(aug_mat_length)
+
+ offset = n_tot + n_jac + n_slack + n_hess + m
+
+ I[1:n_tot] .= 1:n_tot
+ I[n_tot+1:n_tot+n_hess] = hess_sparsity_I
+ I[n_tot+n_hess+1:n_tot+n_hess+n_jac].=(jac_sparsity_I.+n_tot)
+ I[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack].=(ind_ineq.+n_tot)
+ I[n_tot+n_hess+n_jac+n_slack+1:offset].=(n_tot+1:n_tot+m)
+
+ J[1:n_tot] .= 1:n_tot
+ J[n_tot+1:n_tot+n_hess] = hess_sparsity_J
+ J[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= jac_sparsity_J
+ J[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= (n+1:n+n_slack)
+ J[n_tot+n_hess+n_jac+n_slack+1:offset].=(n_tot+1:n_tot+m)
+
+ I[offset+1:offset+nlb] .= (1:nlb).+(n_tot+m)
+ I[offset+nlb+1:offset+2nlb] .= (1:nlb).+(n_tot+m)
+ I[offset+2nlb+1:offset+2nlb+nub] .= (1:nub).+(n_tot+m+nlb)
+ I[offset+2nlb+nub+1:offset+2nlb+2nub] .= (1:nub).+(n_tot+m+nlb)
+ J[offset+1:offset+nlb] .= (1:nlb).+(n_tot+m)
+ J[offset+nlb+1:offset+2nlb] .= ind_lb
+ J[offset+2nlb+1:offset+2nlb+nub] .= (1:nub).+(n_tot+m+nlb)
+ J[offset+2nlb+nub+1:offset+2nlb+2nub] .= ind_ub
+
+ pr_diag = _madnlp_unsafe_wrap(V,n_tot)
+ du_diag = _madnlp_unsafe_wrap(V,m, n_jac + n_slack+n_hess+n_tot+1)
+
+ l_diag = _madnlp_unsafe_wrap(V, nlb, offset+1)
+ u_diag = _madnlp_unsafe_wrap(V, nub, offset+2nlb+1)
+ l_lower_aug = _madnlp_unsafe_wrap(V, nlb, offset+nlb+1)
+ u_lower_aug = _madnlp_unsafe_wrap(V, nub, offset+2nlb+nub+1)
+ reg = VT(undef, n_tot)
+ l_lower = VT(undef, nlb)
+ u_lower = VT(undef, nub)
+
+ hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1)
+ jac = _madnlp_unsafe_wrap(V, n_jac + n_slack, n_hess+n_tot+1)
+ jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1)
+
+ hess_raw = SparseMatrixCOO(
+ n_tot, n_tot,
+ hess_sparsity_I,
+ hess_sparsity_J,
+ hess,
+ )
+ aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V)
+ jac_raw = SparseMatrixCOO(
+ m, n_tot,
+ Int32[jac_sparsity_I; ind_ineq],
+ Int32[jac_sparsity_J; n+1:n+n_slack],
+ jac,
+ )
+
+ aug_com, aug_csc_map = coo_to_csc(aug_raw)
+ jac_com, jac_csc_map = coo_to_csc(jac_raw)
+ hess_com, hess_csc_map = coo_to_csc(hess_raw)
+
+ _linear_solver = linear_solver(aug_com; opt = opt_linear_solver)
+ return SparseUnreducedKKTSystem(
+ hess, jac_callback, jac, quasi_newton, reg, pr_diag, du_diag,
+ l_diag, u_diag, l_lower, u_lower, l_lower_aug, u_lower_aug,
+ aug_raw, aug_com, aug_csc_map,
+ hess_raw, hess_com, hess_csc_map,
+ jac_raw, jac_com, jac_csc_map,
+ _linear_solver,
+ ind_ineq, ind_lb, ind_ub,
+ )
+end
+
+function initialize!(kkt::SparseUnreducedKKTSystem{T}) where T
+ fill!(kkt.reg, one(T))
+ fill!(kkt.pr_diag, one(T))
+ fill!(kkt.du_diag, zero(T))
+ fill!(kkt.hess, zero(T))
+ fill!(kkt.l_lower, zero(T))
+ fill!(kkt.u_lower, zero(T))
+ fill!(kkt.l_diag, one(T))
+ fill!(kkt.u_diag, one(T))
+ fill!(kkt.l_lower_aug, zero(T))
+ fill!(kkt.u_lower_aug, zero(T))
+ fill!(nonzeros(kkt.hess_com), zero(T)) # so that mul! in the initial primal-dual solve has no effect
+end
+
+num_variables(kkt::SparseUnreducedKKTSystem) = length(kkt.pr_diag)
+
+function is_inertia_correct(kkt::SparseUnreducedKKTSystem, num_pos, num_zero, num_neg)
+ n, nlb, nub = num_variables(kkt), length(kkt.ind_lb), length(kkt.ind_ub)
+ return (num_zero == 0) && (num_pos == n + nlb + nub)
+end
+
+function build_kkt!(kkt::SparseUnreducedKKTSystem)
+ transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map)
+end
+
diff --git a/src/KKT/Sparse/utils.jl b/src/KKT/Sparse/utils.jl
new file mode 100644
index 00000000..94ef367d
--- /dev/null
+++ b/src/KKT/Sparse/utils.jl
@@ -0,0 +1,61 @@
+
+# Template to dispatch on sparse representation
+const AbstractSparseKKTSystem{T, VT, MT, QN} = Union{
+ SparseKKTSystem{T, VT, MT, QN},
+ SparseCondensedKKTSystem{T, VT, MT, QN},
+ SparseUnreducedKKTSystem{T, VT, MT, QN},
+}
+
+#=
+ Generic sparse methods
+=#
+function build_hessian_structure(cb::SparseCallback, ::Type{<:ExactHessian})
+ hess_I = create_array(cb, Int32, cb.nnzh)
+ hess_J = create_array(cb, Int32, cb.nnzh)
+ _hess_sparsity_wrapper!(cb,hess_I,hess_J)
+ return hess_I, hess_J
+end
+# NB. Quasi-Newton methods require only the sparsity pattern
+# of the diagonal, which is used to store the term ξ I.
+function build_hessian_structure(cb::SparseCallback, ::Type{<:AbstractQuasiNewton})
+ hess_I = collect(Int32, 1:cb.nvar)
+ hess_J = collect(Int32, 1:cb.nvar)
+ return hess_I, hess_J
+end
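A quick check of the quasi-Newton branch, which allocates only the diagonal pattern (expected output shown for an assumed `cb.nvar` of 3):

    # For cb.nvar == 3, any AbstractQuasiNewton subtype gets the pure diagonal:
    # hess_I == Int32[1, 2, 3] and hess_J == Int32[1, 2, 3]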
+
+function jtprod!(y::AbstractVector, kkt::AbstractSparseKKTSystem, x::AbstractVector)
+ mul!(y, kkt.jac_com', x)
+end
+
+get_jacobian(kkt::AbstractSparseKKTSystem) = kkt.jac_callback
+
+nnz_jacobian(kkt::AbstractSparseKKTSystem) = nnz(kkt.jac_raw)
+
+function compress_jacobian!(kkt::AbstractSparseKKTSystem)
+ ns = length(kkt.ind_ineq)
+ kkt.jac[end-ns+1:end] .= -1.0
+ transfer!(kkt.jac_com, kkt.jac_raw, kkt.jac_csc_map)
+end
+
+function compress_jacobian!(kkt::AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}}
+ ns = length(kkt.ind_ineq)
+ kkt.jac[end-ns+1:end] .= -1.0
+ copyto!(kkt.jac_com, kkt.jac_raw)
+end
+
+function compress_hessian!(kkt::AbstractSparseKKTSystem)
+ transfer!(kkt.hess_com, kkt.hess_raw, kkt.hess_csc_map)
+end
+
+function initialize!(kkt::AbstractSparseKKTSystem{T}) where T
+ fill!(kkt.reg, one(T))
+ fill!(kkt.pr_diag, one(T))
+ fill!(kkt.du_diag, zero(T))
+ fill!(kkt.hess, zero(T))
+ fill!(kkt.l_lower, zero(T))
+ fill!(kkt.u_lower, zero(T))
+ fill!(kkt.l_diag, one(T))
+ fill!(kkt.u_diag, one(T))
+ fill!(nonzeros(kkt.hess_com), zero(T)) # so that mul! in the initial primal-dual solve has no effect
+end
+
diff --git a/src/KKT/dense.jl b/src/KKT/dense.jl
deleted file mode 100644
index f7bba195..00000000
--- a/src/KKT/dense.jl
+++ /dev/null
@@ -1,378 +0,0 @@
-
-"""
- DenseKKTSystem{T, VT, MT} <: AbstractReducedKKTSystem{T, VT, MT}
-
-Implement [`AbstractReducedKKTSystem`](@ref) with dense matrices.
-
-Requires a dense linear solver to be factorized (otherwise an error is returned).
-
-"""
-struct DenseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN}
- hess::MT
- jac::MT
- quasi_newton::QN
- pr_diag::VT
- du_diag::VT
- diag_hess::VT
- # KKT system
- aug_com::MT
- # Info
- n_ineq::Int
- ind_ineq::Vector{Int}
- ind_fixed::Vector{Int}
- constraint_scaling::VT
- # Buffers
- etc::Dict{Symbol, Any}
-end
-
-function DenseKKTSystem{T, VT, MT, QN}(n, m, ind_ineq, ind_fixed) where {T, VT, MT, QN}
- ns = length(ind_ineq)
- hess = MT(undef, n, n)
- jac = MT(undef, m, n)
- aug_com = MT(undef, n+ns+m, n+ns+m)
- pr_diag = VT(undef, n+ns)
- du_diag = VT(undef, m)
- diag_hess = VT(undef, n)
-
- constraint_scaling = VT(undef, m)
-
- # Init!
- fill!(aug_com, zero(T))
- fill!(hess, zero(T))
- fill!(jac, zero(T))
- fill!(pr_diag, zero(T))
- fill!(du_diag, zero(T))
- fill!(diag_hess, zero(T))
- fill!(constraint_scaling, one(T))
-
- quasi_newton = QN(n)
-
- return DenseKKTSystem{T, VT, MT, QN}(
- hess, jac, quasi_newton, pr_diag, du_diag, diag_hess, aug_com,
- ns, ind_ineq, ind_fixed, constraint_scaling, Dict{Symbol, Any}(),
- )
-end
-
-function DenseKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp)) where {T, VT, MT, QN}
- return DenseKKTSystem{T, VT, MT, QN}(
- get_nvar(nlp), get_ncon(nlp), info_constraints.ind_ineq, info_constraints.ind_fixed
- )
-end
-
-"""
- DenseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN}
-
-Implement [`AbstractCondensedKKTSystem`](@ref) with dense matrices.
-
-Requires a dense linear solver to factorize the associated KKT system (otherwise an error is returned).
-
-"""
-struct DenseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN}
- hess::MT
- jac::MT
- quasi_newton::QN
- jac_ineq::MT
- pr_diag::VT
- du_diag::VT
- # KKT system
- aug_com::MT
- # Info
- n_eq::Int
- ind_eq::Vector{Int}
- ind_eq_shifted::Vector{Int}
- n_ineq::Int
- ind_ineq::Vector{Int}
- ind_ineq_shifted::Vector{Int}
- ind_fixed::Vector{Int}
- constraint_scaling::VT
- # Buffers
- etc::Dict{Symbol, Any}
-end
-
-function DenseCondensedKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp)) where {T, VT, MT, QN}
- n = get_nvar(nlp)
- m = get_ncon(nlp)
- ns = length(info_constraints.ind_ineq)
- n_eq = m - ns
-
- aug_com = MT(undef, n+m-ns, n+m-ns)
- hess = MT(undef, n, n)
- jac = MT(undef, m, n)
- jac_ineq = MT(undef, ns, n)
-
- pr_diag = VT(undef, n+ns)
- du_diag = VT(undef, m)
- constraint_scaling = VT(undef, m)
-
- # Init!
- fill!(aug_com, zero(T))
- fill!(hess, zero(T))
- fill!(jac, zero(T))
- fill!(pr_diag, zero(T))
- fill!(du_diag, zero(T))
- fill!(constraint_scaling, one(T))
-
- ind_eq = setdiff(1:m, info_constraints.ind_ineq)
-
- # Shift indexes to avoid additional allocation in views
- ind_eq_shifted = ind_eq .+ n .+ ns
- ind_ineq_shifted = info_constraints.ind_ineq .+ n .+ ns
-
- quasi_newton = QN(n)
- return DenseCondensedKKTSystem{T, VT, MT, QN}(
- hess, jac, quasi_newton, jac_ineq, pr_diag, du_diag, aug_com,
- n_eq, ind_eq, ind_eq_shifted,
- ns, info_constraints.ind_ineq, ind_ineq_shifted,
- info_constraints.ind_fixed,
- constraint_scaling, Dict{Symbol, Any}(),
- )
-end
-
-# For templating
-const AbstractDenseKKTSystem{T, VT, MT, QN} = Union{
- DenseKKTSystem{T, VT, MT, QN},
- DenseCondensedKKTSystem{T, VT, MT, QN},
-}
-
-#=
- Generic functions
-=#
-
-function jtprod!(y::AbstractVector, kkt::AbstractDenseKKTSystem, x::AbstractVector)
- nx = size(kkt.hess, 1)
- ns = kkt.n_ineq
- yx = view(y, 1:nx)
- ys = view(y, 1+nx:nx+ns)
- # / x
- mul!(yx, kkt.jac', x)
- # / s
- ys .= -x[kkt.ind_ineq] .* kkt.constraint_scaling[kkt.ind_ineq]
- return
-end
-
-function set_jacobian_scaling!(kkt::AbstractDenseKKTSystem, constraint_scaling::AbstractVector)
- copyto!(kkt.constraint_scaling, constraint_scaling)
-end
-
-function compress_jacobian!(kkt::AbstractDenseKKTSystem)
- # Scale
- kkt.jac .*= kkt.constraint_scaling
- return
-end
-
-get_raw_jacobian(kkt::AbstractDenseKKTSystem) = kkt.jac
-nnz_jacobian(kkt::AbstractDenseKKTSystem) = length(kkt.jac)
-
-#=
- DenseKKTSystem
-=#
-
-is_reduced(::DenseKKTSystem) = true
-num_variables(kkt::DenseKKTSystem) = length(kkt.pr_diag)
-
-function mul!(y::AbstractVector, kkt::DenseKKTSystem, x::AbstractVector)
- symul!(y, kkt.aug_com, x)
-end
-function mul!(y::ReducedKKTVector, kkt::DenseKKTSystem, x::ReducedKKTVector)
- mul!(full(y), kkt.aug_com, full(x))
-end
-
-# Special getters for Jacobian
-function get_jacobian(kkt::DenseKKTSystem)
- n = size(kkt.hess, 1)
- ns = length(kkt.ind_ineq)
- return view(kkt.jac, :, 1:n)
-end
-
-function diag_add!(dest::AbstractMatrix, d1::AbstractVector, d2::AbstractVector)
- n = length(d1)
- @inbounds for i in 1:n
- dest[i, i] = d1[i] + d2[i]
- end
-end
-
-function _build_dense_kkt_system!(dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, con_scale, n, m, ns)
- # Transfer Hessian
- for i in 1:n, j in 1:i
- if i == j
- dest[i, i] = pr_diag[i] + diag_hess[i]
- else
- dest[i, j] = hess[i, j]
- dest[j, i] = hess[j, i]
- end
- end
- # Transfer slack diagonal
- for i in 1:ns
- dest[i+n, i+n] = pr_diag[i+n]
- end
- # Transfer Jacobian / variables
- for i in 1:m, j in 1:n
- dest[i + n + ns, j] = jac[i, j]
- dest[j, i + n + ns] = jac[i, j]
- end
- # Transfer Jacobian / slacks
- for j in 1:ns
- is = ind_ineq[j]
- dest[is + n + ns, j + n] = - con_scale[is]
- dest[j + n, is + n + ns] = - con_scale[is]
- end
- # Transfer dual regularization
- for i in 1:m
- dest[i + n + ns, i + n + ns] = du_diag[i]
- end
-end
-
-function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT}
- n = size(kkt.hess, 1)
- m = size(kkt.jac, 1)
- ns = length(kkt.ind_ineq)
- _build_dense_kkt_system!(kkt.aug_com, kkt.hess, kkt.jac,
- kkt.pr_diag, kkt.du_diag, kkt.diag_hess,
- kkt.ind_ineq, kkt.constraint_scaling,
- n, m, ns)
- treat_fixed_variable!(kkt)
-end
-
-function compress_hessian!(kkt::DenseKKTSystem)
- # Transfer diagonal term for future regularization
- diag!(kkt.diag_hess, kkt.hess)
-end
-
-#=
- DenseCondensedKKTSystem
-=#
-
-is_reduced(kkt::DenseCondensedKKTSystem) = true
-num_variables(kkt::DenseCondensedKKTSystem) = size(kkt.hess, 1)
-
-function get_slack_regularization(kkt::DenseCondensedKKTSystem)
- n, ns = num_variables(kkt), kkt.n_ineq
- return view(kkt.pr_diag, n+1:n+ns)
-end
-get_scaling_inequalities(kkt::DenseCondensedKKTSystem) = kkt.constraint_scaling[kkt.ind_ineq]
-
-function _build_condensed_kkt_system!(
- dest::AbstractMatrix, hess::AbstractMatrix, jac::AbstractMatrix,
- pr_diag::AbstractVector, du_diag::AbstractVector, ind_eq::AbstractVector, n, m_eq,
-)
- # Transfer Hessian
- @inbounds for i in 1:n, j in 1:i
- if i == j
- dest[i, i] += pr_diag[i] + hess[i, i]
- else
- dest[i, j] += hess[i, j]
- dest[j, i] += hess[j, i]
- end
- end
- # Transfer Jacobian / variables
- @inbounds for i in 1:m_eq, j in 1:n
- is = ind_eq[i]
- dest[i + n, j] = jac[is, j]
- dest[j, i + n] = jac[is, j]
- end
- # Transfer dual regularization
- @inbounds for i in 1:m_eq
- is = ind_eq[i]
- dest[i + n, i + n] = du_diag[is]
- end
-end
-
-function _build_ineq_jac!(
- dest::AbstractMatrix, jac::AbstractMatrix, pr_diag::AbstractVector,
- ind_ineq::AbstractVector, ind_fixed::AbstractVector, con_scale::AbstractVector,
- n, m_ineq,
-)
- @inbounds for i in 1:m_ineq, j in 1:n
- is = ind_ineq[i]
- dest[i, j] = jac[is, j] * sqrt(pr_diag[n+i]) / con_scale[is]
- end
- # need to zero the fixed components
- dest[:, ind_fixed] .= 0.0
-end
-
-function build_kkt!(kkt::DenseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT}
- n = size(kkt.hess, 1)
- ns = kkt.n_ineq
- n_eq = length(kkt.ind_eq)
- m = size(kkt.jac, 1)
-
- kkt.pr_diag[kkt.ind_fixed] .= 0
- fill!(kkt.aug_com, zero(T))
- # Build √Σₛ * J
- _build_ineq_jac!(kkt.jac_ineq, kkt.jac, kkt.pr_diag, kkt.ind_ineq, kkt.ind_fixed, kkt.constraint_scaling, n, ns)
-
- # Select upper-left block
- W = if n_eq > 0
- view(kkt.aug_com, 1:n, 1:n) # TODO: does not work on GPU
- else
- kkt.aug_com
- end
- # Build J' * Σₛ * J
- mul!(W, kkt.jac_ineq', kkt.jac_ineq)
-
- _build_condensed_kkt_system!(
- kkt.aug_com, kkt.hess, kkt.jac,
- kkt.pr_diag, kkt.du_diag,
- kkt.ind_eq, n, kkt.n_eq,
- )
- treat_fixed_variable!(kkt)
-end
-
-# TODO: check how to handle inertia with the condensed form
-function is_inertia_correct(kkt::DenseCondensedKKTSystem, num_pos, num_zero, num_neg)
- return (num_zero == 0)
-end
-
-# For inertia-free regularization
-function _mul_expanded!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
- n = size(kkt.hess, 1)
- ns = kkt.n_ineq
- m = size(kkt.jac, 1)
-
- Σx = view(kkt.pr_diag, 1:n)
- Σs = view(kkt.pr_diag, 1+n:n+ns)
- Σd = kkt.du_diag
-
- # Decompose x
- xx = view(x, 1:n)
- xs = view(x, 1+n:n+ns)
- xy = view(x, 1+n+ns:n+ns+m)
-
- # Decompose y
- yx = view(y, 1:n)
- ys = view(y, 1+n:n+ns)
- yy = view(y, 1+n+ns:n+ns+m)
-
- # / x (variable)
- yx .= Σx .* xx
- symul!(yx, kkt.hess, xx)
- mul!(yx, kkt.jac', xy, 1.0, 1.0)
-
- # / s (slack)
- ys .= Σs .* xs
- ys .-= kkt.constraint_scaling[kkt.ind_ineq] .* xy[kkt.ind_ineq]
-
- # / y (multiplier)
- yy .= Σd .* xy
- mul!(yy, kkt.jac, xx, 1.0, 1.0)
- yy[kkt.ind_ineq] .-= kkt.constraint_scaling[kkt.ind_ineq] .* xs
- return
-end
-
-function mul!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
- # TODO: implement properly with AbstractKKTRHS
- if length(y) == length(x) == size(kkt.aug_com, 1)
- symul!(y, kkt.aug_com, x)
- else
- _mul_expanded!(y, kkt, x)
- end
-end
-
-function mul!(y::ReducedKKTVector, kkt::DenseCondensedKKTSystem, x::ReducedKKTVector)
- mul!(full(y), kkt, full(x))
-end
-
-function jprod_ineq!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector)
- mul!(y, kkt.jac_ineq, x)
-end
-
diff --git a/src/KKT/rhs.jl b/src/KKT/rhs.jl
index 52aaa51c..5747ce3f 100644
--- a/src/KKT/rhs.jl
+++ b/src/KKT/rhs.jl
@@ -77,43 +77,6 @@ function axpy!(a::Number, X::AbstractKKTVector, Y::AbstractKKTVector)
axpy!(a, full(X), full(Y))
end
-#=
- ReducedKKTVector
-=#
-
-"""
- ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
-
-KKT vector ``(x, s, y, z)``, associated to a [`AbstractReducedKKTSystem`](@ref).
-
-Compared to [`UnreducedKKTVector`](@ref), it does not store
-the dual values associated to the primal's lower and upper bounds.
-"""
-struct ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
- values::VT
- xp::VT # unsafe view
- xl::VT # unsafe view
-end
-
-ReducedKKTVector{T,VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} = ReducedKKTVector{T,VT}(n, m)
-function ReducedKKTVector{T,VT}(n::Int, m::Int) where {T, VT <: AbstractVector{T}}
- x = VT(undef, n + m)
- fill!(x, 0.0)
- # Wrap directly array x to avoid dealing with views
- xp = _madnlp_unsafe_wrap(x, n)
- xl = _madnlp_unsafe_wrap(x, m, n+1)
- return ReducedKKTVector{T, VT}(x, xp, xl)
-end
-function ReducedKKTVector(rhs::AbstractKKTVector)
- return ReducedKKTVector(number_primal(rhs), number_dual(rhs))
-end
-
-full(rhs::ReducedKKTVector) = rhs.values
-primal(rhs::ReducedKKTVector) = rhs.xp
-dual(rhs::ReducedKKTVector) = rhs.xl
-primal_dual(rhs::ReducedKKTVector) = rhs.values
-
-
#=
UnreducedKKTVector
=#
@@ -124,25 +87,59 @@ primal_dual(rhs::ReducedKKTVector) = rhs.values
Full KKT vector ``(x, s, y, z, ν, w)``, associated to a [`AbstractUnreducedKKTSystem`](@ref).
"""
-struct UnreducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
+struct UnreducedKKTVector{T, VT<:AbstractVector{T}, VI} <: AbstractKKTVector{T, VT}
values::VT
x::VT # unsafe view
xp::VT # unsafe view
+ xp_lr::SubVector{T, VT, VI}
+ xp_ur::SubVector{T, VT, VI}
xl::VT # unsafe view
xzl::VT # unsafe view
xzu::VT # unsafe view
end
-function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}}
+function UnreducedKKTVector(
+ ::Type{VT}, n::Int, m::Int, nlb::Int, nub::Int, ind_lb, ind_ub
+) where {T, VT <: AbstractVector{T}}
values = VT(undef,n+m+nlb+nub)
- fill!(values, 0.0)
+ fill!(values, zero(T))
# Wrap directly array x to avoid dealing with views
x = _madnlp_unsafe_wrap(values, n + m) # Primal-Dual
xp = _madnlp_unsafe_wrap(values, n) # Primal
xl = _madnlp_unsafe_wrap(values, m, n+1) # Dual
xzl = _madnlp_unsafe_wrap(values, nlb, n + m + 1) # Lower bound
xzu = _madnlp_unsafe_wrap(values, nub, n + m + nlb + 1) # Upper bound
- return UnreducedKKTVector{T, VT}(values, x, xp, xl, xzl, xzu)
+
+ xp_lr = view(xp, ind_lb)
+ xp_ur = view(xp, ind_ub)
+
+ return UnreducedKKTVector(values, x, xp, xp_lr, xp_ur, xl, xzl, xzu)
+end
+
+function UnreducedKKTVector(kkt::AbstractKKTSystem{T, VT}) where {T, VT}
+ return UnreducedKKTVector(
+ VT,
+ length(kkt.pr_diag),
+ length(kkt.du_diag),
+ length(kkt.l_diag),
+ length(kkt.u_diag),
+ kkt.ind_lb,
+ kkt.ind_ub,
+ )
+end
+
+function Base.copy(rhs::UnreducedKKTVector{T, VT}) where {T, VT}
+ new_rhs = UnreducedKKTVector(
+ VT,
+ length(rhs.xp),
+ length(rhs.xl),
+ length(rhs.xzl),
+ length(rhs.xzu),
+ rhs.xp_lr.indices[1],
+ rhs.xp_ur.indices[1],
+ )
+ copyto!(full(new_rhs), full(rhs))
+ return new_rhs
end
full(rhs::UnreducedKKTVector) = rhs.values
@@ -159,18 +156,24 @@ dual_ub(rhs::UnreducedKKTVector) = rhs.xzu
Primal vector ``(x, s)``.
"""
-struct PrimalVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT}
+struct PrimalVector{T, VT<:AbstractVector{T}, VI} <: AbstractKKTVector{T, VT}
values::VT
+ values_lr::SubVector{T, VT, VI}
+ values_ur::SubVector{T, VT, VI}
x::VT # unsafe view
s::VT # unsafe view
end
-function PrimalVector{T, VT}(nx::Int, ns::Int) where {T, VT <: AbstractVector{T}}
- values = VT(undef, nx+ns) ; fill!(values, zero(T))
- return PrimalVector{T, VT}(
- values,
- _madnlp_unsafe_wrap(values, nx),
- _madnlp_unsafe_wrap(values, ns, nx+1),
+function PrimalVector(::Type{VT}, nx::Int, ns::Int, ind_lb, ind_ub) where {T, VT <: AbstractVector{T}}
+ values = VT(undef, nx+ns)
+ fill!(values, zero(T))
+ x = _madnlp_unsafe_wrap(values, nx)
+ s = _madnlp_unsafe_wrap(values, ns, nx+1)
+ values_lr = view(values, ind_lb)
+ values_ur = view(values, ind_ub)
+
+ return PrimalVector(
+ values, values_lr, values_ur, x, s,
)
end
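
The unsafe wraps and views above all alias one contiguous buffer. A rough layout sketch of `UnreducedKKTVector`, using plain `view`s in place of `_madnlp_unsafe_wrap`:

    n, m, nlb, nub = 3, 2, 1, 1
    values = zeros(n + m + nlb + nub)
    x   = view(values, 1:n+m)                     # primal-dual
    xp  = view(values, 1:n)                       # primal (x, s)
    xl  = view(values, n+1:n+m)                   # duals y
    xzl = view(values, n+m+1:n+m+nlb)             # lower-bound duals
    xzu = view(values, n+m+nlb+1:n+m+nlb+nub)     # upper-bound duals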
diff --git a/src/KKT/sparse.jl b/src/KKT/sparse.jl
deleted file mode 100644
index c051d1d0..00000000
--- a/src/KKT/sparse.jl
+++ /dev/null
@@ -1,347 +0,0 @@
-
-"""
- SparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN}
-
-Implement the [`AbstractReducedKKTSystem`](@ref) in sparse COO format.
-
-"""
-struct SparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN}
- hess::VT
- jac_callback::VT
- jac::VT
- quasi_newton::QN
- pr_diag::VT
- du_diag::VT
- # Augmented system
- aug_raw::SparseMatrixCOO{T,Int32,VT}
- aug_com::MT
- aug_csc_map::Union{Nothing, Vector{Int}}
- # Jacobian
- jac_raw::SparseMatrixCOO{T,Int32,VT}
- jac_com::MT
- jac_csc_map::Union{Nothing, Vector{Int}}
- # Info
- ind_ineq::Vector{Int}
- ind_fixed::Vector{Int}
- ind_aug_fixed::Vector{Int}
- jacobian_scaling::VT
-end
-
-"""
- SparseUnreducedKKTSystem{T, VT, MT, QN} <: AbstractUnreducedKKTSystem{T, VT, MT, QN}
-
-Implement the [`AbstractUnreducedKKTSystem`](@ref) in sparse COO format.
-
-"""
-struct SparseUnreducedKKTSystem{T, VT, MT, QN} <: AbstractUnreducedKKTSystem{T, VT, MT, QN}
- hess::VT
- jac_callback::VT
- jac::VT
- quasi_newton::QN
- pr_diag::VT
- du_diag::VT
-
- l_diag::VT
- u_diag::VT
- l_lower::VT
- u_lower::VT
- aug_raw::SparseMatrixCOO{T,Int32,VT}
- aug_com::MT
- aug_csc_map::Union{Nothing, Vector{Int}}
-
- jac_raw::SparseMatrixCOO{T,Int32,VT}
- jac_com::MT
- jac_csc_map::Union{Nothing, Vector{Int}}
- ind_ineq::Vector{Int}
- ind_fixed::Vector{Int}
- ind_aug_fixed::Vector{Int}
- jacobian_scaling::VT
-end
-
-# Template to dispatch on sparse representation
-const AbstractSparseKKTSystem{T, VT, MT, QN} = Union{
- SparseKKTSystem{T, VT, MT, QN},
- SparseUnreducedKKTSystem{T, VT, MT, QN},
-}
-
-#=
- Generic sparse methods
-=#
-function build_hessian_structure(nlp::AbstractNLPModel, ::Type{<:ExactHessian})
- hess_I = Vector{Int32}(undef, get_nnzh(nlp.meta))
- hess_J = Vector{Int32}(undef, get_nnzh(nlp.meta))
- hess_structure!(nlp,hess_I,hess_J)
- return hess_I, hess_J
-end
-# NB. Quasi-Newton methods require only the sparsity pattern
-# of the diagonal term to store the term ξ I.
-function build_hessian_structure(nlp::AbstractNLPModel, ::Type{<:AbstractQuasiNewton})
- hess_I = collect(Int32, 1:get_nvar(nlp))
- hess_J = collect(Int32, 1:get_nvar(nlp))
- return hess_I, hess_J
-end
-
-function mul!(y::AbstractVector, kkt::AbstractSparseKKTSystem, x::AbstractVector)
- mul!(y, Symmetric(kkt.aug_com, :L), x)
-end
-function mul!(y::AbstractKKTVector, kkt::AbstractSparseKKTSystem, x::AbstractKKTVector)
- mul!(full(y), Symmetric(kkt.aug_com, :L), full(x))
-end
-
-function jtprod!(y::AbstractVector, kkt::AbstractSparseKKTSystem, x::AbstractVector)
- mul!(y, kkt.jac_com', x)
-end
-
-get_jacobian(kkt::AbstractSparseKKTSystem) = kkt.jac_callback
-
-nnz_jacobian(kkt::AbstractSparseKKTSystem) = nnz(kkt.jac_raw)
-
-function compress_jacobian!(kkt::AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}}
- ns = length(kkt.ind_ineq)
- kkt.jac[end-ns+1:end] .= -1.0
- kkt.jac .*= kkt.jacobian_scaling # scaling
- transfer!(kkt.jac_com, kkt.jac_raw, kkt.jac_csc_map)
-end
-
-function compress_jacobian!(kkt::AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}}
- ns = length(kkt.ind_ineq)
- kkt.jac[end-ns+1:end] .= -1.0
- kkt.jac .*= kkt.jacobian_scaling # scaling
- copyto!(kkt.jac_com, kkt.jac_raw)
-end
-
-function set_jacobian_scaling!(kkt::AbstractSparseKKTSystem{T, VT, MT}, constraint_scaling::AbstractVector) where {T, VT, MT}
- nnzJ = length(kkt.jac)::Int
- @inbounds for i in 1:nnzJ
- index = kkt.jac_raw.I[i]
- kkt.jacobian_scaling[i] = constraint_scaling[index]
- end
-end
-
-
-#=
- SparseKKTSystem
-=#
-
-function SparseKKTSystem{T, VT, MT, QN}(
- n::Int, m::Int, ind_ineq::Vector{Int}, ind_fixed::Vector{Int},
- hess_sparsity_I, hess_sparsity_J,
- jac_sparsity_I, jac_sparsity_J,
-) where {T, VT, MT, QN}
- n_slack = length(ind_ineq)
- n_jac = length(jac_sparsity_I)
- n_hess = length(hess_sparsity_I)
- n_tot = n + n_slack
-
- aug_vec_length = n_tot+m
- aug_mat_length = n_tot+m+n_hess+n_jac+n_slack
-
- I = Vector{Int32}(undef, aug_mat_length)
- J = Vector{Int32}(undef, aug_mat_length)
- V = VT(undef, aug_mat_length)
- fill!(V, 0.0) # Need to initiate V to avoid NaN
-
- offset = n_tot+n_jac+n_slack+n_hess+m
-
- I[1:n_tot] .= 1:n_tot
- I[n_tot+1:n_tot+n_hess] = hess_sparsity_I
- I[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= (jac_sparsity_I.+n_tot)
- I[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= ind_ineq .+ n_tot
- I[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m)
-
- J[1:n_tot] .= 1:n_tot
- J[n_tot+1:n_tot+n_hess] = hess_sparsity_J
- J[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= jac_sparsity_J
- J[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= (n+1:n+n_slack)
- J[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m)
-
- pr_diag = _madnlp_unsafe_wrap(V, n_tot)
- du_diag = _madnlp_unsafe_wrap(V, m, n_jac+n_slack+n_hess+n_tot+1)
-
- hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1)
- jac = _madnlp_unsafe_wrap(V, n_jac+n_slack, n_hess+n_tot+1)
- jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1)
-
- aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V)
- jac_raw = SparseMatrixCOO(
- m, n_tot,
- Int32[jac_sparsity_I; ind_ineq],
- Int32[jac_sparsity_J; n+1:n+n_slack],
- jac,
- )
-
- aug_com = MT(aug_raw)
- jac_com = MT(jac_raw)
-
- aug_csc_map = get_mapping(aug_com, aug_raw)
- jac_csc_map = get_mapping(jac_com, jac_raw)
-
- ind_aug_fixed = if isa(aug_com, SparseMatrixCSC)
- _get_fixed_variable_index(aug_com, ind_fixed)
- else
- zeros(Int, 0)
- end
- jac_scaling = ones(T, n_jac+n_slack)
-
- quasi_newton = QN(n)
-
- return SparseKKTSystem{T, VT, MT, QN}(
- hess, jac_callback, jac, quasi_newton, pr_diag, du_diag,
- aug_raw, aug_com, aug_csc_map,
- jac_raw, jac_com, jac_csc_map,
- ind_ineq, ind_fixed, ind_aug_fixed, jac_scaling,
- )
-end
-
-# Build KKT system directly from AbstractNLPModel
-function SparseKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, ind_cons=get_index_constraints(nlp)) where {T, VT, MT, QN}
- n_slack = length(ind_cons.ind_ineq)
- # Deduce KKT size.
- n = get_nvar(nlp)
- m = get_ncon(nlp)
- # Evaluate sparsity pattern
- jac_I = Vector{Int32}(undef, get_nnzj(nlp.meta))
- jac_J = Vector{Int32}(undef, get_nnzj(nlp.meta))
- jac_structure!(nlp,jac_I, jac_J)
-
- hess_I, hess_J = build_hessian_structure(nlp, QN)
-
- force_lower_triangular!(hess_I,hess_J)
-
- return SparseKKTSystem{T, VT, MT, QN}(
- n, m, ind_cons.ind_ineq, ind_cons.ind_fixed,
- hess_I, hess_J, jac_I, jac_J,
- )
-end
-
-is_reduced(::SparseKKTSystem) = true
-num_variables(kkt::SparseKKTSystem) = length(kkt.pr_diag)
-
-
-#=
- SparseUnreducedKKTSystem
-=#
-
-function SparseUnreducedKKTSystem{T, VT, MT, QN}(
- n::Int, m::Int, nlb::Int, nub::Int, ind_ineq, ind_fixed,
- hess_sparsity_I, hess_sparsity_J,
- jac_sparsity_I, jac_sparsity_J,
- ind_lb, ind_ub,
-) where {T, VT, MT, QN}
- n_slack = length(ind_ineq)
- n_jac = length(jac_sparsity_I)
- n_hess = length(hess_sparsity_I)
- n_tot = n + n_slack
-
- aug_mat_length = n_tot + m + n_hess + n_jac + n_slack + 2*nlb + 2*nub
- aug_vec_length = n_tot + m + nlb + nub
-
- I = Vector{Int32}(undef, aug_mat_length)
- J = Vector{Int32}(undef, aug_mat_length)
- V = zeros(aug_mat_length)
-
- offset = n_tot + n_jac + n_slack + n_hess + m
-
- I[1:n_tot] .= 1:n_tot
- I[n_tot+1:n_tot+n_hess] = hess_sparsity_I
- I[n_tot+n_hess+1:n_tot+n_hess+n_jac].=(jac_sparsity_I.+n_tot)
- I[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack].=(ind_ineq.+n_tot)
- I[n_tot+n_hess+n_jac+n_slack+1:offset].=(n_tot+1:n_tot+m)
-
- J[1:n_tot] .= 1:n_tot
- J[n_tot+1:n_tot+n_hess] = hess_sparsity_J
- J[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= jac_sparsity_J
- J[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= (n+1:n+n_slack)
- J[n_tot+n_hess+n_jac+n_slack+1:offset].=(n_tot+1:n_tot+m)
-
- I[offset+1:offset+nlb] .= (1:nlb).+(n_tot+m)
- I[offset+nlb+1:offset+2nlb] .= (1:nlb).+(n_tot+m)
- I[offset+2nlb+1:offset+2nlb+nub] .= (1:nub).+(n_tot+m+nlb)
- I[offset+2nlb+nub+1:offset+2nlb+2nub] .= (1:nub).+(n_tot+m+nlb)
- J[offset+1:offset+nlb] .= (1:nlb).+(n_tot+m)
- J[offset+nlb+1:offset+2nlb] .= ind_lb
- J[offset+2nlb+1:offset+2nlb+nub] .= (1:nub).+(n_tot+m+nlb)
- J[offset+2nlb+nub+1:offset+2nlb+2nub] .= ind_ub
-
- pr_diag = _madnlp_unsafe_wrap(V,n_tot)
- du_diag = _madnlp_unsafe_wrap(V,m, n_jac + n_slack+n_hess+n_tot+1)
-
- l_diag = _madnlp_unsafe_wrap(V, nlb, offset+1)
- l_lower= _madnlp_unsafe_wrap(V, nlb, offset+nlb+1)
- u_diag = _madnlp_unsafe_wrap(V, nub, offset+2nlb+1)
- u_lower= _madnlp_unsafe_wrap(V, nub, offset+2nlb+nub+1)
-
- hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1)
- jac = _madnlp_unsafe_wrap(V, n_jac + n_slack, n_hess+n_tot+1)
- jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1)
-
- aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V)
- jac_raw = SparseMatrixCOO(
- m, n_tot,
- Int32[jac_sparsity_I; ind_ineq],
- Int32[jac_sparsity_J; n+1:n+n_slack],
- jac,
- )
-
- aug_com = MT(aug_raw)
- jac_com = MT(jac_raw)
-
- aug_csc_map = get_mapping(aug_com, aug_raw)
- jac_csc_map = get_mapping(jac_com, jac_raw)
-
- jac_scaling = ones(T, n_jac+n_slack)
-
- ind_aug_fixed = if isa(aug_com, SparseMatrixCSC)
- _get_fixed_variable_index(aug_com, ind_fixed)
- else
- zeros(Int, 0)
- end
-
- quasi_newton = QN(n)
-
- return SparseUnreducedKKTSystem{T, VT, MT, QN}(
- hess, jac_callback, jac, quasi_newton, pr_diag, du_diag,
- l_diag, u_diag, l_lower, u_lower,
- aug_raw, aug_com, aug_csc_map,
- jac_raw, jac_com, jac_csc_map,
- ind_ineq, ind_fixed, ind_aug_fixed, jac_scaling,
- )
-end
-
-function SparseUnreducedKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, ind_cons=get_index_constraints(nlp)) where {T, VT, MT, QN}
- n_slack = length(ind_cons.ind_ineq)
- nlb = length(ind_cons.ind_lb)
- nub = length(ind_cons.ind_ub)
- # Deduce KKT size.
- n = get_nvar(nlp)
- m = get_ncon(nlp)
- # Evaluate sparsity pattern
- jac_I = Vector{Int32}(undef, get_nnzj(nlp.meta))
- jac_J = Vector{Int32}(undef, get_nnzj(nlp.meta))
- jac_structure!(nlp,jac_I, jac_J)
-
- hess_I = Vector{Int32}(undef, get_nnzh(nlp.meta))
- hess_J = Vector{Int32}(undef, get_nnzh(nlp.meta))
- hess_structure!(nlp,hess_I,hess_J)
-
- force_lower_triangular!(hess_I,hess_J)
-
- return SparseUnreducedKKTSystem{T, VT, MT, QN}(
- n, m, nlb, nub, ind_cons.ind_ineq, ind_cons.ind_fixed,
- hess_I, hess_J, jac_I, jac_J, ind_cons.ind_lb, ind_cons.ind_ub,
- )
-end
-
-function initialize!(kkt::SparseUnreducedKKTSystem)
- kkt.pr_diag.=1
- kkt.du_diag.=0
- kkt.hess.=0
- kkt.l_lower.=0
- kkt.u_lower.=0
- kkt.l_diag.=1
- kkt.u_diag.=1
-end
-
-is_reduced(::SparseUnreducedKKTSystem) = false
-num_variables(kkt::SparseUnreducedKKTSystem) = length(kkt.pr_diag)
-
diff --git a/src/LinearSolvers/backsolve.jl b/src/LinearSolvers/backsolve.jl
index 054d4ef6..7c8414bc 100644
--- a/src/LinearSolvers/backsolve.jl
+++ b/src/LinearSolvers/backsolve.jl
@@ -1,94 +1,86 @@
-# MadNLP.jl
-# Created by Sungho Shin (sungho.shin@wisc.edu)
+@kwdef mutable struct RichardsonOptions <: AbstractOptions
+ richardson_max_iter::Int = 10
+ richardson_tol::Float64 = 1e-10
+ richardson_acceptable_tol::Float64 = 1e-5
+end
-struct RichardsonIterator{T, VT, KKT, LinSolver <: AbstractLinearSolver{T}} <: AbstractIterator{T}
- linear_solver::LinSolver
+struct RichardsonIterator{T, KKT <: AbstractKKTSystem{T}} <: AbstractIterator{T}
kkt::KKT
- residual::VT
- max_iter::Int
- tol::T
- acceptable_tol::T
+ opt::RichardsonOptions
+ cnt::MadNLPCounters
logger::MadNLPLogger
end
+
function RichardsonIterator(
- linear_solver::AbstractLinearSolver{T},
- kkt::AbstractKKTSystem,
- res::AbstractVector;
- max_iter=10, tol=T(1e-10), acceptable_tol=T(1e-5), logger=MadNLPLogger(),
-) where T
+ kkt;
+ opt = RichardsonOptions(),
+ logger = MadNLPLogger(),
+ cnt = MadNLPCounters()
+)
return RichardsonIterator(
- linear_solver, kkt, res, max_iter, tol, acceptable_tol, logger,
+ kkt, opt, cnt, logger
)
end
-# Solve reduced KKT system. Require only the primal/dual values.
-function solve_refine!(
- x::AbstractKKTVector{T, VT},
- solver::RichardsonIterator{T, VT, KKT, LinSolver},
- b::AbstractKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractReducedKKTSystem, LinSolver}
- solve_refine!(primal_dual(x), solver, primal_dual(b))
-end
-
-# Solve unreduced KKT system. Require UnreducedKKTVector as inputs.
-function solve_refine!(
- x::UnreducedKKTVector{T, VT},
- solver::RichardsonIterator{T, VT, KKT, LinSolver},
- b::UnreducedKKTVector{T, VT},
-) where {T, VT, KKT<:AbstractUnreducedKKTSystem, LinSolver}
- solve_refine!(full(x), solver, full(b))
-end
+default_options(::Type{RichardsonIterator}) = RichardsonOptions()
function solve_refine!(
- x::AbstractVector{T},
- solver::RichardsonIterator{T},
- b::AbstractVector{T},
-) where T
- @debug(solver.logger, "Iterative solver initiated")
+ x::VT,
+ iterator::R,
+ b::VT,
+ w::VT
+ ) where {T, VT, R <: RichardsonIterator{T}}
+ @debug(iterator.logger, "Iterative solver initiated")
- ε = solver.residual
- norm_b = norm(b, Inf)
+ norm_b = norm(full(b), Inf)
+ residual_ratio = zero(T)
- fill!(x, zero(T))
- fill!(ε, zero(T))
+ fill!(full(x), zero(T))
- ε = solver.residual
- axpy!(-1, b, ε)
- norm_res = norm(ε, Inf)
- residual_ratio = norm_res / (one(T) + norm_b)
+ if norm_b == zero(T)
+ @debug(
+ iterator.logger,
+ @sprintf(
+ "Iterative solver terminated with %4i refinement steps and residual = %6.2e",
+ 0, 0
+ ),
+ )
+ return true
+ end
+ copyto!(full(w), full(b))
iter = 0
- residual_ratio_old = Inf
- noprogress = 0
while true
- mod(iter, 10)==0 &&
- @debug(solver.logger,"iter ||res||")
- @debug(solver.logger, @sprintf("%4i %6.2e", iter, residual_ratio))
+ solve!(iterator.kkt, w)
+ axpy!(1., full(w), full(x))
+ copyto!(full(w), full(b))
+
+ mul!(w, iterator.kkt, x, -one(T), one(T))
+
+ norm_w = norm(full(w), Inf)
+ norm_x = norm(full(x), Inf)
+ residual_ratio = norm_w / (min(norm_x, 1e6 * norm_b) + norm_b)
+
+ if mod(iter, 10)==0
+ @debug(iterator.logger,"iter ||res||")
+ end
+ @debug(iterator.logger, @sprintf("%4i %6.2e", iter, residual_ratio))
iter += 1
- if (iter > solver.max_iter) || (residual_ratio < solver.tol)
+
+ if (iter >= iterator.opt.richardson_max_iter) || (residual_ratio < iterator.opt.richardson_tol)
break
end
-
- solve!(solver.linear_solver, ε)
- axpy!(-1, ε, x)
- mul!(ε, solver.kkt, x)
- axpy!(-1, b, ε)
- norm_res = norm(ε, Inf)
-
- residual_ratio_old = residual_ratio
- residual_ratio = norm_res / (one(T)+norm_b)
end
- @debug(solver.logger, @sprintf(
- "Iterative solver terminated with %4i refinement steps and residual = %6.2e",
- iter, residual_ratio),
+ @debug(
+ iterator.logger,
+ @sprintf(
+ "Iterative solver terminated with %4i refinement steps and residual = %6.2e",
+ iter, residual_ratio
+ ),
)
- if residual_ratio < solver.acceptable_tol
- return :Solved
- else
- return :Failed
- end
+ return residual_ratio < iterator.opt.richardson_acceptable_tol
end
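
The loop above is a plain Richardson iteration on the KKT system: backsolve on the current residual, accumulate the correction into `x`, recompute `w = b - K x`, and stop once the scaled residual is small enough. A minimal dense sketch of the same scheme, with a factorization `F` standing in for the factorized KKT system:

    using LinearAlgebra

    function richardson_refine!(x, F, A, b; max_iter=10, tol=1e-10)
        fill!(x, 0.0)
        w = copy(b)
        for _ in 1:max_iter
            x .+= F \ w              # backsolve on the current residual
            w .= b .- A * x          # recompute the residual w = b - A x
            norm(w, Inf) / (norm(x, Inf) + norm(b, Inf)) <= tol && break
        end
        return x
    end

    A = [4.0 1.0; 1.0 3.0]
    x = richardson_refine!(zeros(2), lu(A), A, [1.0, 2.0])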
diff --git a/src/LinearSolvers/cholmod.jl b/src/LinearSolvers/cholmod.jl
new file mode 100644
index 00000000..89c53081
--- /dev/null
+++ b/src/LinearSolvers/cholmod.jl
@@ -0,0 +1,131 @@
+@kwdef mutable struct CHOLMODOptions <: AbstractOptions
+ cholmod_algorithm::LinearFactorization = CHOLESKY
+end
+
+mutable struct CHOLMODSolver{T} <: AbstractLinearSolver{T}
+ inner::CHOLMOD.Factor{Float64}
+ tril::SparseMatrixCSC{T,Int32}
+ full::SparseMatrixCSC{Float64,Int}
+ tril_to_full_view::SubVector{T}
+
+ p::Vector{Float64}
+ d::Vector{Float64}
+
+ opt::CHOLMODOptions
+ logger::MadNLPLogger
+end
+
+function CHOLMODSolver(
+ csc::SparseMatrixCSC{T};
+ opt=CHOLMODOptions(), logger=MadNLPLogger(),
+) where T
+ p = Vector{Float64}(undef,csc.n)
+ d = Vector{Float64}(undef,csc.n)
+ full, tril_to_full_view = get_tril_to_full(Float64, csc)
+
+ if opt.cholmod_algorithm == LDL && VERSION <= v"1.9"
+ error("[CHOLMOD] Option `cholmod_algorithm=LDL` is not supported for Julia version <= 1.9")
+ end
+
+ full = SparseMatrixCSC{Float64,Int}(
+ full.m,
+ full.n,
+ Vector{Int64}(full.colptr),
+ Vector{Int64}(full.rowval),
+ full.nzval
+ )
+ fill!(full.nzval, one(T))
+
+ A = CHOLMOD.Sparse(full)
+ inner = CHOLMOD.symbolic(A)
+
+ return CHOLMODSolver(inner, csc, full, tril_to_full_view, p, d, opt, logger)
+end
+
+function factorize!(M::CHOLMODSolver)
+ M.full.nzval .= M.tril_to_full_view
+ # We check the factorization succeeded later in the backsolve
+ if M.opt.cholmod_algorithm == LDL
+ CHOLMOD.ldlt!(M.inner, M.full; check=false)
+ elseif M.opt.cholmod_algorithm == CHOLESKY
+ CHOLMOD.cholesky!(M.inner, M.full; check=false)
+ end
+ return M
+end
+
+function solve!(M::CHOLMODSolver{T}, rhs::Vector{T}) where T
+ if issuccess(M.inner)
+ B = CHOLMOD.Dense(rhs)
+ X = CHOLMOD.solve(CHOLMOD.CHOLMOD_A, M.inner, B)
+ copyto!(rhs, X)
+ end
+ # If the factorization failed, we return the same
+ # rhs to enter into a primal-dual regularization phase.
+ return rhs
+end
+
+# Utils function to copy the diagonal entries directly from CHOLMOD's factor.
+function _get_diagonal!(F::CHOLMOD.Factor{T}, d::Vector{T}) where T
+ s = unsafe_load(CHOLMOD.typedpointer(F))
+ # Wrap in memory the factor LD stored in CHOLMOD.
+ colptr = unsafe_wrap(Array, s.p, s.n+1, own=false)
+ nz = unsafe_wrap(Array, s.nz, s.n, own=false)
+ rowval = unsafe_wrap(Array, s.i, s.nzmax, own=false)
+ nzvals = unsafe_wrap(Array, Ptr{T}(s.x), s.nzmax, own=false)
+ # Read LD factor and load diagonal entries
+ for j in 1:s.n
+ for c in colptr[j]:colptr[j]+nz[j]-1
+ i = rowval[c+1] + 1 # convert to 1-based indexing
+ if i == j
+ d[i] = nzvals[c+1]
+ end
+ end
+ end
+ return d
+end
+
+is_inertia(::CHOLMODSolver) = true
+function _inertia_cholesky(M::CHOLMODSolver)
+ n = size(M.full, 1)
+ if issuccess(M.inner)
+ return (n, 0, 0)
+ else
+ return (0, n, 0)
+ end
+end
+function _inertia_ldlt(M::CHOLMODSolver{T}) where T
+ n = size(M.full, 1)
+ if !issuccess(M.inner)
+ return (0, n, 0)
+ end
+ D = M.d
+ # Extract diagonal elements
+ _get_diagonal!(M.inner, D)
+ (pos, zero, neg) = (0, 0, 0)
+ @inbounds for i in 1:n
+ d = D[i]
+ if d > 0
+ pos += 1
+ elseif d == 0
+ zero += 1
+ else
+ neg += 1
+ end
+ end
+ @assert pos + zero + neg == n
+ return pos, zero, neg
+end
+function inertia(M::CHOLMODSolver)
+ if M.opt.cholmod_algorithm == CHOLESKY
+ return _inertia_cholesky(M)
+ elseif M.opt.cholmod_algorithm == LDL
+ return _inertia_ldlt(M)
+ end
+end
+input_type(::Type{CHOLMODSolver}) = :csc
+default_options(::Type{CHOLMODSolver}) = CHOLMODOptions()
+
+improve!(M::CHOLMODSolver) = false
+introduce(::CHOLMODSolver) = "cholmod v$(CHOLMOD.BUILD_VERSION)"
+is_supported(::Type{CHOLMODSolver},::Type{Float32}) = true
+is_supported(::Type{CHOLMODSolver},::Type{Float64}) = true
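
The Cholesky branch of `inertia` uses a standard fact: a symmetric matrix admits a Cholesky factorization exactly when it is positive definite, so a successful factorization certifies the inertia `(n, 0, 0)`. A small sketch of that logic:

    using SparseArrays, LinearAlgebra

    A = sparse([2.0 1.0; 1.0 2.0])                 # symmetric positive definite
    F = cholesky(A; check=false)                   # CHOLMOD factorization
    n = size(A, 1)
    inert = issuccess(F) ? (n, 0, 0) : (0, n, 0)   # (pos, zero, neg)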
diff --git a/src/LinearSolvers/lapack.jl b/src/LinearSolvers/lapack.jl
index 68a2d10f..9c530d65 100644
--- a/src/LinearSolvers/lapack.jl
+++ b/src/LinearSolvers/lapack.jl
@@ -2,8 +2,8 @@
lapack_algorithm::LinearFactorization = BUNCHKAUFMAN
end
-mutable struct LapackCPUSolver{T} <: AbstractLinearSolver{T}
- dense::Matrix{T}
+mutable struct LapackCPUSolver{T, MT} <: AbstractLinearSolver{T}
+ A::MT
fact::Matrix{T}
work::Vector{T}
lwork::BlasInt
@@ -72,19 +72,19 @@ for (sytrf,sytrs,getrf,getrs,geqrf,ormqr,trsm,potrf,potrs,typ) in (
end
function LapackCPUSolver(
- dense::Matrix{T};
+ A::MT;
opt=LapackOptions(),
logger=MadNLPLogger(),
-) where T
- fact = copy(dense)
-
+) where {T, MT <: AbstractMatrix{T}}
+ fact = Matrix{T}(undef, size(A))
etc = Dict{Symbol,Any}()
work = Vector{T}(undef, 1)
- info = 0
+ info = Ref(0)
- return LapackCPUSolver{T}(dense,fact,work,-1,info,etc,opt,logger)
+ return LapackCPUSolver(A,fact,work,-1,info,etc,opt,logger)
end
+
function factorize!(M::LapackCPUSolver)
if M.opt.lapack_algorithm == BUNCHKAUFMAN
factorize_bunchkaufman!(M)
@@ -114,9 +114,9 @@ end
function factorize_bunchkaufman!(M::LapackCPUSolver)
size(M.fact,1) == 0 && return M
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1)))
+ haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.A,1)))
M.lwork = -1
- M.fact .= M.dense
+ M.fact .= M.A
sytrf('L',size(M.fact,1),M.fact,size(M.fact,2),M.etc[:ipiv],M.work,M.lwork,M.info)
M.lwork = BlasInt(real(M.work[1]))
length(M.work) < M.lwork && resize!(M.work,M.lwork)
@@ -131,9 +131,9 @@ end
function factorize_lu!(M::LapackCPUSolver)
size(M.fact,1) == 0 && return M
- haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1)))
- tril_to_full!(M.dense)
- M.fact .= M.dense
+ haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.A,1)))
+ M.fact .= M.A
+ tril_to_full!(M.fact)
getrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:ipiv],M.info)
return M
end
@@ -147,10 +147,10 @@ end
function factorize_qr!(M::LapackCPUSolver{T}) where T
size(M.fact,1) == 0 && return M
- haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.dense,1)))
- tril_to_full!(M.dense)
+ haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.A,1)))
M.lwork = -1
- M.fact .= M.dense
+ M.fact .= M.A
+ tril_to_full!(M.fact)
geqrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:tau],M.work,M.lwork,M.info)
M.lwork = BlasInt(real(M.work[1]))
length(M.work) < M.lwork && resize!(M.work,M.lwork)
@@ -172,7 +172,7 @@ end
function factorize_cholesky!(M::LapackCPUSolver)
size(M.fact,1) == 0 && return M
M.lwork = -1
- M.fact .= M.dense
+ M.fact .= M.A
potrf('L',size(M.fact,1),M.fact,size(M.fact,2),M.info)
return M
end
@@ -184,6 +184,7 @@ end
is_inertia(M::LapackCPUSolver) =
M.opt.lapack_algorithm == BUNCHKAUFMAN || M.opt.lapack_algorithm == CHOLESKY
+
function inertia(M::LapackCPUSolver)
if M.opt.lapack_algorithm == BUNCHKAUFMAN
inertia(M.fact,M.etc[:ipiv],M.info[])
@@ -203,9 +204,10 @@ end
improve!(M::LapackCPUSolver) = false
+input_type(::Type{LapackCPUSolver}) = :dense
+
introduce(M::LapackCPUSolver) = "Lapack-CPU ($(M.opt.lapack_algorithm))"
-input_type(::Type{LapackCPUSolver}) = :dense
default_options(::Type{LapackCPUSolver}) = LapackOptions()
function num_neg_ev(n,D,ipiv)
diff --git a/src/LinearSolvers/ldl.jl b/src/LinearSolvers/ldl.jl
new file mode 100644
index 00000000..6063d473
--- /dev/null
+++ b/src/LinearSolvers/ldl.jl
@@ -0,0 +1,66 @@
+@kwdef mutable struct LDLFactorizationsOptions <: AbstractOptions
+end
+const LDLF = LDLFactorizations
+
+mutable struct LDLSolver{T} <: AbstractLinearSolver{T}
+ inner::LDLF.LDLFactorization{T, Int32}
+ tril::SparseMatrixCSC{T,Int32}
+ full::SparseMatrixCSC{T,Int32}
+ tril_to_full_view::SubVector{T}
+ opt::LDLFactorizationsOptions
+ logger::MadNLPLogger
+end
+
+function LDLSolver(
+ tril::SparseMatrixCSC{T};
+ opt=LDLFactorizationsOptions(), logger=MadNLPLogger(),
+) where T
+ # TODO: convert tril to triu, not full
+ full, tril_to_full_view = get_tril_to_full(T,tril)
+
+ return LDLSolver(
+ LDLF.ldl(
+ full
+ ),
+ tril, full, tril_to_full_view, opt, logger
+ )
+end
+
+function factorize!(M::LDLSolver)
+ M.full.nzval .= M.tril_to_full_view
+ LDLF.ldl_factorize!(M.full, M.inner)
+ return M
+end
+
+function solve!(M::LDLSolver{T},rhs::Vector{T}) where T
+ ldiv!(M.inner, rhs)
+ # If the factorization failed, we return the same
+ # rhs to enter into a primal-dual regularization phase.
+ return rhs
+end
+
+is_inertia(::LDLSolver) = true
+function inertia(M::LDLSolver)
+ (m, n) = size(M.tril)
+ (pos, zero, neg) = (0, 0, 0)
+ D = M.inner.D
+ for i=1:n
+ d = D[i,i]
+ if d > 0
+ pos += 1
+ elseif d == 0
+ zero += 1
+ else
+ neg += 1
+ end
+ end
+ return pos, zero, neg
+end
+input_type(::Type{LDLSolver}) = :csc
+default_options(::Type{LDLSolver}) = LDLFactorizationsOptions()
+
+function improve!(M::LDLSolver)
+ return false
+end
+introduce(::LDLSolver) = "LDLFactorizations v$(pkgversion(LDLF))"
+is_supported(::Type{LDLSolver},::Type{T}) where T <: AbstractFloat = true
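
The `inertia` method is an application of Sylvester's law of inertia: in `A = L D Lᵀ`, the signs of `diag(D)` match the signs of the eigenvalues of `A`. A small sketch, assuming LDLFactorizations is installed:

    using SparseArrays, LinearAlgebra, LDLFactorizations

    A = sparse([2.0 1.0; 1.0 -1.0])   # indefinite: one positive, one negative eigenvalue
    F = LDLFactorizations.ldl(A)
    d = diag(F.D)
    inert = (count(>(0), d), count(==(0), d), count(<(0), d))  # (1, 0, 1)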
diff --git a/src/LinearSolvers/linearsolvers.jl b/src/LinearSolvers/linearsolvers.jl
index f3aab734..cbad2073 100644
--- a/src/LinearSolvers/linearsolvers.jl
+++ b/src/LinearSolvers/linearsolvers.jl
@@ -103,7 +103,7 @@ end
abstract type AbstractIterator{T} end
"""
- solve_refine!(x, ::AbstractIterator, b)
+ solve_refine!(x::VT, ::AbstractIterator, b::VT, w::VT) where {VT <: AbstractKKTVector}
Solve the linear system ``Ax = b`` using iterative
refinement. The object `AbstractIterator` stores an instance
@@ -124,11 +124,13 @@ struct SolveException <: Exception end
struct InertiaException <: Exception end
LinearSolverException=Union{SymbolicException,FactorizationException,SolveException,InertiaException}
-@enum(LinearFactorization::Int,
+@enum(
+ LinearFactorization::Int,
BUNCHKAUFMAN = 1,
LU = 2,
QR = 3,
CHOLESKY = 4,
+ LDL = 5,
)
# iterative solvers
@@ -140,6 +142,6 @@ include("backsolve.jl")
# dense solvers
include("lapack.jl")
-
-# direct solvers
include("umfpack.jl")
+include("cholmod.jl")
+include("ldl.jl")
diff --git a/src/LinearSolvers/umfpack.jl b/src/LinearSolvers/umfpack.jl
index 4fae6a18..6fb5c780 100644
--- a/src/LinearSolvers/umfpack.jl
+++ b/src/LinearSolvers/umfpack.jl
@@ -1,6 +1,3 @@
-const umfpack_default_ctrl = copy(UMFPACK.umf_ctrl)
-const umfpack_default_info = copy(UMFPACK.umf_info)
-
@kwdef mutable struct UmfpackOptions <: AbstractOptions
umfpack_pivtol::Float64 = 1e-4
umfpack_pivtolmax::Float64 = 1e-1
@@ -10,104 +7,67 @@ const umfpack_default_info = copy(UMFPACK.umf_info)
end
mutable struct UmfpackSolver{T} <: AbstractLinearSolver{T}
- inner::UMFPACK.UmfpackLU{T, Int32}
- tril::SparseMatrixCSC{T}
- full::SparseMatrixCSC{T}
+ inner::UMFPACK.UmfpackLU{Float64, Int32}
+ tril::SparseMatrixCSC{T,Int32}
+ full::SparseMatrixCSC{Float64,Int32}
tril_to_full_view::SubVector{T}
- p::Vector{T}
-
- tmp::Vector{Ptr{Cvoid}}
- ctrl::Vector{T}
- info::Vector{T}
+ p::Vector{Float64}
+ d::Vector{Float64}
opt::UmfpackOptions
logger::MadNLPLogger
end
-
-for (numeric,solve,T) in (
- (:umfpack_di_numeric, :umfpack_di_solve, Float64),
- (:umfpack_si_numeric, :umfpack_si_solve, Float32),
- )
- @eval begin
- umfpack_numeric(
- colptr::Vector{Int32},rowval::Vector{Int32},
- nzval::Vector{$T},symbolic::Ptr{Nothing},
- tmp::Vector{Ptr{Nothing}},ctrl::Vector{$T},
- info::Vector{$T}) = ccall(
- ($(string(numeric)),:libumfpack),
- Int32,
- (Ptr{Int32},Ptr{Int32},Ptr{$T},Ptr{Cvoid},Ptr{Cvoid},
- Ptr{$T},Ptr{$T}),
- colptr,rowval,nzval,symbolic,tmp,ctrl,info)
- umfpack_solve(
- typ,colptr::Vector{Int32},rowval::Vector{Int32},
- nzval::Vector{$T},x::Vector{$T},b::Vector{$T},
- numeric,ctrl::Vector{$T},info::Vector{$T}) = ccall(
- ($(string(solve)),:libumfpack),
- Int32,
- (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{$T},Ptr{$T},
- Ptr{$T}, Ptr{Cvoid}, Ptr{$T},Ptr{$T}),
- typ,colptr,rowval,nzval,x,b,numeric,ctrl,info)
- end
-end
-
-
-
function UmfpackSolver(
csc::SparseMatrixCSC{T};
opt=UmfpackOptions(), logger=MadNLPLogger(),
) where T
- p = Vector{T}(undef,csc.n)
- full,tril_to_full_view = get_tril_to_full(csc)
-
- full.colptr.-=1; full.rowval.-=1
-
- inner = UMFPACK.UmfpackLU(C_NULL,C_NULL,full.n,full.n,full.colptr,full.rowval,full.nzval,0)
- UMFPACK.finalizer(UMFPACK.umfpack_free_symbolic,inner)
- UMFPACK.umfpack_symbolic!(inner)
- ctrl = copy(umfpack_default_ctrl)
- info = copy(umfpack_default_info)
- ctrl[4]=opt.umfpack_pivtol
- ctrl[12]=opt.umfpack_sym_pivtol
- ctrl[5]=opt.umfpack_block_size
- ctrl[6]=opt.umfpack_strategy
-
- tmp = Vector{Ptr{Cvoid}}(undef, 1)
-
- return UmfpackSolver(inner,csc,full,tril_to_full_view,p,tmp,ctrl,info,opt,logger)
+ p = Vector{Float64}(undef,csc.n)
+ d = Vector{Float64}(undef,csc.n)
+ full, tril_to_full_view = get_tril_to_full(Float64,csc)
+ controls = UMFPACK.get_umfpack_control(Float64, Int)
+ # Override default controls with custom setting
+ controls[4] = opt.umfpack_pivtol
+ controls[5] = opt.umfpack_block_size
+ controls[6] = opt.umfpack_strategy
+ controls[12] = opt.umfpack_sym_pivtol
+ inner = UMFPACK.UmfpackLU(full; control=controls)
+ return UmfpackSolver(inner, csc, full, tril_to_full_view, p, d, opt, logger)
end
function factorize!(M::UmfpackSolver)
- UMFPACK.umfpack_free_numeric(M.inner)
- M.full.nzval.=M.tril_to_full_view
- status = umfpack_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info)
- M.inner.numeric = M.tmp[]
-
- M.inner.status = status
+ M.full.nzval .= M.tril_to_full_view
+ # We check the factorization succeeded later in the backsolve
+ UMFPACK.lu!(M.inner, M.full; check=false)
return M
end
+
function solve!(M::UmfpackSolver{T},rhs::Vector{T}) where T
- status = umfpack_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info)
- rhs .= M.p
+ if UMFPACK.issuccess(M.inner)
+ M.p .= rhs
+ UMFPACK.ldiv!(M.d, M.inner, M.p)
+ rhs .= M.d
+ end
+ # If the factorization failed, we return the same
+ # rhs to enter into a primal-dual regularization phase.
return rhs
end
+
is_inertia(::UmfpackSolver) = false
inertia(M::UmfpackSolver) = throw(InertiaException())
input_type(::Type{UmfpackSolver}) = :csc
default_options(::Type{UmfpackSolver}) = UmfpackOptions()
function improve!(M::UmfpackSolver)
- if M.ctrl[4] == M.opt.umfpack_pivtolmax
- @debug(M.logger,"improve quality failed.")
+ if M.inner.control[4] == M.opt.umfpack_pivtolmax
+ @debug(M.logger, "improve quality failed.")
return false
end
- M.ctrl[4] = min(M.opt.umfpack_pivtolmax,M.ctrl[4]^.75)
- @debug(M.logger,"improved quality: pivtol = $(M.ctrl[4])")
+ M.inner.control[4] = min(M.opt.umfpack_pivtolmax, M.inner.control[4]^.75)
+ @debug(M.logger, "improved quality: pivtol = $(M.inner.control[4])")
return true
-
- return false
end
-introduce(::UmfpackSolver)="umfpack"
+introduce(::UmfpackSolver) = "umfpack"
+is_supported(::Type{UmfpackSolver},::Type{Float32}) = true
is_supported(::Type{UmfpackSolver},::Type{Float64}) = true
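
A note on the update rule in `improve!`: for a pivot tolerance `0 < p < 1`, `p^0.75 > p`, so each call raises the tolerance toward `umfpack_pivtolmax` (1e-1 by default), trading speed for a more robust factorization. Starting from the default 1e-4:

    p = 1e-4
    for _ in 1:4
        p = min(1e-1, p^0.75)
        println(p)   # 1.0e-3, ~5.6e-3, ~2.1e-2, ~5.4e-2
    end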
diff --git a/src/MadNLP.jl b/src/MadNLP.jl
index ed541470..6f8e71fe 100644
--- a/src/MadNLP.jl
+++ b/src/MadNLP.jl
@@ -1,26 +1,18 @@
-# MadNLP.jl
-# Created by Sungho Shin (sungho.shin@wisc.edu)
-
module MadNLP
import Pkg.TOML: parsefile
-import MathOptInterface
-import Libdl: dlopen, dlext, RTLD_DEEPBIND, RTLD_GLOBAL
import Printf: @sprintf
-import LinearAlgebra: BLAS, Adjoint, Symmetric, mul!, ldiv!, norm, dot, diagind, normInf, transpose!
-import LinearAlgebra: cholesky, qr, lu, cholesky!
-import LinearAlgebra.BLAS: axpy!, symv!, ger!, libblas, liblapack, BlasInt, @blasfunc
-import SparseArrays: AbstractSparseMatrix, SparseMatrixCSC, sparse, getcolptr, rowvals, nnz
+import LinearAlgebra: BLAS, Adjoint, Symmetric, mul!, ldiv!, norm, dot, diagind, normInf, transpose!, issuccess
+import LinearAlgebra: cholesky, qr, lu, cholesky!, axpy!
+import LinearAlgebra.BLAS: symv!, ger!, libblas, liblapack, BlasInt, @blasfunc
+import SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC, sparse, getcolptr, rowvals, nnz, nonzeros
import Base: string, show, print, size, getindex, copyto!, @kwdef
-import SuiteSparse: UMFPACK
+import SuiteSparse: UMFPACK, CHOLMOD
import NLPModels
-import NLPModels: finalize, AbstractNLPModel, obj, grad!, cons!, jac_coord!, hess_coord!, hess_structure!, jac_structure!, NLPModelMeta, get_nvar, get_ncon, get_minimize, get_x0, get_y0, get_nnzj, get_nnzh, get_lvar, get_uvar, get_lcon, get_ucon, Counters as _Counters # get_zl,get_zu
+import NLPModels: finalize, AbstractNLPModel, obj, grad!, cons!, jac_coord!, hess_coord!, hess_structure!, jac_structure!, NLPModelMeta, get_nvar, get_ncon, get_minimize, get_x0, get_y0, get_nnzj, get_nnzh, get_lvar, get_uvar, get_lcon, get_ucon
import SolverCore: solve!, getStatus, AbstractOptimizationSolver, AbstractExecutionStats
-
-const MOI = MathOptInterface
-const MOIU = MathOptInterface.Utilities
-
-export MadNLPSolver, MadNLPOptions, UmfpackSolver, LapackCPUSolver, madnlp, solve!
+export MadNLPSolver, MadNLPOptions, UmfpackSolver, LDLSolver, CHOLMODSolver, LapackCPUSolver, madnlp, solve!
+import LDLFactorizations
# Version info
version() = parsefile(joinpath(@__DIR__,"..","Project.toml"))["version"]
@@ -35,6 +27,6 @@ include(joinpath("KKT", "KKTsystem.jl"))
include(joinpath("LinearSolvers","linearsolvers.jl"))
include("options.jl")
include(joinpath("IPM", "IPM.jl"))
-include(joinpath("Interfaces","interfaces.jl"))
+include("extension_templates.jl")
end # end module
diff --git a/src/enums.jl b/src/enums.jl
index ddfb9124..afe25555 100644
--- a/src/enums.jl
+++ b/src/enums.jl
@@ -10,29 +10,6 @@
WARN = 5,
ERROR = 6)
-@enum(FixedVariableTreatments::Int,
- RELAX_BOUND = 1,
- MAKE_PARAMETER = 2)
-
-@enum(InertiaCorrectionMethod::Int,
- INERTIA_AUTO = 1,
- INERTIA_BASED = 2,
- INERTIA_FREE = 3)
-
-@enum(KKTLinearSystem::Int,
- SPARSE_KKT_SYSTEM = 1,
- SPARSE_UNREDUCED_KKT_SYSTEM = 2,
- DENSE_KKT_SYSTEM = 3,
- DENSE_CONDENSED_KKT_SYSTEM = 4,
-)
-
-@enum(HessianApproximation::Int,
- EXACT_HESSIAN = 1,
- DENSE_BFGS = 2,
- DENSE_DAMPED_BFGS = 3,
- SPARSE_COMPACT_LBFGS = 4,
-)
-
@enum(BFGSInitStrategy::Int,
SCALAR1 = 1,
SCALAR2 = 2,
@@ -66,23 +43,44 @@
INVALID_NUMBER_HESSIAN_LAGRANGIAN = -11,
)
-const STATUS_OUTPUT_DICT = Dict(
- SOLVE_SUCCEEDED => "Optimal Solution Found.",
- SOLVED_TO_ACCEPTABLE_LEVEL => "Solved To Acceptable Level.",
- SEARCH_DIRECTION_BECOMES_TOO_SMALL => "Search Direction is becoming Too Small.",
- DIVERGING_ITERATES => "Iterates divering; problem might be unbounded.",
- MAXIMUM_ITERATIONS_EXCEEDED => "Maximum Number of Iterations Exceeded.",
- MAXIMUM_WALLTIME_EXCEEDED => "Maximum wall-clock Time Exceeded.",
- RESTORATION_FAILED => "Restoration Failed",
- INFEASIBLE_PROBLEM_DETECTED => "Converged to a point of local infeasibility. Problem may be infeasible.",
- INVALID_NUMBER_DETECTED => "Invalid number in NLP function or derivative detected.",
- ERROR_IN_STEP_COMPUTATION => "Error in step computation.",
- NOT_ENOUGH_DEGREES_OF_FREEDOM => "Problem has too few degrees of freedom.",
- USER_REQUESTED_STOP => "Stopping optimization at current point as requested by user.",
- INTERNAL_ERROR => "Internal Error.",
- INVALID_NUMBER_OBJECTIVE => "Invalid number in NLP objective function detected.",
- INVALID_NUMBER_GRADIENT => "Invalid number in NLP objective gradient detected.",
- INVALID_NUMBER_CONSTRAINTS => "Invalid number in NLP constraint function detected.",
- INVALID_NUMBER_JACOBIAN => "Invalid number in NLP constraint Jacobian detected.",
- INVALID_NUMBER_HESSIAN_LAGRANGIAN => "Invalid number in NLP Hessian Lagrangian detected.",
-)
+function get_status_output(status, opt)
+ if status == SOLVE_SUCCEEDED
+ return @sprintf "Optimal Solution Found (tol = %5.1e)." opt.tol
+ elseif status == SOLVED_TO_ACCEPTABLE_LEVEL
+ return @sprintf "Solved To Acceptable Level (tol = %5.1e)." opt.acceptable_tol
+ elseif status == SEARCH_DIRECTION_BECOMES_TOO_SMALL
+ return "Search Direction is becoming Too Small."
+ elseif status == DIVERGING_ITERATES
+ return "Iterates divering; problem might be unbounded."
+ elseif status == MAXIMUM_ITERATIONS_EXCEEDED
+ return "Maximum Number of Iterations Exceeded."
+ elseif status == MAXIMUM_WALLTIME_EXCEEDED
+ return "Maximum wall-clock Time Exceeded."
+ elseif status == RESTORATION_FAILED
+ return "Restoration Failed"
+ elseif status == INFEASIBLE_PROBLEM_DETECTED
+ return "Converged to a point of local infeasibility. Problem may be infeasible."
+ elseif status == INVALID_NUMBER_DETECTED
+ return "Invalid number in NLP function or derivative detected."
+ elseif status == ERROR_IN_STEP_COMPUTATION
+ return "Error in step computation."
+ elseif status == NOT_ENOUGH_DEGREES_OF_FREEDOM
+ return "Problem has too few degrees of freedom."
+ elseif status == USER_REQUESTED_STOP
+ return "Stopping optimization at current point as requested by user."
+ elseif status == INTERNAL_ERROR
+ return "Internal Error."
+ elseif status == INVALID_NUMBER_OBJECTIVE
+ return "Invalid number in NLP objective function detected."
+ elseif status == INVALID_NUMBER_GRADIENT
+ return "Invalid number in NLP objective gradient detected."
+ elseif status == INVALID_NUMBER_CONSTRAINTS
+ return "Invalid number in NLP constraint function detected."
+ elseif status == INVALID_NUMBER_JACOBIAN
+ return "Invalid number in NLP constraint Jacobian detected."
+    elseif status == INVALID_NUMBER_HESSIAN_LAGRANGIAN
+ return "Invalid number in NLP Hessian Lagrangian detected."
+ else
+ error("status code is not valid")
+ end
+end
diff --git a/src/extension_templates.jl b/src/extension_templates.jl
new file mode 100644
index 00000000..a672c146
--- /dev/null
+++ b/src/extension_templates.jl
@@ -0,0 +1,6 @@
+"""
+ Optimizer()
+
+Create a new MadNLP optimizer.
+"""
+function Optimizer end
diff --git a/src/matrixtools.jl b/src/matrixtools.jl
index da7d9838..13ba1ebb 100644
--- a/src/matrixtools.jl
+++ b/src/matrixtools.jl
@@ -3,11 +3,11 @@
abstract type AbstractSparseMatrixCOO{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti} end
-mutable struct SparseMatrixCOO{Tv,Ti<:Integer, VTv<:AbstractVector{Tv}} <: AbstractSparseMatrixCOO{Tv,Ti}
+mutable struct SparseMatrixCOO{Tv,Ti,VTv<:AbstractVector{Tv},VTi<:AbstractVector{Ti}} <: AbstractSparseMatrixCOO{Tv,Ti}
m::Int
n::Int
- I::Vector{Ti}
- J::Vector{Ti}
+ I::VTi
+ J::VTi
V::VTv
end
size(A::SparseMatrixCOO) = (A.m,A.n)
@@ -37,12 +37,12 @@ function diag!(dest::AbstractVector{T}, src::AbstractMatrix{T}) where T
dest[i] = src[i, i]
end
end
-
-function get_tril_to_full(csc::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti<:Integer}
+get_tril_to_full(csc::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti} = get_tril_to_full(Tv,csc)
+function get_tril_to_full(T,csc::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti<:Integer}
cscind = SparseMatrixCSC{Int,Ti}(Symmetric(
SparseMatrixCSC{Int,Ti}(csc.m,csc.n,csc.colptr,csc.rowval,collect(1:nnz(csc))),:L))
- return SparseMatrixCSC{Tv,Ti}(
- csc.m,csc.n,cscind.colptr,cscind.rowval,Vector{Tv}(undef,nnz(cscind))),view(csc.nzval,cscind.nzval)
+ return SparseMatrixCSC{T,Ti}(
+ csc.m,csc.n,cscind.colptr,cscind.rowval,Vector{T}(undef,nnz(cscind))),view(csc.nzval,cscind.nzval)
end
function tril_to_full!(dense::Matrix{T}) where T
for i=1:size(dense,1)
@@ -52,13 +52,23 @@ function tril_to_full!(dense::Matrix{T}) where T
end
end
-function SparseMatrixCSC{Tv, Ti}(coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti <: Integer}
- cscind = sparse(coo.I,coo.J,ones(Ti,nnz(coo)),coo.m,coo.n)
- nzval = Vector{Tv}(undef,nnz(cscind))
- fill!(nzval, zero(Tv))
- return SparseMatrixCSC{Tv, Ti}(
+function coo_to_csc(coo)
+ cscind = sparse(
+ coo.I,
+ coo.J,
+ fill!(similar(coo.I,nnz(coo)), 1),
+ coo.m,
+ coo.n
+ )
+ nzval = similar(coo.V, nnz(cscind))
+ fill!(nzval, 0)
+
+ csc = SparseMatrixCSC(
coo.m,coo.n,cscind.colptr,cscind.rowval,nzval,
)
+ map = get_mapping(csc, coo)
+
+ return csc, map
end
function _get_coo_to_csc(I,J,cscind,map)
@@ -78,7 +88,7 @@ function transfer!(dest::SparseMatrixCSC, src::SparseMatrixCOO, map::Vector{Int}
end
function get_mapping(dest::SparseMatrixCSC{Tv1,Ti1}, src::SparseMatrixCOO{Tv2,Ti2}) where {Tv1,Tv2,Ti1,Ti2}
- map = Vector{Int}(undef,nnz(src))
+ map = similar(src.V, Int, nnz(src))
dest.nzval .= 1:nnz(dest)
_get_coo_to_csc(src.I, src.J, dest, map)
return map
@@ -88,7 +98,7 @@ function Matrix{Tv}(coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti<:Integer}
return Matrix{Tv}(undef,coo.m,coo.n)
end
-Base.copyto!(dense::Matrix{Tv},coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti<:Integer} = _copyto!(dense,coo.I,coo.J,coo.V)
+Base.copyto!(dense::Matrix,coo::SparseMatrixCOO) = _copyto!(dense,coo.I,coo.J,coo.V)
function _copyto!(dense::Matrix{Tv},I,J,V) where Tv
fill!(dense, zero(Tv))
for i=1:length(I)
diff --git a/src/nlpmodels.jl b/src/nlpmodels.jl
index b990a938..8336995a 100644
--- a/src/nlpmodels.jl
+++ b/src/nlpmodels.jl
@@ -1,101 +1,785 @@
-function _set_scaling!(con_scale::AbstractVector, jac::SparseMatrixCOO)
- @simd for i in 1:nnz(jac)
- row = @inbounds jac.I[i]
- @inbounds con_scale[row] = max(con_scale[row], abs(jac.V[i]))
- end
-end
-function _set_scaling!(con_scale::AbstractVector, jac::Matrix)
- for row in 1:size(jac, 1)
- for col in 1:size(jac, 2)
- @inbounds con_scale[row] = max(con_scale[row], abs(jac[row, col]))
- end
- end
+"""
+ AbstractFixedVariableTreatment
+
+Abstract type to define the reformulation of the fixed variables inside MadNLP.
+"""
+abstract type AbstractFixedVariableTreatment end
+
+"""
+ MakeParameter{VT, VI} <: AbstractFixedVariableTreatment
+
+Remove the fixed variables from the optimization variables and
+define them as parameters of the problem.
+"""
+struct MakeParameter{VT,VI} <: AbstractFixedVariableTreatment
+ fixed::VI
+ fixedj::VI
+ fixedh::VI
+ grad_storage::VT
end
"""
- scale_constraints!(
- nlp::AbstractNLPModel,
- con_scale::AbstractVector,
- jac::AbstractMatrix;
- max_gradient=1e-8,
- )
+ RelaxBound <: AbstractFixedVariableTreatment
-Compute the scaling of the constraints associated
-to the nonlinear model `nlp`. By default, Ipopt's scaling
-is applied. The user can write its own function to scale
-appropriately any custom `AbstractNLPModel`.
+Relax the fixed variables ``x = x_{fixed}`` as bounded
+variables ``x_{fixed} - ϵ ≤ x ≤ x_{fixed} + ϵ``, with
+``ϵ`` a small-enough parameter.
+"""
+struct RelaxBound <: AbstractFixedVariableTreatment end
-### Notes
-This function assumes that the Jacobian `jac` has been evaluated
-before calling this function.
+"""
+ AbstractEqualityTreatment
+Abstract type to define the reformulation of the equality
+constraints inside MadNLP.
"""
-function scale_constraints!(
- nlp::AbstractNLPModel{T},
- con_scale::AbstractVector,
- jac::AbstractMatrix;
- max_gradient=1e-8,
-) where T
- fill!(con_scale, zero(T))
- _set_scaling!(con_scale, jac)
- @inbounds for i in eachindex(con_scale)
- con_scale[i] = min(one(T), max_gradient / con_scale[i])
- end
-end
+abstract type AbstractEqualityTreatment end
"""
- scale_objective(
- nlp::AbstractNLPModel,
- grad::AbstractVector;
- max_gradient=1e-8,
- )
+ EnforceEquality <: AbstractEqualityTreatment
+
+Keep the equality constraints intact.
-Compute the scaling of the objective associated to the
-nonlinear model `nlp`. By default, Ipopt's scaling
-is applied. The user can write its own function to scale
-appropriately the objective of any custom `AbstractNLPModel`.
+The solution returned by MadNLP will respect the equality constraints.
+"""
+struct EnforceEquality <: AbstractEqualityTreatment end
+
+"""
+ RelaxEquality <: AbstractEqualityTreatment
-### Notes
+Relax the equality constraints ``g(x) = 0`` with two
+inequality constraints, as ``-ϵ ≤ g(x) ≤ ϵ``. The parameter
+``ϵ`` is usually small.
-This function assumes that the gradient `gradient` has been evaluated
-before calling this function.
+The solution returned by MadNLP will satisfy the equality
+constraints only up to a tolerance ``ϵ``.
"""
-function scale_objective(
- nlp::AbstractNLPModel{T},
- grad::AbstractVector;
- max_gradient=1e-8,
-) where T
- return min(one(T), max_gradient / normInf(grad))
+struct RelaxEquality <: AbstractEqualityTreatment end
+
+
+"""
+ get_index_constraints(nlp::AbstractNLPModel)
+
+Analyze the bounds of the variables and the constraints in the `AbstractNLPModel` `nlp`.
+Return a named-tuple with the following keys:
+
+* `ind_eq`: indices of equality constraints.
+* `ind_ineq`: indices of inequality constraints.
+* `ind_fixed`: indices of fixed variables.
+* `ind_lb`: indices of variables with a lower-bound.
+* `ind_ub`: indices of variables with an upper-bound.
+* `ind_llb`: indices of variables with *only* a lower-bound.
+* `ind_uub`: indices of variables with *only* an upper-bound.
+
+"""
+function get_index_constraints(
+ nlp::AbstractNLPModel; options...
+)
+ get_index_constraints(
+ get_lvar(nlp), get_uvar(nlp),
+ get_lcon(nlp), get_ucon(nlp);
+ options...
+ )
end
-function get_index_constraints(nlp::AbstractNLPModel; fixed_variable_treatment=MAKE_PARAMETER)
- ind_ineq = findall(get_lcon(nlp) .!= get_ucon(nlp))
- xl = [get_lvar(nlp);view(get_lcon(nlp),ind_ineq)]
- xu = [get_uvar(nlp);view(get_ucon(nlp),ind_ineq)]
- if fixed_variable_treatment == MAKE_PARAMETER
+function get_index_constraints(
+ lvar, uvar,
+ lcon, ucon;
+ fixed_variable_treatment=MakeParameter,
+ equality_treatment=EnforceEquality,
+)
+ ncon = length(lcon)
+
+ if ncon > 0
+ if equality_treatment == EnforceEquality
+ ind_eq = findall(lcon .== ucon)
+ ind_ineq = findall(lcon .!= ucon)
+ else
+ ind_eq = similar(lvar, Int, 0)
+ ind_ineq = similar(lvar, Int, ncon) .= 1:ncon
+ end
+ xl = [lvar;view(lcon,ind_ineq)]
+ xu = [uvar;view(ucon,ind_ineq)]
+ else
+ ind_eq = similar(lvar, Int, 0)
+ ind_ineq = similar(lvar, Int, 0)
+ xl = lvar
+ xu = uvar
+ end
+
+ if fixed_variable_treatment == MakeParameter
ind_fixed = findall(xl .== xu)
ind_lb = findall((xl .!= -Inf) .* (xl .!= xu))
ind_ub = findall((xu .!= Inf) .* (xl .!= xu))
else
- ind_fixed = Int[]
+ ind_fixed = similar(xl, Int, 0)
ind_lb = findall(xl .!=-Inf)
ind_ub = findall(xu .!= Inf)
end
- ind_llb = findall((get_lvar(nlp) .!= -Inf).*(get_uvar(nlp) .== Inf))
- ind_uub = findall((get_lvar(nlp) .== -Inf).*(get_uvar(nlp) .!= Inf))
+ ind_llb = findall((lvar .!= -Inf).*(uvar .== Inf))
+ ind_uub = findall((lvar .== -Inf).*(uvar .!= Inf))
# Return named tuple
return (
- ind_ineq=ind_ineq,
- ind_fixed=ind_fixed,
- ind_lb=ind_lb,
- ind_ub=ind_ub,
- ind_llb=ind_llb,
- ind_uub=ind_uub,
+ ind_eq = ind_eq,
+ ind_ineq = ind_ineq,
+ ind_fixed = ind_fixed,
+ ind_lb = ind_lb,
+ ind_ub = ind_ub,
+ ind_llb = ind_llb,
+ ind_uub = ind_uub,
+ )
+end
+
+"""
+ AbstractCallback{T, VT}
+
+Wrap the `AbstractNLPModel` passed by the user in a form amenable to MadNLP.
+
+An `AbstractCallback` handles the scaling of the problem and the
+reformulations of the equality constraints and fixed variables.
+
+"""
+abstract type AbstractCallback{T,VT} end
+
+"""
+ create_callback(
+ ::Type{Callback},
+ nlp::AbstractNLPModel{T, VT};
+ fixed_variable_treatment=MakeParameter,
+ equality_treatment=EnforceEquality,
+ ) where {T, VT}
+
+Wrap the nonlinear program `nlp` using the callback wrapper
+with type `Callback`. The option `fixed_variable_treatment`
+decides whether the fixed variables are relaxed (`RelaxBound`)
+or removed (`MakeParameter`). The option `equality_treatment`
+decides whether the equality constraints are kept as is
+(`EnforceEquality`) or relaxed (`RelaxEquality`).
+
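+A minimal usage sketch (assuming `nlp` is an `AbstractNLPModel` with sparse derivatives):
+
+ cb = create_callback(
+ SparseCallback, nlp;
+ fixed_variable_treatment=MakeParameter,
+ equality_treatment=EnforceEquality,
+ )
+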
+"""
+function create_callback end
+
+"""
+ SparseCallback{T, VT} <: AbstractCallback{T, VT}
+
+Wrap an `AbstractNLPModel` using sparse structures.
+
+"""
+struct SparseCallback{
+ T,
+ VT <: AbstractVector{T},
+ VI <: AbstractVector{Int},
+ I <: AbstractNLPModel{T, VT},
+ FH <: AbstractFixedVariableTreatment,
+ EH <: AbstractEqualityTreatment,
+ } <: AbstractCallback{T, VT}
+
+ nlp::I
+ nvar::Int
+ ncon::Int
+ nnzj::Int
+ nnzh::Int
+
+ con_buffer::VT
+ jac_buffer::VT
+ grad_buffer::VT
+ hess_buffer::VT
+
+ jac_I::VI
+ jac_J::VI
+ hess_I::VI
+ hess_J::VI
+
+ obj_scale::Base.RefValue{T}
+ con_scale::VT
+ jac_scale::VT
+
+ fixed_handler::FH
+ equality_handler::EH
+end
+
+"""
+ DenseCallback{T, VT} <: AbstractCallback{T, VT}
+
+Wrap an `AbstractNLPModel` using dense structures.
+
+"""
+struct DenseCallback{
+ T,
+ VT <: AbstractVector{T},
+ MT <: AbstractMatrix{T},
+ I <: AbstractNLPModel{T, VT},
+ FH <: AbstractFixedVariableTreatment,
+ EH <: AbstractEqualityTreatment,
+ } <: AbstractCallback{T, VT}
+
+ nlp::I
+ nvar::Int
+ ncon::Int
+
+ con_buffer::VT
+ jac_buffer::MT
+ grad_buffer::VT
+
+ obj_scale::Base.RefValue{T}
+ con_scale::VT
+
+ fixed_handler::FH
+ equality_handler::EH
+end
+
+
+create_array(cb::AbstractCallback, args...) = similar(get_x0(cb.nlp), args...)
+
+function set_obj_scale!(obj_scale, f::VT, max_gradient) where {T, VT <: AbstractVector{T}}
+ obj_scale[] = min(one(T), max_gradient / norm(f, Inf))
+end
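+# E.g. norm(f, Inf) = 1e4 with max_gradient = 100 gives obj_scale[] = 1e-2;
+# if the initial gradient is already below max_gradient, obj_scale[] stays at 1.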
+
+function set_con_scale_sparse!(con_scale::VT, jac_I,jac_buffer, max_gradient) where {T, VT <: AbstractVector{T}}
+ fill!(con_scale, one(T))
+ _set_con_scale_sparse!(con_scale, jac_I, jac_buffer)
+ map!(x-> min(one(T), max_gradient / x), con_scale, con_scale)
+end
+function _set_con_scale_sparse!(con_scale, jac_I, jac_buffer)
+ @inbounds @simd for i in 1:length(jac_I)
+ row = jac_I[i]
+ con_scale[row] = max(con_scale[row], abs(jac_buffer[i]))
+ end
+end
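+# E.g. jac_I = [1, 1, 2] and jac_buffer = [200.0, 4.0, 0.5] give row-wise maxima
+# [200.0, 1.0] (initialized at 1); with max_gradient = 100 the result is
+# con_scale = [0.5, 1.0], so only rows with large gradients are scaled down.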
+
+function set_jac_scale_sparse!(jac_scale::VT, con_scale, jac_I) where {T, VT <: AbstractVector{T}}
+ copyto!(jac_scale, @view(con_scale[jac_I]))
+end
+
+function set_con_scale_dense!(con_scale::VT, jac_buffer, max_gradient) where {T, VT <: AbstractVector{T}}
+ con_scale .= min.(one(T), max_gradient ./ mapreduce(abs, max, jac_buffer, dims=2, init=one(T)))
+end
+
+
+function create_dense_fixed_handler(
+ fixed_variable_treatment::Type{MakeParameter},
+ nlp,
+)
+ lvar = get_lvar(nlp)
+ uvar = get_uvar(nlp)
+ isfixed = (lvar .== uvar)
+ fixed = findall(isfixed)
+ return MakeParameter(
+ fixed,
+ similar(fixed,0),
+ similar(fixed,0),
+ similar(lvar, length(fixed))
+ )
+end
+
+function create_sparse_fixed_handler(
+ fixed_variable_treatment::Type{MakeParameter},
+ nlp,
+ jac_I,
+ jac_J,
+ hess_I,
+ hess_J,
+ hess_buffer,
+)
+ lvar = get_lvar(nlp)
+ uvar = get_uvar(nlp)
+ nnzj = get_nnzj(nlp.meta)
+ nnzh = get_nnzh(nlp.meta)
+
+ isfixed = (lvar .== uvar)
+
+ fixed = findall(isfixed)
+ fixedj = findall(@view(isfixed[jac_J]))
+ fixedh = findall(@view(isfixed[hess_I]) .|| @view(isfixed[hess_J]))
+ nfixed = length(fixed)
+
+ nnzh = nnzh + nfixed
+ resize!(hess_I, nnzh)
+ resize!(hess_J, nnzh)
+ resize!(hess_buffer, nnzh)
+ copyto!(@view(hess_I[end-nfixed+1:end]), fixed)
+ copyto!(@view(hess_J[end-nfixed+1:end]), fixed)
+
+ fixed_handler = MakeParameter(
+ fixed,
+ fixedj,
+ fixedh,
+ similar(lvar, length(fixed))
+ )
+
+ return fixed_handler, nnzj, nnzh
+end
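+# The Hessian sparsity is padded with one diagonal entry per fixed variable, so
+# that the primal block of the KKT system remains nonsingular once the rows and
+# columns of the fixed variables are zeroed out (these padding entries are set
+# to 1 in _treat_fixed_variable_hess_coord!).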
+
+function create_sparse_fixed_handler(
+ fixed_variable_treatment::Type{RelaxBound},
+ nlp,
+ jac_I,
+ jac_J,
+ hess_I,
+ hess_J,
+ hess_buffer,
+)
+ fixed_handler = RelaxBound()
+ return fixed_handler, get_nnzj(nlp.meta), get_nnzh(nlp.meta)
+end
+
+function create_callback(
+ ::Type{SparseCallback},
+ nlp::AbstractNLPModel{T, VT};
+ fixed_variable_treatment=MakeParameter,
+ equality_treatment=EnforceEquality,
+ ) where {T, VT}
+
+ n = get_nvar(nlp)
+ m = get_ncon(nlp)
+ nnzj = get_nnzj(nlp.meta)
+ nnzh = get_nnzh(nlp.meta)
+
+ x0 = get_x0(nlp)
+
+ con_buffer = similar(x0, m) ; fill!(con_buffer, zero(T))
+ grad_buffer = similar(x0, n) ; fill!(grad_buffer, zero(T))
+ jac_buffer = similar(x0, nnzj) ; fill!(jac_buffer, zero(T))
+ hess_buffer = similar(x0, nnzh) ; fill!(hess_buffer, zero(T))
+
+ jac_I = similar(x0, Int, nnzj)
+ jac_J = similar(x0, Int, nnzj)
+ hess_I = similar(x0, Int, nnzh)
+ hess_J = similar(x0, Int, nnzh)
+
+ obj_scale = Ref(one(T))
+ con_scale = similar(jac_buffer, m) ; fill!(con_scale, one(T))
+ jac_scale = similar(jac_buffer, nnzj) ; fill!(jac_scale, one(T))
+
+ NLPModels.jac_structure!(nlp, jac_I, jac_J)
+ if nnzh > 0
+ NLPModels.hess_structure!(nlp, hess_I, hess_J)
+ end
+
+ fixed_handler, nnzj, nnzh = create_sparse_fixed_handler(
+ fixed_variable_treatment,
+ nlp,
+ jac_I, jac_J, hess_I, hess_J,
+ hess_buffer,
+ )
+ equality_handler = equality_treatment()
+
+ return SparseCallback(
+ nlp,
+ n,m,nnzj,nnzh,
+ con_buffer,
+ jac_buffer,
+ grad_buffer,
+ hess_buffer,
+ jac_I,
+ jac_J,
+ hess_I,
+ hess_J,
+ obj_scale,
+ con_scale,
+ jac_scale,
+ fixed_handler,
+ equality_handler
)
end
+function create_callback(
+ ::Type{DenseCallback},
+ nlp::AbstractNLPModel{T, VT};
+ fixed_variable_treatment=MakeParameter,
+ equality_treatment=EnforceEquality,
+ ) where {T, VT}
+
+ n = get_nvar(nlp)
+ m = get_ncon(nlp)
+
+ x0 = similar(get_x0(nlp))
+ con_buffer = similar(x0, m) ; fill!(con_buffer, zero(T))
+ jac_buffer = similar(x0, m, n) ; fill!(jac_buffer, zero(T))
+ grad_buffer = similar(x0, n) ; fill!(grad_buffer, zero(T))
+ obj_scale = Ref(one(T))
+ con_scale = similar(x0, m) ; fill!(con_scale, one(T))
+
+ fixed_handler = create_dense_fixed_handler(
+ fixed_variable_treatment,
+ nlp,
+ )
+ equality_handler = equality_treatment()
+
+ return DenseCallback(
+ nlp,
+ n, m,
+ con_buffer,
+ jac_buffer,
+ grad_buffer,
+ obj_scale,
+ con_scale,
+ fixed_handler,
+ equality_handler
+ )
+end
+
+function _treat_fixed_variable_initialize!(fixed_handler::RelaxBound, x0, lvar, uvar) end
+function _treat_fixed_variable_initialize!(fixed_handler::MakeParameter, x0, lvar, uvar)
+ fixed = fixed_handler.fixed
+ copyto!(@view(x0[fixed]), @view(lvar[fixed]))
+ fill!(@view(lvar[fixed]), -Inf)
+ fill!(@view(uvar[fixed]), Inf)
+end
+
+function _treat_equality_initialize!(equality_handler::EnforceEquality, lcon, ucon, tol) end
+function _treat_equality_initialize!(equality_handler::RelaxEquality, lcon, ucon, tol)
+ set_initial_bounds!(
+ lcon,
+ ucon,
+ tol
+ )
+end
+
+function initialize!(
+ cb::AbstractCallback,
+ x, xl, xu, y0, rhs,
+ ind_ineq;
+ tol=1e-8,
+ bound_push=1e-2,
+ bound_fac=1e-2,
+)
+
+ x0 = variable(x)
+ lvar = variable(xl)
+ uvar = variable(xu)
+
+ nlp = cb.nlp
+ con_buffer = cb.con_buffer
+ grad_buffer = cb.grad_buffer
+
+ x0 .= get_x0(nlp)
+ y0 .= get_y0(nlp)
+ lvar .= get_lvar(nlp)
+ uvar .= get_uvar(nlp)
+ lcon = copy(get_lcon(nlp))
+ ucon = copy(get_ucon(nlp))
+
+ _treat_fixed_variable_initialize!(cb.fixed_handler, x0, lvar, uvar)
+ _treat_equality_initialize!(cb.equality_handler, lcon, ucon, tol)
+
+ set_initial_bounds!(
+ lvar,
+ uvar,
+ tol
+ )
+ initialize_variables!(
+ x0,
+ lvar,
+ uvar,
+ bound_push,
+ bound_fac
+ )
+
+ NLPModels.cons!(nlp,x0,con_buffer)
+
+ slack(xl) .= view(lcon, ind_ineq)
+ slack(xu) .= view(ucon, ind_ineq)
+ rhs .= (lcon .== ucon) .* lcon
+ copyto!(slack(x), @view(con_buffer[ind_ineq]))
+
+ set_initial_bounds!(
+ slack(xl),
+ slack(xu),
+ tol
+ )
+ initialize_variables!(
+ slack(x),
+ slack(xl),
+ slack(xu),
+ bound_push,
+ bound_fac
+ )
+end
+
+function set_scaling!(
+ cb::SparseCallback,
+ x, xl, xu, y0, rhs,
+ ind_ineq,
+ nlp_scaling_max_gradient
+ )
+
+ x0 = variable(x)
+
+ nlp = cb.nlp
+ obj_scale = cb.obj_scale
+ con_scale = cb.con_scale
+ jac_scale = cb.jac_scale
+ con_buffer = cb.con_buffer
+ jac_buffer = cb.jac_buffer
+ grad_buffer = cb.grad_buffer
+
+ # Set scaling
+ NLPModels.jac_coord!(nlp,x0,jac_buffer)
+ set_con_scale_sparse!(con_scale, cb.jac_I, jac_buffer, nlp_scaling_max_gradient)
+ set_jac_scale_sparse!(jac_scale, con_scale, cb.jac_I)
+
+ NLPModels.grad!(nlp,x0,grad_buffer)
+ set_obj_scale!(obj_scale, grad_buffer, nlp_scaling_max_gradient)
+
+ con_scale_slk = @view(con_scale[ind_ineq])
+ y0 ./= con_scale
+ rhs .*= con_scale
+ slack(x) .*= con_scale_slk
+ slack(xl) .*= con_scale_slk
+ slack(xu) .*= con_scale_slk
+ return
+end
+
+function set_scaling!(
+ cb::DenseCallback,
+ x, xl, xu, y0, rhs,
+ ind_ineq,
+ nlp_scaling_max_gradient
+ )
+
+ x0 = variable(x)
+
+ nlp = cb.nlp
+ obj_scale = cb.obj_scale
+ con_scale = cb.con_scale
+ con_buffer = cb.con_buffer
+ jac_buffer = cb.jac_buffer
+ grad_buffer = cb.grad_buffer
+
+ # Set scaling
+ jac_dense!(nlp,x0,jac_buffer)
+ set_con_scale_dense!(con_scale, jac_buffer, nlp_scaling_max_gradient)
+
+ NLPModels.grad!(nlp,x0,grad_buffer)
+ set_obj_scale!(obj_scale, grad_buffer, nlp_scaling_max_gradient)
+
+ con_scale_slk = @view(con_scale[ind_ineq])
+ y0 ./= con_scale
+ rhs .*= con_scale
+ slack(x) .*= con_scale_slk
+ slack(xl) .*= con_scale_slk
+ slack(xu) .*= con_scale_slk
+ return
+end
+
+function _jac_sparsity_wrapper!(
+ cb::SparseCallback,
+ I::AbstractVector,J::AbstractVector
+ )
+
+ copyto!(I, cb.jac_I)
+ copyto!(J, cb.jac_J)
+ return
+end
+
+function _hess_sparsity_wrapper!(
+ cb::SparseCallback,
+ I::AbstractVector,J::AbstractVector
+ )
+ copyto!(I, cb.hess_I)
+ copyto!(J, cb.hess_J)
+ return
+end
+
+
+function _eval_cons_wrapper!(cb::AbstractCallback,x::AbstractVector,c::AbstractVector)
+ NLPModels.cons!(cb.nlp, x,c)
+ c .*= cb.con_scale
+ return c
+end
+
+function _eval_jac_wrapper!(
+ cb::SparseCallback,
+ x::AbstractVector,
+ jac::AbstractVector
+ )
+
+ nnzj_orig = get_nnzj(cb.nlp.meta)
+ NLPModels.jac_coord!(cb.nlp, x, jac)
+ jac .*= cb.jac_scale
+
+ _treat_fixed_variable_jac_coord!(cb.fixed_handler, cb, x, jac)
+end
+
+function _eval_jtprod_wrapper!(
+ cb::AbstractCallback{T},
+ x::AbstractVector,
+ v::AbstractVector,
+ jvt::AbstractVector,
+ ) where T
+
+ y = cb.con_buffer
+ y .= v .* cb.con_scale
+ NLPModels.jtprod!(cb.nlp, x, y, jvt)
+ _treat_fixed_variable_grad!(cb.fixed_handler, cb, x, jvt)
+ return jvt
+end
+
+function _treat_fixed_variable_jac_coord!(fixed_handler::RelaxBound, cb, x, jac) end
+function _treat_fixed_variable_jac_coord!(fixed_handler::MakeParameter, cb::SparseCallback{T}, x, jac) where T
+ fill!(@view(jac[fixed_handler.fixedj]), zero(T))
+end
+
+function _eval_grad_f_wrapper!(
+ cb::AbstractCallback{T},
+ x::AbstractVector,
+ grad::AbstractVector
+ ) where T
+
+ NLPModels.grad!(cb.nlp, x, grad)
+ grad .*= cb.obj_scale[]
+ _treat_fixed_variable_grad!(cb.fixed_handler, cb, x, grad)
+end
+function _treat_fixed_variable_grad!(fixed_handler::RelaxBound, cb, x, grad) end
+function _treat_fixed_variable_grad!(fixed_handler::MakeParameter, cb::AbstractCallback{T}, x, grad) where T
+ fixed_handler.grad_storage .= @view(grad[fixed_handler.fixed])
+ map!(
+ (x,y)->x-y,
+ @view(grad[fixed_handler.fixed]),
+ @view(x[cb.fixed_handler.fixed]),
+ @view(get_lvar(cb.nlp)[cb.fixed_handler.fixed])
+ )
+end
+
+function _eval_f_wrapper(cb::AbstractCallback,x::AbstractVector)
+ return NLPModels.obj(cb.nlp,x)* cb.obj_scale[]
+end
+
+function _eval_lag_hess_wrapper!(
+ cb::SparseCallback{T},
+ x::AbstractVector,
+ y::AbstractVector,
+ hess::AbstractVector;
+ obj_weight = one(T)
+ ) where T
+
+ nnzh_orig = get_nnzh(cb.nlp.meta)
+
+ cb.con_buffer .= y .* cb.con_scale
+ NLPModels.hess_coord!(
+ cb.nlp, x, cb.con_buffer, view(hess, 1:nnzh_orig);
+ obj_weight=obj_weight * cb.obj_scale[]
+ )
+ _treat_fixed_variable_hess_coord!(cb.fixed_handler, cb, hess)
+end
+
+function _treat_fixed_variable_hess_coord!(fixed_handler::RelaxBound, cb, hess) end
+function _treat_fixed_variable_hess_coord!(fixed_handler::MakeParameter, cb::SparseCallback{T}, hess) where T
+ nnzh_orig = get_nnzh(cb.nlp.meta)
+ fill!(@view(hess[fixed_handler.fixedh]), zero(T))
+ fill!(@view(hess[nnzh_orig+1:end]), one(T))
+end
+
+function _eval_jac_wrapper!(
+ cb::SparseCallback{T},
+ x::AbstractVector,
+ jac::AbstractMatrix
+ ) where T
+
+ jac_buffer = cb.jac_buffer
+ _eval_jac_wrapper!(cb, x, jac_buffer)
+ fill!(jac, zero(T))
+ @inbounds @simd for k=1:length(cb.jac_I)
+ i,j = cb.jac_I[k], cb.jac_J[k]
+ jac[i,j] += jac_buffer[k]
+ end
+end
+
+function _eval_lag_hess_wrapper!(
+ cb::SparseCallback{T},
+ x::AbstractVector,
+ y::AbstractVector,
+ hess::AbstractMatrix;
+ obj_weight = one(T)
+ ) where T
+
+ hess_buffer = cb.hess_buffer
+ _eval_lag_hess_wrapper!(cb, x, y, hess_buffer; obj_weight=obj_weight * cb.obj_scale[])
+ fill!(hess, zero(T))
+ @inbounds @simd for k=1:length(cb.hess_I)
+ i,j = cb.hess_I[k], cb.hess_J[k]
+ hess[i,j] += hess_buffer[k]
+ end
+ _treat_fixed_variable_hess_dense!(cb.fixed_handler, cb, hess)
+end
+function _treat_fixed_variable_hess_dense!(fixed_handler::RelaxBound, cb, hess) end
+function _treat_fixed_variable_hess_dense!(fixed_handler::MakeParameter, cb::SparseCallback{T}, hess) where T
+ nnzh_orig = get_nnzh(cb.nlp.meta)
+
+ fixed = fixed_handler.fixed
+ _set_diag!(hess, fixed, one(T))
+end
+
+function _eval_jac_wrapper!(
+ cb::DenseCallback{T},
+ x::AbstractVector,
+ jac::AbstractMatrix
+ ) where T
+
+ jac_dense!(cb.nlp, x, jac)
+ jac .*= cb.con_scale
+ _treat_fixed_variable_jac_dense!(cb.fixed_handler, cb, jac)
+end
+function _treat_fixed_variable_jac_dense!(fixed_handler::RelaxBound, cb::DenseCallback, jac) end
+function _treat_fixed_variable_jac_dense!(fixed_handler::MakeParameter, cb::DenseCallback{T}, jac) where T
+ jac[:,fixed_handler.fixed] .= zero(T)
+end
+
+function _eval_lag_hess_wrapper!(
+ cb::DenseCallback{T},
+ x::AbstractVector,
+ y::AbstractVector,
+ hess::AbstractMatrix;
+ obj_weight = one(T)
+ ) where T
+
+ hess_dense!(
+ cb.nlp, x, y, hess;
+ obj_weight=obj_weight * cb.obj_scale[]
+ )
+
+ _treat_fixed_variable_lag_hess_dense!(cb.fixed_handler, cb, hess)
+end
+function _treat_fixed_variable_lag_hess_dense!(fixed_handler::RelaxBound, cb::DenseCallback, hess) end
+function _treat_fixed_variable_lag_hess_dense!(fixed_handler::MakeParameter, cb::DenseCallback{T}, hess) where T
+ fixed = fixed_handler.fixed
+ hess[:,fixed] .= zero(T)
+ hess[fixed,:] .= zero(T)
+ _set_diag!(hess, fixed, one(T))
+end
+
+function update_z!(cb, zl, zu, jacl)
+ _update_z!(cb.fixed_handler, zl, zu, jacl, get_minimize(cb.nlp) ? 1 : -1)
+end
+
+function _update_z!(fixed_handler::MakeParameter, zl, zu, jacl, sense)
+ zl_r = @view(zl[fixed_handler.fixed])
+ zu_r = @view(zu[fixed_handler.fixed])
+ jacl_r = @view(jacl[fixed_handler.fixed])
+ map!(
+ (x,y)->sense * max(x+y,0),
+ zl_r,
+ fixed_handler.grad_storage,
+ jacl_r
+ )
+ map!(
+ (x,y)->sense * max(-(x+y),0),
+ zu_r,
+ fixed_handler.grad_storage,
+ jacl_r,
+ )
+end
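+# For a fixed variable, the stationarity residual r = ∇f + Jᵀy is split between
+# the two bound multipliers, zl = max(r, 0) and zu = max(-r, 0) (sign flipped
+# when maximizing), so at most one of the two is nonzero.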
+function _update_z!(fixed_handler::RelaxBound, zl, zu, jacl, sense) end
+
+function _set_diag!(A, inds, a)
+ @inbounds @simd for i in inds
+ A[i,i] = a
+ end
+end
diff --git a/src/options.jl b/src/options.jl
index 30a0e71c..438b4207 100644
--- a/src/options.jl
+++ b/src/options.jl
@@ -2,6 +2,7 @@
parse_option(::Type{Module},str::String) = eval(Symbol(str))
+
function set_options!(opt::AbstractOptions, options)
other_options = Dict{Symbol, Any}()
for (key, val) in options
@@ -17,11 +18,16 @@ function set_options!(opt::AbstractOptions, options)
end
@kwdef mutable struct MadNLPOptions <: AbstractOptions
+ # Primary options
+ tol::Float64
+ callback::Type
+ kkt_system::Type
+ linear_solver::Type
+
# General options
rethrow_error::Bool = true
disable_garbage_collector::Bool = false
blas_num_threads::Int = 1
- linear_solver::Type = LapackCPUSolver
iterator::Type = RichardsonIterator
# Output options
@@ -30,7 +36,6 @@ end
file_print_level::LogLevels = INFO
# Termination options
- tol::Float64 = 1e-8
acceptable_tol::Float64 = 1e-6
acceptable_iter::Int = 15
diverging_iterates_tol::Float64 = 1e20
@@ -40,21 +45,24 @@ end
# NLP options
kappa_d::Float64 = 1e-5
- fixed_variable_treatment::FixedVariableTreatments = MAKE_PARAMETER
+ fixed_variable_treatment::Type = kkt_system <: MadNLP.SparseCondensedKKTSystem ? MadNLP.RelaxBound : MadNLP.MakeParameter
+ equality_treatment::Type = kkt_system <: MadNLP.SparseCondensedKKTSystem ? MadNLP.RelaxEquality : MadNLP.EnforceEquality
+ bound_relax_factor::Float64 = 1e-8
jacobian_constant::Bool = false
hessian_constant::Bool = false
- kkt_system::KKTLinearSystem = SPARSE_KKT_SYSTEM
- hessian_approximation::HessianApproximation = EXACT_HESSIAN
+ hessian_approximation::Type = ExactHessian
+ quasi_newton_options::QuasiNewtonOptions = QuasiNewtonOptions()
+ inertia_correction_method::Type = InertiaAuto
+ inertia_free_tol::Float64 = 0.
# initialization options
dual_initialized::Bool = false
- inertia_correction_method::InertiaCorrectionMethod = INERTIA_AUTO
+ dual_initialization_method::Type = kkt_system <: MadNLP.SparseCondensedKKTSystem ? DualInitializeSetZero : DualInitializeLeastSquares
constr_mult_init_max::Float64 = 1e3
bound_push::Float64 = 1e-2
bound_fac::Float64 = 1e-2
nlp_scaling::Bool = true
nlp_scaling_max_gradient::Float64 = 100.
- inertia_free_tol::Float64 = 0.
# Hessian Perturbation
min_hessian_perturbation::Float64 = 1e-20
@@ -88,15 +96,49 @@ end
# Barrier
mu_init::Float64 = 1e-1
- mu_min::Float64 = 1e-11
+ mu_min::Float64 = min(1e-4, tol) / (barrier_tol_factor + 1) # following Ipopt's default
mu_superlinear_decrease_power::Float64 = 1.5
tau_min::Float64 = 0.99
mu_linear_decrease_factor::Float64 = .2
end
+is_dense_callback(nlp) = hasmethod(MadNLP.jac_dense!, Tuple{typeof(nlp), AbstractVector, AbstractMatrix}) &&
+ hasmethod(MadNLP.hess_dense!, Tuple{typeof(nlp), AbstractVector, AbstractVector, AbstractMatrix})
+
+# smart option presets
+function MadNLPOptions(
+ nlp::AbstractNLPModel{T};
+ dense_callback = MadNLP.is_dense_callback(nlp),
+ callback = dense_callback ? DenseCallback : SparseCallback,
+ kkt_system = dense_callback ? DenseCondensedKKTSystem : SparseKKTSystem,
+ linear_solver = dense_callback ? LapackCPUSolver : default_sparse_solver(nlp),
+ tol = get_tolerance(T,kkt_system)
+ ) where T
+
+ return MadNLPOptions(
+ tol = tol,
+ callback = callback,
+ kkt_system = kkt_system,
+ linear_solver = linear_solver,
+ )
+end
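+# E.g. a model implementing jac_dense!/hess_dense! resolves to DenseCallback +
+# DenseCondensedKKTSystem + LapackCPUSolver, whereas a sparse model falls back
+# to SparseCallback + SparseKKTSystem + default_sparse_solver(nlp).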
+
+get_tolerance(::Type{T},::Type{KKT}) where {T, KKT} = 10^round(log10(eps(T))/2)
+get_tolerance(::Type{T},::Type{SparseCondensedKKTSystem}) where T = 10^(round(log10(eps(T))/4))
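+# E.g. for Float64, eps(T) ≈ 2.2e-16, giving a default tolerance of 1e-8
+# (relaxed to 1e-4 for SparseCondensedKKTSystem, which is numerically less accurate).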
+
+function default_sparse_solver(nlp::AbstractNLPModel)
+ if isdefined(Main, :MadNLPHSL)
+ Main.MadNLPHSL.Ma27Solver
+ elseif isdefined(Main, :MadNLPMumps)
+ Main.MadNLPMumps.MumpsSolver
+ else
+ UmfpackSolver
+ end
+end
+
function check_option_sanity(options)
- is_kkt_dense = (options.kkt_system == DENSE_KKT_SYSTEM) || (options.kkt_system == DENSE_CONDENSED_KKT_SYSTEM)
- is_hess_approx_dense = (options.hessian_approximation == DENSE_BFGS) || (options.hessian_approximation == DENSE_DAMPED_BFGS)
+ is_kkt_dense = options.kkt_system <: AbstractDenseKKTSystem
+ is_hess_approx_dense = options.hessian_approximation <: Union{BFGS, DampedBFGS}
if input_type(options.linear_solver) == :csc && is_kkt_dense
error("[options] Sparse Linear solver is not supported in dense mode.\n"*
"Please use a dense linear solver or change `kkt_system` ")
@@ -114,14 +156,34 @@ function print_ignored_options(logger,option_dict)
end
end
-function load_options(; linear_solver=default_linear_solver(), options...)
+function _get_primary_options(options)
+ primary_opt = Dict{Symbol,Any}()
+ remaining_opt = Dict{Symbol,Any}()
+ for (k,v) in options
+ if k in [:tol, :linear_solver, :callback, :kkt_system]
+ primary_opt[k] = v
+ else
+ remaining_opt[k] = v
+ end
+ end
+
+ return primary_opt, remaining_opt
+end
+
+function load_options(nlp; options...)
+
+ primary_opt, options = _get_primary_options(options)
+
# Initiate interior-point options
- opt_ipm = MadNLPOptions(linear_solver=linear_solver)
+ opt_ipm = MadNLPOptions(nlp; primary_opt...)
linear_solver_options = set_options!(opt_ipm, options)
check_option_sanity(opt_ipm)
# Initiate linear-solver options
opt_linear_solver = default_options(opt_ipm.linear_solver)
- remaining_options = set_options!(opt_linear_solver, linear_solver_options)
+ iterator_options = set_options!(opt_linear_solver, linear_solver_options)
+ # Initiate iterator options
+ opt_iterator = default_options(opt_ipm.iterator)
+ remaining_options = set_options!(opt_iterator, iterator_options)
# Initiate logger
logger = MadNLPLogger(
@@ -135,6 +197,11 @@ function load_options(; linear_solver=default_linear_solver(), options...)
if !isempty(remaining_options)
print_ignored_options(logger, remaining_options)
end
- return opt_ipm, opt_linear_solver, logger
+ return (
+ interior_point=opt_ipm,
+ linear_solver=opt_linear_solver,
+ iterative_refinement=opt_iterator,
+ logger=logger,
+ )
end
diff --git a/src/quasi_newton.jl b/src/quasi_newton.jl
index 32ff9d8d..19852487 100644
--- a/src/quasi_newton.jl
+++ b/src/quasi_newton.jl
@@ -30,14 +30,34 @@ Return `true` if the update succeeded, `false` otherwise.
"""
function update! end
+"""
+ init!(
+ qn::AbstractHessian{T},
+ Bk::AbstractArray{T},
+ g0::AbstractVector{T},
+ f0::T,
+ ) where T
+
+Instantiate the Hessian estimate `Bk` with the quasi-Newton algorithm `qn`.
+The function uses the initial gradient `g0` and the initial objective
+`f0` to build the initial estimate.
+
+"""
+function init! end
+
curvature(::Val{SCALAR1}, sk, yk) = dot(yk, sk) / dot(sk, sk)
curvature(::Val{SCALAR2}, sk, yk) = dot(yk, yk) / dot(sk, yk)
curvature(::Val{SCALAR3}, sk, yk) = 0.5 * (curvature(Val(SCALAR1), sk, yk) + curvature(Val(SCALAR2), sk, yk))
curvature(::Val{SCALAR4}, sk, yk) = sqrt(curvature(Val(SCALAR1), sk, yk) * curvature(Val(SCALAR2), sk, yk))
+@kwdef mutable struct QuasiNewtonOptions <: AbstractOptions
+ init_strategy::BFGSInitStrategy = SCALAR1
+ max_history::Int = 6
+ init_value::Float64 = 1.0
+ sigma_min::Float64 = 1e-8
+ sigma_max::Float64 = 1e+8
+end
-struct ExactHessian{T, VT} <: AbstractHessian{T, VT} end
-ExactHessian{T, VT}(n::Int) where {T, VT} = ExactHessian{T, VT}()
"""
BFGS{T, VT} <: AbstractQuasiNewton{T, VT}
@@ -51,8 +71,9 @@ B_{k+1} = B_k - \frac{(B_k s_k)(B_k s_k)^⊤}{s_k^⊤ B_k s_k} + \frac{y_k y_k^
The matrix is not updated if ``s_k^⊤ y_k < 10^{-8}``.
"""
-struct BFGS{T, VT} <: AbstractQuasiNewton{T, VT}
+struct BFGS{T, VT <: AbstractVector{T}} <: AbstractQuasiNewton{T, VT}
init_strategy::BFGSInitStrategy
+ is_instantiated::Base.RefValue{Bool}
sk::VT
yk::VT
bsk::VT
@@ -60,9 +81,16 @@ struct BFGS{T, VT} <: AbstractQuasiNewton{T, VT}
last_x::VT
last_jv::VT
end
-function BFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT}
- return BFGS{T, VT}(
- init_strategy,
+
+function create_quasi_newton(
+ ::Type{BFGS},
+ cb::AbstractCallback{T,VT},
+ n;
+ options=QuasiNewtonOptions(),
+ ) where {T,VT}
+ BFGS(
+ options.init_strategy,
+ Ref(false),
VT(undef, n),
VT(undef, n),
VT(undef, n),
@@ -73,19 +101,28 @@ function BFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT}
end
function update!(qn::BFGS{T, VT}, Bk::AbstractMatrix, sk::AbstractVector, yk::AbstractVector) where {T, VT}
- if dot(sk, yk) < T(1e-8)
+ yksk = dot(sk, yk)
+ if yksk < T(1e-8)
return false
end
+ # Initial approximation (Nocedal & Wright, p.143)
+ if !qn.is_instantiated[]
+ sksk = dot(sk, sk)
+ Bk[diagind(Bk)] .= yksk ./ sksk
+ qn.is_instantiated[] = true
+ end
+ # BFGS update
mul!(qn.bsk, Bk, sk)
alpha1 = one(T) / dot(sk, qn.bsk)
- alpha2 = one(T) / dot(yk, sk)
+ alpha2 = one(T) / yksk
_ger!(-alpha1, qn.bsk, qn.bsk, Bk) # Bk = Bk - alpha1 * bsk * bsk'
_ger!(alpha2, yk, yk, Bk) # Bk = Bk + alpha2 * yk * yk'
return true
end
-struct DampedBFGS{T, VT} <: AbstractQuasiNewton{T, VT}
+struct DampedBFGS{T, VT <: AbstractVector{T}} <: AbstractQuasiNewton{T, VT}
init_strategy::BFGSInitStrategy
+ is_instantiated::Base.RefValue{Bool}
sk::VT
yk::VT
bsk::VT
@@ -94,9 +131,16 @@ struct DampedBFGS{T, VT} <: AbstractQuasiNewton{T, VT}
last_x::VT
last_jv::VT
end
-function DampedBFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT}
- return DampedBFGS{T, VT}(
- init_strategy,
+
+function create_quasi_newton(
+ ::Type{DampedBFGS},
+ cb::AbstractCallback{T,VT},
+ n;
+ options=QuasiNewtonOptions(),
+ ) where {T,VT}
+ return DampedBFGS(
+ options.init_strategy,
+ Ref(false),
VT(undef, n),
VT(undef, n),
VT(undef, n),
@@ -108,12 +152,20 @@ function DampedBFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT}
end
function update!(qn::DampedBFGS{T, VT}, Bk::AbstractMatrix, sk::AbstractVector, yk::AbstractVector) where {T, VT}
+ yksk = dot(sk, yk)
+ # Initial approximation (Nocedal & Wright, p.143)
+ if !qn.is_instantiated[]
+ sksk = dot(sk, sk)
+ Bk[diagind(Bk)] .= yksk ./ sksk
+ qn.is_instantiated[] = true
+ end
+
mul!(qn.bsk, Bk, sk)
sBs = dot(sk, qn.bsk)
# Procedure 18.2 (Nocedal & Wright, page 537)
theta = if dot(sk, yk) < T(0.2) * sBs
- T(0.8) * sBs / (sBs - dot(sk, yk))
+ T(0.8) * sBs / (sBs - yksk)
else
one(T)
end
@@ -130,11 +182,17 @@ function update!(qn::DampedBFGS{T, VT}, Bk::AbstractMatrix, sk::AbstractVector,
return true
end
-# Initial update (Nocedal & Wright, p.143)
-function init!(qn::Union{BFGS, DampedBFGS}, Bk::AbstractMatrix, sk::AbstractVector, yk::AbstractVector)
- yksk = dot(yk, sk)
- sksk = dot(sk, sk)
- Bk[diagind(Bk)] .= yksk ./ sksk
+function init!(qn::Union{BFGS, DampedBFGS}, Bk::AbstractMatrix{T}, g0::AbstractVector{T}, f0::T) where T
+ norm_g0 = dot(g0, g0)
+ # Initiate B0 with Gilbert & Lemaréchal rule.
+ rho0 = if norm_g0 < sqrt(eps(T))
+ one(T)
+ elseif f0 ≈ zero(T)
+ one(T) / norm_g0
+ else
+ abs(f0) / norm_g0
+ end
+ Bk[diagind(Bk)] .= T(2) * rho0
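+ # e.g. f0 = 4.0 with ‖g0‖² = 16.0 gives rho0 = 0.25, i.e. B0 = 0.5 I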
return
end
@@ -142,15 +200,19 @@ end
"""
CompactLBFGS{T, VT} <: AbstractQuasiNewton
"""
-mutable struct CompactLBFGS{T, VT, MT} <: AbstractQuasiNewton{T, VT}
+mutable struct CompactLBFGS{T, VT <: AbstractVector{T}, MT <: AbstractMatrix{T}} <: AbstractQuasiNewton{T, VT}
init_strategy::BFGSInitStrategy
sk::VT
yk::VT
last_g::VT
last_x::VT
last_jv::VT
+ init_value::T
+ sigma_min::T
+ sigma_max::T
max_mem::Int
current_mem::Int
+ skipped_iter::Int
Sk::MT # n x p
Yk::MT # n x p
Lk::MT # p x p
@@ -167,30 +229,39 @@ mutable struct CompactLBFGS{T, VT, MT} <: AbstractQuasiNewton{T, VT}
_w2::VT
end
-function CompactLBFGS{T, VT, MT}(n::Int; max_mem=6, init_strategy=SCALAR1) where {T, VT<:AbstractVector{T}, MT<:AbstractMatrix{T}}
- return CompactLBFGS{T, VT, MT}(
- init_strategy,
- zeros(T, n),
- zeros(T, n),
- zeros(T, n),
- zeros(T, n),
- zeros(T, n),
- max_mem,
+function create_quasi_newton(
+ ::Type{CompactLBFGS},
+ cb::AbstractCallback{T,VT},
+ n;
+ options=QuasiNewtonOptions(),
+ ) where {T, VT}
+ return CompactLBFGS(
+ options.init_strategy,
+ fill!(create_array(cb, n), zero(T)),
+ fill!(create_array(cb, n), zero(T)),
+ fill!(create_array(cb, n), zero(T)),
+ fill!(create_array(cb, n), zero(T)),
+ fill!(create_array(cb, n), zero(T)),
+ options.init_value,
+ options.sigma_min,
+ options.sigma_max,
+ options.max_history,
0,
- zeros(T, n, 0),
- zeros(T, n, 0),
- zeros(T, n, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0, 0),
- zeros(T, 0),
- zeros(T, 0),
- zeros(T, 0),
+ 0,
+ fill!(create_array(cb, n, 0), zero(T)),
+ fill!(create_array(cb, n, 0), zero(T)),
+ fill!(create_array(cb, n, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0, 0), zero(T)),
+ fill!(create_array(cb, 0), zero(T)),
+ fill!(create_array(cb, 0), zero(T)),
+ fill!(create_array(cb, 0), zero(T)),
)
end
@@ -210,6 +281,17 @@ function _resize!(qn::CompactLBFGS{T, VT, MT}) where {T, VT, MT}
return
end
+function _reset!(qn::CompactLBFGS{T, VT, MT}) where {T, VT, MT}
+ n, _ = size(qn)
+ qn.current_mem = 0
+ qn.skipped_iter = 0
+ fill!(qn.last_jv, zero(T))
+ qn.Dk = zeros(T, 0)
+ qn.Sk = zeros(T, n, 0)
+ qn.Yk = zeros(T, n, 0)
+ _resize!(qn)
+end
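+# _reset! is invoked after two consecutive skipped updates (see update! below):
+# flushing the memory restarts the approximation from the σI initialization.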
+
# augment / shift
function _update_SY!(qn::CompactLBFGS, s, y)
if qn.current_mem < qn.max_mem
@@ -259,9 +341,19 @@ function _refresh_STS!(qn::CompactLBFGS{T, VT, MT}) where {T, VT, MT}
end
function update!(qn::CompactLBFGS{T, VT, MT}, Bk, sk, yk) where {T, VT, MT}
- if dot(sk, yk) < sqrt(eps(T)) * norm(sk) * norm(yk)
+ norm_sk, norm_yk = norm(sk), norm(yk)
+ # Skip update if vectors are too small or local curvature is negative.
+ if ((norm_sk < T(100) * eps(T)) ||
+ (norm_yk < T(100) * eps(T)) ||
+ (dot(sk, yk) < sqrt(eps(T)) * norm_sk * norm_yk)
+ )
+ qn.skipped_iter += 1
+ if qn.skipped_iter >= 2
+ _reset!(qn)
+ end
return false
end
+
# Refresh internal structures
_update_SY!(qn, sk, yk)
_refresh_D!(qn, sk, yk)
@@ -277,30 +369,44 @@ function update!(qn::CompactLBFGS{T, VT, MT}, Bk, sk, yk) where {T, VT, MT}
# [ U₂ ] [ U₂ ]
# Step 1: σₖ I
- sigma = curvature(Val(qn.init_strategy), sk, yk) # σₖ
- Bk .= sigma # Hₖ .= σₖ I (diagonal Hessian approx.)
+ sigma = curvature(Val(qn.init_strategy), sk, yk) # σₖ
+ sigma = clamp(sigma, qn.sigma_min, qn.sigma_max)
+ Bk .= sigma # Hₖ .= σₖ I (diagonal Hessian approx.)
# Step 2: Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ
- qn.DkLk .= (one(T) ./ qn.Dk) .* qn.Lk' # DₖLₖ = Dₖ⁻¹ Lₖᵀ
- qn.Mk .= qn.SdotS # Mₖ = Sₖᵀ Sₖ
- mul!(qn.Mk, qn.Lk, qn.DkLk, one(T), sigma) # Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ
+ qn.DkLk .= (one(T) ./ qn.Dk) .* qn.Lk' # DₖLₖ = Dₖ⁻¹ Lₖᵀ
+ qn.Mk .= qn.SdotS # Mₖ = Sₖᵀ Sₖ
+ mul!(qn.Mk, qn.Lk, qn.DkLk, one(T), sigma) # Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ
symmetrize!(qn.Mk)
copyto!(qn.Jk, qn.Mk)
- cholesky!(qn.Jk) # Mₖ = Jₖᵀ Jₖ (factorization)
+ cholesky!(qn.Jk) # Mₖ = Jₖᵀ Jₖ (factorization)
# Step 3: Nₖ = [U₁ U₂]
U1 = view(qn.U, :, 1:k)
- copyto!(U1, qn.Sk) # U₁ = Sₖ
- mul!(U1, qn.Yk, qn.DkLk, one(T), sigma) # U₁ = σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ
+ copyto!(U1, qn.Sk) # U₁ = Sₖ
+ mul!(U1, qn.Yk, qn.DkLk, one(T), sigma) # U₁ = σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ
BLAS.trsm!('R', 'U', 'N', 'N', one(T), qn.Jk, U1) # U₁ = Jₖ⁻ᵀ (σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ)
U2 = view(qn.U, :, 1+k:2*k)
- δ .= .-one(T) ./ sqrt.(qn.Dk) # δ = 1 / √Dₖ
- U2 .= δ' .* qn.Yk # U₂ = (1 / √Dₖ) * Yₖ
+ δ .= .-one(T) ./ sqrt.(qn.Dk) # δ = 1 / √Dₖ
+ U2 .= δ' .* qn.Yk # U₂ = (1 / √Dₖ) * Yₖ
return true
end
-function init!(qn::CompactLBFGS, Bk::AbstractArray, sk::AbstractVector, yk::AbstractVector)
+function init!(qn::CompactLBFGS{T}, Bk::AbstractVector{T}, g0::AbstractVector{T}, f0::T) where T
+ norm_g0 = dot(g0, g0)
+ # Initiate B0 with Gilbert & Lemaréchal rule.
+ rho0 = if norm_g0 < sqrt(eps(T))
+ one(T)
+ elseif f0 ≈ zero(T)
+ one(T) / norm_g0
+ else
+ abs(f0) / norm_g0
+ end
+ Bk .= (T(2) * rho0 * qn.init_value)
return
end
+
+struct ExactHessian{T, VT} <: AbstractHessian{T, VT} end
+create_quasi_newton(::Type{ExactHessian}, cb::AbstractCallback{T,VT}, n; options...) where {T,VT} = ExactHessian{T, VT}()
diff --git a/src/utils.jl b/src/utils.jl
index 723e97ef..8a553cd4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,9 +1,5 @@
abstract type AbstractOptions end
-# Build info
-default_linear_solver() = UmfpackSolver
-default_dense_solver() = LapackCPUSolver
-
# MadNLPLogger
@kwdef mutable struct MadNLPLogger
print_level::LogLevels = INFO
@@ -84,7 +80,7 @@ function _madnlp_unsafe_wrap(vec::VT, n, shift=1) where VT
end
# Type definitions for noncontiguous views
-const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false}
+const SubVector{Tv,VT, VI} = SubArray{Tv, 1, VT, Tuple{VI}, false}
@kwdef mutable struct MadNLPCounters
k::Int = 0 # total iteration counter
@@ -97,6 +93,7 @@ const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false}
eval_function_time::Float64 = 0.
solver_time::Float64 = 0.
total_time::Float64 = 0.
+ init_time::Float64 = 0.
obj_cnt::Int = 0
obj_grad_cnt::Int = 0
@@ -104,7 +101,18 @@ const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false}
con_jac_cnt::Int = 0
lag_hess_cnt::Int = 0
+ t1::Float64 = 0.
+ t2::Float64 = 0.
+ t3::Float64 = 0.
+ t4::Float64 = 0.
+ t5::Float64 = 0.
+ t6::Float64 = 0.
+ t7::Float64 = 0.
+ t8::Float64 = 0.
+
acceptable_cnt::Int = 0
+ unsuccessful_iterate::Int = 0
+ restoration_fail_count::Int = 0
end
"""
@@ -143,8 +151,8 @@ function timing_linear_solver(ips; ntrials=10)
t_build, t_factorize, t_backsolve = (0.0, 0.0, 0.0)
for _ in 1:ntrials
t_build += @elapsed build_kkt!(ips.kkt)
- t_factorize += @elapsed factorize!(ips.linear_solver)
- t_backsolve += @elapsed solve_refine_wrapper!(ips,ips.d,ips.p)
+ t_factorize += @elapsed factorize!(ips.kkt.linear_solver)
+ t_backsolve += @elapsed solve!(ips.kkt, ips.d)
end
return (
time_build_kkt = t_build / ntrials,
diff --git a/test/MOI_interface_test.jl b/test/MOI_interface_test.jl
index 7119e3a9..750e4fc1 100644
--- a/test/MOI_interface_test.jl
+++ b/test/MOI_interface_test.jl
@@ -3,7 +3,8 @@ module TestMOIWrapper
using MadNLP
using Test
-const MOI = MadNLP.MathOptInterface
+using MathOptInterface
+const MOI = MathOptInterface
function runtests()
for name in names(@__MODULE__; all = true)
@@ -35,28 +36,30 @@ function test_MOI_Test()
MOI.ObjectiveBound,
]
);
- exclude = String[
- "test_modification",
- "test_attribute_TimeLimitSec",
+ exclude = [
# TODO: MadNLP does not return the correct multiplier
- # when a variable is fixed with MOI.EqualTo.
- "test_linear_integration",
+ # when a variable is fixed with MOI.EqualTo (Issue #229).
+ r"^test_linear_integration$",
"test_quadratic_constraint_GreaterThan",
"test_quadratic_constraint_LessThan",
# MadNLP reaches maximum number of iterations instead
# of returning infeasibility certificate.
- "test_linear_DUAL_INFEASIBLE",
+ r"test_linear_DUAL_INFEASIBLE.*",
"test_solve_TerminationStatus_DUAL_INFEASIBLE",
# Tests excluded on purpose
# - Excluded because Hessian information is needed
"test_nonlinear_hs071_hessian_vector_product",
# - Excluded because Hessian information is needed
- "test_nonlinear_hs071_no_hessian",
- # - Excluded because Hessian information is needed
"test_nonlinear_invalid",
# - Excluded because this test is optional
"test_model_ScalarFunctionConstantNotZero",
+ # These tests throw an error: "Unable to query the dual of a variable
+ # bound that was reformulated using `ZerosBridge`."
+ "test_linear_VectorAffineFunction_empty_row",
+ "test_conic_linear_VectorOfVariables_2",
+ # TODO: investigate why it is breaking.
+ "test_nonlinear_expression_hs109",
]
)
@@ -94,6 +97,20 @@ function test_invalid_number_in_hessian_lagrangian()
return
end
+# See issue #318
+function test_user_defined_function()
+ model = MadNLP.Optimizer()
+ MOI.set(model, MOI.Silent(), true)
+ # Define custom function.
+ f(a, b) = a^2 + b^2
+ x = MOI.add_variables(model, 2)
+ MOI.set(model, MOI.UserDefinedFunction(:f, 2), (f,))
+ obj_f = MOI.ScalarNonlinearFunction(:f, Any[x[1], x[2]])
+ MOI.set(model, MOI.ObjectiveFunction{typeof(obj_f)}(), obj_f)
+ MOI.optimize!(model)
+ @test MOI.get(model, MOI.TerminationStatus()) == MOI.LOCALLY_SOLVED
+end
+
end
TestMOIWrapper.runtests()
diff --git a/test/kkt_test.jl b/test/kkt_test.jl
index e4ec1f4f..8502b280 100644
--- a/test/kkt_test.jl
+++ b/test/kkt_test.jl
@@ -1,7 +1,6 @@
using LinearAlgebra
-@testset "$KKTVector" for KKTVector in [
- MadNLP.ReducedKKTVector,
+@testset "[KKT vector] $KKTVector" for KKTVector in [
MadNLP.UnreducedKKTVector,
]
T = Float64
@@ -9,7 +8,10 @@ using LinearAlgebra
n, m = 10, 20
nlb, nub = 5, 6
- rhs = KKTVector{T, VT}(n, m, nlb, nub)
+ ind_lb = [2,3,4]
+ ind_ub = [4,5,6]
+
+ rhs = KKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub)
@test length(rhs) == length(MadNLP.full(rhs))
@test MadNLP.number_primal(rhs) == length(MadNLP.primal(rhs)) == n
@test MadNLP.number_dual(rhs) == length(MadNLP.dual(rhs)) == m
@@ -17,4 +19,35 @@ using LinearAlgebra
fill!(rhs, one(T))
@test norm(rhs) == sqrt(length(rhs))
+
+ # Test copy
+ copy_rhs = copy(rhs)
+ @test MadNLP.full(rhs) == MadNLP.full(copy_rhs)
end
+
+@testset "[KKT system] $(KKTSystem)" for (KKTSystem, Callback) in [
+ (MadNLP.SparseKKTSystem, MadNLP.SparseCallback),
+ (MadNLP.SparseUnreducedKKTSystem, MadNLP.SparseCallback),
+ (MadNLP.SparseCondensedKKTSystem, MadNLP.SparseCallback),
+ (MadNLP.DenseKKTSystem, MadNLP.DenseCallback),
+ (MadNLP.DenseCondensedKKTSystem, MadNLP.DenseCallback),
+]
+ linear_solver = MadNLP.LapackCPUSolver
+ cnt = MadNLP.MadNLPCounters(; start_time=time())
+
+ nlp = MadNLPTests.HS15Model()
+ ind_cons = MadNLP.get_index_constraints(nlp)
+
+ cb = MadNLP.create_callback(
+ Callback, nlp,
+ )
+
+ kkt = MadNLP.create_kkt_system(
+ KKTSystem,
+ cb,
+ ind_cons,
+ linear_solver;
+ )
+ MadNLPTests.test_kkt_system(kkt, cb)
+end
+
diff --git a/test/madnlp_dense.jl b/test/madnlp_dense.jl
index 9fff7df4..495bad6a 100644
--- a/test/madnlp_dense.jl
+++ b/test/madnlp_dense.jl
@@ -8,13 +8,14 @@ using Random
function _compare_dense_with_sparse(
kkt_system, n, m, ind_fixed, ind_eq;
- inertia=MadNLP.INERTIA_BASED,
+ inertia=MadNLP.InertiaBased,
)
- for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
+ for (T,tol,atol) in [(Float32,1e-3,1e0), (Float64,1e-8,1e-6)]
sparse_options = Dict{Symbol, Any}(
- :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
+ :kkt_system=>MadNLP.SparseKKTSystem,
+ :callback=>MadNLP.SparseCallback,
:inertia_correction_method=>inertia,
:linear_solver=>MadNLP.LapackCPUSolver,
:print_level=>MadNLP.ERROR,
@@ -22,13 +23,14 @@ function _compare_dense_with_sparse(
)
dense_options = Dict{Symbol, Any}(
:kkt_system=>kkt_system,
+ :callback=>MadNLP.DenseCallback,
:inertia_correction_method=>inertia,
:linear_solver=>MadNLP.LapackCPUSolver,
:print_level=>MadNLP.ERROR,
:tol=>tol
)
- nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
+ nlp = MadNLPTests.DenseDummyQP(zeros(T,n), m=m, fixed_variables=ind_fixed, equality_cons=ind_eq)
solver = MadNLPSolver(nlp; sparse_options...)
solverd = MadNLPSolver(nlp; dense_options...)
@@ -38,7 +40,7 @@ function _compare_dense_with_sparse(
# Check that dense formulation matches exactly sparse formulation
@test result_dense.status == MadNLP.SOLVE_SUCCEEDED
- @test result_sparse.iter == result_dense.iter
+ @test result_sparse.counters.k == result_dense.counters.k
@test result_sparse.objective ≈ result_dense.objective atol=atol
@test result_sparse.solution ≈ result_dense.solution atol=atol
@test result_sparse.multipliers ≈ result_dense.multipliers atol=atol
@@ -49,50 +51,49 @@ function _compare_dense_with_sparse(
end
end
-@testset "MadNLP: API $(kkt_type)" for (kkt_type, kkt_options) in [
- (MadNLP.DenseKKTSystem, MadNLP.DENSE_KKT_SYSTEM),
- (MadNLP.DenseCondensedKKTSystem, MadNLP.DENSE_CONDENSED_KKT_SYSTEM),
+@testset "MadNLP: API $(kkt)" for kkt in [
+ MadNLP.DenseKKTSystem,
+ MadNLP.DenseCondensedKKTSystem,
]
n = 10 # number of variables
@testset "Unconstrained" begin
dense_options = Dict{Symbol, Any}(
- :kkt_system=>kkt_options,
+ :kkt_system=>kkt,
:linear_solver=>MadNLP.LapackCPUSolver,
)
m = 0
- nlp = MadNLPTests.DenseDummyQP(; n=n, m=m)
+ nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m)
solverd = MadNLPSolver(nlp; dense_options...)
kkt = solverd.kkt
- @test isa(kkt, kkt_type)
@test isempty(kkt.jac)
- @test solverd.linear_solver.dense === kkt.aug_com
+ @test solverd.kkt.linear_solver.A === kkt.aug_com
@test size(kkt.hess) == (n, n)
@test length(kkt.pr_diag) == n
@test length(kkt.du_diag) == m
# Test that using a sparse solver is forbidden in dense mode
dense_options_error = Dict{Symbol, Any}(
- :kkt_system=>kkt_options,
+ :kkt_system=>kkt,
:linear_solver=>MadNLP.UmfpackSolver,
)
@test_throws Exception MadNLPSolver(nlp; dense_options_error...)
end
@testset "Constrained" begin
dense_options = Dict{Symbol, Any}(
- :kkt_system=>MadNLP.DENSE_KKT_SYSTEM,
+ :kkt_system=>MadNLP.DenseKKTSystem,
:linear_solver=>MadNLP.LapackCPUSolver,
)
m = 5
- nlp = MadNLPTests.DenseDummyQP(; n=n, m=m)
+ nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m)
solverd = MadNLPSolver(nlp; dense_options...)
ns = length(solverd.ind_ineq)
kkt = solverd.kkt
@test isa(kkt, MadNLP.DenseKKTSystem)
@test size(kkt.jac) == (m, n)
- @test solverd.linear_solver.dense === kkt.aug_com
+ @test solverd.kkt.linear_solver.A === kkt.aug_com
@test size(kkt.hess) == (n, n)
@test length(kkt.pr_diag) == n + ns
@test length(kkt.du_diag) == m
@@ -100,39 +101,36 @@ end
end
-@testset "MadNLP: option kkt_system=$(kkt_system)" for kkt_system in [MadNLP.DENSE_KKT_SYSTEM, MadNLP.DENSE_CONDENSED_KKT_SYSTEM]
+@testset "MadNLP: option kkt_system=$(kkt)" for kkt in [MadNLP.DenseKKTSystem, MadNLP.DenseCondensedKKTSystem]
@testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
- _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[])
- _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[]; inertia=MadNLP.INERTIA_FREE)
+ _compare_dense_with_sparse(kkt, n, m, Int[], Int[])
+ _compare_dense_with_sparse(kkt, n, m, Int[], Int[]; inertia=MadNLP.InertiaFree)
end
# Test with non-trivial equality constraints.
@testset "Equality constraints" begin
n, m = 20, 15
- _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[1, 8])
- _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[1, 8]; inertia=MadNLP.INERTIA_FREE)
+ _compare_dense_with_sparse(kkt, n, m, Int[], Int[1, 8])
+ _compare_dense_with_sparse(kkt, n, m, Int[], Int[1, 8]; inertia=MadNLP.InertiaFree)
end
@testset "Fixed variables" begin
n, m = 10, 5
- _compare_dense_with_sparse(kkt_system, n, m, Int[1, 2], Int[])
- _compare_dense_with_sparse(kkt_system, n, m, Int[1, 2], Int[]; inertia=MadNLP.INERTIA_FREE)
+ _compare_dense_with_sparse(kkt, n, m, Int[1, 2], Int[])
+ _compare_dense_with_sparse(kkt, n, m, Int[1, 2], Int[]; inertia=MadNLP.InertiaFree)
end
end
-@testset "MadNLP: custom KKT constructor" begin
- T, VT, MT = Float64, Vector{Float64}, Matrix{Float64}
- QN = MadNLP.ExactHessian{T, VT}
- nlp = MadNLPTests.DenseDummyQP{T}(; n=10, m=5)
- KKT = MadNLP.DenseKKTSystem{T, VT, MT, QN}
- solver = MadNLPSolver{T, KKT}(nlp; linear_solver=LapackCPUSolver)
- @test isa(solver.kkt, KKT)
-end
+# Custom KKT constructors are currently not supported.
+# @testset "MadNLP: custom KKT constructor" begin
+# solver = MadNLPSolver(nlp; kkt_system = MadNLP.DenseKKTSystem, linear_solver=LapackCPUSolver)
+# @test isa(solver.kkt, KKT)
+# end
@testset "MadNLP: restart (PR #113)" begin
n, m = 10, 5
- nlp = MadNLPTests.DenseDummyQP(; n=n, m=m)
+ nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m)
sparse_options = Dict{Symbol, Any}(
- :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM,
- :linear_solver=>MadNLP.LapackCPUSolver,
+ :kkt_system=>MadNLP.SparseKKTSystem,
+ :callback=>MadNLP.SparseCallback,
:print_level=>MadNLP.ERROR,
)
@@ -144,74 +142,3 @@ end
@test solver.status == MadNLP.SOLVE_SUCCEEDED
end
-@testset "MadNLP: $QN + $KKT" for QN in [
- MadNLP.DENSE_BFGS,
- MadNLP.DENSE_DAMPED_BFGS,
-], KKT in [
- MadNLP.DENSE_KKT_SYSTEM,
- MadNLP.DENSE_CONDENSED_KKT_SYSTEM,
-]
- @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
- nlp = MadNLPTests.DenseDummyQP{Float64}(; n=n, m=m)
- solver_exact = MadNLP.MadNLPSolver(
- nlp;
- print_level=MadNLP.ERROR,
- kkt_system=MadNLP.DENSE_KKT_SYSTEM,
- linear_solver=LapackCPUSolver,
- )
- results_ref = MadNLP.solve!(solver_exact)
-
- solver_qn = MadNLP.MadNLPSolver(
- nlp;
- print_level=MadNLP.ERROR,
- kkt_system=KKT,
- hessian_approximation=QN,
- linear_solver=LapackCPUSolver,
- )
- results_qn = MadNLP.solve!(solver_qn)
-
- @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
- @test results_qn.objective ≈ results_ref.objective atol=1e-6
- @test results_qn.solution ≈ results_ref.solution atol=1e-6
- @test solver_qn.cnt.lag_hess_cnt == 0
- # TODO: this test is currently breaking the CI, investigate why.
- # @test solver_exact.y ≈ solver_qn.y atol=1e-4
- end
-end
-
-@testset "MadNLP: LBFGS" begin
- @testset "HS15" begin
- nlp = MadNLPTests.HS15Model()
- solver_qn = MadNLP.MadNLPSolver(
- nlp;
- hessian_approximation=MadNLP.SPARSE_COMPACT_LBFGS,
- print_level=MadNLP.ERROR,
- )
- results_qn = MadNLP.solve!(solver_qn)
- @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
- end
- @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
- nlp = MadNLPTests.DenseDummyQP{Float64}(; )
- # Reference solve with exact Hessian
- solver_exact = MadNLP.MadNLPSolver(
- nlp;
- print_level=MadNLP.ERROR,
- )
- results_ref = MadNLP.solve!(solver_exact)
-
- # LBFGS solve
- solver_qn = MadNLP.MadNLPSolver(
- nlp;
- hessian_approximation=MadNLP.SPARSE_COMPACT_LBFGS,
- print_level=MadNLP.ERROR,
- )
- results_qn = MadNLP.solve!(solver_qn)
- @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
- @test results_qn.objective ≈ results_ref.objective atol=1e-6
- @test results_qn.solution ≈ results_ref.solution atol=1e-6
- @test solver_qn.cnt.lag_hess_cnt == 0
- # TODO: this test is currently breaking the CI, investigate why.
- # @test solver_exact.y ≈ solver_qn.y atol=1e-4
- end
-end
-
diff --git a/test/madnlp_quasi_newton.jl b/test/madnlp_quasi_newton.jl
new file mode 100644
index 00000000..88cbb001
--- /dev/null
+++ b/test/madnlp_quasi_newton.jl
@@ -0,0 +1,85 @@
+@testset "MadNLP: $QN + $KKT" for QN in [
+ MadNLP.BFGS,
+ MadNLP.DampedBFGS,
+], KKT in [
+ MadNLP.DenseKKTSystem,
+ MadNLP.DenseCondensedKKTSystem,
+]
+ @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
+ nlp = MadNLPTests.DenseDummyQP(zeros(Float64, n); m=m)
+ solver_exact = MadNLP.MadNLPSolver(
+ nlp;
+ print_level=MadNLP.ERROR,
+ kkt_system=MadNLP.DenseKKTSystem,
+ linear_solver=LapackCPUSolver,
+ )
+ results_ref = MadNLP.solve!(solver_exact)
+
+ solver_qn = MadNLP.MadNLPSolver(
+ nlp;
+ print_level=MadNLP.ERROR,
+ kkt_system=KKT,
+ hessian_approximation=QN,
+ linear_solver=LapackCPUSolver,
+ )
+ results_qn = MadNLP.solve!(solver_qn)
+
+ @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
+ @test results_qn.objective ≈ results_ref.objective atol=1e-6
+ @test results_qn.solution ≈ results_ref.solution atol=1e-6
+ @test solver_qn.cnt.lag_hess_cnt == 0
+ @test solver_exact.y ≈ solver_qn.y atol=1e-4
+ end
+end
+
+@testset "MadNLP: LBFGS" begin
+ @testset "HS15" begin
+ nlp = MadNLPTests.HS15NoHessianModel()
+ solver_qn = MadNLP.MadNLPSolver(
+ nlp;
+ callback = MadNLP.SparseCallback,
+ kkt_system = MadNLP.SparseKKTSystem,
+ hessian_approximation=MadNLP.CompactLBFGS,
+ print_level=MadNLP.ERROR,
+ )
+ results_qn = MadNLP.solve!(solver_qn)
+ @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
+
+ end
+ @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)]
+ x0 = zeros(Float64,n)
+ nlp = MadNLPTests.DenseDummyQP(x0; m=m)
+ # Reference solve with exact Hessian
+ solver_exact = MadNLP.MadNLPSolver(
+ nlp;
+ callback = MadNLP.SparseCallback,
+ kkt_system = MadNLP.SparseKKTSystem,
+ print_level=MadNLP.ERROR,
+ )
+ results_ref = MadNLP.solve!(solver_exact)
+
+ # LBFGS solve
+ solver_qn = MadNLP.MadNLPSolver(
+ nlp;
+ callback = MadNLP.SparseCallback,
+ kkt_system = MadNLP.SparseKKTSystem,
+ hessian_approximation=MadNLP.CompactLBFGS,
+ print_level=MadNLP.ERROR,
+ )
+ results_qn = MadNLP.solve!(solver_qn)
+
+ @test results_qn.status == MadNLP.SOLVE_SUCCEEDED
+ @test results_qn.objective ≈ results_ref.objective atol=1e-6
+ @test results_qn.solution ≈ results_ref.solution atol=1e-6
+ @test solver_qn.cnt.lag_hess_cnt == 0
+ @test solver_exact.y ≈ solver_qn.y atol=1e-4
+
+ # Test accuracy of KKT solver with LBFGS
+ b, x, w = solver_qn.p, solver_qn.d, solver_qn._w4
+ fill!(b.values, 1.0)
+ MadNLP.solve_refine_wrapper!(x, solver_qn, b, w)
+ mul!(w, solver_qn.kkt, x)
+ @assert norm(w.values .- b.values, Inf) <= 1e-6
+ end
+end
+
diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl
index 5cc383fe..a9863dbc 100644
--- a/test/madnlp_test.jl
+++ b/test/madnlp_test.jl
@@ -1,75 +1,116 @@
testset = [
[
- "Umfpack",
+ "SparseKKTSystem + Umfpack",
()->MadNLP.Optimizer(
linear_solver=MadNLP.UmfpackSolver,
print_level=MadNLP.ERROR),
[]
],
[
- "LapackCPU-BUNCHKAUFMAN",
+ "SparseKKTSystem + InertiaFree",
()->MadNLP.Optimizer(
+ inertia_correction_method=MadNLP.InertiaFree,
+ print_level=MadNLP.ERROR),
+ []
+ ],
+ [
+ "SparseKKTSystem + RelaxBound",
+ ()->MadNLP.Optimizer(
+ fixed_variable_treatment=MadNLP.RelaxBound,
+ print_level=MadNLP.ERROR),
+ []
+ ],
+ [
+ "DenseKKTSystem + LapackCPU-BUNCHKAUFMAN",
+ ()->MadNLP.Optimizer(
+ kkt_system=MadNLP.DenseKKTSystem,
linear_solver=MadNLP.LapackCPUSolver,
lapack_algorithm=MadNLP.BUNCHKAUFMAN,
print_level=MadNLP.ERROR),
[]
],
[
- "LapackCPU-LU",
+ "DenseKKTSystem + LapackCPU-LU",
()->MadNLP.Optimizer(
+ kkt_system=MadNLP.DenseKKTSystem,
linear_solver=MadNLP.LapackCPUSolver,
lapack_algorithm=MadNLP.LU,
print_level=MadNLP.ERROR),
[]
],
[
- "LapackCPU-QR",
+ "DenseKKTSystem + LapackCPU-QR",
()->MadNLP.Optimizer(
+ kkt_system=MadNLP.DenseKKTSystem,
linear_solver=MadNLP.LapackCPUSolver,
lapack_algorithm=MadNLP.QR,
print_level=MadNLP.ERROR),
[]
],
[
- "LapackCPU-CHOLESKY",
+ "DenseKKTSystem + LapackCPU-CHOLESKY",
()->MadNLP.Optimizer(
+ kkt_system=MadNLP.DenseKKTSystem,
linear_solver=MadNLP.LapackCPUSolver,
lapack_algorithm=MadNLP.CHOLESKY,
print_level=MadNLP.ERROR),
- ["infeasible", "lootsma", "eigmina"]
+ ["infeasible", "lootsma", "eigmina", "lp_examodels_issue75"]
],
[
- "Option: RELAX_BOUND",
+ "SparseUnreducedKKTSystem",
()->MadNLP.Optimizer(
- fixed_variable_treatment=MadNLP.RELAX_BOUND,
+ kkt_system=MadNLP.SparseUnreducedKKTSystem,
print_level=MadNLP.ERROR),
- [],
- true
+ []
],
[
- "Option: AUGMENTED KKT SYSTEM",
+ "SparseUnreducedKKTSystem + InertiaFree",
()->MadNLP.Optimizer(
- kkt_system=MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM,
+ inertia_correction_method=MadNLP.InertiaFree,
+ kkt_system=MadNLP.SparseUnreducedKKTSystem,
print_level=MadNLP.ERROR),
- ["infeasible","eigmina"] # numerical errors
+ []
],
[
- "Option: INERTIA_FREE & AUGMENTED KKT SYSTEM",
+ "SparseCondensedKKTSystem + CHOLMOD-CHOLESKY",
()->MadNLP.Optimizer(
- inertia_correction_method=MadNLP.INERTIA_FREE,
- kkt_system=MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM,
+ kkt_system=MadNLP.SparseCondensedKKTSystem,
+ equality_treatment = MadNLP.RelaxEquality,
+ fixed_variable_treatment = MadNLP.RelaxBound,
+ linear_solver=MadNLP.CHOLMODSolver,
print_level=MadNLP.ERROR),
- ["infeasible","eigmina"] # numerical errors
+ []
],
[
- "Option: INERTIA_FREE",
+ "SparseCondensedKKTSystem + InertiaFree",
()->MadNLP.Optimizer(
- inertia_correction_method=MadNLP.INERTIA_FREE,
+ inertia_correction_method=MadNLP.InertiaFree,
+ kkt_system=MadNLP.SparseCondensedKKTSystem,
+ equality_treatment = MadNLP.RelaxEquality,
+ fixed_variable_treatment = MadNLP.RelaxBound,
print_level=MadNLP.ERROR),
[]
],
]
+# N.B. The CHOLMOD interface is only supported on Julia v1.10 and later.
+if VERSION >= v"1.10"
+ push!(
+ testset,
+ [
+ "SparseCondensedKKTSystem + CHOLMOD-LDL",
+ ()->MadNLP.Optimizer(
+ kkt_system=MadNLP.SparseCondensedKKTSystem,
+ equality_treatment = MadNLP.RelaxEquality,
+ fixed_variable_treatment = MadNLP.RelaxBound,
+ linear_solver=MadNLP.CHOLMODSolver,
+ cholmod_algorithm=MadNLP.LDL,
+ print_level=MadNLP.ERROR),
+ []
+ ]
+ )
+end
+
for (name,optimizer_constructor,exclude) in testset
test_madnlp(name,optimizer_constructor,exclude)
@@ -121,6 +162,7 @@ end
@testset "MadNLP callback allocations" begin
nlp = MadNLPTests.HS15Model()
solver = MadNLPSolver(nlp)
+ MadNLP.initialize!(solver)
kkt = solver.kkt
x, f, c = solver.x, solver.f, solver.c
# Precompile
@@ -145,6 +187,7 @@ end
@testset "MadNLP timings" begin
nlp = MadNLPTests.HS15Model()
solver = MadNLPSolver(nlp)
+ MadNLP.initialize!(solver)
time_callbacks = MadNLP.timing_callbacks(solver)
@test isa(time_callbacks, NamedTuple)
time_linear_solver = MadNLP.timing_linear_solver(solver)
@@ -154,3 +197,14 @@ end
@test isa(time_madnlp.time_callbacks, NamedTuple)
end
+@testset "Quadmath test" begin
+ nlp = MadNLPTests.HS15Model(T = Float128)
+ result = madnlp(
+ nlp;
+ print_level = MadNLP.ERROR,
+ callback = MadNLP.SparseCallback,
+ linear_solver=LDLSolver,
+ kkt_system = MadNLP.SparseCondensedKKTSystem
+ )
+ @test result.status == MadNLP.SOLVE_SUCCEEDED
+end
diff --git a/test/matrix_test.jl b/test/matrix_test.jl
index 75f1568b..b2badbd2 100644
--- a/test/matrix_test.jl
+++ b/test/matrix_test.jl
@@ -30,6 +30,10 @@ end
end
+MadNLPTests.test_linear_solver(LDLSolver,Float32)
+MadNLPTests.test_linear_solver(LDLSolver,Float64)
+MadNLPTests.test_linear_solver(LDLSolver,Float128)
MadNLPTests.test_linear_solver(UmfpackSolver,Float64)
+MadNLPTests.test_linear_solver(CHOLMODSolver,Float64)
MadNLPTests.test_linear_solver(LapackCPUSolver,Float32)
MadNLPTests.test_linear_solver(LapackCPUSolver,Float64)
diff --git a/test/minlp_test.jl b/test/minlp_test.jl
index 5e7014a1..787ac844 100644
--- a/test/minlp_test.jl
+++ b/test/minlp_test.jl
@@ -1,4 +1,5 @@
const OPTIMIZER = ()->MadNLP.Optimizer(
+ kkt_system=MadNLP.DenseKKTSystem,
linear_solver=MadNLP.LapackCPUSolver,
print_level=MadNLP.ERROR
)
@@ -13,6 +14,7 @@ const OPTIMIZER = ()->MadNLP.Optimizer(
exclude = [
"005_011", # Uses the function `\`
"006_010", # User-defined function without Hessian (autodiff only provides 1st order)
+ "009_010", # Objective is non-smooth
],
objective_tol = 1e-5,
primal_tol = 1e-5,
diff --git a/test/runtests.jl b/test/runtests.jl
index cd365282..5da670ed 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,6 @@
using Test, MadNLP, MadNLPTests, MINLPTests
using NLPModels
+using Quadmath
import MathOptInterface
import SparseArrays: sparse
@@ -19,6 +20,7 @@ import SparseArrays: sparse
@testset "MadNLP test" begin
include("madnlp_test.jl")
include("madnlp_dense.jl")
+ include("madnlp_quasi_newton.jl")
end
@testset "MINLP test" begin