From 0b05c8cec17b44e6c62ae288628ba5eb98380a90 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Fri, 2 Aug 2024 18:20:43 +0200 Subject: [PATCH] The Riemannian Interior Point Newton Method (#399) * Implemented conjugate residual method as abstract solver * implement the interior point method * introduce several KKT vector field related functions * Introduce a ClosedFormSolverState for consistency --------- Co-authored-by: mstokkenes <77609740+mstokkenes@users.noreply.github.com> Co-authored-by: Mateusz Baran --- .zenodo.json | 5 + Changelog.md | 18 + Project.toml | 2 +- docs/make.jl | 2 + docs/src/about.md | 1 + docs/src/extensions.md | 8 + docs/src/plans/objective.md | 19 +- docs/src/plans/stepsize.md | 7 + docs/src/references.bib | 22 + docs/src/solvers/conjugate_residual.md | 34 + docs/src/solvers/index.md | 21 +- docs/src/solvers/interior_point_Newton.md | 48 + .../config/vocabularies/Manopt/accept.txt | 2 + ext/ManoptManifoldsExt/ManoptManifoldsExt.jl | 6 +- src/Manopt.jl | 55 +- src/plans/augmented_lagrangian_plan.jl | 29 +- src/plans/conjugate_residual_plan.jl | 332 ++++++ src/plans/constrained_plan.jl | 305 +++++- src/plans/debug.jl | 85 +- src/plans/interior_point_Newton_plan.jl | 984 ++++++++++++++++++ src/plans/plan.jl | 2 + src/plans/solver_state.jl | 25 + src/plans/stepsize.jl | 132 ++- src/solvers/FrankWolfe.jl | 30 +- .../adaptive_regularization_with_cubics.jl | 21 +- src/solvers/augmented_Lagrangian_method.jl | 40 +- src/solvers/cma_es.jl | 6 +- src/solvers/conjugate_residual.jl | 123 +++ src/solvers/convex_bundle_method.jl | 26 +- .../difference-of-convex-proximal-point.jl | 27 +- src/solvers/difference_of_convex_algorithm.jl | 37 +- src/solvers/exact_penalty_method.jl | 50 +- src/solvers/interior_point_Newton.jl | 383 +++++++ src/solvers/proximal_bundle_method.jl | 23 +- src/solvers/trust_regions.jl | 43 +- test/helpers/test_manifold_extra_functions.jl | 5 +- test/plans/test_conjugate_residual_plan.jl | 84 ++ test/plans/test_constrained_plan.jl | 212 +++- test/plans/test_interior_point_newton_plan.jl | 95 ++ test/plans/test_state.jl | 4 + test/plans/test_stepsize.jl | 7 + test/runtests.jl | 6 +- ...est_adaptive_regularization_with_cubics.jl | 10 +- test/solvers/test_conjugate_residual.jl | 21 + test/solvers/test_difference_of_convex.jl | 2 +- test/solvers/test_interior_point_Newton.jl | 123 +++ .../test_primal_dual_semismooth_Newton.jl | 3 +- 47 files changed, 3271 insertions(+), 254 deletions(-) create mode 100644 docs/src/solvers/conjugate_residual.md create mode 100644 docs/src/solvers/interior_point_Newton.md create mode 100644 src/plans/conjugate_residual_plan.jl create mode 100644 src/plans/interior_point_Newton_plan.jl create mode 100644 src/solvers/conjugate_residual.jl create mode 100644 src/solvers/interior_point_Newton.jl create mode 100644 test/plans/test_conjugate_residual_plan.jl create mode 100644 test/plans/test_interior_point_newton_plan.jl create mode 100644 test/solvers/test_conjugate_residual.jl create mode 100644 test/solvers/test_interior_point_Newton.jl diff --git a/.zenodo.json b/.zenodo.json index 73ab025f17..d4c2bec6ee 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -53,6 +53,11 @@ "name": "Kjemsås, Even Stephansen", "type": "ProjectMember" }, + { + "affiliation": "NTNU Trondheim", + "name": "Stokkenes, Markus A.", + "type": "ProjectMember" + }, { "name": "Daniel VandenHeuvel", "type": "other", diff --git a/Changelog.md b/Changelog.md index d433600717..5b07d13aa8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,24 @@ All notable Changes 
to the Julia package `Manopt.jl` will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.4.68] – August 2, 2024
+
+### Added
+
+* an Interior Point Newton Method, the `interior_point_Newton`
+* a `conjugate_residual` algorithm to solve a linear system on a tangent space.
+* `ArmijoLinesearch` now allows for the two additional keywords `additional_decrease_condition` and `additional_increase_condition` to specify further conditions for when to accept a decrease or increase of the stepsize.
+* a `DebugFeasibility` to print the feasibility of points in constrained optimisation, employing the new `is_feasible` function
+* an `InteriorPointCentralityCondition` check that can be added for step candidates within the line search of `interior_point_Newton`
+* several new functors
+  * the `LagrangianCost`, `LagrangianGradient`, `LagrangianHessian`, which, based on a constrained objective, allow to construct the Hessian objective of its Lagrangian
+  * the `CondensedKKTVectorField` and its `CondensedKKTVectorFieldJacobian`, which are used to solve a linear system within `interior_point_Newton`
+  * the `KKTVectorField` as well as its `KKTVectorFieldJacobian` and `KKTVectorFieldAdjointJacobian`
+  * the `KKTVectorFieldNormSq` and its `KKTVectorFieldNormSqGradient` used within the Armijo line search of `interior_point_Newton`
+* New stopping criteria
+  * A `StopWhenRelativeResidualLess` for the `conjugate_residual`
+  * A `StopWhenKKTResidualLess` for the `interior_point_Newton`
+
 ## [0.4.67] – July 25, 2024
 
 ### Added
diff --git a/Project.toml b/Project.toml
index 9290f5990b..9d694d4479 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann "]
-version = "0.4.67"
+version = "0.4.68"
 
 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
diff --git a/docs/make.jl b/docs/make.jl
index e9afa93ec9..55da2c0db7 100755
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -168,6 +168,7 @@ makedocs(;
 "Chambolle-Pock" => "solvers/ChambollePock.md",
 "CMA-ES" => "solvers/cma_es.md",
 "Conjugate gradient descent" => "solvers/conjugate_gradient_descent.md",
+"Conjugate Residual" => "solvers/conjugate_residual.md",
 "Convex bundle method" => "solvers/convex_bundle_method.md",
 "Cyclic Proximal Point" => "solvers/cyclic_proximal_point.md",
 "Difference of Convex" => "solvers/difference_of_convex.md",
@@ -175,6 +176,7 @@ makedocs(;
 "Exact Penalty Method" => "solvers/exact_penalty_method.md",
 "Frank-Wolfe" => "solvers/FrankWolfe.md",
 "Gradient Descent" => "solvers/gradient_descent.md",
+"Interior Point Newton" => "solvers/interior_point_Newton.md",
 "Levenberg–Marquardt" => "solvers/LevenbergMarquardt.md",
 "Nelder–Mead" => "solvers/NelderMead.md",
 "Particle Swarm Optimization" => "solvers/particle_swarm.md",
diff --git a/docs/src/about.md b/docs/src/about.md
index 727ba7e887..a1a3c1eccb 100644
--- a/docs/src/about.md
+++ b/docs/src/about.md
@@ -11,6 +11,7 @@ The following people contributed
 * Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](solvers/FrankWolfe.md) solver
 * Mathias Ravn Munkvold contributed most of the implementation of the [Adaptive Regularization with Cubics](solvers/adaptive-regularization-with-cubics.md) solver
 * [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php)
implemented the [trust regions](solvers/trust_regions.md) and [quasi Newton](solvers/quasi_Newton.md) solvers.
+* [Markus A. Stokkenes](https://www.linkedin.com/in/markus-a-stokkenes-b41bba17b/) contributed most of the implementation of the [Interior Point Newton Method](solvers/interior_point_Newton.md)
 * [Manuel Weiss](https://scoop.iwr.uni-heidelberg.de/author/manuel-weiß/) implemented most of the [conjugate gradient update rules](@ref cg-coeffs)
 
 as well as various [contributors](https://github.com/JuliaManifolds/Manopt.jl/graphs/contributors) providing small extensions, finding small bugs and mistakes and fixing them by opening [PR](https://github.com/JuliaManifolds/Manopt.jl/pulls)s.
diff --git a/docs/src/extensions.md b/docs/src/extensions.md
index ce9b5ee0aa..bd1f0957f1 100644
--- a/docs/src/extensions.md
+++ b/docs/src/extensions.md
@@ -62,6 +62,14 @@ Manopt.max_stepsize(::TangentBundle, ::Any)
 mid_point
 ```
 
+Internally, `Manopt.jl` provides two additional functions to choose a
+Euclidean space when needed:
+
+```@docs
+Manopt.Rn
+Manopt.Rn_default
+```
+
 ## JuMP.jl
 
 Manopt can be used using the [JuMP.jl](https://github.com/jump-dev/JuMP.jl) interface.
diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md
index 69c5695a78..48d10b2529 100644
--- a/docs/src/plans/objective.md
+++ b/docs/src/plans/objective.md
@@ -211,6 +211,16 @@ It might be beneficial to use the adapted problem to specify different ranges fo
 ConstrainedManoptProblem
 ```
 
+as well as the helper functions
+
+```@docs
+AbstractConstrainedFunctor
+AbstractConstrainedSlackFunctor
+LagrangianCost
+LagrangianGradient
+LagrangianHessian
+```
+
 #### Access functions
 
 ```@docs
@@ -223,9 +233,16 @@
 get_grad_equality_constraint
 get_grad_inequality_constraint
 get_hess_equality_constraint
 get_hess_inequality_constraint
+is_feasible
+```
+
+#### Internal functions
+
+```@docs
+Manopt.get_feasibility_status
 ```
 
-### A vectorial cost function
+### Vectorial objectives
 
 ```@docs
 Manopt.AbstractVectorFunction
diff --git a/docs/src/plans/stepsize.md b/docs/src/plans/stepsize.md
index 8094887db8..27ee9ebc35 100644
--- a/docs/src/plans/stepsize.md
+++ b/docs/src/plans/stepsize.md
@@ -33,6 +33,13 @@ Tangent bundle with the Sasaki metric has 0 injectivity radius, so the maximum s
 `Hyperrectangle` also has 0 injectivity radius and an estimate based on maximum of dimensions along each index is used instead. For manifolds with corners, however, a line search capable of handling break points along the projected search direction should be used, and such algorithms do not call `max_stepsize`.
 
+Some solvers perform their line search on a point and along a direction that differ
+from the solver's own iterate and search direction. The following state can be used to wrap
+these locally:
+
+```@docs
+StepsizeState
+```
+
 ## Literature
 
 ```@bibliography
diff --git a/docs/src/references.bib b/docs/src/references.bib
index a06ec4661d..c7294640ea 100644
--- a/docs/src/references.bib
+++ b/docs/src/references.bib
@@ -341,6 +341,16 @@ @article{DuranMoelleSbertCremers:2016
 % --- E
 %
 %
+@article{El-BakryTapiaTsuchiyaZhang:1996,
+  AUTHOR = {El-Bakry, A. S. and Tapia, R. A. and Tsuchiya, T.
and Zhang, Y.},
+  DOI = {10.1007/bf02275347},
+  JOURNAL = {Journal of Optimization Theory and Applications},
+  NUMBER = {3},
+  PAGES = {507–541},
+  TITLE = {On the formulation and theory of the Newton interior-point method for nonlinear programming},
+  VOLUME = {89},
+  YEAR = {1996}
+}
 
 % --- F
 %
@@ -532,6 +542,18 @@ @article{Karcher:1977
 % --- L
 %
 %
+@article{LaiYoshise:2024,
+  AUTHOR = {Lai, Zhijian and Yoshise, Akiko},
+  DOI = {10.1007/s10957-024-02403-8},
+  EPRINT = {2203.09762},
+  EPRINTTYPE = {arXiv},
+  JOURNAL = {Journal of Optimization Theory and Applications},
+  NUMBER = {1},
+  PAGES = {433–469},
+  TITLE = {Riemannian Interior Point Methods for Constrained Optimization on Manifolds},
+  VOLUME = {201},
+  YEAR = {2024}
+}
 @article{LausNikolovaPerschSteidl:2017,
 AUTHOR = {Laus, F. and Nikolova, M. and Persch, J. and Steidl, G.},
 YEAR = {2017},
diff --git a/docs/src/solvers/conjugate_residual.md b/docs/src/solvers/conjugate_residual.md
new file mode 100644
index 0000000000..d20efad989
--- /dev/null
+++ b/docs/src/solvers/conjugate_residual.md
@@ -0,0 +1,34 @@
+# Conjugate Residual Solver in a Tangent Space
+
+```@meta
+CurrentModule = Manopt
+```
+
+```@docs
+conjugate_residual
+```
+
+## State
+
+```@docs
+ConjugateResidualState
+```
+
+## Objective
+
+```@docs
+SymmetricLinearSystemObjective
+```
+
+## Additional stopping criterion
+
+```@docs
+StopWhenRelativeResidualLess
+```
+
+## Literature
+
+```@bibliography
+Pages = ["conjugate_residual.md"]
+Canonical=false
+```
\ No newline at end of file
diff --git a/docs/src/solvers/index.md b/docs/src/solvers/index.md
index d427062625..f086f39f6f 100644
--- a/docs/src/solvers/index.md
+++ b/docs/src/solvers/index.md
@@ -88,9 +88,16 @@ For these you can use
 * The [Augmented Lagrangian Method](augmented_Lagrangian_method.md) (ALM), where both `g` and `grad_g` as well as `h` and `grad_h` are keyword arguments, and one of these pairs is mandatory.
 * The [Exact Penalty Method](exact_penalty_method.md) (EPM) uses a penalty term instead of augmentation, but has the same interface as ALM.
+* The [Interior Point Newton Method](interior_point_Newton.md) (IPM) rephrases the KKT system of a constrained problem as a Newton iteration, which is performed in every step.
 * [Frank-Wolfe algorithm](FrankWolfe.md), where besides the gradient of ``f`` either a closed form solution or a (maybe even automatically generated) sub problem solver for ``\operatorname*{arg\,min}_{q ∈ C} ⟨\operatorname{grad} f(p_k), \log_{p_k}q⟩`` is required, where ``p_k`` is a fixed point on the manifold (changed in every iteration).
 
-# Alphabetical list List of algorithms
+## On the tangent space
+
+* [Conjugate Residual](conjugate_residual.md), a solver for a linear system ``\mathcal A[X] + b = 0`` on a tangent space; see the sketch below.
+* [Steihaug-Toint Truncated Conjugate-Gradient Method](truncated_conjugate_gradient_descent.md), a solver for a constrained problem defined on a tangent space.
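+
+As a minimal sketch of the former (a toy example: the operator `A` and right hand side
+`b` below are illustrative choices, and the high-level call signature
+`conjugate_residual(TpM, A, b, X0)` is assumed):
+
+```julia
+using Manopt, Manifolds
+M = Sphere(2)
+p = [1.0, 0.0, 0.0]
+TpM = TangentSpace(M, p)
+A(M, p, X) = 2.0 .* X      # a symmetric, positive definite operator on T_pM
+b(M, p) = [0.0, 1.0, -2.0] # a tangent vector at p
+X = conjugate_residual(TpM, A, b, zero_vector(M, p))
+```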
+
+
+## Alphabetical list of algorithms
 
 | Solver | Function | State |
 |:---------|:----------------|:---------|
@@ -98,6 +105,7 @@ For these you can use
 | [Augmented Lagrangian Method](augmented_Lagrangian_method.md) | [`augmented_Lagrangian_method`](@ref) | [`AugmentedLagrangianMethodState`](@ref) |
 | [Chambolle-Pock](ChambollePock.md) | [`ChambollePock`](@ref) | [`ChambollePockState`](@ref) |
 | [Conjugate Gradient Descent](conjugate_gradient_descent.md) | [`conjugate_gradient_descent`](@ref) | [`ConjugateGradientDescentState`](@ref) |
+| [Conjugate Residual](conjugate_residual.md) | [`conjugate_residual`](@ref) | [`ConjugateResidualState`](@ref) |
 | [Convex Bundle Method](convex_bundle_method.md) | [`convex_bundle_method`](@ref) | [`ConvexBundleMethodState`](@ref) |
 | [Cyclic Proximal Point](cyclic_proximal_point.md) | [`cyclic_proximal_point`](@ref) | [`CyclicProximalPointState`](@ref) |
 | [Difference of Convex Algorithm](@ref solver-difference-of-convex) | [`difference_of_convex_algorithm`](@ref) | [`DifferenceOfConvexState`](@ref) |
@@ -106,6 +114,7 @@ For these you can use
 | [Exact Penalty Method](exact_penalty_method.md) | [`exact_penalty_method`](@ref) | [`ExactPenaltyMethodState`](@ref) |
 | [Frank-Wolfe algorithm](FrankWolfe.md) | [`Frank_Wolfe_method`](@ref) | [`FrankWolfeState`](@ref) |
 | [Gradient Descent](gradient_descent.md) | [`gradient_descent`](@ref) | [`GradientDescentState`](@ref) |
+| [Interior Point Newton](interior_point_Newton.md) | [`interior_point_Newton`](@ref) | [`InteriorPointNewtonState`](@ref) |
 | [Levenberg-Marquardt](LevenbergMarquardt.md) | [`LevenbergMarquardt`](@ref) | [`LevenbergMarquardtState`](@ref) | ``f = \sum_i f_i`` ``\operatorname{grad} f_i`` (Jacobian)|
 | [Nelder-Mead](NelderMead.md) | [`NelderMead`](@ref) | [`NelderMeadState`](@ref) |
 | [Particle Swarm](particle_swarm.md) | [`particle_swarm`](@ref) | [`ParticleSwarmState`](@ref) |
@@ -189,4 +198,12 @@ also use the third (lowest level) and just call
 
 ```
 solve!(problem, state)
-```
\ No newline at end of file
+```
+
+### Closed-form subsolvers
+
+If a subsolver solution is available in closed form, `ClosedFormSubSolverState` is used to indicate that.
+
+```@docs
+Manopt.ClosedFormSubSolverState
+```
diff --git a/docs/src/solvers/interior_point_Newton.md b/docs/src/solvers/interior_point_Newton.md
new file mode 100644
index 0000000000..02bc3520ce
--- /dev/null
+++ b/docs/src/solvers/interior_point_Newton.md
@@ -0,0 +1,48 @@
+# Interior Point Newton method
+
+```@meta
+CurrentModule = Manopt
+```
+
+```@docs
+interior_point_Newton
+interior_point_Newton!
+
+``` + +## State + +```@docs +InteriorPointNewtonState +``` + +## Subproblem functions + +```@docs +CondensedKKTVectorField +CondensedKKTVectorFieldJacobian +KKTVectorField +KKTVectorFieldJacobian +KKTVectorFieldAdjointJacobian +KKTVectorFieldNormSq +KKTVectorFieldNormSqGradient +``` + +## Helpers + +```@docs +InteriorPointCentralityCondition +Manopt.calculate_σ +``` + +## Additional stopping criteria + +```@docs +StopWhenKKTResidualLess +``` + +## References + +```@bibliography +Pages = ["interior_point_Newton.md"] +Canonical=false +``` diff --git a/docs/styles/config/vocabularies/Manopt/accept.txt b/docs/styles/config/vocabularies/Manopt/accept.txt index 109e1977f4..66fdfd3115 100644 --- a/docs/styles/config/vocabularies/Manopt/accept.txt +++ b/docs/styles/config/vocabularies/Manopt/accept.txt @@ -49,6 +49,7 @@ Jasa Jax JuMP.jl kwargs +Lai Levenberg Lagrangian Lanczos @@ -112,5 +113,6 @@ tridiagonal Weinmann Willem Wolfe +Yoshise Yuan Zhang \ No newline at end of file diff --git a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl index d55b9b1395..679b7d6a7b 100644 --- a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl +++ b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl @@ -10,7 +10,9 @@ import Manopt: get_gradient!, set_manopt_parameter!, reflect, - reflect! + reflect!, + Rn, + Rn_default using LinearAlgebra: cholesky, det, diag, dot, Hermitian, qr, Symmetric, triu, I, Diagonal import ManifoldsBase: copy, mid_point, mid_point! @@ -24,6 +26,8 @@ else using ..Manifolds end +Rn(::Val{:Manifolds}, args...; kwargs...) = Euclidean(args...; kwargs...) + const NONMUTATINGMANIFOLDS = Union{Circle,PositiveNumbers,Euclidean{Tuple{}}} include("manifold_functions.jl") include("ChambollePockManifolds.jl") diff --git a/src/Manopt.jl b/src/Manopt.jl index 722e0cf818..b0c7043a62 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -11,7 +11,7 @@ module Manopt import Base: &, copy, getindex, identity, length, setindex!, show, | import LinearAlgebra: reflect! import ManifoldsBase: embed!, plot_slope, prepare_check_result, find_best_slope_window - +import ManifoldsBase: base_manifold, base_point using ColorSchemes using ColorTypes using Colors @@ -149,6 +149,30 @@ using Requires using SparseArrays using Statistics +""" + Rn(args; kwargs...) + Rn(s::Symbol=:Manifolds, args; kwargs...) + +A small internal helper function to choose a Euclidean space. +By default, this uses the [`DefaultManifold`](@extref ManifoldsBase.DefaultManifold) unless you load +a more advanced Euclidean space like [`Euclidean`](@extref Manifolds.Euclidean) +from [`Manifolds.jl`](@extref Manifolds.Manifolds) +""" +Rn(args...; kwargs...) = Rn(Val(Rn_default()), args...; kwargs...) + +@doc """ + Rn_default() + +Specify a default value to dispatch [`Rn`](@ref) on. +This default is set to `Manifolds`, indicating, that when this package is loded, +it is the preferred package to ask for a vector space space. + +The default within `Manopt.jl` is to use the [`DefaultManifold`](@extref ManifoldsBase.DefaultManifold) from `ManifoldsBase.jl`. +If you load `Manifolds.jl` this switches to using [`Euclidan`](@extref Manifolds.Euclidean). +""" +Rn_default() = :Manifolds +Rn(::Val{T}, args...; kwargs...) where {T} = DefaultManifold(args...; kwargs...) 
+ include("plans/plan.jl") # solvers general framework include("solvers/solver.jl") @@ -160,6 +184,7 @@ include("solvers/convex_bundle_method.jl") include("solvers/ChambollePock.jl") include("solvers/cma_es.jl") include("solvers/conjugate_gradient_descent.jl") +include("solvers/conjugate_residual.jl") include("solvers/cyclic_proximal_point.jl") include("solvers/difference_of_convex_algorithm.jl") include("solvers/difference-of-convex-proximal-point.jl") @@ -169,6 +194,7 @@ include("solvers/Lanczos.jl") include("solvers/NelderMead.jl") include("solvers/FrankWolfe.jl") include("solvers/gradient_descent.jl") +include("solvers/interior_point_Newton.jl") include("solvers/LevenbergMarquardt.jl") include("solvers/particle_swarm.jl") include("solvers/primal_dual_semismooth_Newton.jl") @@ -282,6 +308,7 @@ export AbstractDecoratedManifoldObjective, AbstractManifoldGradientObjective, AbstractManifoldCostObjective, AbstractManifoldObjective, + AbstractManifoldSubObjective, AbstractPrimalDualManifoldObjective, ConstrainedManifoldObjective, EmbeddedManifoldObjective, @@ -322,6 +349,7 @@ export AbstractGradientSolverState, ConvexBundleMethodState, ChambollePockState, ConjugateGradientDescentState, + ConjugateResidualState, CyclicProximalPointState, DifferenceOfConvexState, DifferenceOfConvexProximalState, @@ -329,6 +357,7 @@ export AbstractGradientSolverState, ExactPenaltyMethodState, FrankWolfeState, GradientDescentState, + InteriorPointNewtonState, LanczosState, LevenbergMarquardtState, NelderMeadState, @@ -336,6 +365,7 @@ export AbstractGradientSolverState, PrimalDualSemismoothNewtonState, ProximalBundleMethodState, RecordSolverState, + StepsizeState, StochasticGradientDescentState, SubGradientMethodState, TruncatedConjugateGradientState, @@ -382,7 +412,6 @@ export ApproxHessianFiniteDifference export is_state_decorator, dispatch_state_decorator export primal_residual, dual_residual export equality_constraints_length, - inequality_constraints_length, get_constraints, get_inequality_constraint, get_equality_constraint, @@ -393,12 +422,19 @@ export equality_constraints_length, get_hess_inequality_constraint, get_hess_inequality_constraint!, get_hess_equality_constraint, - get_hess_equality_constraint! 
+ get_hess_equality_constraint!, + inequality_constraints_length, + is_feasible # Subproblem cost/grad export AugmentedLagrangianCost, AugmentedLagrangianGrad, ExactPenaltyCost, ExactPenaltyGrad +export KKTVectorField, KKTVectorFieldJacobian, KKTVectorFieldAdjointJacobian +export KKTVectorFieldNormSq, KKTVectorFieldNormSqGradient +export LagrangianCost, LagrangianGradient, LagrangianHessian export ProximalDCCost, ProximalDCGrad, LinearizedDCCost, LinearizedDCGrad export FrankWolfeCost, FrankWolfeGradient export TrustRegionModelObjective +export CondensedKKTVectorField, CondensedKKTVectorFieldJacobian +export SymmetricLinearSystemObjective export QuasiNewtonState, QuasiNewtonLimitedMemoryDirectionUpdate export QuasiNewtonMatrixDirectionUpdate @@ -440,6 +476,8 @@ export adaptive_regularization_with_cubics, cma_es!, conjugate_gradient_descent, conjugate_gradient_descent!, + conjugate_residual, + conjugate_residual!, cyclic_proximal_point, cyclic_proximal_point!, difference_of_convex_algorithm, @@ -454,6 +492,8 @@ export adaptive_regularization_with_cubics, Frank_Wolfe_method!, gradient_descent, gradient_descent!, + interior_point_Newton, + interior_point_Newton!, LevenbergMarquardt, LevenbergMarquardt!, NelderMead, @@ -473,6 +513,7 @@ export adaptive_regularization_with_cubics, truncated_conjugate_gradient_descent!, trust_regions, trust_regions! +# # Solver helpers export decorate_state!, decorate_objective! export initialize_solver!, step_solver!, get_solver_result, stop_solver! @@ -489,6 +530,7 @@ export Stepsize export AdaptiveWNGradient, ConstantStepsize, DecreasingStepsize, PolyakStepsize export ArmijoLinesearch, Linesearch, NonmonotoneLinesearch export get_stepsize, get_initial_stepsize, get_last_stepsize +export InteriorPointCentralityCondition # # Stopping Criteria export StoppingCriterion, StoppingCriterionSet @@ -510,12 +552,14 @@ export StopAfter, StopWhenGradientNormLess, StopWhenFirstOrderProgress, StopWhenIterateNaN, + StopWhenKKTResidualLess, StopWhenLagrangeMultiplierLess, StopWhenModelIncreased, StopWhenPopulationCostConcentrated, StopWhenPopulationConcentrated, StopWhenPopulationDiverges, StopWhenPopulationStronglyConcentrated, + StopWhenRelativeResidualLess, StopWhenSmallerOrEqual, StopWhenStepsizeLess, StopWhenSubgradientNormLess, @@ -531,8 +575,9 @@ export render_asymptote # # Debugs export DebugSolverState, DebugAction, DebugGroup, DebugEntry, DebugEntryChange, DebugEvery -export DebugChange, - DebugGradientChange, DebugIterate, DebugIteration, DebugDivider, DebugTime +export DebugChange, DebugGradientChange +export DebugIterate, DebugIteration, DebugDivider, DebugTime +export DebugFeasibility export DebugCost, DebugStoppingCriterion export DebugGradient, DebugGradientNorm, DebugStepsize export DebugPrimalBaseChange, DebugPrimalBaseIterate, DebugPrimalChange, DebugPrimalIterate diff --git a/src/plans/augmented_lagrangian_plan.jl b/src/plans/augmented_lagrangian_plan.jl index e364764342..e62c1ad60f 100644 --- a/src/plans/augmented_lagrangian_plan.jl +++ b/src/plans/augmented_lagrangian_plan.jl @@ -1,5 +1,5 @@ @doc raw""" - AugmentedLagrangianCost{CO,R,T} + AugmentedLagrangianCost{CO,R,T} <: AbstractConstrainedFunctor Stores the parameters ``ρ ∈ ℝ``, ``μ ∈ ℝ^m``, ``λ ∈ ℝ^n`` of the augmented Lagrangian associated to the [`ConstrainedManifoldObjective`](@ref) `co`. @@ -25,7 +25,7 @@ number type used and ``T`` the vector type. 
AugmentedLagrangianCost(co, ρ, μ, λ) """ -mutable struct AugmentedLagrangianCost{CO,R,T} +mutable struct AugmentedLagrangianCost{CO,R,T} <: AbstractConstrainedFunctor{T} co::CO ρ::R μ::T @@ -35,14 +35,8 @@ function set_manopt_parameter!(alc::AugmentedLagrangianCost, ::Val{:ρ}, ρ) alc.ρ = ρ return alc end -function set_manopt_parameter!(alc::AugmentedLagrangianCost, ::Val{:μ}, μ) - alc.μ = μ - return alc -end -function set_manopt_parameter!(alc::AugmentedLagrangianCost, ::Val{:λ}, λ) - alc.λ = λ - return alc -end +get_manopt_parameter(alc::AugmentedLagrangianCost, ::Val{:ρ}) = alc.ρ + function (L::AugmentedLagrangianCost)(M::AbstractManifold, p) gp = get_inequality_constraint(M, L.co, p, :) hp = get_equality_constraint(M, L.co, p, :) @@ -56,7 +50,7 @@ function (L::AugmentedLagrangianCost)(M::AbstractManifold, p) end @doc raw""" - AugmentedLagrangianGrad{CO,R,T} + AugmentedLagrangianGrad{CO,R,T} <: AbstractConstrainedFunctor{T} Stores the parameters ``ρ ∈ ℝ``, ``μ ∈ ℝ^m``, ``λ ∈ ℝ^n`` of the augmented Lagrangian associated to the [`ConstrainedManifoldObjective`](@ref) `co`. @@ -81,7 +75,7 @@ number type used and ``T`` the vector type. AugmentedLagrangianGrad(co, ρ, μ, λ) """ -mutable struct AugmentedLagrangianGrad{CO,R,T} +mutable struct AugmentedLagrangianGrad{CO,R,T} <: AbstractConstrainedFunctor{T} co::CO ρ::R μ::T @@ -91,20 +85,11 @@ function (LG::AugmentedLagrangianGrad)(M::AbstractManifold, p) X = zero_vector(M, p) return LG(M, X, p) end - function set_manopt_parameter!(alg::AugmentedLagrangianGrad, ::Val{:ρ}, ρ) alg.ρ = ρ return alg end -function set_manopt_parameter!(alg::AugmentedLagrangianGrad, ::Val{:μ}, μ) - alg.μ = μ - return alg -end -function set_manopt_parameter!(alg::AugmentedLagrangianGrad, ::Val{:λ}, λ) - alg.λ = λ - return alg -end - +get_manopt_parameter(alg::AugmentedLagrangianGrad, ::Val{:ρ}) = alg.ρ # default, that is especially when the `grad_g` and `grad_h` are functions. function (LG::AugmentedLagrangianGrad)( M::AbstractManifold, X, p, range=NestedPowerRepresentation() diff --git a/src/plans/conjugate_residual_plan.jl b/src/plans/conjugate_residual_plan.jl new file mode 100644 index 0000000000..7331ec3806 --- /dev/null +++ b/src/plans/conjugate_residual_plan.jl @@ -0,0 +1,332 @@ +# +# +# Objective. +@doc raw""" + SymmetricLinearSystemObjective{E<:AbstractEvaluationType,TA,T} <: AbstractManifoldObjective{E} + +Model the objective + +```math +f(X) = \frac{1}{2} \lVert \mathcal A[X] + b \rVert_p^2,\qquad X ∈ T_p\mathcal M, +``` + +defined on the tangent space ``T_p\mathcal M`` at ``p`` on the manifold ``\mathcal M``. + +In other words this is an objective to solve ``\mathcal A(p)[X] = -b(p)`` +for some linear symmetric operator and a vector function. +Note the minus on the right hand side, which makes this objective especially tailored +for (iteratively) solving Newton-like equations. + +# Fields + +* `A!!`: a symmetric, linear operator on the tangent space +* `b!!`: a gradient function + +where `A!!` can work as an allocating operator `(M, p, X) -> Y` or an in-place one `(M, Y, p, X) -> Y`, +and similarly `b!!` can either be a function `(M, p) -> X` or `(M, X, p) -> X`. +The first variants allocate for the result, the second variants work in-place. + +# Constructor + + SymmetricLinearSystemObjective(A, b; evaluation=AllocatingEvaluation()) + +Generate the objective specifying whether the two parts work allocating or in-place. 
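+
+# Example
+
+A minimal sketch; the operator and right hand side are illustrative toy choices,
+and `Manifolds.jl` is assumed to be loaded for the `Sphere`:
+
+```julia
+using Manopt, Manifolds
+M = Sphere(2)
+p = [1.0, 0.0, 0.0]
+A(M, p, X) = 2.0 .* X      # symmetric and linear on T_pM, allocating variant
+b(M, p) = [0.0, 1.0, -2.0] # a tangent vector at p
+slso = SymmetricLinearSystemObjective(A, b)
+```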
+""" +mutable struct SymmetricLinearSystemObjective{E<:AbstractEvaluationType,TA,T} <: + AbstractManifoldObjective{E} + A!!::TA + b!!::T +end + +function SymmetricLinearSystemObjective( + A::TA, b::T; evaluation::E=AllocatingEvaluation(), kwargs... +) where {TA,T,E<:AbstractEvaluationType} + return SymmetricLinearSystemObjective{E,TA,T}(A, b) +end + +function set_manopt_parameter!(slso::SymmetricLinearSystemObjective, symbol::Symbol, value) + set_manopt_parameter!(slso.A!!, symbol, value) + set_manopt_parameter!(slso.b!!, symbol, value) + return slso +end + +function get_cost( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{AllocatingEvaluation}, X +) + M = base_manifold(TpM) + p = base_point(TpM) + return 0.5 * norm(M, p, slso.A!!(M, p, X) + slso.b!!(M, p))^2 +end +function get_cost( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{InplaceEvaluation}, X +) + M = base_manifold(TpM) + p = base_point(TpM) + Y = zero_vector(M, p) + W = copy(M, p, Y) + slso.b!!(M, Y, p) + slso.A!!(M, W, p, X) + return 0.5 * norm(M, p, W + Y)^2 +end + +function get_b( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{AllocatingEvaluation}, X +) + return slso.b!!(base_manifold(TpM), base_point(TpM)) +end +function get_b( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{InplaceEvaluation}, X +) + M = base_manifold(TpM) + p = base_point(TpM) + Y = zero_vector(M, p) + return slso.b!!(M, Y, p) +end + +function get_gradient(TpM::TangentSpace, slso::SymmetricLinearSystemObjective, X) + p = base_point(TpM) + return get_hessian(TpM, slso, p, X) + get_b(TpM, slso, X) +end +function get_gradient!( + TpM::TangentSpace, Y, slso::SymmetricLinearSystemObjective{AllocatingEvaluation}, X +) + M = base_manifold(TpM) + p = base_point(TpM) + # Evaluate A[X] + b + Y .= slso.A!!(M, p, X) + slso.b!!(M, p) + return Y +end +function get_gradient!( + TpM::TangentSpace, Y, slso::SymmetricLinearSystemObjective{InplaceEvaluation}, X +) + M = base_manifold(TpM) + p = base_point(TpM) + W = copy(M, p, Y) + slso.b!!(M, Y, p) + slso.A!!(M, W, p, X) + Y .+= W + return Y +end + +# evaluate Hessian: ∇²Q(X) = A[X] +function get_hessian( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{AllocatingEvaluation}, X, V +) + return slso.A!!(base_manifold(TpM), base_point(TpM), V) +end +function get_hessian( + TpM::TangentSpace, slso::SymmetricLinearSystemObjective{InplaceEvaluation}, X, V +) + M = base_manifold(TpM) + p = base_point(TpM) + W = copy(M, p, V) + slso.A!!(M, W, p, V) + return W +end +function get_hessian!( + TpM::TangentSpace, W, slso::SymmetricLinearSystemObjective{AllocatingEvaluation}, X, V +) + M = base_manifold(TpM) + p = base_point(TpM) + copyto!(M, W, p, slso.A!!(M, p, V)) + return W +end +function get_hessian!( + TpM::TangentSpace, W, slso::SymmetricLinearSystemObjective{InplaceEvaluation}, X, V +) + return slso.A!!(base_manifold(TpM), W, base_point(TpM), V) +end + +# +# +# State +@doc raw""" + ConjugateResidualState{T,R,TStop<:StoppingCriterion} <: AbstractManoptSolverState + +A state for the [`conjugate_residual`](@ref) solver. 
# Fields
+
+* `X::T`: the iterate
+* `r::T`: the residual ``r = -b(p) - \mathcal A(p)[X]``
+* `d::T`: the conjugate direction
+* `Ar::T`, `Ad::T`: storages for ``\mathcal A(p)[r]`` and ``\mathcal A(p)[d]``, respectively
+* `rAr::R`: internal field for storing ``⟨ r, \mathcal A(p)[r] ⟩``
+* `α::R`: a step length
+* `β::R`: the conjugate coefficient
+* `stop::TStop`: a [`StoppingCriterion`](@ref) for the solver
+
+# Constructor
+
+    function ConjugateResidualState(
+        TpM::TangentSpace,
+        slso::SymmetricLinearSystemObjective;
+        X=rand(TpM),
+        r=-get_gradient(TpM, slso, X),
+        d=copy(TpM, r),
+        Ar=get_hessian(TpM, slso, X, r),
+        Ad=copy(TpM, Ar),
+        α::R=0.0,
+        β::R=0.0,
+        stopping_criterion=StopAfterIteration(manifold_dimension(TpM)) |
+                           StopWhenGradientNormLess(1e-8),
+        kwargs...,
+    )
+
+Initialise the state with default values.
+"""
+mutable struct ConjugateResidualState{T,R,TStop<:StoppingCriterion} <:
+               AbstractManoptSolverState
+    X::T
+    r::T
+    d::T
+    Ar::T
+    Ad::T
+    rAr::R
+    α::R
+    β::R
+    stop::TStop
+    function ConjugateResidualState(
+        TpM::TangentSpace,
+        slso::SymmetricLinearSystemObjective;
+        X::T=rand(TpM),
+        r::T=-get_gradient(TpM, slso, X),
+        d::T=copy(TpM, r),
+        Ar::T=get_hessian(TpM, slso, X, r),
+        Ad::T=copy(TpM, Ar),
+        α::R=0.0,
+        β::R=0.0,
+        stopping_criterion::SC=StopAfterIteration(manifold_dimension(TpM)) |
+                               StopWhenGradientNormLess(1e-8),
+        kwargs...,
+    ) where {T,R,SC<:StoppingCriterion}
+        M = base_manifold(TpM)
+        p = base_point(TpM)
+        crs = new{T,R,SC}()
+        crs.X = X
+        crs.r = r
+        crs.d = d
+        crs.Ar = Ar
+        crs.Ad = Ad
+        crs.α = α
+        crs.β = β
+        crs.rAr = zero(R)
+        crs.stop = stopping_criterion
+        return crs
+    end
+end
+
+get_iterate(crs::ConjugateResidualState) = crs.X
+function set_iterate!(crs::ConjugateResidualState, ::AbstractManifold, X)
+    crs.X = X
+    return crs
+end
+
+get_gradient(crs::ConjugateResidualState) = crs.r
+function set_gradient!(crs::ConjugateResidualState, ::AbstractManifold, r)
+    crs.r = r
+    return crs
+end
+
+function show(io::IO, crs::ConjugateResidualState)
+    i = get_count(crs, :Iterations)
+    Iter = (i > 0) ? "After $i iterations\n" : ""
+    Conv = indicates_convergence(crs.stop) ? "Yes" : "No"
+    s = """
+    # Solver state for `Manopt.jl`s Conjugate Residual Method
+    $Iter
+    ## Parameters
+    * α: $(crs.α)
+    * β: $(crs.β)
+
+    ## Stopping criterion
+    $(status_summary(crs.stop))
+
+    This indicates convergence: $Conv
+    """
+    return print(io, s)
+end
+
+#
+#
+# Stopping Criterion
+@doc raw"""
+    StopWhenRelativeResidualLess <: StoppingCriterion
+
+Stop when the relative residual in the [`conjugate_residual`](@ref)
+is below a certain threshold, that is,
+
+```math
+  \frac{\lVert r^{(k)} \rVert}{c} ≤ ε,
+```
+
+where ``c = \lVert b \rVert`` is the norm of the right hand side vector ``b`` of the
+system ``\mathcal A(p)[X] + b(p) = 0_p`` from the [`conjugate_residual`](@ref).
+
+# Fields
+
+* `at_iteration` indicates at which iteration (including `i=0`) the stopping criterion
+  was fulfilled and is `-1` while it is not fulfilled.
+* `c`: the initial norm
+* `ε`: the threshold
+* `norm_rk`: the last computed norm of the residual
+
+# Constructor
+
+    StopWhenRelativeResidualLess(c, ε; norm_r = 2*c*ε)
+
+Initialise the stopping criterion.
+
+!!! note
+
+    The initial norm of the vector field ``c = \lVert b \rVert``
+    that is stored internally is updated on initialisation, that is,
+    if this stopping criterion is called with `k<=0`.
+
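+
+# Example
+
+A short sketch: the following combination stops after at most 100 iterations or once
+``\lVert r^{(k)} \rVert / c ≤ 10^{-6}``. Since `c` is recomputed from ``\lVert b \rVert``
+on initialisation, the first argument merely serves as a placeholder here.
+
+```julia
+sc = StopAfterIteration(100) | StopWhenRelativeResidualLess(1.0, 1e-6)
+```
+
+As usual for `Manopt.jl` solvers, this can be passed on via the `stopping_criterion=` keyword.
+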
+""" +mutable struct StopWhenRelativeResidualLess{R} <: StoppingCriterion + c::R + ε::R + norm_r::R + at_iteration::Int + function StopWhenRelativeResidualLess(c::R, ε::R; norm_r::R=2 * c * ε) where {R} + return new{R}(c, ε, norm_r, -1) + end +end +function (swrr::StopWhenRelativeResidualLess)( + amp::AbstractManoptProblem{<:TangentSpace}, crs::ConjugateResidualState, k::Int +) + TpM = get_manifold(amp) + M = base_manifold(TpM) + p = base_point(TpM) + #compute current r-norm + swrr.norm_r = norm(M, p, crs.r) + if k <= 0 + # on init also update the right hand side norm + swrr.c = norm(M, p, get_b(TpM, get_objective(amp), crs.X)) + return false # just init the norm, but do not stop + end + # now k > 0 + if swrr.norm_r / swrr.c < swrr.ε #residual small enough + swrr.at_iteration = k + return true + end + return false +end +function get_reason(swrr::StopWhenRelativeResidualLess) + if (swrr.at_iteration >= 0) + return "After iteration #$(swrr.at_iteration) the algorithm stopped with a relative residual $(swrr.norm_r/swrr.c) < $(swrr.ε).\n" + end + return "" +end +function status_summary(swrr::StopWhenRelativeResidualLess) + has_stopped = (swrr.at_iteration >= 0) + s = has_stopped ? "reached" : "not reached" + return "‖r^(k)‖ / c < ε:\t$s" +end +indicates_convergence(::StopWhenRelativeResidualLess) = true +function show(io::IO, swrr::StopWhenRelativeResidualLess) + return print( + io, + "StopWhenRelativeResidualLess($(swrr.c), $(swrr.ε))\n $(status_summary(swrr))", + ) +end diff --git a/src/plans/constrained_plan.jl b/src/plans/constrained_plan.jl index fce9c1fb61..65cb7609fa 100644 --- a/src/plans/constrained_plan.jl +++ b/src/plans/constrained_plan.jl @@ -1,3 +1,64 @@ + +""" + AbstractConstrainedFunctor{T} + +A common supertype for fucntors that model constraint functions. + +This supertype provides access for the fields ``λ`` and ``μ``, the dual variables of +constraintsnof type `T`. +""" +abstract type AbstractConstrainedFunctor{T} end + +function set_manopt_parameter!( + acf::AbstractConstrainedFunctor{T}, ::Val{:μ}, μ::T +) where {T} + acf.μ = μ + return acf +end +get_manopt_parameter(acf::AbstractConstrainedFunctor, ::Val{:μ}) = acf.μ +function set_manopt_parameter!( + acf::AbstractConstrainedFunctor{T}, ::Val{:λ}, λ::T +) where {T} + acf.λ = λ + return acf +end +get_manopt_parameter(acf::AbstractConstrainedFunctor, ::Val{:λ}) = acf.λ + +""" + AbstractConstrainedSlackFunctor{T,R} + +A common supertype for fucntors that model constraint functions with slack. + +This supertype additionally provides access for the fields +* `μ::T` the dual for the inequality constraints +* `s::T` the slack parametyer, and +* `β::R` the the barrier parameter +which is also of typee `T`. 
+""" +abstract type AbstractConstrainedSlackFunctor{T,R} end + +function set_manopt_parameter!( + acsf::AbstractConstrainedSlackFunctor{T}, ::Val{:s}, s::T +) where {T} + acsf.s = s + return acsf +end +get_manopt_parameter(acsf::AbstractConstrainedSlackFunctor, ::Val{:s}) = acsf.s +function set_manopt_parameter!( + acsf::AbstractConstrainedSlackFunctor{T}, ::Val{:μ}, μ::T +) where {T} + acsf.μ = μ + return acsf +end +get_manopt_parameter(acsf::AbstractConstrainedSlackFunctor, ::Val{:μ}) = acsf.μ +function set_manopt_parameter!( + acsf::AbstractConstrainedSlackFunctor{T,R}, ::Val{:β}, β::R +) where {T,R} + acsf.β = β + return acsf +end +get_manopt_parameter(acsf::AbstractConstrainedSlackFunctor, ::Val{:β}) = acsf.β + @doc raw""" ConstrainedManifoldObjective{T<:AbstractEvaluationType, C<:ConstraintType} <: AbstractManifoldObjective{T} @@ -296,7 +357,7 @@ problem is for. hessian_inequality_range=range ) -Creates a constrained Manopt problem specifying an [`AbstractPowerRepresentation`](@ref) +Creates a constrained Manopt problem specifying an [`AbstractPowerRepresentation`](@extref ManifoldsBase.AbstractPowerRepresentation) for both the `gradient_equality_range` and the `gradient_inequality_range`, respectively. """ struct ConstrainedManoptProblem{ @@ -343,6 +404,160 @@ end get_manifold(cmp::ConstrainedManoptProblem) = cmp.manifold get_objective(cmp::ConstrainedManoptProblem) = cmp.objective +@doc raw""" + LagrangianCost{CO,T} <: AbstractConstrainedFunctor{T} + +Implement the Lagrangian of a [`ConstrainedManifoldObjective`](@ref) `co`. + +```math +\mathcal L(p; μ, λ) += f(p) + \sum_{i=1}^m μ_ig_i(p) + \sum_{j=1}^n λ_jh_j(p) +``` + +# Fields + +* `co::CO`, `μ::T`, `λ::T` as mentioned, where `T` represents a vector type. + +# Constructor + + LagrangianCost(co, μ, λ) + +Create a functor for the Lagrangian with fixed dual variables. + +# Example + +When you directly want to evaluate the Lagrangian ``\mathcal L`` +you can also call + +``` +LagrangianCost(co, μ, λ)(M,p) +``` +""" +mutable struct LagrangianCost{CO,T} <: AbstractConstrainedFunctor{T} + co::CO + μ::T + λ::T +end +function (lc::LagrangianCost)(M, p) + c = get_cost(M, lc.co, p) + g = get_inequality_constraint(M, lc.co, p, :) + h = get_equality_constraint(M, lc.co, p, :) + (length(g) > 0) && (c += sum(lc.μ .* g)) + (length(h) > 0) && (c += sum(lc.λ .* h)) + return c +end +function show(io::IO, lc::LagrangianCost) + return print(io, "LagrangianCost\n\twith μ=$(lc.μ), λ=$(lc.λ)") +end + +@doc raw""" + LagrangianGradient{CO,T} + +The gradient of the Lagrangian of a [`ConstrainedManifoldObjective`](@ref) `co` +with respect to the variable ``p``. The formula reads + +```math +\operatorname{grad}_p \mathcal L(p; μ, λ) += \operatorname{grad} f(p) + \sum_{i=1}^m μ_i \operatorname{grad} g_i(p) + \sum_{j=1}^n λ_j \operatorname{grad} h_j(p) +``` + +# Fields + +* `co::CO`, `μ::T`, `λ::T` as mentioned, where `T` represents a vector type. + +# Constructor + + LagrangianGradient(co, μ, λ) + +Create a functor for the Lagrangian with fixed dual variables. + +# Example + +When you directly want to evaluate the gradient of the Lagrangian ``\operatorname{grad}_p \mathcal L`` +you can also call `LagrangianGradient(co, μ, λ)(M,p)` or `LagrangianGradient(co, μ, λ)(M,X,p)` for the in-place variant. 
+""" +mutable struct LagrangianGradient{CO,T} <: AbstractConstrainedFunctor{T} + co::CO + μ::T + λ::T +end +function (lg::LagrangianGradient)(M, p) + X = zero_vector(M, p) + return lg(M, X, p) +end +function (lg::LagrangianGradient)(M, X, p) + Y = copy(M, p, X) + get_gradient!(M, X, lg.co, p) + m = inequality_constraints_length(lg.co) + n = equality_constraints_length(lg.co) + for i in 1:m + get_grad_inequality_constraint!(M, Y, lg.co, p, i) + copyto!(M, X, p, X + lg.μ[i] * Y) + end + for j in 1:n + get_grad_equality_constraint!(M, Y, lg.co, p, j) + copyto!(M, X, p, X + lg.λ[j] * Y) + end + return X +end +function show(io::IO, lg::LagrangianGradient) + return print(io, "LagrangianGradient\n\twith μ=$(lg.μ), λ=$(lg.λ)") +end + +@doc raw""" + LagrangianHessian{CO, V, T} + +The Hesian of the Lagrangian of a [`ConstrainedManifoldObjective`](@ref) `co` +with respect to the variable ``p``. The formula reads + +```math +\operatorname{Hess}_p \mathcal L(p; μ, λ)[X] += \operatorname{Hess} f(p) + \sum_{i=1}^m μ_i \operatorname{Hess} g_i(p)[X] + \sum_{j=1}^n λ_j \operatorname{Hess} h_j(p)[X] +``` + +# Fields + +* `co::CO`, `μ::T`, `λ::T` as mentioned, where `T` represents a vector type. + +# Constructor + + LagrangianHessian(co, μ, λ) + +Create a functor for the Lagrangian with fixed dual variables. + +# Example + +When you directly want to evaluate the Hessian of the Lagrangian ``\operatorname{Hess}_p \mathcal L`` +you can also call `LagrangianHessian(co, μ, λ)(M, p, X)` or `LagrangianHessian(co, μ, λ)(M, Y, p, X)` for the in-place variant. +""" +mutable struct LagrangianHessian{CO,T} <: AbstractConstrainedFunctor{T} + co::CO + μ::T + λ::T +end +function (lH::LagrangianHessian)(M, p, X) + Y = zero_vector(M, p) + return lH(M, Y, p, X) +end +function (lH::LagrangianHessian)(M, Y, p, X) + Z = copy(M, p, X) + get_hessian!(M, Y, lH.co, p, X) + n = inequality_constraints_length(lH.co) + m = equality_constraints_length(lH.co) + for i in 1:n + get_hess_inequality_constraint!(M, Z, lH.co, p, X, i) + copyto!(M, Y, p, Y + lH.μ[i] * Z) + end + for j in 1:m + get_hess_equality_constraint!(M, Z, lH.co, p, X, j) + copyto!(M, Y, p, Y + lH.λ[j] * Z) + end + return Y +end +function show(io::IO, lh::LagrangianHessian) + return print(io, "LagrangianHessian\n\twith μ=$(lh.μ), λ=$(lh.λ)") +end + @doc raw""" equality_constraints_length(co::ConstrainedManifoldObjective) @@ -640,7 +855,7 @@ function get_grad_inequality_constraint!( j=:, range::AbstractPowerRepresentation=NestedPowerRepresentation(), ) - isnothing(co.equality_constraints) && (return X) + isnothing(co.inequality_constraints) && (return X) return get_gradient!(M, X, co.inequality_constraints, p, j, range) end @@ -826,21 +1041,93 @@ function get_hess_inequality_constraint!( j=:, range::AbstractPowerRepresentation=NestedPowerRepresentation(), ) - isnothing(co.equality_constraints) && (return X) + isnothing(co.inequality_constraints) && (return X) return get_hessian!(M, Y, co.inequality_constraints, p, X, j, range) end @doc raw""" - inequality_constraints_length(co::ConstrainedManifoldObjective) + inequality_constraints_length(cmo::ConstrainedManifoldObjective) -Return the number of inequality constraints of an [`ConstrainedManifoldObjective`](@ref). +Return the number of inequality constraints of an [`ConstrainedManifoldObjective`](@ref) `cmo`. This acts transparently through [`AbstractDecoratedManifoldObjective`](@ref)s """ -function inequality_constraints_length(co::ConstrainedManifoldObjective) - return isnothing(co.inequality_constraints) ? 
0 : length(co.inequality_constraints)
+function inequality_constraints_length(cmo::ConstrainedManifoldObjective)
+    return isnothing(cmo.inequality_constraints) ? 0 : length(cmo.inequality_constraints)
+end
+function inequality_constraints_length(admo::AbstractDecoratedManifoldObjective)
+    return inequality_constraints_length(get_objective(admo, false))
 end
-function inequality_constraints_length(co::AbstractDecoratedManifoldObjective)
-    return inequality_constraints_length(get_objective(co, false))
+
+@doc raw"""
+    is_feasible(M::AbstractManifold, cmo::ConstrainedManifoldObjective, p; kwargs...)
+
+Evaluate whether a point `p` on `M` is feasible with respect to the [`ConstrainedManifoldObjective`](@ref) `cmo`.
+That is, for the provided inequality constraints ``g: \mathcal M → ℝ^m`` and equality constraints ``h: \mathcal M → ℝ^n``
+from within `cmo`, the point ``p ∈ \mathcal M`` is feasible if
+```math
+g_i(p) ≤ 0, \text{ for all } i=1,…,m\quad\text{ and }\quad h_j(p) = 0, \text{ for all } j=1,…,n.
+```
+
+# Keyword arguments
+* `check_point::Bool=true`: whether to also verify that ``p∈\mathcal M`` holds, using [`is_point`](@extref ManifoldsBase.is_point)
+* `error::Symbol=:none`: if the point is not feasible, this symbol determines how to report the error.
+  * `:error`: throws an error
+  * `:info`: displays the error message as an `@info`
+  * `:none`: (default) the function just returns true/false
+  * `:warn`: displays the error message as a `@warn`ing.
+
+The keyword `error=` and all other `kwargs...` are passed on to [`is_point`](@extref ManifoldsBase.is_point) if the point is verified (see `check_point`).
+"""
+function is_feasible(M, cmo, p; check_point::Bool=true, error::Symbol=:none, kwargs...)
+    v = !check_point || is_point(M, p; error=error)
+    g = get_inequality_constraint(M, cmo, p, :)
+    h = get_equality_constraint(M, cmo, p, :)
+    feasible = v && all(g .<= 0) && all(h .== 0)
+    # if the point is feasible or no error shall be generated, return directly
+    ((error === :none) || feasible) && return feasible
+    # collect information about the infeasibility
+    if (error === :info) || (error === :warn) || (error === :error)
+        s = get_feasibility_status(M, cmo, p; g=g, h=h)
+        (error === :error) && throw(ErrorException(s))
+        (error === :info) && @info s
+        (error === :warn) && @warn s
+    end
+    return feasible
+end
+
+@doc raw"""
+    get_feasibility_status(
+        M::AbstractManifold,
+        cmo::ConstrainedManifoldObjective,
+        p;
+        g = get_inequality_constraints(M, cmo, p),
+        h = get_equality_constraints(M, cmo, p),
+    )
+
+Generate a message about the feasibility of `p` with respect to the [`ConstrainedManifoldObjective`](@ref).
+You can also provide the evaluated vectors for the values of `g` and `h` as keyword arguments,
+in case you had them evaluated before.
+"""
+function get_feasibility_status(
+    M,
+    cmo,
+    p;
+    g=get_inequality_constraints(M, cmo, p),
+    h=get_equality_constraints(M, cmo, p),
+)
+    g_violated = sum(g .> 0)
+    h_violated = sum(h .!= 0)
+    return """
+    The point $p on $M is not feasible for the provided constraints.
+
+    * There are $(g_violated) of $(length(g)) inequality constraints violated. $(
+        g_violated > 0 ? "The sum of violation is $(sum(max.(g,Ref(0))))." : ""
+    )
+    * There are $(h_violated) of $(length(h)) equality constraints violated. $(
+        h_violated > 0 ? "The sum of violation is $(sum(abs.(h)))."
: "" + ) + """ end function Base.show( diff --git a/src/plans/debug.jl b/src/plans/debug.jl index dccba59f3f..340323aad7 100644 --- a/src/plans/debug.jl +++ b/src/plans/debug.jl @@ -382,6 +382,83 @@ function show(io::IO, di::DebugEntry) return print(io, "DebugEntry(:$(di.field); format=\"$(escape_string(di.format))\")") end +""" + DebugFeasibility <: DebugAction + +Display information about the feasibility of the current iterate + +# Fields +* `atol`: absolute tolerance for when either equality or inequality constraints are counted as violated +* `format`: a vector of symbols and string formatting the output +* `io`: default stream to print the debug to. + +The following symbols are filled with values + +* `:Feasbile` display true or false depending on whether the iterate is feasible +* `:FeasbileEq` display `=` or `≠` equality constraints are fulfilled or not +* `:FeasbileInEq` display `≤` or `≰` inequality constraints are fulfilled or not +* `:NumEq` display the number of equality constraints infeasible +* `:NumEqNz` display the number of equality constraints infeasible if exists +* `:NumIneq` display the number of inequality constraints infeasible +* `:NumIneqNz` display the number of inequality constraints infeasible if exists +* `:TotalEq` display the sum of how much the equality constraints are violated +* `:TotalInEq` display the sum of how much the inequality constraints are violated + +format to print the output. + +# Constructor + +DebugFeasibility( + format=["feasible: ", :Feasible]; + io::IO=stdout, + atol=1e-13 +) + +""" +mutable struct DebugFeasibility <: DebugAction + atol::Float64 + format::Vector{Union{String,Symbol}} + io::IO + function DebugFeasibility(format=["feasible: ", :Feasible]; io::IO=stdout, atol=1e-13) + return new(atol, format, io) + end +end +function (d::DebugFeasibility)( + mp::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int +) + s = "" + p = get_iterate(st) + eqc = get_equality_constraint(mp, p, :) + eqc_nz = eqc[abs.(eqc) .> d.atol] + ineqc = get_inequality_constraint(mp, p, :) + ineqc_pos = ineqc[ineqc .> d.atol] + feasible = (length(eqc_nz) == 0) && (length(ineqc_pos) == 0) + n_eq = length(eqc_nz) + n_ineq = length(ineqc_pos) + for f in d.format + (f isa String) && (s *= f) + (f === :Feasible) && (s *= feasible ? "Yes" : "No") + (f === :FeasibleEq) && (s *= n_eq == 0 ? "=" : "≠") + (f === :FeasibleIneq) && (s *= n_ineq == 0 ? "≤" : "≰") + (f === :NumEq) && (s *= "$(n_eq)") + (f === :NumEqNz) && (s *= n_eq == 0 ? "" : "$(n_eq)") + (f === :NumIneq) && (s *= "$(n_ineq)") + (f === :NumIneqNz) && (s *= n_ineq == 0 ? "" : "$(n_ineq)") + (f === :TotalEq) && (s *= "$(sum(abs.(eqc_nz);init=0.0))") + (f === :TotalInEq) && (s *= "$(sum(ineq_pos;init=0.0))") + end + print(d.io, (k > 0) ? s : "") + return nothing +end +function show(io::IO, d::DebugFeasibility) + sf = "[" * (join([e isa String ? "\"$e\"" : ":$e" for e in d.format], ", ")) * "]" + return print(io, "DebugFeasibility($sf; atol=$(d.atol))") +end +function status_summary(d::DebugFeasibility) + sf = "[" * (join([e isa String ? "\"$e\"" : ":$e" for e in d.format], ", ")) * "]" + return "(:Feasibility, $sf)" +end + @doc raw""" DebugIfEntry <: DebugAction @@ -1220,8 +1297,8 @@ Note that the Shortcut symbols should all start with a capital letter. * `:WarnGradient` creates a [`DebugWarnIfFieldNotFinite`](@ref) for the `::Gradient`. 
* `:WarnBundle` creates a [`DebugWarnIfLagrangeMultiplierIncreases`](@ref)
 * `:Time` creates a [`DebugTime`](@ref)
-* `:WarningMessages`creates a [`DebugMessages`](@ref)`(:Warning)`
-* `:InfoMessages`creates a [`DebugMessages`](@ref)`(:Info)`
+* `:WarningMessages` creates a [`DebugMessages`](@ref)`(:Warning)`
+* `:InfoMessages` creates a [`DebugMessages`](@ref)`(:Info)`
 * `:ErrorMessages` creates a [`DebugMessages`](@ref)`(:Error)`
 * `:Messages` creates a [`DebugMessages`](@ref)`()` (the same as `:InfoMessages`)
@@ -1235,6 +1312,7 @@ function DebugActionFactory(d::Symbol)
     (d == :GradientNorm) && return DebugGradientNorm()
     (d == :Iterate) && return DebugIterate()
     (d == :Iteration) && return DebugIteration()
+    (d == :Feasibility) && return DebugFeasibility()
     (d == :Stepsize) && return DebugStepsize()
     (d == :Stop) && return DebugStoppingCriterion()
     (d == :WarnBundle) && return DebugWarnIfLagrangeMultiplierIncreases()
@@ -1272,9 +1350,10 @@ Note that the Shortcut symbols `t[1]` should all start with a capital letter.
 
 any other symbol creates a `DebugEntry(s)` to print the entry (o.:s) from the options.
 """
-function DebugActionFactory(t::Tuple{Symbol,String})
+function DebugActionFactory(t::Tuple{Symbol,Any})
     (t[1] == :Change) && return DebugChange(; format=t[2])
     (t[1] == :Cost) && return DebugCost(; format=t[2])
+    (t[1] == :Feasibility) && return DebugFeasibility(t[2])
     (t[1] == :Gradient) && return DebugGradient(; format=t[2])
     (t[1] == :GradientChange) && return DebugGradientChange(; format=t[2])
     (t[1] == :GradientNorm) && return DebugGradientNorm(; format=t[2])
diff --git a/src/plans/interior_point_Newton_plan.jl b/src/plans/interior_point_Newton_plan.jl
new file mode 100644
index 0000000000..833b6694d0
--- /dev/null
+++ b/src/plans/interior_point_Newton_plan.jl
@@ -0,0 +1,984 @@
+"""
+    StepsizeState{P,T} <: AbstractManoptSolverState
+
+A state to store a point and a descent direction used within a linesearch,
+if these are different from the iterate and search direction of the main solver.
+
+# Fields
+
+* `p::P`: a point on a manifold
+* `X::T`: a tangent vector at `p`.
+
+# Constructor
+
+    StepsizeState(p, X)
+
+# See also
+
+[`interior_point_Newton`](@ref)
+"""
+struct StepsizeState{P,T} <: AbstractManoptSolverState
+    p::P
+    X::T
+end
+get_iterate(s::StepsizeState) = s.p
+get_gradient(s::StepsizeState) = s.X
+set_iterate!(s::StepsizeState, M, p) = copyto!(M, s.p, p)
+set_gradient!(s::StepsizeState, M, p, X) = copyto!(M, s.X, p, X)
+
+@doc raw"""
+    InteriorPointNewtonState <: AbstractHessianSolverState
+
+# Fields
+
+* `λ`: the Lagrange multiplier with respect to the equality constraints
+* `μ`: the Lagrange multiplier with respect to the inequality constraints
+* `p`: the current iterate
+* `s`: the current slack variable
+* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver
+* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
+* `X`: the current gradient with respect to `p`
+* `Y`: the current gradient with respect to `μ`
+* `Z`: the current gradient with respect to `λ`
+* `W`: the current gradient with respect to `s`
+* `ρ`: stores the orthogonality `μ's/m` to compute the barrier parameter `β` in the sub problem
+* `σ`: scaling factor for the barrier parameter `β` in the sub problem
+* `stop`: a [`StoppingCriterion`](@ref) indicating when to stop.
+* `retraction_method`: the retraction method to use on `M`.
+
* `stepsize::`[`Stepsize`](@ref): the stepsize to use
+* `step_problem`: an [`AbstractManoptProblem`](@ref) storing the manifold and objective for the line search
+* `step_state`: storing iterate and search direction in a state for the line search, see [`StepsizeState`](@ref)
+
+# Constructor
+
+    InteriorPointNewtonState(
+        M::AbstractManifold,
+        cmo::ConstrainedManifoldObjective,
+        p,
+        sub_problem::Pr,
+        sub_state::St;
+        kwargs...
+    )
+
+Initialize the state, where both the [`AbstractManifold`](@extref `ManifoldsBase.AbstractManifold`) and the [`ConstrainedManifoldObjective`](@ref)
+are used to fill in reasonable defaults for the keywords.
+
+# Input
+
+* `M`: a Riemannian manifold
+* `cmo`: a [`ConstrainedManifoldObjective`](@ref)
+* `p`: a point on `M` as the initial point of the algorithm
+* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the sub solver
+* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the sub solver
+
+# Keyword arguments
+
+Let `m` and `n` denote the number of inequality and equality constraints, respectively.
+
+* `μ=ones(m)`
+* `X=`[`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`
+* `Y=zero(μ)`
+* `λ=zeros(n)`
+* `Z=zero(λ)`
+* `s=ones(m)`
+* `W=zero(s)`
+* `ρ=μ's/m`
+* `σ=`[`calculate_σ`](@ref)`(M, cmo, p, μ, λ, s)`
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`[` | `](@ref StopWhenAny)[`StopWhenChangeLess`](@ref)`(1e-8)`
+* `retraction_method=default_retraction_method(M, typeof(p))`
+* `step_objective=`[`ManifoldGradientObjective`](@ref)`(`[`KKTVectorFieldNormSq`](@ref)`(cmo)`, [`KKTVectorFieldNormSqGradient`](@ref)`(cmo)`; evaluation=[`InplaceEvaluation`](@ref)`())`
+* `vector_space=`[`Rn`](@ref Manopt.Rn): a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n``
+* `step_problem`: wrap the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m``
+* `step_state`: the [`StepsizeState`](@ref) with point and search direction
+* `stepsize`: an [`ArmijoLinesearch`](@ref) with the [`InteriorPointCentralityCondition`](@ref) as
+  additional condition to accept a step. Note that this step size operates on its own `step_problem` and `step_state`
+
+and internally `_step_M` and `_step_p` are used for the manifold and point in the stepsize.
+
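+
+In practice this state is created for you by the high-level interface. A hedged sketch
+of such a call, assuming a cost `f` with gradient `grad_f` and Hessian `Hess_f` as well
+as inequality constraints `g` with gradient `grad_g` and Hessian `Hess_g` are given:
+
+```julia
+q = interior_point_Newton(
+    M, f, grad_f, Hess_f, p;
+    g=g, grad_g=grad_g, Hess_g=Hess_g,
+)
+```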
+"""
+mutable struct InteriorPointNewtonState{
+    P,
+    T,
+    Pr<:Union{AbstractManoptProblem,F} where {F},
+    St<:AbstractManoptSolverState,
+    V,
+    R,
+    SC<:StoppingCriterion,
+    TRTM<:AbstractRetractionMethod,
+    TStepsize<:Stepsize,
+    TStepPr<:AbstractManoptProblem,
+    TStepSt<:AbstractManoptSolverState,
+} <: AbstractHessianSolverState
+    p::P
+    X::T
+    sub_problem::Pr
+    sub_state::St
+    μ::V
+    λ::V
+    s::V
+    Y::V
+    Z::V
+    W::V
+    ρ::R
+    σ::R
+    stop::SC
+    retraction_method::TRTM
+    stepsize::TStepsize
+    step_problem::TStepPr
+    step_state::TStepSt
+    function InteriorPointNewtonState(
+        M::AbstractManifold,
+        cmo::ConstrainedManifoldObjective,
+        p::P,
+        sub_problem::Pr,
+        sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState};
+        X::T=zero_vector(M, p),
+        μ::V=ones(length(get_inequality_constraint(M, cmo, p, :))),
+        Y::V=zero(μ),
+        λ::V=zeros(length(get_equality_constraint(M, cmo, p, :))),
+        Z::V=zero(λ),
+        s::V=ones(length(get_inequality_constraint(M, cmo, p, :))),
+        W::V=zero(s),
+        ρ::R=μ's / length(get_inequality_constraint(M, cmo, p, :)),
+        σ::R=calculate_σ(M, cmo, p, μ, λ, s),
+        stopping_criterion::SC=StopAfterIteration(200) | StopWhenChangeLess(1e-8),
+        retraction_method::RTM=default_retraction_method(M),
+        step_objective=ManifoldGradientObjective(
+            KKTVectorFieldNormSq(cmo),
+            KKTVectorFieldNormSqGradient(cmo);
+            evaluation=InplaceEvaluation(),
+        ),
+        vector_space=Rn,
+        _step_M=M × vector_space(length(μ)) × vector_space(length(λ)) ×
+                vector_space(length(s)),
+        step_problem::StepPr=DefaultManoptProblem(_step_M, step_objective),
+        _step_p=rand(_step_M),
+        step_state::StepSt=StepsizeState(_step_p, zero_vector(_step_M, _step_p)),
+        centrality_condition::F=(N, p) -> true, # accepts any candidate by default
+        stepsize::S=ArmijoLinesearch(
+            get_manifold(step_problem);
+            retraction_method=default_retraction_method(get_manifold(step_problem)),
+            initial_stepsize=1.0,
+            additional_decrease_condition=centrality_condition,
+        ),
+        kwargs...,
+    ) where {
+        P,
+        T,
+        Pr,
+        V,
+        R,
+        F,
+        SC<:StoppingCriterion,
+        StepPr<:AbstractManoptProblem,
+        StepSt<:AbstractManoptSolverState,
+        RTM<:AbstractRetractionMethod,
+        S<:Stepsize,
+    }
+        sub_state_storage = maybe_wrap_evaluation_type(sub_state)
+        ips = new{P,T,Pr,typeof(sub_state_storage),V,R,SC,RTM,S,StepPr,StepSt}()
+        ips.p = p
+        ips.sub_problem = sub_problem
+        ips.sub_state = sub_state_storage
+        ips.μ = μ
+        ips.λ = λ
+        ips.s = s
+        ips.ρ = ρ
+        ips.σ = σ
+        ips.X = X
+        ips.Y = Y
+        ips.Z = Z
+        ips.W = W
+        ips.stop = stopping_criterion
+        ips.retraction_method = retraction_method
+        ips.stepsize = stepsize
+        ips.step_problem = step_problem
+        ips.step_state = step_state
+        return ips
+    end
+end
+
+# get & set iterate
+get_iterate(ips::InteriorPointNewtonState) = ips.p
+function set_iterate!(ips::InteriorPointNewtonState, ::AbstractManifold, p)
+    ips.p = p
+    return ips
+end
+# get & set gradient
+get_gradient(ips::InteriorPointNewtonState) = ips.X
+function set_gradient!(ips::InteriorPointNewtonState, ::AbstractManifold, X)
+    ips.X = X
+    return ips
+end
+# only the stepsize carries messages for now
+function get_message(ips::InteriorPointNewtonState)
+    return get_message(ips.stepsize)
+end
+# pretty print state info
+function show(io::IO, ips::InteriorPointNewtonState)
+    i = get_count(ips, :Iterations)
+    Iter = (i > 0) ? "After $i iterations\n" : ""
+    Conv = indicates_convergence(ips.stop) ? "Yes" : "No"
+    s = """
+    # Solver state for `Manopt.jl`s Interior Point Newton Method
+    $Iter
+    ## Parameters
+    * ρ: $(ips.ρ)
+    * σ: $(ips.σ)
+    * retraction method: $(ips.retraction_method)
+
+    ## Stopping criterion
+    $(status_summary(ips.stop))
+    ## Stepsize
+    $(ips.stepsize)
+    This indicates convergence: $Conv
+    """
+    return print(io, s)
+end
+
+#
+# Constraint functors
+#
+
+@doc raw"""
+    CondensedKKTVectorField{O<:ConstrainedManifoldObjective,T,R} <: AbstractConstrainedSlackFunctor{T,R}
+
+Given the constrained optimization problem
+
+```math
+\begin{aligned}
+\min_{p ∈\mathcal{M}} &f(p)\\
+\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i=1,…,m,\\
+\quad &h_j(p)=0 \quad \text{ for } j=1,…,n,
+\end{aligned}
+```
+
+we reformulate the KKT conditions obtained from the optimality conditions of the Lagrangian
+
+```math
+\mathcal L(p, μ, λ) = f(p) + \sum_{j=1}^n λ_jh_j(p) + \sum_{i=1}^m μ_ig_i(p)
+```
+
+in a perturbed (barrier) method in condensed form,
+using a slack variable ``s ∈ ℝ^m``, a barrier parameter ``β``,
+and the Riemannian gradient of the Lagrangian with respect to its first parameter,
+``\operatorname{grad}_p \mathcal L(p, μ, λ)``.
+
+Let ``\mathcal N = \mathcal M × ℝ^n``. We obtain the linear system
+
+```math
+\mathcal A(p,λ)[X,Y] = -b(p,λ),\qquad \text{where } (X,Y) ∈ T_{(p,λ)}\mathcal N,
+```
+
+where ``\mathcal A: T_{(p,λ)}\mathcal N → T_{(p,λ)}\mathcal N`` is a linear operator and
+this struct models the right-hand side ``b(p,λ) ∈ T_{(p,λ)}\mathcal N`` given by
+
+```math
+b(p,λ) = \begin{pmatrix}
+\operatorname{grad} f(p)
++ \displaystyle\sum_{j=1}^n λ_j \operatorname{grad} h_j(p)
++ \displaystyle\sum_{i=1}^m μ_i \operatorname{grad} g_i(p)
++ \displaystyle\sum_{i=1}^m \frac{μ_i}{s_i}\bigl(
+  μ_i(g_i(p)+s_i) + β - μ_is_i
+\bigr)\operatorname{grad} g_i(p)\\
+h(p)
+\end{pmatrix}
+```
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+* `μ::T` the vector in ``ℝ^m`` of coefficients for the inequality constraints
+* `s::T` the vector in ``ℝ^m`` of slack variables
+* `β::R` the barrier parameter ``β∈ℝ``
+
+# Constructor
+
+    CondensedKKTVectorField(cmo, μ, s, β)
+"""
+mutable struct CondensedKKTVectorField{O<:ConstrainedManifoldObjective,T,R} <:
+               AbstractConstrainedSlackFunctor{T,R}
+    cmo::O
+    μ::T
+    s::T
+    β::R
+end
+function (cKKTvf::CondensedKKTVectorField)(N, q)
+    Y = zero_vector(N, q)
+    cKKTvf(N, Y, q)
+    return Y
+end
+function (cKKTvf::CondensedKKTVectorField)(N, Y, q)
+    M = N[1]
+    cmo = cKKTvf.cmo
+    p, μ, λ, s = q[N, 1], cKKTvf.μ, q[N, 2], cKKTvf.s
+    β = cKKTvf.β
+    m, n = length(μ), length(λ)
+    # first term of the Lagrangian
+    get_gradient!(M, Y[N, 1], cmo, p) # grad f
+    X = zero_vector(M, p)
+    for i in 1:m
+        get_grad_inequality_constraint!(M, X, cKKTvf.cmo, p, i)
+        gi = get_inequality_constraint(M, cKKTvf.cmo, p, i)
+        # Lagrangian term
+        Y[N, 1] += μ[i] * X
+        # condensed term
+        Y[N, 1] += (μ[i] / s[i]) * (μ[i] * (gi + s[i]) + β - μ[i] * s[i]) * X
+    end
+    for j in 1:n
+        get_grad_equality_constraint!(M, X, cKKTvf.cmo, p, j)
+        hj = get_equality_constraint(M, cKKTvf.cmo, p, j)
+        Y[N, 1] += λ[j] * X
+        Y[N, 2][j] = hj
+    end
+    return Y
+end
+
+function show(io::IO, CKKTvf::CondensedKKTVectorField)
+    return print(
+        io, "CondensedKKTVectorField\n\twith μ=$(CKKTvf.μ), s=$(CKKTvf.s), β=$(CKKTvf.β)"
+    )
+end
+
+@doc raw"""
+    CondensedKKTVectorFieldJacobian{O<:ConstrainedManifoldObjective,T,R} <: AbstractConstrainedSlackFunctor{T,R}
+
+Given the constrained optimization problem
+
+```math
+\begin{aligned}
+\min_{p ∈\mathcal{M}} &f(p)\\
+\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i=1,…,m,\\
+\quad &h_j(p)=0 \quad \text{ for } j=1,…,n,
+\end{aligned}
+```
+
+we reformulate the KKT conditions obtained from the optimality conditions of the Lagrangian
+
+```math
+\mathcal L(p, μ, λ) = f(p) + \sum_{j=1}^n λ_jh_j(p) + \sum_{i=1}^m μ_ig_i(p)
+```
+
+in a perturbed (barrier) method in condensed form, using ``\operatorname{grad}_p \mathcal L(p, μ, λ)``,
+the Riemannian gradient of the Lagrangian with respect to its first parameter.
+
+Let ``\mathcal N = \mathcal M × ℝ^n``. We obtain the linear system
+
+```math
+\mathcal A(p,λ)[X,Y] = -b(p,λ),\qquad \text{where } X ∈ T_p\mathcal M, Y ∈ ℝ^n,
+```
+where ``\mathcal A: T_{(p,λ)}\mathcal N → T_{(p,λ)}\mathcal N`` is a linear operator
+on ``T_{(p,λ)}\mathcal N = T_p\mathcal M × ℝ^n`` given by
+
+```math
+\mathcal A(p,λ)[X,Y] = \begin{pmatrix}
+\operatorname{Hess}_p\mathcal L(p, μ, λ)[X]
++ \displaystyle\sum_{i=1}^m \frac{μ_i}{s_i}⟨\operatorname{grad} g_i(p), X⟩\operatorname{grad} g_i(p)
++ \displaystyle\sum_{j=1}^n Y_j \operatorname{grad} h_j(p)
+\\
+\Bigl( ⟨\operatorname{grad} h_j(p), X⟩ \Bigr)_{j=1}^n
+\end{pmatrix}
+```
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+* `μ::T` the vector in ``ℝ^m`` of coefficients for the inequality constraints
+* `s::T` the vector in ``ℝ^m`` of slack variables
+* `β::R` the barrier parameter ``β∈ℝ``
+
+# Constructor
+
+    CondensedKKTVectorFieldJacobian(cmo, μ, s, β)
+"""
+mutable struct CondensedKKTVectorFieldJacobian{O<:ConstrainedManifoldObjective,T,R} <:
+               AbstractConstrainedSlackFunctor{T,R}
+    cmo::O
+    μ::T
+    s::T
+    β::R
+end
+function (cKKTvfJ::CondensedKKTVectorFieldJacobian)(N, q, X)
+    Y = zero_vector(N, q)
+    cKKTvfJ(N, Y, q, X)
+    return Y
+end
+function (cKKTvfJ::CondensedKKTVectorFieldJacobian)(N, Y, q, X)
+    M = N[1]
+    p, μ, λ, s = q[N, 1], cKKTvfJ.μ, q[N, 2], cKKTvfJ.s
+    m, n = length(μ), length(λ)
+    Xp, Xλ = X[N, 1], X[N, 2]
+    zero_vector!(N, Y, q)
+    Xt = zero_vector(M, p)
+    # first summand of Hess L
+    copyto!(M, Y[N, 1], get_hessian(M, cKKTvfJ.cmo, p, Xp)) # Hess f
+    # build the rest iteratively
+    for i in 1:m # inequality constraints
+        get_hess_inequality_constraint!(M, Xt, cKKTvfJ.cmo, p, Xp, i)
+        # summand of Hess L
+        Y[N, 1] += μ[i] * Xt
+        get_grad_inequality_constraint!(M, Xt, cKKTvfJ.cmo, p, i)
+        # condensed term
+        Y[N, 1] += (μ[i] / s[i]) * inner(M, p, Xt, Xp) * Xt
+    end
+    for j in 1:n # equality constraints
+        get_hess_equality_constraint!(M, Xt, cKKTvfJ.cmo, p, Xp, j)
+        # summand of Hess L
+        Y[N, 1] += λ[j] * Xt
+        get_grad_equality_constraint!(M, Xt, cKKTvfJ.cmo, p, j)
+        # condensed term
+        Y[N, 1] += Xλ[j] * Xt
+        # condensed term in second part
+        Y[N, 2][j] = inner(M, p, Xt, Xp)
+    end
+    return Y
+end
+function show(io::IO, CKKTvfJ::CondensedKKTVectorFieldJacobian)
+    return print(
+        io,
+        "CondensedKKTVectorFieldJacobian\n\twith μ=$(CKKTvfJ.μ), s=$(CKKTvfJ.s), β=$(CKKTvfJ.β)",
+    )
+end
+
+@doc raw"""
+    KKTVectorField{O<:ConstrainedManifoldObjective}
+
+Implement the vector field ``F`` of the KKT conditions, including a slack variable
+for the inequality constraints.
+
+Given the [`LagrangianCost`](@ref)
+
+```math
+\mathcal L(p; μ, λ) = f(p) + \sum_{i=1}^m μ_ig_i(p) + \sum_{j=1}^n λ_jh_j(p)
+```
+
+the [`LagrangianGradient`](@ref)
+
+```math
+\operatorname{grad}\mathcal L(p, μ, λ) = \operatorname{grad}f(p) + \sum_{j=1}^n λ_j \operatorname{grad} h_j(p) + \sum_{i=1}^m μ_i \operatorname{grad} g_i(p),
+```
+
+and introducing the slack variables ``s=-g(p) ∈ ℝ^m``,
+the vector field is given by
+
+```math
+F(p, μ, λ, s) = \begin{pmatrix}
+\operatorname{grad}_p \mathcal L(p, μ, λ)\\
+g(p) + s\\
+h(p)\\
+μ ⊙ s
+\end{pmatrix}, \text{ where } p \in \mathcal M, μ, s \in ℝ^m\text{ and } λ \in ℝ^n,
+```
+where ``⊙`` denotes the Hadamard (or elementwise) product.
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+
+While the point `p` is arbitrary and usually not needed, it serves as internal memory
+in the computations. Furthermore, both fields together also clarify the product manifold structure to use.
+
+# Constructor
+
+    KKTVectorField(cmo::ConstrainedManifoldObjective)
+
+# Example
+
+Define `F = KKTVectorField(cmo)` for some [`ConstrainedManifoldObjective`](@ref) `cmo`
+and let `N` be the product manifold of ``\mathcal M×ℝ^m×ℝ^n×ℝ^m``.
+Then, you can call this cost as `F(N, q)` or as the in-place variant `F(N, Y, q)`,
+where `q` is a point on `N` and `Y` is a tangent vector at `q` for the result.
+"""
+struct KKTVectorField{O<:ConstrainedManifoldObjective}
+    cmo::O
+end
+function (KKTvf::KKTVectorField)(N, q)
+    Y = zero_vector(N, q)
+    return KKTvf(N, Y, q)
+end
+function (KKTvf::KKTVectorField)(N, Y, q)
+    M = N[1]
+    p = q[N, 1]
+    # to improve readability
+    μ, λ, s = q[N, 2], q[N, 3], q[N, 4]
+    LagrangianGradient(KKTvf.cmo, μ, λ)(M, Y[N, 1], p)
+    m, n = length(μ), length(λ)
+    (m > 0) && (Y[N, 2] = get_inequality_constraint(M, KKTvf.cmo, p, :) .+ s)
+    (n > 0) && (Y[N, 3] = get_equality_constraint(M, KKTvf.cmo, p, :))
+    (m > 0) && (Y[N, 4] = μ .* s)
+    return Y
+end
+function show(io::IO, KKTvf::KKTVectorField)
+    return print(io, "KKTVectorField\nwith the objective\n\t$(KKTvf.cmo)")
+end
+
+@doc raw"""
+    KKTVectorFieldJacobian{O<:ConstrainedManifoldObjective}
+
+Implement the Jacobian of the vector field ``F`` of the KKT conditions, including a slack variable
+for the inequality constraints, see [`KKTVectorField`](@ref) and [`KKTVectorFieldAdjointJacobian`](@ref).
+
+```math
+\operatorname{J} F(p, μ, λ, s)[X, Y, Z, W] = \begin{pmatrix}
+    \operatorname{Hess}_p \mathcal L(p, μ, λ)[X] + \displaystyle\sum_{i=1}^m Y_i \operatorname{grad} g_i(p) + \displaystyle\sum_{j=1}^n Z_j \operatorname{grad} h_j(p)\\
+    \Bigl( ⟨\operatorname{grad} g_i(p), X⟩ + W_i\Bigr)_{i=1}^m\\
+    \Bigl( ⟨\operatorname{grad} h_j(p), X⟩ \Bigr)_{j=1}^n\\
+    μ ⊙ W + s ⊙ Y
+\end{pmatrix},
+```
+where ``⊙`` denotes the Hadamard (or elementwise) product.
+
+See also the [`LagrangianHessian`](@ref) ``\operatorname{Hess}_p \mathcal L(p, μ, λ)[X]``.
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+
+# Constructor
+
+    KKTVectorFieldJacobian(cmo::ConstrainedManifoldObjective)
+
+Generate the Jacobian of the KKT vector field related to some [`ConstrainedManifoldObjective`](@ref) `cmo`.
+
+# Example
+
+Define `JF = KKTVectorFieldJacobian(cmo)` for some [`ConstrainedManifoldObjective`](@ref) `cmo`
+and let `N` be the product manifold of ``\mathcal M×ℝ^m×ℝ^n×ℝ^m``.
+Then, you can call this cost as `JF(N, q, Y)` or as the in-place variant `JF(N, Z, q, Y)`,
+where `q` is a point on `N` and `Y` and `Z` are tangent vectors at `q`.
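+
+As a rough sketch of this usage, with `cmo` a placeholder for a [`ConstrainedManifoldObjective`](@ref)
+with one inequality and one equality constraint (`m = n = 1`) set up beforehand:
+
+```julia
+using Manopt, Manifolds
+M = Sphere(2)
+N = M × Manopt.Rn(1) × Manopt.Rn(1) × Manopt.Rn(1) # M × ℝ^m × ℝ^n × ℝ^m
+q = rand(N)
+Y = rand(N; vector_at=q)
+JF = KKTVectorFieldJacobian(cmo)
+Z = JF(N, q, Y) # allocating call
+JF(N, Z, q, Y)  # in-place variant
+```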
+"""
+mutable struct KKTVectorFieldJacobian{O<:ConstrainedManifoldObjective}
+    cmo::O
+end
+function (KKTvfJ::KKTVectorFieldJacobian)(N, q, Y)
+    Z = zero_vector(N, q)
+    return KKTvfJ(N, Z, q, Y)
+end
+function (KKTvfJ::KKTVectorFieldJacobian)(N, Z, q, Y)
+    M = N[1]
+    p = q[N, 1]
+    μ, λ, s = q[N, 2], q[N, 3], q[N, 4]
+    X = Y[N, 1]
+    # first component
+    LagrangianHessian(KKTvfJ.cmo, μ, λ)(M, Z[N, 1], p, Y[N, 1])
+    Xt = copy(M, p, X)
+    m, n = length(μ), length(λ)
+    for i in 1:m
+        get_grad_inequality_constraint!(M, Xt, KKTvfJ.cmo, p, i)
+        copyto!(M, Z[N, 1], p, Z[N, 1] + Y[N, 2][i] * Xt)
+        # set second components as well
+        Z[N, 2][i] = inner(M, p, Xt, X) + Y[N, 4][i]
+    end
+    for j in 1:n
+        get_grad_equality_constraint!(M, Xt, KKTvfJ.cmo, p, j)
+        copyto!(M, Z[N, 1], p, Z[N, 1] + Y[N, 3][j] * Xt)
+        # set third components as well
+        Z[N, 3][j] = inner(M, p, Xt, X)
+    end
+    # fourth component
+    Z[N, 4] = μ .* Y[N, 4] + s .* Y[N, 2]
+    return Z
+end
+function show(io::IO, KKTvfJ::KKTVectorFieldJacobian)
+    return print(io, "KKTVectorFieldJacobian\nwith the objective\n\t$(KKTvfJ.cmo)")
+end
+
+@doc raw"""
+    KKTVectorFieldAdjointJacobian{O<:ConstrainedManifoldObjective}
+
+Implement the adjoint of the Jacobian of the vector field ``F`` of the KKT conditions, including a slack variable
+for the inequality constraints, see [`KKTVectorField`](@ref) and [`KKTVectorFieldJacobian`](@ref).
+
+```math
+\operatorname{J}^* F(p, μ, λ, s)[X, Y, Z, W] = \begin{pmatrix}
+    \operatorname{Hess}_p \mathcal L(p, μ, λ)[X] + \displaystyle\sum_{i=1}^m Y_i \operatorname{grad} g_i(p) + \displaystyle\sum_{j=1}^n Z_j \operatorname{grad} h_j(p)\\
+    \Bigl( ⟨\operatorname{grad} g_i(p), X⟩ + s_iW_i\Bigr)_{i=1}^m\\
+    \Bigl( ⟨\operatorname{grad} h_j(p), X⟩ \Bigr)_{j=1}^n\\
+    μ ⊙ W + Y
+\end{pmatrix},
+```
+where ``⊙`` denotes the Hadamard (or elementwise) product.
+
+See also the [`LagrangianHessian`](@ref) ``\operatorname{Hess}_p \mathcal L(p, μ, λ)[X]``.
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+
+# Constructor
+
+    KKTVectorFieldAdjointJacobian(cmo::ConstrainedManifoldObjective)
+
+Generate the adjoint Jacobian of the KKT vector field related to some [`ConstrainedManifoldObjective`](@ref) `cmo`.
+
+# Example
+
+Define `AdJF = KKTVectorFieldAdjointJacobian(cmo)` for some [`ConstrainedManifoldObjective`](@ref) `cmo`
+and let `N` be the product manifold of ``\mathcal M×ℝ^m×ℝ^n×ℝ^m``.
+Then, you can call this cost as `AdJF(N, q, Y)` or as the in-place variant `AdJF(N, Z, q, Y)`,
+where `q` is a point on `N` and `Y` and `Z` are tangent vectors at `q`.
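+
+Continuing the sketch from the [`KKTVectorFieldJacobian`](@ref) docstring (with the same
+placeholder objective `cmo` and product manifold `N`), the defining adjoint relation can be
+checked numerically:
+
+```julia
+JF = KKTVectorFieldJacobian(cmo)
+AdJF = KKTVectorFieldAdjointJacobian(cmo)
+V, W = rand(N; vector_at=q), rand(N; vector_at=q)
+# ⟨JF(q)[V], W⟩ ≈ ⟨V, J*F(q)[W]⟩ should hold up to numerical error
+isapprox(inner(N, q, JF(N, q, V), W), inner(N, q, V, AdJF(N, q, W)))
+```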
+"""
+mutable struct KKTVectorFieldAdjointJacobian{O<:ConstrainedManifoldObjective}
+    cmo::O
+end
+function (KKTvfAdJ::KKTVectorFieldAdjointJacobian)(N, q, Y)
+    Z = zero_vector(N, q)
+    return KKTvfAdJ(N, Z, q, Y)
+end
+function (KKTvfAdJ::KKTVectorFieldAdjointJacobian)(N, Z, q, Y)
+    M = N[1]
+    p = q[N, 1]
+    μ, λ, s = q[N, 2], q[N, 3], q[N, 4]
+    X = Y[N, 1]
+    # first component
+    LagrangianHessian(KKTvfAdJ.cmo, μ, λ)(M, Z[N, 1], p, X)
+    Xt = copy(M, p, X)
+    m, n = length(μ), length(λ)
+    for i in 1:m
+        get_grad_inequality_constraint!(M, Xt, KKTvfAdJ.cmo, p, i)
+        copyto!(M, Z[N, 1], p, Z[N, 1] + Y[N, 2][i] * Xt)
+        # set second components as well
+        Z[N, 2][i] = inner(M, p, Xt, X) + s[i] * Y[N, 4][i]
+    end
+    for j in 1:n
+        get_grad_equality_constraint!(M, Xt, KKTvfAdJ.cmo, p, j)
+        copyto!(M, Z[N, 1], p, Z[N, 1] + Y[N, 3][j] * Xt)
+        # set third components as well
+        Z[N, 3][j] = inner(M, p, Xt, X)
+    end
+    # fourth component
+    Z[N, 4] = μ .* Y[N, 4] + Y[N, 2]
+    return Z
+end
+function show(io::IO, KKTvfAdJ::KKTVectorFieldAdjointJacobian)
+    return print(io, "KKTVectorFieldAdjointJacobian\nwith the objective\n\t$(KKTvfAdJ.cmo)")
+end
+
+@doc raw"""
+    KKTVectorFieldNormSq{O<:ConstrainedManifoldObjective}
+
+Implement the squared norm of the vector field ``F`` of the KKT conditions, including a slack variable
+for the inequality constraints, see [`KKTVectorField`](@ref), to which this functor applies the norm.
+In [LaiYoshise:2024](@cite) this is called the merit function.
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+
+# Constructor
+
+    KKTVectorFieldNormSq(cmo::ConstrainedManifoldObjective)
+
+# Example
+
+Define `f = KKTVectorFieldNormSq(cmo)` for some [`ConstrainedManifoldObjective`](@ref) `cmo`
+and let `N` be the product manifold of ``\mathcal M×ℝ^m×ℝ^n×ℝ^m``.
+Then, you can call this cost as `f(N, q)`, where `q` is a point on `N`.
+"""
+mutable struct KKTVectorFieldNormSq{O<:ConstrainedManifoldObjective}
+    cmo::O
+end
+function (KKTvc::KKTVectorFieldNormSq)(N, q)
+    Y = zero_vector(N, q)
+    KKTVectorField(KKTvc.cmo)(N, Y, q)
+    return inner(N, q, Y, Y)
+end
+function show(io::IO, KKTvfNSq::KKTVectorFieldNormSq)
+    return print(io, "KKTVectorFieldNormSq\nwith the objective\n\t$(KKTvfNSq.cmo)")
+end
+
+@doc raw"""
+    KKTVectorFieldNormSqGradient{O<:ConstrainedManifoldObjective}
+
+Compute the gradient of the [`KKTVectorFieldNormSq`](@ref) ``φ(p,μ,λ,s) = \lVert F(p,μ,λ,s)\rVert^2``,
+that is, of the squared norm of the [`KKTVectorField`](@ref) ``F``.
+
+This is given in [LaiYoshise:2024](@cite) as the gradient of their merit function,
+which we can write with the adjoint ``J^*`` of the Jacobian as
+
+```math
+\operatorname{grad} φ = 2\operatorname{J}^* F(p, μ, λ, s)[F(p, μ, λ, s)],
+```
+
+and hence it is computed with [`KKTVectorFieldAdjointJacobian`](@ref) and [`KKTVectorField`](@ref).
+
+For completeness, using the [`LagrangianGradient`](@ref) ``L = \operatorname{grad}_p \mathcal L(p,μ,λ) ∈ T_p\mathcal M``
+as a shorthand for the first component of ``F``, the gradient reads
+
+```math
+\operatorname{grad} φ
+=
+2 \begin{pmatrix}
+\operatorname{Hess}_p \mathcal L(p,μ,λ)[L] + \displaystyle\sum_{i=1}^m (g_i(p) + s_i)\operatorname{grad} g_i(p) + \displaystyle\sum_{j=1}^n h_j(p)\operatorname{grad} h_j(p)\\
+  \Bigl( ⟨\operatorname{grad} g_i(p), L⟩ \Bigr)_{i=1}^m + μ ⊙ s ⊙ s\\
+  \Bigl( ⟨\operatorname{grad} h_j(p), L⟩ \Bigr)_{j=1}^n\\
+  g(p) + s + μ ⊙ μ ⊙ s
+\end{pmatrix},
+```
+where ``⊙`` denotes the Hadamard (or elementwise) product.
+
+# Fields
+
+* `cmo` the [`ConstrainedManifoldObjective`](@ref)
+
+# Constructor
+
+    KKTVectorFieldNormSqGradient(cmo::ConstrainedManifoldObjective)
+
+# Example
+
+Define `grad_f = KKTVectorFieldNormSqGradient(cmo)` for some [`ConstrainedManifoldObjective`](@ref) `cmo`
+and let `N` be the product manifold of ``\mathcal M×ℝ^m×ℝ^n×ℝ^m``.
+Then, you can call this cost as `grad_f(N, q)` or as the in-place variant `grad_f(N, Y, q)`,
+where `q` is a point on `N` and `Y` is a tangent vector at `q` in which the resulting gradient is returned.
+"""
+mutable struct KKTVectorFieldNormSqGradient{O<:ConstrainedManifoldObjective}
+    cmo::O
+end
+function (KKTcfNG::KKTVectorFieldNormSqGradient)(N, q)
+    Y = zero_vector(N, q)
+    KKTcfNG(N, Y, q)
+    return Y
+end
+function (KKTcfNG::KKTVectorFieldNormSqGradient)(N, Y, q)
+    Z = allocate(N, Y)
+    KKTVectorField(KKTcfNG.cmo)(N, Z, q)
+    KKTVectorFieldAdjointJacobian(KKTcfNG.cmo)(N, Y, q, Z)
+    Y .*= 2
+    return Y
+end
+function show(io::IO, KKTvfNSqGrad::KKTVectorFieldNormSqGradient)
+    return print(
+        io, "KKTVectorFieldNormSqGradient\nwith the objective\n\t$(KKTvfNSqGrad.cmo)"
+    )
+end
+
+#
+# A special linesearch for the interior point Newton method
+function interior_point_initial_guess(
+    mp::AbstractManoptProblem, ips::StepsizeState, ::Int, l::R
+) where {R<:Real}
+    N = get_manifold(mp)
+    Y = get_gradient(N, get_objective(mp), ips.p)
+    grad_norm = norm(N, ips.p, Y)
+    max_step = max_stepsize(N, ips.p)
+    return ifelse(isfinite(max_step), min(l, max_step / grad_norm), l)
+end
+
+@doc raw"""
+    InteriorPointCentralityCondition{CO,R}
+
+A functor to check the centrality condition.
+
+In order to obtain a step in the linesearch performed within [`interior_point_Newton`](@ref),
+Section 6 of [LaiYoshise:2024](@cite) proposes the following additional conditions to hold,
+inspired by the Euclidean case described in Section 6 of [El-BakryTapiaTsuchiyaZhang:1996](@cite):
+
+For a given [`ConstrainedManifoldObjective`](@ref) consider the [`KKTVectorField`](@ref) ``F``,
+that is, we are at a point ``q = (p, μ, λ, s)`` on ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` and have a search direction ``V = (X, Y, Z, W)``.
+
+Then, let
+
+```math
+τ_1 = \frac{m⋅\min\{ μ ⊙ s\}}{μ^{\mathrm{T}}s}
+\quad\text{ and }\quad
+τ_2 = \frac{μ^{\mathrm{T}}s}{\lVert F(q) \rVert},
+```
+where ``⊙`` denotes the Hadamard (or elementwise) product.
+
+For a new candidate ``q(α) = \bigl(p(α), μ(α), λ(α), s(α)\bigr) := (\operatorname{retr}_p(αX), μ+αY, λ+αZ, s+αW)``,
+we then define two functions
+
+```math
+c_1(α) = \min\{ μ(α) ⊙ s(α) \} - \frac{γτ_1 μ(α)^{\mathrm{T}}s(α)}{m}
+\quad\text{ and }\quad
+c_2(α) = μ(α)^{\mathrm{T}}s(α) - γτ_2 \lVert F(q(α)) \rVert.
+```
+
+While the paper states that the (Armijo) linesearch starts at a point
+``\tilde α``, it is easier to include the conditions ``c_1(α) ≥ 0`` and ``c_2(α) ≥ 0``
+in the linesearch as well.
+
+The functor `InteriorPointCentralityCondition(cmo, γ, τ1, τ2)(N, qα)`
+defined here evaluates this condition and returns `true` if both ``c_1`` and ``c_2`` are nonnegative.
+
+# Fields
+
+* `cmo`: a [`ConstrainedManifoldObjective`](@ref)
+* `γ`: a constant
+* `τ1`, `τ2`: the constants given in the formulae above.
+
+# Constructor
+
+    InteriorPointCentralityCondition(cmo, γ)
+    InteriorPointCentralityCondition(cmo, γ, τ1, τ2)
+
+Initialise the centrality conditions.
+The parameters `τ1`, `τ2` are initialised to zero if not provided.
+
+!!! note
+
+    Besides [`get_manopt_parameter`](@ref) for all three constants
+    and [`set_manopt_parameter!`](@ref) for ``γ``,
+    you can call `set_manopt_parameter!(ipcc, :τ, N, q)` to update
+    both ``τ_1`` and ``τ_2`` according to the formulae above.
+"""
+mutable struct InteriorPointCentralityCondition{CO,R}
+    cmo::CO
+    γ::R
+    τ1::R
+    τ2::R
+end
+function InteriorPointCentralityCondition(cmo::CO, γ::R) where {CO,R}
+    return InteriorPointCentralityCondition{CO,R}(cmo, γ, zero(γ), zero(γ))
+end
+function (ipcc::InteriorPointCentralityCondition)(N, qα)
+    μα = qα[N, 2]
+    sα = qα[N, 4]
+    m = length(μα)
+    # c_1(α) < 0 ⇒ reject the candidate
+    (minimum(μα .* sα) - ipcc.γ * ipcc.τ1 * sum(μα .* sα) / m < 0) && return false
+    normKKTqα = sqrt(KKTVectorFieldNormSq(ipcc.cmo)(N, qα))
+    # c_2(α) < 0 ⇒ reject the candidate
+    (sum(μα .* sα) - ipcc.γ * ipcc.τ2 * normKKTqα < 0) && return false
+    return true
+end
+function get_manopt_parameter(ipcc::InteriorPointCentralityCondition, ::Val{:γ})
+    return ipcc.γ
+end
+function set_manopt_parameter!(ipcc::InteriorPointCentralityCondition, ::Val{:γ}, γ)
+    ipcc.γ = γ
+    return ipcc
+end
+function get_manopt_parameter(ipcc::InteriorPointCentralityCondition, ::Val{:τ1})
+    return ipcc.τ1
+end
+function get_manopt_parameter(ipcc::InteriorPointCentralityCondition, ::Val{:τ2})
+    return ipcc.τ2
+end
+function set_manopt_parameter!(ipcc::InteriorPointCentralityCondition, ::Val{:τ}, N, q)
+    μ = q[N, 2]
+    s = q[N, 4]
+    m = length(μ)
+    normKKTq = sqrt(KKTVectorFieldNormSq(ipcc.cmo)(N, q))
+    ipcc.τ1 = m * minimum(μ .* s) / sum(μ .* s)
+    ipcc.τ2 = sum(μ .* s) / normKKTq
+    return ipcc
+end
+
+@doc raw"""
+    StopWhenKKTResidualLess <: StoppingCriterion
+
+Stop when the KKT residual
+
+```math
+r^2
+= \lVert \operatorname{grad}_p \mathcal L(p, μ, λ) \rVert^2
++ \sum_{i=1}^m \bigl( [μ_i]_{-}^2 + [g_i(p)]_+^2 + \lvert μ_ig_i(p)\rvert^2 \bigr)
++ \sum_{j=1}^n \lvert h_j(p)\rvert^2
+```
+
+is less than a given threshold, that is, when ``r < ε``.
+Here, ``[v]_+ = \max\{0,v\}`` and ``[v]_- = \min\{0,v\}`` denote
+the positive and the negative part of ``v``, respectively.
+
+# Fields
+
+* `ε`: a threshold
+* `residual`: the last computed residual
+* `at_iteration`: the iteration at which the criterion indicated to stop, `-1` while it is not yet fulfilled
+"""
+mutable struct StopWhenKKTResidualLess{R} <: StoppingCriterion
+    ε::R
+    residual::R
+    at_iteration::Int
+    function StopWhenKKTResidualLess(ε::R) where {R}
+        return new{R}(ε, zero(ε), -1)
+    end
+end
+function (c::StopWhenKKTResidualLess)(
+    amp::AbstractManoptProblem, ipns::InteriorPointNewtonState, k::Int
+)
+    M = get_manifold(amp)
+    (k <= 0) && return false
+    # now k > 0: check the residual
+    μ, λ, s, p = ipns.μ, ipns.λ, ipns.s, ipns.p
+    c.residual = 0.0
+    m, n = length(ipns.μ), length(ipns.λ)
+    # first component: squared norm of the Lagrangian gradient
+    c.residual += norm(M, p, LagrangianGradient(get_objective(amp), μ, λ)(M, p))^2
+    # inequality constraint part
+    for i in 1:m
+        gi = get_inequality_constraint(amp, ipns.p, i)
+        c.residual += min(0.0, μ[i])^2 + max(gi, 0)^2 + abs(μ[i] * gi)^2
+    end
+    # equality constraint part
+    for j in 1:n
+        hj = get_equality_constraint(amp, ipns.p, j)
+        c.residual += abs(hj)^2
+    end
+    c.residual = sqrt(c.residual)
+    if c.residual < c.ε
+        c.at_iteration = k
+        return true
+    end
+    return false
+end
+function get_reason(c::StopWhenKKTResidualLess)
+    if (c.at_iteration >= 0)
+        return "After iteration #$(c.at_iteration) the algorithm stopped with a KKT residual $(c.residual) < $(c.ε).\n"
+    end
+    return ""
+end
+function status_summary(swrr::StopWhenKKTResidualLess)
+    has_stopped = (swrr.at_iteration >= 0)
+    s = has_stopped ?
"reached" : "not reached" + return "‖F(p, λ, μ)‖ < ε:\t$s" +end +indicates_convergence(::StopWhenKKTResidualLess) = true +function show(io::IO, c::StopWhenKKTResidualLess) + return print(io, "StopWhenKKTResidualLess($(c.ε))\n $(status_summary(c))") +end + +# An internal function to compute the new σ +@doc raw""" + calculate_σ(M, cmo, p, μ, λ, s) + +Compute the new ``σ`` factor for the barrier parameter in [`interior_point_Newton`](@ref) as + +```math +\min\{\frac{1}{2}, \lVert F(p; μ, λ, s)\rVert^{\frac{1}{2}} \}, +``` +where ``F`` is the KKT vector field, hence the [`KKTVectorFieldNormSq`](@ref) is used. + +# Keyword arguments + +* `vector_space=`[`Rn`](@ref Manopt.Rn) a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n`` +* `N` the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` the vector field lives on (generated using `vector_space`) +* `q` provide memory on `N` for interims computations +""" +function calculate_σ( + N::AbstractManifold, cmo::AbstractDecoratedManifoldObjective, p, μ, λ, s; kwargs... +) + return calculate_σ(N, get_objective(cmo, true), p, μ, λ, s; kwargs...) +end +function calculate_σ( + M::AbstractManifold, + cmo::ConstrainedManifoldObjective, + p, + μ, + λ, + s; + vector_space=Rn, + N=M × vector_space(length(μ)) × vector_space(length(λ)) × vector_space(length(s)), + q=allocate_result(N, rand), +) + copyto!(N[1], q[N, 1], p) + copyto!(N[2], q[N, 2], μ) + copyto!(N[3], q[N, 3], λ) + copyto!(N[4], q[N, 4], s) + return min(0.5, (KKTVectorFieldNormSq(cmo)(N, q))^(1 / 4)) +end diff --git a/src/plans/plan.jl b/src/plans/plan.jl index fdf69084d7..e299d5e2d7 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -128,8 +128,10 @@ include("adabtive_regularization_with_cubics_plan.jl") include("alternating_gradient_plan.jl") include("augmented_lagrangian_plan.jl") include("conjugate_gradient_plan.jl") +include("conjugate_residual_plan.jl") include("exact_penalty_method_plan.jl") include("frank_wolfe_plan.jl") +include("interior_point_Newton_plan.jl") include("quasi_newton_plan.jl") include("nonlinear_least_squares_plan.jl") include("difference_of_convex_plan.jl") diff --git a/src/plans/solver_state.jl b/src/plans/solver_state.jl index 34beccb7d1..c6b489e395 100644 --- a/src/plans/solver_state.jl +++ b/src/plans/solver_state.jl @@ -16,6 +16,31 @@ provide the access functions accordingly """ abstract type AbstractManoptSolverState end +""" + ClosedFormSubSolverState{E<:AbstractEvaluationType} <: AbstractManoptSolverState + +Subsolver state indicating that a closed-form solution is available with +[`AbstractEvaluationType`](@ref) `E`. 
+ +# Constructor + + ClosedFormSubSolverState(; evaluation=AllocatingEvaluation()) +""" +struct ClosedFormSubSolverState{E<:AbstractEvaluationType} <: AbstractManoptSolverState end +function ClosedFormSubSolverState(::E) where {E<:AbstractEvaluationType} + return ClosedFormSubSolverState{E}() +end +function ClosedFormSubSolverState(; + evaluation::E=AllocatingEvaluation() +) where {E<:AbstractEvaluationType} + return ClosedFormSubSolverState(evaluation) +end + +maybe_wrap_evaluation_type(s::AbstractManoptSolverState) = s +function maybe_wrap_evaluation_type(::E) where {E<:AbstractEvaluationType} + return ClosedFormSubSolverState{E}() +end + @doc raw""" AbstractGradientSolverState <: AbstractManoptSolverState diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 82bf725bcc..476c4d26e8 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -206,14 +206,18 @@ A functor representing Armijo line search including the last runs state string t # Fields -* `initial_stepsize`: (`1.0`) and initial step size -* `retraction_method`: (`default_retraction_method(M)`) the retraction to use -* `contraction_factor`: (`0.95`) exponent for line search reduction -* `sufficient_decrease`: (`0.1`) gain within Armijo's rule -* `last_stepsize`: (`initialstepsize`) the last step size to start the search with -* `initial_guess`: (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, +* `initial_stepsize`: (`1.0`) and initial step size +* `retraction_method`: (`default_retraction_method(M)`) the retraction to use +* `contraction_factor`: (`0.95`) exponent for line search reduction +* `sufficient_decrease`: (`0.1`) gain within Armijo's rule +* `last_stepsize`: (`initialstepsize`) the last step size to start the search with +* `initial_guess`: (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, [`AbstractManoptSolverState`](@ref) `s` and a current iterate `i` and a last step size `l`, this returns an initial guess. The default uses the last obtained stepsize +* `additional_decrease_condition`: (`(M,p) -> true`) specify a condition a new point has to additionally + fulfill. The default accepts all points. +* `additional_increase_condition`: (`(M,p) -> true`) specify a condtion that additionally to + checking a valid increase has to be fulfilled. The default accepts all points. as well as for internal use @@ -247,33 +251,37 @@ with keywords. * `η`: (`-get_gradient(mp, get_iterate(s));`) the search direction to use, by default the steepest descent direction. 
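+
+# Example
+
+A small sketch of the two new keywords: restrict the accepted candidates to, say, the upper
+hemisphere of the sphere. The condition itself is purely illustrative.
+
+```julia
+using Manopt, Manifolds
+M = Sphere(2)
+upper_hemisphere = (M, p) -> p[3] ≥ 0 # additional condition a candidate has to fulfill
+als = ArmijoLinesearch(M; additional_decrease_condition=upper_hemisphere)
+```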
""" -mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch +mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,I,F,IGF,DF,IF} <: Linesearch candidate_point::P - contraction_factor::Float64 - initial_guess::F - initial_stepsize::Float64 - last_stepsize::Float64 + contraction_factor::F + initial_guess::IGF + initial_stepsize::F + last_stepsize::F message::String retraction_method::TRM - sufficient_decrease::Float64 - stop_when_stepsize_less::Float64 - stop_when_stepsize_exceeds::Float64 - stop_increasing_at_step::Int - stop_decreasing_at_step::Int + sufficient_decrease::F + stop_when_stepsize_less::F + stop_when_stepsize_exceeds::F + stop_increasing_at_step::I + stop_decreasing_at_step::I + additional_decrease_condition::DF + additional_increase_condition::IF function ArmijoLinesearch( M::AbstractManifold=DefaultManifold(); + additional_decrease_condition::DF=(M, p) -> true, + additional_increase_condition::IF=(M, p) -> true, candidate_point::P=allocate_result(M, rand), - contraction_factor::Real=0.95, - initial_stepsize::Real=1.0, - initial_guess=armijo_initial_guess, + contraction_factor::F=0.95, + initial_stepsize::F=1.0, + initial_guess::IGF=armijo_initial_guess, retraction_method::TRM=default_retraction_method(M), - stop_when_stepsize_less::Real=0.0, - stop_when_stepsize_exceeds::Real=max_stepsize(M), - stop_increasing_at_step::Int=100, - stop_decreasing_at_step::Int=1000, + stop_when_stepsize_less::F=0.0, + stop_when_stepsize_exceeds=max_stepsize(M), + stop_increasing_at_step::I=100, + stop_decreasing_at_step::I=1000, sufficient_decrease=0.1, - ) where {TRM,P} - return new{TRM,P,typeof(initial_guess)}( + ) where {TRM,P,I,F,IGF,DF,IF} + return new{TRM,P,I,F,IGF,DF,IF}( candidate_point, contraction_factor, initial_guess, @@ -286,6 +294,8 @@ mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,F} <: Linesearch stop_when_stepsize_exceeds, stop_increasing_at_step, stop_decreasing_at_step, + additional_decrease_condition, + additional_increase_condition, ) end end @@ -298,22 +308,29 @@ function (a::ArmijoLinesearch)( ) p = get_iterate(s) X = get_gradient!(mp, get_gradient(s), p) + return a(mp, p, X, η; initial_guess=a.initial_guess(mp, s, i, a.last_stepsize)) +end +function (a::ArmijoLinesearch)( + mp::AbstractManoptProblem, p, X, η; initial_guess=1.0, kwargs... +) + l = norm(get_manifold(mp), p, η) (a.last_stepsize, a.message) = linesearch_backtrack!( get_manifold(mp), a.candidate_point, (M, p) -> get_cost_function(get_objective(mp))(M, p), p, X, - a.initial_guess(mp, s, i, a.last_stepsize), + initial_guess, a.sufficient_decrease, a.contraction_factor, η; retraction_method=a.retraction_method, - stop_when_stepsize_less=a.stop_when_stepsize_less / norm(get_manifold(mp), p, η), - stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / - norm(get_manifold(mp), p, η), + stop_when_stepsize_less=a.stop_when_stepsize_less / l, + stop_when_stepsize_exceeds=a.stop_when_stepsize_exceeds / l, stop_increasing_at_step=a.stop_increasing_at_step, stop_decreasing_at_step=a.stop_decreasing_at_step, + additional_decrease_condition=a.additional_decrease_condition, + additional_increase_condition=a.additional_increase_condition, ) return a.last_stepsize end @@ -333,10 +350,41 @@ function status_summary(als::ArmijoLinesearch) return "$(als)\nand a computed last stepsize of $(als.last_stepsize)" end get_message(a::ArmijoLinesearch) = a.message +function get_manopt_parameter(a::ArmijoLinesearch, s::Val{:DecreaseCondition}, args...) 
+ return get_manopt_parameter(a.additional_decrease_condition, args...) +end +function get_manopt_parameter(a::ArmijoLinesearch, ::Val{:IncreaseCondition}, args...) + return get_manopt_parameter(a.additional_increase_condition, args...) +end +function set_manopt_parameter!(a::ArmijoLinesearch, s::Val{:DecreaseCondition}, args...) + set_manopt_parameter!(a.additional_decrease_condition, args...) + return a +end +function set_manopt_parameter!(a::ArmijoLinesearch, ::Val{:IncreaseCondition}, args...) + set_manopt_parameter!(a.additional_increase_condition, args...) + return a +end @doc raw""" - (s, msg) = linesearch_backtrack(M, F, p, X, s, decrease, contract η = -X, f0 = f(p)) - (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p)) + (s, msg) = linesearch_backtrack( + M, + F, + p, + X, + s, + decrease, + contract, + η = -X, + f0 = f(p); + retraction_method=default_retraction_method(M), + stop_when_stepsize_less=0.0, + stop_when_stepsize_exceeds=max_stepsize(M, p) / norm(M, p, η), + stop_increasing_at_step=100, + stop_decreasing_at_step=1000 + additional_increase_condition = (M,p) -> true, + additional_decrease_condition = (M,p) -> true, + ) + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract, η = -X, f0 = f(p)) perform a line search @@ -356,12 +404,12 @@ with the step size `s` as second argument. ## Keywords -* `retraction_method`: (`default_retraction_method(M)`) the retraction to use. -* `stop_when_stepsize_less`: (`0.0`) to avoid numerical underflow -* `stop_when_stepsize_exceeds`: ([`max_stepsize`](@ref)`(M, p) / norm(M, p, η)`) to avoid leaving the injectivity radius on a manifold -* `stop_increasing_at_step`: (`100`) stop the initial increase of step size after these many steps -* `stop_decreasing_at_step`: (`1000`) stop the decreasing search after these many steps - +* `retraction_method`: the retraction to use. +* `stop_when_stepsize_less`: stop a bit early to avoid numerical underflow +* `stop_when_stepsize_exceeds`: stop at a max step size to avoid leaving the injectivity radius on a manifold +* `stop_increasing_at_step`: stop the initial increase of step size after these many steps +* `stop_decreasing_at_step`: stop the decreasing search after these many steps +* `increase_condition`: These keywords are used as safeguards, where only the max stepsize is a very manifold specific one. # Return value @@ -393,6 +441,8 @@ function linesearch_backtrack!( η::T=-X, f0=f(M, p); retraction_method::AbstractRetractionMethod=default_retraction_method(M), + additional_increase_condition=(M, p) -> true, + additional_decrease_condition=(M, p) -> true, stop_when_stepsize_less=0.0, stop_when_stepsize_exceeds=max_stepsize(M, p) / norm(M, p, η), stop_increasing_at_step=100, @@ -406,7 +456,9 @@ function linesearch_backtrack!( msg = "The search direction η might not be a descent direction, since ⟨η, grad_f(p)⟩ ≥ 0." 
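+        # only a message is collected here; the backtracking continues and the message is returned with the step size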
end i = 0 - while f_q < f0 + decrease * s * search_dir_inner # increase + # Ensure that both the original condition and the additional one are fulfilled afterwards + while f_q < f0 + decrease * s * search_dir_inner || !additional_increase_condition(M, q) + (stop_increasing_at_step == 0) && break i = i + 1 s = s / contract retract!(M, q, p, η, s, retraction_method) @@ -424,7 +476,9 @@ function linesearch_backtrack!( end end i = 0 - while f_q > f0 + decrease * s * search_dir_inner # decrease + # Ensure that both the original condition and the additional one are fulfilled afterwards + while (f_q > f0 + decrease * s * search_dir_inner) || + (!additional_decrease_condition(M, q)) i = i + 1 s = contract * s retract!(M, q, p, η, s, retraction_method) diff --git a/src/solvers/FrankWolfe.jl b/src/solvers/FrankWolfe.jl index 30e716c597..842899f9e9 100644 --- a/src/solvers/FrankWolfe.jl +++ b/src/solvers/FrankWolfe.jl @@ -32,7 +32,7 @@ mutable struct FrankWolfeState{ P, T, Pr, - St, + St<:AbstractManoptSolverState, TStep<:Stepsize, TStop<:StoppingCriterion, TM<:AbstractRetractionMethod, @@ -50,7 +50,7 @@ mutable struct FrankWolfeState{ M::AbstractManifold, p::P, sub_problem::Pr, - sub_state::Op; + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}; initial_vector::T=zero_vector(M, p), stopping_criterion::TStop=StopAfterIteration(200) | StopWhenGradientNormLess(1.0e-6), @@ -60,18 +60,18 @@ mutable struct FrankWolfeState{ ) where { P, Pr, - Op, T, TStop<:StoppingCriterion, TStep<:Stepsize, TM<:AbstractRetractionMethod, ITM<:AbstractInverseRetractionMethod, } - return new{P,T,Pr,Op,TStep,TStop,TM,ITM}( + sub_state_storage = maybe_wrap_evaluation_type(sub_state) + return new{P,T,Pr,typeof(sub_state_storage),TStep,TStop,TM,ITM}( p, initial_vector, sub_problem, - sub_state, + sub_state_storage, stopping_criterion, stepsize, retraction_method, @@ -88,9 +88,6 @@ function get_message(fws::FrankWolfeState) # for now only the sub solver might have messages return get_message(fws.sub_state) end -function get_message(::FrankWolfeState{P,T,F,<:InplaceEvaluation}) where {P,T,F} - return "" -end function set_iterate!(fws::FrankWolfeState, p) fws.p = p @@ -291,11 +288,12 @@ function Frank_Wolfe_method!( } dmgo = decorate_objective!(M, mgo; objective_type=objective_type, kwargs...) 
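+    # wrap the manifold and the decorated objective into the problem the solver iterates on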
dmp = DefaultManoptProblem(M, dmgo) + sub_state_storage = maybe_wrap_evaluation_type(sub_state) fws = FrankWolfeState( M, p, sub_problem, - sub_state; + sub_state_storage; initial_vector=initial_vector, retraction_method=retraction_method, stepsize=stepsize, @@ -309,11 +307,7 @@ function initialize_solver!(amp::AbstractManoptProblem, fws::FrankWolfeState) get_gradient!(amp, fws.X, fws.p) return fws end -function step_solver!( - amp::AbstractManoptProblem, - fws::FrankWolfeState{P,T,<:AbstractManoptProblem,<:AbstractManoptSolverState}, - i, -) where {P,T} +function step_solver!(amp::AbstractManoptProblem, fws::FrankWolfeState, i) M = get_manifold(amp) # update gradient get_gradient!(amp, fws.X, fws.p) # evaluate grad F(p), store the result in O.X @@ -335,7 +329,9 @@ end # Variant 2: sub task is a mutating function providing a closed form solution # function step_solver!( - amp::AbstractManoptProblem, fws::FrankWolfeState{P,T,F,InplaceEvaluation}, i + amp::AbstractManoptProblem, + fws::FrankWolfeState{P,T,F,ClosedFormSubSolverState{InplaceEvaluation}}, + i, ) where {P,T,F} M = get_manifold(amp) get_gradient!(amp, fws.X, fws.p) # evaluate grad F in place for O.X @@ -356,7 +352,9 @@ end # Variant 3: sub task is an allocating function providing a closed form solution # function step_solver!( - amp::AbstractManoptProblem, fws::FrankWolfeState{P,T,F,AllocatingEvaluation}, i + amp::AbstractManoptProblem, + fws::FrankWolfeState{P,T,F,ClosedFormSubSolverState{AllocatingEvaluation}}, + i, ) where {P,T,F} M = get_manifold(amp) get_gradient!(amp, fws.X, fws.p) # evaluate grad F in place for O.X diff --git a/src/solvers/adaptive_regularization_with_cubics.jl b/src/solvers/adaptive_regularization_with_cubics.jl index a6cb74ecf2..850aa9057f 100644 --- a/src/solvers/adaptive_regularization_with_cubics.jl +++ b/src/solvers/adaptive_regularization_with_cubics.jl @@ -21,9 +21,7 @@ a default value is given in brackets if a parameter can be left out in initializ * `retraction_method`: (`default_retraction_method(M)`) the retraction to use * `stopping_criterion`: ([`StopAfterIteration`](@ref)`(100)`) a [`StoppingCriterion`](@ref) * `sub_problem`: sub problem solved in each iteration -* `sub_state`: sub state for solving the sub problem, either a solver state if - the problem is an [`AbstractManoptProblem`](@ref) or an [`AbstractEvaluationType`](@ref) if it is a function, - where it defaults to [`AllocatingEvaluation`](@ref). +* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver Furthermore the following integral fields are defined @@ -43,8 +41,8 @@ Construct the solver state with all fields stated as keyword arguments. 
mutable struct AdaptiveRegularizationState{ P, T, - Pr<:Union{AbstractManoptProblem,<:Function}, - St<:Union{AbstractManoptSolverState,<:AbstractEvaluationType}, + Pr, + St<:AbstractManoptSolverState, TStop<:StoppingCriterion, R, TRTM<:AbstractRetractionMethod, @@ -103,12 +101,12 @@ function AdaptiveRegularizationState( RTM<:AbstractRetractionMethod, } isnothing(sub_problem) && error("No sub_problem provided,") - - return AdaptiveRegularizationState{P,T,Pr,St,SC,R,RTM}( + sub_state_storage = maybe_wrap_evaluation_type(sub_state) + return AdaptiveRegularizationState{P,T,Pr,typeof(sub_state_storage),SC,R,RTM}( p, X, sub_problem, - sub_state, + sub_state_storage, copy(M, p), copy(M, p, X), copy(M, p, X), @@ -428,13 +426,14 @@ function adaptive_regularization_with_cubics!( if isnothing(sub_problem) sub_problem = DefaultManoptProblem(TangentSpace(M, copy(M, p)), sub_objective) end + sub_state_storage = maybe_wrap_evaluation_type(sub_state) X = copy(M, p, initial_tangent_vector) dmp = DefaultManoptProblem(M, dmho) arcs = AdaptiveRegularizationState( M, p, X; - sub_state=sub_state, + sub_state=sub_state_storage, sub_problem=sub_problem, σ=σ, ρ_regularization=ρ_regularization, @@ -500,13 +499,13 @@ function solve_arc_subproblem!( return s end function solve_arc_subproblem!( - M, s, problem::P, ::AllocatingEvaluation, p + M, s, problem::P, ::ClosedFormSubSolverState{AllocatingEvaluation}, p ) where {P<:Function} copyto!(M, s, p, problem(M, p)) return s end function solve_arc_subproblem!( - M, s, problem!::P, ::InplaceEvaluation, p + M, s, problem!::P, ::ClosedFormSubSolverState{InplaceEvaluation}, p ) where {P<:Function} problem!(M, s, p) return s diff --git a/src/solvers/augmented_Lagrangian_method.jl b/src/solvers/augmented_Lagrangian_method.jl index 624ccf705b..d25bd2dd59 100644 --- a/src/solvers/augmented_Lagrangian_method.jl +++ b/src/solvers/augmented_Lagrangian_method.jl @@ -25,7 +25,7 @@ a default value is given in brackets if a parameter can be left out in initializ * `θ_ρ`: (`0.3`) the scaling factor of the penalty parameter * `θ_ϵ`: ((`(ϵ_min/ϵ)^(ϵ_exponent)`) the scaling factor of the accuracy tolerance * `penalty`: evaluation of the current penalty term, initialized to `Inf`. -* `stopping_criterion`: (`(`[`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(1e-10))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. +* `stop`: (`(`[`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(1e-10))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. # Constructor @@ -42,7 +42,7 @@ manifold- or objective specific defaults. 
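+
+# Example
+
+A rough sketch, where `M`, `co`, `sub_problem`, and `sub_state` are placeholders for a
+manifold, a constrained objective, and a sub solver set up beforehand:
+
+```julia
+alms = AugmentedLagrangianMethodState(
+    M, co, rand(M), sub_problem, sub_state;
+    ϵ=1e-4, λ_max=10.0,
+)
+```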
""" mutable struct AugmentedLagrangianMethodState{ P, - Pr<:AbstractManoptProblem, + Pr<:Union{F,AbstractManoptProblem} where {F}, St<:AbstractManoptSolverState, R<:Real, V<:AbstractVector{<:R}, @@ -70,7 +70,7 @@ mutable struct AugmentedLagrangianMethodState{ co::ConstrainedManifoldObjective, p::P, sub_problem::Pr, - sub_state::St; + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}; ϵ::R=1e-3, ϵ_min::R=1e-6, λ_max::R=20.0, @@ -90,18 +90,12 @@ mutable struct AugmentedLagrangianMethodState{ ) | StopWhenChangeLess(1e-10), kwargs..., - ) where { - P, - Pr<:AbstractManoptProblem, - R<:Real, - V, - SC<:StoppingCriterion, - St<:AbstractManoptSolverState, - } - alms = new{P,Pr,St,R,V,SC}() + ) where {P,Pr<:Union{F,AbstractManoptProblem} where {F},R<:Real,V,SC<:StoppingCriterion} + sub_state_storage = maybe_wrap_evaluation_type(sub_state) + alms = new{P,Pr,typeof(sub_state_storage),R,V,SC}() alms.p = p alms.sub_problem = sub_problem - alms.sub_state = sub_state + alms.sub_state = sub_state_storage alms.ϵ = ϵ alms.ϵ_min = ϵ_min alms.λ_max = λ_max @@ -268,8 +262,8 @@ the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) fo """ function augmented_Lagrangian_method( M::AbstractManifold, - f::TF, - grad_f::TGF, + f, + grad_f, p=rand(M); evaluation::AbstractEvaluationType=AllocatingEvaluation(), g=nothing, @@ -279,18 +273,8 @@ function augmented_Lagrangian_method( inequality_constrains::Union{Integer,Nothing}=nothing, equality_constrains::Union{Nothing,Integer}=nothing, kwargs..., -) where {TF,TGF} +) q = copy(M, p) - num_eq = if isnothing(equality_constrains) - _number_of_constraints(h, grad_h; M=M, p=p) - else - inequality_constrains - end - num_ineq = if isnothing(inequality_constrains) - _number_of_constraints(g, grad_g; M=M, p=p) - else - inequality_constrains - end cmo = ConstrainedManifoldObjective( f, grad_f, @@ -299,8 +283,8 @@ function augmented_Lagrangian_method( h, grad_h; evaluation=evaluation, - inequality_constrains=num_ineq, - equality_constrains=num_eq, + inequality_constrains=inequality_constrains, + equality_constrains=equality_constrains, M=M, p=p, ) diff --git a/src/solvers/cma_es.jl b/src/solvers/cma_es.jl index 952560edea..5e57298a57 100644 --- a/src/solvers/cma_es.jl +++ b/src/solvers/cma_es.jl @@ -23,8 +23,8 @@ State of covariance matrix adaptation evolution strategy. * `population` population of the current generation * `ys_c` coordinates of random vectors for the current generation * `covariance_matrix` coordinates of the covariance matrix -* `covariance_matrix_eigen` eigendecomposition of `covariance_matrix` -* `covariance_matrix_cond` condition number of `covariance_matrix`, updated after eigendecomposition +* `covariance_matrix_eigen` eigen decomposition of `covariance_matrix` +* `covariance_matrix_cond` condition number of `covariance_matrix`, updated after eigen decomposition * `best_fitness_current_gen` best fitness value of individuals in the current generation * `median_fitness_current_gen` median fitness value of individuals in the current generation * `worst_fitness_current_gen` worst fitness value of individuals in the current generation @@ -491,7 +491,7 @@ end vtm::AbstractVectorTransportMethod, ) -Transport the matrix with `matrix_eig` eigendecomposition when expanded in `basis` from +Transport the matrix with `matrix_eig` eigen decomposition when expanded in `basis` from point `p` to point `q` on `M`. Update `matrix_eigen` in-place. 
`(p, matrix_eig)` belongs to the fiber bundle of ``B = \mathcal M × SPD(n)``, where `n`
diff --git a/src/solvers/conjugate_residual.jl b/src/solvers/conjugate_residual.jl
new file mode 100644
index 0000000000..414b4d50dc
--- /dev/null
+++ b/src/solvers/conjugate_residual.jl
@@ -0,0 +1,123 @@
+@doc raw"""
+    conjugate_residual(TpM::TangentSpace, A, b, X=zero_vector(TpM))
+    conjugate_residual(TpM::TangentSpace, slso::SymmetricLinearSystemObjective, X=zero_vector(TpM))
+    conjugate_residual!(TpM::TangentSpace, A, b, X)
+    conjugate_residual!(TpM::TangentSpace, slso::SymmetricLinearSystemObjective, X)
+
+Compute the solution of ``\mathcal A(p)[X] + b(p) = 0_p``, where
+
+* ``\mathcal A`` is a linear, symmetric operator on ``T_p\mathcal M``
+* ``b`` is a vector field on the manifold
+* ``X ∈ T_p\mathcal M`` is a tangent vector
+* ``0_p`` is the zero vector in ``T_p\mathcal M``.
+
+This implementation follows Algorithm 3 in [LaiYoshise:2024](@cite) and
+is initialised with ``X^{(0)}`` as the zero vector and
+
+* the initial residual ``r^{(0)} = -b(p) - \mathcal A(p)[X^{(0)}]``
+* the initial conjugate direction ``d^{(0)} = r^{(0)}``
+* the initial ``Y^{(0)} = \mathcal A(p)[d^{(0)}]``
+
+It then performs the following steps at iteration ``k=0,…`` until the `stopping_criterion` is fulfilled.
+
+1. compute the step size ``α_k = \displaystyle\frac{\langle r^{(k)}, \mathcal A(p)[r^{(k)}] \rangle_p}{\langle \mathcal A(p)[d^{(k)}], \mathcal A(p)[d^{(k)}] \rangle_p}``
+2. do a step ``X^{(k+1)} = X^{(k)} + α_kd^{(k)}``
+3. update the residual ``r^{(k+1)} = r^{(k)} - α_k Y^{(k)}``
+4. compute ``Z = \mathcal A(p)[r^{(k+1)}]``
+5. update the conjugate coefficient ``β_k = \displaystyle\frac{\langle r^{(k+1)}, \mathcal A(p)[r^{(k+1)}] \rangle_p}{\langle r^{(k)}, \mathcal A(p)[r^{(k)}] \rangle_p}``
+6. update the conjugate direction ``d^{(k+1)} = r^{(k+1)} + β_kd^{(k)}``
+7. update ``Y^{(k+1)} = Z + β_k Y^{(k)}``
+
+Note that the right-hand side of step 7 is the same as evaluating ``\mathcal A(p)[d^{(k+1)}]``, but avoids that additional evaluation.
+
+# Input
+
+* `TpM` the [`TangentSpace`](@extref `ManifoldsBase.TangentSpace`) as the domain
+* `A` a symmetric linear operator on the tangent space `(M, p, X) -> Y`
+* `b` a vector field on the tangent space `(M, p) -> X`
+
+# Keyword arguments
+
+* `evaluation=`[`AllocatingEvaluation`](@ref) specify whether `A` and `b` are implemented allocating or in-place
+* `stopping_criterion::`[`StoppingCriterion`](@ref)`=`[`StopAfterIteration`](@ref)`(`[`manifold_dimension`](@extref ManifoldsBase.manifold_dimension-Tuple{AbstractManifold})`(TpM))`[` | `](@ref StopWhenAny)[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`,
+  where `c` is the norm ``\lVert b \rVert`` of `b`.
+
+# Output
+
+the obtained (approximate) minimizer ``X^*``.
+To obtain the whole final state of the solver, see [`get_solver_return`](@ref) for details.
+"""
+conjugate_residual(TpM::TangentSpace, args...; kwargs...)
+
+function conjugate_residual(
+    TpM::TangentSpace,
+    A,
+    b,
+    X=zero_vector(TpM);
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
+    kwargs...,
+)
+    slso = SymmetricLinearSystemObjective(A, b; evaluation=evaluation, kwargs...)
+    return conjugate_residual(TpM, slso, X; evaluation=evaluation, kwargs...)
+end
+function conjugate_residual(
+    TpM::TangentSpace, slso::SymmetricLinearSystemObjective, X=zero_vector(TpM); kwargs...
+)
+    Y = copy(TpM, X)
+    return conjugate_residual!(TpM, slso, Y; kwargs...)
+end + +function conjugate_residual!( + TpM::TangentSpace, + slso::SymmetricLinearSystemObjective, + X; + stopping_criterion::SC=StopAfterIteration(manifold_dimension(TpM)) | + StopWhenRelativeResidualLess( + norm(base_manifold(TpM), base_point(TpM), get_b(TpM, slso, X)), 1e-8 + ), + kwargs..., +) where {SC<:StoppingCriterion} + crs = ConjugateResidualState( + TpM, slso; stopping_criterion=stopping_criterion, kwargs... + ) + dslso = decorate_objective!(TpM, slso; kwargs...) + dmp = DefaultManoptProblem(TpM, dslso) + dcrs = decorate_state!(crs; kwargs...) + solve!(dmp, dcrs) + return get_solver_return(get_objective(dmp), dcrs) +end + +function initialize_solver!( + amp::AbstractManoptProblem{<:TangentSpace}, crs::ConjugateResidualState +) + TpM = get_manifold(amp) + get_hessian!(TpM, crs.r, get_objective(amp), base_point(TpM), crs.X) + crs.r .*= -1 + crs.r .-= get_b(TpM, get_objective(amp), crs.X) + copyto!(TpM, crs.d, crs.r) + get_hessian!(amp, crs.Ar, crs.X, crs.r) + copyto!(TpM, crs.Ad, crs.Ar) + crs.α = 0.0 + crs.β = 0.0 + return crs +end + +function step_solver!( + amp::AbstractManoptProblem{<:TangentSpace}, crs::ConjugateResidualState, i +) + TpM = get_manifold(amp) + M = base_manifold(TpM) + p = base_point(TpM) + crs.α = inner(M, p, crs.r, crs.Ar) / inner(M, p, crs.Ad, crs.Ad) + crs.X .+= crs.α .* crs.d + crs.rAr = inner(M, p, crs.r, crs.Ar) + crs.r .-= crs.α .* crs.Ad + get_hessian!(amp, crs.Ar, crs.X, crs.r) + crs.β = inner(M, p, crs.r, crs.Ar) / crs.rAr + crs.d .= crs.r .+ crs.β .* crs.d + crs.Ad .= crs.Ar .+ crs.β .* crs.Ad + return crs +end + +get_solver_result(crs::ConjugateResidualState) = crs.X diff --git a/src/solvers/convex_bundle_method.jl b/src/solvers/convex_bundle_method.jl index ca6eba61ac..de3c1c292f 100644 --- a/src/solvers/convex_bundle_method.jl +++ b/src/solvers/convex_bundle_method.jl @@ -30,7 +30,7 @@ Stores option values for a [`convex_bundle_method`](@ref) solver. 
* `λ`: convex coefficients that solve the subproblem * `ξ`: the stopping parameter given by ``ξ = -\lvert g\rvert^2 – ε`` * `sub_problem`: ([`convex_bundle_method_subsolver`]) a function that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`, -* `sub_state`: an [`AbstractEvaluationType`](@ref) indicating whether `sub_problem` works in-place of `λ` or allocates a solution +* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver # Constructor @@ -42,8 +42,8 @@ with keywords for all fields with defaults besides `p_last_serious` which obtain mutable struct ConvexBundleMethodState{ P, T, - Pr, - St, + Pr<:Union{F,AbstractManoptProblem} where {F}, + St<:AbstractManoptSolverState, R, A<:AbstractVector{<:R}, B<:AbstractVector{Tuple{<:P,<:T}}, @@ -55,7 +55,7 @@ mutable struct ConvexBundleMethodState{ TS<:Stepsize, TSC<:StoppingCriterion, VT<:AbstractVectorTransportMethod, -} <: AbstractManoptSolverState where {R<:Real,P,T,I<:Int,Pr,St} +} <: AbstractManoptSolverState where {R<:Real,P,T,I<:Int,Pr} atol_λ::R atol_errors::R bundle::B @@ -100,7 +100,7 @@ mutable struct ConvexBundleMethodState{ X::T=zero_vector(M, p), vector_transport_method::VT=default_vector_transport_method(M, typeof(p)), sub_problem::Pr=convex_bundle_method_subsolver, - sub_state::St=AllocatingEvaluation(), + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=AllocatingEvaluation(), k_size=nothing,# deprecated p_estimate=nothing,# deprecated ϱ=nothing,# deprecated @@ -110,8 +110,6 @@ mutable struct ConvexBundleMethodState{ P, T, Pr, - St, - TM<:AbstractManifold, TR<:AbstractRetractionMethod, SC<:StoppingCriterion, @@ -119,6 +117,7 @@ mutable struct ConvexBundleMethodState{ VT<:AbstractVectorTransportMethod, R<:Real, } + sub_state_storage = maybe_wrap_evaluation_type(sub_state) bundle = Vector{Tuple{P,T}}() g = zero_vector(M, p) last_stepsize = one(R) @@ -133,7 +132,7 @@ mutable struct ConvexBundleMethodState{ P, T, Pr, - St, + typeof(sub_state_storage), typeof(m), typeof(linearization_errors), typeof(bundle), @@ -170,7 +169,7 @@ mutable struct ConvexBundleMethodState{ ξ, λ, sub_problem, - sub_state, + sub_state_storage, ϱ,# deprecated ) end @@ -332,7 +331,7 @@ function convex_bundle_method!( ), vector_transport_method::VTransp=default_vector_transport_method(M, typeof(p)), sub_problem=convex_bundle_method_subsolver, - sub_state=evaluation, + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=evaluation, k_size=nothing,# deprecated p_estimate=nothing,# deprecated ϱ=nothing,# deprecated @@ -341,6 +340,7 @@ function convex_bundle_method!( sgo = ManifoldSubgradientObjective(f, ∂f!!; evaluation=evaluation) dsgo = decorate_objective!(M, sgo; kwargs...) 
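+    # collect the manifold and the decorated subgradient objective in a problem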
mp = DefaultManoptProblem(M, dsgo) + sub_state_storage = maybe_wrap_evaluation_type(sub_state) bms = ConvexBundleMethodState( M, p; @@ -357,7 +357,7 @@ function convex_bundle_method!( stopping_criterion=stopping_criterion, vector_transport_method=vector_transport_method, sub_problem=sub_problem, - sub_state=sub_state, + sub_state=sub_state_storage, k_size=k_size,# deprecated p_estimate=p_estimate,# deprecated ϱ=ϱ,# deprecated @@ -461,7 +461,7 @@ end # Dispatching on different types of subsolvers # (a) closed form allocating function _convex_bundle_subsolver!( - M, bms::ConvexBundleMethodState{P,T,F,AllocatingEvaluation} + M, bms::ConvexBundleMethodState{P,T,F,ClosedFormSubSolverState{AllocatingEvaluation}} ) where {P,T,F} bms.λ = bms.sub_problem( M, bms.p_last_serious, bms.linearization_errors, bms.transported_subgradients @@ -470,7 +470,7 @@ function _convex_bundle_subsolver!( end # (b) closed form in-place function _convex_bundle_subsolver!( - M, bms::ConvexBundleMethodState{P,T,F,InplaceEvaluation} + M, bms::ConvexBundleMethodState{P,T,F,ClosedFormSubSolverState{InplaceEvaluation}} ) where {P,T,F} bms.sub_problem( M, bms.λ, bms.p_last_serious, bms.linearization_errors, bms.transported_subgradients diff --git a/src/solvers/difference-of-convex-proximal-point.jl b/src/solvers/difference-of-convex-proximal-point.jl index b43fc0a002..66fe545977 100644 --- a/src/solvers/difference-of-convex-proximal-point.jl +++ b/src/solvers/difference-of-convex-proximal-point.jl @@ -1,6 +1,7 @@ @doc raw""" - DifferenceOfConvexProximalState{Type} <: Options + DifferenceOfConvexProximalState{Type} <: AbstractSubProblemSolverState + A struct to store the current state of the algorithm as well as the form. It comes in two forms, depending on the realisation of the `subproblem`. @@ -14,6 +15,8 @@ It comes in two forms, depending on the realisation of the `subproblem`. 
* `stop`: ([`StopWhenChangeLess`](@ref)`(1e-8)`) a [`StoppingCriterion`](@ref) * `X`, `Y`: (`zero_vector(M,p)`) the current gradient and descent direction, respectively their common type is set by the keyword `X` +* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem or a function `(M, p, X) -> q` or `(M, q, p, X)` for the a closed form solution of the sub problem +* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver or an [`AbstractEvaluationType`](@ref) in case the sub problem is provided as a function # Constructor @@ -28,7 +31,7 @@ mutable struct DifferenceOfConvexProximalState{ P, T, Pr, - St, + St<:AbstractManoptSolverState, S<:Stepsize, SC<:StoppingCriterion, RTR<:AbstractRetractionMethod, @@ -50,7 +53,7 @@ mutable struct DifferenceOfConvexProximalState{ M::AbstractManifold, p::P, sub_problem::Pr, - sub_state::St; + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}; X::T=zero_vector(M, p), stepsize::S=ConstantStepsize(M), stopping_criterion::SC=StopWhenChangeLess(1e-8), @@ -61,20 +64,20 @@ mutable struct DifferenceOfConvexProximalState{ P, T, Pr, - St, S<:Stepsize, SC<:StoppingCriterion, I<:AbstractInverseRetractionMethod, R<:AbstractRetractionMethod, Fλ, } - return new{P,T,Pr,St,S,SC,R,I,Fλ}( + sub_state_storage = maybe_wrap_evaluation_type(sub_state) + return new{P,T,Pr,typeof(sub_state_storage),S,SC,R,I,Fλ}( λ, p, copy(M, p), copy(M, p), sub_problem, - sub_state, + sub_state_storage, X, retraction_method, inverse_retraction_method, @@ -368,7 +371,7 @@ function difference_of_convex_proximal_point!( sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState,Nothing}=if !isnothing( prox_g ) - evaluation + maybe_wrap_evaluation_type(evaluation) elseif isnothing(sub_objective) nothing else @@ -414,7 +417,7 @@ function difference_of_convex_proximal_point!( M, p, sub_problem, - sub_state; + maybe_wrap_evaluation_type(sub_state); X=X, stepsize=stepsize, stopping_criterion=stopping_criterion, @@ -435,7 +438,9 @@ end =# function step_solver!( amp::AbstractManoptProblem, - dcps::DifferenceOfConvexProximalState{P,T,<:Function,AllocatingEvaluation}, + dcps::DifferenceOfConvexProximalState{ + P,T,<:Function,ClosedFormSubSolverState{AllocatingEvaluation} + }, i, ) where {P,T} M = get_manifold(amp) @@ -454,7 +459,9 @@ end =# function step_solver!( amp::AbstractManoptProblem, - dcps::DifferenceOfConvexProximalState{P,T,<:Function,InplaceEvaluation}, + dcps::DifferenceOfConvexProximalState{ + P,T,<:Function,ClosedFormSubSolverState{InplaceEvaluation} + }, i, ) where {P,T} M = get_manifold(amp) diff --git a/src/solvers/difference_of_convex_algorithm.jl b/src/solvers/difference_of_convex_algorithm.jl index 2049dc6d7b..45ffddc888 100644 --- a/src/solvers/difference_of_convex_algorithm.jl +++ b/src/solvers/difference_of_convex_algorithm.jl @@ -40,8 +40,9 @@ Here the elements passed are the current iterate `p` and the subgradient `X` of * `initial_vector=zero_vector` (`zero_vectoir(M,p)`) how to initialize the inner gradient tangent vector * `stopping_criterion` a [`StopAfterIteration`](@ref)`(200)` a stopping criterion """ -mutable struct DifferenceOfConvexState{Pr,St,P,T,SC<:StoppingCriterion} <: - AbstractSubProblemSolverState +mutable struct DifferenceOfConvexState{ + Pr,St<:AbstractManoptSolverState,P,T,SC<:StoppingCriterion +} <: AbstractSubProblemSolverState p::P X::T sub_problem::Pr @@ -51,13 +52,12 @@ mutable struct DifferenceOfConvexState{Pr,St,P,T,SC<:StoppingCriterion} <: M::AbstractManifold, p::P, sub_problem::Pr, - sub_state::St; + 
        sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState};
         initial_vector::T=zero_vector(M, p),
         stopping_criterion::SC=StopAfterIteration(300) | StopWhenChangeLess(1e-9),
-    ) where {
-        P,Pr<:AbstractManoptProblem,St<:AbstractManoptSolverState,T,SC<:StoppingCriterion
-    }
-        return new{Pr,St,P,T,SC}(
-            p, initial_vector, sub_problem, sub_state, stopping_criterion
+    ) where {P,Pr<:AbstractManoptProblem,T,SC<:StoppingCriterion}
+        sub_state_storage = maybe_wrap_evaluation_type(sub_state)
+        return new{Pr,typeof(sub_state_storage),P,T,SC}(
+            p, initial_vector, sub_problem, sub_state_storage, stopping_criterion
         )
     end
@@ -68,10 +68,11 @@ mutable struct DifferenceOfConvexState{Pr,St,P,T,SC<:StoppingCriterion} <:
         sub_problem::S;
         initial_vector::T=zero_vector(M, p),
         stopping_criterion::SC=StopAfterIteration(300) | StopWhenChangeLess(1e-9),
-        evaluation=AllocatingEvaluation(),
+        evaluation::AbstractEvaluationType=AllocatingEvaluation(),
     ) where {P,S<:Function,T,SC<:StoppingCriterion}
-        return new{S,typeof(evaluation),P,T,SC}(
-            p, initial_vector, sub_problem, evaluation, stopping_criterion
+        sub_state_storage = maybe_wrap_evaluation_type(evaluation)
+        return new{S,typeof(sub_state_storage),P,T,SC}(
+            p, initial_vector, sub_problem, sub_state_storage, stopping_criterion
         )
     end
 end
@@ -374,7 +375,7 @@ function difference_of_convex_algorithm!(
         M,
         p,
         sub_problem;
-        evaluation=sub_state,
+        evaluation=evaluation,
         stopping_criterion=stopping_criterion,
         initial_vector=initial_vector,
     )
@@ -386,11 +387,7 @@ end
 function initialize_solver!(::AbstractManoptProblem, dcs::DifferenceOfConvexState)
     return dcs
 end
-function step_solver!(
-    amp::AbstractManoptProblem,
-    dcs::DifferenceOfConvexState{<:AbstractManoptProblem,<:AbstractManoptSolverState},
-    i,
-)
+function step_solver!(amp::AbstractManoptProblem, dcs::DifferenceOfConvexState, i)
     M = get_manifold(amp)
     get_subtrahend_gradient!(amp, dcs.X, dcs.p)
     set_manopt_parameter!(dcs.sub_problem, :Objective, :Cost, :p, dcs.p)
@@ -412,9 +409,9 @@ end
 #
 function step_solver!(
     amp::AbstractManoptProblem,
-    dcs::DifferenceOfConvexState{<:Function,InplaceEvaluation},
+    dcs::DifferenceOfConvexState{F,ClosedFormSubSolverState{InplaceEvaluation}},
     i,
-)
+) where {F}
     M = get_manifold(amp)
     get_subtrahend_gradient!(amp, dcs.X, dcs.p) # evaluate grad F in place for O.X
     dcs.sub_problem(M, dcs.p, dcs.p, dcs.X) # evaluate the closed form solution and store the result in p
@@ -425,9 +422,9 @@ end
 #
 function step_solver!(
     amp::AbstractManoptProblem,
-    dcs::DifferenceOfConvexState{<:Function,AllocatingEvaluation},
+    dcs::DifferenceOfConvexState{F,ClosedFormSubSolverState{AllocatingEvaluation}},
     i,
-)
+) where {F}
     M = get_manifold(amp)
     get_subtrahend_gradient!(amp, dcs.X, dcs.p) # evaluate grad F in place for O.X
     # run the subsolver in-place of a copy of the current iterate and copy it back to the current iterate
diff --git a/src/solvers/exact_penalty_method.jl b/src/solvers/exact_penalty_method.jl
index 46b1447962..fca9cabfce 100644
--- a/src/solvers/exact_penalty_method.jl
+++ b/src/solvers/exact_penalty_method.jl
@@ -27,8 +27,13 @@ construct an exact penalty options with the remaining previously mentioned field
 
 [`exact_penalty_method`](@ref)
 """
-mutable struct ExactPenaltyMethodState{P,Pr,St,R<:Real,TStopping<:StoppingCriterion} <:
-               AbstractSubProblemSolverState
+mutable struct ExactPenaltyMethodState{
+    P,
+    Pr<:Union{F,AbstractManoptProblem} where {F},
+    St<:AbstractManoptSolverState,
+    R<:Real,
+    TStopping<:StoppingCriterion,
+} <: AbstractSubProblemSolverState
     p::P
     sub_problem::Pr
     sub_state::St
@@ -45,7 +50,7 @@ mutable struct ExactPenaltyMethodState{P,Pr,St,R<:Real,TStopping<:StoppingCriter
        ::AbstractManifold,
         p::P,
         sub_problem::Pr,
-        sub_state::St;
+        sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState};
         ϵ::R=1e-3,
         ϵ_min::R=1e-6,
         ϵ_exponent=1 / 100,
@@ -59,17 +64,12 @@ mutable struct ExactPenaltyMethodState{P,Pr,St,R<:Real,TStopping<:StoppingCriter
         stopping_criterion::SC=StopAfterIteration(300) | (
             StopWhenSmallerOrEqual(:ϵ, ϵ_min) | StopWhenChangeLess(1e-10)
         ),
-    ) where {
-        P,
-        Pr<:AbstractManoptProblem,
-        St<:AbstractManoptSolverState,
-        R<:Real,
-        SC<:StoppingCriterion,
-    }
-        epms = new{P,Pr,St,R,SC}()
+    ) where {P,Pr<:Union{F,AbstractManoptProblem} where {F},R<:Real,SC<:StoppingCriterion}
+        sub_state_storage = maybe_wrap_evaluation_type(sub_state)
+        epms = new{P,Pr,typeof(sub_state_storage),R,SC}()
         epms.p = p
         epms.sub_problem = sub_problem
-        epms.sub_state = sub_state
+        epms.sub_state = sub_state_storage
         epms.ϵ = ϵ
         epms.ϵ_min = ϵ_min
         epms.u = u
@@ -233,16 +233,6 @@ function exact_penalty_method(
     equality_constrains::Union{Nothing,Integer}=nothing,
     kwargs...,
 ) where {TF,TGF}
-    num_eq = if isnothing(equality_constrains)
-        _number_of_constraints(h, grad_h; M=M, p=p)
-    else
-        inequality_constrains
-    end
-    num_ineq = if isnothing(inequality_constrains)
-        _number_of_constraints(g, grad_g; M=M, p=p)
-    else
-        inequality_constrains
-    end
     cmo = ConstrainedManifoldObjective(
         f,
         grad_f,
         g,
         grad_g,
         h,
         grad_h;
         evaluation=evaluation,
-        equality_constrains=num_eq,
-        inequality_constrains=num_ineq,
+        equality_constrains=equality_constrains,
+        inequality_constrains=inequality_constrains,
         M=M,
         p=p,
     )
@@ -373,7 +363,7 @@ function exact_penalty_method!(
     sub_cost=ExactPenaltyCost(cmo, ρ, u; smoothing=smoothing),
     sub_grad=ExactPenaltyGrad(cmo, ρ, u; smoothing=smoothing),
     sub_kwargs=(;),
-    sub_problem::AbstractManoptProblem=DefaultManoptProblem(
+    sub_problem::Pr=DefaultManoptProblem(
         M,
         decorate_objective!(
             M,
@@ -385,7 +375,7 @@ function exact_penalty_method!(
     sub_stopping_criterion=StopAfterIteration(300) |
                            StopWhenGradientNormLess(ϵ) |
                            StopWhenStepsizeLess(1e-8),
-    sub_state::AbstractManoptSolverState=decorate_state!(
+    sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=decorate_state!(
         QuasiNewtonState(
             M,
             copy(M, p);
@@ -403,12 +393,16 @@ function exact_penalty_method!(
         StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
     ),
     kwargs...,
-) where {O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective}}
+) where {
+    O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective},
+    Pr<:Union{F,AbstractManoptProblem} where {F},
+}
+    sub_state_storage = maybe_wrap_evaluation_type(sub_state)
     emps = ExactPenaltyMethodState(
         M,
         p,
         sub_problem,
-        sub_state;
+        sub_state_storage;
         ϵ=ϵ,
         ϵ_min=ϵ_min,
         u=u,
diff --git a/src/solvers/interior_point_Newton.jl b/src/solvers/interior_point_Newton.jl
new file mode 100644
index 0000000000..9822f39522
--- /dev/null
+++ b/src/solvers/interior_point_Newton.jl
@@ -0,0 +1,383 @@
+_doc_IPN = raw"""
+    interior_point_Newton(M, f, grad_f, Hess_f, p=rand(M); kwargs...)
+    interior_point_Newton(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...)
+    interior_point_Newton!(M, f, grad_f, Hess_f, p; kwargs...)
+    interior_point_Newton!(M, cmo::ConstrainedManifoldObjective, p; kwargs...)
+
+perform the interior point Newton method following [LaiYoshise:2024](@cite).
+
+In order to solve the constrained problem
+
+```math
+\begin{aligned}
+\min_{p ∈\mathcal{M}} &f(p)\\
+\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i=1,…,m,\\
+\quad &h_j(p)=0 \quad \text{ for } j=1,…,n,
+\end{aligned}
+```
+
+this algorithm iteratively solves the linear system obtained by extending the KKT system
+by a slack variable `s`,
+
+```math
+\operatorname{J} F(p, μ, λ, s)[X, Y, Z, W] = -F(p, μ, λ, s),
+\text{ where }
+X ∈ T_p\mathcal M, Y,W ∈ ℝ^m, Z ∈ ℝ^n,
+```
+
+see [`CondensedKKTVectorFieldJacobian`](@ref) and [`CondensedKKTVectorField`](@ref), respectively,
+for the reduced form in which this system is usually solved.
+From the resulting `X` and `Z` of the reduced form, the other two components, ``Y`` and ``W``, are then computed.
+
+From the gradient ``(X,Y,Z,W)`` at the current iterate ``(p, μ, λ, s)``,
+a line search is performed using the [`KKTVectorFieldNormSq`](@ref) norm of the KKT vector field (squared)
+and its gradient [`KKTVectorFieldNormSqGradient`](@ref) together with the [`InteriorPointCentralityCondition`](@ref).
+
+Note that since the vector field ``F`` includes the gradients of the constraint
+functions ``g, h``, its gradient or Jacobian requires the Hessians of the constraints.
+
+For that search direction a line search is performed, which additionally ensures that
+the constraints remain fulfilled.
+
+# Input
+
+* `M`: a manifold ``\mathcal M``
+* `f`: a cost function ``f : \mathcal M → ℝ`` to minimize
+* `grad_f`: the gradient ``\operatorname{grad} f : \mathcal M → T \mathcal M`` of ``f``
+* `Hess_f`: the Hessian ``\operatorname{Hess}f(p): T_p\mathcal M → T_p\mathcal M``, ``X ↦ \operatorname{Hess}f(p)[X] = ∇_X\operatorname{grad}f(p)``
+* `p=`[`rand`](@extref Base.rand-Tuple{AbstractManifold})`(M)`: an initial value ``p ∈ \mathcal M``
+
+or a [`ConstrainedManifoldObjective`](@ref) `cmo` containing `f`, `grad_f`, `Hess_f`, and the constraints
+
+# Keyword arguments
+
+The keyword arguments related to the constraints (the first eleven) are ignored if you
+pass a [`ConstrainedManifoldObjective`](@ref) `cmo`.
+
+* `centrality_condition=missing`: an additional condition for accepting a step size.
+  This can be used to ensure that the resulting iterate is still an interior point if you provide a check `(N,q) -> true/false`,
+  where `N` is the manifold of the `step_problem`.
+* `equality_constraints=nothing`: the number ``n`` of equality constraints.
+* `evaluation=`[`AllocatingEvaluation`](@ref)`()`:
+  specify whether the functions that return an array, for example a point or a tangent vector, work by allocating their result ([`AllocatingEvaluation`](@ref)) or whether they modify their input argument to return the result therein ([`InplaceEvaluation`](@ref)). Since usually the first argument is the manifold, the modified argument is the second.
+* `g=nothing`: the inequality constraints
+* `grad_g=nothing`: the gradient of the inequality constraints
+* `grad_h=nothing`: the gradient of the equality constraints
+* `gradient_range=nothing`: specify how gradients are represented, where `nothing` is equivalent to [`NestedPowerRepresentation`](@extref)
+* `gradient_equality_range=gradient_range`: specify how the gradients of the equality constraints are represented
+* `gradient_inequality_range=gradient_range`: specify how the gradients of the inequality constraints are represented
+* `h=nothing`: the equality constraints
+* `Hess_g=nothing`: the Hessian of the inequality constraints
+* `Hess_h=nothing`: the Hessian of the equality constraints
+* `inequality_constraints=nothing`: the number ``m`` of inequality constraints.
+* `λ=ones(length(h(M, p)))`: the Lagrange multiplier with respect to the equality constraints ``h``
+* `μ=ones(length(g(M, p)))`: the Lagrange multiplier with respect to the inequality constraints ``g``
+* `retraction_method=`[`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`)`(M, typeof(p))`:
+  the retraction to use, defaults to the default set for `M` with respect to the representation chosen for `p`.
+* `ρ=μ's / length(μ)`: stores the orthogonality `μ's/m` to compute the barrier parameter `β` in the sub problem.
+* `s=copy(μ)`: initial value for the slack variables
+* `σ=`[`calculate_σ`](@ref)`(M, cmo, p, μ, λ, s)`: scaling factor for the barrier parameter `β` in the sub problem, which is updated during the iterations
+* `step_objective`: a [`ManifoldGradientObjective`](@ref) of the norm of the KKT vector field [`KKTVectorFieldNormSq`](@ref) and its gradient [`KKTVectorFieldNormSqGradient`](@ref)
+* `step_problem`: the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` together with the `step_objective`
+  as the problem the line search `stepsize=` employs for determining a step size
+* `step_state`: the [`StepsizeState`](@ref) with point and search direction
+* `stepsize`: an [`ArmijoLinesearch`](@ref) with the [`InteriorPointCentralityCondition`](@ref) as
+  additional condition to accept a step. Note that this step size operates on its own `step_problem` and `step_state`.
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`[` | `](@ref StopWhenAny)[`StopWhenKKTResidualLess`](@ref)`(1e-8)`:
+  a stopping criterion, by default depending on the residual of the KKT vector field or a maximal number of steps, whichever hits first.
+* `sub_kwargs=(;)`: keyword arguments to decorate the sub options, for example debug options, which automatically respect the main solver's debug options (like sub-sampling)
+* `sub_objective`: the [`SymmetricLinearSystemObjective`](@ref) modelling the system of equations to use in the sub solver;
+  it includes the [`CondensedKKTVectorFieldJacobian`](@ref) ``\mathcal A(X)`` and the [`CondensedKKTVectorField`](@ref) ``b`` in ``\mathcal A(X) + b = 0``, the system we aim to solve.
+  This is used to set up the `sub_problem`. If you set the `sub_problem` directly, this keyword has no effect.
+* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(manifold_dimension(M))`[` | `](@ref StopWhenAny)[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`, where ``c = \lVert b \rVert`` from the system to solve.
+  This keyword is used in the `sub_state`. If you set that keyword directly, this keyword does not have an effect.
+* `sub_problem`: combining the `sub_objective` and the tangent space at ``(p,λ)`` on the manifold ``\mathcal M × ℝ^n`` to a manopt problem.
+  This is the manifold and objective for the sub solver.
+* `sub_state=`[`ConjugateResidualState`](@ref): a state specifying the subsolver. This default is also decorated with the `sub_kwargs...`.
+* `vector_space=`[`Rn`](@ref Manopt.Rn): a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n``
+* `X=`[`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`:
+  the initial gradient with respect to `p`.
+* `Y=zero(μ)`: the initial gradient with respect to `μ`
+* `Z=zero(λ)`: the initial gradient with respect to `λ`
+* `W=zero(s)`: the initial gradient with respect to `s`
+
+There are also internal keywords, like `_step_M`, `_step_p`, `_sub_M`, `_sub_p`, and `_sub_X`,
+used to set up the keywords above; these should not be changed.
+
+All other keyword arguments are passed to [`decorate_state!`](@ref) for state decorators or
+[`decorate_objective!`](@ref) for the objective, respectively.
+
+!!! note
+
+    The default `centrality_condition=missing` disables the centrality check during the line search,
+    but you can pass [`InteriorPointCentralityCondition`](@ref)`(cmo, γ)`, where `γ` is a constant,
+    to activate this check.
+
+# Output
+
+The obtained approximate constrained minimizer ``p^*``.
+To obtain the whole final state of the solver, see [`get_solver_return`](@ref) for details, especially the `return_state=` keyword.
+"""
+
+@doc "$(_doc_IPN)"
+interior_point_Newton(M::AbstractManifold, args...; kwargs...)
+function interior_point_Newton(
+    M::AbstractManifold,
+    f,
+    grad_f,
+    Hess_f,
+    p=rand(M);
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
+    g=nothing,
+    h=nothing,
+    grad_g=nothing,
+    grad_h=nothing,
+    Hess_g=nothing,
+    Hess_h=nothing,
+    inequality_constrains::Union{Integer,Nothing}=nothing,
+    equality_constrains::Union{Nothing,Integer}=nothing,
+    kwargs...,
+)
+    q = copy(M, p)
+    cmo = ConstrainedManifoldObjective(
+        f,
+        grad_f,
+        g,
+        grad_g,
+        h,
+        grad_h;
+        hess_f=Hess_f,
+        hess_g=Hess_g,
+        hess_h=Hess_h,
+        evaluation=evaluation,
+        inequality_constrains=inequality_constrains,
+        equality_constrains=equality_constrains,
+        M=M,
+        p=p,
+    )
+    return interior_point_Newton!(M, cmo, q; evaluation=evaluation, kwargs...)
+end
+function interior_point_Newton(
+    M::AbstractManifold, cmo::O, p; kwargs...
+) where {O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective}}
+    q = copy(M, p)
+    return interior_point_Newton!(M, cmo, q; kwargs...)
+end
+
+@doc "$(_doc_IPN)"
+interior_point_Newton!(M::AbstractManifold, args...; kwargs...)
+
+function interior_point_Newton!(
+    M::AbstractManifold,
+    f,
+    grad_f,
+    Hess_f,
+    p;
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
+    g=nothing,
+    h=nothing,
+    grad_g=nothing,
+    grad_h=nothing,
+    Hess_g=nothing,
+    Hess_h=nothing,
+    inequality_constrains=nothing,
+    equality_constrains=nothing,
+    kwargs...,
+)
+    cmo = ConstrainedManifoldObjective(
+        f,
+        grad_f,
+        g,
+        grad_g,
+        h,
+        grad_h;
+        hess_f=Hess_f,
+        hess_g=Hess_g,
+        hess_h=Hess_h,
+        evaluation=evaluation,
+        equality_constrains=equality_constrains,
+        inequality_constrains=inequality_constrains,
+        M=M,
+        p=p,
+    )
+    dcmo = decorate_objective!(M, cmo; kwargs...)
+ return interior_point_Newton!( + M, + dcmo, + p; + evaluation=evaluation, + equality_constrains=equality_constrains, + inequality_constrains=inequality_constrains, + kwargs..., + ) +end +function interior_point_Newton!( + M::AbstractManifold, + cmo::O, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + X=get_gradient(M, cmo, p), + μ::AbstractVector=ones(inequality_constraints_length(cmo)), + Y::AbstractVector=zero(μ), + λ::AbstractVector=zeros(equality_constraints_length(cmo)), + Z::AbstractVector=zero(λ), + s::AbstractVector=copy(μ), + W::AbstractVector=zero(s), + ρ::Real=μ's / length(μ), + σ::Real=calculate_σ(M, cmo, p, μ, λ, s), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), + sub_kwargs=(;), + vector_space=Rn, + #γ=0.9, + centrality_condition=missing, #InteriorPointCentralityCondition(cmo, γ, zero(γ), zero(γ)), + step_objective=ManifoldGradientObjective( + KKTVectorFieldNormSq(cmo), KKTVectorFieldNormSqGradient(cmo); evaluation=evaluation + ), + _step_M::AbstractManifold=M × vector_space(length(μ)) × vector_space(length(λ)) × + vector_space(length(s)), + step_problem=DefaultManoptProblem(_step_M, step_objective), + _step_p=rand(_step_M), + step_state=StepsizeState(_step_p, zero_vector(_step_M, _step_p)), + stepsize::Stepsize=ArmijoLinesearch( + _step_M; + retraction_method=default_retraction_method(_step_M), + initial_guess=interior_point_initial_guess, + additional_decrease_condition=if ismissing(centrality_condition) + (M, p) -> true + else + centrality_condition + end, + ), + stopping_criterion::StoppingCriterion=StopAfterIteration(800) | + StopWhenKKTResidualLess(1e-8), + _sub_M=M × vector_space(length(λ)), + _sub_p=rand(_sub_M), + _sub_X=rand(_sub_M; vector_at=_sub_p), + sub_objective=decorate_objective!( + TangentSpace(_sub_M, _sub_p), + SymmetricLinearSystemObjective( + CondensedKKTVectorFieldJacobian(cmo, μ, s, σ * ρ), + CondensedKKTVectorField(cmo, μ, s, σ * ρ), + ), + sub_kwargs..., + ), + sub_stopping_criterion::StoppingCriterion=StopAfterIteration(manifold_dimension(M)) | + StopWhenRelativeResidualLess( + norm(_sub_M, _sub_p, get_b(TangentSpace(_sub_M, _sub_p), sub_objective, _sub_X)), + 1e-8, + ), + sub_state::St=decorate_state!( + ConjugateResidualState( + TangentSpace(_sub_M, _sub_p), + sub_objective; + X=_sub_X, + stop=sub_stopping_criterion, + sub_kwargs..., + ); + sub_kwargs..., + ), + sub_problem::Pr=DefaultManoptProblem(TangentSpace(_sub_M, _sub_p), sub_objective), + kwargs..., +) where { + O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective}, + St<:AbstractManoptSolverState, + Pr<:Union{F,AbstractManoptProblem} where {F}, +} + !is_feasible(M, cmo, p; error=:error) + dcmo = decorate_objective!(M, cmo; kwargs...) + dmp = DefaultManoptProblem(M, dcmo) + ips = InteriorPointNewtonState( + M, + cmo, + p, + sub_problem, + sub_state; + X=X, + Y=Y, + Z=Z, + W=W, + μ=μ, + λ=λ, + s=s, + stopping_criterion=stopping_criterion, + retraction_method=retraction_method, + step_problem=step_problem, + step_state=step_state, + stepsize=stepsize, + kwargs..., + ) + ips = decorate_state!(ips; kwargs...) 
+ solve!(dmp, ips) + return get_solver_return(get_objective(dmp), ips) +end +function initialize_solver!(::AbstractManoptProblem, ips::InteriorPointNewtonState) + return ips +end + +function step_solver!(amp::AbstractManoptProblem, ips::InteriorPointNewtonState, i) + M = get_manifold(amp) + cmo = get_objective(amp) + N = base_manifold(get_manifold(ips.sub_problem)) + q = base_point(get_manifold(ips.sub_problem)) + copyto!(N[1], q[N, 1], ips.p) + copyto!(N[2], q[N, 2], ips.λ) + set_iterate!(ips.sub_state, get_manifold(ips.sub_problem), zero_vector(N, q)) + + set_manopt_parameter!(ips.sub_problem, :Manifold, :Basepoint, q) + set_manopt_parameter!(ips.sub_problem, :Objective, :μ, ips.μ) + set_manopt_parameter!(ips.sub_problem, :Objective, :λ, ips.λ) + set_manopt_parameter!(ips.sub_problem, :Objective, :s, ips.s) + set_manopt_parameter!(ips.sub_problem, :Objective, :β, ips.ρ * ips.σ) + # product manifold on which to perform linesearch + + X2 = get_solver_result(solve!(ips.sub_problem, ips.sub_state)) + ips.X, ips.Z = X2[N, 1], X2[N, 2] #for p and λ + + # Compute the remaining part of the solution + m, n = length(ips.μ), length(ips.λ) + if m > 0 + g = get_inequality_constraint(amp, ips.p, :) + grad_g = get_grad_inequality_constraint(amp, ips.p, :) + β = ips.ρ * ips.σ + # for s and μ + ips.W = -[inner(M, ips.p, grad_g[i], ips.X) for i in 1:m] - g - ips.s + ips.Y = (β .- ips.μ .* (ips.s + ips.W)) ./ ips.s + end + + N = get_manifold(ips.step_problem) + # generate current full iterate in step state + q = get_iterate(ips.step_state) + copyto!(N[1], q[N, 1], get_iterate(ips)) + copyto!(N[2], q[N, 2], ips.μ) + copyto!(N[3], q[N, 3], ips.λ) + copyto!(N[4], q[N, 4], ips.s) + set_iterate!(ips.step_state, M, q) + # generate current full gradient in step state + X = get_gradient(ips.step_state) + copyto!(N[1], X[N, 1], ips.X) + (m > 0) && (copyto!(N[2], X[N, 2], ips.Z)) + (n > 0) && (copyto!(N[3], X[N, 3], ips.Y)) + (m > 0) && (copyto!(N[4], X[N, 4], ips.W)) + set_gradient!(ips.step_state, M, q, X) + # Update centrality factor – Maybe do this as an update function? + γ = get_manopt_parameter(ips.stepsize, :DecreaseCondition, :γ) + if !isnothing(γ) + set_manopt_parameter!(ips.stepsize, :DecreaseCondition, :γ, (γ + 0.5) / 2) + end + set_manopt_parameter!(ips.stepsize, :DecreaseCondition, :τ, N, q) + # determine stepsize + α = ips.stepsize(ips.step_problem, ips.step_state, i) + # Update Parameters and slack + retract!(M, ips.p, ips.p, α * ips.X, ips.retraction_method) + if m > 0 + ips.μ += α * ips.Y + ips.s += α * ips.W + ips.ρ = ips.μ'ips.s / m + # we can use the memory from above still + ips.σ = calculate_σ(M, cmo, ips.p, ips.μ, ips.λ, ips.s; N=N, q=q) + end + (n > 0) && (ips.λ += α * ips.Z) + return ips +end + +get_solver_result(ips::InteriorPointNewtonState) = ips.p diff --git a/src/solvers/proximal_bundle_method.jl b/src/solvers/proximal_bundle_method.jl index 5339c6a4c4..26afda17f9 100644 --- a/src/solvers/proximal_bundle_method.jl +++ b/src/solvers/proximal_bundle_method.jl @@ -29,6 +29,7 @@ stores option values for a [`proximal_bundle_method`](@ref) solver. 
* `μ`: (`0.5`) (initial) proximal parameter for the subproblem * `ν`: the stopping parameter given by ``ν = - μ |d|^2 - c`` * `sub_problem`: a function evaluating with new allocations that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`, +* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver # Constructor @@ -42,7 +43,7 @@ mutable struct ProximalBundleMethodState{ P, T, Pr, - St, + St<:AbstractManoptSolverState, R<:Real, IR<:AbstractInverseRetractionMethod, TR<:AbstractRetractionMethod, @@ -90,12 +91,11 @@ mutable struct ProximalBundleMethodState{ δ::R=1.0, μ::R=0.5, sub_problem::Pr=proximal_bundle_method_subsolver, - sub_state::St=AllocatingEvaluation(), + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=AllocatingEvaluation(), ) where { P, T, Pr, - St, R<:Real, IR<:AbstractInverseRetractionMethod, TM<:AbstractManifold, @@ -103,6 +103,7 @@ mutable struct ProximalBundleMethodState{ SC<:StoppingCriterion, VT<:AbstractVectorTransportMethod, } + sub_state_storage = maybe_wrap_evaluation_type(sub_state) # Initialize index set, bundle points, linearization errors, and stopping parameter approx_errors = [zero(R)] bundle = [(copy(M, p), copy(M, p, X))] @@ -110,11 +111,12 @@ mutable struct ProximalBundleMethodState{ d = copy(M, p, X) lin_errors = [zero(R)] transported_subgradients = [copy(M, p, X)] + sub_state_storage = maybe_wrap_evaluation_type(sub_state) α = zero(R) λ = [zero(R)] η = zero(R) ν = zero(R) - return new{P,T,Pr,St,R,IR,TR,SC,VT}( + return new{P,T,Pr,typeof(sub_state_storage),R,IR,TR,SC,VT}( approx_errors, bundle, c, @@ -139,7 +141,7 @@ mutable struct ProximalBundleMethodState{ μ, ν, sub_problem, - sub_state, + sub_state_storage, ) end end @@ -265,12 +267,13 @@ function proximal_bundle_method!( δ=-1.0,#0.0, μ=0.5,#1.0, sub_problem=proximal_bundle_method_subsolver, - sub_state=evaluation, + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=evaluation, kwargs..., #especially may contain debug ) where {TF,TdF,TRetr,IR,VTransp} sgo = ManifoldSubgradientObjective(f, ∂f!!; evaluation=evaluation) dsgo = decorate_objective!(M, sgo; kwargs...) mp = DefaultManoptProblem(M, dsgo) + sub_state_storage = maybe_wrap_evaluation_type(sub_state) pbms = ProximalBundleMethodState( M, p; @@ -285,14 +288,14 @@ function proximal_bundle_method!( δ=δ, μ=μ, sub_problem=sub_problem, - sub_state=sub_state, + sub_state=sub_state_storage, ) pbms = decorate_state!(pbms; kwargs...) 
return get_solver_return(solve!(mp, pbms)) end function initialize_solver!( mp::AbstractManoptProblem, pbms::ProximalBundleMethodState{P,T,Pr,St,R} -) where {P,T,Pr,St,R} +) where {P,T,Pr,St<:AbstractManoptSolverState,R<:Real} M = get_manifold(mp) copyto!(M, pbms.p_last_serious, pbms.p) get_subgradient!(mp, pbms.X, pbms.p) @@ -414,7 +417,7 @@ get_solver_result(pbms::ProximalBundleMethodState) = pbms.p_last_serious # Dispatching on different types of subsolvers # (a) closed form allocating function _proximal_bundle_subsolver!( - M, pbms::ProximalBundleMethodState{P,T,F,AllocatingEvaluation} + M, pbms::ProximalBundleMethodState{P,T,F,ClosedFormSubSolverState{AllocatingEvaluation}} ) where {P,T,F} pbms.λ = pbms.sub_problem( M, pbms.p_last_serious, pbms.μ, pbms.approx_errors, pbms.transported_subgradients @@ -423,7 +426,7 @@ function _proximal_bundle_subsolver!( end # (b) closed form in-place function _proximal_bundle_subsolver!( - M, pbms::ProximalBundleMethodState{P,T,F,InplaceEvaluation} + M, pbms::ProximalBundleMethodState{P,T,F,ClosedFormSubSolverState{InplaceEvaluation}} ) where {P,T,F} pbms.sub_problem( M, diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl index cfbba2ed64..0a8f671314 100644 --- a/src/solvers/trust_regions.jl +++ b/src/solvers/trust_regions.jl @@ -65,7 +65,14 @@ A trust region state, where the sub problem is solved in closed form by a functi [`trust_regions`](@ref), [`trust_regions!`](@ref) """ mutable struct TrustRegionsState{ - P,T,Pr,St,SC<:StoppingCriterion,RTR<:AbstractRetractionMethod,R<:Real,Proj + P, + T, + Pr, + St<:AbstractManoptSolverState, + SC<:StoppingCriterion, + RTR<:AbstractRetractionMethod, + R<:Real, + Proj, } <: AbstractSubProblemSolverState p::P X::T @@ -111,7 +118,16 @@ mutable struct TrustRegionsState{ reduction_factor=0.25, augmentation_factor=2.0, σ::R=random ? 1e-6 : 0.0, - ) where {P,T,Pr,St,SC<:StoppingCriterion,RTR<:AbstractRetractionMethod,R<:Real,Proj} + ) where { + P, + T, + Pr, + St<:AbstractManoptSolverState, + SC<:StoppingCriterion, + RTR<:AbstractRetractionMethod, + R<:Real, + Proj, + } trs = new{P,T,Pr,St,SC,RTR,R,Proj}() trs.p = p trs.X = X @@ -140,23 +156,20 @@ function TrustRegionsState( return TrustRegionsState(M, rand(M), sub_problem; kwargs...) end # No point but state -> add point -function TrustRegionsState( - M, sub_problem::Pr, sub_state::St; kwargs... -) where { - Pr<:Union{AbstractManoptProblem,T} where {T}, - St<:Union{AbstractManoptSolverState,AbstractEvaluationType}, -} +function TrustRegionsState(M, sub_problem, sub_state::AbstractManoptSolverState; kwargs...) return TrustRegionsState(M, rand(M), sub_problem, sub_state; kwargs...) end # point, but no state for a function -> add evaluation as state function TrustRegionsState( M, p, - sub_problem::Pr; + sub_problem::Function; evaluation::AbstractEvaluationType=AllocatingEvaluation(), kwargs..., -) where {Pr<:Function} - return TrustRegionsState(M, p, sub_problem, evaluation; kwargs...) +) + return TrustRegionsState( + M, p, sub_problem, ClosedFormSubSolverState(evaluation); kwargs... + ) end function TrustRegionsState( M, p, mho::H; kwargs... 
@@ -170,7 +183,7 @@ function TrustRegionsState( M::TM, p::P, sub_problem::Pr, - sub_state::St=TruncatedConjugateGradientState( + sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}=TruncatedConjugateGradientState( TangentSpace(M, copy(M, p)), zero_vector(M, p) ); X::T=zero_vector(M, p), @@ -191,7 +204,6 @@ function TrustRegionsState( ) where { TM<:AbstractManifold, Pr<:AbstractManoptProblem, - St, P, T, R<:Real, @@ -199,7 +211,8 @@ function TrustRegionsState( RTR<:AbstractRetractionMethod, Proj, } - return TrustRegionsState{P,T,Pr,St,SC,RTR,R,Proj}( + sub_state_storage = maybe_wrap_evaluation_type(sub_state) + return TrustRegionsState{P,T,Pr,typeof(sub_state_storage),SC,RTR,R,Proj}( p, X, trust_region_radius, @@ -212,7 +225,7 @@ function TrustRegionsState( reduction_threshold, augmentation_threshold, sub_problem, - sub_state, + sub_state_storage, project!, reduction_factor, augmentation_factor, diff --git a/test/helpers/test_manifold_extra_functions.jl b/test/helpers/test_manifold_extra_functions.jl index 71e9e5df3d..d924887b5c 100644 --- a/test/helpers/test_manifold_extra_functions.jl +++ b/test/helpers/test_manifold_extra_functions.jl @@ -62,7 +62,6 @@ Random.seed!(42) # Without being too far away -> classical mid point @test mid_point(M, 0, 0.1, π / 2) == mid_point(M, 0, 0.1) end - @testset "max_stepsize" begin M = Sphere(2) TM = TangentBundle(M) @@ -103,4 +102,8 @@ Random.seed!(42) @test Manopt.max_stepsize(M) ≈ 6.0 @test Manopt.max_stepsize(M, [-1, 0.5]) ≈ 4.0 end + @testset "Vector space default" begin + @test Manopt.Rn(Val(:Manopt), 3) isa ManifoldsBase.DefaultManifold + @test Manopt.Rn(Val(:Manifolds), 3) isa Euclidean + end end diff --git a/test/plans/test_conjugate_residual_plan.jl b/test/plans/test_conjugate_residual_plan.jl new file mode 100644 index 0000000000..65b0d13017 --- /dev/null +++ b/test/plans/test_conjugate_residual_plan.jl @@ -0,0 +1,84 @@ + +using Manifolds, Manopt, Test + +@testset "Conjugate Residual Plan" begin + M = ℝ^2 + p = [1.0, 1.0] + TpM = TangentSpace(M, p) + + Am = [2.0 1.0; 1.0 4.0] + bv = [1.0, 2.0] + ps = Am \ (-bv) + X0 = [3.0, 4.0] + A(TpM, X, V) = Am * V + b(TpM, p) = bv + A!(M, W, X, V) = (W .= Am * V) + b!(M, W, p) = (W .= bv) + + slso = SymmetricLinearSystemObjective(A, b) + slso2 = SymmetricLinearSystemObjective(A!, b!; evaluation=InplaceEvaluation()) + @testset "Objective" begin + grad_value = A(TpM, p, X0) + b(TpM, p) + cost_value = 0.5 * norm(M, p, grad_value)^2 + @test get_cost(TpM, slso, X0) ≈ cost_value + @test get_cost(TpM, slso2, X0) ≈ cost_value + + @test Manopt.get_b(TpM, slso, X0) == bv + @test Manopt.get_b(TpM, slso2, X0) == bv + + @test get_gradient(TpM, slso, X0) == grad_value + @test get_gradient(TpM, slso2, X0) == grad_value + Y0 = similar(X0) + @test get_gradient!(TpM, Y0, slso, X0) == grad_value + @test Y0 == grad_value + zero_vector!(TpM, Y0, X0) + @test get_gradient!(TpM, Y0, slso2, X0) == grad_value + @test Y0 == grad_value + + hessAX0 = A(TpM, p, X0) + @test get_hessian(TpM, slso, p, X0) == hessAX0 + @test get_hessian(TpM, slso2, p, X0) == hessAX0 + zero_vector!(TpM, Y0, X0) + @test get_hessian!(TpM, Y0, slso, p, X0) == hessAX0 + @test Y0 == hessAX0 + zero_vector!(TpM, Y0, X0) + @test get_hessian!(TpM, Y0, slso2, p, X0) == hessAX0 + @test Y0 == hessAX0 + end + @testset "Conjugate residual state" begin + crs = ConjugateResidualState(TpM, slso) + @test set_iterate!(crs, TpM, X0) == crs # setters return state + @test get_iterate(crs) == X0 + @test set_gradient!(crs, TpM, X0) == crs # setters return state + 
@test get_gradient(crs) == X0
+        @test startswith(
+            repr(crs), "# Solver state for `Manopt.jl`s Conjugate Residual Method"
+        )
+        crs2 = ConjugateResidualState(TpM, slso2)
+        @test set_iterate!(crs2, TpM, X0) == crs2 # setters return state
+        @test get_iterate(crs2) == X0
+        @test set_gradient!(crs2, TpM, X0) == crs2 # setters return state
+        @test get_gradient(crs2) == X0
+        @test startswith(
+            repr(crs2), "# Solver state for `Manopt.jl`s Conjugate Residual Method"
+        )
+    end
+    @testset "StopWhenRelativeResidualLess" begin
+        dmp = DefaultManoptProblem(TpM, slso)
+        crs = ConjugateResidualState(TpM, slso; X=X0)
+        swrr = StopWhenRelativeResidualLess(1.0, 1e-3) # initial norm 1.0, ε=1e-3
+        @test startswith(repr(swrr), "StopWhenRelativeResidualLess(1.0, 0.001)")
+        # initially this resets the norm
+        swrr(dmp, crs, 0)
+        @test swrr.c == norm(bv)
+        @test swrr(dmp, crs, 1) == false
+        # stop reason is also still empty
+        @test length(get_reason(swrr)) == 0
+        # Manually set residual small
+        crs.r = [1e-5, 1e-5]
+        @test swrr(dmp, crs, 2) == true
+        @test swrr.norm_r == norm(crs.r)
+        @test length(get_reason(swrr)) > 0
+        @test Manopt.indicates_convergence(swrr)
+    end
+end
diff --git a/test/plans/test_constrained_plan.jl b/test/plans/test_constrained_plan.jl
index e30f3f82ca..0a3deb84c6 100644
--- a/test/plans/test_constrained_plan.jl
+++ b/test/plans/test_constrained_plan.jl
@@ -54,6 +54,19 @@ include("../utils/dummy_types.jl")
     grad_h1!(M, X, p) = (X .= [0.0, 0.0, 2.0])
     hess_h1(M, p, X) = [0.0, 0.0, 0.0]
     hess_h1!(M, Y, p, X) = (Y .= [0.0, 0.0, 0.0])
+
+    # A set of values for an example point and tangent
+    p = [1.0, 2.0, 3.0]
+    c = [[0.0, -3.0], [5.0]]
+    fp = 14.0
+    gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]]
+    gh = [[0.0, 0.0, 2.0]]
+    gf = 2 * p
+    X = [1.0, 0.0, 0.0]
+    hf = [2.0, 2.0, 2.0]
+    hg = [X, -X]
+    hh = [[0.0, 0.0, 0.0]]
+
     cofa = ConstrainedManifoldObjective(
         f, grad_f, g, grad_g, h, grad_h; inequality_constraints=2, equality_constraints=1
     )
@@ -184,16 +197,6 @@ include("../utils/dummy_types.jl")
         gradient_inequality_range=ArrayPowerRepresentation(),
     )
 
-    p = [1.0, 2.0, 3.0]
-    c = [[0.0, -3.0], [5.0]]
-    gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]]
-    gh = [[0.0, 0.0, 2.0]]
-    gf = 2 * p
-    X = [1.0, 0.0, 0.0]
-    hf = [2.0, 2.0, 2.0]
-    hg = [X, -X]
-    hh = [[0.0, 0.0, 0.0]]
-
     @testset "ConstrainedManoptProblem special cases" begin
         Y = zero_vector(M, p)
         for mcp in [mp, cop]
@@ -351,7 +354,192 @@ include("../utils/dummy_types.jl")
             end
         end
     end
+    @testset "is_feasible & DebugFeasibility" begin
+        coh = ConstrainedManifoldObjective(
+            f,
+            grad_f;
+            hess_f=hess_f,
+            g=g,
+            grad_g=grad_g,
+            hess_g=hess_g,
+            h=h,
+            grad_h=grad_h,
+            hess_h=hess_h,
+            M=M,
+        )
+        @test is_feasible(M, coh, [-2.0, 3.0, 0.5]; error=:info)
+        @test_throws ErrorException is_feasible(M, coh, p; error=:error)
+        @test_logs (:info,) !is_feasible(M, coh, p; error=:info)
+        @test_logs (:warn,) !is_feasible(M, coh, p; error=:warn)
+        # Dummy state
+        st = Manopt.StepsizeState(p, X)
+        mp = DefaultManoptProblem(M, coh)
+        io = IOBuffer()
+        df = DebugFeasibility(; io=io)
+        @test repr(df) === "DebugFeasibility([\"feasible: \", :Feasible]; atol=1.0e-13)"
+        # short form:
+        @test Manopt.status_summary(df) === "(:Feasibility, [\"feasible: \", :Feasible])"
+        df(mp, st, 1)
+        @test String(take!(io)) == "feasible: No"
+    end
+    @testset "Lagrangians" begin
+        μ = [1.0, 1.0]
+        λ = [1.0]
+        β = 7.0
+        s = [1.0, 2.0]
+        N = M × ℝ^2 × ℝ^1 × ℝ^2
+        q = rand(N)
+        q[N, 1] = p
+        q[N, 2] = μ
+        q[N, 3] = λ
+        q[N, 4] = s
+        coh = ConstrainedManifoldObjective(
+            f,
+            grad_f;
+            hess_f=hess_f,
+            g=g,
+
grad_g=grad_g, + hess_g=hess_g, + h=h, + grad_h=grad_h, + hess_h=hess_h, + M=M, + ) + @testset "Lagrangian Cost, Grad and Hessian" begin + Lc = LagrangianCost(coh, μ, λ) + @test startswith(repr(Lc), "LagrangianCost") + Lg = LagrangianGradient(coh, μ, λ) + @test startswith(repr(Lg), "LagrangianGradient") + Lh = LagrangianHessian(coh, μ, λ) + @test startswith(repr(Lh), "LagrangianHessian") + @test Lc(M, p) == f(M, p) + g(M, p)'μ + h(M, p)'λ + @test Lg(M, p) == gf + sum(gg .* μ) + sum(gh .* λ) + LX = zero_vector(M, p) + Lg(M, LX, p) + @test LX == Lg(M, p) + @test Lh(M, p, X) == hf + sum(hg .* μ) + sum(hh .* λ) + Lh(M, LX, p, X) + @test LX == Lh(M, p, X) + # Get & Set + @test Manopt.set_manopt_parameter!(Lc, :μ, [2.0, 2.0]) == Lc + @test Manopt.get_manopt_parameter(Lc, :μ) == [2.0, 2.0] + @test Manopt.set_manopt_parameter!(Lc, :λ, [2.0]) == Lc + @test Manopt.get_manopt_parameter(Lc, :λ) == [2.0] + + @test Manopt.set_manopt_parameter!(Lg, :μ, [2.0, 2.0]) == Lg + @test Manopt.get_manopt_parameter(Lg, :μ) == [2.0, 2.0] + @test Manopt.set_manopt_parameter!(Lg, :λ, [2.0]) == Lg + @test Manopt.get_manopt_parameter(Lg, :λ) == [2.0] + @test Manopt.set_manopt_parameter!(Lh, :μ, [2.0, 2.0]) == Lh + @test Manopt.get_manopt_parameter(Lh, :μ) == [2.0, 2.0] + @test Manopt.set_manopt_parameter!(Lh, :λ, [2.0]) == Lh + @test Manopt.get_manopt_parameter(Lh, :λ) == [2.0] + end + @testset "Full KKT and its norm" begin + # Full KKT Vector field + KKTvf = KKTVectorField(coh) + @test startswith(repr(KKTvf), "KKTVectorField\n") + Xp = LagrangianGradient(coh, μ, λ)(M, p) #Xμ = g + s; Xλ = h, Xs = μ .* s + Y = KKTvf(N, q) + @test Y[N, 1] == Xp + @test Y[N, 2] == c[1] .+ s + @test Y[N, 3] == c[2] + @test Y[N, 4] == μ .* s + KKTvfJ = KKTVectorFieldJacobian(coh) + @test startswith(repr(KKTvfJ), "KKTVectorFieldJacobian\n") + # + Xp = + LagrangianHessian(coh, μ, λ)(M, p, Y[N, 1]) + + sum(Y[N, 2] .* gg) + + sum(Y[N, 3] .* gh) + Xμ = [inner(M, p, gg[i], Y[N, 1]) + Y[N, 4][i] for i in 1:length(gg)] + Xλ = [inner(M, p, gh[j], Y[N, 1]) for j in 1:length(gh)] + Z = KKTvfJ(N, q, Y) + @test Z[N, 1] == Xp + @test Z[N, 2] == Xμ + @test Z[N, 3] == Xλ + @test Z[N, 4] == μ .* Y[N, 4] + s .* Y[N, 2] + + KKTvfAdJ = KKTVectorFieldAdjointJacobian(coh) + @test startswith(repr(KKTvfAdJ), "KKTVectorFieldAdjointJacobian\n") + Xp2 = + LagrangianHessian(coh, μ, λ)(M, p, Y[N, 1]) + + sum(Y[N, 2] .* gg) + + sum(Y[N, 3] .* gh) + Xμ2 = [inner(M, p, gg[i], Y[N, 1]) + s[i] * Y[N, 4][i] for i in 1:length(gg)] + Xλ2 = [inner(M, p, gh[j], Y[N, 1]) for j in 1:length(gh)] + Z2 = KKTvfAdJ(N, q, Y) + @test Z2[N, 1] == Xp2 + @test Z2[N, 2] == Xμ2 + @test Z2[N, 3] == Xλ2 + @test Z2[N, 4] == μ .* Y[N, 4] + Y[N, 2] + + # Full KKT Vector field norm – the Merit function + KKTvfN = KKTVectorFieldNormSq(coh) + @test startswith(repr(KKTvfN), "KKTVectorFieldNormSq\n") + vfn = KKTvfN(N, q) + @test vfn == norm(N, q, Y)^2 + KKTvfNG = KKTVectorFieldNormSqGradient(coh) + @test startswith(repr(KKTvfNG), "KKTVectorFieldNormSqGradient\n") + Zg1 = KKTvf(N, q) + Zg2 = 2.0 * KKTvfAdJ(N, q, Zg1) + W = KKTvfNG(N, q) + @test W == Zg2 + end + @testset "Condensed KKT, Jacobian" begin + CKKTvf = CondensedKKTVectorField(coh, μ, s, β) + @test startswith(repr(CKKTvf), "CondensedKKTVectorField\n") + b1 = + gf + + sum(λ .* gh) + + sum(μ .* gg) + + sum(((μ ./ s) .* (μ .* (c[1] .+ s) .+ β .- μ .* s)) .* gg) + b2 = c[2] + Nc = M × ℝ^1 # (p,λ) + qc = rand(Nc) + qc[Nc, 1] = p + qc[Nc, 2] = λ + V = CKKTvf(Nc, qc) + @test V[Nc, 1] == b1 + @test V[Nc, 2] == b2 + V2 = copy(Nc, qc, V) + CKKTvf(Nc, 
V2, qc) + @test V2 == V + CKKTVfJ = CondensedKKTVectorFieldJacobian(coh, μ, s, β) + @test startswith(repr(CKKTVfJ), "CondensedKKTVectorFieldJacobian\n") + Yc = zero_vector(Nc, qc) + Yc[Nc, 1] = [1.0, 3.0, 5.0] + Yc[Nc, 2] = [7.0] + # Compute by hand – somehow the formula is still missing a Y + Wc = zero_vector(Nc, qc) + # (1) Hess L + The g sum + the grad g sum + Wc[N, 1] = hf + sum(hg .* μ) + sum(hh .* Yc[2]) + # (2) grad g terms + Wc[N, 1] += sum( + (μ ./ s) .* + [inner(Nc[1], qc[N, 1], gg[i], Yc[Nc, 1]) for i in 1:length(gg)] .* gg, + ) + # (3) grad h terms (note the Y_2 component) + Wc[N, 1] += sum(Yc[N, 2] .* gh) + # Second component, just h terms + Wc[N, 2] = [inner(Nc[1], qc[N, 1], gh[j], Yc[Nc, 1]) for j in 1:length(gh)] + W = CKKTVfJ(Nc, qc, Yc) + W2 = copy(Nc, qc, Yc) + CKKTVfJ(Nc, W2, qc, Yc) + @test W2 == W + @test Wc == W + # get & set + for ck in [CKKTvf, CKKTVfJ] + @test Manopt.set_manopt_parameter!(ck, :μ, [2.0, 2.0]) == ck + @test Manopt.get_manopt_parameter(ck, :μ) == [2.0, 2.0] + @test Manopt.set_manopt_parameter!(ck, :s, [2.0, 2.0]) == ck + @test Manopt.get_manopt_parameter(ck, :s) == [2.0, 2.0] + @test Manopt.set_manopt_parameter!(ck, :β, 2.0) == ck + @test Manopt.get_manopt_parameter(ck, :β) == 2.0 + end + end + end @testset "Augmented Lagrangian Cost & Grad" begin μ = [1.0, 1.0] λ = [1.0] @@ -371,6 +559,10 @@ include("../utils/dummy_types.jl") @test gALC(M, p) ≈ ag gALC(M, X, p) @test gALC(M, X, p) ≈ ag + @test Manopt.set_manopt_parameter!(ALC, :ρ, 2 * ρ) == ALC + @test Manopt.get_manopt_parameter(ALC, :ρ) == 2 * ρ + @test Manopt.set_manopt_parameter!(gALC, :ρ, 2 * ρ) == gALC + @test Manopt.get_manopt_parameter(gALC, :ρ) == 2 * ρ end end end diff --git a/test/plans/test_interior_point_newton_plan.jl b/test/plans/test_interior_point_newton_plan.jl new file mode 100644 index 0000000000..148d38c2fe --- /dev/null +++ b/test/plans/test_interior_point_newton_plan.jl @@ -0,0 +1,95 @@ +using ManifoldsBase, Manifolds, Manopt, Test + +@testset "InteriorPointNewtonState" begin + M = ManifoldsBase.DefaultManifold(3) + # Cost + f(::ManifoldsBase.DefaultManifold, p) = norm(p)^2 + grad_f(M, p) = 2 * p + hess_f(M, p, X) = [2.0, 2.0, 2.0] + # Inequality constraints + g(M, p) = [p[1] - 1, -p[2] - 1] + # # Function + grad_g(M, p) = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] + hess_g(M, p, X) = [copy(X), -copy(X)] + + h(M, p) = [2 * p[3] - 1] + grad_h(M, p) = [[0.0, 0.0, 2.0]] + hess_h(M, p, X) = [[0.0, 0.0, 0.0]] + + #A set of values for an example point and tangent + p = [1.0, 2.0, 3.0] + c = [[0.0, -3.0], [5.0]] + fp = 14.0 + gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] + gh = [[0.0, 0.0, 2.0]] + gf = 2 * p + X = [1.0, 0.0, 0.0] + hf = [2.0, 2.0, 2.0] + hg = [X, -X] + hh = [[0.0, 0.0, 0.0]] + + μ = [1.0, 1.0] + λ = [1.0] + β = 7.0 + s = [1.0, 2.0] + β = 6.0 + step_M = M × ℝ^2 × ℝ^1 × ℝ^2 + step_p = rand(step_M) + step_p[step_M, 1] = p + step_p[step_M, 2] = μ + step_p[step_M, 3] = λ + step_p[step_M, 4] = s + sub_M = M × ℝ^1 + sub_p = rand(sub_M) + sub_p[sub_M, 1] = p + sub_p[sub_M, 2] = λ + coh = ConstrainedManifoldObjective( + f, + grad_f; + hess_f=hess_f, + g=g, + grad_g=grad_g, + hess_g=hess_g, + h=h, + grad_h=grad_h, + hess_h=hess_h, + M=M, + ) + sub_obj = SymmetricLinearSystemObjective( + CondensedKKTVectorFieldJacobian(coh, μ, s, β), CondensedKKTVectorField(coh, μ, s, β) + ) + sub_state = ConjugateResidualState(TangentSpace(sub_M, sub_p), sub_obj) + dmp = DefaultManoptProblem(M, coh) + ipns = InteriorPointNewtonState( + M, coh, p, DefaultManoptProblem(sub_M, sub_obj), sub_state + ) + # 
Getters & Setters + @test length(Manopt.get_message(ipns)) == 0 + @test set_iterate!(ipns, M, 2 * p) == ipns + @test get_iterate(ipns) == 2 * p + @test set_gradient!(ipns, M, 3 * p) == ipns + @test get_gradient(ipns) == 3 * p + show_str = "# Solver state for `Manopt.jl`s Interior Point Newton Method\n" + @test startswith(repr(ipns), show_str) + # + sc = StopWhenKKTResidualLess(1e-5) + @test length(get_reason(sc)) == 0 + @test !sc(dmp, ipns, 1) #not yet reached + @test Manopt.indicates_convergence(sc) + @test startswith(repr(sc), "StopWhenKKTResidualLess(1.0e-5)\n") + # Fake stop + sc.residual = 1e-7 + sc.at_iteration = 1 + @test length(get_reason(sc)) > 0 + # + ipcc = InteriorPointCentralityCondition(coh, 1.0) + @test Manopt.set_manopt_parameter!(ipcc, :τ, step_M, step_p) == ipcc + @test Manopt.set_manopt_parameter!(ipcc, :γ, 2.0) == ipcc + @test Manopt.get_manopt_parameter(ipcc, :γ) == 2.0 + @test Manopt.get_manopt_parameter(ipcc, :τ1) == 2 / 3 + @test Manopt.get_manopt_parameter(ipcc, :τ2) ≈ 0.2809757 atol = 1e-7 + @test !ipcc(step_M, step_p) + ipcc.τ1 = 0.01 # trick conditions so ipcc succeeds + ipcc.τ2 = 0.01 + @test ipcc(step_M, step_p) +end diff --git a/test/plans/test_state.jl b/test/plans/test_state.jl index c9030dee7a..fc77d189f0 100644 --- a/test/plans/test_state.jl +++ b/test/plans/test_state.jl @@ -89,6 +89,10 @@ struct NoIterateState <: AbstractManoptSolverState end @test d2.state.X == ones(3) @test get_stopping_criterion(d2) === s2.stop end + @testset "Closed Form State" begin + @test Manopt.ClosedFormSubSolverState() isa + Manopt.ClosedFormSubSolverState{AllocatingEvaluation} + end @testset "Generic Objective and State solver returns" begin f(M, p) = 1 o = ManifoldCostObjective(f) diff --git a/test/plans/test_stepsize.jl b/test/plans/test_stepsize.jl index 440c225067..66ff5df647 100644 --- a/test/plans/test_stepsize.jl +++ b/test/plans/test_stepsize.jl @@ -26,6 +26,13 @@ using Manopt, Manifolds, Test # no stepsize yet so `repr` and summary are the same @test repr(s4) == Manopt.status_summary(s4) @test Manopt.get_message(s4) == "" + @testset "Armijo setter / getters" begin + # Check that the passdowns work, though; since the defaults are functions, they return nothing + @test isnothing(Manopt.get_manopt_parameter(s, :IncreaseCondition, :Dummy)) + @test isnothing(Manopt.get_manopt_parameter(s, :DecreaseCondition, :Dummy)) + @test Manopt.set_manopt_parameter!(s, :IncreaseCondition, :Dummy, 1) == s #setters return s + @test Manopt.set_manopt_parameter!(s, :DecreaseCondition, :Dummy, 1) == s + end @testset "Linesearch safeguards" begin M = Euclidean(2) f(M, p) = sum(p .^ 2) diff --git a/test/runtests.jl b/test/runtests.jl index f39af5cda2..da48b4ce85 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,8 +12,9 @@ include("utils/example_tasks.jl") include("plans/test_debug.jl") include("plans/test_difference_of_convex_plan.jl") include("plans/test_embedded.jl") - include("plans/test_storage.jl") include("plans/test_cache.jl") + include("plans/test_conjugate_residual_plan.jl") + include("plans/test_interior_point_newton_plan.jl") include("plans/test_nelder_mead_plan.jl") include("plans/test_gradient_plan.jl") include("plans/test_constrained_plan.jl") @@ -26,6 +27,7 @@ include("utils/example_tasks.jl") include("plans/test_stepsize.jl") include("plans/test_stochastic_gradient_plan.jl") include("plans/test_stopping_criteria.jl") + include("plans/test_storage.jl") include("plans/test_subgradient_plan.jl") include("plans/test_vectorial_plan.jl") end @@ -44,10 +46,12 @@ 
include("utils/example_tasks.jl") include("solvers/test_conjugate_gradient.jl") include("solvers/test_difference_of_convex.jl") include("solvers/test_Douglas_Rachford.jl") + include("solvers/test_conjugate_residual.jl") include("solvers/test_cyclic_proximal_point.jl") include("solvers/test_exact_penalty.jl") include("solvers/test_Frank_Wolfe.jl") include("solvers/test_gradient_descent.jl") + include("solvers/test_interior_point_Newton.jl") include("solvers/test_Levenberg_Marquardt.jl") include("solvers/test_Nelder_Mead.jl") include("solvers/test_proximal_bundle_method.jl") diff --git a/test/solvers/test_adaptive_regularization_with_cubics.jl b/test/solvers/test_adaptive_regularization_with_cubics.jl index f2a11a4bfb..8b6a65589f 100644 --- a/test/solvers/test_adaptive_regularization_with_cubics.jl +++ b/test/solvers/test_adaptive_regularization_with_cubics.jl @@ -132,14 +132,18 @@ include("../utils/example_tasks.jl") f1(M, p) = p f1!(M, q, p) = copyto!(M, q, p) r = copy(M, p1) - Manopt.solve_arc_subproblem!(M, r, f1, AllocatingEvaluation(), p0) + Manopt.solve_arc_subproblem!( + M, r, f1, Manopt.ClosedFormSubSolverState{AllocatingEvaluation}(), p0 + ) @test r == p0 r = copy(M, p1) - Manopt.solve_arc_subproblem!(M, r, f1!, InplaceEvaluation(), p0) + Manopt.solve_arc_subproblem!( + M, r, f1!, Manopt.ClosedFormSubSolverState{InplaceEvaluation}(), p0 + ) @test r == p0 # Dummy construction with a function for the `sub_problem` arcs4 = AdaptiveRegularizationState(M, p0; sub_problem=f1) - @test arcs4.sub_state isa AbstractEvaluationType + @test arcs4.sub_state isa Manopt.ClosedFormSubSolverState end @testset "A few solver runs" begin diff --git a/test/solvers/test_conjugate_residual.jl b/test/solvers/test_conjugate_residual.jl new file mode 100644 index 0000000000..c55f91d0f4 --- /dev/null +++ b/test/solvers/test_conjugate_residual.jl @@ -0,0 +1,21 @@ +using Manifolds, Manopt, Test + +@testset "Conjugate Residual" begin + M = ℝ^2 + p = [1.0, 1.0] + TpM = TangentSpace(M, p) + + Am = [2.0 1.0; 1.0 4.0] + bv = [1.0, 2.0] + ps = Am \ (-bv) + X0 = [3.0, 4.0] + A(M, X, V) = Am * V + b(M, p) = bv + + slso = SymmetricLinearSystemObjective(A, b) + pT = conjugate_residual(TpM, slso, X0) + pT2 = conjugate_residual(TpM, A, b, X0) + @test norm(ps - pT) < 3e-15 + @test norm(pT2 - pT) < 3e-15 + @test get_cost(TpM, slso, pT) < 5e-15 +end diff --git a/test/solvers/test_difference_of_convex.jl b/test/solvers/test_difference_of_convex.jl index a07b719e68..c9b1e469dc 100644 --- a/test/solvers/test_difference_of_convex.jl +++ b/test/solvers/test_difference_of_convex.jl @@ -65,7 +65,7 @@ import Manifolds: inner dcps = DifferenceOfConvexProximalState( #Initialize with random point M, dcppa_sub_problem, - dcppa_sub_objective, + dcppa_sub_state, ) set_iterate!(dcps, M, p1) @test dcps.p == p1 diff --git a/test/solvers/test_interior_point_Newton.jl b/test/solvers/test_interior_point_Newton.jl new file mode 100644 index 0000000000..ddd646a97d --- /dev/null +++ b/test/solvers/test_interior_point_Newton.jl @@ -0,0 +1,123 @@ +using Manifolds, Manopt, LinearAlgebra, Random, Test + +@testset "Interior Point Newton Solver" begin + @testset "A solver run on the Sphere" begin + # We can take a look at debug prints of one run and plot the result + # on CI and when running with ] test Manopt, both have to be set to false. 
+        _debug_iterates_plot = false
+        _debug = false
+
+        A = -[1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 2.0]
+        f(M, p) = 0.5 * p' * A * p
+        grad_f(M, p) = (I - p * p') * A * p
+        Hess_f(M, p, X) = A * X - (p' * A * X) * p - (p' * A * p) * X
+
+        g(M, p) = -p
+        grad_g(M, p) = [(p * p' - I)[:, i] for i in 1:3]
+        Hess_g(M, p, X) = [(X * p')[:, i] for i in 1:3]
+        M = Manifolds.Sphere(2)
+
+        p_0 = (1.0 / (sqrt(3.0))) .* [1.0, 1.0, 1.0]
+        # p_0 = 1.0 / sqrt(2) .* [0.0, 1.0, 1.0]
+        p_opt = [0.0, 0.0, 1.0]
+        record = [:Iterate]
+        dbg = [
+            :Iteration,
+            " ",
+            :Cost,
+            " ",
+            :Stepsize,
+            " ",
+            :Change,
+            " ",
+            :Feasibility,
+            "\n",
+            :Stop,
+            10,
+        ]
+
+        sc = StopAfterIteration(800) | StopWhenKKTResidualLess(1e-2)
+        # (a) classical call w/ recording
+        res = interior_point_Newton(
+            M,
+            f,
+            grad_f,
+            Hess_f,
+            p_0;
+            g=g,
+            grad_g=grad_g,
+            Hess_g=Hess_g,
+            stopping_criterion=sc,
+            debug=_debug ? dbg : [],
+            record=_debug_iterates_plot ? record : [],
+            return_state=true,
+            return_objective=true,
+        )
+
+        q = get_solver_result(res)
+        @test distance(M, q, [0.0, 0.0, 1.0]) < 2e-4
+
+        # (b) inplace call
+        q2 = copy(M, p_0)
+        interior_point_Newton!(
+            M,
+            f,
+            grad_f,
+            Hess_f,
+            q2;
+            g=g,
+            grad_g=grad_g,
+            Hess_g=Hess_g,
+            stopping_criterion=sc,
+        )
+        @test q == q2
+
+        # (c) call with objective - but then we also test the centrality condition
+        coh = ConstrainedManifoldObjective(
+            f, grad_f, g, grad_g, nothing, nothing; hess_f=Hess_f, hess_g=Hess_g, M=M, p=p_0
+        )
+        ipcc = InteriorPointCentralityCondition(coh, 0.9)
+        q3 = interior_point_Newton(
+            M, coh, p_0; stopping_criterion=sc, centrality_condition=ipcc
+        )
+        @test distance(M, q3, [0.0, 0.0, 1.0]) < 2e-4
+        if _debug_iterates_plot
+            using GLMakie, Makie, GeometryTypes
+            rec = get_record(res[2])
+            prepend!(rec, [p_0])
+            add_scale = 0.0075
+            rec .+= add_scale * rec # scale slightly to lie on the sphere
+            n = 30
+            π1(x) = x[1]
+            π2(x) = x[2]
+            π3(x) = x[3]
+            h(x) = [cos(x[1])sin(x[2]), sin(x[1])sin(x[2]), cos(x[2])]
+            U = [[θ, ϕ] for θ in LinRange(0, 2π, n), ϕ in LinRange(0, π, n)]
+            V = [[θ, ϕ] for θ in LinRange(0, π / 2, n), ϕ in LinRange(0, π / 2, n)]
+
+            pts, pts_ = h.(U), h.(V)
+            f_ = p -> f(M, p)
+            s = maximum(f_.(pts)) - minimum(f_.(pts))
+            s_ = maximum(f_.(pts_)) - minimum(f_.(pts_))
+            x1, x2, x3, x1_, x2_ = π1.(pts), π2.(pts), π3.(pts), π1.(pts_), π2.(pts_)
+            x3_ = π3.(pts_)
+            grads = grad_f.(Ref(M), pts)
+            normgrads = grads ./ norm.(grads)
+            v1, v2, v3 = π1.(normgrads), π2.(normgrads), π3.(normgrads)
+
+            scene = Scene()
+            cam3d!(scene)
+            range_f = (minimum(f_.(pts)), maximum(f_.(pts)))
+
+            pa = [:color => f_.(pts), :backlight => 1.0f0, :colorrange => range_f]
+            # light colormap on sphere
+            surface!(scene, x1, x2, x3; colormap=(:viridis, 0.4), pa...)
+            # full color on feasible set
+            surface!(scene, x1_, x2_, x3_; colormap=(:viridis, 1.0), backlight=1.0f0, pa...)
+ scatter!(scene, π1.(rec), π2.(rec), π3.(rec); color=:black, markersize=8) + P = [(1 + add_scale) .* p_opt] + scatter!(scene, π1.(P), π2.(P), π3.(P); color=:white, markersize=9) + display(scene) + end + end +end diff --git a/test/solvers/test_primal_dual_semismooth_Newton.jl b/test/solvers/test_primal_dual_semismooth_Newton.jl index fb8a3254eb..ead3aea0a1 100644 --- a/test/solvers/test_primal_dual_semismooth_Newton.jl +++ b/test/solvers/test_primal_dual_semismooth_Newton.jl @@ -1,5 +1,6 @@ using Manopt, Manifolds, ManifoldsBase, Test -using ManoptExamples: adjoint_differential_forward_logs, differential_forward_logs +using ManoptExamples: + adjoint_differential_forward_logs, differential_forward_logs, project_collaborative_TV using ManifoldDiff: differential_shortest_geodesic_startpoint @testset "PD-RSSN" begin
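The new `conjugate_residual` solver is only exercised inside the test suite in this patch. As a quick orientation, here is a minimal standalone usage sketch; every name and signature is taken from `test/solvers/test_conjugate_residual.jl` above, only the result variables `X1` and `X2` are illustrative.

```julia
using Manifolds, Manopt

M = ℝ^2
p = [1.0, 1.0]
TpM = TangentSpace(M, p)   # the solver runs on a tangent space

Am = [2.0 1.0; 1.0 4.0]    # symmetric positive definite system matrix
bv = [1.0, 2.0]
A(M, X, V) = Am * V        # the linear operator A, applied to V
b(M, p) = bv               # the right-hand side in A(X) + b = 0

X0 = [3.0, 4.0]
# either via the objective or directly from operator and right-hand side
slso = SymmetricLinearSystemObjective(A, b)
X1 = conjugate_residual(TpM, slso, X0)
X2 = conjugate_residual(TpM, A, b, X0)
# both approximate the solution Am \ (-bv)
```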
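Similarly, a condensed sketch of calling `interior_point_Newton` on the sphere, reduced from `test/solvers/test_interior_point_Newton.jl` above; the cost, the constraints, and the stopping criterion are all taken from that test.

```julia
using Manifolds, Manopt, LinearAlgebra

A = -[1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 2.0]
f(M, p) = 0.5 * p' * A * p
grad_f(M, p) = (I - p * p') * A * p   # Riemannian gradient on the sphere
Hess_f(M, p, X) = A * X - (p' * A * X) * p - (p' * A * p) * X
g(M, p) = -p                          # inequality constraints g(p) ≤ 0, that is p ≥ 0
grad_g(M, p) = [(p * p' - I)[:, i] for i in 1:3]
Hess_g(M, p, X) = [(X * p')[:, i] for i in 1:3]

M = Manifolds.Sphere(2)
p0 = (1.0 / sqrt(3.0)) .* [1.0, 1.0, 1.0]   # a feasible interior starting point
q = interior_point_Newton(
    M, f, grad_f, Hess_f, p0;
    g=g, grad_g=grad_g, Hess_g=Hess_g,
    stopping_criterion=StopAfterIteration(800) | StopWhenKKTResidualLess(1e-2),
)
# q approximates the constrained minimizer [0.0, 0.0, 1.0]
```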