From 4e843bb1b72066f261cb144b96a916b52d546d6a Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:10:51 -0600 Subject: [PATCH 1/4] [in progress] Implement a derived LQR Solver and add tests to validate it and the original one. --- src/StackelbergControlHypothesesFiltering.jl | 2 + .../FeedbackGainControlStrategy.jl | 10 +- src/costs/QuadraticCost.jl | 11 +- src/dynamics/LinearDynamics.jl | 8 +- src/solvers/LQRFeedbackSolver.jl | 11 +- src/solvers/NewLQRFeedbackSolver.jl | 121 +++++ test/TestLQSolvers.jl | 514 +++++++++++++----- test/runtests.jl | 4 +- 8 files changed, 519 insertions(+), 162 deletions(-) create mode 100644 src/solvers/NewLQRFeedbackSolver.jl diff --git a/src/StackelbergControlHypothesesFiltering.jl b/src/StackelbergControlHypothesesFiltering.jl index 64c887a8..277aaf79 100644 --- a/src/StackelbergControlHypothesesFiltering.jl +++ b/src/StackelbergControlHypothesesFiltering.jl @@ -19,6 +19,8 @@ include("solvers/LQNashFeedbackSolver.jl") include("solvers/LQStackelbergFeedbackSolver.jl") include("solvers/LQRFeedbackSolver.jl") +include("solvers/NewLQRFeedbackSolver.jl") + include("TwoStateParticleFilter.jl") end # module diff --git a/src/control_strategies/FeedbackGainControlStrategy.jl b/src/control_strategies/FeedbackGainControlStrategy.jl index f2c284b1..c192e05b 100644 --- a/src/control_strategies/FeedbackGainControlStrategy.jl +++ b/src/control_strategies/FeedbackGainControlStrategy.jl @@ -21,21 +21,21 @@ end # Define some getters. -function get_linear_feedback_term(strategy::FeedbackGainControlStrategy) +function get_linear_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.Ps end -function get_linear_feedback_term(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_linear_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.Ps[player_idx] end -function get_constant_feedback_term(strategy::FeedbackGainControlStrategy) +function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.ps end -function get_constant_feedback_term(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.ps[player_idx] end # Export a commonly used control strategy for convenience and its required method. -export FeedbackGainControlStrategy, apply_control_strategy, get_linear_feedback_terms, get_constant_feedback_terms +export FeedbackGainControlStrategy, apply_control_strategy, get_linear_feedback_gains, get_constant_feedback_gains diff --git a/src/costs/QuadraticCost.jl b/src/costs/QuadraticCost.jl index a62a5995..92dbccf1 100644 --- a/src/costs/QuadraticCost.jl +++ b/src/costs/QuadraticCost.jl @@ -34,7 +34,7 @@ function quadraticize_costs(c::QuadraticCost, time_range, x::AbstractVector{Floa # Fill control costs. 
num_players = length(c.Rs) for ii in 1:num_players - R̃ = homogenize_cost_matrix(c.Rs[ii], c.rs[ii], c.crs[ii];) + R̃ = homogenize_cost_matrix(c.Rs[ii], c.rs[ii], c.crs[ii]) add_control_cost!(q_cost, ii, R̃) end @@ -44,9 +44,8 @@ end function compute_cost(c::QuadraticCost, time_range, xh::AbstractVector{Float64}, uhs::AbstractVector{<:AbstractVector{Float64}}) num_players = length(uhs) out_size = size(c.Q, 1) - x = xh[1:out_size] - ctrl_sizes = [size(c.rs[ii], 1) for ii in 1:num_players] - us = [uhs[ii][1:ctrl_sizes[ii]] for ii in 1:num_players] + x = xh[1:size(xh,1)-1] + us = [uhs[ii][1:size(uhs[ii], 1)-1] for ii in 1:num_players] total = (1/2.) * (x' * c.Q * x + 2 * c.q' * x + c.cq) if !isempty(c.Rs) @@ -92,6 +91,10 @@ function get_constant_control_cost_term(c::QuadraticCost, player_idx::Int) return c.crs[player_idx] end +# Export the helpers. +export get_quadratic_state_cost_term, get_linear_state_cost_term, get_constant_state_cost_term, + get_quadratic_control_cost_term, get_linear_control_cost_term, get_constant_control_cost_term + # Export all the cost type. export QuadraticCost diff --git a/src/dynamics/LinearDynamics.jl b/src/dynamics/LinearDynamics.jl index 43614309..e4b810dd 100644 --- a/src/dynamics/LinearDynamics.jl +++ b/src/dynamics/LinearDynamics.jl @@ -34,14 +34,18 @@ export get_homogenized_state_dynamics_matrix, get_homogenized_control_dynamics_m # Get the linear term. -function get_linear_dynamics_term(dyn::Dynamics) +function get_linear_state_dynamics(dyn::Dynamics) return dyn.A end -function get_constant_dynamics_term(dyn::Dynamics) +function get_constant_state_dynamics(dyn::Dynamics) return dyn.a end +function get_control_dynamics(dyn::Dynamics, player_idx::Int) + return dyn.Bs[player_idx] +end + # A function definition that does not accept process noise input and reroutes to the type-specific propagate_dynamics that does. function propagate_dynamics(dyn::Dynamics, diff --git a/src/solvers/LQRFeedbackSolver.jl b/src/solvers/LQRFeedbackSolver.jl index 118f9a27..731e21b0 100644 --- a/src/solvers/LQRFeedbackSolver.jl +++ b/src/solvers/LQRFeedbackSolver.jl @@ -50,13 +50,20 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac # Solve the LQR using induction and optimizing the quadratic cost for P and Z. r_terms = R + B' * Zₜ₊₁ * B + # println(tt, " - old r_terms", r_terms) # This is equivalent to inv(r_terms) * B' * Zₜ₊₁ * A Ps[:, :, tt] = r_terms \ B' * Zₜ₊₁ * A + # println(tt, " - old divider, ", B' * Zₜ₊₁ * A) + println(tt, " - old P: ", Ps[1, 1:2, tt]) + println(tt, " - old p: ", Ps[1, 3, tt]) # Update Zₜ₊₁ at t+1 to be the one at t as we go to t-1. - Zₜ₊₁ = Q + A' * Zₜ₊₁ * A - A' * Zₜ₊₁ * B * Ps[:, :, tt] - Zs[:, :, tt] = Zₜ₊₁ + Zₜ₊₁ = Q + A' * Zₜ₊₁ * (A - B * Ps[:, :, tt]) + println(tt, " - old Z: ", Zₜ₊₁[1:2, 1:2]) + println(tt, " - old z: ", Zₜ₊₁[1:2, 3], " ", Zₜ₊₁[3, 1:2]', " ", Zₜ₊₁[1:2, 3] == Zₜ₊₁[3, 1:2]') + println(tt, " - old cz: ", Zₜ₊₁[3, 3]) + Zs[:, :, tt] = Zₜ₊₁ # 0.5 * (Zₜ₊₁ + Zₜ₊₁') end # Cut off the extra dimension of the homgenized coordinates system. diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl new file mode 100644 index 00000000..003fa4d1 --- /dev/null +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -0,0 +1,121 @@ +using LinearAlgebra + +# Solve a finite horizon, discrete time LQR problem. +# Returns feedback matrices P[:, :, time]. + +# Shorthand function for LQ time-invariant dynamics and costs. 
+function solve_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) + dyns = [dyn for _ in 1:horizon] + costs = [cost for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) + dyns = [dyn for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) + costs = [cost for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) + + # Ensure the number of dynamics and costs are the same as the horizon. + @assert !isempty(dyns) && size(dyns, 1) == horizon + @assert !isempty(costs) && size(costs, 1) == horizon + + # Note: There should only be one "player" for an LQR problem. + num_states = xdim(dyns[1]) + num_ctrls = udim(dyns[1], 1) + + Ps = zeros((num_ctrls, num_states, horizon)) + ps = zeros((num_ctrls, horizon)) + Zs = zeros((num_states, num_states, horizon)) + zs = zeros((num_states, horizon)) + czs = zeros(horizon) + + Zₜ₊₁ = get_quadratic_state_cost_term(costs[horizon]) + zₜ₊₁ = get_linear_state_cost_term(costs[horizon]) + czₜ₊₁ = get_constant_state_cost_term(costs[horizon]) + Zs[:, :, horizon] = Zₜ₊₁ + + # base case + if horizon == 1 + return Ps, costs[horizon] + end + + # At each horizon running backwards, solve the LQR problem inductively. + for tt in horizon:-1:1 + A = get_linear_state_dynamics(dyns[tt]) + a = get_constant_state_dynamics(dyns[tt]) + B = get_control_dynamics(dyns[tt], 1) + + Q = get_quadratic_state_cost_term(costs[tt]) + q = get_linear_state_cost_term(costs[tt]) + cq = get_constant_state_cost_term(costs[tt]) + R = get_quadratic_control_cost_term(costs[tt], 1) + r = get_linear_control_cost_term(costs[tt], 1) + cr = get_constant_control_cost_term(costs[tt], 1) + + # Solve the LQR using induction and optimizing the quadratic cost for P and Z. + r_terms = R + B' * Zₜ₊₁ * B + + # This solves a constrained system to identify the feedback gain and constant gain. + lhs = vcat(hcat(r_terms, r), hcat(r', cr)) + rhs = vcat(hcat(B' * Zₜ₊₁ * A, B' * (Zₜ₊₁ * a + zₜ₊₁)), zeros(1, num_states+1)) + Pp = lhs \ rhs + # println(Pp) + # Ps[:, :, tt] = r_terms \ (B' * Zₜ₊₁ * A) + Ps[:, :, tt] = Pp[1:num_ctrls, 1:num_states][:,:] + ps[:, tt] = Pp[1:num_ctrls, num_states+1] + # = r' \ (B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + # r_terms \ (r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + # r_terms \ (B' * zₜ₊₁ + r + B' * Zₜ₊₁ * a) + + + # println(tt, " - new r_terms", r_terms) + # println(tt, " - new divider 1: ", B' * Zₜ₊₁ * A) + # println(tt, " - new divider 2: ", r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + + # Update feedback and cost terms. 
+ Pₜ = Ps[:, :, tt] + pₜ = ps[:, tt] + println(tt, " - new P: ", Pₜ) + println(tt, " - new p: ", pₜ) + Zₜ = Q + Pₜ' * R * Pₜ + (A - B * Pₜ)' * Zₜ₊₁ * (A - B * Pₜ) + zₜ = q + zₜ += Pₜ' * (R * pₜ - r) + zₜ += (A - B * Pₜ)' * Zₜ₊₁ * (a - B * pₜ) + zₜ += (A - B * Pₜ)' * zₜ₊₁ + czₜ = czₜ₊₁ + cq + czₜ += pₜ' * (R * pₜ - 2 * r) + czₜ += ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ) + println(tt, " - new Z: ", Zₜ) + println(tt, " - new z: ", zₜ) + println(tt, " - new cz: ", czₜ) + + # zₜ = Pₜ' * R * pₜ + (A - B * Pₜ)' * (zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ) ) + # czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ - 2 * zₜ₊₁' * B * pₜ + # czₜ += 2 * (zₜ₊₁' - pₜ' * B' * Zₜ₊₁) * a + a' * Zₜ₊₁ * a + + # czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ + # czₜ -= 2 * zₜ₊₁' * B * pₜ + # czₜ += (2 * zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ))' * a + + Zs[:, :, tt] = Zₜ # 0.5 * (Zₜ' + Zₜ) + zs[:, tt] = zₜ + czs[tt] = czₜ₊₁ + + Zₜ₊₁ = Zₜ + zₜ₊₁ = zₜ + czₜ₊₁ = czₜ + end + + Ps_feedback_strategies = FeedbackGainControlStrategy([Ps], [ps]) + Zs_future_costs = [QuadraticCost(Zs[:, :, tt], zs[:, tt], czs[tt]) for tt in 1:horizon] + return Ps_feedback_strategies, Zs_future_costs +end + +export solve_lqr_feedback diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 2db44140..2b997a92 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -5,6 +5,8 @@ using LinearAlgebra using Random: seed! using Test: @test, @testset +include("TestUtils.jl") + seed!(0) @testset "TestLQSolvers" begin @@ -46,197 +48,415 @@ seed!(0) costs = [quadraticize_costs(c₁, dummy_time_range, dummy_x, dummy_us), quadraticize_costs(c₂, dummy_time_range, dummy_x, dummy_us)] - # Ensure that the feedback solution satisfies Nash conditions of optimality - # for each player, holding others' strategies fixed. - # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact - # conditions. - @testset "CheckFeedbackSatisfiesNash" begin - Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) - nash_costs = [evaluate(c, xs, us) for c in costs] + # @testset "CheckPureNonpureLQRMatch" begin + # num_loops = 1 + # horizon_lqr = 3 + # dt = 0.02 + # times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) + + # for _ in 1:num_loops + # # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # # dt = 0.01 + # # num_states = 4 + # # num_ctrls = 2 + # # A = rand(num_states, num_states) + # # B = rand(num_states, num_ctrls) + # # a = rand(num_states) + # # lin_dyn = LinearDynamics(A, [B]; a=a) + + # # Q = make_symmetric_pos_def_matrix(num_states) + # # q = zeros(num_states) + # # cq = 0. #rand() + # # R = make_symmetric_pos_def_matrix(num_ctrls) + # # r = zeros(num_ctrls) + # # cr = 0. #rand() + + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # num_states = 2 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 
0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + + # # q_cost = QuadraticCost(Q, q, cq) + # # add_control_cost!(q_cost, 1, R; r=r, cr=cr) + + # new_x₁ = ones(num_states) + + # # New LQR + # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) + # xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) + + # # OG homogenized LQR + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # # Ensure no dumb mistakes with cost input type. + # @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) + + # strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) + # strategy_p = FeedbackGainControlStrategy([strategy_p]) + # xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) + # eval_cost_p = evaluate(p_cost, xs_p, us_p) + + # @test eval_cost ≈ eval_cost_p + # println(xs - xs_p) + # @test all(xs .≈ xs_p) + # @test all(us .≈ us_p) + # end + # end + + @testset "CheckFeedbackSatisfiesLQROptimum" begin + # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # dynamics. + dt = 0.02 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + new_x₁ = ones(2) + + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) # Perturb each strategy a little bit and confirm that cost only # increases for that player. ϵ = 1e-1 - for ii in 1:2 - for tt in 1:horizon - P̃s = deepcopy(Ps) - P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn)) - - x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) - new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs] - @test new_nash_costs[ii] ≥ nash_costs[ii] - end + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(q_cost, x̃s, ũs) + @test new_eval_cost ≥ eval_cost end end + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. + # dt = 0.01 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) - # Ensure that the costs match up at each time step with manually calculate cost matrices. 
- @testset "CheckNashCostsAreConsistentAtEquilibrium" begin - Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) + # strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) - # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - num_players = num_agents(dyn) + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(lin_dyn) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - for ii in 1:2 - for tt in 1:horizon-1 - time_range = (tt, tt+1) + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - u_tt = [us[ii][:, tt] for ii in 1:num_players] - uh_tt = homogenize_ctrls(dyn, u_tt) + # u_tt = us[1][:, tt] + # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] - uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + # u_ttp1 = us[1][:, tt+1] + # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - # TODO(hamzah) Fix discrepancy in extra cost in quad cost. + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. - manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) - manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) - computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt) + # manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt) - @test manual_cost ≈ computed_cost - end - end - end + # @test manual_cost ≈ computed_cost + # end + # end - # Ensure that the feedback solution satisfies Stackelberg conditions of optimality - # for player 1, holding others' strategies fixed. - # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact - # conditions. - @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin - Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + @testset "CheckPureFeedbackSatisfiesLQROptimum" begin + # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + dt = 0.02 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - # Define some useful constants. - ϵ = 1e-1 - leader_idx = stackelberg_leader_idx - follower_idx = 3 - stackelberg_leader_idx - num_players = num_agents(dyn) + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 
0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + new_x₁ = ones(2) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - for tt in horizon-1:-1:1 - time_range = (tt, tt+1) + strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) + strategy = FeedbackGainControlStrategy([strategy]) + xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + eval_cost = evaluate(p_cost, xs, us) - # Copy the things we will alter. - ũs = deepcopy(us) + # Perturb each strategy a little bit and confirm that cost only + # increases for that player. + ϵ = 1e-1 + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) + # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(p_cost, x̃s, ũs) + @test new_eval_cost ≥ eval_cost + end + end - # Perturb the leader input u1 at the current time. - ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx)) - ũhs = homogenize_ctrls(dyn, ũs) + # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) - # Re-solve for the optimal follower input given the perturbed leader trajectory. - A = get_homogenized_state_dynamics_matrix(dyn) - B₂ = get_homogenized_control_dynamics_matrix(dyn, follower_idx) - L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1]) - G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂ + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx) - ũh1ₜ = ũhs[leader_idx][:, tt] - ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ)) - ũh_tt = [ũh1ₜ, ũh2ₜ] + # strategy, future_costs = solve_lqr_feedback(lin_dyn, p_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(p_cost, xs, us) - ũhs[follower_idx][:, tt] = ũh2ₜ + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(lin_dyn) - # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt. - # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`. - opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - # Compute the homogenized controls for time tt+1. - uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players] + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - # The cost computed manually for perturbed inputs using - # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}. 
- state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt) - ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players] - xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt) - x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁) - future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1) - new_P1_cost = state_and_controls_cost + future_cost + # u_tt = us[1][:, tt] + # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition. - @test new_P1_cost ≥ opt_P1_cost + # u_ttp1 = us[1][:, tt+1] + # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - x̃s = unroll_raw_controls(dyn, times, ũs, x₁) - new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - # This test evaluates the cost for the entire perturbed trajectory against the optimal cost. - @test new_stack_costs[leader_idx] ≥ optimal_stackelberg_costs[leader_idx] - end - end + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt) + # println(tt, " - manual 0 -> ", manual_cost) + # manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # println("manual -> ", manual_cost) + # computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt) + # println(computed_cost) + # @test manual_cost ≈ computed_cost + # end + # end - # Ensure that the feedback solution satisfies Stackelberg conditions of optimality - # for player 2, holding others' strategies fixed. - @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin - Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + # # Ensure that the feedback solution satisfies Nash conditions of optimality + # # for each player, holding others' strategies fixed. + # # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact + # # conditions. + # @testset "CheckFeedbackSatisfiesNash" begin + # Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) + # nash_costs = [evaluate(c, xs, us) for c in costs] - # Define some useful constants. - ϵ = 1e-1 - leader_idx = stackelberg_leader_idx - follower_idx = 3 - stackelberg_leader_idx - num_players = follower_idx - - # Perturb each optimized P2 strategy a little bit and confirm that cost only increases for player 2. - leader_idx = 1 - follower_idx = 2 - for tt in 1:horizon-1 - P̃s = deepcopy(Ss) - P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn)) - - x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) - new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] - @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx] - end - end + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. 
+ # ϵ = 1e-1 + # for ii in 1:2 + # for tt in 1:horizon + # P̃s = deepcopy(Ps) + # P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn)) + # x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) + # new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs] + # @test new_nash_costs[ii] ≥ nash_costs[ii] + # end + # end + # end - # Ensure that the costs match up at each time step with manually calculate cost matrices. - @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin - Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix - # at time t. + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckNashCostsAreConsistentAtEquilibrium" begin + # Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) - # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - num_players = num_agents(dyn) + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(dyn) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - for ii in 1:2 - jj = 3 - ii - for tt in 1:horizon-1 - time_range = (tt, tt+1) + # for ii in 1:2 + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - u_tt = [us[ii][:, tt] for ii in 1:num_players] - uh_tt = homogenize_ctrls(dyn, u_tt) + # u_tt = [us[ii][:, tt] for ii in 1:num_players] + # uh_tt = homogenize_ctrls(dyn, u_tt) - u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] - uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + # u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] + # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) - # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) - future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - manual_cost = state_and_control_costs + future_cost - computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) + # manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) - # The manually recursion at time t should match the computed L cost at time t. - @test manual_cost ≈ computed_cost - end - end - end + # @test manual_cost ≈ computed_cost + # end + # end + # end + + + # Ensure that the feedback solution satisfies Stackelberg conditions of optimality + # for player 1, holding others' strategies fixed. + # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact + # conditions. 
+ # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin + # Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # Define some useful constants. + # ϵ = 1e-1 + # leader_idx = stackelberg_leader_idx + # follower_idx = 3 - stackelberg_leader_idx + # num_players = num_agents(dyn) + + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) + + # for tt in horizon-1:-1:1 + # time_range = (tt, tt+1) + + # # Copy the things we will alter. + # ũs = deepcopy(us) + + # # Perturb the leader input u1 at the current time. + # ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx)) + # ũhs = homogenize_ctrls(dyn, ũs) + + # # Re-solve for the optimal follower input given the perturbed leader trajectory. + # A = get_homogenized_state_dynamics_matrix(dyn) + # B₂ = get_homogenized_control_dynamics_matrix(dyn, follower_idx) + # L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1]) + # G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂ + + # B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx) + # ũh1ₜ = ũhs[leader_idx][:, tt] + # ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ)) + # ũh_tt = [ũh1ₜ, ũh2ₜ] + + # ũhs[follower_idx][:, tt] = ũh2ₜ + + # # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt. + # # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`. + # opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt) + + # # Compute the homogenized controls for time tt+1. + # uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players] + + # # The cost computed manually for perturbed inputs using + # # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}. + # state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt) + # ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players] + # xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt) + # x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁) + # future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1) + # new_P1_cost = state_and_controls_cost + future_cost + + # # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition. + # @test new_P1_cost ≥ opt_P1_cost + + # x̃s = unroll_raw_controls(dyn, times, ũs, x₁) + # new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # This test evaluates the cost for the entire perturbed trajectory against the optimal cost. + # @test new_stack_costs[leader_idx] ≥ optimal_stackelberg_costs[leader_idx] + # end + # end + + + # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality + # # for player 2, holding others' strategies fixed. + # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin + # Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # Define some useful constants. 
+ # ϵ = 1e-1 + # leader_idx = stackelberg_leader_idx + # follower_idx = 3 - stackelberg_leader_idx + # num_players = follower_idx + + # # Perturb each optimized P2 strategy a little bit and confirm that cost only increases for player 2. + # leader_idx = 1 + # follower_idx = 2 + # for tt in 1:horizon-1 + # P̃s = deepcopy(Ss) + # P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn)) + + # x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) + # new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] + # @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx] + # end + # end + + + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin + # Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + + # # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix + # # at time t. + + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(dyn) + + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) + + # for ii in 1:2 + # jj = 3 - ii + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) + + # u_tt = [us[ii][:, tt] for ii in 1:num_players] + # uh_tt = homogenize_ctrls(dyn, u_tt) + + # u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] + # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. + # state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) + # future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + + # manual_cost = state_and_control_costs + future_cost + # computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + + # # The manually recursion at time t should match the computed L cost at time t. + # @test manual_cost ≈ computed_cost + # end + # end + # end end diff --git a/test/runtests.jl b/test/runtests.jl index 15b6a5ae..0370471f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -include("TestApproximatedLQSolvers.jl") +# include("TestApproximatedLQSolvers.jl") include("TestLQSolvers.jl") -include("TestLQStackelbergDerivation.jl") +# include("TestLQStackelbergDerivation.jl") From 6b112c915959679cd76c83a491124471582d059d Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:35:27 -0600 Subject: [PATCH 2/4] More changes --- src/solvers/LQRFeedbackSolver.jl | 9 +- src/solvers/NewLQRFeedbackSolver.jl | 12 +- test/TestLQSolvers.jl | 256 ++++++++++++++-------------- 3 files changed, 144 insertions(+), 133 deletions(-) diff --git a/src/solvers/LQRFeedbackSolver.jl b/src/solvers/LQRFeedbackSolver.jl index 731e21b0..30777982 100644 --- a/src/solvers/LQRFeedbackSolver.jl +++ b/src/solvers/LQRFeedbackSolver.jl @@ -36,9 +36,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac Zs[:, :, horizon] = Zₜ₊₁ # base case - if horizon == 1 - return Ps - end + # if horizon == 1 + # return Ps + # end # At each horizon running backwards, solve the LQR problem inductively. 
for tt in horizon:-1:1 @@ -60,6 +60,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac # Update Zₜ₊₁ at t+1 to be the one at t as we go to t-1. Zₜ₊₁ = Q + A' * Zₜ₊₁ * (A - B * Ps[:, :, tt]) + # println(tt, " - old Zₜ: ", Zₜ₊₁, Zₜ₊₁') + # Zₜ₊₁ = (Zₜ₊₁ + Zₜ₊₁')/2. + # @assert Zₜ₊₁ == Zₜ₊₁' println(tt, " - old Z: ", Zₜ₊₁[1:2, 1:2]) println(tt, " - old z: ", Zₜ₊₁[1:2, 3], " ", Zₜ₊₁[3, 1:2]', " ", Zₜ₊₁[1:2, 3] == Zₜ₊₁[3, 1:2]') println(tt, " - old cz: ", Zₜ₊₁[3, 3]) diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl index 003fa4d1..0152b014 100644 --- a/src/solvers/NewLQRFeedbackSolver.jl +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -42,9 +42,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac Zs[:, :, horizon] = Zₜ₊₁ # base case - if horizon == 1 - return Ps, costs[horizon] - end + # if horizon == 1 + # return Ps, costs[horizon] + # end # At each horizon running backwards, solve the LQR problem inductively. for tt in horizon:-1:1 @@ -85,13 +85,19 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac println(tt, " - new P: ", Pₜ) println(tt, " - new p: ", pₜ) Zₜ = Q + Pₜ' * R * Pₜ + (A - B * Pₜ)' * Zₜ₊₁ * (A - B * Pₜ) + # println(tt, " - new Zₜ: ", Zₜ, Zₜ') + # Zₜ = (Zₜ + Zₜ')/2. + # @assert Zₜ == Zₜ' zₜ = q zₜ += Pₜ' * (R * pₜ - r) zₜ += (A - B * Pₜ)' * Zₜ₊₁ * (a - B * pₜ) zₜ += (A - B * Pₜ)' * zₜ₊₁ czₜ = czₜ₊₁ + cq + println(tt, " - new cz 1: ", czₜ₊₁ + cq) czₜ += pₜ' * (R * pₜ - 2 * r) + println(tt, " - new cz 2: ", pₜ' * (R * pₜ - 2 * r)) czₜ += ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ) + println(tt, " - new cz 3: ", ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ)) println(tt, " - new Z: ", Zₜ) println(tt, " - new z: ", zₜ) println(tt, " - new cz: ", czₜ) diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 2b997a92..30987cb9 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -48,103 +48,105 @@ seed!(0) costs = [quadraticize_costs(c₁, dummy_time_range, dummy_x, dummy_us), quadraticize_costs(c₂, dummy_time_range, dummy_x, dummy_us)] - # @testset "CheckPureNonpureLQRMatch" begin - # num_loops = 1 - # horizon_lqr = 3 - # dt = 0.02 - # times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) - - # for _ in 1:num_loops - # # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # # dt = 0.01 - # # num_states = 4 - # # num_ctrls = 2 - # # A = rand(num_states, num_states) - # # B = rand(num_states, num_ctrls) - # # a = rand(num_states) - # # lin_dyn = LinearDynamics(A, [B]; a=a) - - # # Q = make_symmetric_pos_def_matrix(num_states) - # # q = zeros(num_states) - # # cq = 0. #rand() - # # R = make_symmetric_pos_def_matrix(num_ctrls) - # # r = zeros(num_ctrls) - # # cr = 0. #rand() - - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # num_states = 2 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) - # add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) - - # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # # q_cost = QuadraticCost([1. 0.; 0. 
0.]) - # # add_control_cost!(q_cost, 1, ones(1, 1)) - - # # q_cost = QuadraticCost(Q, q, cq) - # # add_control_cost!(q_cost, 1, R; r=r, cr=cr) - - # new_x₁ = ones(num_states) - - # # New LQR - # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - # xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # OG homogenized LQR - # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - - # # Ensure no dumb mistakes with cost input type. - # @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) - - # strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) - # strategy_p = FeedbackGainControlStrategy([strategy_p]) - # xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) - # eval_cost_p = evaluate(p_cost, xs_p, us_p) - - # @test eval_cost ≈ eval_cost_p - # println(xs - xs_p) - # @test all(xs .≈ xs_p) - # @test all(us .≈ us_p) - # end - # end - - @testset "CheckFeedbackSatisfiesLQROptimum" begin - # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator - # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in - # dynamics. + @testset "CheckPureNonpureLQRMatch" begin + num_loops = 1 + horizon_lqr = 3 dt = 0.02 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - new_x₁ = ones(2) - - strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) - xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - eval_cost = evaluate(q_cost, xs, us) - - # Perturb each strategy a little bit and confirm that cost only - # increases for that player. - ϵ = 1e-1 - for tt in 1:horizon - P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) - # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - new_eval_cost = evaluate(q_cost, x̃s, ũs) - @test new_eval_cost ≥ eval_cost + times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) + + for _ in 1:num_loops + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # dt = 0.01 + # num_states = 4 + # num_ctrls = 2 + # A = rand(num_states, num_states) + # B = rand(num_states, num_ctrls) + # a = rand(num_states) + # lin_dyn = LinearDynamics(A, [B]; a=a) + + # Q = make_symmetric_pos_def_matrix(num_states) + # q = zeros(num_states) + # cq = 0. #rand() + # R = make_symmetric_pos_def_matrix(num_ctrls) + # r = zeros(num_ctrls) + # cr = 0. #rand() + + # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + num_states = 2 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) + add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) 
+ + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + + # q_cost = QuadraticCost(Q, q, cq) + # add_control_cost!(q_cost, 1, R; r=r, cr=cr) + + new_x₁ = ones(num_states) + + # New LQR + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) + xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) + + # OG homogenized LQR + p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # Ensure no dumb mistakes with cost input type. + @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) + + strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) + strategy_p = FeedbackGainControlStrategy([strategy_p]) + xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) + eval_cost_p = evaluate(p_cost, xs_p, us_p) + + println("costs (old, new): ", eval_cost, " ", eval_cost_p) + @test eval_cost ≈ eval_cost_p + println("dx: ", xs - xs_p) + println("du: ", us - us_p) + @test all(xs .≈ xs_p) + @test all(us .≈ us_p) end end + # @testset "CheckFeedbackSatisfiesLQROptimum" begin + # # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # # dynamics. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) + + # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) + + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. + # ϵ = 1e-1 + # for tt in 1:horizon + # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + # new_eval_cost = evaluate(q_cost, x̃s, ũs) + # @test new_eval_cost ≥ eval_cost + # end + # end + # # Ensure that the costs match up at each time step with manually calculate cost matrices. # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with @@ -186,40 +188,40 @@ seed!(0) # end - @testset "CheckPureFeedbackSatisfiesLQROptimum" begin - # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - dt = 0.02 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 
0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - new_x₁ = ones(2) - - p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - - strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) - strategy = FeedbackGainControlStrategy([strategy]) - xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - eval_cost = evaluate(p_cost, xs, us) - - # Perturb each strategy a little bit and confirm that cost only - # increases for that player. - ϵ = 1e-1 - for tt in 1:horizon - P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) - # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - new_eval_cost = evaluate(p_cost, x̃s, ũs) - @test new_eval_cost ≥ eval_cost - end - end + # @testset "CheckPureFeedbackSatisfiesLQROptimum" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) + + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) + # strategy = FeedbackGainControlStrategy([strategy]) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(p_cost, xs, us) + + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. + # ϵ = 1e-1 + # for tt in 1:horizon + # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) + # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + # new_eval_cost = evaluate(p_cost, x̃s, ũs) + # @test new_eval_cost ≥ eval_cost + # end + # end # Ensure that the costs match up at each time step with manually calculate cost matrices. # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin From 621f3fb319db43eff9599c7f905d73a767befafd Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Mon, 6 Mar 2023 10:07:05 -0600 Subject: [PATCH 3/4] rename fn --- src/solvers/NewLQRFeedbackSolver.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl index 0152b014..c5a9d362 100644 --- a/src/solvers/NewLQRFeedbackSolver.jl +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -4,23 +4,23 @@ using LinearAlgebra # Returns feedback matrices P[:, :, time]. # Shorthand function for LQ time-invariant dynamics and costs. 
-function solve_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) +function solve_new_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) dyns = [dyn for _ in 1:horizon] costs = [cost for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) +function solve_new_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) dyns = [dyn for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) +function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) costs = [cost for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) +function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) # Ensure the number of dynamics and costs are the same as the horizon. @assert !isempty(dyns) && size(dyns, 1) == horizon @@ -124,4 +124,4 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac return Ps_feedback_strategies, Zs_future_costs end -export solve_lqr_feedback +export solve_new_lqr_feedback From 7c21d92672ed1638065723c93f7a3511cf6562b5 Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Wed, 15 Mar 2023 11:11:23 -0700 Subject: [PATCH 4/4] changes --- .../FeedbackGainControlStrategy.jl | 2 +- src/dynamics/LinearDynamics.jl | 2 +- test/TestLQSolvers.jl | 349 +++++------------- 3 files changed, 103 insertions(+), 250 deletions(-) diff --git a/src/control_strategies/FeedbackGainControlStrategy.jl b/src/control_strategies/FeedbackGainControlStrategy.jl index bce442b6..9a46c69f 100644 --- a/src/control_strategies/FeedbackGainControlStrategy.jl +++ b/src/control_strategies/FeedbackGainControlStrategy.jl @@ -31,7 +31,7 @@ function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.ps end -function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_constant_feedback_gain(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.ps[player_idx] end diff --git a/src/dynamics/LinearDynamics.jl b/src/dynamics/LinearDynamics.jl index 7b09682a..fdd1c773 100644 --- a/src/dynamics/LinearDynamics.jl +++ b/src/dynamics/LinearDynamics.jl @@ -74,7 +74,7 @@ function propagate_dynamics(dyn::LinearDynamics, # Incorporate the dynamics based on the state and the controls. dt = time_range[2] - time_range[1] - x_next = dyn.A * x + dyn.a * dt + sum(dyn.Bs[ii] * us[ii] for ii in 1:N) + x_next = dyn.A * x + dyn.a + sum(dyn.Bs[ii] * us[ii] for ii in 1:N) if dyn.D != nothing && v != nothing x_next += dyn.D * v diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 30c11dcd..5176e8cf 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -5,8 +5,6 @@ using LinearAlgebra using Random: seed! 
using Test: @test, @testset -include("TestUtils.jl") - seed!(0) @testset "TestLQSolvers" begin @@ -38,230 +36,87 @@ seed!(0) add_control_cost!(c₂, 1, zeros(1, 1)) x₁ = [1., 0, 1, 0] - horizon = 10 + horizon = 3 times = cumsum(ones(horizon)) .- 1. costs = [c₁, c₂] - @testset "CheckPureNonpureLQRMatch" begin - num_loops = 1 - horizon_lqr = 3 + @testset "CheckFeedbackSatisfiesLQROptimum" begin + # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # dynamics. dt = 0.02 - times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) - - for _ in 1:num_loops - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # dt = 0.01 - # num_states = 4 - # num_ctrls = 2 - # A = rand(num_states, num_states) - # B = rand(num_states, num_ctrls) - # a = rand(num_states) - # lin_dyn = LinearDynamics(A, [B]; a=a) - - # Q = make_symmetric_pos_def_matrix(num_states) - # q = zeros(num_states) - # cq = 0. #rand() - # R = make_symmetric_pos_def_matrix(num_ctrls) - # r = zeros(num_ctrls) - # cr = 0. #rand() - - # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - num_states = 2 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) - add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - - new_x₁ = ones(num_states) - - # New LQR - strategy, _ = solve_new_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) - eval_cost = evaluate(q_cost, xs, us) - - # Ensure no dumb mistakes with cost input type. - @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(q_cost, ones(2, 1), [ones(1,1)]) - - strategy_p, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) - eval_cost_p = evaluate(q_cost, xs_p, us_p) - - println("costs (old, new): ", eval_cost, " ", eval_cost_p) - @test eval_cost ≈ eval_cost_p - println("dx: ", xs - xs_p) - println("du: ", us - us_p) - @test all(xs .≈ xs_p) - @test all(us .≈ us_p) + times_lqr = dt * (cumsum(ones(horizon)) .- 1.) + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=dt*ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + new_x₁ = ones(2) + + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) + + # Perturb each strategy a little bit and confirm that cost only + # increases for that player. 
+ ϵ = 1e-1 + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gain(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gain(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(q_cost, x̃s, ũs) + println(new_eval_cost, " ≥ ", eval_cost) + @test new_eval_cost ≥ eval_cost end end - # @testset "CheckFeedbackSatisfiesLQROptimum" begin - # # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator - # # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in - # # dynamics. - # dt = 0.02 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - - # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # # add_control_cost!(q_cost, 1, ones(1, 1)) - # new_x₁ = ones(2) - - # strategy, _ = solve_new_lqr_feedback(lin_dyn, q_cost, horizon) - # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # Perturb each strategy a little bit and confirm that cost only - # # increases for that player. - # ϵ = 1e-1 - # for tt in 1:horizon - # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) - # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - # new_eval_cost = evaluate(q_cost, x̃s, ũs) - # @test new_eval_cost ≥ eval_cost - # end - # end - - # # Ensure that the costs match up at each time step with manually calculate cost matrices. - # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. - # dt = 0.01 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - # new_x₁ = ones(2) - - # strategy, future_costs = solve_new_lqr_feedback(lin_dyn, q_cost, horizon) - # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - # num_players = num_agents(lin_dyn) - - # # Homgenize states and controls. - # xhs = homogenize_vector(xs) - - # for tt in 1:horizon-1 - # time_range = (tt, tt+1) - - # u_tt = us[1][:, tt] - # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - - # u_ttp1 = us[1][:, tt+1] - # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - - # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - - # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. 
-    #         manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt)
-    #         manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #         computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt)
-
-    #         @test manual_cost ≈ computed_cost
-    #     end
-    # end
-
-
-    # @testset "CheckPureFeedbackSatisfiesLQROptimum" begin
-    #     # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
-    #     # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics.
-    #     dt = 0.02
-    #     lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2))
-    #     q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.)
-    #     add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.)
-
-    #     # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]])
-    #     # q_cost = QuadraticCost([1. 0.; 0. 0.])
-    #     # add_control_cost!(q_cost, 1, ones(1, 1))
-    #     new_x₁ = ones(2)
-
-    #     q_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us)
-
-    #     strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon)
-    #     strategy = FeedbackGainControlStrategy([strategy])
-    #     xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁)
-    #     eval_cost = evaluate(q_cost, xs, us)
-
-    #     # Perturb each strategy a little bit and confirm that cost only
-    #     # increases for that player.
-    #     ϵ = 1e-1
-    #     for tt in 1:horizon
-    #         P̃s = deepcopy(get_linear_feedback_gains(strategy, 1))
-    #         p̃s = deepcopy(get_constant_feedback_gains(strategy, 1))
-    #         P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn))
-    #         # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1))
-
-    #         x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁)
-    #         new_eval_cost = evaluate(q_cost, x̃s, ũs)
-    #         @test new_eval_cost ≥ eval_cost
-    #     end
-    # end
-
-    # Ensure that the costs match up at each time step with manually calculate cost matrices.
-    # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin
-    #     # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
-    #     # state [x ẋ] and input control α acceleration.
-    #     dt = 0.02
-    #     lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2))
-    #     q_cost = QuadraticCost([1. 0.; 0. 0.])
-    #     add_control_cost!(q_cost, 1, ones(1, 1))
-    #     new_x₁ = ones(2)
-
-    #     q_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us)
-
-    #     strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon)
-    #     xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁)
-    #     eval_cost = evaluate(q_cost, xs, us)
 
+    @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin
+        # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
+        # state [x ẋ] and input control α acceleration.
+        dt = 0.02
+        times_lqr = dt * (cumsum(ones(horizon)) .- 1.)
+        lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=dt*ones(2))
+        q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.)
+        add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.)
+        new_x₁ = ones(2)
+
+        strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon)
+        xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁)
+        eval_cost = evaluate(q_cost, xs, us)
+
+        # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
+        num_players = num_agents(lin_dyn)
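+
+        # In other words, the cost-to-go objects returned by the solver should satisfy the dynamic-programming
+        # recursion along the optimal trajectory: the cost at time t should equal the stage cost at time t plus
+        # the cost-to-go evaluated at time t+1.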
-    #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
-    #     num_players = num_agents(lin_dyn)
-
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
-    #     for tt in 1:horizon-1
-    #         time_range = (tt, tt+1)
-
-    #         u_tt = us[1][:, tt]
-    #         uh_tt = homogenize_ctrls(lin_dyn, [u_tt])
-
-    #         u_ttp1 = us[1][:, tt+1]
-    #         uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1])
-
-    #         # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
-
-    #         # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
-    #         manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt)
-    #         println(tt, " - manual 0 -> ", manual_cost)
-    #         manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #         println("manual -> ", manual_cost)
-    #         computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt)
-    #         println(computed_cost)
-    #         @test manual_cost ≈ computed_cost
-    #     end
-    # end
+
+        # Homogenize states and controls.
+        # xhs = homogenize_vector(xs)
+
+        for tt in 1:horizon-1
+            time_range = (tt, tt+1)
+
+            u_tt = us[1][:, tt]
+            u_ttp1 = us[1][:, tt+1]
+
+            # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
+
+            # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
+            manual_cost = compute_cost(q_cost, time_range, xs[:, tt], [u_tt])
+            manual_cost += compute_cost(future_costs[tt+1], time_range, xs[:, tt+1], [u_ttp1])
+            computed_cost = compute_cost(future_costs[tt], time_range, xs[:, tt], [u_tt])
+
+            @test manual_cost ≈ computed_cost
+        end
+    end
 
     # # Ensure that the feedback solution satisfies Nash conditions of optimality
     # # for each player, holding others' strategies fixed.
     # # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact
     # # conditions.
     # @testset "CheckFeedbackSatisfiesNash" begin
-    #     Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁)
+    #     ctrl_strat_Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ps, x₁)
     #     nash_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Perturb each strategy a little bit and confirm that cost only
@@ -269,55 +124,53 @@ seed!(0)
     #     ϵ = 1e-1
     #     for ii in 1:2
     #         for tt in 1:horizon
-    #             P̃s = deepcopy(Ps)
-    #             P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn))
-    #             x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁)
+    #             ctrl_strat_P̃s = deepcopy(ctrl_strat_Ps)
+    #             ctrl_strat_P̃s.Ps[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xdim(dyn))
+    #             ctrl_strat_P̃s.ps[ii][:, tt] += ϵ * randn(udim(dyn, ii))
+
+    #             x̃s, ũs = unroll_feedback(dyn, times, ctrl_strat_P̃s, x₁)
     #             new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs]
     #             @test new_nash_costs[ii] ≥ nash_costs[ii]
     #         end
     #     end
     # end
+
     # # Ensure that the costs match up at each time step with manually calculate cost matrices.
     # @testset "CheckNashCostsAreConsistentAtEquilibrium" begin
-    #     Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁)
+    #     ctrl_strat_Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ps, x₁)
 
     #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for ii in 1:2
     #         for tt in 1:horizon-1
     #             time_range = (tt, tt+1)
 
     #             u_tt = [us[ii][:, tt] for ii in 1:num_players]
-    #             uh_tt = homogenize_ctrls(dyn, u_tt)
-
-    #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
-    #             uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
+    #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
 
     #             # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
 
     #             # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
-    #             manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt)
-    #             manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt)
+    #             manual_cost = compute_cost(costs[ii], time_range, xs[:, tt], u_tt)
+    #             manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xs[:, tt+1], u_ttp1)
+    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xs[:, tt], u_tt)
+
     #             @test manual_cost ≈ computed_cost
     #         end
     #     end
     # end
 
-    # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
-    # for player 1, holding others' strategies fixed.
-    # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact
-    # conditions.
+    # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
+    # # for player 1, holding others' strategies fixed.
+    # # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact
+    # # conditions.
     # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin
-    #     Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
     #     optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Define some useful constants.
@@ -326,15 +179,15 @@ seed!(0)
     #     follower_idx = 3 - stackelberg_leader_idx
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for tt in horizon-1:-1:1
     #         time_range = (tt, tt+1)
 
     #         # Copy the things we will alter.
     #         ũs = deepcopy(us)
 
+    #         # In this test, we need to solve the "optimal control" problem for the second player given player 1's
+    #         # control.
+
     #         # Perturb the leader input u1 at the current time.
     #         ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx))
     #         ũhs = homogenize_ctrls(dyn, ũs)
@@ -345,27 +198,29 @@ seed!(0)
     #         L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1])
     #         G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂
 
+    #         # Homogenize states and controls.
+    #         xhs = homogenize_vector(xs)
+
     #         B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx)
     #         ũh1ₜ = ũhs[leader_idx][:, tt]
     #         ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ))
-    #         ũh_tt = [ũh1ₜ, ũh2ₜ]
+    #         ũ_tt = [ũh1ₜ[1:udim(dyn, 1)], ũh2ₜ[1:udim(dyn, 2)]]
     #         ũhs[follower_idx][:, tt] = ũh2ₜ
 
     #         # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt.
     #         # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`.
-    #         opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt)
+    #         opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xs[:, tt], ũ_tt)
 
     #         # Compute the homogenized controls for time tt+1.
-    #         uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players]
+    #         u_ttp1 = [ũhs[ii][:, tt+1][1:udim(dyn, ii)] for ii in 1:num_players]
 
     #         # The cost computed manually for perturbed inputs using
     #         # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}.
-    #         state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt)
+    #         state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xs[:, tt], ũ_tt)
     #         ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players]
-    #         xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt)
-    #         x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁)
-    #         future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1)
+    #         xₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt)
+    #         future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, xₜ₊₁, u_ttp1)
     #         new_P1_cost = state_and_controls_cost + future_cost
 
     #         # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition.
@@ -384,8 +239,8 @@ seed!(0)
     # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
     # # for player 2, holding others' strategies fixed.
     # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin
-    #     Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
     #     optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Define some useful constants.
@@ -398,10 +253,11 @@ seed!(0)
     #     leader_idx = 1
     #     follower_idx = 2
     #     for tt in 1:horizon-1
-    #         P̃s = deepcopy(Ss)
-    #         P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn))
+    #         ctrl_strat_S̃s = deepcopy(ctrl_strat_Ss)
+    #         ctrl_strat_S̃s.Ps[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xdim(dyn))
+    #         ctrl_strat_S̃s.ps[follower_idx][:, tt] += ϵ * randn(udim(dyn, follower_idx))
 
-    #         x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁)
+    #         x̃s, ũs = unroll_feedback(dyn, times, ctrl_strat_S̃s, x₁)
     #         new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs]
     #         @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx]
     #     end
@@ -410,8 +266,8 @@ seed!(0)
     # # Ensure that the costs match up at each time step with manually calculate cost matrices.
     # @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin
-    #     Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
 
     #     # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix
     #     # at time t.
@@ -419,26 +275,23 @@ seed!(0)
     #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for ii in 1:2
     #         jj = 3 - ii
     #         for tt in 1:horizon-1
     #             time_range = (tt, tt+1)
 
     #             u_tt = [us[ii][:, tt] for ii in 1:num_players]
-    #             uh_tt = homogenize_ctrls(dyn, u_tt)
+    #             # uh_tt = homogenize_ctrls(dyn, u_tt)
 
     #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
-    #             uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
+    #             # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
 
     #             # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
-    #             state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt)
-    #             future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1)
+    #             state_and_control_costs = compute_cost(costs[ii], time_range, xs[:, tt], u_tt)
+    #             future_cost = compute_cost(future_costs[ii][tt+1], time_range, xs[:, tt+1], u_ttp1)
     #             manual_cost = state_and_control_costs + future_cost
-    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt)
+    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xs[:, tt], u_tt)
 
     #             # The manually recursion at time t should match the computed L cost at time t.
     #             @test manual_cost ≈ computed_cost