From 4e843bb1b72066f261cb144b96a916b52d546d6a Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:10:51 -0600 Subject: [PATCH 1/4] [in progress] Implement a derived LQR Solver and add tests to validate it and the original one. --- src/StackelbergControlHypothesesFiltering.jl | 2 + .../FeedbackGainControlStrategy.jl | 10 +- src/costs/QuadraticCost.jl | 11 +- src/dynamics/LinearDynamics.jl | 8 +- src/solvers/LQRFeedbackSolver.jl | 11 +- src/solvers/NewLQRFeedbackSolver.jl | 121 +++++ test/TestLQSolvers.jl | 514 +++++++++++++----- test/runtests.jl | 4 +- 8 files changed, 519 insertions(+), 162 deletions(-) create mode 100644 src/solvers/NewLQRFeedbackSolver.jl diff --git a/src/StackelbergControlHypothesesFiltering.jl b/src/StackelbergControlHypothesesFiltering.jl index 64c887a8..277aaf79 100644 --- a/src/StackelbergControlHypothesesFiltering.jl +++ b/src/StackelbergControlHypothesesFiltering.jl @@ -19,6 +19,8 @@ include("solvers/LQNashFeedbackSolver.jl") include("solvers/LQStackelbergFeedbackSolver.jl") include("solvers/LQRFeedbackSolver.jl") +include("solvers/NewLQRFeedbackSolver.jl") + include("TwoStateParticleFilter.jl") end # module diff --git a/src/control_strategies/FeedbackGainControlStrategy.jl b/src/control_strategies/FeedbackGainControlStrategy.jl index f2c284b1..c192e05b 100644 --- a/src/control_strategies/FeedbackGainControlStrategy.jl +++ b/src/control_strategies/FeedbackGainControlStrategy.jl @@ -21,21 +21,21 @@ end # Define some getters. -function get_linear_feedback_term(strategy::FeedbackGainControlStrategy) +function get_linear_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.Ps end -function get_linear_feedback_term(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_linear_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.Ps[player_idx] end -function get_constant_feedback_term(strategy::FeedbackGainControlStrategy) +function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.ps end -function get_constant_feedback_term(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.ps[player_idx] end # Export a commonly used control strategy for convenience and its required method. -export FeedbackGainControlStrategy, apply_control_strategy, get_linear_feedback_terms, get_constant_feedback_terms +export FeedbackGainControlStrategy, apply_control_strategy, get_linear_feedback_gains, get_constant_feedback_gains diff --git a/src/costs/QuadraticCost.jl b/src/costs/QuadraticCost.jl index a62a5995..92dbccf1 100644 --- a/src/costs/QuadraticCost.jl +++ b/src/costs/QuadraticCost.jl @@ -34,7 +34,7 @@ function quadraticize_costs(c::QuadraticCost, time_range, x::AbstractVector{Floa # Fill control costs. 
num_players = length(c.Rs) for ii in 1:num_players - R̃ = homogenize_cost_matrix(c.Rs[ii], c.rs[ii], c.crs[ii];) + R̃ = homogenize_cost_matrix(c.Rs[ii], c.rs[ii], c.crs[ii]) add_control_cost!(q_cost, ii, R̃) end @@ -44,9 +44,8 @@ end function compute_cost(c::QuadraticCost, time_range, xh::AbstractVector{Float64}, uhs::AbstractVector{<:AbstractVector{Float64}}) num_players = length(uhs) out_size = size(c.Q, 1) - x = xh[1:out_size] - ctrl_sizes = [size(c.rs[ii], 1) for ii in 1:num_players] - us = [uhs[ii][1:ctrl_sizes[ii]] for ii in 1:num_players] + x = xh[1:size(xh,1)-1] + us = [uhs[ii][1:size(uhs[ii], 1)-1] for ii in 1:num_players] total = (1/2.) * (x' * c.Q * x + 2 * c.q' * x + c.cq) if !isempty(c.Rs) @@ -92,6 +91,10 @@ function get_constant_control_cost_term(c::QuadraticCost, player_idx::Int) return c.crs[player_idx] end +# Export the helpers. +export get_quadratic_state_cost_term, get_linear_state_cost_term, get_constant_state_cost_term, + get_quadratic_control_cost_term, get_linear_control_cost_term, get_constant_control_cost_term + # Export all the cost type. export QuadraticCost diff --git a/src/dynamics/LinearDynamics.jl b/src/dynamics/LinearDynamics.jl index 43614309..e4b810dd 100644 --- a/src/dynamics/LinearDynamics.jl +++ b/src/dynamics/LinearDynamics.jl @@ -34,14 +34,18 @@ export get_homogenized_state_dynamics_matrix, get_homogenized_control_dynamics_m # Get the linear term. -function get_linear_dynamics_term(dyn::Dynamics) +function get_linear_state_dynamics(dyn::Dynamics) return dyn.A end -function get_constant_dynamics_term(dyn::Dynamics) +function get_constant_state_dynamics(dyn::Dynamics) return dyn.a end +function get_control_dynamics(dyn::Dynamics, player_idx::Int) + return dyn.Bs[player_idx] +end + # A function definition that does not accept process noise input and reroutes to the type-specific propagate_dynamics that does. function propagate_dynamics(dyn::Dynamics, diff --git a/src/solvers/LQRFeedbackSolver.jl b/src/solvers/LQRFeedbackSolver.jl index 118f9a27..731e21b0 100644 --- a/src/solvers/LQRFeedbackSolver.jl +++ b/src/solvers/LQRFeedbackSolver.jl @@ -50,13 +50,20 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac # Solve the LQR using induction and optimizing the quadratic cost for P and Z. r_terms = R + B' * Zₜ₊₁ * B + # println(tt, " - old r_terms", r_terms) # This is equivalent to inv(r_terms) * B' * Zₜ₊₁ * A Ps[:, :, tt] = r_terms \ B' * Zₜ₊₁ * A + # println(tt, " - old divider, ", B' * Zₜ₊₁ * A) + println(tt, " - old P: ", Ps[1, 1:2, tt]) + println(tt, " - old p: ", Ps[1, 3, tt]) # Update Zₜ₊₁ at t+1 to be the one at t as we go to t-1. - Zₜ₊₁ = Q + A' * Zₜ₊₁ * A - A' * Zₜ₊₁ * B * Ps[:, :, tt] - Zs[:, :, tt] = Zₜ₊₁ + Zₜ₊₁ = Q + A' * Zₜ₊₁ * (A - B * Ps[:, :, tt]) + println(tt, " - old Z: ", Zₜ₊₁[1:2, 1:2]) + println(tt, " - old z: ", Zₜ₊₁[1:2, 3], " ", Zₜ₊₁[3, 1:2]', " ", Zₜ₊₁[1:2, 3] == Zₜ₊₁[3, 1:2]') + println(tt, " - old cz: ", Zₜ₊₁[3, 3]) + Zs[:, :, tt] = Zₜ₊₁ # 0.5 * (Zₜ₊₁ + Zₜ₊₁') end # Cut off the extra dimension of the homgenized coordinates system. diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl new file mode 100644 index 00000000..003fa4d1 --- /dev/null +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -0,0 +1,121 @@ +using LinearAlgebra + +# Solve a finite horizon, discrete time LQR problem. +# Returns feedback matrices P[:, :, time]. + +# Shorthand function for LQ time-invariant dynamics and costs. 
+function solve_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) + dyns = [dyn for _ in 1:horizon] + costs = [cost for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) + dyns = [dyn for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) + costs = [cost for _ in 1:horizon] + return solve_lqr_feedback(dyns, costs, horizon) +end + +function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) + + # Ensure the number of dynamics and costs are the same as the horizon. + @assert !isempty(dyns) && size(dyns, 1) == horizon + @assert !isempty(costs) && size(costs, 1) == horizon + + # Note: There should only be one "player" for an LQR problem. + num_states = xdim(dyns[1]) + num_ctrls = udim(dyns[1], 1) + + Ps = zeros((num_ctrls, num_states, horizon)) + ps = zeros((num_ctrls, horizon)) + Zs = zeros((num_states, num_states, horizon)) + zs = zeros((num_states, horizon)) + czs = zeros(horizon) + + Zₜ₊₁ = get_quadratic_state_cost_term(costs[horizon]) + zₜ₊₁ = get_linear_state_cost_term(costs[horizon]) + czₜ₊₁ = get_constant_state_cost_term(costs[horizon]) + Zs[:, :, horizon] = Zₜ₊₁ + + # base case + if horizon == 1 + return Ps, costs[horizon] + end + + # At each horizon running backwards, solve the LQR problem inductively. + for tt in horizon:-1:1 + A = get_linear_state_dynamics(dyns[tt]) + a = get_constant_state_dynamics(dyns[tt]) + B = get_control_dynamics(dyns[tt], 1) + + Q = get_quadratic_state_cost_term(costs[tt]) + q = get_linear_state_cost_term(costs[tt]) + cq = get_constant_state_cost_term(costs[tt]) + R = get_quadratic_control_cost_term(costs[tt], 1) + r = get_linear_control_cost_term(costs[tt], 1) + cr = get_constant_control_cost_term(costs[tt], 1) + + # Solve the LQR using induction and optimizing the quadratic cost for P and Z. + r_terms = R + B' * Zₜ₊₁ * B + + # This solves a constrained system to identify the feedback gain and constant gain. + lhs = vcat(hcat(r_terms, r), hcat(r', cr)) + rhs = vcat(hcat(B' * Zₜ₊₁ * A, B' * (Zₜ₊₁ * a + zₜ₊₁)), zeros(1, num_states+1)) + Pp = lhs \ rhs + # println(Pp) + # Ps[:, :, tt] = r_terms \ (B' * Zₜ₊₁ * A) + Ps[:, :, tt] = Pp[1:num_ctrls, 1:num_states][:,:] + ps[:, tt] = Pp[1:num_ctrls, num_states+1] + # = r' \ (B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + # r_terms \ (r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + # r_terms \ (B' * zₜ₊₁ + r + B' * Zₜ₊₁ * a) + + + # println(tt, " - new r_terms", r_terms) + # println(tt, " - new divider 1: ", B' * Zₜ₊₁ * A) + # println(tt, " - new divider 2: ", r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a) + + # Update feedback and cost terms. 
+ Pₜ = Ps[:, :, tt] + pₜ = ps[:, tt] + println(tt, " - new P: ", Pₜ) + println(tt, " - new p: ", pₜ) + Zₜ = Q + Pₜ' * R * Pₜ + (A - B * Pₜ)' * Zₜ₊₁ * (A - B * Pₜ) + zₜ = q + zₜ += Pₜ' * (R * pₜ - r) + zₜ += (A - B * Pₜ)' * Zₜ₊₁ * (a - B * pₜ) + zₜ += (A - B * Pₜ)' * zₜ₊₁ + czₜ = czₜ₊₁ + cq + czₜ += pₜ' * (R * pₜ - 2 * r) + czₜ += ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ) + println(tt, " - new Z: ", Zₜ) + println(tt, " - new z: ", zₜ) + println(tt, " - new cz: ", czₜ) + + # zₜ = Pₜ' * R * pₜ + (A - B * Pₜ)' * (zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ) ) + # czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ - 2 * zₜ₊₁' * B * pₜ + # czₜ += 2 * (zₜ₊₁' - pₜ' * B' * Zₜ₊₁) * a + a' * Zₜ₊₁ * a + + # czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ + # czₜ -= 2 * zₜ₊₁' * B * pₜ + # czₜ += (2 * zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ))' * a + + Zs[:, :, tt] = Zₜ # 0.5 * (Zₜ' + Zₜ) + zs[:, tt] = zₜ + czs[tt] = czₜ₊₁ + + Zₜ₊₁ = Zₜ + zₜ₊₁ = zₜ + czₜ₊₁ = czₜ + end + + Ps_feedback_strategies = FeedbackGainControlStrategy([Ps], [ps]) + Zs_future_costs = [QuadraticCost(Zs[:, :, tt], zs[:, tt], czs[tt]) for tt in 1:horizon] + return Ps_feedback_strategies, Zs_future_costs +end + +export solve_lqr_feedback diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 2db44140..2b997a92 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -5,6 +5,8 @@ using LinearAlgebra using Random: seed! using Test: @test, @testset +include("TestUtils.jl") + seed!(0) @testset "TestLQSolvers" begin @@ -46,197 +48,415 @@ seed!(0) costs = [quadraticize_costs(c₁, dummy_time_range, dummy_x, dummy_us), quadraticize_costs(c₂, dummy_time_range, dummy_x, dummy_us)] - # Ensure that the feedback solution satisfies Nash conditions of optimality - # for each player, holding others' strategies fixed. - # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact - # conditions. - @testset "CheckFeedbackSatisfiesNash" begin - Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) - nash_costs = [evaluate(c, xs, us) for c in costs] + # @testset "CheckPureNonpureLQRMatch" begin + # num_loops = 1 + # horizon_lqr = 3 + # dt = 0.02 + # times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) + + # for _ in 1:num_loops + # # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # # dt = 0.01 + # # num_states = 4 + # # num_ctrls = 2 + # # A = rand(num_states, num_states) + # # B = rand(num_states, num_ctrls) + # # a = rand(num_states) + # # lin_dyn = LinearDynamics(A, [B]; a=a) + + # # Q = make_symmetric_pos_def_matrix(num_states) + # # q = zeros(num_states) + # # cq = 0. #rand() + # # R = make_symmetric_pos_def_matrix(num_ctrls) + # # r = zeros(num_ctrls) + # # cr = 0. #rand() + + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # num_states = 2 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 
0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + + # # q_cost = QuadraticCost(Q, q, cq) + # # add_control_cost!(q_cost, 1, R; r=r, cr=cr) + + # new_x₁ = ones(num_states) + + # # New LQR + # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) + # xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) + + # # OG homogenized LQR + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # # Ensure no dumb mistakes with cost input type. + # @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) + + # strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) + # strategy_p = FeedbackGainControlStrategy([strategy_p]) + # xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) + # eval_cost_p = evaluate(p_cost, xs_p, us_p) + + # @test eval_cost ≈ eval_cost_p + # println(xs - xs_p) + # @test all(xs .≈ xs_p) + # @test all(us .≈ us_p) + # end + # end + + @testset "CheckFeedbackSatisfiesLQROptimum" begin + # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # dynamics. + dt = 0.02 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + new_x₁ = ones(2) + + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) # Perturb each strategy a little bit and confirm that cost only # increases for that player. ϵ = 1e-1 - for ii in 1:2 - for tt in 1:horizon - P̃s = deepcopy(Ps) - P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn)) - - x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) - new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs] - @test new_nash_costs[ii] ≥ nash_costs[ii] - end + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(q_cost, x̃s, ũs) + @test new_eval_cost ≥ eval_cost end end + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. + # dt = 0.01 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) - # Ensure that the costs match up at each time step with manually calculate cost matrices. 
- @testset "CheckNashCostsAreConsistentAtEquilibrium" begin - Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) + # strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) - # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - num_players = num_agents(dyn) + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(lin_dyn) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - for ii in 1:2 - for tt in 1:horizon-1 - time_range = (tt, tt+1) + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - u_tt = [us[ii][:, tt] for ii in 1:num_players] - uh_tt = homogenize_ctrls(dyn, u_tt) + # u_tt = us[1][:, tt] + # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] - uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + # u_ttp1 = us[1][:, tt+1] + # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - # TODO(hamzah) Fix discrepancy in extra cost in quad cost. + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. - manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) - manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) - computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt) + # manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt) - @test manual_cost ≈ computed_cost - end - end - end + # @test manual_cost ≈ computed_cost + # end + # end - # Ensure that the feedback solution satisfies Stackelberg conditions of optimality - # for player 1, holding others' strategies fixed. - # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact - # conditions. - @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin - Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + @testset "CheckPureFeedbackSatisfiesLQROptimum" begin + # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + dt = 0.02 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - # Define some useful constants. - ϵ = 1e-1 - leader_idx = stackelberg_leader_idx - follower_idx = 3 - stackelberg_leader_idx - num_players = num_agents(dyn) + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 
0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + new_x₁ = ones(2) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - for tt in horizon-1:-1:1 - time_range = (tt, tt+1) + strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) + strategy = FeedbackGainControlStrategy([strategy]) + xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + eval_cost = evaluate(p_cost, xs, us) - # Copy the things we will alter. - ũs = deepcopy(us) + # Perturb each strategy a little bit and confirm that cost only + # increases for that player. + ϵ = 1e-1 + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) + # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(p_cost, x̃s, ũs) + @test new_eval_cost ≥ eval_cost + end + end - # Perturb the leader input u1 at the current time. - ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx)) - ũhs = homogenize_ctrls(dyn, ũs) + # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) - # Re-solve for the optimal follower input given the perturbed leader trajectory. - A = get_homogenized_state_dynamics_matrix(dyn) - B₂ = get_homogenized_control_dynamics_matrix(dyn, follower_idx) - L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1]) - G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂ + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx) - ũh1ₜ = ũhs[leader_idx][:, tt] - ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ)) - ũh_tt = [ũh1ₜ, ũh2ₜ] + # strategy, future_costs = solve_lqr_feedback(lin_dyn, p_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(p_cost, xs, us) - ũhs[follower_idx][:, tt] = ũh2ₜ + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(lin_dyn) - # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt. - # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`. - opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - # Compute the homogenized controls for time tt+1. - uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players] + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - # The cost computed manually for perturbed inputs using - # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}. 
- state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt) - ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players] - xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt) - x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁) - future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1) - new_P1_cost = state_and_controls_cost + future_cost + # u_tt = us[1][:, tt] + # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition. - @test new_P1_cost ≥ opt_P1_cost + # u_ttp1 = us[1][:, tt+1] + # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - x̃s = unroll_raw_controls(dyn, times, ũs, x₁) - new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - # This test evaluates the cost for the entire perturbed trajectory against the optimal cost. - @test new_stack_costs[leader_idx] ≥ optimal_stackelberg_costs[leader_idx] - end - end + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt) + # println(tt, " - manual 0 -> ", manual_cost) + # manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # println("manual -> ", manual_cost) + # computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt) + # println(computed_cost) + # @test manual_cost ≈ computed_cost + # end + # end - # Ensure that the feedback solution satisfies Stackelberg conditions of optimality - # for player 2, holding others' strategies fixed. - @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin - Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + # # Ensure that the feedback solution satisfies Nash conditions of optimality + # # for each player, holding others' strategies fixed. + # # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact + # # conditions. + # @testset "CheckFeedbackSatisfiesNash" begin + # Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) + # nash_costs = [evaluate(c, xs, us) for c in costs] - # Define some useful constants. - ϵ = 1e-1 - leader_idx = stackelberg_leader_idx - follower_idx = 3 - stackelberg_leader_idx - num_players = follower_idx - - # Perturb each optimized P2 strategy a little bit and confirm that cost only increases for player 2. - leader_idx = 1 - follower_idx = 2 - for tt in 1:horizon-1 - P̃s = deepcopy(Ss) - P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn)) - - x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) - new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] - @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx] - end - end + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. 
+ # ϵ = 1e-1 + # for ii in 1:2 + # for tt in 1:horizon + # P̃s = deepcopy(Ps) + # P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn)) + # x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) + # new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs] + # @test new_nash_costs[ii] ≥ nash_costs[ii] + # end + # end + # end - # Ensure that the costs match up at each time step with manually calculate cost matrices. - @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin - Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) - xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) - # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix - # at time t. + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckNashCostsAreConsistentAtEquilibrium" begin + # Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁) - # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - num_players = num_agents(dyn) + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(dyn) - # Homgenize states and controls. - xhs = homogenize_vector(xs) + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) - for ii in 1:2 - jj = 3 - ii - for tt in 1:horizon-1 - time_range = (tt, tt+1) + # for ii in 1:2 + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) - u_tt = [us[ii][:, tt] for ii in 1:num_players] - uh_tt = homogenize_ctrls(dyn, u_tt) + # u_tt = [us[ii][:, tt] for ii in 1:num_players] + # uh_tt = homogenize_ctrls(dyn, u_tt) - u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] - uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + # u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] + # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) - # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) - future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - manual_cost = state_and_control_costs + future_cost - computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. + # manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) + # manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + # computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) - # The manually recursion at time t should match the computed L cost at time t. - @test manual_cost ≈ computed_cost - end - end - end + # @test manual_cost ≈ computed_cost + # end + # end + # end + + + # Ensure that the feedback solution satisfies Stackelberg conditions of optimality + # for player 1, holding others' strategies fixed. + # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact + # conditions. 
+ # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin + # Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # Define some useful constants. + # ϵ = 1e-1 + # leader_idx = stackelberg_leader_idx + # follower_idx = 3 - stackelberg_leader_idx + # num_players = num_agents(dyn) + + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) + + # for tt in horizon-1:-1:1 + # time_range = (tt, tt+1) + + # # Copy the things we will alter. + # ũs = deepcopy(us) + + # # Perturb the leader input u1 at the current time. + # ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx)) + # ũhs = homogenize_ctrls(dyn, ũs) + + # # Re-solve for the optimal follower input given the perturbed leader trajectory. + # A = get_homogenized_state_dynamics_matrix(dyn) + # B₂ = get_homogenized_control_dynamics_matrix(dyn, follower_idx) + # L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1]) + # G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂ + + # B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx) + # ũh1ₜ = ũhs[leader_idx][:, tt] + # ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ)) + # ũh_tt = [ũh1ₜ, ũh2ₜ] + + # ũhs[follower_idx][:, tt] = ũh2ₜ + + # # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt. + # # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`. + # opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt) + + # # Compute the homogenized controls for time tt+1. + # uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players] + + # # The cost computed manually for perturbed inputs using + # # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}. + # state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt) + # ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players] + # xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt) + # x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁) + # future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1) + # new_P1_cost = state_and_controls_cost + future_cost + + # # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition. + # @test new_P1_cost ≥ opt_P1_cost + + # x̃s = unroll_raw_controls(dyn, times, ũs, x₁) + # new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # This test evaluates the cost for the entire perturbed trajectory against the optimal cost. + # @test new_stack_costs[leader_idx] ≥ optimal_stackelberg_costs[leader_idx] + # end + # end + + + # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality + # # for player 2, holding others' strategies fixed. + # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin + # Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + # optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs] + + # # Define some useful constants. 
+ # ϵ = 1e-1 + # leader_idx = stackelberg_leader_idx + # follower_idx = 3 - stackelberg_leader_idx + # num_players = follower_idx + + # # Perturb each optimized P2 strategy a little bit and confirm that cost only increases for player 2. + # leader_idx = 1 + # follower_idx = 2 + # for tt in 1:horizon-1 + # P̃s = deepcopy(Ss) + # P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn)) + + # x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁) + # new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs] + # @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx] + # end + # end + + + # # Ensure that the costs match up at each time step with manually calculate cost matrices. + # @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin + # Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx) + # xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁) + + # # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix + # # at time t. + + # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. + # num_players = num_agents(dyn) + + # # Homgenize states and controls. + # xhs = homogenize_vector(xs) + + # for ii in 1:2 + # jj = 3 - ii + # for tt in 1:horizon-1 + # time_range = (tt, tt+1) + + # u_tt = [us[ii][:, tt] for ii in 1:num_players] + # uh_tt = homogenize_ctrls(dyn, u_tt) + + # u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players] + # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1) + + # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. + # state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt) + # future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1) + + # manual_cost = state_and_control_costs + future_cost + # computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt) + + # # The manually recursion at time t should match the computed L cost at time t. + # @test manual_cost ≈ computed_cost + # end + # end + # end end diff --git a/test/runtests.jl b/test/runtests.jl index 15b6a5ae..0370471f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -include("TestApproximatedLQSolvers.jl") +# include("TestApproximatedLQSolvers.jl") include("TestLQSolvers.jl") -include("TestLQStackelbergDerivation.jl") +# include("TestLQStackelbergDerivation.jl") From 6b112c915959679cd76c83a491124471582d059d Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:35:27 -0600 Subject: [PATCH 2/4] More changes --- src/solvers/LQRFeedbackSolver.jl | 9 +- src/solvers/NewLQRFeedbackSolver.jl | 12 +- test/TestLQSolvers.jl | 256 ++++++++++++++-------------- 3 files changed, 144 insertions(+), 133 deletions(-) diff --git a/src/solvers/LQRFeedbackSolver.jl b/src/solvers/LQRFeedbackSolver.jl index 731e21b0..30777982 100644 --- a/src/solvers/LQRFeedbackSolver.jl +++ b/src/solvers/LQRFeedbackSolver.jl @@ -36,9 +36,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac Zs[:, :, horizon] = Zₜ₊₁ # base case - if horizon == 1 - return Ps - end + # if horizon == 1 + # return Ps + # end # At each horizon running backwards, solve the LQR problem inductively. 
for tt in horizon:-1:1 @@ -60,6 +60,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac # Update Zₜ₊₁ at t+1 to be the one at t as we go to t-1. Zₜ₊₁ = Q + A' * Zₜ₊₁ * (A - B * Ps[:, :, tt]) + # println(tt, " - old Zₜ: ", Zₜ₊₁, Zₜ₊₁') + # Zₜ₊₁ = (Zₜ₊₁ + Zₜ₊₁')/2. + # @assert Zₜ₊₁ == Zₜ₊₁' println(tt, " - old Z: ", Zₜ₊₁[1:2, 1:2]) println(tt, " - old z: ", Zₜ₊₁[1:2, 3], " ", Zₜ₊₁[3, 1:2]', " ", Zₜ₊₁[1:2, 3] == Zₜ₊₁[3, 1:2]') println(tt, " - old cz: ", Zₜ₊₁[3, 3]) diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl index 003fa4d1..0152b014 100644 --- a/src/solvers/NewLQRFeedbackSolver.jl +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -42,9 +42,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac Zs[:, :, horizon] = Zₜ₊₁ # base case - if horizon == 1 - return Ps, costs[horizon] - end + # if horizon == 1 + # return Ps, costs[horizon] + # end # At each horizon running backwards, solve the LQR problem inductively. for tt in horizon:-1:1 @@ -85,13 +85,19 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac println(tt, " - new P: ", Pₜ) println(tt, " - new p: ", pₜ) Zₜ = Q + Pₜ' * R * Pₜ + (A - B * Pₜ)' * Zₜ₊₁ * (A - B * Pₜ) + # println(tt, " - new Zₜ: ", Zₜ, Zₜ') + # Zₜ = (Zₜ + Zₜ')/2. + # @assert Zₜ == Zₜ' zₜ = q zₜ += Pₜ' * (R * pₜ - r) zₜ += (A - B * Pₜ)' * Zₜ₊₁ * (a - B * pₜ) zₜ += (A - B * Pₜ)' * zₜ₊₁ czₜ = czₜ₊₁ + cq + println(tt, " - new cz 1: ", czₜ₊₁ + cq) czₜ += pₜ' * (R * pₜ - 2 * r) + println(tt, " - new cz 2: ", pₜ' * (R * pₜ - 2 * r)) czₜ += ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ) + println(tt, " - new cz 3: ", ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ)) println(tt, " - new Z: ", Zₜ) println(tt, " - new z: ", zₜ) println(tt, " - new cz: ", czₜ) diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 2b997a92..30987cb9 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -48,103 +48,105 @@ seed!(0) costs = [quadraticize_costs(c₁, dummy_time_range, dummy_x, dummy_us), quadraticize_costs(c₂, dummy_time_range, dummy_x, dummy_us)] - # @testset "CheckPureNonpureLQRMatch" begin - # num_loops = 1 - # horizon_lqr = 3 - # dt = 0.02 - # times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) - - # for _ in 1:num_loops - # # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # # dt = 0.01 - # # num_states = 4 - # # num_ctrls = 2 - # # A = rand(num_states, num_states) - # # B = rand(num_states, num_ctrls) - # # a = rand(num_states) - # # lin_dyn = LinearDynamics(A, [B]; a=a) - - # # Q = make_symmetric_pos_def_matrix(num_states) - # # q = zeros(num_states) - # # cq = 0. #rand() - # # R = make_symmetric_pos_def_matrix(num_ctrls) - # # r = zeros(num_ctrls) - # # cr = 0. #rand() - - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # num_states = 2 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) - # add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) - - # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # # q_cost = QuadraticCost([1. 0.; 0. 
0.]) - # # add_control_cost!(q_cost, 1, ones(1, 1)) - - # # q_cost = QuadraticCost(Q, q, cq) - # # add_control_cost!(q_cost, 1, R; r=r, cr=cr) - - # new_x₁ = ones(num_states) - - # # New LQR - # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - # xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # OG homogenized LQR - # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - - # # Ensure no dumb mistakes with cost input type. - # @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) - - # strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) - # strategy_p = FeedbackGainControlStrategy([strategy_p]) - # xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) - # eval_cost_p = evaluate(p_cost, xs_p, us_p) - - # @test eval_cost ≈ eval_cost_p - # println(xs - xs_p) - # @test all(xs .≈ xs_p) - # @test all(us .≈ us_p) - # end - # end - - @testset "CheckFeedbackSatisfiesLQROptimum" begin - # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator - # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in - # dynamics. + @testset "CheckPureNonpureLQRMatch" begin + num_loops = 1 + horizon_lqr = 3 dt = 0.02 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - new_x₁ = ones(2) - - strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) - xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - eval_cost = evaluate(q_cost, xs, us) - - # Perturb each strategy a little bit and confirm that cost only - # increases for that player. - ϵ = 1e-1 - for tt in 1:horizon - P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) - # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - new_eval_cost = evaluate(q_cost, x̃s, ũs) - @test new_eval_cost ≥ eval_cost + times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) + + for _ in 1:num_loops + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # dt = 0.01 + # num_states = 4 + # num_ctrls = 2 + # A = rand(num_states, num_states) + # B = rand(num_states, num_ctrls) + # a = rand(num_states) + # lin_dyn = LinearDynamics(A, [B]; a=a) + + # Q = make_symmetric_pos_def_matrix(num_states) + # q = zeros(num_states) + # cq = 0. #rand() + # R = make_symmetric_pos_def_matrix(num_ctrls) + # r = zeros(num_ctrls) + # cr = 0. #rand() + + # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + num_states = 2 + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) + add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) 
+ + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # add_control_cost!(q_cost, 1, ones(1, 1)) + + # q_cost = QuadraticCost(Q, q, cq) + # add_control_cost!(q_cost, 1, R; r=r, cr=cr) + + new_x₁ = ones(num_states) + + # New LQR + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) + xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) + + # OG homogenized LQR + p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # Ensure no dumb mistakes with cost input type. + @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(p_cost, ones(2, 1), [ones(1,1)]) + + strategy_p, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon_lqr) + strategy_p = FeedbackGainControlStrategy([strategy_p]) + xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) + eval_cost_p = evaluate(p_cost, xs_p, us_p) + + println("costs (old, new): ", eval_cost, " ", eval_cost_p) + @test eval_cost ≈ eval_cost_p + println("dx: ", xs - xs_p) + println("du: ", us - us_p) + @test all(xs .≈ xs_p) + @test all(us .≈ us_p) end end + # @testset "CheckFeedbackSatisfiesLQROptimum" begin + # # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # # dynamics. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) + + # strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(q_cost, xs, us) + + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. + # ϵ = 1e-1 + # for tt in 1:horizon + # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + # new_eval_cost = evaluate(q_cost, x̃s, ũs) + # @test new_eval_cost ≥ eval_cost + # end + # end + # # Ensure that the costs match up at each time step with manually calculate cost matrices. # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with @@ -186,40 +188,40 @@ seed!(0) # end - @testset "CheckPureFeedbackSatisfiesLQROptimum" begin - # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - dt = 0.02 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 
0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - new_x₁ = ones(2) - - p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) - - strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) - strategy = FeedbackGainControlStrategy([strategy]) - xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - eval_cost = evaluate(p_cost, xs, us) - - # Perturb each strategy a little bit and confirm that cost only - # increases for that player. - ϵ = 1e-1 - for tt in 1:horizon - P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) - # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - new_eval_cost = evaluate(p_cost, x̃s, ũs) - @test new_eval_cost ≥ eval_cost - end - end + # @testset "CheckPureFeedbackSatisfiesLQROptimum" begin + # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with + # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. + # dt = 0.02 + # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) + # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) + # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + + # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) + # # q_cost = QuadraticCost([1. 0.; 0. 0.]) + # # add_control_cost!(q_cost, 1, ones(1, 1)) + # new_x₁ = ones(2) + + # p_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us) + + # strategy, _ = solve_lqr_feedback(lin_dyn, p_cost, horizon) + # strategy = FeedbackGainControlStrategy([strategy]) + # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) + # eval_cost = evaluate(p_cost, xs, us) + + # # Perturb each strategy a little bit and confirm that cost only + # # increases for that player. + # ϵ = 1e-1 + # for tt in 1:horizon + # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) + # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) + # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn)) + # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + # new_eval_cost = evaluate(p_cost, x̃s, ũs) + # @test new_eval_cost ≥ eval_cost + # end + # end # Ensure that the costs match up at each time step with manually calculate cost matrices. # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin From 621f3fb319db43eff9599c7f905d73a767befafd Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Mon, 6 Mar 2023 10:07:05 -0600 Subject: [PATCH 3/4] rename fn --- src/solvers/NewLQRFeedbackSolver.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/solvers/NewLQRFeedbackSolver.jl b/src/solvers/NewLQRFeedbackSolver.jl index 0152b014..c5a9d362 100644 --- a/src/solvers/NewLQRFeedbackSolver.jl +++ b/src/solvers/NewLQRFeedbackSolver.jl @@ -4,23 +4,23 @@ using LinearAlgebra # Returns feedback matrices P[:, :, time]. # Shorthand function for LQ time-invariant dynamics and costs. 
-function solve_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) +function solve_new_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int) dyns = [dyn for _ in 1:horizon] costs = [cost for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) +function solve_new_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int) dyns = [dyn for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) +function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int) costs = [cost for _ in 1:horizon] - return solve_lqr_feedback(dyns, costs, horizon) + return solve_new_lqr_feedback(dyns, costs, horizon) end -function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) +function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int) # Ensure the number of dynamics and costs are the same as the horizon. @assert !isempty(dyns) && size(dyns, 1) == horizon @@ -124,4 +124,4 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac return Ps_feedback_strategies, Zs_future_costs end -export solve_lqr_feedback +export solve_new_lqr_feedback From 7c21d92672ed1638065723c93f7a3511cf6562b5 Mon Sep 17 00:00:00 2001 From: Hamzah Khan <2308491+khanh111@users.noreply.github.com> Date: Wed, 15 Mar 2023 11:11:23 -0700 Subject: [PATCH 4/4] changes --- .../FeedbackGainControlStrategy.jl | 2 +- src/dynamics/LinearDynamics.jl | 2 +- test/TestLQSolvers.jl | 349 +++++------------- 3 files changed, 103 insertions(+), 250 deletions(-) diff --git a/src/control_strategies/FeedbackGainControlStrategy.jl b/src/control_strategies/FeedbackGainControlStrategy.jl index bce442b6..9a46c69f 100644 --- a/src/control_strategies/FeedbackGainControlStrategy.jl +++ b/src/control_strategies/FeedbackGainControlStrategy.jl @@ -31,7 +31,7 @@ function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy) return strategy.ps end -function get_constant_feedback_gains(strategy::FeedbackGainControlStrategy, player_idx::Int) +function get_constant_feedback_gain(strategy::FeedbackGainControlStrategy, player_idx::Int) return strategy.ps[player_idx] end diff --git a/src/dynamics/LinearDynamics.jl b/src/dynamics/LinearDynamics.jl index 7b09682a..fdd1c773 100644 --- a/src/dynamics/LinearDynamics.jl +++ b/src/dynamics/LinearDynamics.jl @@ -74,7 +74,7 @@ function propagate_dynamics(dyn::LinearDynamics, # Incorporate the dynamics based on the state and the controls. dt = time_range[2] - time_range[1] - x_next = dyn.A * x + dyn.a * dt + sum(dyn.Bs[ii] * us[ii] for ii in 1:N) + x_next = dyn.A * x + dyn.a + sum(dyn.Bs[ii] * us[ii] for ii in 1:N) if dyn.D != nothing && v != nothing x_next += dyn.D * v diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 30c11dcd..5176e8cf 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -5,8 +5,6 @@ using LinearAlgebra using Random: seed! 
using Test: @test, @testset -include("TestUtils.jl") - seed!(0) @testset "TestLQSolvers" begin @@ -38,230 +36,87 @@ seed!(0) add_control_cost!(c₂, 1, zeros(1, 1)) x₁ = [1., 0, 1, 0] - horizon = 10 + horizon = 3 times = cumsum(ones(horizon)) .- 1. costs = [c₁, c₂] - @testset "CheckPureNonpureLQRMatch" begin - num_loops = 1 - horizon_lqr = 3 + @testset "CheckFeedbackSatisfiesLQROptimum" begin + # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator + # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in + # dynamics. dt = 0.02 - times_lqr = dt * (cumsum(ones(horizon_lqr)) .- 1.) - - for _ in 1:num_loops - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - # dt = 0.01 - # num_states = 4 - # num_ctrls = 2 - # A = rand(num_states, num_states) - # B = rand(num_states, num_ctrls) - # a = rand(num_states) - # lin_dyn = LinearDynamics(A, [B]; a=a) - - # Q = make_symmetric_pos_def_matrix(num_states) - # q = zeros(num_states) - # cq = 0. #rand() - # R = make_symmetric_pos_def_matrix(num_ctrls) - # r = zeros(num_ctrls) - # cr = 0. #rand() - - # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics. - num_states = 2 - lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 12.) - add_control_cost!(q_cost, 1, ones(1, 1); r=2*ones(1), cr=2.) - - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - - new_x₁ = ones(num_states) - - # New LQR - strategy, _ = solve_new_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) - eval_cost = evaluate(q_cost, xs, us) - - # Ensure no dumb mistakes with cost input type. - @test evaluate(q_cost, ones(2, 1), [ones(1,1)]) ≈ evaluate(q_cost, ones(2, 1), [ones(1,1)]) - - strategy_p, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon_lqr) - xs_p, us_p = unroll_feedback(lin_dyn, times_lqr, strategy_p, new_x₁) - eval_cost_p = evaluate(q_cost, xs_p, us_p) - - println("costs (old, new): ", eval_cost, " ", eval_cost_p) - @test eval_cost ≈ eval_cost_p - println("dx: ", xs - xs_p) - println("du: ", us - us_p) - @test all(xs .≈ xs_p) - @test all(us .≈ us_p) + times_lqr = dt * (cumsum(ones(horizon)) .- 1.) + lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=dt*ones(2)) + q_cost = QuadraticCost([1. 0.; 0. 0.], ones(2), 1.) + add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) + new_x₁ = ones(2) + + strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon) + xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁) + eval_cost = evaluate(q_cost, xs, us) + + # Perturb each strategy a little bit and confirm that cost only + # increases for that player. 
+ ϵ = 1e-1 + for tt in 1:horizon + P̃s = deepcopy(get_linear_feedback_gain(strategy, 1)) + p̃s = deepcopy(get_constant_feedback_gain(strategy, 1)) + P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) + p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) + + x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) + new_eval_cost = evaluate(q_cost, x̃s, ũs) + println(new_eval_cost, " ≥ ", eval_cost) + @test new_eval_cost ≥ eval_cost end end - # @testset "CheckFeedbackSatisfiesLQROptimum" begin - # # This is similar to the LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator - # # with state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in - # # dynamics. - # dt = 0.02 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.) - # add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.) - - - # # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]) - # # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # # add_control_cost!(q_cost, 1, ones(1, 1)) - # new_x₁ = ones(2) - - # strategy, _ = solve_new_lqr_feedback(lin_dyn, q_cost, horizon) - # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # Perturb each strategy a little bit and confirm that cost only - # # increases for that player. - # ϵ = 1e-1 - # for tt in 1:horizon - # P̃s = deepcopy(get_linear_feedback_gains(strategy, 1)) - # p̃s = deepcopy(get_constant_feedback_gains(strategy, 1)) - # P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xdim(lin_dyn)) - # # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1)) - - # x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁) - # new_eval_cost = evaluate(q_cost, x̃s, ũs) - # @test new_eval_cost ≥ eval_cost - # end - # end - - # # Ensure that the costs match up at each time step with manually calculate cost matrices. - # @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin - # # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with - # # state [x ẋ] and input control α acceleration. - # dt = 0.01 - # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2)) - # q_cost = QuadraticCost([1. 0.; 0. 0.]) - # add_control_cost!(q_cost, 1, ones(1, 1)) - # new_x₁ = ones(2) - - # strategy, future_costs = solve_new_lqr_feedback(lin_dyn, q_cost, horizon) - # xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁) - # eval_cost = evaluate(q_cost, xs, us) - - # # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t. - # num_players = num_agents(lin_dyn) - - # # Homgenize states and controls. - # xhs = homogenize_vector(xs) - - # for tt in 1:horizon-1 - # time_range = (tt, tt+1) - - # u_tt = us[1][:, tt] - # uh_tt = homogenize_ctrls(lin_dyn, [u_tt]) - - # u_ttp1 = us[1][:, tt+1] - # uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1]) - - # # TODO(hamzah) Fix discrepancy in extra cost in quad cost. - - # # Manual cost is formed by the sum of the current state/ctrls costs and the future costs. 
-    #         manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt)
-    #         manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #         computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt)
-
-    #         @test manual_cost ≈ computed_cost
-    #     end
-    # end
-
-
-    # @testset "CheckPureFeedbackSatisfiesLQROptimum" begin
-    #     # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
-    #     # state [x ẋ] and input control α acceleration. We've added a constrant drift of +1 m and +1 m/s in dynamics.
-    #     dt = 0.02
-    #     lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2))
-    #     q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.)
-    #     add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.)
-
-    #     # lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]])
-    #     # q_cost = QuadraticCost([1. 0.; 0. 0.])
-    #     # add_control_cost!(q_cost, 1, ones(1, 1))
-    #     new_x₁ = ones(2)
-
-    #     q_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us)
-
-    #     strategy, _ = solve_lqr_feedback(lin_dyn, q_cost, horizon)
-    #     strategy = FeedbackGainControlStrategy([strategy])
-    #     xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁)
-    #     eval_cost = evaluate(q_cost, xs, us)
-
-    #     # Perturb each strategy a little bit and confirm that cost only
-    #     # increases for that player.
-    #     ϵ = 1e-1
-    #     for tt in 1:horizon
-    #         P̃s = deepcopy(get_linear_feedback_gains(strategy, 1))
-    #         p̃s = deepcopy(get_constant_feedback_gains(strategy, 1))
-    #         P̃s[:, :, tt] += ϵ * randn(udim(lin_dyn, 1), xhdim(lin_dyn))
-    #         # p̃s[:, tt] += ϵ * randn(udim(lin_dyn, 1))
-
-    #         x̃s, ũs = unroll_feedback(lin_dyn, times, FeedbackGainControlStrategy([P̃s], [p̃s]), new_x₁)
-    #         new_eval_cost = evaluate(q_cost, x̃s, ũs)
-    #         @test new_eval_cost ≥ eval_cost
-    #     end
-    # end
-
-    # Ensure that the costs match up at each time step with manually calculate cost matrices.
-    # @testset "CheckPureLQRCostsAreConsistentAtEquilibrium" begin
-    #     # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
-    #     # state [x ẋ] and input control α acceleration.
-    #     dt = 0.02
-    #     lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=ones(2))
-    #     q_cost = QuadraticCost([1. 0.; 0. 0.])
-    #     add_control_cost!(q_cost, 1, ones(1, 1))
-    #     new_x₁ = ones(2)
-
-    #     q_cost = quadraticize_costs(q_cost, dummy_time_range, dummy_x, dummy_us)
-
-    #     strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon)
-    #     xs, us = unroll_feedback(lin_dyn, times, strategy, new_x₁)
-    #     eval_cost = evaluate(q_cost, xs, us)
 
+    @testset "CheckLQRCostsAreConsistentAtEquilibrium" begin
+        # This is the same LQR problem that MATLAB has as an example - dt = 0.02s and it's a double integrator with
+        # state [x ẋ] and input control α acceleration.
+        dt = 0.02
+        times_lqr = dt * (cumsum(ones(horizon)) .- 1.)
+        lin_dyn = LinearDynamics([1. dt; 0 1.], [[0.; dt][:,:]]; a=dt*ones(2))
+        q_cost = QuadraticCost([1. 0.; 0. 0.], zeros(2), 1.)
+        add_control_cost!(q_cost, 1, ones(1, 1); r=zeros(1), cr=2.)
+        new_x₁ = ones(2)
+
+        strategy, future_costs = solve_lqr_feedback(lin_dyn, q_cost, horizon)
+        xs, us = unroll_feedback(lin_dyn, times_lqr, strategy, new_x₁)
+        eval_cost = evaluate(q_cost, xs, us)
+
+        # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
+        num_players = num_agents(lin_dyn)
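+
+        # In other words, the cost-to-go objects returned by the solver should satisfy the dynamic-programming
+        # recursion along the optimal trajectory: the cost at time t should equal the stage cost at time t plus
+        # the cost-to-go evaluated at time t+1.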
-    #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
-    #     num_players = num_agents(lin_dyn)
-
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
-    #     for tt in 1:horizon-1
-    #         time_range = (tt, tt+1)
-
-    #         u_tt = us[1][:, tt]
-    #         uh_tt = homogenize_ctrls(lin_dyn, [u_tt])
-
-    #         u_ttp1 = us[1][:, tt+1]
-    #         uh_ttp1 = homogenize_ctrls(lin_dyn, [u_ttp1])
-
-    #         # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
-
-    #         # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
-    #         manual_cost = compute_cost(q_cost, time_range, xhs[:, tt], uh_tt)
-    #         println(tt, " - manual 0 -> ", manual_cost)
-    #         manual_cost += compute_cost(future_costs[tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #         println("manual -> ", manual_cost)
-    #         computed_cost = compute_cost(future_costs[tt], time_range, xhs[:, tt], uh_tt)
-    #         println(computed_cost)
-    #         @test manual_cost ≈ computed_cost
-    #     end
-    # end
+
+        # Homogenize states and controls.
+        # xhs = homogenize_vector(xs)
+
+        for tt in 1:horizon-1
+            time_range = (tt, tt+1)
+
+            u_tt = us[1][:, tt]
+            u_ttp1 = us[1][:, tt+1]
+
+            # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
+
+            # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
+            manual_cost = compute_cost(q_cost, time_range, xs[:, tt], [u_tt])
+            manual_cost += compute_cost(future_costs[tt+1], time_range, xs[:, tt+1], [u_ttp1])
+            computed_cost = compute_cost(future_costs[tt], time_range, xs[:, tt], [u_tt])
+
+            @test manual_cost ≈ computed_cost
+        end
+    end
 
     # # Ensure that the feedback solution satisfies Nash conditions of optimality
     # # for each player, holding others' strategies fixed.
     # # Note: This test, as formulated, allows some false positive cases. See Basar and Olsder (Eq. 3.22) for the exact
     # # conditions.
     # @testset "CheckFeedbackSatisfiesNash" begin
-    #     Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁)
+    #     ctrl_strat_Ps, _ = solve_lq_nash_feedback(dyn, costs, horizon)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ps, x₁)
     #     nash_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Perturb each strategy a little bit and confirm that cost only
@@ -269,55 +124,53 @@ seed!(0)
     #     ϵ = 1e-1
     #     for ii in 1:2
     #         for tt in 1:horizon
-    #             P̃s = deepcopy(Ps)
-    #             P̃s[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xhdim(dyn))
-    #             x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁)
+    #             ctrl_strat_P̃s = deepcopy(ctrl_strat_Ps)
+    #             ctrl_strat_P̃s.Ps[ii][:, :, tt] += ϵ * randn(udim(dyn, ii), xdim(dyn))
+    #             ctrl_strat_P̃s.ps[ii][:, tt] += ϵ * randn(udim(dyn, ii))
+
+    #             x̃s, ũs = unroll_feedback(dyn, times, ctrl_strat_P̃s, x₁)
     #             new_nash_costs = [evaluate(c, x̃s, ũs) for c in costs]
     #             @test new_nash_costs[ii] ≥ nash_costs[ii]
     #         end
     #     end
     # end
+
     # # Ensure that the costs match up at each time step with manually calculate cost matrices.
     # @testset "CheckNashCostsAreConsistentAtEquilibrium" begin
-    #     Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ps), x₁)
+    #     ctrl_strat_Ps, future_costs = solve_lq_nash_feedback(dyn, costs, horizon)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ps, x₁)
 
     #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for ii in 1:2
     #         for tt in 1:horizon-1
     #             time_range = (tt, tt+1)
 
     #             u_tt = [us[ii][:, tt] for ii in 1:num_players]
-    #             uh_tt = homogenize_ctrls(dyn, u_tt)
-
-    #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
-    #             uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
+    #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
 
     #             # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
 
     #             # Manual cost is formed by the sum of the current state/ctrls costs and the future costs.
-    #             manual_cost = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt)
-    #             manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1)
-    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt)
+    #             manual_cost = compute_cost(costs[ii], time_range, xs[:, tt], u_tt)
+    #             manual_cost += compute_cost(future_costs[ii][tt+1], time_range, xs[:, tt+1], u_ttp1)
+    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xs[:, tt], u_tt)
+
     #             @test manual_cost ≈ computed_cost
     #         end
     #     end
     # end
 
-    # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
-    # for player 1, holding others' strategies fixed.
-    # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact
-    # conditions.
+    # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
+    # # for player 1, holding others' strategies fixed.
+    # # Note: This test, as formulated, allows some false positive cases. See Khan and Fridovich-Keil 2023 for the exact
+    # # conditions.
     # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForLeader" begin
-    #     Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
     #     optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Define some useful constants.
@@ -326,15 +179,15 @@ seed!(0)
     #     follower_idx = 3 - stackelberg_leader_idx
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for tt in horizon-1:-1:1
     #         time_range = (tt, tt+1)
 
     #         # Copy the things we will alter.
     #         ũs = deepcopy(us)
 
+    #         # In this test, we need to solve the "optimal control" problem for the second player given player 1's
+    #         # control.
+
     #         # Perturb the leader input u1 at the current time.
     #         ũs[leader_idx][:, tt] += ϵ * randn(udim(dyn, leader_idx))
     #         ũhs = homogenize_ctrls(dyn, ũs)
@@ -345,27 +198,29 @@ seed!(0)
     #         L₂_ttp1 = get_homogenized_state_cost_matrix(future_costs[follower_idx][tt+1])
     #         G = get_homogenized_control_cost_matrix(costs[follower_idx], follower_idx) + B₂' * L₂_ttp1 * B₂
 
+    #         # Homogenize states and controls.
+    #         xhs = homogenize_vector(xs)
+
     #         B₁ = get_homogenized_control_dynamics_matrix(dyn, leader_idx)
     #         ũh1ₜ = ũhs[leader_idx][:, tt]
     #         ũh2ₜ = - G \ (B₂' * L₂_ttp1 * (A * xhs[:,tt] + B₁ * ũh1ₜ))
-    #         ũh_tt = [ũh1ₜ, ũh2ₜ]
+    #         ũ_tt = [ũh1ₜ[1:udim(dyn, 1)], ũh2ₜ[1:udim(dyn, 2)]]
     #         ũhs[follower_idx][:, tt] = ũh2ₜ
 
     #         # The cost of the solution trajectory, computed as x_t^T * L^1_tt x_t for at time tt.
     #         # We test the accuracy of this cost in `CheckStackelbergCostsAreConsistentAtEquilibrium`.
-    #         opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xhs[:, tt], ũh_tt)
+    #         opt_P1_cost = compute_cost(future_costs[leader_idx][tt], time_range, xs[:, tt], ũ_tt)
 
     #         # Compute the homogenized controls for time tt+1.
-    #         uh_ttp1 = [ũhs[ii][:, tt+1] for ii in 1:num_players]
+    #         u_ttp1 = [ũhs[ii][:, tt+1][1:udim(dyn, ii)] for ii in 1:num_players]
 
     #         # The cost computed manually for perturbed inputs using
     #         # x_t^T Q_t x_t^T + ... + + ... + x_{t+1}^T * L^1_{t+1} x_{t+1}.
-    #         state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xhs[:, tt], ũh_tt)
+    #         state_and_controls_cost = compute_cost(costs[leader_idx], time_range, xs[:, tt], ũ_tt)
     #         ũ_tt = [ũs[ii][:, tt] for ii in 1:num_players]
-    #         xhₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt)
-    #         x̃hₜ₊₁ = homogenize_vector(xhₜ₊₁)
-    #         future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, x̃hₜ₊₁, uh_ttp1)
+    #         xₜ₊₁ = propagate_dynamics(dyn, time_range, xs[:, tt], ũ_tt)
+    #         future_cost = compute_cost(future_costs[leader_idx][tt+1], time_range, xₜ₊₁, u_ttp1)
     #         new_P1_cost = state_and_controls_cost + future_cost
 
     #         # The costs from time t+1 of the perturbed and optimal trajectories should also satisfy this condition.
@@ -384,8 +239,8 @@ seed!(0)
     # # Ensure that the feedback solution satisfies Stackelberg conditions of optimality
     # # for player 2, holding others' strategies fixed.
     # @testset "CheckFeedbackSatisfiesStackelbergEquilibriumForFollower" begin
-    #     Ss, Ls = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
     #     optimal_stackelberg_costs = [evaluate(c, xs, us) for c in costs]
 
     #     # Define some useful constants.
@@ -398,10 +253,11 @@ seed!(0)
     #     leader_idx = 1
     #     follower_idx = 2
     #     for tt in 1:horizon-1
-    #         P̃s = deepcopy(Ss)
-    #         P̃s[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xhdim(dyn))
+    #         ctrl_strat_S̃s = deepcopy(ctrl_strat_Ss)
+    #         ctrl_strat_S̃s.Ps[follower_idx][:, :, tt] += ϵ * randn(udim(dyn, follower_idx), xdim(dyn))
+    #         ctrl_strat_S̃s.ps[follower_idx][:, tt] += ϵ * randn(udim(dyn, follower_idx))
 
-    #         x̃s, ũs = unroll_feedback(dyn, times, FeedbackGainControlStrategy(P̃s), x₁)
+    #         x̃s, ũs = unroll_feedback(dyn, times, ctrl_strat_S̃s, x₁)
     #         new_stack_costs = [evaluate(c, x̃s, ũs) for c in costs]
     #         @test new_stack_costs[follower_idx] ≥ optimal_stackelberg_costs[follower_idx]
     #     end
@@ -410,8 +266,8 @@ seed!(0)
     # # Ensure that the costs match up at each time step with manually calculate cost matrices.
     # @testset "CheckStackelbergCostsAreConsistentAtEquilibrium" begin
-    #     Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
-    #     xs, us = unroll_feedback(dyn, times, FeedbackGainControlStrategy(Ss), x₁)
+    #     ctrl_strat_Ss, future_costs = solve_lq_stackelberg_feedback(dyn, costs, horizon, stackelberg_leader_idx)
+    #     xs, us = unroll_feedback(dyn, times, ctrl_strat_Ss, x₁)
 
     #     # For each player, compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix
     #     # at time t.
@@ -419,26 +275,23 @@ seed!(0)
     #     # Compute the costs using the t+1 cost matrix and compare with the cost using the cost matrix at time t.
     #     num_players = num_agents(dyn)
 
-    #     # Homgenize states and controls.
-    #     xhs = homogenize_vector(xs)
-
     #     for ii in 1:2
     #         jj = 3 - ii
     #         for tt in 1:horizon-1
     #             time_range = (tt, tt+1)
 
     #             u_tt = [us[ii][:, tt] for ii in 1:num_players]
-    #             uh_tt = homogenize_ctrls(dyn, u_tt)
+    #             # uh_tt = homogenize_ctrls(dyn, u_tt)
 
     #             u_ttp1 = [us[ii][:, tt+1] for ii in 1:num_players]
-    #             uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
+    #             # uh_ttp1 = homogenize_ctrls(dyn, u_ttp1)
 
     #             # TODO(hamzah) Fix discrepancy in extra cost in quad cost.
-    #             state_and_control_costs = compute_cost(costs[ii], time_range, xhs[:, tt], uh_tt)
-    #             future_cost = compute_cost(future_costs[ii][tt+1], time_range, xhs[:, tt+1], uh_ttp1)
+    #             state_and_control_costs = compute_cost(costs[ii], time_range, xs[:, tt], u_tt)
+    #             future_cost = compute_cost(future_costs[ii][tt+1], time_range, xs[:, tt+1], u_ttp1)
     #             manual_cost = state_and_control_costs + future_cost
-    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xhs[:, tt], uh_tt)
+    #             computed_cost = compute_cost(future_costs[ii][tt], time_range, xs[:, tt], u_tt)
 
     #             # The manually recursion at time t should match the computed L cost at time t.
     #             @test manual_cost ≈ computed_cost