[in progress] Implement a derived LQR Solver and add tests to validate the impls. #60

Open · wants to merge 6 commits into base: main
2 changes: 2 additions & 0 deletions src/StackelbergControlHypothesesFiltering.jl
@@ -19,6 +19,8 @@ include("solvers/LQNashFeedbackSolver.jl")
include("solvers/LQStackelbergFeedbackSolver.jl")
include("solvers/LQRFeedbackSolver.jl")

include("solvers/NewLQRFeedbackSolver.jl")

include("TwoStateParticleFilter.jl")

end # module
1 change: 1 addition & 0 deletions src/control_strategies/FeedbackGainControlStrategy.jl
@@ -18,6 +18,7 @@ function apply_control_strategy(tt::Int, strategy::FeedbackGainControlStrategy,
end


# Define some getters.
function get_linear_feedback_gains(strategy::FeedbackGainControlStrategy)
return strategy.Ps
end
2 changes: 1 addition & 1 deletion src/dynamics/LinearDynamics.jl
@@ -74,7 +74,7 @@ function propagate_dynamics(dyn::LinearDynamics,

# Incorporate the dynamics based on the state and the controls.
dt = time_range[2] - time_range[1]
x_next = dyn.A * x + dyn.a * dt + sum(dyn.Bs[ii] * us[ii] for ii in 1:N)
x_next = dyn.A * x + dyn.a + sum(dyn.Bs[ii] * us[ii] for ii in 1:N)

if dyn.D != nothing && v != nothing
x_next += dyn.D * v
20 changes: 15 additions & 5 deletions src/solvers/LQRFeedbackSolver.jl
@@ -36,9 +36,9 @@ function solve_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::Abstrac
Zs[:, :, horizon] = Zₜ₊₁

# base case
if horizon == 1
return Ps
end
# if horizon == 1
# return Ps
# end

# Running backward from the horizon, solve the LQR problem inductively at each time step.
for tt in horizon:-1:1
@@ -50,13 +50,23 @@

# Solve the LQR using induction and optimizing the quadratic cost for P and Z.
r_terms = R + B' * Zₜ₊₁ * B
# println(tt, " - old r_terms", r_terms)

# This is equivalent to inv(r_terms) * B' * Zₜ₊₁ * A
Ps[:, :, tt] = r_terms \ B' * Zₜ₊₁ * A
# println(tt, " - old divider, ", B' * Zₜ₊₁ * A)
println(tt, " - old P: ", Ps[1, 1:2, tt])
println(tt, " - old p: ", Ps[1, 3, tt])

# Update Zₜ₊₁ at t+1 to be the one at t as we go to t-1.
Zₜ₊₁ = Q + A' * Zₜ₊₁ * A - A' * Zₜ₊₁ * B * Ps[:, :, tt]
Zs[:, :, tt] = Zₜ₊₁
Zₜ₊₁ = Q + A' * Zₜ₊₁ * (A - B * Ps[:, :, tt])
# println(tt, " - old Zₜ: ", Zₜ₊₁, Zₜ₊₁')
# Zₜ₊₁ = (Zₜ₊₁ + Zₜ₊₁')/2.
# @assert Zₜ₊₁ == Zₜ₊₁'
println(tt, " - old Z: ", Zₜ₊₁[1:2, 1:2])
println(tt, " - old z: ", Zₜ₊₁[1:2, 3], " ", Zₜ₊₁[3, 1:2]', " ", Zₜ₊₁[1:2, 3] == Zₜ₊₁[3, 1:2]')
println(tt, " - old cz: ", Zₜ₊₁[3, 3])
Zs[:, :, tt] = Zₜ₊₁ # 0.5 * (Zₜ₊₁ + Zₜ₊₁')
end

# Cut off the extra dimension of the homogenized coordinate system.
127 changes: 127 additions & 0 deletions src/solvers/NewLQRFeedbackSolver.jl
@@ -0,0 +1,127 @@
using LinearAlgebra

# Solve a finite horizon, discrete-time LQR problem with affine dynamics and affine cost terms.
# Returns a feedback strategy (gains Ps and constant terms ps) and the per-timestep future costs.
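# The backward pass maintains a cost-to-go with quadratic (Zₜ), linear (zₜ), and constant (czₜ)
# pieces; the closed-loop terms (A - B * Pₜ, a - B * pₜ) used below correspond to a control law
# of the form uₜ = -(Ps[:, :, tt] * x + ps[:, tt]).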

# Shorthand function for LQ time-invariant dynamics and costs.
function solve_new_lqr_feedback(dyn::LinearDynamics, cost::QuadraticCost, horizon::Int)
dyns = [dyn for _ in 1:horizon]
costs = [cost for _ in 1:horizon]
return solve_new_lqr_feedback(dyns, costs, horizon)
end

function solve_new_lqr_feedback(dyn::LinearDynamics, costs::AbstractVector{QuadraticCost}, horizon::Int)
dyns = [dyn for _ in 1:horizon]
return solve_new_lqr_feedback(dyns, costs, horizon)
end

function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, cost::QuadraticCost, horizon::Int)
costs = [cost for _ in 1:horizon]
return solve_new_lqr_feedback(dyns, costs, horizon)
end

function solve_new_lqr_feedback(dyns::AbstractVector{LinearDynamics}, costs::AbstractVector{QuadraticCost}, horizon::Int)

# Ensure the number of dynamics and the number of costs both match the horizon.
@assert !isempty(dyns) && size(dyns, 1) == horizon
@assert !isempty(costs) && size(costs, 1) == horizon

# Note: There should only be one "player" for an LQR problem.
num_states = xdim(dyns[1])
num_ctrls = udim(dyns[1], 1)

Ps = zeros((num_ctrls, num_states, horizon))
ps = zeros((num_ctrls, horizon))
Zs = zeros((num_states, num_states, horizon))
zs = zeros((num_states, horizon))
czs = zeros(horizon)
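# Ps/ps will hold the feedback and feedforward gains; Zs, zs, czs hold the quadratic,
# linear, and constant pieces of the cost-to-go at each time step.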

Zₜ₊₁ = get_quadratic_state_cost_term(costs[horizon])
zₜ₊₁ = get_linear_state_cost_term(costs[horizon])
czₜ₊₁ = get_constant_state_cost_term(costs[horizon])
Zs[:, :, horizon] = Zₜ₊₁

# base case
# if horizon == 1
# return Ps, costs[horizon]
# end

# Running backward from the horizon, solve the LQR problem inductively at each time step.
for tt in horizon:-1:1
A = get_linear_state_dynamics(dyns[tt])
a = get_constant_state_dynamics(dyns[tt])
B = get_control_dynamics(dyns[tt], 1)

Q = get_quadratic_state_cost_term(costs[tt])
q = get_linear_state_cost_term(costs[tt])
cq = get_constant_state_cost_term(costs[tt])
R = get_quadratic_control_cost_term(costs[tt], 1)
r = get_linear_control_cost_term(costs[tt], 1)
cr = get_constant_control_cost_term(costs[tt], 1)

# Solve the LQR using induction and optimizing the quadratic cost for P and Z.
r_terms = R + B' * Zₜ₊₁ * B

# This solves a constrained system to identify the feedback gain and constant gain.
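# lhs is the homogenized control-cost block [[R + B'Zₜ₊₁B, r], [r', cr]]; rhs stacks
# [B'Zₜ₊₁A, B'(Zₜ₊₁a + zₜ₊₁)] over a zero row. The first num_ctrls rows of the solution
# hold the stacked gains [P p].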
lhs = vcat(hcat(r_terms, r), hcat(r', cr))
rhs = vcat(hcat(B' * Zₜ₊₁ * A, B' * (Zₜ₊₁ * a + zₜ₊₁)), zeros(1, num_states+1))
Pp = lhs \ rhs
# println(Pp)
# Ps[:, :, tt] = r_terms \ (B' * Zₜ₊₁ * A)
Ps[:, :, tt] = Pp[1:num_ctrls, 1:num_states][:,:]
ps[:, tt] = Pp[1:num_ctrls, num_states+1]
# = r' \ (B' * zₜ₊₁ + B' * Zₜ₊₁ * a)
# r_terms \ (r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a)
# r_terms \ (B' * zₜ₊₁ + r + B' * Zₜ₊₁ * a)


# println(tt, " - new r_terms", r_terms)
# println(tt, " - new divider 1: ", B' * Zₜ₊₁ * A)
# println(tt, " - new divider 2: ", r + B' * zₜ₊₁ + B' * Zₜ₊₁ * a)

# Update feedback and cost terms.
Pₜ = Ps[:, :, tt]
pₜ = ps[:, tt]
println(tt, " - new P: ", Pₜ)
println(tt, " - new p: ", pₜ)
Zₜ = Q + Pₜ' * R * Pₜ + (A - B * Pₜ)' * Zₜ₊₁ * (A - B * Pₜ)
# println(tt, " - new Zₜ: ", Zₜ, Zₜ')
# Zₜ = (Zₜ + Zₜ')/2.
# @assert Zₜ == Zₜ'
zₜ = q
zₜ += Pₜ' * (R * pₜ - r)
zₜ += (A - B * Pₜ)' * Zₜ₊₁ * (a - B * pₜ)
zₜ += (A - B * Pₜ)' * zₜ₊₁
czₜ = czₜ₊₁ + cq
println(tt, " - new cz 1: ", czₜ₊₁ + cq)
czₜ += pₜ' * (R * pₜ - 2 * r)
println(tt, " - new cz 2: ", pₜ' * (R * pₜ - 2 * r))
czₜ += ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ)
println(tt, " - new cz 3: ", ((a - B * pₜ)' * Zₜ₊₁ + 2 * zₜ₊₁') * (a - B * pₜ))
println(tt, " - new Z: ", Zₜ)
println(tt, " - new z: ", zₜ)
println(tt, " - new cz: ", czₜ)

# zₜ = Pₜ' * R * pₜ + (A - B * Pₜ)' * (zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ) )
# czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ - 2 * zₜ₊₁' * B * pₜ
# czₜ += 2 * (zₜ₊₁' - pₜ' * B' * Zₜ₊₁) * a + a' * Zₜ₊₁ * a

# czₜ = pₜ' * (R + B' * Zₜ₊₁ * B) * pₜ
# czₜ -= 2 * zₜ₊₁' * B * pₜ
# czₜ += (2 * zₜ₊₁ + Zₜ₊₁ * (a - B * pₜ))' * a

Zs[:, :, tt] = Zₜ # 0.5 * (Zₜ' + Zₜ)
zs[:, tt] = zₜ
czs[tt] = czₜ

Zₜ₊₁ = Zₜ
zₜ₊₁ = zₜ
czₜ₊₁ = czₜ
end

Ps_feedback_strategies = FeedbackGainControlStrategy([Ps], [ps])
Zs_future_costs = [QuadraticCost(Zs[:, :, tt], zs[:, tt], czs[tt]) for tt in 1:horizon]
return Ps_feedback_strategies, Zs_future_costs
end

export solve_new_lqr_feedback
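
# For reference, a minimal self-contained sketch of the affine-LQR backward pass this file
# implements, in plain Julia with made-up double-integrator data (no package types); the
# feedforward term p below uses the closed-form expression that appears commented out above,
# rather than the block solve.

using LinearAlgebra

function affine_lqr_sketch()
    A = [1.0 0.1; 0.0 1.0]           # state transition (assumed example)
    a = [0.0, 0.05]                  # constant drift term
    B = reshape([0.005, 0.1], 2, 1)  # control matrix
    Q = Matrix(1.0I, 2, 2); q = zeros(2)  # quadratic/linear state cost
    R = Matrix(0.1I, 1, 1); r = zeros(1)  # quadratic/linear control cost
    horizon = 10

    Zt, zt = copy(Q), copy(q)        # terminal cost-to-go terms
    Ps, ps = zeros(1, 2, horizon), zeros(1, horizon)
    for tt in horizon:-1:1
        S = R + B' * Zt * B
        P = S \ (B' * Zt * A)               # feedback gain
        p = S \ (r + B' * (Zt * a + zt))    # feedforward from the affine pieces
        Acl = A - B * P
        zt = q + P' * (R * p - r) + Acl' * (Zt * (a - B * p) + zt)
        Zt = Q + P' * R * P + Acl' * Zt * Acl
        Ps[:, :, tt] = P
        ps[:, tt] = p
    end
    # At run time, the control at step tt would be uₜ = -(Ps[:, :, tt] * x + ps[:, tt]).
    return Ps, ps
end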