diff --git a/docs/src/tutorials/algorithms/benders_decomposition.jl b/docs/src/tutorials/algorithms/benders_decomposition.jl
index 274e61d3464..cc3eb6d7b3e 100644
--- a/docs/src/tutorials/algorithms/benders_decomposition.jl
+++ b/docs/src/tutorials/algorithms/benders_decomposition.jl
@@ -27,6 +27,7 @@ using JuMP
 import GLPK
+import HiGHS
 import Printf
 import Test #src
@@ -35,19 +36,22 @@ import Test #src
 # Benders decomposition is a useful algorithm for solving convex optimization
 # problems with a large number of variables. It works best when a larger problem
 # can be decomposed into two (or more) smaller problems that are individually
-# much easier to solve. This tutorial demonstrates Benders decomposition on the
-# following mixed-integer linear program:
+# much easier to solve.
+#
+# This tutorial demonstrates Benders decomposition on the following
+# mixed-integer linear program:
 # ```math
 # \begin{aligned}
-# \text{min} \ & c_1^\top x + c_2^\top y \\
-# \text{subject to} \ & A_1 x + A_2 y \le b \\
-# & x \ge 0 \\
-# & y \ge 0 \\
-# & x \in \mathbb{Z}^n
+# \text{min} \ & c_1(x) + c_2(y) \\
+# \text{subject to} \ & f_1(x) \in S_1 \\
+# & f_2(y) \in S_2 \\
+# & f_3(x, y) \in S_3 \\
+# & x \in \mathbb{Z}^m \\
+# & y \in \mathbb{R}^n \\
 # \end{aligned}
 # ```
-# where $b \in \mathbb{R}^m$, $A_1 \in \mathbb{R}^{m \times n}$,
-# $A_2 \in \mathbb{R}^{m \times p}$ and $\mathbb{Z}$ is the set of integers.
+# where the functions $f$ and $c$ are linear, and the sets $S$ are inequality
+# sets like $\ge l$, $\le u$, or $= b$.
 #
 # Any mixed integer programming problem can be written in the form above.
@@ -61,27 +65,28 @@ import Test #src
 # solution for ``y`` by solving:
 # ```math
 # \begin{aligned}
-# V_2(\bar{x}) = & \text{min} \ & c_2^\top y \\
-# & \text{subject to} \ & A_1 x + A_2 y \le b \\
-# & & x = \bar{x} & \quad [\pi] \\
-# & & y \ge 0,
+# V_2(\bar{x}) = & \text{min} \ & c_2(y)\\
+# & \text{subject to} \ & f_2(y) \in S_2 \\
+# & & f_3(x, y) \in S_3 \\
+# & & x = \bar{x} & \quad [\pi] \\
+# & & y \in \mathbb{R}^n \\
 # \end{aligned}
 # ```
 # Note that we have included a "copy" of the `x` variable to simplify computing
 # the dual of $V_2$ with respect to $\bar{x}$. Because this is a linear program,
 # it is easy to solve.
 
-# Replacing the ``c_2^\top y`` component of the objective in our original
-# problem with ``V_2`` yields:
+# Replacing the ``c_2(y)`` component of the objective in our original problem
+# with ``V_2`` yields:
 # ```math
 # \begin{aligned}
-# \text{min} \ & c_1^\top x + V_2(x) \\
-# \text{subject to} \ & x \ge 0 \\
-# & x \in \mathbb{Z}^n
+# V_1 = \text{min} \ & c_1(x) + V_2(x) \\
+# \text{subject to} \ & f_1(x) \in S_1 \\
+# & x \in \mathbb{Z}^m.
 # \end{aligned}
 # ```
-# This problem looks a lot simpler to solve, but we need to do something else
-# with ``V_2`` first.
+# This problem looks a lot simpler to solve because it involves only $x$ and a
+# subset of the constraints, but we need to do something else with ``V_2`` first.
 
 # Because ``\bar{x}`` is a constant that appears on the right-hand side of the
 # constraints, ``V_2`` is a convex function with respect to ``\bar{x}``, and the
@@ -100,11 +105,11 @@ import Test #src
 # a new decision variable ``\theta``, and approximates it from below using cuts:
 # ```math
 # \begin{aligned}
-# V_1^K = & \text{min} \ & c_1^\top x + \theta \\
-# & \text{subject to} \ & x \ge 0 \\
-# & \ & x \in \mathbb{Z}^n \\
-# & \ & \theta \ge M \\
-# & \ & \theta \ge V_2(x_k) + \pi_k^\top(x - x_k) & \quad \forall k = 1,\ldots,K.
+# V_1^K = & \text{min} \ & c_1(x) + \theta \\
+# & \text{subject to} \ & f_1(x) \in S_1 \\
+# & & x \in \mathbb{Z}^m \\
+# & & \theta \ge M \\
+# & & \theta \ge V_2(x_k) + \pi_k^\top(x - x_k) & \quad \forall k = 1,\ldots,K.
 # \end{aligned}
 # ```
 # This integer program is called the _first-stage_ subproblem.
@@ -119,53 +124,91 @@ import Test #src
 # Due to convexity, we know that ``V_2(x) \ge \theta`` for all ``x``. Therefore,
 # the optimal objective value of ``V_1^K`` provides a valid _lower_ bound on the
 # objective value of the full problem. In addition, if we take a feasible
-# solution for ``x`` from the first-stage problem, then ``c_1^\top x + V_2(x)``
+# solution for ``x`` from the first-stage problem, then ``c_1(x) + V_2(x)``
 # is a valid _upper_ bound on the objective value of the full problem.
 
 # Benders decomposition uses the lower and upper bounds to determine when it has
 # found the global optimal solution.
 
-# ## Input data
+# ## Monolithic problem
+
+# As an example problem, we consider the following variant of
+# [The max-flow problem](@ref), in which there is a binary variable to decide
+# whether to open each arc for a cost of 0.1 unit, and we can open at most 11
+# arcs:
+
+G = [
+    0 3 2 2 0 0 0 0
+    0 0 0 0 5 1 0 0
+    0 0 0 0 1 3 1 0
+    0 0 0 0 0 1 0 0
+    0 0 0 0 0 0 0 4
+    0 0 0 0 0 0 0 2
+    0 0 0 0 0 0 0 4
+    0 0 0 0 0 0 0 0
+]
+n = size(G, 1)
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
+@variable(model, y[1:n, 1:n] >= 0)
+@constraint(model, sum(x) <= 11)
+@constraint(model, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x[i, j])
+@constraint(model, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+@objective(model, Min, 0.1 * sum(x) - sum(y[1, :]))
+optimize!(model)
+@assert is_solved_and_feasible(model) #src
+solution_summary(model)
+
+# The optimal objective value is -5.1:
 
-# As an example for this tutorial, we use the input data is from page 139 of
-# Garfinkel, R. & Nemhauser, G. L. Integer programming. (Wiley, 1972).
+@assert isapprox(objective_value(model), -5.1; atol = 1e-4) #src
+objective_value(model)
 
-c_1 = [1, 4]
-c_2 = [2, 3]
-dim_x = length(c_1)
-dim_y = length(c_2)
-b = [-2; -3]
-A_1 = [1 -3; -1 -3]
-A_2 = [1 -2; -1 -1]
-M = -1000;
+# and the optimal flows are:
+
+function optimal_flows(x_sol)
+    flows = [(i, j) => x_sol[i, j] for i in 1:n for j in 1:n]
+    return filter!(flow -> last(flow) > 0, flows)
+end
+
+monolithic_solution = optimal_flows(value.(y))
 
 # ## [Iterative method](@id benders_iterative)
 
 # !!! warning
 #     This is a basic implementation for pedagogical purposes. We haven't
-#     discussed Benders feasibility cuts, or any of the computational tricks
-#     that are required to build a performant implementation for large-scale
-#     problems. See [In-place iterative method](@ref) for one improvement that
-#     helps computation time.
-
-# We start by formulating the first-stage subproblem:
-
-model = Model(GLPK.Optimizer)
-@variable(model, x[1:dim_x] >= 0, Int)
+#     discussed any of the computational tricks that are required to build a
+#     performant implementation for large-scale problems. See [In-place iterative method](@ref)
+#     for one improvement that helps computation time.
+
+# We start by formulating the first-stage subproblem. It includes the `x`
+# variables, the constraints involving only `x`, and the terms in the
+# objective containing only `x`. We also need an initial lower bound on the
+# cost-to-go variable `θ`. One valid lower bound is to assume that we do not
+# pay for opening arcs, and that there is flow along all of the arcs.
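+
+# As a quick sanity check of this reasoning (an addition to this tutorial, not
+# part of the original), the bound `-sum(G)` must lie below the optimal
+# objective value of the monolithic model that we solved above:
+
+@assert -sum(G) <= objective_value(model)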
+
+M = -sum(G)
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
 @variable(model, θ >= M)
-@objective(model, Min, c_1' * x + θ)
-print(model)
+@constraint(model, sum(x) <= 11)
+@objective(model, Min, 0.1 * sum(x) + θ)
+model
 
 # For the next step, we need a function that takes a first-stage candidate
 # solution `x` and returns the optimal solution from the second-stage
 # subproblem:
 
-function solve_subproblem(x_bar::Vector)
-    model = Model(GLPK.Optimizer)
-    @variable(model, x[i in 1:dim_x] == x_bar[i])
-    @variable(model, y[1:dim_y] >= 0)
-    @constraint(model, A_1 * x + A_2 * y .<= b)
-    @objective(model, Min, c_2' * y)
+function solve_subproblem(x_bar)
+    model = Model(HiGHS.Optimizer)
+    set_silent(model)
+    @variable(model, x[i in 1:n, j in 1:n] == x_bar[i, j])
+    @variable(model, y[1:n, 1:n] >= 0)
+    @constraint(model, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x[i, j])
+    @constraint(model, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+    @objective(model, Min, -sum(y[1, :]))
     optimize!(model)
     @assert is_solved_and_feasible(model; dual = true)
     return (obj = objective_value(model), y = value.(y), π = reduced_cost.(x))
@@ -202,29 +245,32 @@ for k in 1:MAXIMUM_ITERATIONS
     lower_bound = objective_value(model)
     x_k = value.(x)
     ret = solve_subproblem(x_k)
-    upper_bound = c_1' * x_k + ret.obj
-    gap = (upper_bound - lower_bound) / upper_bound
+    upper_bound = (objective_value(model) - value(θ)) + ret.obj
+    gap = abs(upper_bound - lower_bound) / abs(upper_bound)
     print_iteration(k, lower_bound, upper_bound, gap)
     if gap < ABSOLUTE_OPTIMALITY_GAP
         println("Terminating with the optimal solution")
         break
     end
-    cut = @constraint(model, θ >= ret.obj + ret.π' * (x .- x_k))
+    cut = @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
     @info "Adding the cut $(cut)"
 end
 
-# Finally, we can obtain the optimal solution
+# Finally, we can obtain the optimal solution:
 
 optimize!(model)
 @assert is_solved_and_feasible(model)
-Test.@test value.(x) == [0.0, 1.0] #src
 x_optimal = value.(x)
+optimal_ret = solve_subproblem(x_optimal)
+iterative_solution = optimal_flows(optimal_ret.y)
 
-#-
+# which is the same as the monolithic solution:
 
-optimal_ret = solve_subproblem(x_optimal)
-Test.@test optimal_ret.y == [0.0, 0.0] #src
-y_optimal = optimal_ret.y
+iterative_solution == monolithic_solution
+
+# and it has the same objective value:
+
+objective_value(model)
 
 # ## Callback method
 
@@ -237,16 +283,19 @@ y_optimal = optimal_ret.y
 # node of the first-stage MILP at each iteration.
 
 # !!! tip
+#     We use GLPK for this model because HiGHS does not support lazy constraints.
 #     For more information on callbacks, read the page
 #     [Solver-independent callbacks](@ref callbacks_manual).
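+
+# For reference, a lazy-constraint callback is attached to a JuMP model by
+# setting the `MOI.LazyConstraintCallback()` attribute. As a generic sketch
+# (`m` and `f` are placeholders for a model and a callback function; the
+# tutorial attaches `my_callback` to `lazy_model` in the same way below):
+#
+# ```julia
+# set_attribute(m, MOI.LazyConstraintCallback(), f)
+# ```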
 
 # As before, we construct the same first-stage subproblem:
 
 lazy_model = Model(GLPK.Optimizer)
-@variable(lazy_model, x[1:dim_x] >= 0, Int)
+set_silent(lazy_model)
+@variable(lazy_model, x[1:n, 1:n], Bin)
 @variable(lazy_model, θ >= M)
-@objective(lazy_model, Min, c_1' * x + θ)
-print(lazy_model)
+@constraint(lazy_model, sum(x) <= 11)
+@objective(lazy_model, Min, 0.1 * sum(x) + θ)
+lazy_model
 
 # What differs is that we write a callback function instead of a loop:
 
@@ -263,7 +312,7 @@ function my_callback(cb_data)
     ret = solve_subproblem(x_k)
     if θ_k < (ret.obj - 1e-6)
         ## Only add the constraint if θ_k violates the constraint
-        cut = @build_constraint(θ >= ret.obj + ret.π' * (x .- x_k))
+        cut = @build_constraint(θ >= ret.obj + sum(ret.π .* (x .- x_k)))
         MOI.submit(lazy_model, MOI.LazyConstraint(cb_data), cut)
     end
     return
@@ -285,14 +334,14 @@ number_of_subproblem_solves
 
 # Finally, we can obtain the optimal solution:
 
-Test.@test value.(x) == [0.0, 1.0] #src
 x_optimal = value.(x)
+optimal_ret = solve_subproblem(x_optimal)
+callback_solution = optimal_flows(optimal_ret.y)
 
-#-
+# which is the same as the monolithic solution:
 
-optimal_ret = solve_subproblem(x_optimal)
-Test.@test optimal_ret.y == [0.0, 0.0] #src
-y_optimal = optimal_ret.y
+@assert callback_solution == monolithic_solution #src
+callback_solution == monolithic_solution
 
 # ## In-place iterative method
 
@@ -303,20 +352,23 @@ y_optimal = optimal_ret.y
 
 # First, we create our first-stage problem as usual:
 
-model = Model(GLPK.Optimizer)
-@variable(model, x[1:dim_x] >= 0, Int)
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
 @variable(model, θ >= M)
-@objective(model, Min, c_1' * x + θ)
-print(model)
+@constraint(model, sum(x) <= 11)
+@objective(model, Min, 0.1 * sum(x) + θ)
+model
 
 # Then, instead of building the subproblem in a function, we build it once here:
 
-subproblem = Model(GLPK.Optimizer)
-@variable(subproblem, x_copy[1:dim_x])
-@variable(subproblem, y[1:dim_y] >= 0)
-@constraint(subproblem, A_1 * x_copy + A_2 * y .<= b)
-@objective(subproblem, Min, c_2' * y)
-print(subproblem)
+subproblem = Model(HiGHS.Optimizer)
+set_silent(subproblem)
+@variable(subproblem, x_copy[i in 1:n, j in 1:n])
+@variable(subproblem, y[1:n, 1:n] >= 0)
+@constraint(subproblem, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x_copy[i, j])
+@constraint(subproblem, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+@objective(subproblem, Min, -sum(y[1, :]))
 
 # Our function to solve the subproblem is also slightly different. First, we
# need to fix the value of the `x_copy` variables to the value of `x` from the
@@ -343,14 +395,14 @@ for k in 1:MAXIMUM_ITERATIONS
     lower_bound = objective_value(model)
     x_k = value.(x)
     ret = solve_subproblem(subproblem, x_k)
-    upper_bound = c_1' * x_k + ret.obj
-    gap = (upper_bound - lower_bound) / upper_bound
+    upper_bound = (objective_value(model) - value(θ)) + ret.obj
+    gap = abs(upper_bound - lower_bound) / abs(upper_bound)
     print_iteration(k, lower_bound, upper_bound, gap)
     if gap < ABSOLUTE_OPTIMALITY_GAP
         println("Terminating with the optimal solution")
        break
    end
-    cut = @constraint(model, θ >= ret.obj + ret.π' * (x .- x_k))
+    cut = @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
    @info "Adding the cut $(cut)"
 end
@@ -358,11 +410,111 @@ end
 
 optimize!(model)
 @assert is_solved_and_feasible(model)
-Test.@test value.(x) == [0.0, 1.0] #src
 x_optimal = value.(x)
+optimal_ret = solve_subproblem(subproblem, x_optimal)
+inplace_solution = optimal_flows(optimal_ret.y)
+
+# which is the same as the monolithic solution:
+
+@assert inplace_solution == monolithic_solution #src
+inplace_solution == monolithic_solution
+
+# ## Feasibility cuts
 
-#-
+# So far, we have discussed only Benders optimality cuts. However, for some
+# first-stage values of `x`, the subproblem might be infeasible. The solution is
+# to add a Benders feasibility cut:
+# ```math
+# v_k + u_k^\top (x - x_k) \le 0
+# ```
+# where $u_k$ is an unbounded dual ray of the subproblem, and $v_k$ is the
+# intercept of the unbounded ray. The cut is valid because the ray certifies
+# that the subproblem is infeasible whenever $v_k + u_k^\top (x - x_k) > 0$,
+# so any feasible first-stage solution must satisfy the cut.
+
+# As a variation of our example that leads to infeasibilities, we add a
+# constraint that `sum(y) >= 1`. This means we need a choice of first-stage `x`
+# for which at least one unit can flow.
+
+# The first-stage problem remains the same:
+
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
+@variable(model, θ >= M)
+@constraint(model, sum(x) <= 11)
+@objective(model, Min, 0.1 * sum(x) + θ)
+
+# But the subproblem has a new constraint that `sum(y) >= 1`:
+
+subproblem = Model(HiGHS.Optimizer)
+set_silent(subproblem)
+## We need to turn presolve off so that HiGHS will return an infeasibility
+## certificate.
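+## (This requirement is solver-specific: with presolve enabled, HiGHS can
+## detect infeasibility during presolve and then no dual ray is available.)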
+set_attribute(subproblem, "presolve", "off")
+@variable(subproblem, x_copy[i in 1:n, j in 1:n])
+@variable(subproblem, y[1:n, 1:n] >= 0)
+@constraint(subproblem, sum(y) >= 1) # <--- THIS IS NEW
+@constraint(subproblem, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x_copy[i, j])
+@constraint(subproblem, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+@objective(subproblem, Min, -sum(y[1, :]))
+
+# The function to solve the subproblem now checks for feasibility, and returns
+# the dual objective value and an unbounded dual ray if the subproblem is
+# infeasible:
+
+function solve_subproblem_with_feasibility(model, x)
+    fix.(model[:x_copy], x)
+    optimize!(model)
+    if is_solved_and_feasible(model; dual = true)
+        return (
+            is_feasible = true,
+            obj = objective_value(model),
+            y = value.(model[:y]),
+            π = reduced_cost.(model[:x_copy]),
+        )
+    end
+    return (
+        is_feasible = false,
+        v = dual_objective_value(model),
+        u = reduced_cost.(model[:x_copy]),
+    )
+end
+
+# Now we're ready to iterate our in-place Benders decomposition:
+
+println("Iteration  Lower Bound  Upper Bound          Gap")
+for k in 1:MAXIMUM_ITERATIONS
+    optimize!(model)
+    @assert is_solved_and_feasible(model)
+    lower_bound = objective_value(model)
+    x_k = value.(x)
+    ret = solve_subproblem_with_feasibility(subproblem, x_k)
+    if ret.is_feasible
+        ## Benders Optimality Cuts
+        upper_bound = (objective_value(model) - value(θ)) + ret.obj
+        gap = abs(upper_bound - lower_bound) / abs(upper_bound)
+        print_iteration(k, lower_bound, upper_bound, gap)
+        if gap < ABSOLUTE_OPTIMALITY_GAP
+            println("Terminating with the optimal solution")
+            break
+        end
+        @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
+    else
+        ## Benders Feasibility Cuts
+        cut = @constraint(model, ret.v + sum(ret.u .* (x .- x_k)) <= 0)
+        @info "Adding the feasibility cut $(cut)"
+    end
+end
+
+# Finally, we can obtain the optimal solution:
+
+optimize!(model)
+@assert is_solved_and_feasible(model)
+x_optimal = value.(x)
 optimal_ret = solve_subproblem(subproblem, x_optimal)
-Test.@test optimal_ret.y == [0.0, 0.0] #src
-y_optimal = optimal_ret.y
+feasible_inplace_solution = optimal_flows(optimal_ret.y)
+
+# which is the same as the monolithic solution (because the monolithic solution
+# already satisfies `sum(y) >= 1`):
+
+@assert feasible_inplace_solution == monolithic_solution #src
+feasible_inplace_solution == monolithic_solution
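+
+# As a final added sanity check (not part of the original tutorial), we can
+# watch the feasibility machinery work: if no arcs are open, the subproblem
+# cannot satisfy `sum(y) >= 1`, so `solve_subproblem_with_feasibility` reports
+# infeasibility and returns the ray that would be used to build a cut:
+
+infeasible_ret = solve_subproblem_with_feasibility(subproblem, zeros(n, n))
+@assert !infeasible_ret.is_feasible #src
+infeasible_ret.is_feasible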