diff --git a/docs/src/tutorials/algorithms/benders_decomposition.jl b/docs/src/tutorials/algorithms/benders_decomposition.jl
index 274e61d3464..50c1b1d9528 100644
--- a/docs/src/tutorials/algorithms/benders_decomposition.jl
+++ b/docs/src/tutorials/algorithms/benders_decomposition.jl
@@ -27,6 +27,7 @@ using JuMP
 import GLPK
+import HiGHS
 import Printf
 import Test #src
@@ -35,19 +36,22 @@ import Test #src
 # Benders decomposition is a useful algorithm for solving convex optimization
 # problems with a large number of variables. It works best when a larger problem
 # can be decomposed into two (or more) smaller problems that are individually
-# much easier to solve. This tutorial demonstrates Benders decomposition on the
-# following mixed-integer linear program:
+# much easier to solve.
+#
+# This tutorial demonstrates Benders decomposition on the following
+# mixed-integer linear program:
 # ```math
 # \begin{aligned}
-# \text{min} \ & c_1^\top x + c_2^\top y \\
-# \text{subject to} \ & A_1 x + A_2 y \le b \\
-# & x \ge 0 \\
-# & y \ge 0 \\
-# & x \in \mathbb{Z}^n
+# \text{min} \ & c_1(x) + c_2(y) \\
+# \text{subject to} \ & f_1(x) \in S_1 \\
+# & f_2(y) \in S_2 \\
+# & f_3(x, y) \in S_3 \\
+# & x \in \mathbb{Z}^m \\
+# & y \in \mathbb{R}^n \\
 # \end{aligned}
 # ```
-# where $b \in \mathbb{R}^m$, $A_1 \in \mathbb{R}^{m \times n}$,
-# $A_2 \in \mathbb{R}^{m \times p}$ and $\mathbb{Z}$ is the set of integers.
+# where the functions $f$ and $c$ are linear, and each set $S$ is defined by
+# simple linear constraints such as $\ge l$, $\le u$, or $= b$.
 #
 # Any mixed integer programming problem can be written in the form above.
@@ -61,27 +65,29 @@ import Test #src
 # solution for ``y`` by solving:
 # ```math
 # \begin{aligned}
-# V_2(\bar{x}) = & \text{min} \ & c_2^\top y \\
-# & \text{subject to} \ & A_1 x + A_2 y \le b \\
-# & & x = \bar{x} & \quad [\pi] \\
-# & & y \ge 0,
+# V_2(\bar{x}) = & \text{min} \ & c_2(y) \\
+# & \text{subject to} \ & f_2(y) \in S_2 \\
+# & & f_3(x, y) \in S_3 \\
+# & & x = \bar{x} & \ [\pi] \\
+# & & y \in \mathbb{R}^n \\
 # \end{aligned}
 # ```
 # Note that we have included a "copy" of the `x` variable to simplify computing
-# the dual of $V_2$ with respect to $\bar{x}$. Because this is a linear program,
-# it is easy to solve.
+# $\pi$, which is the dual of $V_2$ with respect to $\bar{x}$.
+#
+# Because this model is a linear program, it is easy to solve.

-# Replacing the ``c_2^\top y`` component of the objective in our original
-# problem with ``V_2`` yields:
+# Replacing the ``c_2(y)`` component of the objective in our original problem
+# with ``V_2`` yields:
 # ```math
 # \begin{aligned}
-# \text{min} \ & c_1^\top x + V_2(x) \\
-# \text{subject to} \ & x \ge 0 \\
-# & x \in \mathbb{Z}^n
+# V_1 = & \text{min} \ & c_1(x) + V_2(x) \\
+# & \text{subject to} \ & f_1(x) \in S_1 \\
+# & & x \in \mathbb{Z}^m.
 # \end{aligned}
 # ```
-# This problem looks a lot simpler to solve, but we need to do something else
-# with ``V_2`` first.
+# This problem looks a lot simpler to solve because it involves only $x$ and a
+# subset of the constraints, but we need to do something else with ``V_2`` first.
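+
+# As a small aside, here is an example, unrelated to our problem, of what a
+# function like ``V_2`` looks like: if the second-stage problem is
+# ```math
+# V_2(\bar{x}) = \min\{y : y \ge 1 - x, \; y \ge 0, \; x = \bar{x}\},
+# ```
+# then evaluating it for each value of ``\bar{x}`` gives
+# ``V_2(\bar{x}) = \max(1 - \bar{x}, 0)``, a piecewise linear convex function
+# of ``\bar{x}``. The same structure holds in general, as we now explain.
+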
 # Because ``\bar{x}`` is a constant that appears on the right-hand side of the
 # constraints, ``V_2`` is a convex function with respect to ``\bar{x}``, and the
@@ -100,11 +106,11 @@ import Test #src
 # a new decision variable ``\theta``, and approximates it from below using cuts:
 # ```math
 # \begin{aligned}
-# V_1^K = & \text{min} \ & c_1^\top x + \theta \\
-# & \text{subject to} \ & x \ge 0 \\
-# & \ & x \in \mathbb{Z}^n \\
-# & \ & \theta \ge M \\
-# & \ & \theta \ge V_2(x_k) + \pi_k^\top(x - x_k) & \quad \forall k = 1,\ldots,K.
+# V_1^K = & \text{min} \ & c_1(x) + \theta \\
+# & \text{subject to} \ & f_1(x) \in S_1 \\
+# & & x \in \mathbb{Z}^m \\
+# & & \theta \ge M \\
+# & & \theta \ge V_2(x_k) + \pi_k^\top(x - x_k) & \quad \forall k = 1,\ldots,K.
 # \end{aligned}
 # ```
 # This integer program is called the _first-stage_ subproblem.
@@ -119,53 +125,91 @@ import Test #src
 # Due to convexity, we know that ``V_2(x) \ge \theta`` for all ``x``. Therefore,
 # the optimal objective value of ``V_1^K`` provides a valid _lower_ bound on the
 # objective value of the full problem. In addition, if we take a feasible
-# solution for ``x`` from the first-stage problem, then ``c_1^\top x + V_2(x)``
+# solution for ``x`` from the first-stage problem, then ``c_1(x) + V_2(x)``
 # is a valid _upper_ bound on the objective value of the full problem.

 # Benders decomposition uses the lower and upper bounds to determine when it has
 # found the global optimal solution.

-# ## Input data
+# ## Monolithic problem
+
+# As an example problem, we consider the following variant of
+# [The max-flow problem](@ref), in which there is a binary variable to decide
+# whether to open each arc for a cost of 0.1 unit, and we can open at most 11
+# arcs:
+
+G = [
+    0 3 2 2 0 0 0 0
+    0 0 0 0 5 1 0 0
+    0 0 0 0 1 3 1 0
+    0 0 0 0 0 1 0 0
+    0 0 0 0 0 0 0 4
+    0 0 0 0 0 0 0 2
+    0 0 0 0 0 0 0 4
+    0 0 0 0 0 0 0 0
+]
+n = size(G, 1)
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
+@variable(model, y[1:n, 1:n] >= 0)
+@constraint(model, sum(x) <= 11)
+@constraint(model, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x[i, j])
+@constraint(model, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+@objective(model, Min, 0.1 * sum(x) - sum(y[1, :]))
+optimize!(model)
+Test.@test is_solved_and_feasible(model) #src
+solution_summary(model)
+
+# The optimal objective value is -5.1:

-# As an example for this tutorial, we use the input data from page 139 of
-# Garfinkel, R. & Nemhauser, G. L. Integer programming. (Wiley, 1972).
+Test.@test isapprox(objective_value(model), -5.1; atol = 1e-4) #src
+objective_value(model)
+
+# and the optimal flows are:
+
+function optimal_flows(y_sol)
+    flows = [(i, j) => y_sol[i, j] for i in 1:n for j in 1:n]
+    return filter!(flow -> last(flow) > 0, flows)
+end

-c_1 = [1, 4]
-c_2 = [2, 3]
-dim_x = length(c_1)
-dim_y = length(c_2)
-b = [-2; -3]
-A_1 = [1 -3; -1 -3]
-A_2 = [1 -2; -1 -1]
-M = -1000;
+monolithic_solution = optimal_flows(value.(y))

 # ## [Iterative method](@id benders_iterative)

 # !!! warning
 #     This is a basic implementation for pedagogical purposes. We haven't
-#     discussed Benders feasibility cuts, or any of the computational tricks
-#     that are required to build a performant implementation for large-scale
-#     problems. See [In-place iterative method](@ref) for one improvement that
-#     helps computation time.
-
-# We start by formulating the first-stage subproblem:
-
-model = Model(GLPK.Optimizer)
-@variable(model, x[1:dim_x] >= 0, Int)
+#     discussed any of the computational tricks that are required to build a
+#     performant implementation for large-scale problems. See [In-place iterative method](@ref)
+#     for one improvement that helps computation time.
+
+# We start by formulating the first-stage subproblem. It includes the `x`
+# variables, the constraints that involve only `x`, and the terms of the
+# objective that contain only `x`. We also need an initial lower bound on the
+# cost-to-go variable `θ`. One valid lower bound is to assume that we do not
+# pay for opening arcs, and that there is flow on all of the arcs.
+
+M = -sum(G)
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
 @variable(model, θ >= M)
-@objective(model, Min, c_1' * x + θ)
-print(model)
+@constraint(model, sum(x) <= 11)
+@objective(model, Min, 0.1 * sum(x) + θ)
+model

 # For the next step, we need a function that takes a first-stage candidate
 # solution `x` and returns the optimal solution from the second-stage
 # subproblem:

-function solve_subproblem(x_bar::Vector)
-    model = Model(GLPK.Optimizer)
-    @variable(model, x[i in 1:dim_x] == x_bar[i])
-    @variable(model, y[1:dim_y] >= 0)
-    @constraint(model, A_1 * x + A_2 * y .<= b)
-    @objective(model, Min, c_2' * y)
+function solve_subproblem(x_bar)
+    model = Model(HiGHS.Optimizer)
+    set_silent(model)
+    @variable(model, x[i in 1:n, j in 1:n] == x_bar[i, j])
+    @variable(model, y[1:n, 1:n] >= 0)
+    @constraint(model, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x[i, j])
+    @constraint(model, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+    @objective(model, Min, -sum(y[1, :]))
     optimize!(model)
     @assert is_solved_and_feasible(model; dual = true)
     return (obj = objective_value(model), y = value.(y), π = reduced_cost.(x))
@@ -202,29 +246,32 @@ for k in 1:MAXIMUM_ITERATIONS
     lower_bound = objective_value(model)
     x_k = value.(x)
     ret = solve_subproblem(x_k)
-    upper_bound = c_1' * x_k + ret.obj
-    gap = (upper_bound - lower_bound) / upper_bound
+    upper_bound = (objective_value(model) - value(θ)) + ret.obj
+    gap = abs(upper_bound - lower_bound) / abs(upper_bound)
     print_iteration(k, lower_bound, upper_bound, gap)
     if gap < ABSOLUTE_OPTIMALITY_GAP
         println("Terminating with the optimal solution")
         break
     end
-    cut = @constraint(model, θ >= ret.obj + ret.π' * (x .- x_k))
+    cut = @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
     @info "Adding the cut $(cut)"
 end

-# Finally, we can obtain the optimal solution
+# Finally, we can obtain the optimal solution:

 optimize!(model)
 @assert is_solved_and_feasible(model)
-Test.@test value.(x) == [0.0, 1.0] #src
 x_optimal = value.(x)
+optimal_ret = solve_subproblem(x_optimal)
+iterative_solution = optimal_flows(optimal_ret.y)

-#-
+# which is the same as the monolithic solution:

-optimal_ret = solve_subproblem(x_optimal)
-Test.@test optimal_ret.y == [0.0, 0.0] #src
-y_optimal = optimal_ret.y
+iterative_solution == monolithic_solution
+
+# and it has the same objective value:
+
+objective_value(model)

 # ## Callback method
@@ -237,16 +284,19 @@ y_optimal = optimal_ret.y
 # node of the first-stage MILP at each iteration.

 # !!! tip
+#     We use GLPK for this model because HiGHS does not support lazy constraints.
 #     For more information on callbacks, read the page
 #     [Solver-independent callbacks](@ref callbacks_manual).

# As before, we construct the same first-stage subproblem: lazy_model = Model(GLPK.Optimizer) -@variable(lazy_model, x[1:dim_x] >= 0, Int) +set_silent(lazy_model) +@variable(lazy_model, x[1:n, 1:n], Bin) @variable(lazy_model, θ >= M) -@objective(lazy_model, Min, c_1' * x + θ) -print(lazy_model) +@constraint(lazy_model, sum(x) <= 11) +@objective(lazy_model, Min, 0.1 * sum(x) + θ) +lazy_model # What differs is that we write a callback function instead of a loop: @@ -263,7 +313,7 @@ function my_callback(cb_data) ret = solve_subproblem(x_k) if θ_k < (ret.obj - 1e-6) ## Only add the constraint if θ_k violates the constraint - cut = @build_constraint(θ >= ret.obj + ret.π' * (x .- x_k)) + cut = @build_constraint(θ >= ret.obj + sum(ret.π .* (x .- x_k))) MOI.submit(lazy_model, MOI.LazyConstraint(cb_data), cut) end return @@ -285,14 +335,14 @@ number_of_subproblem_solves # Finally, we can obtain the optimal solution: -Test.@test value.(x) == [0.0, 1.0] #src x_optimal = value.(x) +optimal_ret = solve_subproblem(x_optimal) +callback_solution = optimal_flows(optimal_ret.y) -#- +# which is the same as the monolithic solution: -optimal_ret = solve_subproblem(x_optimal) -Test.@test optimal_ret.y == [0.0, 0.0] #src -y_optimal = optimal_ret.y +Test.@test callback_solution == monolithic_solution #src +callback_solution == monolithic_solution # ## In-place iterative method @@ -303,20 +353,23 @@ y_optimal = optimal_ret.y # First, we create our first-stage problem as usual: -model = Model(GLPK.Optimizer) -@variable(model, x[1:dim_x] >= 0, Int) +model = Model(HiGHS.Optimizer) +set_silent(model) +@variable(model, x[1:n, 1:n], Bin) @variable(model, θ >= M) -@objective(model, Min, c_1' * x + θ) -print(model) +@constraint(model, sum(x) <= 11) +@objective(model, Min, 0.1 * sum(x) + θ) +model # Then, instead of building the subproblem in a function, we build it once here: -subproblem = Model(GLPK.Optimizer) -@variable(subproblem, x_copy[1:dim_x]) -@variable(subproblem, y[1:dim_y] >= 0) -@constraint(subproblem, A_1 * x_copy + A_2 * y .<= b) -@objective(subproblem, Min, c_2' * y) -print(subproblem) +subproblem = Model(HiGHS.Optimizer) +set_silent(subproblem) +@variable(subproblem, x_copy[i in 1:n, j in 1:n]) +@variable(subproblem, y[1:n, 1:n] >= 0) +@constraint(subproblem, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x_copy[i, j]) +@constraint(subproblem, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i])) +@objective(subproblem, Min, -sum(y[1, :])) # Our function to solve the subproblem is also slightly different. 
First, we
# need to fix the value of the `x_copy` variables to the value of `x` from the
@@ -343,14 +396,14 @@ for k in 1:MAXIMUM_ITERATIONS
     lower_bound = objective_value(model)
     x_k = value.(x)
     ret = solve_subproblem(subproblem, x_k)
-    upper_bound = c_1' * x_k + ret.obj
-    gap = (upper_bound - lower_bound) / upper_bound
+    upper_bound = (objective_value(model) - value(θ)) + ret.obj
+    gap = abs(upper_bound - lower_bound) / abs(upper_bound)
     print_iteration(k, lower_bound, upper_bound, gap)
     if gap < ABSOLUTE_OPTIMALITY_GAP
         println("Terminating with the optimal solution")
         break
     end
-    cut = @constraint(model, θ >= ret.obj + ret.π' * (x .- x_k))
+    cut = @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
     @info "Adding the cut $(cut)"
 end
@@ -358,11 +411,111 @@ end
 optimize!(model)
 @assert is_solved_and_feasible(model)
-Test.@test value.(x) == [0.0, 1.0] #src
 x_optimal = value.(x)
+optimal_ret = solve_subproblem(subproblem, x_optimal)
+inplace_solution = optimal_flows(optimal_ret.y)
+
+# which is the same as the monolithic solution:
+
+Test.@test inplace_solution == monolithic_solution #src
+inplace_solution == monolithic_solution
+
+# ## Feasibility cuts
+
+# So far, we have discussed only Benders optimality cuts. However, for some
+# first-stage values of `x`, the subproblem might be infeasible. The solution is
+# to add a Benders feasibility cut:
+# ```math
+# v_k + u_k^\top (x - x_k) \le 0,
+# ```
+# where $u_k$ is a dual unbounded ray of the subproblem, and $v_k$ is the
+# intercept of the unbounded ray.

+# As a variation of our example that leads to infeasibilities, we add a
+# constraint that `sum(y) >= 1`. This means we need a choice of first-stage `x`
+# for which at least one unit can flow.
+
+# The first-stage problem remains the same:

-#-
+model = Model(HiGHS.Optimizer)
+set_silent(model)
+@variable(model, x[1:n, 1:n], Bin)
+@variable(model, θ >= M)
+@constraint(model, sum(x) <= 11)
+@objective(model, Min, 0.1 * sum(x) + θ)
+
+# But the subproblem has a new constraint that `sum(y) >= 1`:
+
+subproblem = Model(HiGHS.Optimizer)
+set_silent(subproblem)
+## We need to turn presolve off so that HiGHS will return an infeasibility
+## certificate.
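+## If HiGHS proves infeasibility during presolve, it stops before computing
+## the dual ray that we query below via `dual_objective_value` and
+## `reduced_cost`.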
+set_attribute(subproblem, "presolve", "off")
+@variable(subproblem, x_copy[i in 1:n, j in 1:n])
+@variable(subproblem, y[1:n, 1:n] >= 0)
+@constraint(subproblem, sum(y) >= 1)  # <--- THIS IS NEW
+@constraint(subproblem, [i = 1:n, j = 1:n], y[i, j] <= G[i, j] * x_copy[i, j])
+@constraint(subproblem, [i = 2:n-1], sum(y[i, :]) == sum(y[:, i]))
+@objective(subproblem, Min, -sum(y[1, :]))
+
+# The function to solve the subproblem now checks for feasibility and returns
+# the dual objective value and a dual unbounded ray if the subproblem is
+# infeasible:
+
+function solve_subproblem_with_feasibility(model, x)
+    fix.(model[:x_copy], x)
+    optimize!(model)
+    if is_solved_and_feasible(model; dual = true)
+        return (
+            is_feasible = true,
+            obj = objective_value(model),
+            y = value.(model[:y]),
+            π = reduced_cost.(model[:x_copy]),
+        )
+    end
+    return (
+        is_feasible = false,
+        v = dual_objective_value(model),
+        u = reduced_cost.(model[:x_copy]),
+    )
+end
+
+# Now we're ready to iterate our in-place Benders decomposition:
+
+println("Iteration  Lower Bound  Upper Bound          Gap")
+for k in 1:MAXIMUM_ITERATIONS
+    optimize!(model)
+    @assert is_solved_and_feasible(model)
+    lower_bound = objective_value(model)
+    x_k = value.(x)
+    ret = solve_subproblem_with_feasibility(subproblem, x_k)
+    if ret.is_feasible
+        ## Benders Optimality Cuts
+        upper_bound = (objective_value(model) - value(θ)) + ret.obj
+        gap = abs(upper_bound - lower_bound) / abs(upper_bound)
+        print_iteration(k, lower_bound, upper_bound, gap)
+        if gap < ABSOLUTE_OPTIMALITY_GAP
+            println("Terminating with the optimal solution")
+            break
+        end
+        @constraint(model, θ >= ret.obj + sum(ret.π .* (x .- x_k)))
+    else
+        ## Benders Feasibility Cuts
+        cut = @constraint(model, ret.v + sum(ret.u .* (x .- x_k)) <= 0)
+        @info "Adding the feasibility cut $(cut)"
+    end
+end
+
+# Finally, we can obtain the optimal solution:
+
+optimize!(model)
+@assert is_solved_and_feasible(model)
+x_optimal = value.(x)
 optimal_ret = solve_subproblem(subproblem, x_optimal)
-Test.@test optimal_ret.y == [0.0, 0.0] #src
-y_optimal = optimal_ret.y
+feasible_inplace_solution = optimal_flows(optimal_ret.y)
+
+# which is the same as the monolithic solution (because the monolithic solution
+# already satisfies `sum(y) >= 1`):
+
+Test.@test feasible_inplace_solution == monolithic_solution #src
+feasible_inplace_solution == monolithic_solution
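+
+# and, as with the other methods, it has the same objective value:
+
+objective_value(model)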