From fbc8ad0d140df6e533f89d7a35ee1309f4b838dc Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Fri, 2 Aug 2024 11:19:52 -0400
Subject: [PATCH 01/10] transient gambler

---
 src/domains/gambler.jl | 94 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 13 deletions(-)

diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 4568589..88bbc09 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -2,41 +2,53 @@ module Gambler
 
 import ...TabMDP, ...transition, ...state_count, ...action_count
 
+# create the transition representation for this domain
+# (state_to, probability, reward)
 mt(st, prob,rew) =
     (Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64}
 
+
+# ------------------------------------------------------------------------------------------------------------------------
+# Discounted ruin
+# ------------------------------------------------------------------------------------------------------------------------
+
+
 """
     Ruin(win, max_capital)
 
-Gambler's ruin. Can decide how much to bet at any point in time. With some
+Gambler's ruin; the discounted version. Can decide how much to bet at any point in time. With some
 probability `win`, the bet is doubled, and with `1-win` it is lost. The
-reward is 1 if it achieves some terminal capital and 0 otherwise.
+reward is `1` if it achieves some terminal capital and `0` otherwise. State `max_capital+1`
+is an absorbing win state in which `1` is received forever.
 
-- Capital = state - 1
-- Bet = action - 1 
+- Capital = `state - 1`
+- Bet     = `action - 1` 
 
-Available actions are 1, ..., state - 1.
+Available actions are `1`, ..., `state`.
 
-Special states: state=1 is broke and state=max_capital+1 is a terminal winning state.
+Special states: `state=1` is broke and `state=max_capital+1` is a terminal winning state.
 """
 struct Ruin <: TabMDP
     win :: Float64
     max_capital :: Int
 
     function Ruin(win::Number, max_capital::Integer)
-        zero(win) ≤ win ≤ one(win) || error("win probability must be in [0,1]")
+        zero(win) ≤ win ≤ one(win) || error("Win probability must be in [0,1]")
         max_capital ≥ one(max_capital) || error("Max capital must be positive")
         new(win, max_capital)
     end
 end
 
+state_count(model::Ruin) = model.max_capital + 1
+action_count(model::Ruin, state::Int) = state < model.max_capital + 1 ? state : 1 # only one action in the terminal state
+
 function transition(model::Ruin, state::Int, action::Int)
-    1 ≤ state ≤ model.max_capital+1 || error("invalid state")
-    1 ≤ action ≤ state || error("invalid action")
+    1 ≤ state ≤ model.max_capital + 1 || error("invalid state")
+    1 ≤ action ≤ state_count || error("invalid action")
 
-    if state == 1
+    if state == 1  # overall loss state
         (mt(1, 1.0, 0.0),)
-    elseif state == model.max_capital + 1 # the state is absorbing
+    elseif state == model.max_capital + 1 # overall win state
         (mt(state, 1.0, 1.0),)
     else
         win_state = min(model.max_capital + 1, (state - 1) + (action - 1) + 1)
@@ -45,7 +57,63 @@ function transition(model::Ruin, state::Int, action::Int)
     end
 end
 
-state_count(model::Ruin) = model.max_capital + 1
-action_count(model::Ruin, state::Int) = state
+
+# ------------------------------------------------------------------------------------------------------------------------
+# Transient ruin
+# ------------------------------------------------------------------------------------------------------------------------
+
+
+"""
+    RuinTransient(win, max_capital)
+
+Gambler's ruin; the transient version. Can decide how much to bet at any point in time. With some
+probability `win`, the bet is doubled, and with `1-win` it is lost. The reward is `1` if it achieves
+some terminal capital and `0` otherwise. State `max_capital+1` is an absorbing win state
+in which `1` is received forever.
+
+- Capital = `state - 1`
+- Bet     = `action - 1` 
+
+Available actions are `1`, ..., `state`.
+
+Special states: `state=1` is broke and `state=max_capital+1` is an abosrbing state.
+
+The reward is `-1` when the gambler goes broke and `+1` when it achieves the target capital.
+"""
+struct RuinTransient <: TabMDP
+    win :: Float64
+    max_capital :: Int
+
+    function RuinTransient(win::Number, max_capital::Integer)
+        zero(win) ≤ win ≤ one(win) || error("Win probability must be in [0,1]")
+        max_capital ≥ one(max_capital) || error("Max capital must be positive")
+        new(win, max_capital)
+    end
+end
+
+state_count(model::RuinTransient) = model.max_capital + 1
+action_count(model::RuinTransient, state::Int) = state < model.max_capital + 1 ? state : 1 # only one action in the terminal state
+
+function transition(model::RuinTransient, state::Int, action::Int)
+    absorbing :: Int = model.max_capital + 1
+    
+    1 ≤ state ≤ absorbing || error("invalid state")
+    1 ≤ action ≤ state_count || error("invalid action")
+
+    if state == 1  # broke
+        (mt(absorbing, 1.0, -1.0),)
+    elseif state == absorbing   # absorbing terminal state (self-loop); NOTE(review): reward below is 1.0, not 0 — confirm intent
+        (mt(state, 1.0, 1.0),)
+    else
+        win_state = min(model.max_capital + 1, (state - 1) + (action - 1) + 1)
+        lose_state = max(1, (state - 1) - (action - 1) + 1)
+
+        # reward 1.0 if an donly if we achieve the target capital
+        win_reward = win_state == absorbing ? 1.0 : 0
+
+        # the reward is 0 when we lose
+        (mt(win_state, model.win, win_reward), mt(lose_state, 1.0 - model.win, 0.))
+    end
+end
 
 end # Gambler

From a34278d866f25d81210ab67b6737e80bc882e618 Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Fri, 2 Aug 2024 11:22:15 -0400
Subject: [PATCH 02/10] type stability

---
 src/domains/gambler.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 88bbc09..f8b4518 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -109,7 +109,7 @@ function transition(model::RuinTransient, state::Int, action::Int)
         lose_state = max(1, (state - 1) - (action - 1) + 1)
 
         # reward 1.0 if an donly if we achieve the target capital
-        win_reward = win_state == absorbing ? 1.0 : 0
+        win_reward = win_state == absorbing ? 1.0 : 0.0
 
         # the reward is 0 when we lose
         (mt(win_state, model.win, win_reward), mt(lose_state, 1.0 - model.win, 0.))

From b710c1e75f803f8f4901f59afc0f94304143decb Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Fri, 16 Aug 2024 21:43:26 +0200
Subject: [PATCH 03/10] tweaks to lp

---
 src/algorithms/linprogsolve.jl | 42 ++++++++++++++++++++++------------
 src/domains/gambler.jl         |  4 ++--
 src/objectives.jl              | 14 ++++++++++++
 3 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/algorithms/linprogsolve.jl b/src/algorithms/linprogsolve.jl
index 6cf906d..621b6ab 100644
--- a/src/algorithms/linprogsolve.jl
+++ b/src/algorithms/linprogsolve.jl
@@ -6,28 +6,42 @@ using JuMP
 
 
 """
-lp_solve(model, γ, lpm)
+    lp_solve(model, γ, lpm, [silent = true])
 
 Implements the linear program primal problem for an MDP `model` with a discount factor `γ`.
 It uses the JuMP model `lpm` as the linear program solver and returns the state values
-found by `lpm`.
+found by `lpm`. 
 """
 
-function lp_solve(model::TabMDP, γ::Number, lpm)
+function lp_solve(model::TabMDP, obj::InfiniteH, lpm; silent = true)
+    γ = discount(obj)
     0 ≤ γ < 1 || error("γ must be between 0 and 1")
-    set_silent(lpm)
+
+    silent && set_silent(lpm)
     n = state_count(model)
+    
     @variable(lpm, v[1:n])
-    @objective(lpm,Min, sum(v[1:n]))
-    π::Vector{Vector{ConstraintRef}} = []
-    for s in 1:n
-        m = action_count(model,s)
-        π_s::Vector{ConstraintRef} = []
-        for a in 1:m
-            push!(π_s, @constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]]) for sp in transition(model,s,a))))
-        end
+    @objective(lpm, Min, sum(v[1:n]))
+
+    π::Vector{Vector{ConstraintRef}} = [] # constraints for recovering the policy
+    for s ∈ 1:n
+        π_s = [@constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]])
+                                        for sp in transition(model,s,a)))
+            for a ∈ 1:action_count(model,s)]
         push!(π, π_s)
     end
+    
     optimize!(lpm)
-    (value = value.(v), policy = map(x->argmax(dual.(x)), π))
-end
\ No newline at end of file
+
+    if !is_solved_and_feasible(lpm; dual = true)
+        error("could not solve the MDP linear program")
+    end
+    
+    (status = :optimal,
+     value = value.(v),
+     policy = map(x->argmax(dual.(x)), π))
+end
+
+lp_solve(model::TabMDP, γ::Number, lpm; args...) =
+    lp_solve(model, InfiniteH(γ), lpm; args...)
+    
diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index f8b4518..72bea60 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -44,7 +44,7 @@ action_count(model::Ruin, state::Int) = state < model.max_capital + 1 ? state :
 
 function transition(model::Ruin, state::Int, action::Int)
     1 ≤ state ≤ model.max_capital + 1 || error("invalid state")
-    1 ≤ action ≤ state_count || error("invalid action")
+    1 ≤ action ≤ action_count(model, state) || error("invalid action")
 
     if state == 1  # overall loss state
         (mt(1, 1.0, 0.0),)
@@ -98,7 +98,7 @@ function transition(model::RuinTransient, state::Int, action::Int)
     absorbing :: Int = model.max_capital + 1
     
     1 ≤ state ≤ absorbing || error("invalid state")
-    1 ≤ action ≤ state_count || error("invalid action")
+    1 ≤ action ≤ action_count(model, state) || error("invalid action")
 
     if state == 1  # broke
         (mt(absorbing, 1.0, -1.0),)
diff --git a/src/objectives.jl b/src/objectives.jl
index 8e2cebf..60e61c2 100644
--- a/src/objectives.jl
+++ b/src/objectives.jl
@@ -53,6 +53,20 @@ struct FiniteH <: MarkovDet
     end
 end
 
+
+"""
+Total reward criterion. The objective is to maximize the sum
+of the rewards. The model assumes that there is a terminal state,
+which must
+
+1) have a single action,
+2) transition to itself,
+3) have a reward of 0. 
+"""
+struct TotalReward <: StationaryDet
+    terminal_state :: Int 
+end
+
 """
     horizon(objective)
 

From f4e93c2608e2560d6c0c56e2a37a9d56a32450db Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Sun, 1 Sep 2024 07:31:30 -0400
Subject: [PATCH 04/10] removed isterminal

---
 src/algorithms/linprogsolve.jl             | 14 ++--
 src/algorithms/mrp.jl                      | 15 ++--
 src/domains/gambler.jl                     |  2 +-
 src/models/integral.jl                     | 43 ++++------
 src/models/mdp.jl                          | 12 +--
 src/models/tabular.jl                      |  4 +-
 src/objectives.jl                          | 10 +--
 src/simulation.jl                          |  1 -
 src/valuefunction/bellman.jl               | 24 ++----
 test/src/domains/.inventory.jl.~undo-tree~ | 97 +++++++++++++++++++++-
 test/src/domains/garnet.jl                 |  4 +-
 test/src/domains/gridworld.jl              |  4 +-
 test/src/domains/inventory.jl              | 11 +--
 13 files changed, 148 insertions(+), 93 deletions(-)

diff --git a/src/algorithms/linprogsolve.jl b/src/algorithms/linprogsolve.jl
index 621b6ab..c7c60e7 100644
--- a/src/algorithms/linprogsolve.jl
+++ b/src/algorithms/linprogsolve.jl
@@ -15,7 +15,7 @@ found by `lpm`.
 
 function lp_solve(model::TabMDP, obj::InfiniteH, lpm; silent = true)
     γ = discount(obj)
-    0 ≤ γ < 1 || error("γ must be between 0 and 1")
+    0 ≤ γ < 1 || error("γ must be between 0 and 1.")
 
     silent && set_silent(lpm)
     n = state_count(model)
@@ -23,23 +23,21 @@ function lp_solve(model::TabMDP, obj::InfiniteH, lpm; silent = true)
     @variable(lpm, v[1:n])
     @objective(lpm, Min, sum(v[1:n]))
 
-    π::Vector{Vector{ConstraintRef}} = [] # constraints for recovering the policy
+    u = Vector{Vector{ConstraintRef}}(undef, n)
     for s ∈ 1:n
-        π_s = [@constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]])
+        u[s] = [@constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]])
                                         for sp in transition(model,s,a)))
             for a ∈ 1:action_count(model,s)]
-        push!(π, π_s)
     end
     
     optimize!(lpm)
 
     if !is_solved_and_feasible(lpm; dual = true)
-        error("could not solve the MDP linear program")
+        error("Failed to solve the MDP linear program")
     end
     
-    (status = :optimal,
-     value = value.(v),
-     policy = map(x->argmax(dual.(x)), π))
+    (value = value.(v),
+     policy = map(x->argmax(dual.(x)), u))
 end
 
 lp_solve(model::TabMDP, γ::Number, lpm; args...) =
diff --git a/src/algorithms/mrp.jl b/src/algorithms/mrp.jl
index f97ab39..8fab46d 100644
--- a/src/algorithms/mrp.jl
+++ b/src/algorithms/mrp.jl
@@ -16,16 +16,11 @@ function mrp!(P_π::AbstractMatrix{<:Real}, r_π::AbstractVector{<:Real},
     S = state_count(model)
     fill!(P_π, 0.); fill!(r_π, 0.)
     for s ∈ 1:S
-        #TODO: remove the definition of terminal states
-        if !isterminal(model, s)
-            for (sn, p, r) ∈ transition(model, s, π[s])
-                P_π[s,sn] ≈ 0. ||
-                    error("duplicated transition entries (s1->s2, s1->s2) not allowed")
-                P_π[s,sn] += p
-                r_π[s] += p * r
-            end
-        else
-            r_π[s] = reward_T(model, s)
+        for (sn, p, r) ∈ transition(model, s, π[s])
+            P_π[s,sn] ≈ 0. ||
+                error("duplicated transition entries (s1->s2, s1->s2) not allowed")
+            P_π[s,sn] += p
+            r_π[s] += p * r
         end
     end
 end
diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 72bea60..4dc30d3 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -76,7 +76,7 @@ in which `1` is received forever.
 
 Available actions are `1`, ..., `state`.
 
-Special states: `state=1` is broke and `state=max_capital+1` is an abosrbing state.
+Special states: `state=1` is broke and `state=max_capital+1` is an absorbing state.
 
 The reward is `-1` when the gambler goes broke and `+1` when it achieves the target capital.
 """
diff --git a/src/models/integral.jl b/src/models/integral.jl
index b82db4e..1aadfee 100644
--- a/src/models/integral.jl
+++ b/src/models/integral.jl
@@ -239,41 +239,30 @@ values.
 The option `docompress` combined transitions to the same state into a single transition.
 This improves efficiency in risk-neutral settings, but may change the outcome
 in risk-averse settings.
-
-The function adds one more state at the end which represents a catch-all terminal state
 """
 function make_int_mdp(mdp::TabMDP; docompress = false)
     statecount = state_count(mdp)
-    states = Vector{IntState}(undef, statecount + 1) # + terminal
+    states = Vector{IntState}(undef, statecount) 
    
-    # add a self-looping state to model a terminal state
-    # needed to handle terminal state
-    states[statecount+1] = IntState([IntAction([statecount+1],[1.0],[0.0])])
-                          
     Threads.@threads for s ∈ 1:statecount
         action_vals = 1:action_count(mdp, s)
-        if isterminal(mdp, s)
-            states[s]  = IntState([IntAction(
-                [statecount+1], [1.0], [0.])])
-        else
-            acts = Vector{IntAction}(undef, length(action_vals))
-            for (ia,a) ∈ enumerate(action_vals)
-                ns = Array{Int}(undef, 0)     # next state
-                np = Array{Float64}(undef, 0) # next probalbility
-                nr = Array{Float64}(undef, 0) # next reward
-
-                for (nexts, nextp, nextr) ∈ transition(mdp, s, a)
-                    # check where to insert the next state transition
-                    i = searchsortedfirst(ns, nexts)
-                    insert!(ns, i, nexts)
-                    insert!(np, i, nextp)
-                    insert!(nr, i, nextr)
-                end
-                a = IntAction(ns, np, nr)
-                acts[ia] = docompress ? compress(a) : a
+        acts = Vector{IntAction}(undef, length(action_vals))
+        for (ia,a) ∈ enumerate(action_vals)
+            ns = Array{Int}(undef, 0)     # next state
+            np = Array{Float64}(undef, 0) # next probability
+            nr = Array{Float64}(undef, 0) # next reward
+
+            for (nexts, nextp, nextr) ∈ transition(mdp, s, a)
+                # check where to insert the next state transition
+                i = searchsortedfirst(ns, nexts)
+                insert!(ns, i, nexts)
+                insert!(np, i, nextp)
+                insert!(nr, i, nextr)
             end
-            states[s] = IntState(acts)
+            a = IntAction(ns, np, nr)
+            acts[ia] = docompress ? compress(a) : a
         end
+        states[s] = IntState(acts)
     end
     IntMDP(states)
 end
diff --git a/src/models/mdp.jl b/src/models/mdp.jl
index 81d87a7..8d02cbb 100644
--- a/src/models/mdp.jl
+++ b/src/models/mdp.jl
@@ -18,18 +18,12 @@ abstract type MDP{S,A} end
 # Default definition of functions
 # ----------------------------------------------------------------
 
-"""
-    isterminal(mdp, state)
-
-Return true if the state is terminal
-"""
-function isterminal end
 
 """
     (sn, p, r) ∈ transition(model, s, a)
 
-Return a list with next states, probabilities, and rewards.
-Returns an iterator. 
+Return an iterator with next states, probabilities, and rewards for
+`model` taking an action `a` in state `s`.
 
 Use `getnext` instead, which is more efficient and convenient to use. 
 """
@@ -38,7 +32,7 @@ function transition end
 """
     valuefunction(mdp, state, valuefunction)
 
-Evaluates the value function for an MDP in a state
+Evaluates the value function for an `mdp` in a `state`
 """
 function valuefunction end
 
diff --git a/src/models/tabular.jl b/src/models/tabular.jl
index e458477..06f98e6 100644
--- a/src/models/tabular.jl
+++ b/src/models/tabular.jl
@@ -4,11 +4,10 @@ using SparseArrays
 
 """ 
 An abstract tabular Markov Decision Process which is specified by a transition function. 
-
 Functions that should be defined for any subtype for value and policy iterations
 to work are: `state_count`, `states`, `action_count`, `actions`, and `transition`.
 
-Generally, states should be 1-based.
+Generally, states and actions are 1-based.
 
 The methods `state_count` and `states` should only include non-terminal states
 """
@@ -18,7 +17,6 @@ abstract type TabMDP <: MDP{Int,Int} end
 # General MDP interface functions
 # ----------------------------------------------------------------
 
-isterminal(::TabMDP, s::Int) = s ≤ 0
 valuefunction(::TabMDP, s::Int, v) = v[s]
 
 function state_count end
diff --git a/src/objectives.jl b/src/objectives.jl
index 60e61c2..d4c7338 100644
--- a/src/objectives.jl
+++ b/src/objectives.jl
@@ -56,12 +56,12 @@ end
 
 """
 Total reward criterion. The objective is to maximize the sum
-of the rewards. The model assumes that there is a terminal state,
-which must
+of the undiscounted rewards. The model assumes that there is a terminal
+state, which
 
-1) have a single action,
-2) transition to itself,
-3) have a reward of 0. 
+1) has a single action,
+2) transitions to itself,
+3) has a reward of 0.
 """
 struct TotalReward <: StationaryDet
     terminal_state :: Int 
diff --git a/src/simulation.jl b/src/simulation.jl
index fa482a4..49f9a0d 100644
--- a/src/simulation.jl
+++ b/src/simulation.jl
@@ -172,7 +172,6 @@ function simulate(model::MDP{S,A}, π::Policy{S,A}, initial,
             prob = rand()            
             tot_prob = 0.
             for (sn,pn,rn) ∈ transition(model, states[t-1,run], actions[t-1,run])
-                isterminal(model, sn) && error("Terminal states unsupported.")    
                 if prob ≤ (tot_prob += pn) # state sn was sampled
                     # update internal state using the current time step
                     let tr = Transition(states[t-1,run], actions[t-1,run], rn, sn, t)
diff --git a/src/valuefunction/bellman.jl b/src/valuefunction/bellman.jl
index 6ba69bb..a9e5ebc 100644
--- a/src/valuefunction/bellman.jl
+++ b/src/valuefunction/bellman.jl
@@ -39,14 +39,9 @@ See `qvalues` for more information.
 function qvalues!(qvalues::AbstractVector{<:Real}, model::MDP{S,A},
                   obj::Objective, t::Integer, s::S, v) where {S,A}
 
-    if isterminal(model, s)
-        qvalues .= -Inf
-        qvalues[1] = 0 
-    else
-        acts = actions(model, s)
-        for (ia,a) ∈ enumerate(acts)
-            qvalues[ia] = qvalue(model, obj, t, s, a, v)
-        end
+    acts = actions(model, s)
+    for (ia,a) ∈ enumerate(acts)
+        qvalues[ia] = qvalue(model, obj, t, s, a, v)
     end
 end
 
@@ -100,15 +95,10 @@ time-dependent updates.
 The function uses `qvalue` to compute the Bellman operator and the greedy policy.
 """
 function bellmangreedy(model::MDP{S,A}, obj::Objective, t::Integer, s::S, v) where {S,A}
-    if isterminal(model, s)
-        (qvalue = 0 :: Float64,
-         action = emptyaction(model) :: A) 
-    else
-        acts = actions(model, s)
-        (qval, ia) = findmax(a->qvalue(model, obj, t, s, a, v), acts) 
-        (qvalue = qval :: Float64,
-         action = acts[ia] :: A)
-    end
+    acts = actions(model, s)
+    (qval, ia) = findmax(a->qvalue(model, obj, t, s, a, v), acts) 
+    (qvalue = qval :: Float64,
+        action = acts[ia] :: A)
 end
 
 # default fallback when t is 
diff --git a/test/src/domains/.inventory.jl.~undo-tree~ b/test/src/domains/.inventory.jl.~undo-tree~
index fd6b827..18baa83 100644
--- a/test/src/domains/.inventory.jl.~undo-tree~
+++ b/test/src/domains/.inventory.jl.~undo-tree~
@@ -1,9 +1,100 @@
 (undo-tree-save-format-version . 1)
-"7f5c20e0493d52a78b3976a36ddef4420bed61da"
+"1c6e24de46538ceabe890182c5a7a01897449b6b"
 [nil nil nil nil (26303 463 21508 884000) 0 nil]
 ([nil nil ((#("=======
 >>>>>>> main
 " 0 7 (face smerge-markers fontified t) 7 8 (face nil fontified t) 8 21 (face smerge-markers fontified t)) . 1463) (undo-tree-id3 . -8) (undo-tree-id4 . -8) (undo-tree-id5 . -8) (undo-tree-id6 . -8) (undo-tree-id7 . -21) (undo-tree-id8 . -21) (undo-tree-id9 . -21) (undo-tree-id10 . -21) (undo-tree-id11 . -8) (undo-tree-id12 . -8) (undo-tree-id13 . -21) (undo-tree-id14 . -20) 1471 (t 26303 431 259432 314000)) nil (26303 463 21506 379000) 0 nil])
-([nil current ((#("<<<<<<< HEAD
-" 0 1 (face smerge-markers smerge-refine-part (13 . 2) fontified t) 1 13 (face smerge-markers fontified t)) . 1391) (undo-tree-id0 . -13) (undo-tree-id1 . -12) (undo-tree-id2 . -13)) nil (26303 463 21492 460000) 0 nil])
+([nil nil ((#("<<<<<<< HEAD
+" 0 1 (face smerge-markers smerge-refine-part (13 . 2) fontified t) 1 13 (face smerge-markers fontified t)) . 1391) (undo-tree-id0 . -13) (undo-tree-id1 . -12) (undo-tree-id2 . -13)) nil (26324 18994 911358 849000) 0 nil])
+([nil nil ((#("    # note that the IntMDP does not have terminal states,
+    # so the last action will not be -1
+
+" 0 4 (fontified t) 4 6 (face font-lock-comment-delimiter-face fontified t) 6 50 (face font-lock-comment-face fontified t) 50 58 (face font-lock-comment-face fontified t) 58 62 (fontified t) 62 64 (face font-lock-comment-delimiter-face fontified t) 64 98 (face font-lock-comment-face fontified t) 98 99 (fontified t)) . 1451) (undo-tree-id94 . -98) (undo-tree-id95 . -36) (undo-tree-id96 . -98) (undo-tree-id97 . -36) (undo-tree-id98 . -36) (undo-tree-id99 . -98) (undo-tree-id100 . -98) (undo-tree-id101 . -98)) ((1451 . 1550)) (26324 18997 1184 116000) 0 nil])
+([nil nil ((1450 . 1455) (t 26324 18998 432489 309000) 1450) nil (26324 19516 549097 781000) 0 nil] [nil nil ((#("]" 0 1 (fontified t)) . -1533) (undo-tree-id82 . -1) (undo-tree-id83 . -1) 1534) ((1533 . 1534)) (26324 18993 956985 308000) 0 nil])
+([nil nil ((1455 . 1469)) nil (26324 19516 549096 335000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1523) (undo-tree-id44 . -1) (undo-tree-id45 . -1) (undo-tree-id46 . -1) (undo-tree-id47 . -1) (#("1" 0 1 (fontified t)) . -1524) (undo-tree-id48 . -1) (undo-tree-id49 . -1) (undo-tree-id50 . -1) (undo-tree-id51 . -1) (#(":" 0 1 (fontified t)) . -1525) (undo-tree-id52 . -1) (undo-tree-id53 . -1) (undo-tree-id54 . -1) (undo-tree-id55 . -1) (#("(" 0 1 (fontified t)) . -1526) (undo-tree-id56 . -1) (undo-tree-id57 . -1) (undo-tree-id58 . -1) (undo-tree-id59 . -1) (#("e" 0 1 (fontified t)) . -1527) (undo-tree-id60 . -1) (undo-tree-id61 . -1) (undo-tree-id62 . -1) (undo-tree-id63 . -1) (#("n" 0 1 (fontified t)) . -1528) (undo-tree-id64 . -1) (undo-tree-id65 . -1) (undo-tree-id66 . -1) (undo-tree-id67 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1529) (undo-tree-id68 . -1) (undo-tree-id69 . -1) (undo-tree-id70 . -1) (undo-tree-id71 . -1) (#("-" 0 1 (fontified t)) . -1530) (undo-tree-id72 . -1) (undo-tree-id73 . -1) (undo-tree-id74 . -1) (undo-tree-id75 . -1) (#("1" 0 1 (fontified t)) . -1531) (undo-tree-id76 . -1) (undo-tree-id77 . -1) (undo-tree-id78 . -1) (undo-tree-id79 . -1) (#(")" 0 1 (fontified t)) . -1532) (undo-tree-id80 . -1) (undo-tree-id81 . -1) 1533) ((1523 . 1533)) (26324 18993 762605 944000) 0 nil])
+([nil nil ((1469 . 1486)) nil (26324 19516 549095 415000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1533) (undo-tree-id22 . -1) (undo-tree-id23 . -1) (#("1" 0 1 (fontified t)) . -1534) (undo-tree-id24 . -1) (undo-tree-id25 . -1) (#(":" 0 1 (fontified t)) . -1535) (undo-tree-id26 . -1) (undo-tree-id27 . -1) (#("(" 0 1 (fontified t)) . -1536) (undo-tree-id28 . -1) (undo-tree-id29 . -1) (#("e" 0 1 (fontified t)) . -1537) (undo-tree-id30 . -1) (undo-tree-id31 . -1) (#("n" 0 1 (fontified t)) . -1538) (undo-tree-id32 . -1) (undo-tree-id33 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1539) (undo-tree-id34 . -1) (undo-tree-id35 . -1) (#("-" 0 1 (fontified t)) . -1540) (undo-tree-id36 . -1) (undo-tree-id37 . -1) (#("1" 0 1 (fontified t)) . -1541) (undo-tree-id38 . -1) (undo-tree-id39 . -1) (#(")" 0 1 (fontified t)) . -1542) (undo-tree-id40 . -1) (undo-tree-id41 . -1) (#("]" 0 1 (fontified t)) . -1543) (undo-tree-id42 . -1) (undo-tree-id43 . -1) 1544) ((1533 . 1544)) (26324 18993 540465 741000) 0 nil])
+([nil nil ((#("." 0 1 (fontified t)) . -1474) (undo-tree-id102 . -1) (undo-tree-id103 . -1) (#("v" 0 1 (fontified t)) . -1475) (undo-tree-id104 . -1) (undo-tree-id105 . -1) (#("2" 0 1 (fontified t)) . -1476) (undo-tree-id106 . -1) (undo-tree-id107 . -1) (#("." 0 1 (fontified t)) . -1477) (undo-tree-id108 . -1) (undo-tree-id109 . -1) (#("v" 0 1 (fontified t)) . -1478) (undo-tree-id110 . -1) (undo-tree-id111 . -1) (#("3" 0 1 (fontified t)) . -1479) (undo-tree-id112 . -1) (undo-tree-id113 . -1) (#("." 0 1 (fontified t)) . -1480) (undo-tree-id114 . -1) (undo-tree-id115 . -1) (#("v" 0 1 (fontified t)) . -1481) (undo-tree-id116 . -1) (undo-tree-id117 . -1) (#("4" 0 1 (fontified t)) . -1482) (undo-tree-id118 . -1) (undo-tree-id119 . -1) (#("." 0 1 (fontified t)) . -1483) (undo-tree-id120 . -1) (undo-tree-id121 . -1) (#("v" 0 1 (fontified t)) . -1484) (undo-tree-id122 . -1) (undo-tree-id123 . -1) (#("5" 0 1 (fontified t)) . -1485) (undo-tree-id124 . -1) (undo-tree-id125 . -1) 1486) nil (26324 19516 549090 926000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1543) (undo-tree-id0 . -1) (undo-tree-id1 . -1) (#("1" 0 1 (fontified t)) . -1544) (undo-tree-id2 . -1) (undo-tree-id3 . -1) (#(":" 0 1 (fontified t)) . -1545) (undo-tree-id4 . -1) (undo-tree-id5 . -1) (#("(" 0 1 (fontified t)) . -1546) (undo-tree-id6 . -1) (undo-tree-id7 . -1) (#("e" 0 1 (fontified t)) . -1547) (undo-tree-id8 . -1) (undo-tree-id9 . -1) (#("n" 0 1 (fontified t)) . -1548) (undo-tree-id10 . -1) (undo-tree-id11 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1549) (undo-tree-id12 . -1) (undo-tree-id13 . -1) (#("-" 0 1 (fontified t)) . -1550) (undo-tree-id14 . -1) (undo-tree-id15 . -1) (#("1" 0 1 (fontified t)) . -1551) (undo-tree-id16 . -1) (undo-tree-id17 . -1) (#(")" 0 1 (fontified t)) . -1552) (undo-tree-id18 . -1) (undo-tree-id19 . -1) (#("]" 0 1 (fontified t)) . -1553) (undo-tree-id20 . -1) (undo-tree-id21 . -1) 1554) ((1543 . 1554) (t 26324 18989 445845 445000)) (26324 18989 407450 327000) 0 nil])
+([nil nil ((1474 . 1489)) nil (26324 19516 549056 45000) 0 nil])
+nil
+([nil nil ((1484 . 1485) (t 26324 19516 584508 526000)) nil (26324 19631 262319 676000) 0 nil])
+([nil nil ((1481 . 1482)) nil (26324 19631 262318 484000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1471) (undo-tree-id128 . -1) (undo-tree-id129 . -1) 1472) nil (26324 19631 262317 492000) 0 nil])
+([nil nil ((1471 . 1472)) nil (26324 19631 262314 658000) 0 nil])
+([nil nil ((#("]" 0 1 (fontified t)) . -1488) (undo-tree-id126 . -1) (undo-tree-id127 . -1) 1489) nil (26324 19631 262312 144000) 0 nil])
+([nil nil ((1488 . 1489)) nil (26324 19631 262296 664000) 0 nil])
+([nil nil ((1451 . 1455) (#("    " 0 4 (fontified nil)) . 1450) (1450 . 1454) (1450 . 1451) (t 26324 19631 297551 70000)) nil (26324 19757 589979 427000) 0 nil])
+([nil nil ((1455 . 1458)) nil (26324 19757 589978 53000) 0 nil])
+([nil nil ((#("o" 0 1 (fontified t)) . -1457) (undo-tree-id151 . -1) (undo-tree-id152 . -1) (undo-tree-id153 . -1) (undo-tree-id154 . -1) (undo-tree-id155 . -1) (undo-tree-id156 . -1) 1458) nil (26324 19757 589976 899000) 0 nil])
+([nil nil ((1457 . 1460)) nil (26324 19757 589972 240000) 0 nil])
+([nil nil ((1455 . 1462) (#("print" 0 5 (fontified t)) . -1455) (undo-tree-id142 . -2) (undo-tree-id143 . -2) (undo-tree-id144 . -5) (undo-tree-id145 . -2) (undo-tree-id146 . -2) (undo-tree-id147 . -2) (undo-tree-id148 . -2) (undo-tree-id149 . -5) (undo-tree-id150 . -5) 1460) nil (26324 19757 589970 889000) 0 nil])
+([nil nil ((1462 . 1471)) nil (26324 19757 589963 993000) 0 nil])
+([nil nil ((1463 . 1474) (#("state_co" 0 8 (fontified t)) . -1463) (undo-tree-id130 . -8) (undo-tree-id131 . -5) (undo-tree-id132 . -5) (undo-tree-id133 . -6) (undo-tree-id134 . -6) (undo-tree-id135 . -7) (undo-tree-id136 . -7) (undo-tree-id137 . -8) (undo-tree-id138 . -8) (undo-tree-id139 . -8) (undo-tree-id140 . -8) (undo-tree-id141 . -8) 1471) nil (26324 19757 589961 887000) 0 nil])
+([nil nil ((1474 . 1479)) nil (26324 19757 589941 843000) 0 nil])
+([nil nil ((1479 . 1500)) nil (26324 19757 589940 150000) 0 nil])
+([nil nil ((1500 . 1502)) nil (26324 19757 589934 739000) 0 nil])
+([nil nil ((1476 . 1477) (t 26324 19757 627230 872000)) nil (26324 19811 605786 9000) 0 nil])
+([nil nil ((1501 . 1502)) nil (26324 19811 605779 786000) 0 nil])
+([nil nil ((1528 . 1534) (t 26324 19811 640427 304000)) nil (26324 19882 603897 537000) 0 nil])
+([nil nil ((1537 . 1543)) nil (26324 19882 603896 446000) 0 nil])
+([nil nil ((1546 . 1552)) nil (26324 19882 603895 739000) 0 nil])
+([nil nil ((1556 . 1562)) nil (26324 19882 603895 35000) 0 nil])
+([nil nil ((1566 . 1572)) nil (26324 19882 603894 270000) 0 nil])
+([nil nil ((1535 . 1536)) nil (26324 19882 603892 818000) 0 nil])
+([nil nil ((1545 . 1546)) nil (26324 19882 603887 901000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1650) (undo-tree-id305 . -1) (undo-tree-id306 . -1) (undo-tree-id307 . -1) (undo-tree-id308 . -1) (undo-tree-id309 . -1) (undo-tree-id310 . -1) (undo-tree-id311 . -1) (undo-tree-id312 . -1) (undo-tree-id313 . -1) (#("1" 0 1 (fontified t)) . -1651) (undo-tree-id314 . -1) (undo-tree-id315 . -1) (undo-tree-id316 . -1) (undo-tree-id317 . -1) (undo-tree-id318 . -1) (undo-tree-id319 . -1) (undo-tree-id320 . -1) (undo-tree-id321 . -1) (undo-tree-id322 . -1) (#(":" 0 1 (fontified t)) . -1652) (undo-tree-id323 . -1) (undo-tree-id324 . -1) (undo-tree-id325 . -1) (undo-tree-id326 . -1) (undo-tree-id327 . -1) (undo-tree-id328 . -1) (undo-tree-id329 . -1) (undo-tree-id330 . -1) (undo-tree-id331 . -1) (#("(" 0 1 (fontified t)) . -1653) (undo-tree-id332 . -1) (undo-tree-id333 . -1) (undo-tree-id334 . -1) (undo-tree-id335 . -1) (undo-tree-id336 . -1) (undo-tree-id337 . -1) (undo-tree-id338 . -1) (undo-tree-id339 . -1) (undo-tree-id340 . -1) (#("e" 0 1 (fontified t)) . -1654) (undo-tree-id341 . -1) (undo-tree-id342 . -1) (undo-tree-id343 . -1) (undo-tree-id344 . -1) (undo-tree-id345 . -1) (undo-tree-id346 . -1) (undo-tree-id347 . -1) (undo-tree-id348 . -1) (undo-tree-id349 . -1) (#("n" 0 1 (fontified t)) . -1655) (undo-tree-id350 . -1) (undo-tree-id351 . -1) (undo-tree-id352 . -1) (undo-tree-id353 . -1) (undo-tree-id354 . -1) (undo-tree-id355 . -1) (undo-tree-id356 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1656) (undo-tree-id357 . 1) (undo-tree-id358 . -1) (undo-tree-id359 . -1) (undo-tree-id360 . -1) (undo-tree-id361 . -1) (undo-tree-id362 . -1) (undo-tree-id363 . -1) (undo-tree-id364 . -1) (#("-" 0 1 (fontified t)) . -1657) (undo-tree-id365 . -1) (undo-tree-id366 . -1) (undo-tree-id367 . -1) (undo-tree-id368 . -1) (undo-tree-id369 . -1) (undo-tree-id370 . -1) (undo-tree-id371 . -1) (#("1" 0 1 (fontified t)) . -1658) (undo-tree-id372 . -1) (undo-tree-id373 . -1) (undo-tree-id374 . -1) (undo-tree-id375 . 
-1) (undo-tree-id376 . -1) (undo-tree-id377 . -1) (undo-tree-id378 . -1) (#(")" 0 1 (fontified t)) . -1659) (undo-tree-id379 . -1) (undo-tree-id380 . -1) (undo-tree-id381 . -1) (undo-tree-id382 . -1) (undo-tree-id383 . -1) (undo-tree-id384 . -1) (undo-tree-id385 . -1) (#("]" 0 1 (fontified t)) . -1660) (undo-tree-id386 . -1) (undo-tree-id387 . -1) (undo-tree-id388 . -1) (undo-tree-id389 . -1) 1661 (t 26324 19882 643580 642000)) nil (26324 19949 714487 992000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1660) (undo-tree-id201 . -1) (undo-tree-id202 . -1) (undo-tree-id203 . -1) (undo-tree-id204 . -1) (undo-tree-id205 . -1) (undo-tree-id206 . -1) (undo-tree-id207 . -1) (undo-tree-id208 . -1) (undo-tree-id209 . -1) (undo-tree-id210 . -1) (#("1" 0 1 (fontified t)) . -1661) (undo-tree-id211 . -1) (undo-tree-id212 . -1) (undo-tree-id213 . -1) (undo-tree-id214 . -1) (undo-tree-id215 . -1) (undo-tree-id216 . -1) (undo-tree-id217 . -1) (undo-tree-id218 . -1) (undo-tree-id219 . -1) (undo-tree-id220 . -1) (#(":" 0 1 (fontified t)) . -1662) (undo-tree-id221 . -1) (undo-tree-id222 . -1) (undo-tree-id223 . -1) (undo-tree-id224 . -1) (undo-tree-id225 . -1) (undo-tree-id226 . -1) (undo-tree-id227 . -1) (undo-tree-id228 . -1) (undo-tree-id229 . -1) (undo-tree-id230 . -1) (#("(" 0 1 (fontified t)) . -1663) (undo-tree-id231 . -1) (undo-tree-id232 . -1) (undo-tree-id233 . -1) (undo-tree-id234 . -1) (undo-tree-id235 . -1) (undo-tree-id236 . -1) (undo-tree-id237 . -1) (undo-tree-id238 . -1) (undo-tree-id239 . -1) (undo-tree-id240 . -1) (#("e" 0 1 (fontified t)) . -1664) (undo-tree-id241 . -1) (undo-tree-id242 . -1) (undo-tree-id243 . -1) (undo-tree-id244 . -1) (undo-tree-id245 . -1) (undo-tree-id246 . -1) (undo-tree-id247 . -1) (undo-tree-id248 . -1) (undo-tree-id249 . -1) (undo-tree-id250 . -1) (#("n" 0 1 (fontified t)) . -1665) (undo-tree-id251 . -1) (undo-tree-id252 . -1) (undo-tree-id253 . -1) (undo-tree-id254 . -1) (undo-tree-id255 . -1) (undo-tree-id256 . -1) (undo-tree-id257 . -1) (undo-tree-id258 . -1) (undo-tree-id259 . -1) (undo-tree-id260 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1666) (undo-tree-id261 . -1) (undo-tree-id262 . -1) (undo-tree-id263 . -1) (undo-tree-id264 . -1) (undo-tree-id265 . -1) (undo-tree-id266 . -1) (undo-tree-id267 . -1) (undo-tree-id268 . -1) (undo-tree-id269 . -1) (undo-tree-id270 . -1) (#("-" 0 1 (fontified t)) . -1667) (undo-tree-id271 . -1) (undo-tree-id272 . -1) (undo-tree-id273 . 
-1) (undo-tree-id274 . -1) (undo-tree-id275 . -1) (undo-tree-id276 . -1) (undo-tree-id277 . -1) (undo-tree-id278 . -1) (undo-tree-id279 . -1) (undo-tree-id280 . -1) (#("1" 0 1 (fontified t)) . -1668) (undo-tree-id281 . -1) (undo-tree-id282 . -1) (undo-tree-id283 . -1) (undo-tree-id284 . -1) (undo-tree-id285 . -1) (undo-tree-id286 . -1) (undo-tree-id287 . -1) (undo-tree-id288 . -1) (undo-tree-id289 . -1) (undo-tree-id290 . -1) (#(")" 0 1 (fontified t)) . -1669) (undo-tree-id291 . -1) (undo-tree-id292 . -1) (undo-tree-id293 . -1) (undo-tree-id294 . -1) (undo-tree-id295 . -1) (undo-tree-id296 . -1) (undo-tree-id297 . -1) (undo-tree-id298 . -1) (undo-tree-id299 . -1) (undo-tree-id300 . -1) (#("]" 0 1 (fontified t)) . -1670) (undo-tree-id301 . -1) (undo-tree-id302 . -1) (undo-tree-id303 . -1) (undo-tree-id304 . -1) 1671) nil (26324 19949 714425 317000) 0 nil])
+([nil nil ((#("[" 0 1 (fontified t)) . -1670) (undo-tree-id157 . -1) (undo-tree-id158 . -1) (undo-tree-id159 . -1) (undo-tree-id160 . -1) (#("1" 0 1 (fontified t)) . -1671) (undo-tree-id161 . -1) (undo-tree-id162 . -1) (undo-tree-id163 . -1) (undo-tree-id164 . -1) (#(":" 0 1 (fontified t)) . -1672) (undo-tree-id165 . -1) (undo-tree-id166 . -1) (undo-tree-id167 . -1) (undo-tree-id168 . -1) (#("(" 0 1 (fontified t)) . -1673) (undo-tree-id169 . -1) (undo-tree-id170 . -1) (undo-tree-id171 . -1) (undo-tree-id172 . -1) (#("e" 0 1 (fontified t)) . -1674) (undo-tree-id173 . -1) (undo-tree-id174 . -1) (undo-tree-id175 . -1) (undo-tree-id176 . -1) (#("n" 0 1 (fontified t)) . -1675) (undo-tree-id177 . -1) (undo-tree-id178 . -1) (undo-tree-id179 . -1) (undo-tree-id180 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1676) (undo-tree-id181 . -1) (undo-tree-id182 . -1) (undo-tree-id183 . -1) (undo-tree-id184 . -1) (#("-" 0 1 (fontified t)) . -1677) (undo-tree-id185 . -1) (undo-tree-id186 . -1) (undo-tree-id187 . -1) (undo-tree-id188 . -1) (#("1" 0 1 (fontified t)) . -1678) (undo-tree-id189 . -1) (undo-tree-id190 . -1) (undo-tree-id191 . -1) (undo-tree-id192 . -1) (#(")" 0 1 (fontified t)) . -1679) (undo-tree-id193 . -1) (undo-tree-id194 . -1) (undo-tree-id195 . -1) (undo-tree-id196 . -1) (#("]" 0 1 (fontified t)) . -1680) (undo-tree-id197 . -1) (undo-tree-id198 . -1) (undo-tree-id199 . -1) (undo-tree-id200 . -1) 1681) nil (26324 19949 714350 980000) 0 nil])
+([nil nil ((#("p" 0 1 (fontified t)) . -1509) (undo-tree-id426 . -1) (undo-tree-id427 . -1) (undo-tree-id428 . -1) (#("r" 0 1 (fontified t)) . -1510) (undo-tree-id429 . -1) (undo-tree-id430 . -1) (undo-tree-id431 . -1) (#("i" 0 1 (fontified t)) . -1511) (undo-tree-id432 . -1) (undo-tree-id433 . -1) (undo-tree-id434 . -1) (#("n" 0 1 (fontified t)) . -1512) (undo-tree-id435 . -1) (undo-tree-id436 . -1) (undo-tree-id437 . -1) (#("t" 0 1 (fontified t)) . -1513) (undo-tree-id438 . -1) (undo-tree-id439 . -1) (undo-tree-id440 . -1) (#("l" 0 1 (fontified t)) . -1514) (undo-tree-id441 . -1) (undo-tree-id442 . -1) (undo-tree-id443 . -1) (#("n" 0 1 (fontified t)) . -1515) (undo-tree-id444 . -1) (undo-tree-id445 . -1) (undo-tree-id446 . -1) (#("(" 0 1 (fontified t)) . -1516) (undo-tree-id447 . -1) (undo-tree-id448 . -1) 1517 (t 26324 19949 726743 921000)) nil (26324 20086 93153 710000) 0 nil])
+([nil nil ((#("model" 0 5 (fontified t)) . 1477) (undo-tree-id416 . -4) (undo-tree-id417 . -4) (undo-tree-id418 . -4) (undo-tree-id419 . -3) (undo-tree-id420 . -4) (undo-tree-id421 . -4) (undo-tree-id422 . -4) (undo-tree-id423 . -4) (undo-tree-id424 . -5) (undo-tree-id425 . -5)) nil (26324 20086 93140 719000) 0 nil])
+([nil nil ((#("," 0 1 (fontified t)) . -1477) (undo-tree-id406 . 1) (undo-tree-id407 . 1) (undo-tree-id408 . -1) (undo-tree-id409 . -1) (undo-tree-id410 . -1) (undo-tree-id411 . -1) (#(" " 0 1 (fontified t)) . -1478) (undo-tree-id412 . -1) (undo-tree-id413 . -1) (undo-tree-id414 . -1) (undo-tree-id415 . -1) 1479) nil (26324 20086 92707 559000) 0 nil])
+([nil nil ((#("p" 0 1 (fontified t)) . -1455) (undo-tree-id390 . -1) (undo-tree-id391 . -1) (#("r" 0 1 (fontified t)) . -1456) (undo-tree-id392 . -1) (undo-tree-id393 . -1) (#("i" 0 1 (fontified t)) . -1457) (undo-tree-id394 . -1) (undo-tree-id395 . -1) (#("n" 0 1 (fontified t)) . -1458) (undo-tree-id396 . -1) (undo-tree-id397 . -1) (#("t" 0 1 (fontified t)) . -1459) (undo-tree-id398 . -1) (undo-tree-id399 . -1) (#("l" 0 1 (fontified t)) . -1460) (undo-tree-id400 . -1) (undo-tree-id401 . -1) (#("n" 0 1 (fontified t)) . -1461) (undo-tree-id402 . -1) (undo-tree-id403 . -1) (#("(" 0 1 (fontified t)) . -1462) (undo-tree-id404 . -1) (undo-tree-id405 . -1) 1463) nil (26324 20086 92700 938000) 0 nil])
+([nil nil ((1455 . 1466)) nil (26324 20086 92683 898000) 0 nil])
+([nil nil ((1466 . 1483)) nil (26324 20086 92683 390000) 0 nil])
+([nil nil ((1461 . 1465)) nil (26324 20086 92682 410000) 0 nil])
+([nil nil ((1521 . 1522)) nil (26324 20086 92678 466000) 0 nil])
+([nil nil ((#("l" 0 1 (fontified t)) . -1527) (undo-tree-id451 . -1) (undo-tree-id452 . -1) (undo-tree-id453 . -1) (#("e" 0 1 (fontified t)) . -1528) (undo-tree-id454 . -1) (undo-tree-id455 . -1) (undo-tree-id456 . -1) (#("n" 0 1 (fontified t)) . -1529) (undo-tree-id457 . -1) (undo-tree-id458 . -1) (undo-tree-id459 . -1) (#("g" 0 1 (fontified t)) . -1530) (undo-tree-id460 . -1) (undo-tree-id461 . -1) (undo-tree-id462 . -1) (#("t" 0 1 (fontified t)) . -1531) (undo-tree-id463 . -1) (undo-tree-id464 . -1) (undo-tree-id465 . -1) (#("h" 0 1 (fontified t)) . -1532) (undo-tree-id466 . -1) (undo-tree-id467 . -1) (undo-tree-id468 . -1) (#("." 0 1 (fontified t)) . -1533) (undo-tree-id469 . -1) (undo-tree-id470 . -1) (undo-tree-id471 . -1) (#("(" 0 1 (fontified t)) . -1534) (undo-tree-id472 . -1) (undo-tree-id473 . -1) 1535 (t 26324 20086 129731 498000)) nil (26324 20116 336966 122000) 0 nil])
+([nil nil ((1527 . 1533)) nil (26324 20116 336953 527000) 0 nil])
+([nil nil ((nil rear-nonsticky nil 1540 . 1541) (nil fontified nil 1533 . 1541) (1533 . 1541) 1532) nil (26324 20116 336952 938000) 0 nil])
+([nil nil ((1533 . 1544)) nil (26324 20116 336952 15000) 0 nil])
+([nil nil ((1552 . 1558)) nil (26324 20116 336951 78000) 0 nil])
+([nil nil ((#(")" 0 1 (fontified t)) . -1609) (undo-tree-id449 . -1) (undo-tree-id450 . -1) 1610) nil (26324 20116 336948 56000) 0 nil])
+([nil nil ((#("y" 0 1 (fontified t)) . -1609) (undo-tree-id530 . -1) (undo-tree-id531 . -1) (undo-tree-id532 . -1) (#("y" 0 1 (fontified t)) . -1610) (undo-tree-id533 . -1) (undo-tree-id534 . -1) (#("p" 0 1 (fontified t)) . -1611) (undo-tree-id535 . -1) (undo-tree-id536 . -1) 1612 (1609 . 1612) (t 26324 20116 346321 571000)) nil (26324 20146 384641 434000) 0 nil])
+([nil nil ((nil rear-nonsticky nil 1613 . 1614) (#("
+" 0 1 (fontified nil)) . -1696) (1609 . 1697) 1608) nil (26324 20146 384636 950000) 0 nil])
+([nil nil ((1696 . 1701) 1695) nil (26324 20146 384635 968000) 0 nil])
+([nil nil ((#("value" 0 4 (fontified t) 4 5 (fontified t rear-nonsticky t)) . 1634) (undo-tree-id524 . -4) (undo-tree-id525 . 3) (undo-tree-id526 . -1) (undo-tree-id527 . -1) (undo-tree-id528 . -5) (undo-tree-id529 . -5)) nil (26324 20146 384635 49000) 0 nil])
+([nil nil ((1634 . 1640)) nil (26324 20146 384631 842000) 0 nil])
+([nil nil ((#("v" 0 1 (fontified t)) . -1650) (undo-tree-id514 . -1) (undo-tree-id515 . -1) (#("a" 0 1 (fontified t)) . -1651) (undo-tree-id516 . -1) (undo-tree-id517 . -1) (#("l" 0 1 (fontified t)) . -1652) (undo-tree-id518 . -1) (undo-tree-id519 . -1) (#("u" 0 1 (fontified t)) . -1653) (undo-tree-id520 . -1) (undo-tree-id521 . -1) (#("e" 0 1 (fontified t)) . -1654) (undo-tree-id522 . -1) (undo-tree-id523 . -1) 1655) nil (26324 20146 384630 529000) 0 nil])
+([nil nil ((1650 . 1656)) nil (26324 20146 384624 192000) 0 nil])
+([nil nil ((#("v" 0 1 (fontified t)) . -1661) (undo-tree-id504 . -1) (undo-tree-id505 . -1) (#("a" 0 1 (fontified t)) . -1662) (undo-tree-id506 . -1) (undo-tree-id507 . -1) (#("l" 0 1 (fontified t)) . -1663) (undo-tree-id508 . -1) (undo-tree-id509 . -1) (#("u" 0 1 (fontified t)) . -1664) (undo-tree-id510 . -1) (undo-tree-id511 . -1) (#("e" 0 1 (fontified t)) . -1665) (undo-tree-id512 . -1) (undo-tree-id513 . -1) 1666) nil (26324 20146 384623 307000) 0 nil])
+([nil nil ((1661 . 1667)) nil (26324 20146 384618 87000) 0 nil])
+([nil nil ((#("v" 0 1 (fontified t)) . -1672) (undo-tree-id494 . -1) (undo-tree-id495 . -1) (#("a" 0 1 (fontified t)) . -1673) (undo-tree-id496 . -1) (undo-tree-id497 . -1) (#("l" 0 1 (fontified t)) . -1674) (undo-tree-id498 . -1) (undo-tree-id499 . -1) (#("u" 0 1 (fontified t)) . -1675) (undo-tree-id500 . -1) (undo-tree-id501 . -1) (#("e" 0 1 (fontified t)) . -1676) (undo-tree-id502 . -1) (undo-tree-id503 . -1) 1677) nil (26324 20146 384616 870000) 0 nil])
+([nil nil ((1672 . 1678)) nil (26324 20146 384611 500000) 0 nil])
+([nil nil ((#("v" 0 1 (fontified t)) . -1683) (undo-tree-id484 . -1) (undo-tree-id485 . -1) (#("a" 0 1 (fontified t)) . -1684) (undo-tree-id486 . -1) (undo-tree-id487 . -1) (#("l" 0 1 (fontified t)) . -1685) (undo-tree-id488 . -1) (undo-tree-id489 . -1) (#("u" 0 1 (fontified t)) . -1686) (undo-tree-id490 . -1) (undo-tree-id491 . -1) (#("e" 0 1 (fontified t)) . -1687) (undo-tree-id492 . -1) (undo-tree-id493 . -1) 1688) nil (26324 20146 384610 329000) 0 nil])
+([nil nil ((1683 . 1689)) nil (26324 20146 384604 883000) 0 nil])
+([nil nil ((#("v" 0 1 (fontified t)) . -1694) (undo-tree-id474 . -1) (undo-tree-id475 . -1) (#("a" 0 1 (fontified t)) . -1695) (undo-tree-id476 . -1) (undo-tree-id477 . -1) (#("l" 0 1 (fontified t)) . -1696) (undo-tree-id478 . -1) (undo-tree-id479 . -1) (#("u" 0 1 (fontified t)) . -1697) (undo-tree-id480 . -1) (undo-tree-id481 . -1) (#("e" 0 1 (fontified t)) . -1698) (undo-tree-id482 . -1) (undo-tree-id483 . -1) 1699) nil (26324 20146 384602 941000) 0 nil])
+([nil nil ((1694 . 1700)) nil (26324 20146 384585 295000) 0 nil])
+([nil nil ((#(")" 0 1 (fontified t)) . -1521) (undo-tree-id537 . -1) (undo-tree-id538 . -1) 1522 (t 26324 20146 392912 80000)) nil (26324 20181 3408 139000) 0 nil])
+([nil nil ((1470 . 1471) (t 26324 20181 12824 335000)) nil (26324 20224 111667 442000) 0 nil])
+([nil nil ((1558 . 1565) (t 26324 20224 149381 677000)) nil (26324 20277 29979 822000) 0 nil])
+([nil nil ((1653 . 1660)) nil (26324 20277 29975 793000) 0 nil])
+([nil nil ((1566 . 1567) (t 26324 20277 39247 633000)) nil (26324 20287 891179 960000) 0 nil])
+([nil nil ((1615 . 1616)) nil (26324 20287 891179 159000) 0 nil])
+([nil nil ((1663 . 1664)) nil (26324 20287 891178 52000) 0 nil])
+([nil nil ((1717 . 1718)) nil (26324 20287 891174 609000) 0 nil])
+([nil nil ((#("    @test all(length(v1.policy) .== length.((v1.policy, v2.policy, v3.policy, v4.policy, v5.policy)))
+" 0 3 (fontified t) 3 4 (fontified t rear-nonsticky t) 4 9 (face julia-macro-face fontified t) 9 21 (fontified t) 21 30 (fontified t) 30 43 (fontified t) 43 101 (fontified t) 101 102 (fontified t)) . 1619) (undo-tree-id720 . -4) (undo-tree-id721 . -101) (undo-tree-id722 . 75) (undo-tree-id723 . -98) (undo-tree-id724 . -26) (undo-tree-id725 . -4) (undo-tree-id726 . -34) (undo-tree-id727 . -99) (undo-tree-id728 . -99) (undo-tree-id729 . -4) (undo-tree-id730 . -4) (undo-tree-id731 . -4) (undo-tree-id732 . -4) (undo-tree-id733 . -4) (undo-tree-id734 . -27) (undo-tree-id735 . -27) (undo-tree-id736 . -27) (undo-tree-id737 . -27) (undo-tree-id738 . -27) (undo-tree-id739 . -26) (undo-tree-id740 . -26) (undo-tree-id741 . -26) (undo-tree-id742 . -26) (undo-tree-id743 . -26) (undo-tree-id744 . -102) 1645 (t 26324 20287 929220 28000)) nil (26324 20423 130576 322000) 0 nil])
+([nil nil ((#("
+" 0 1 (fontified nil)) . -2146) (2044 . 2147)) nil (26324 20423 130565 689000) 0 nil])
+([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2166) (undo-tree-id718 . -10) (undo-tree-id719 . -11)) nil (26324 20423 130565 129000) 0 nil])
+([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2170) (undo-tree-id716 . -10) (undo-tree-id717 . -11)) nil (26324 20423 130563 795000) 0 nil])
+([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2174) (undo-tree-id714 . -10) (undo-tree-id715 . -11)) nil (26324 20423 130562 372000) 0 nil])
+([nil nil ((2099 . 2101) (#("v" 0 1 (fontified t)) . -2099) (undo-tree-id582 . -1) (undo-tree-id583 . -1) (#("5" 0 1 (fontified t)) . -2100) (undo-tree-id584 . -1) (undo-tree-id585 . -1) 2101 (2088 . 2090) (#("v" 0 1 (fontified t)) . -2088) (undo-tree-id586 . -1) (undo-tree-id587 . -1) (#("4" 0 1 (fontified t)) . -2089) (undo-tree-id588 . -1) (undo-tree-id589 . -1) 2090 (2077 . 2079) (#("v" 0 1 (fontified t)) . -2077) (undo-tree-id590 . -1) (undo-tree-id591 . -1) (#("3" 0 1 (fontified t)) . -2078) (undo-tree-id592 . -1) (undo-tree-id593 . -1) 2079 (2076 . 2077) (#("," 0 1 (fontified t)) . -2076) (undo-tree-id594 . -1) (undo-tree-id595 . -1) 2077 (#(" " 0 1 (fontified t)) . -2077) (undo-tree-id596 . -1) (undo-tree-id597 . -1) 2078 (#("v" 0 1 (fontified t)) . -1567) (undo-tree-id598 . -1) (undo-tree-id599 . -1) (undo-tree-id600 . -1) (undo-tree-id601 . -1) (undo-tree-id602 . -1) (#("1" 0 1 (fontified t)) . -1568) (undo-tree-id603 . -1) (undo-tree-id604 . -1) (undo-tree-id605 . -1) (undo-tree-id606 . -1) (undo-tree-id607 . -1) (#("." 0 1 (fontified t)) . -1569) (undo-tree-id608 . -1) (undo-tree-id609 . -1) (undo-tree-id610 . -1) (undo-tree-id611 . -1) (undo-tree-id612 . -1) (#("v" 0 1 (fontified t)) . -1570) (undo-tree-id613 . -1) (undo-tree-id614 . -1) (undo-tree-id615 . -1) (undo-tree-id616 . -1) (undo-tree-id617 . -1) (#("a" 0 1 (fontified t)) . -1571) (undo-tree-id618 . -1) (undo-tree-id619 . -1) (undo-tree-id620 . -1) (undo-tree-id621 . -1) (undo-tree-id622 . -1) (#("l" 0 1 (fontified t)) . -1572) (undo-tree-id623 . -1) (undo-tree-id624 . -1) (undo-tree-id625 . -1) (undo-tree-id626 . -1) (undo-tree-id627 . -1) (#("u" 0 1 (fontified t)) . -1573) (undo-tree-id628 . -1) (undo-tree-id629 . -1) (undo-tree-id630 . -1) (undo-tree-id631 . -1) (undo-tree-id632 . -1) (#("e" 0 1 (fontified t)) . -1574) (undo-tree-id633 . -1) (undo-tree-id634 . -1) (#("," 0 1 (fontified t)) . -1575) (undo-tree-id635 . 1) (undo-tree-id636 . -1) (undo-tree-id637 . 
-1) (#(" " 0 1 (fontified t)) . -1576) (undo-tree-id638 . -1) (undo-tree-id639 . -1) 1577 (#(" " 0 1 (fontified t)) . -2086) (undo-tree-id640 . -1) (undo-tree-id641 . -1) (#("v" 0 1 (fontified t)) . -2087) (undo-tree-id642 . -1) (undo-tree-id643 . -1) (#("2" 0 1 (fontified t)) . -2088) (undo-tree-id644 . -1) (undo-tree-id645 . -1) (#("." 0 1 (fontified t)) . -2089) (undo-tree-id646 . -1) (undo-tree-id647 . -1) (#("p" 0 1 (fontified t)) . -2090) (undo-tree-id648 . -1) (undo-tree-id649 . -1) (#("o" 0 1 (fontified t)) . -2091) (undo-tree-id650 . -1) (undo-tree-id651 . -1) (#("l" 0 1 (fontified t)) . -2092) (undo-tree-id652 . -1) (undo-tree-id653 . -1) (#("i" 0 1 (fontified t)) . -2093) (undo-tree-id654 . -1) (undo-tree-id655 . -1) (#("c" 0 1 (fontified t)) . -2094) (undo-tree-id656 . -1) (undo-tree-id657 . -1) (#("y" 0 1 (fontified t)) . -2095) (undo-tree-id658 . -1) (undo-tree-id659 . -1) 2096 (2083 . 2085) (#("v" 0 1 (fontified t)) . -2083) (undo-tree-id660 . -1) (undo-tree-id661 . -1) (#("1" 0 1 (fontified t)) . -2084) (undo-tree-id662 . -1) (undo-tree-id663 . -1) (#("." 0 1 (fontified t)) . -2085) (undo-tree-id664 . -1) (undo-tree-id665 . -1) (#("p" 0 1 (fontified t)) . -2086) (undo-tree-id666 . -1) (undo-tree-id667 . -1) (#("o" 0 1 (fontified t)) . -2087) (undo-tree-id668 . -1) (undo-tree-id669 . -1) (#("l" 0 1 (fontified t)) . -2088) (undo-tree-id670 . -1) (undo-tree-id671 . -1) (#("i" 0 1 (fontified t)) . -2089) (undo-tree-id672 . -1) (undo-tree-id673 . -1) (#("c" 0 1 (fontified t)) . -2090) (undo-tree-id674 . -1) (undo-tree-id675 . -1) (#("y" 0 1 (fontified t)) . -2091) (undo-tree-id676 . -1) (undo-tree-id677 . -1) 2092 (2066 . 2068) (#("v" 0 1 (fontified t)) . -2066) (undo-tree-id678 . -1) (undo-tree-id679 . -1) (undo-tree-id680 . -1) (undo-tree-id681 . -1) (#("1" 0 1 (fontified t)) . -2067) (undo-tree-id682 . -1) (undo-tree-id683 . -1) (undo-tree-id684 . -1) (undo-tree-id685 . -1) (#("." 0 1 (fontified t)) . -2068) (undo-tree-id686 . -1) (undo-tree-id687 . 
-1) (undo-tree-id688 . -1) (undo-tree-id689 . -1) (#("p" 0 1 (fontified t)) . -2069) (undo-tree-id690 . -1) (undo-tree-id691 . -1) (undo-tree-id692 . -1) (undo-tree-id693 . -1) (#("o" 0 1 (fontified t)) . -2070) (undo-tree-id694 . -1) (undo-tree-id695 . -1) (undo-tree-id696 . -1) (undo-tree-id697 . -1) (#("l" 0 1 (fontified t)) . -2071) (undo-tree-id698 . -1) (undo-tree-id699 . -1) (undo-tree-id700 . -1) (undo-tree-id701 . -1) (#("i" 0 1 (fontified t)) . -2072) (undo-tree-id702 . -1) (undo-tree-id703 . -1) (undo-tree-id704 . -1) (undo-tree-id705 . -1) (#("c" 0 1 (fontified t)) . -2073) (undo-tree-id706 . -1) (undo-tree-id707 . -1) (undo-tree-id708 . -1) (undo-tree-id709 . -1) (#("y" 0 1 (fontified t)) . -2074) (undo-tree-id710 . -1) (undo-tree-id711 . -1) (undo-tree-id712 . -1) (undo-tree-id713 . -1) 2075) nil (26324 20423 130554 184000) 0 nil])
+([nil nil ((#("policy" 0 6 (fontified t)) . 2102) (undo-tree-id577 . -5) (undo-tree-id578 . 1) (undo-tree-id579 . -4) (undo-tree-id580 . -4) (undo-tree-id581 . -6)) nil (26324 20423 129767 154000) 0 nil])
+([nil nil ((#("." 0 1 (fontified t)) . -2101) (undo-tree-id567 . -1) (undo-tree-id568 . -1) (undo-tree-id569 . -1) (undo-tree-id570 . -1) (undo-tree-id571 . -1) (undo-tree-id572 . -1) (undo-tree-id573 . -1) (undo-tree-id574 . -1) (undo-tree-id575 . -1) (undo-tree-id576 . -1) 2102) nil (26324 20423 129764 250000) 0 nil])
+([nil nil ((#("." 0 1 (fontified t)) . -2090) (undo-tree-id553 . -1) (undo-tree-id554 . -1) (#("p" 0 1 (fontified t)) . -2091) (undo-tree-id555 . -1) (undo-tree-id556 . -1) (#("o" 0 1 (fontified t)) . -2092) (undo-tree-id557 . -1) (undo-tree-id558 . -1) (#("l" 0 1 (fontified t)) . -2093) (undo-tree-id559 . -1) (undo-tree-id560 . -1) (#("i" 0 1 (fontified t)) . -2094) (undo-tree-id561 . -1) (undo-tree-id562 . -1) (#("c" 0 1 (fontified t)) . -2095) (undo-tree-id563 . -1) (undo-tree-id564 . -1) (#("y" 0 1 (fontified t)) . -2096) (undo-tree-id565 . -1) (undo-tree-id566 . -1) 2097) nil (26324 20423 129757 524000) 0 nil])
+([nil current ((#("." 0 1 (fontified t)) . -2079) (undo-tree-id539 . -1) (undo-tree-id540 . -1) (#("p" 0 1 (fontified t)) . -2080) (undo-tree-id541 . -1) (undo-tree-id542 . -1) (#("o" 0 1 (fontified t)) . -2081) (undo-tree-id543 . -1) (undo-tree-id544 . -1) (#("l" 0 1 (fontified t)) . -2082) (undo-tree-id545 . -1) (undo-tree-id546 . -1) (#("i" 0 1 (fontified t)) . -2083) (undo-tree-id547 . -1) (undo-tree-id548 . -1) (#("c" 0 1 (fontified t)) . -2084) (undo-tree-id549 . -1) (undo-tree-id550 . -1) (#("y" 0 1 (fontified t)) . -2085) (undo-tree-id551 . -1) (undo-tree-id552 . -1) 2086) nil (26324 20423 129747 339000) 0 nil])
 nil
diff --git a/test/src/domains/garnet.jl b/test/src/domains/garnet.jl
index 1054a5e..1485953 100644
--- a/test/src/domains/garnet.jl
+++ b/test/src/domains/garnet.jl
@@ -15,7 +15,7 @@ import HiGHS, JuMP
     v5 = lp_solve(g, .95, JuMP.Model(HiGHS.Optimizer))
 
     # Ensure value functions are close
-    V = hcat(v1.value, v2.value[1:end-1], v3.value[1:end-1], v4.value[1:end-1], v5.value)
+    V = hcat(v1.value, v2.value, v3.value, v4.value, v5.value)
     @test map(x -> x[2] - x[1], mapslices(extrema, V; dims=2)) |> maximum ≤ 1e-6
 
     # Ensure policies are identical
@@ -25,6 +25,6 @@ import HiGHS, JuMP
     p4 = v4.policy
     p5 = v5.policy
 
-    P = hcat(p1, p2[1:end-1], p3[1:end-1], p4[1:end-1])
+    P = hcat(p1, p2, p3, p4)
     @test all(mapslices(allequal, P; dims=2))
 end
diff --git a/test/src/domains/gridworld.jl b/test/src/domains/gridworld.jl
index 797e5bc..1594db5 100644
--- a/test/src/domains/gridworld.jl
+++ b/test/src/domains/gridworld.jl
@@ -17,7 +17,7 @@ using MDPs.Domains
     v4 = policy_iteration(model_gc, 0.95)
 
     # Ensure value functions are close
-    V = hcat(v1.value, v2.value[1:end-1], v3.value[1:end-1], v4.value[1:end-1])
+    V = hcat(v1.value, v2.value, v3.value, v4.value)
     @test map(x -> x[2] - x[1], mapslices(extrema, V; dims=2)) |> maximum ≤ 1e-6
 
     # Ensure policies are identical
@@ -26,6 +26,6 @@ using MDPs.Domains
     p3 = greedy(model_gc, InfiniteH(0.95), v3.value)
     p4 = v4.policy
 
-    P = hcat(p1, p2[1:end-1], p3[1:end-1], p4[1:end-1])
+    P = hcat(p1, p2, p3, p4)
     @test all(mapslices(allequal, P; dims=2))
 end
diff --git a/test/src/domains/inventory.jl b/test/src/domains/inventory.jl
index e693da1..ed533ca 100644
--- a/test/src/domains/inventory.jl
+++ b/test/src/domains/inventory.jl
@@ -38,11 +38,11 @@ import HiGHS, JuMP
     v4 = policy_iteration(model_gc, 0.95)
     v5 = lp_solve(model, .95, JuMP.Model(HiGHS.Optimizer))
 
-    # note that the IntMDP does not have terminal states,
-    # so the last action will not be -1
-
+    @test all(state_count(model) .== state_count.((model_g, model_gc)))
+    @test all(length(v1.value) .== length.((v2.value, v3.value, v4.value, v5.value)))
+    
     #make sure value functions are close
-    V = hcat(v1.value, v2.value[1:(end-1)], v3.value[1:(end-1)], v4.value[1:(end-1)], v5.value)
+    V = hcat(v1.value, v2.value, v3.value, v4.value, v5.value)
     @test map(x->x[2] - x[1], mapslices(extrema, V; dims = 2)) |> maximum ≤ 1e-6
 
     # make sure policies are identical
@@ -52,6 +52,7 @@ import HiGHS, JuMP
     p4 = v4.policy
     p5 = v5.policy
 
-    P = hcat(p1, p2[1:(end-1)], p3[1:(end-1)], p4[1:(end-1)])
+    @test all(length(p1) .== length.((p2, p3, p4, p5)))
+    P = hcat(p1, p2, p3, p4)
     @test all(mapslices(allequal, P; dims = 2))
 end

From aedd9194623014c2b1837c2773c4cb9e44c98d67 Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Sun, 1 Sep 2024 09:09:05 -0400
Subject: [PATCH 05/10] transient example and algorithms

---
 src/MDPs.jl                    |  7 ++++-
 src/algorithms/linprogsolve.jl | 20 +++++++++----
 src/domains/gambler.jl         | 51 ++++++++++++++++++++++------------
 src/objectives.jl              |  9 ++----
 4 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/src/MDPs.jl b/src/MDPs.jl
index 6d18757..9dba514 100644
--- a/src/MDPs.jl
+++ b/src/MDPs.jl
@@ -2,10 +2,11 @@ module MDPs
 
 include("objectives.jl")
 export InfiniteH, FiniteH, Markov, Stationary, MarkovDet, StationaryDet
+export TotalReward
 
 include("models/mdp.jl")
 export MDP
-export getnext, transition, isterminal
+export getnext, transition
 export valuefunction
 
 
@@ -37,6 +38,10 @@ export policy_iteration, policy_iteration_sparse
 include("algorithms/linprogsolve.jl")
 export lp_solve
 
+include("algorithms/transient.jl")
+export lp_solve, anytransient, alltransient
+export isterminal
+
 include("simulation.jl")
 export simulate, random_π
 export Policy, PolicyStationary, PolicyMarkov
diff --git a/src/algorithms/linprogsolve.jl b/src/algorithms/linprogsolve.jl
index c7c60e7..326be19 100644
--- a/src/algorithms/linprogsolve.jl
+++ b/src/algorithms/linprogsolve.jl
@@ -6,17 +6,27 @@ using JuMP
 
 
 """
-    lp_solve(model, γ, lpm, [silent = true])
+    lp_solve(model, γ, lpmf, [silent = true])
 
 Implements the linear program primal problem for an MDP `model` with a discount factor `γ`.
 It uses the JuMP model `lpm` as the linear program solver and returns the state values
-found by `lpm`. 
+found by `lpmf`. The `lpmf` is a factory that can be passed to `JuMP.Model`. 
+
+The function needs to be provided with a solver. See the example below.
+
+# Example
+
+    using MDPs, HiGHS
+    model = Domains.Gambler.Ruin(0.5, 10)
+    lp_solve(model, 0.9, HiGHS.Optimizer)
 """
 
-function lp_solve(model::TabMDP, obj::InfiniteH, lpm; silent = true)
+function lp_solve(model::TabMDP, obj::InfiniteH, lpm::JuMP.Model; silent = true)
     γ = discount(obj)
     0 ≤ γ < 1 || error("γ must be between 0 and 1.")
 
+    
+    lpm = Model(lpmf)
     silent && set_silent(lpm)
     n = state_count(model)
     
@@ -31,10 +41,8 @@ function lp_solve(model::TabMDP, obj::InfiniteH, lpm; silent = true)
     end
     
     optimize!(lpm)
-
-    if !is_solved_and_feasible(lpm; dual = true)
+    is_solved_and_feasible(lpm; dual = true) ||
         error("Failed to solve the MDP linear program")
-    end
     
     (value = value.(v),
      policy = map(x->argmax(dual.(x)), u))
diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 4dc30d3..5187228 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -8,9 +8,9 @@ mt(st, prob,rew) =
     (Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64}
 
 
-# ------------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
 # Discounted ruin
-# ------------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
 
 
 """
@@ -58,9 +58,9 @@ function transition(model::Ruin, state::Int, action::Int)
 end
 
 
-# ------------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
 # Transient ruin
-# ------------------------------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
 
 
 """
@@ -72,41 +72,58 @@ some terminal capital and `0` otherwise. State `max_capital+1` is an absorbing w
 in which `1` is received forever.
 
 - Capital = `state - 1`
-- Bet     = `action - 1` 
 
-Available actions are `1`, ..., `state`.
+If `noop = true` then the available actions are `1, ..., capital+1` and bet = `action - 1`. This
+allows a bet of 0 which is not a transient policy. 
+
+If `noop = false` then the available actions are `1, ..., capital` and bet = `action `.
 
-Special states: `state=1` is broke and `state=max_capital+1` is an absorbing state.
+Special states: `state=1` is broke and `state=max_capital+1` is maximal capital. Both of the
+states are absorbing/terminal.
 
-The reward is `-1` when the gambler goes broke and `+1` when it achieves the target capital.
+The reward is `0` when the gambler goes broke and `+1` when it achieves the target capital. The
+difference from `Ruin` is that the reward is not received in the terminal state.  
 """
 struct RuinTransient <: TabMDP
     win :: Float64
     max_capital :: Int
+    noop :: Bool
 
-    function RuinTransient(win::Number, max_capital::Integer)
+    function RuinTransient(win::Number, max_capital::Integer, noop::Bool)
         zero(win) ≤ win ≤ one(win) || error("Win probability must be in [0,1]")
         max_capital ≥ one(max_capital) || error("Max capital must be positive")
-        new(win, max_capital)
+        new(win, max_capital, noop)
     end
 end
 
 state_count(model::RuinTransient) = model.max_capital + 1
-action_count(model::RuinTransient, state::Int) = state < model.max_capital + 1 ? state : 1 # only one action in the terminal state
+
+function action_count(model::RuinTransient, state::Int)
+    ns = state_count(model)
+    @assert state ≥ 1 && state ≤ ns 
+    if state == 1 || state == ns 
+        1
+    else
+        capital = state - 1
+        model.noop ? model.max_capital + 1 : model.max_capital
+    end
+end
 
 function transition(model::RuinTransient, state::Int, action::Int)
-    absorbing :: Int = model.max_capital + 1
+    absorbing = state_count(model)  # the "last" state
     
     1 ≤ state ≤ absorbing || error("invalid state")
     1 ≤ action ≤ action_count(model, state) || error("invalid action")
 
     if state == 1  # broke
-        (mt(absorbing, 1.0, -1.0),)
-    elseif state == absorbing   # absorbing terminal state; no reward
-        (mt(state, 1.0, 1.0),)
+        (mt(state, 1.0, 0.0),)
+    elseif state == model.max_capital+1   # absorbing terminal state; no reward
+        (mt(state, 1.0, 0.0),)
     else
-        win_state = min(model.max_capital + 1, (state - 1) + (action - 1) + 1)
-        lose_state = max(1, (state - 1) - (action - 1) + 1)
+        bet = model.noop ? action - 1 : action
+        
+        win_state = min(model.max_capital + 1, (state - 1) + bet + 1)
+        lose_state = max(1, (state - 1) - bet + 1)
 
         # reward 1.0 if an donly if we achieve the target capital
         win_reward = win_state == absorbing ? 1.0 : 0.0
diff --git a/src/objectives.jl b/src/objectives.jl
index d4c7338..114f28a 100644
--- a/src/objectives.jl
+++ b/src/objectives.jl
@@ -56,15 +56,12 @@ end
 
 """
 Total reward criterion. The objective is to maximize the sum
-of the undiscounted rewards. The model assumes that there is a terminal
- state, which must satisfy that it
+of the undiscounted rewards. 
 
-1) has a single action,
-2) transitions to itself,
-3) has a reward 0. 
+This objective can generally only be applied to transient MDPs,
+which have a terminal state; see `isterminal` for more details.
 """
 struct TotalReward <: StationaryDet
-    terminal_state :: Int 
 end
 
 """

From b2457ab32c0713e9ac2ebae5b7b4d5b5457a4a82 Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Thu, 5 Sep 2024 13:26:02 -0400
Subject: [PATCH 06/10] fixed documentation and jldoctests

---
 docs/src/index.md                          | 10 ++++++++++
 src/algorithms/linprogsolve.jl             | 11 +++++++++--
 src/domains/gambler.jl                     | 12 +++++++-----
 src/domains/gridworld.jl                   |  6 +++---
 src/models/integral.jl                     |  1 +
 test/src/domains/.inventory.jl.~undo-tree~ |  6 ++++--
 test/src/domains/garnet.jl                 |  2 +-
 test/src/domains/inventory.jl              |  2 +-
 8 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 5c53a2c..1de25d9 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -56,6 +56,12 @@ Pages = ["mrp.jl"]
 Modules = [MDPs]
 Pages = ["policyiteration.jl"]
 ```
+
+```@autodocs
+Modules = [MDPs]
+Pages = ["transient.jl"]
+```
+
 ## Value Function Manipulation
 
 ```@autodocs
@@ -95,3 +101,7 @@ Modules = [MDPs.Domains.Inventory]
 ```@autodocs
 Modules = [MDPs.Domains.Machine]
 ```
+
+```@autodocs
+Modules = [MDPs.Domains.GridWorld]
+```
diff --git a/src/algorithms/linprogsolve.jl b/src/algorithms/linprogsolve.jl
index 326be19..71cc40d 100644
--- a/src/algorithms/linprogsolve.jl
+++ b/src/algorithms/linprogsolve.jl
@@ -16,12 +16,19 @@ The function needs to be provided with a solver. See the example below.
 
 # Example
 
+```jldoctest
     using MDPs, HiGHS
     model = Domains.Gambler.Ruin(0.5, 10)
-    lp_solve(model, 0.9, HiGHS.Optimizer)
+    val = lp_solve(model, 0.9, HiGHS.Optimizer)
+    maximum(val.policy)
+
+# output
+
+    6
+```
 """
 
-function lp_solve(model::TabMDP, obj::InfiniteH, lpm::JuMP.Model; silent = true)
+function lp_solve(model::TabMDP, obj::InfiniteH, lpmf; silent = true)
     γ = discount(obj)
     0 ≤ γ < 1 || error("γ must be between 0 and 1.")
 
diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 5187228..59bef70 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -8,9 +8,9 @@ mt(st, prob,rew) =
     (Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64}
 
 
-# ----------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
 # Discounted ruin
-# ----------------------------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------------
 
 
 """
@@ -58,9 +58,9 @@ function transition(model::Ruin, state::Int, action::Int)
 end
 
 
-# ----------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
 # Transient ruin
-# ----------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------
 
 
 """
@@ -76,7 +76,9 @@ in which `1` is received forever.
 If `noop = true` then the available actions are `1, ..., capital+1` and bet = `action - 1`. This
 allows a bet of 0 which is not a transient policy. 
 
-If `noop = false` then the available actions are `1, ..., capital` and bet = `action `.
+If `noop = false` then the available actions are `1, ..., capital` and bet = `action `. The MDP is not
+transient if `noop = true`, but has some transient policies. When `noop = false`, the MDP is
+transient.
 
 Special states: `state=1` is broke and `state=max_capital+1` is maximal capital. Both of the
 states are absorbing/terminal.
diff --git a/src/domains/gridworld.jl b/src/domains/gridworld.jl
index 649cb3d..ea711a1 100644
--- a/src/domains/gridworld.jl
+++ b/src/domains/gridworld.jl
@@ -3,11 +3,9 @@ module GridWorld
 import ...TabMDP, ...transition, ...state_count, ...action_count
 import ...actions, ...states
 
-# TODO: Add docs, with method signatures
 """
 Models values of demand in `values` and probabilities in `probabilities`.
 """
-
 @enum Action begin
     UP = 1
     DOWN = 2
@@ -16,6 +14,9 @@ Models values of demand in `values` and probabilities in `probabilities`.
 end
 
 """
+    Parameters(rewards_s, max_side_length, wind)
+
+
 Parameters that define a GridWorld problem
 
 - `rewards_s`: A vector of rewards for each state
@@ -58,7 +59,6 @@ function transition(model::Model, state::Int, action::Int)
     remaining_wind = model.params.wind / 3
     ret = []
     # Wrap the state around the grid 1-based indexing
-    # NOTE: Julia for the love of God please implement a proper modulo function
     upstate = state - n <= 0 ? state + n_states - n : state - n
     downstate = (state + n) > n_states ? state - n_states + n : state + n
     leftstate = state % n == 1 ? state + (n - 1) : state - 1
diff --git a/src/models/integral.jl b/src/models/integral.jl
index 1aadfee..c5bec81 100644
--- a/src/models/integral.jl
+++ b/src/models/integral.jl
@@ -129,6 +129,7 @@ state_count(model)
 # output
 20
 ```
+
 Load the model from an Arrow file (a binary tabular file format)
 ```jldoctest
 using MDPs, Arrow
diff --git a/test/src/domains/.inventory.jl.~undo-tree~ b/test/src/domains/.inventory.jl.~undo-tree~
index 18baa83..af4201c 100644
--- a/test/src/domains/.inventory.jl.~undo-tree~
+++ b/test/src/domains/.inventory.jl.~undo-tree~
@@ -1,5 +1,5 @@
 (undo-tree-save-format-version . 1)
-"1c6e24de46538ceabe890182c5a7a01897449b6b"
+"7e4de13a0e6c881fe30ed3257ccf97c825e8ac4c"
 [nil nil nil nil (26303 463 21508 884000) 0 nil]
 ([nil nil ((#("=======
 >>>>>>> main
@@ -96,5 +96,7 @@ nil
 ([nil nil ((#("policy" 0 6 (fontified t)) . 2102) (undo-tree-id577 . -5) (undo-tree-id578 . 1) (undo-tree-id579 . -4) (undo-tree-id580 . -4) (undo-tree-id581 . -6)) nil (26324 20423 129767 154000) 0 nil])
 ([nil nil ((#("." 0 1 (fontified t)) . -2101) (undo-tree-id567 . -1) (undo-tree-id568 . -1) (undo-tree-id569 . -1) (undo-tree-id570 . -1) (undo-tree-id571 . -1) (undo-tree-id572 . -1) (undo-tree-id573 . -1) (undo-tree-id574 . -1) (undo-tree-id575 . -1) (undo-tree-id576 . -1) 2102) nil (26324 20423 129764 250000) 0 nil])
 ([nil nil ((#("." 0 1 (fontified t)) . -2090) (undo-tree-id553 . -1) (undo-tree-id554 . -1) (#("p" 0 1 (fontified t)) . -2091) (undo-tree-id555 . -1) (undo-tree-id556 . -1) (#("o" 0 1 (fontified t)) . -2092) (undo-tree-id557 . -1) (undo-tree-id558 . -1) (#("l" 0 1 (fontified t)) . -2093) (undo-tree-id559 . -1) (undo-tree-id560 . -1) (#("i" 0 1 (fontified t)) . -2094) (undo-tree-id561 . -1) (undo-tree-id562 . -1) (#("c" 0 1 (fontified t)) . -2095) (undo-tree-id563 . -1) (undo-tree-id564 . -1) (#("y" 0 1 (fontified t)) . -2096) (undo-tree-id565 . -1) (undo-tree-id566 . -1) 2097) nil (26324 20423 129757 524000) 0 nil])
-([nil current ((#("." 0 1 (fontified t)) . -2079) (undo-tree-id539 . -1) (undo-tree-id540 . -1) (#("p" 0 1 (fontified t)) . -2080) (undo-tree-id541 . -1) (undo-tree-id542 . -1) (#("o" 0 1 (fontified t)) . -2081) (undo-tree-id543 . -1) (undo-tree-id544 . -1) (#("l" 0 1 (fontified t)) . -2082) (undo-tree-id545 . -1) (undo-tree-id546 . -1) (#("i" 0 1 (fontified t)) . -2083) (undo-tree-id547 . -1) (undo-tree-id548 . -1) (#("c" 0 1 (fontified t)) . -2084) (undo-tree-id549 . -1) (undo-tree-id550 . -1) (#("y" 0 1 (fontified t)) . -2085) (undo-tree-id551 . -1) (undo-tree-id552 . -1) 2086) nil (26324 20423 129747 339000) 0 nil])
+([nil nil ((#("." 0 1 (fontified t)) . -2079) (undo-tree-id539 . -1) (undo-tree-id540 . -1) (#("p" 0 1 (fontified t)) . -2080) (undo-tree-id541 . -1) (undo-tree-id542 . -1) (#("o" 0 1 (fontified t)) . -2081) (undo-tree-id543 . -1) (undo-tree-id544 . -1) (#("l" 0 1 (fontified t)) . -2082) (undo-tree-id545 . -1) (undo-tree-id546 . -1) (#("i" 0 1 (fontified t)) . -2083) (undo-tree-id547 . -1) (undo-tree-id548 . -1) (#("c" 0 1 (fontified t)) . -2084) (undo-tree-id549 . -1) (undo-tree-id550 . -1) (#("y" 0 1 (fontified t)) . -2085) (undo-tree-id551 . -1) (undo-tree-id552 . -1) 2086) nil (26324 20423 129747 339000) 0 nil])
+([nil nil ((#("JuMP.Model" 0 10 (fontified t)) . 1421) (undo-tree-id751 . -9) (undo-tree-id752 . -9) (undo-tree-id753 . -9) (undo-tree-id754 . -9) (undo-tree-id755 . -9) (undo-tree-id756 . -10) (undo-tree-id757 . -10) 1430 (t 26324 20423 165543 962000)) nil (26328 39912 301975 923000) 0 nil])
+([nil current ((#(")" 0 1 (fontified nil)) . 1436) (#("(" 0 1 (fontified t)) . -1421) (undo-tree-id745 . 1) (undo-tree-id746 . -1) (undo-tree-id747 . -1) (undo-tree-id748 . -1) (undo-tree-id749 . -1) (undo-tree-id750 . -1) 1422) nil (26328 39912 301964 767000) 0 nil])
 nil
diff --git a/test/src/domains/garnet.jl b/test/src/domains/garnet.jl
index 1485953..b0a2232 100644
--- a/test/src/domains/garnet.jl
+++ b/test/src/domains/garnet.jl
@@ -12,7 +12,7 @@ import HiGHS, JuMP
     v2 = value_iteration(g1, InfiniteH(0.95); ϵ=1e-10)
     v3 = value_iteration(g2, InfiniteH(0.95); ϵ=1e-10)
     v4 = policy_iteration(g2, 0.95)
-    v5 = lp_solve(g, .95, JuMP.Model(HiGHS.Optimizer))
+    v5 = lp_solve(g, .95, HiGHS.Optimizer)
 
     # Ensure value functions are close
     V = hcat(v1.value, v2.value, v3.value, v4.value, v5.value)
diff --git a/test/src/domains/inventory.jl b/test/src/domains/inventory.jl
index ed533ca..aef15e8 100644
--- a/test/src/domains/inventory.jl
+++ b/test/src/domains/inventory.jl
@@ -36,7 +36,7 @@ import HiGHS, JuMP
     v2 = value_iteration(model_g, InfiniteH(0.95); ϵ = 1e-10)
     v3 = value_iteration(model_gc, InfiniteH(0.95); ϵ = 1e-10)
     v4 = policy_iteration(model_gc, 0.95)
-    v5 = lp_solve(model, .95, JuMP.Model(HiGHS.Optimizer))
+    v5 = lp_solve(model, .95, HiGHS.Optimizer)
 
     @test all(state_count(model) .== state_count.((model_g, model_gc)))
     @test all(length(v1.value) .== length.((v2.value, v3.value, v4.value, v5.value)))

From f2c4b9ee39ef6ada77b7b21b728b2b3a8447ff1a Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Thu, 5 Sep 2024 13:29:25 -0400
Subject: [PATCH 07/10] tests

---
 test/src/transient.jl | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 test/src/transient.jl

diff --git a/test/src/transient.jl b/test/src/transient.jl
new file mode 100644
index 0000000..b60e6f8
--- /dev/null
+++ b/test/src/transient.jl
@@ -0,0 +1,25 @@
+using Revise
+using HiGHS
+
+@testset "Transience - all" begin
+    opt = HiGHS.Optimizer
+    model = Domains.Gambler.RuinTransient(0.5, 20, false) # no noop
+    # without a zero bet the model is expected to be transient under every policy
+    @test anytransient(model, opt)
+    @test alltransient(model, opt)
+    val = lp_solve(model, TotalReward(), opt)
+    @test val.value[2] ≈ 0.5
+    @test val.policy[2] == 14  # NOTE(review): confirm the expected action
+end
+
+
+@testset "Transience - some" begin
+    opt = HiGHS.Optimizer
+    model = Domains.Gambler.RuinTransient(0.5, 20, true)
+    # a zero bet is allowed: some policies never terminate, so not all are transient
+    @test anytransient(model, opt)
+    @test !alltransient(model, opt)
+    val = lp_solve(model, TotalReward(), opt)
+    @test val.value[2] ≈ 0.5
+    @test val.policy[2] == 20
+end

From 9ad7d6b23c866ec7d8d532cf3641da1ebc33745f Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Thu, 5 Sep 2024 13:31:05 -0400
Subject: [PATCH 08/10] transient code

---
 src/algorithms/transient.jl | 150 ++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 src/algorithms/transient.jl

diff --git a/src/algorithms/transient.jl b/src/algorithms/transient.jl
new file mode 100644
index 0000000..ed6408c
--- /dev/null
+++ b/src/algorithms/transient.jl
@@ -0,0 +1,150 @@
+using JuMP
+
+# ----------------------------------------------------------------
+# Linear Program Solver
+# ----------------------------------------------------------------
+
+
+"""
+    isterminal(model, state)
+
+Checks that the `state` is terminal in `model`. A state is terminal if it
+
+1) has a single action,
+2) transitions to itself,
+3) has a reward 0. 
+
+
+# Example
+
+```jldoctest
+    using MDPs
+    model = Domains.Gambler.RuinTransient(0.5, 4, true)
+    isterminal.((model,), states(model))[1:2]
+
+# output
+
+2-element BitVector:
+ 1
+ 0
+```
+"""
+function isterminal(model::MDP{S,A}, state::S) where {S,A}
+    as = actions(model, state)
+    length(as) == 1 || return false
+    trs = transition(model, state, first(actions(model, state)))
+    length(trs) == 1 || return false
+    t = first(trs)
+    (t[1] == state && t[2] ≈ 1.0 && t[3] ≈ 0.0) || return false
+    return true
+end
+
+
+# a helper function used to check for transience and to solve the total-reward LP
+# reward: `nothing` to use the rewards from the MDP, or a constant
+# that replaces the reward of every transition
+# the function treats terminal states as having value 0
+function _transient_lp(model::TabMDP, reward::Union{Float64, Nothing},
+                       lpmf; silent) :: Union{Nothing,NamedTuple}
+
+    @assert minimum(states(model)) == 1 # make sure that the index is 1-based
+
+    lpm = Model(lpmf)
+    silent && set_silent(lpm)
+
+    rew(r) = isnothing(reward) ? r :: Float64 : reward :: Float64
+    
+    n = state_count(model)
+    
+    @variable(lpm, v[1:n])
+    @objective(lpm, Min, sum(v))
+
+    u = Vector{Vector{ConstraintRef}}(undef, n)
+    for s ∈ 1:n
+        @assert minimum(actions(model,s)) == 1 # make sure that the index is 1-based
+        if isterminal(model, s) # set terminal state(s) to 0 value
+            u[s] = [@constraint(lpm, v[s] == 0)]
+        else
+            u[s] = [@constraint(lpm, v[s] ≥ sum(p*(rew(r) + v[sn])
+                                                for (sn,p,r) ∈ transition(model,s,a)))
+                    for a in actions(model,s)]
+        end
+    end
+    
+    optimize!(lpm)
+
+    if is_solved_and_feasible(lpm) 
+        (value = value.(v), policy = map(x -> argmax(dual.(x)), u))
+    else
+        nothing
+    end
+end
+
+
+"""
+    lp_solve(model, obj::TotalReward, lpmf, [silent = true])
+
+Implements the linear program primal problem for an MDP `model` with the undiscounted total
+reward objective `obj`. Terminal states (see `isterminal`) have value 0. Returns the state values
+and the policy found using the solver constructed by `JuMP.Model(lpmf)`.
+
+## Examples
+
+
+# Example
+
+```jldoctest
+    using MDPs, HiGHS
+    model = Domains.Gambler.RuinTransient(0.5, 4, true)
+    lp_solve(model, TotalReward(), HiGHS.Optimizer).policy
+
+# output
+
+5-element Vector{Int64}:
+ 1
+ 4
+ 2
+ 2
+ 1
+```
+"""
+function lp_solve(model::TabMDP, obj::TotalReward, lpmf; silent = true)
+    # nothing => run with the true rewards
+    solution = _transient_lp(model, nothing, lpmf; silent = silent)
+    if isnothing(solution)
+        error("Failed to solve LP formulation. Is MDP transient?")
+    else
+        solution
+    end
+end
+
+
+"""
+    anytransient(model, lpmf, [silent = true])
+
+Checks if the MDP `model` has some transient policy. A policy is transient if it
+is guaranteed to terminate with positive probability after some finite number of steps.
+
+Note that the function returns true even when there are some policies that are not transient.
+
+The parameters match the use in `lp_solve`.
+"""
+function anytransient(model::TabMDP, lpmf; silent = true)
+    solution = _transient_lp(model, -1., lpmf; silent = silent)
+    !isnothing(solution)
+end
+
+"""
+    alltransient(model, lpmf, [silent = true])
+
+Checks if the MDP `model` has all transient policies. A policy is transient if it
+is guaranteed to terminate with positive probability after some finite number of steps.
+
+Note that the function returns true only if all policies are transient.
+
+The parameters match the use in `lp_solve`.
+"""
+function alltransient(model::TabMDP, lpmf; silent = true)
+    solution = _transient_lp(model, 1., lpmf; silent = silent)
+    !isnothing(solution)
+end

From d4a2fd558dc4be755850263e47fbe130558ee01c Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Fri, 6 Sep 2024 15:05:42 -0400
Subject: [PATCH 09/10] julia version

---
 .github/workflows/ci.yml   | 1 +
 .github/workflows/docs.yml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a859244..e99df52 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,6 +25,7 @@ jobs:
       matrix:
         version:
           - 1.9
+          - '1.10'  # quoted: YAML reads an unquoted 1.10 as the number 1.1
         os:
           - ubuntu-latest
           #- macOS-latest
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 0b2a037..89192ce 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -17,7 +17,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
         with:
-          version: '1.9'
+          version: '1.10'
       - name: Install dependencies
         run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
       - name: Build and deploy

From 245fd75784a49f6e235f3721163af242f73143e0 Mon Sep 17 00:00:00 2001
From: Marek Petrik <mpetrik@cs.unh.edu>
Date: Fri, 6 Sep 2024 16:43:39 -0400
Subject: [PATCH 10/10] package versions

---
 Project.toml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8783c2a..c1067e3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,10 +14,10 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
 DataFrames = "1.6.1"
-DataFramesMeta = "0.14.1"
-Distributions = "0.25.107"
-StatsBase = "0.34.2"
-julia = "1.9"
+DataFramesMeta = "0.15"
+Distributions = "0.25"
+StatsBase = "0.34"
+julia = "1.10"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"