diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a859244..e99df52 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,7 @@ jobs: matrix: version: - 1.9 + - 1.10 os: - ubuntu-latest #- macOS-latest diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0b2a037..89192ce 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 with: - version: '1.9' + version: '1.10' - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy diff --git a/Project.toml b/Project.toml index 8783c2a..c1067e3 100644 --- a/Project.toml +++ b/Project.toml @@ -14,10 +14,10 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] DataFrames = "1.6.1" -DataFramesMeta = "0.14.1" -Distributions = "0.25.107" -StatsBase = "0.34.2" -julia = "1.9" +DataFramesMeta = "0.15" +Distributions = "0.25" +StatsBase = "0.34" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/docs/src/index.md b/docs/src/index.md index 5c53a2c..1de25d9 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -56,6 +56,12 @@ Pages = ["mrp.jl"] Modules = [MDPs] Pages = ["policyiteration.jl"] ``` + +```@autodocs +Modules = [MDPs] +Pages = ["transient.jl"] +``` + ## Value Function Manipulation ```@autodocs @@ -95,3 +101,7 @@ Modules = [MDPs.Domains.Inventory] ```@autodocs Modules = [MDPs.Domains.Machine] ``` + +```@autodocs +Modules = [MDPs.Domains.GridWorld] +``` diff --git a/src/MDPs.jl b/src/MDPs.jl index 6d18757..9dba514 100644 --- a/src/MDPs.jl +++ b/src/MDPs.jl @@ -2,10 +2,11 @@ module MDPs include("objectives.jl") export InfiniteH, FiniteH, Markov, Stationary, MarkovDet, StationaryDet +export TotalReward include("models/mdp.jl") export MDP -export getnext, transition, isterminal +export getnext, transition export 
valuefunction @@ -37,6 +38,10 @@ export policy_iteration, policy_iteration_sparse include("algorithms/linprogsolve.jl") export lp_solve +include("algorithms/transient.jl") +export lp_solve, anytransient, alltransient +export isterminal + include("simulation.jl") export simulate, random_π export Policy, PolicyStationary, PolicyMarkov diff --git a/src/algorithms/linprogsolve.jl b/src/algorithms/linprogsolve.jl index 3c9961d..d8784d6 100644 --- a/src/algorithms/linprogsolve.jl +++ b/src/algorithms/linprogsolve.jl @@ -6,28 +6,59 @@ using JuMP """ - lp_solve(model, γ, lpm) + lp_solve(model, γ, lpmf, [silent = true]) + Implements the linear program primal problem for an MDP `model` with a discount factor `γ`. It uses the JuMP model `lpm` as the linear program solver and returns the state values -found by `lpm`. +found by `lpmf`. The `lpmf` is a factory that can be passed to `JuMP.Model`. + +The function needs to be provided with a solver. See the example below. + +# Example + +```jldoctest + using MDPs, HiGHS + model = Domains.Gambler.Ruin(0.5, 10) + val = lp_solve(model, 0.9, HiGHS.Optimizer) + maximum(val.policy) + +# output + + 6 +``` """ -function lp_solve(model::TabMDP, γ::Number, lpm) - 0 ≤ γ < 1 || error("γ must be between 0 and 1") - set_silent(lpm) +function lp_solve(model::TabMDP, obj::InfiniteH, lpmf; silent = true) + γ = discount(obj) + 0 ≤ γ < 1 || error("γ must be between 0 and 1.") + + + lpm = Model(lpmf) + silent && set_silent(lpm) n = state_count(model) + @variable(lpm, v[1:n]) - @objective(lpm,Min, sum(v[1:n])) - π::Vector{Vector{ConstraintRef}} = [] - for s in 1:n - m = action_count(model,s) - π_s::Vector{ConstraintRef} = [] - for a in 1:m - push!(π_s, @constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]]) for sp in transition(model,s,a)))) - end - push!(π, π_s) + @objective(lpm, Min, sum(v[1:n])) + + u = Vector{Vector{ConstraintRef}}(undef, n) + for s ∈ 1:n + u[s] = [@constraint(lpm, v[s] ≥ sum(sp[2]*(sp[3]+γ*v[sp[1]]) + for sp in 
transition(model,s,a))) + for a ∈ 1:action_count(model,s)] end + optimize!(lpm) - (value = value.(v), policy = map(x->argmax(dual.(x)), π)) + + is_solved_and_feasible(lpm; dual = true) || + error("Failed to solve the MDP linear program") + + (value = value.(v), + policy = map(x->argmax(dual.(x)), u)) end + +lp_solve(model::TabMDP, γ::Number, lpm; args...) = + lp_solve(model, InfiniteH(γ), lpm; args...) + + + diff --git a/src/algorithms/mrp.jl b/src/algorithms/mrp.jl index f97ab39..8fab46d 100644 --- a/src/algorithms/mrp.jl +++ b/src/algorithms/mrp.jl @@ -16,16 +16,11 @@ function mrp!(P_π::AbstractMatrix{<:Real}, r_π::AbstractVector{<:Real}, S = state_count(model) fill!(P_π, 0.); fill!(r_π, 0.) for s ∈ 1:S - #TODO: remove the definition of terminal states - if !isterminal(model, s) - for (sn, p, r) ∈ transition(model, s, π[s]) - P_π[s,sn] ≈ 0. || - error("duplicated transition entries (s1->s2, s1->s2) not allowed") - P_π[s,sn] += p - r_π[s] += p * r - end - else - r_π[s] = reward_T(model, s) + for (sn, p, r) ∈ transition(model, s, π[s]) + P_π[s,sn] ≈ 0. || + error("duplicated transition entries (s1->s2, s1->s2) not allowed") + P_π[s,sn] += p + r_π[s] += p * r end end end diff --git a/src/algorithms/transient.jl b/src/algorithms/transient.jl new file mode 100644 index 0000000..ed6408c --- /dev/null +++ b/src/algorithms/transient.jl @@ -0,0 +1,150 @@ +using JuMP + +# ---------------------------------------------------------------- +# Linear Program Solver +# ---------------------------------------------------------------- + + +""" + isterminal(model, state) + +Checks that the `state` is terminal in `model`. A state is terminal if it + +1) has a single action, +2) transitions to itself, +3) has a reward 0. 
+ + +# Example + +```jldoctest + using MDPs + model = Domains.Gambler.RuinTransient(0.5, 4, true) + isterminal.((model,), states(model))[1:2] + +# output + +2-element BitVector: + 1 + 0 +``` +""" +function isterminal(model::MDP{S,A}, state::S) where {S,A} + as = actions(model, state) + length(as) == 1 || return false + trs = transition(model, state, first(actions(model, state))) + length(trs) == 1 || return false + t = first(trs) + (t[1] == state && t[2] ≈ 1.0 && t[3] ≈ 0.0) || return false + return true +end + + +# a helper function used to check for transience +# reward: a function that specifies whether the reward +# from the MDP is used or a custom reward +# the function treats terminal states as having value 0 +function _transient_lp(model::TabMDP, reward::Union{Float64, Nothing}, + lpmf; silent) :: Union{Nothing,NamedTuple} + + @assert minimum(states(model)) == 1 # make sure that the index is 1-based + + lpm = Model(lpmf) + silent && set_silent(lpm) + + rew(r) = isnothing(reward) ? r :: Float64 : reward :: Float64 + + n = state_count(model) + + @variable(lpm, v[1:n]) + @objective(lpm, Min, sum(v)) + + u = Vector{Vector{ConstraintRef}}(undef, n) + for s ∈ 1:n + @assert minimum(actions(model,s)) == 1 # make sure that the index is 1-based + if isterminal(model, s) # set terminal state(s) to 0 value + u[s] = [@constraint(lpm, v[s] == 0)] + else + u[s] = [@constraint(lpm, v[s] ≥ sum(p*(rew(r) + v[sn]) + for (sn,p,r) ∈ transition(model,s,a))) + for a in actions(model,s)] + end + end + + optimize!(lpm) + + if is_solved_and_feasible(lpm) + (value = value.(v), policy = map(x -> argmax(dual.(x)), u)) + else + nothing + end +end + + +""" + lp_solve(model, lpmf, [silent = true]) + +Implements the linear program primal problem for an MDP `model` with a discount factor `γ`. +It uses the JuMP model `lpm` as the linear program solver and returns the state values +found found using the solver constructed by `JuMP.Model(lpmf)`. 
+ +## Examples + + +# Example + +```jldoctest + using MDPs, HiGHS + model = Domains.Gambler.RuinTransient(0.5, 4, true) + lp_solve(model, TotalReward(), HiGHS.Optimizer).policy + +# output + +5-element Vector{Int64}: + 1 + 4 + 2 + 2 + 1 +``` +""" +function lp_solve(model::TabMDP, obj::TotalReward, lpmf; silent = true) + # nothing => run with the true rewards + solution = _transient_lp(model, nothing, lpmf; silent = silent) + if isnothing(solution) + error("Failed to solve LP formulation. Is MDP transient?") + else + solution + end +end + + +""" + anytransient(model, lpmf, [silent = true]) + +Checks if the MDP `model` has some transient policy. A policy is transient if it +is guaranteed to terminate with positive probability after some finite number of steps. + +Note that the function returns true even when there are some policies that are not transient. + +The parameters match the use in `lp_solve`. +""" +function anytransient(model::TabMDP, lpmf; silent = true) + solution = _transient_lp(model, -1., lpmf; silent = silent) + !isnothing(solution) +end + +""" + anytransient(model, lpmf, [silent = true]) + +Checks if the MDP `model` has all transient policies. A policy is transient if it +is guaranteed to terminate with positive probability after some finite number of steps. + +Note that the function returns true only if all policies are transient. + +The parameters match the use in `lp_solve`. 
+""" +function alltransient(model::TabMDP, lpmf; silent = true) + solution = _transient_lp(model, 1., lpmf; silent = silent) + !isnothing(solution) +end diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl index 4568589..59bef70 100644 --- a/src/domains/gambler.jl +++ b/src/domains/gambler.jl @@ -2,41 +2,53 @@ module Gambler import ...TabMDP, ...transition, ...state_count, ...action_count +# create the transition representation for this domain +# (state_to, probability, reward) mt(st, prob,rew) = (Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64} + +# ------------------------------------------------------------------------------------- +# Discounted ruin +# ------------------------------------------------------------------------------------- + + """ Ruin(win, max_capital) -Gambler's ruin. Can decide how much to bet at any point in time. With some +Gambler's ruin; the discounted version. Can decide how much to bet at any point in time. With some probability `win`, the bet is doubled, and with `1-win` it is lost. The -reward is 1 if it achieves some terminal capital and 0 otherwise. +reward is `1` if it achieves some terminal capital and `0` otherwise. State `max_capital+1` +is an absorbing win state in which `1` is received forever. -- Capital = state - 1 -- Bet = action - 1 +- Capital = `state - 1` +- Bet = `action - 1` -Available actions are 1, ..., state - 1. +Available actions are `1`, ..., `state`. -Special states: state=1 is broke and state=max_capital+1 is a terminal winning state. +Special states: `state=1` is broke and `state=max_capital+1` is a terminal winning state. 
""" struct Ruin <: TabMDP win :: Float64 max_capital :: Int function Ruin(win::Number, max_capital::Integer) - zero(win) ≤ win ≤ one(win) || error("win probability must be in [0,1]") + zero(win) ≤ win ≤ one(win) || error("Win probability must be in [0,1]") max_capital ≥ one(max_capital) || error("Max capital must be positive") new(win, max_capital) end end +state_count(model::Ruin) = model.max_capital + 1 +action_count(model::Ruin, state::Int) = state < model.max_capital + 1 ? state : 1 # only one action in the terminal state + function transition(model::Ruin, state::Int, action::Int) - 1 ≤ state ≤ model.max_capital+1 || error("invalid state") - 1 ≤ action ≤ state || error("invalid action") + 1 ≤ state ≤ model.max_capital + 1 || error("invalid state") + 1 ≤ action ≤ action_count(model, state) || error("invalid action") - if state == 1 + if state == 1 # overall loss state (mt(1, 1.0, 0.0),) - elseif state == model.max_capital + 1 # the state is absorbing + elseif state == model.max_capital + 1 # overall win state (mt(state, 1.0, 1.0),) else win_state = min(model.max_capital + 1, (state - 1) + (action - 1) + 1) @@ -45,7 +57,82 @@ function transition(model::Ruin, state::Int, action::Int) end end -state_count(model::Ruin) = model.max_capital + 1 -action_count(model::Ruin, state::Int) = state + +# -------------------------------------------------------------------------------------- +# Transient ruin +# -------------------------------------------------------------------------------------- + + +""" + RuinTransient(win, max_capital) + +Gambler's ruin; the transient version. Can decide how much to bet at any point in time. With some +probability `win`, the bet is doubled, and with `1-win` it is lost. The reward is `1` if it achieves +some terminal capital and `0` otherwise. State `max_capital+1` is an absorbing win state +in which `1` is received forever. 
+ +- Capital = `state - 1` + +If `noop = true` then the available actions are `1, ..., capital+1` and bet = `action - 1`. This +allows a bet of 0 which is not a transient policy. + +If `noop = false` then the available actions are `1, ..., capital` and bet = `action `. The MDP is not +transient if `noop = true`, but has some transient policies. When `noop = false`, the MDP is +transient. + +Special states: `state=1` is broke and `state=max_capital+1` is maximal capital. Both of the +states are absorbing/terminal. + +The reward is `0` when the gambler goes broke and `+1` when it achieves the target capital. The +difference from `Ruin` is that the reward is not received in the terminal state. +""" +struct RuinTransient <: TabMDP + win :: Float64 + max_capital :: Int + noop :: Bool + + function RuinTransient(win::Number, max_capital::Integer, noop::Bool) + zero(win) ≤ win ≤ one(win) || error("Win probability must be in [0,1]") + max_capital ≥ one(max_capital) || error("Max capital must be positive") + new(win, max_capital, noop) + end +end + +state_count(model::RuinTransient) = model.max_capital + 1 + +function action_count(model::RuinTransient, state::Int) + ns = state_count(model) + @assert state ≥ 1 && state ≤ ns + if state == 1 || state == ns + 1 + else + capital = state - 1 + model.noop ? model.max_capital + 1 : model.max_capital + end +end + +function transition(model::RuinTransient, state::Int, action::Int) + absorbing = state_count(model) # the "last" state + + 1 ≤ state ≤ absorbing || error("invalid state") + 1 ≤ action ≤ action_count(model, state) || error("invalid action") + + if state == 1 # broke + (mt(state, 1.0, 0.0),) + elseif state == model.max_capital+1 # absorbing terminal state; no reward + (mt(state, 1.0, 0.0),) + else + bet = model.noop ? 
action - 1 : action + + win_state = min(model.max_capital + 1, (state - 1) + bet + 1) + lose_state = max(1, (state - 1) - bet + 1) + + # reward 1.0 if an donly if we achieve the target capital + win_reward = win_state == absorbing ? 1.0 : 0.0 + + # the reward is 0 when we lose + (mt(win_state, model.win, win_reward), mt(lose_state, 1.0 - model.win, 0.)) + end +end end # Gambler diff --git a/src/domains/gridworld.jl b/src/domains/gridworld.jl index 649cb3d..ea711a1 100644 --- a/src/domains/gridworld.jl +++ b/src/domains/gridworld.jl @@ -3,11 +3,9 @@ module GridWorld import ...TabMDP, ...transition, ...state_count, ...action_count import ...actions, ...states -# TODO: Add docs, with method signatures """ Models values of demand in `values` and probabilities in `probabilities`. """ - @enum Action begin UP = 1 DOWN = 2 @@ -16,6 +14,9 @@ Models values of demand in `values` and probabilities in `probabilities`. end """ + Parameters(reward_s, max_side_length, wind) + + Parameters that define a GridWorld problem - `rewards_s`: A vector of rewards for each state @@ -58,7 +59,6 @@ function transition(model::Model, state::Int, action::Int) remaining_wind = model.params.wind / 3 ret = [] # Wrap the state around the grid 1-based indexing - # NOTE: Julia for the love of God please implement a proper modulo function upstate = state - n <= 0 ? state + n_states - n : state - n downstate = (state + n) > n_states ? state - n_states + n : state + n leftstate = state % n == 1 ? state + (n - 1) : state - 1 diff --git a/src/models/integral.jl b/src/models/integral.jl index b82db4e..c5bec81 100644 --- a/src/models/integral.jl +++ b/src/models/integral.jl @@ -129,6 +129,7 @@ state_count(model) # output 20 ``` + Load the model from an Arrow file (a binary tabular file format) ```jldoctest using MDPs, Arrow @@ -239,41 +240,30 @@ values. The option `docompress` combined transitions to the same state into a single transition. 
This improves efficiency in risk-neutral settings, but may change the outcome in risk-averse settings. - -The function adds one more state at the end which represents a catch-all terminal state """ function make_int_mdp(mdp::TabMDP; docompress = false) statecount = state_count(mdp) - states = Vector{IntState}(undef, statecount + 1) # + terminal + states = Vector{IntState}(undef, statecount) - # add a self-looping state to model a terminal state - # needed to handle terminal state - states[statecount+1] = IntState([IntAction([statecount+1],[1.0],[0.0])]) - Threads.@threads for s ∈ 1:statecount action_vals = 1:action_count(mdp, s) - if isterminal(mdp, s) - states[s] = IntState([IntAction( - [statecount+1], [1.0], [0.])]) - else - acts = Vector{IntAction}(undef, length(action_vals)) - for (ia,a) ∈ enumerate(action_vals) - ns = Array{Int}(undef, 0) # next state - np = Array{Float64}(undef, 0) # next probalbility - nr = Array{Float64}(undef, 0) # next reward - - for (nexts, nextp, nextr) ∈ transition(mdp, s, a) - # check where to insert the next state transition - i = searchsortedfirst(ns, nexts) - insert!(ns, i, nexts) - insert!(np, i, nextp) - insert!(nr, i, nextr) - end - a = IntAction(ns, np, nr) - acts[ia] = docompress ? compress(a) : a + acts = Vector{IntAction}(undef, length(action_vals)) + for (ia,a) ∈ enumerate(action_vals) + ns = Array{Int}(undef, 0) # next state + np = Array{Float64}(undef, 0) # next probalbility + nr = Array{Float64}(undef, 0) # next reward + + for (nexts, nextp, nextr) ∈ transition(mdp, s, a) + # check where to insert the next state transition + i = searchsortedfirst(ns, nexts) + insert!(ns, i, nexts) + insert!(np, i, nextp) + insert!(nr, i, nextr) end - states[s] = IntState(acts) + a = IntAction(ns, np, nr) + acts[ia] = docompress ? 
compress(a) : a end + states[s] = IntState(acts) end IntMDP(states) end diff --git a/src/models/mdp.jl b/src/models/mdp.jl index 81d87a7..8d02cbb 100644 --- a/src/models/mdp.jl +++ b/src/models/mdp.jl @@ -18,18 +18,12 @@ abstract type MDP{S,A} end # Default definition of functions # ---------------------------------------------------------------- -""" - isterminal(mdp, state) - -Return true if the state is terminal -""" -function isterminal end """ (sn, p, r) ∈ transition(model, s, a) -Return a list with next states, probabilities, and rewards. -Returns an iterator. +Return an iterator with next states, probabilities, and rewards for +`model` taking an action `a` in state `s`. Use `getnext` instead, which is more efficient and convenient to use. """ @@ -38,7 +32,7 @@ function transition end """ valuefunction(mdp, state, valuefunction) -Evaluates the value function for an MDP in a state +Evaluates the value function for an `mdp` in a `state` """ function valuefunction end diff --git a/src/models/tabular.jl b/src/models/tabular.jl index e458477..06f98e6 100644 --- a/src/models/tabular.jl +++ b/src/models/tabular.jl @@ -4,11 +4,10 @@ using SparseArrays """ An abstract tabular Markov Decision Process which is specified by a transition function. - Functions that should be defined for any subtype for value and policy iterations to work are: `state_count`, `states`, `action_count`, `actions`, and `transition`. -Generally, states should be 1-based. +Generally, states and actions are 1-based. 
The methods `state_count` and `states` should only include non-terminal states """ @@ -18,7 +17,6 @@ abstract type TabMDP <: MDP{Int,Int} end # General MDP interface functions # ---------------------------------------------------------------- -isterminal(::TabMDP, s::Int) = s ≤ 0 valuefunction(::TabMDP, s::Int, v) = v[s] function state_count end diff --git a/src/objectives.jl b/src/objectives.jl index 8e2cebf..114f28a 100644 --- a/src/objectives.jl +++ b/src/objectives.jl @@ -53,6 +53,17 @@ struct FiniteH <: MarkovDet end end + +""" +Total reward criterion. The objective is to maximize the sum +of the undiscounted rewards. + +This objective can generally only be applied to transient states, +which have a terminal state; see `isterminal` for more details. +""" +struct TotalReward <: StationaryDet +end + """ horizon(objective) diff --git a/src/simulation.jl b/src/simulation.jl index fa482a4..49f9a0d 100644 --- a/src/simulation.jl +++ b/src/simulation.jl @@ -172,7 +172,6 @@ function simulate(model::MDP{S,A}, π::Policy{S,A}, initial, prob = rand() tot_prob = 0. for (sn,pn,rn) ∈ transition(model, states[t-1,run], actions[t-1,run]) - isterminal(model, sn) && error("Terminal states unsupported.") if prob ≤ (tot_prob += pn) # state sn was sampled # update internal state using the current time step let tr = Transition(states[t-1,run], actions[t-1,run], rn, sn, t) diff --git a/src/valuefunction/bellman.jl b/src/valuefunction/bellman.jl index 6ba69bb..a9e5ebc 100644 --- a/src/valuefunction/bellman.jl +++ b/src/valuefunction/bellman.jl @@ -39,14 +39,9 @@ See `qvalues` for more information. 
function qvalues!(qvalues::AbstractVector{<:Real}, model::MDP{S,A}, obj::Objective, t::Integer, s::S, v) where {S,A} - if isterminal(model, s) - qvalues .= -Inf - qvalues[1] = 0 - else - acts = actions(model, s) - for (ia,a) ∈ enumerate(acts) - qvalues[ia] = qvalue(model, obj, t, s, a, v) - end + acts = actions(model, s) + for (ia,a) ∈ enumerate(acts) + qvalues[ia] = qvalue(model, obj, t, s, a, v) end end @@ -100,15 +95,10 @@ time-dependent updates. The function uses `qvalue` to compute the Bellman operator and the greedy policy. """ function bellmangreedy(model::MDP{S,A}, obj::Objective, t::Integer, s::S, v) where {S,A} - if isterminal(model, s) - (qvalue = 0 :: Float64, - action = emptyaction(model) :: A) - else - acts = actions(model, s) - (qval, ia) = findmax(a->qvalue(model, obj, t, s, a, v), acts) - (qvalue = qval :: Float64, - action = acts[ia] :: A) - end + acts = actions(model, s) + (qval, ia) = findmax(a->qvalue(model, obj, t, s, a, v), acts) + (qvalue = qval :: Float64, + action = acts[ia] :: A) end # default fallback when t is diff --git a/test/src/domains/.inventory.jl.~undo-tree~ b/test/src/domains/.inventory.jl.~undo-tree~ index fd6b827..af4201c 100644 --- a/test/src/domains/.inventory.jl.~undo-tree~ +++ b/test/src/domains/.inventory.jl.~undo-tree~ @@ -1,9 +1,102 @@ (undo-tree-save-format-version . 1) -"7f5c20e0493d52a78b3976a36ddef4420bed61da" +"7e4de13a0e6c881fe30ed3257ccf97c825e8ac4c" [nil nil nil nil (26303 463 21508 884000) 0 nil] ([nil nil ((#("======= >>>>>>> main " 0 7 (face smerge-markers fontified t) 7 8 (face nil fontified t) 8 21 (face smerge-markers fontified t)) . 1463) (undo-tree-id3 . -8) (undo-tree-id4 . -8) (undo-tree-id5 . -8) (undo-tree-id6 . -8) (undo-tree-id7 . -21) (undo-tree-id8 . -21) (undo-tree-id9 . -21) (undo-tree-id10 . -21) (undo-tree-id11 . -8) (undo-tree-id12 . -8) (undo-tree-id13 . -21) (undo-tree-id14 . 
-20) 1471 (t 26303 431 259432 314000)) nil (26303 463 21506 379000) 0 nil]) -([nil current ((#("<<<<<<< HEAD -" 0 1 (face smerge-markers smerge-refine-part (13 . 2) fontified t) 1 13 (face smerge-markers fontified t)) . 1391) (undo-tree-id0 . -13) (undo-tree-id1 . -12) (undo-tree-id2 . -13)) nil (26303 463 21492 460000) 0 nil]) +([nil nil ((#("<<<<<<< HEAD +" 0 1 (face smerge-markers smerge-refine-part (13 . 2) fontified t) 1 13 (face smerge-markers fontified t)) . 1391) (undo-tree-id0 . -13) (undo-tree-id1 . -12) (undo-tree-id2 . -13)) nil (26324 18994 911358 849000) 0 nil]) +([nil nil ((#(" # note that the IntMDP does not have terminal states, + # so the last action will not be -1 + +" 0 4 (fontified t) 4 6 (face font-lock-comment-delimiter-face fontified t) 6 50 (face font-lock-comment-face fontified t) 50 58 (face font-lock-comment-face fontified t) 58 62 (fontified t) 62 64 (face font-lock-comment-delimiter-face fontified t) 64 98 (face font-lock-comment-face fontified t) 98 99 (fontified t)) . 1451) (undo-tree-id94 . -98) (undo-tree-id95 . -36) (undo-tree-id96 . -98) (undo-tree-id97 . -36) (undo-tree-id98 . -36) (undo-tree-id99 . -98) (undo-tree-id100 . -98) (undo-tree-id101 . -98)) ((1451 . 1550)) (26324 18997 1184 116000) 0 nil]) +([nil nil ((1450 . 1455) (t 26324 18998 432489 309000) 1450) nil (26324 19516 549097 781000) 0 nil] [nil nil ((#("]" 0 1 (fontified t)) . -1533) (undo-tree-id82 . -1) (undo-tree-id83 . -1) 1534) ((1533 . 1534)) (26324 18993 956985 308000) 0 nil]) +([nil nil ((1455 . 1469)) nil (26324 19516 549096 335000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1523) (undo-tree-id44 . -1) (undo-tree-id45 . -1) (undo-tree-id46 . -1) (undo-tree-id47 . -1) (#("1" 0 1 (fontified t)) . -1524) (undo-tree-id48 . -1) (undo-tree-id49 . -1) (undo-tree-id50 . -1) (undo-tree-id51 . -1) (#(":" 0 1 (fontified t)) . -1525) (undo-tree-id52 . -1) (undo-tree-id53 . -1) (undo-tree-id54 . -1) (undo-tree-id55 . -1) (#("(" 0 1 (fontified t)) . 
-1526) (undo-tree-id56 . -1) (undo-tree-id57 . -1) (undo-tree-id58 . -1) (undo-tree-id59 . -1) (#("e" 0 1 (fontified t)) . -1527) (undo-tree-id60 . -1) (undo-tree-id61 . -1) (undo-tree-id62 . -1) (undo-tree-id63 . -1) (#("n" 0 1 (fontified t)) . -1528) (undo-tree-id64 . -1) (undo-tree-id65 . -1) (undo-tree-id66 . -1) (undo-tree-id67 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1529) (undo-tree-id68 . -1) (undo-tree-id69 . -1) (undo-tree-id70 . -1) (undo-tree-id71 . -1) (#("-" 0 1 (fontified t)) . -1530) (undo-tree-id72 . -1) (undo-tree-id73 . -1) (undo-tree-id74 . -1) (undo-tree-id75 . -1) (#("1" 0 1 (fontified t)) . -1531) (undo-tree-id76 . -1) (undo-tree-id77 . -1) (undo-tree-id78 . -1) (undo-tree-id79 . -1) (#(")" 0 1 (fontified t)) . -1532) (undo-tree-id80 . -1) (undo-tree-id81 . -1) 1533) ((1523 . 1533)) (26324 18993 762605 944000) 0 nil]) +([nil nil ((1469 . 1486)) nil (26324 19516 549095 415000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1533) (undo-tree-id22 . -1) (undo-tree-id23 . -1) (#("1" 0 1 (fontified t)) . -1534) (undo-tree-id24 . -1) (undo-tree-id25 . -1) (#(":" 0 1 (fontified t)) . -1535) (undo-tree-id26 . -1) (undo-tree-id27 . -1) (#("(" 0 1 (fontified t)) . -1536) (undo-tree-id28 . -1) (undo-tree-id29 . -1) (#("e" 0 1 (fontified t)) . -1537) (undo-tree-id30 . -1) (undo-tree-id31 . -1) (#("n" 0 1 (fontified t)) . -1538) (undo-tree-id32 . -1) (undo-tree-id33 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1539) (undo-tree-id34 . -1) (undo-tree-id35 . -1) (#("-" 0 1 (fontified t)) . -1540) (undo-tree-id36 . -1) (undo-tree-id37 . -1) (#("1" 0 1 (fontified t)) . -1541) (undo-tree-id38 . -1) (undo-tree-id39 . -1) (#(")" 0 1 (fontified t)) . -1542) (undo-tree-id40 . -1) (undo-tree-id41 . -1) (#("]" 0 1 (fontified t)) . -1543) (undo-tree-id42 . -1) (undo-tree-id43 . -1) 1544) ((1533 . 1544)) (26324 18993 540465 741000) 0 nil]) +([nil nil ((#("." 0 1 (fontified t)) . -1474) (undo-tree-id102 . -1) (undo-tree-id103 . 
-1) (#("v" 0 1 (fontified t)) . -1475) (undo-tree-id104 . -1) (undo-tree-id105 . -1) (#("2" 0 1 (fontified t)) . -1476) (undo-tree-id106 . -1) (undo-tree-id107 . -1) (#("." 0 1 (fontified t)) . -1477) (undo-tree-id108 . -1) (undo-tree-id109 . -1) (#("v" 0 1 (fontified t)) . -1478) (undo-tree-id110 . -1) (undo-tree-id111 . -1) (#("3" 0 1 (fontified t)) . -1479) (undo-tree-id112 . -1) (undo-tree-id113 . -1) (#("." 0 1 (fontified t)) . -1480) (undo-tree-id114 . -1) (undo-tree-id115 . -1) (#("v" 0 1 (fontified t)) . -1481) (undo-tree-id116 . -1) (undo-tree-id117 . -1) (#("4" 0 1 (fontified t)) . -1482) (undo-tree-id118 . -1) (undo-tree-id119 . -1) (#("." 0 1 (fontified t)) . -1483) (undo-tree-id120 . -1) (undo-tree-id121 . -1) (#("v" 0 1 (fontified t)) . -1484) (undo-tree-id122 . -1) (undo-tree-id123 . -1) (#("5" 0 1 (fontified t)) . -1485) (undo-tree-id124 . -1) (undo-tree-id125 . -1) 1486) nil (26324 19516 549090 926000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1543) (undo-tree-id0 . -1) (undo-tree-id1 . -1) (#("1" 0 1 (fontified t)) . -1544) (undo-tree-id2 . -1) (undo-tree-id3 . -1) (#(":" 0 1 (fontified t)) . -1545) (undo-tree-id4 . -1) (undo-tree-id5 . -1) (#("(" 0 1 (fontified t)) . -1546) (undo-tree-id6 . -1) (undo-tree-id7 . -1) (#("e" 0 1 (fontified t)) . -1547) (undo-tree-id8 . -1) (undo-tree-id9 . -1) (#("n" 0 1 (fontified t)) . -1548) (undo-tree-id10 . -1) (undo-tree-id11 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1549) (undo-tree-id12 . -1) (undo-tree-id13 . -1) (#("-" 0 1 (fontified t)) . -1550) (undo-tree-id14 . -1) (undo-tree-id15 . -1) (#("1" 0 1 (fontified t)) . -1551) (undo-tree-id16 . -1) (undo-tree-id17 . -1) (#(")" 0 1 (fontified t)) . -1552) (undo-tree-id18 . -1) (undo-tree-id19 . -1) (#("]" 0 1 (fontified t)) . -1553) (undo-tree-id20 . -1) (undo-tree-id21 . -1) 1554) ((1543 . 1554) (t 26324 18989 445845 445000)) (26324 18989 407450 327000) 0 nil]) +([nil nil ((1474 . 
1489)) nil (26324 19516 549056 45000) 0 nil]) +nil +([nil nil ((1484 . 1485) (t 26324 19516 584508 526000)) nil (26324 19631 262319 676000) 0 nil]) +([nil nil ((1481 . 1482)) nil (26324 19631 262318 484000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1471) (undo-tree-id128 . -1) (undo-tree-id129 . -1) 1472) nil (26324 19631 262317 492000) 0 nil]) +([nil nil ((1471 . 1472)) nil (26324 19631 262314 658000) 0 nil]) +([nil nil ((#("]" 0 1 (fontified t)) . -1488) (undo-tree-id126 . -1) (undo-tree-id127 . -1) 1489) nil (26324 19631 262312 144000) 0 nil]) +([nil nil ((1488 . 1489)) nil (26324 19631 262296 664000) 0 nil]) +([nil nil ((1451 . 1455) (#(" " 0 4 (fontified nil)) . 1450) (1450 . 1454) (1450 . 1451) (t 26324 19631 297551 70000)) nil (26324 19757 589979 427000) 0 nil]) +([nil nil ((1455 . 1458)) nil (26324 19757 589978 53000) 0 nil]) +([nil nil ((#("o" 0 1 (fontified t)) . -1457) (undo-tree-id151 . -1) (undo-tree-id152 . -1) (undo-tree-id153 . -1) (undo-tree-id154 . -1) (undo-tree-id155 . -1) (undo-tree-id156 . -1) 1458) nil (26324 19757 589976 899000) 0 nil]) +([nil nil ((1457 . 1460)) nil (26324 19757 589972 240000) 0 nil]) +([nil nil ((1455 . 1462) (#("print" 0 5 (fontified t)) . -1455) (undo-tree-id142 . -2) (undo-tree-id143 . -2) (undo-tree-id144 . -5) (undo-tree-id145 . -2) (undo-tree-id146 . -2) (undo-tree-id147 . -2) (undo-tree-id148 . -2) (undo-tree-id149 . -5) (undo-tree-id150 . -5) 1460) nil (26324 19757 589970 889000) 0 nil]) +([nil nil ((1462 . 1471)) nil (26324 19757 589963 993000) 0 nil]) +([nil nil ((1463 . 1474) (#("state_co" 0 8 (fontified t)) . -1463) (undo-tree-id130 . -8) (undo-tree-id131 . -5) (undo-tree-id132 . -5) (undo-tree-id133 . -6) (undo-tree-id134 . -6) (undo-tree-id135 . -7) (undo-tree-id136 . -7) (undo-tree-id137 . -8) (undo-tree-id138 . -8) (undo-tree-id139 . -8) (undo-tree-id140 . -8) (undo-tree-id141 . -8) 1471) nil (26324 19757 589961 887000) 0 nil]) +([nil nil ((1474 . 
1479)) nil (26324 19757 589941 843000) 0 nil]) +([nil nil ((1479 . 1500)) nil (26324 19757 589940 150000) 0 nil]) +([nil nil ((1500 . 1502)) nil (26324 19757 589934 739000) 0 nil]) +([nil nil ((1476 . 1477) (t 26324 19757 627230 872000)) nil (26324 19811 605786 9000) 0 nil]) +([nil nil ((1501 . 1502)) nil (26324 19811 605779 786000) 0 nil]) +([nil nil ((1528 . 1534) (t 26324 19811 640427 304000)) nil (26324 19882 603897 537000) 0 nil]) +([nil nil ((1537 . 1543)) nil (26324 19882 603896 446000) 0 nil]) +([nil nil ((1546 . 1552)) nil (26324 19882 603895 739000) 0 nil]) +([nil nil ((1556 . 1562)) nil (26324 19882 603895 35000) 0 nil]) +([nil nil ((1566 . 1572)) nil (26324 19882 603894 270000) 0 nil]) +([nil nil ((1535 . 1536)) nil (26324 19882 603892 818000) 0 nil]) +([nil nil ((1545 . 1546)) nil (26324 19882 603887 901000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1650) (undo-tree-id305 . -1) (undo-tree-id306 . -1) (undo-tree-id307 . -1) (undo-tree-id308 . -1) (undo-tree-id309 . -1) (undo-tree-id310 . -1) (undo-tree-id311 . -1) (undo-tree-id312 . -1) (undo-tree-id313 . -1) (#("1" 0 1 (fontified t)) . -1651) (undo-tree-id314 . -1) (undo-tree-id315 . -1) (undo-tree-id316 . -1) (undo-tree-id317 . -1) (undo-tree-id318 . -1) (undo-tree-id319 . -1) (undo-tree-id320 . -1) (undo-tree-id321 . -1) (undo-tree-id322 . -1) (#(":" 0 1 (fontified t)) . -1652) (undo-tree-id323 . -1) (undo-tree-id324 . -1) (undo-tree-id325 . -1) (undo-tree-id326 . -1) (undo-tree-id327 . -1) (undo-tree-id328 . -1) (undo-tree-id329 . -1) (undo-tree-id330 . -1) (undo-tree-id331 . -1) (#("(" 0 1 (fontified t)) . -1653) (undo-tree-id332 . -1) (undo-tree-id333 . -1) (undo-tree-id334 . -1) (undo-tree-id335 . -1) (undo-tree-id336 . -1) (undo-tree-id337 . -1) (undo-tree-id338 . -1) (undo-tree-id339 . -1) (undo-tree-id340 . -1) (#("e" 0 1 (fontified t)) . -1654) (undo-tree-id341 . -1) (undo-tree-id342 . -1) (undo-tree-id343 . -1) (undo-tree-id344 . -1) (undo-tree-id345 . -1) (undo-tree-id346 . 
-1) (undo-tree-id347 . -1) (undo-tree-id348 . -1) (undo-tree-id349 . -1) (#("n" 0 1 (fontified t)) . -1655) (undo-tree-id350 . -1) (undo-tree-id351 . -1) (undo-tree-id352 . -1) (undo-tree-id353 . -1) (undo-tree-id354 . -1) (undo-tree-id355 . -1) (undo-tree-id356 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1656) (undo-tree-id357 . 1) (undo-tree-id358 . -1) (undo-tree-id359 . -1) (undo-tree-id360 . -1) (undo-tree-id361 . -1) (undo-tree-id362 . -1) (undo-tree-id363 . -1) (undo-tree-id364 . -1) (#("-" 0 1 (fontified t)) . -1657) (undo-tree-id365 . -1) (undo-tree-id366 . -1) (undo-tree-id367 . -1) (undo-tree-id368 . -1) (undo-tree-id369 . -1) (undo-tree-id370 . -1) (undo-tree-id371 . -1) (#("1" 0 1 (fontified t)) . -1658) (undo-tree-id372 . -1) (undo-tree-id373 . -1) (undo-tree-id374 . -1) (undo-tree-id375 . -1) (undo-tree-id376 . -1) (undo-tree-id377 . -1) (undo-tree-id378 . -1) (#(")" 0 1 (fontified t)) . -1659) (undo-tree-id379 . -1) (undo-tree-id380 . -1) (undo-tree-id381 . -1) (undo-tree-id382 . -1) (undo-tree-id383 . -1) (undo-tree-id384 . -1) (undo-tree-id385 . -1) (#("]" 0 1 (fontified t)) . -1660) (undo-tree-id386 . -1) (undo-tree-id387 . -1) (undo-tree-id388 . -1) (undo-tree-id389 . -1) 1661 (t 26324 19882 643580 642000)) nil (26324 19949 714487 992000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1660) (undo-tree-id201 . -1) (undo-tree-id202 . -1) (undo-tree-id203 . -1) (undo-tree-id204 . -1) (undo-tree-id205 . -1) (undo-tree-id206 . -1) (undo-tree-id207 . -1) (undo-tree-id208 . -1) (undo-tree-id209 . -1) (undo-tree-id210 . -1) (#("1" 0 1 (fontified t)) . -1661) (undo-tree-id211 . -1) (undo-tree-id212 . -1) (undo-tree-id213 . -1) (undo-tree-id214 . -1) (undo-tree-id215 . -1) (undo-tree-id216 . -1) (undo-tree-id217 . -1) (undo-tree-id218 . -1) (undo-tree-id219 . -1) (undo-tree-id220 . -1) (#(":" 0 1 (fontified t)) . -1662) (undo-tree-id221 . -1) (undo-tree-id222 . -1) (undo-tree-id223 . -1) (undo-tree-id224 . -1) (undo-tree-id225 . 
-1) (undo-tree-id226 . -1) (undo-tree-id227 . -1) (undo-tree-id228 . -1) (undo-tree-id229 . -1) (undo-tree-id230 . -1) (#("(" 0 1 (fontified t)) . -1663) (undo-tree-id231 . -1) (undo-tree-id232 . -1) (undo-tree-id233 . -1) (undo-tree-id234 . -1) (undo-tree-id235 . -1) (undo-tree-id236 . -1) (undo-tree-id237 . -1) (undo-tree-id238 . -1) (undo-tree-id239 . -1) (undo-tree-id240 . -1) (#("e" 0 1 (fontified t)) . -1664) (undo-tree-id241 . -1) (undo-tree-id242 . -1) (undo-tree-id243 . -1) (undo-tree-id244 . -1) (undo-tree-id245 . -1) (undo-tree-id246 . -1) (undo-tree-id247 . -1) (undo-tree-id248 . -1) (undo-tree-id249 . -1) (undo-tree-id250 . -1) (#("n" 0 1 (fontified t)) . -1665) (undo-tree-id251 . -1) (undo-tree-id252 . -1) (undo-tree-id253 . -1) (undo-tree-id254 . -1) (undo-tree-id255 . -1) (undo-tree-id256 . -1) (undo-tree-id257 . -1) (undo-tree-id258 . -1) (undo-tree-id259 . -1) (undo-tree-id260 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1666) (undo-tree-id261 . -1) (undo-tree-id262 . -1) (undo-tree-id263 . -1) (undo-tree-id264 . -1) (undo-tree-id265 . -1) (undo-tree-id266 . -1) (undo-tree-id267 . -1) (undo-tree-id268 . -1) (undo-tree-id269 . -1) (undo-tree-id270 . -1) (#("-" 0 1 (fontified t)) . -1667) (undo-tree-id271 . -1) (undo-tree-id272 . -1) (undo-tree-id273 . -1) (undo-tree-id274 . -1) (undo-tree-id275 . -1) (undo-tree-id276 . -1) (undo-tree-id277 . -1) (undo-tree-id278 . -1) (undo-tree-id279 . -1) (undo-tree-id280 . -1) (#("1" 0 1 (fontified t)) . -1668) (undo-tree-id281 . -1) (undo-tree-id282 . -1) (undo-tree-id283 . -1) (undo-tree-id284 . -1) (undo-tree-id285 . -1) (undo-tree-id286 . -1) (undo-tree-id287 . -1) (undo-tree-id288 . -1) (undo-tree-id289 . -1) (undo-tree-id290 . -1) (#(")" 0 1 (fontified t)) . -1669) (undo-tree-id291 . -1) (undo-tree-id292 . -1) (undo-tree-id293 . -1) (undo-tree-id294 . -1) (undo-tree-id295 . -1) (undo-tree-id296 . -1) (undo-tree-id297 . -1) (undo-tree-id298 . -1) (undo-tree-id299 . -1) (undo-tree-id300 . 
-1) (#("]" 0 1 (fontified t)) . -1670) (undo-tree-id301 . -1) (undo-tree-id302 . -1) (undo-tree-id303 . -1) (undo-tree-id304 . -1) 1671) nil (26324 19949 714425 317000) 0 nil]) +([nil nil ((#("[" 0 1 (fontified t)) . -1670) (undo-tree-id157 . -1) (undo-tree-id158 . -1) (undo-tree-id159 . -1) (undo-tree-id160 . -1) (#("1" 0 1 (fontified t)) . -1671) (undo-tree-id161 . -1) (undo-tree-id162 . -1) (undo-tree-id163 . -1) (undo-tree-id164 . -1) (#(":" 0 1 (fontified t)) . -1672) (undo-tree-id165 . -1) (undo-tree-id166 . -1) (undo-tree-id167 . -1) (undo-tree-id168 . -1) (#("(" 0 1 (fontified t)) . -1673) (undo-tree-id169 . -1) (undo-tree-id170 . -1) (undo-tree-id171 . -1) (undo-tree-id172 . -1) (#("e" 0 1 (fontified t)) . -1674) (undo-tree-id173 . -1) (undo-tree-id174 . -1) (undo-tree-id175 . -1) (undo-tree-id176 . -1) (#("n" 0 1 (fontified t)) . -1675) (undo-tree-id177 . -1) (undo-tree-id178 . -1) (undo-tree-id179 . -1) (undo-tree-id180 . -1) (#("d" 0 1 (face font-lock-keyword-face fontified t)) . -1676) (undo-tree-id181 . -1) (undo-tree-id182 . -1) (undo-tree-id183 . -1) (undo-tree-id184 . -1) (#("-" 0 1 (fontified t)) . -1677) (undo-tree-id185 . -1) (undo-tree-id186 . -1) (undo-tree-id187 . -1) (undo-tree-id188 . -1) (#("1" 0 1 (fontified t)) . -1678) (undo-tree-id189 . -1) (undo-tree-id190 . -1) (undo-tree-id191 . -1) (undo-tree-id192 . -1) (#(")" 0 1 (fontified t)) . -1679) (undo-tree-id193 . -1) (undo-tree-id194 . -1) (undo-tree-id195 . -1) (undo-tree-id196 . -1) (#("]" 0 1 (fontified t)) . -1680) (undo-tree-id197 . -1) (undo-tree-id198 . -1) (undo-tree-id199 . -1) (undo-tree-id200 . -1) 1681) nil (26324 19949 714350 980000) 0 nil]) +([nil nil ((#("p" 0 1 (fontified t)) . -1509) (undo-tree-id426 . -1) (undo-tree-id427 . -1) (undo-tree-id428 . -1) (#("r" 0 1 (fontified t)) . -1510) (undo-tree-id429 . -1) (undo-tree-id430 . -1) (undo-tree-id431 . -1) (#("i" 0 1 (fontified t)) . -1511) (undo-tree-id432 . -1) (undo-tree-id433 . -1) (undo-tree-id434 . 
-1) (#("n" 0 1 (fontified t)) . -1512) (undo-tree-id435 . -1) (undo-tree-id436 . -1) (undo-tree-id437 . -1) (#("t" 0 1 (fontified t)) . -1513) (undo-tree-id438 . -1) (undo-tree-id439 . -1) (undo-tree-id440 . -1) (#("l" 0 1 (fontified t)) . -1514) (undo-tree-id441 . -1) (undo-tree-id442 . -1) (undo-tree-id443 . -1) (#("n" 0 1 (fontified t)) . -1515) (undo-tree-id444 . -1) (undo-tree-id445 . -1) (undo-tree-id446 . -1) (#("(" 0 1 (fontified t)) . -1516) (undo-tree-id447 . -1) (undo-tree-id448 . -1) 1517 (t 26324 19949 726743 921000)) nil (26324 20086 93153 710000) 0 nil]) +([nil nil ((#("model" 0 5 (fontified t)) . 1477) (undo-tree-id416 . -4) (undo-tree-id417 . -4) (undo-tree-id418 . -4) (undo-tree-id419 . -3) (undo-tree-id420 . -4) (undo-tree-id421 . -4) (undo-tree-id422 . -4) (undo-tree-id423 . -4) (undo-tree-id424 . -5) (undo-tree-id425 . -5)) nil (26324 20086 93140 719000) 0 nil]) +([nil nil ((#("," 0 1 (fontified t)) . -1477) (undo-tree-id406 . 1) (undo-tree-id407 . 1) (undo-tree-id408 . -1) (undo-tree-id409 . -1) (undo-tree-id410 . -1) (undo-tree-id411 . -1) (#(" " 0 1 (fontified t)) . -1478) (undo-tree-id412 . -1) (undo-tree-id413 . -1) (undo-tree-id414 . -1) (undo-tree-id415 . -1) 1479) nil (26324 20086 92707 559000) 0 nil]) +([nil nil ((#("p" 0 1 (fontified t)) . -1455) (undo-tree-id390 . -1) (undo-tree-id391 . -1) (#("r" 0 1 (fontified t)) . -1456) (undo-tree-id392 . -1) (undo-tree-id393 . -1) (#("i" 0 1 (fontified t)) . -1457) (undo-tree-id394 . -1) (undo-tree-id395 . -1) (#("n" 0 1 (fontified t)) . -1458) (undo-tree-id396 . -1) (undo-tree-id397 . -1) (#("t" 0 1 (fontified t)) . -1459) (undo-tree-id398 . -1) (undo-tree-id399 . -1) (#("l" 0 1 (fontified t)) . -1460) (undo-tree-id400 . -1) (undo-tree-id401 . -1) (#("n" 0 1 (fontified t)) . -1461) (undo-tree-id402 . -1) (undo-tree-id403 . -1) (#("(" 0 1 (fontified t)) . -1462) (undo-tree-id404 . -1) (undo-tree-id405 . -1) 1463) nil (26324 20086 92700 938000) 0 nil]) +([nil nil ((1455 . 
1466)) nil (26324 20086 92683 898000) 0 nil]) +([nil nil ((1466 . 1483)) nil (26324 20086 92683 390000) 0 nil]) +([nil nil ((1461 . 1465)) nil (26324 20086 92682 410000) 0 nil]) +([nil nil ((1521 . 1522)) nil (26324 20086 92678 466000) 0 nil]) +([nil nil ((#("l" 0 1 (fontified t)) . -1527) (undo-tree-id451 . -1) (undo-tree-id452 . -1) (undo-tree-id453 . -1) (#("e" 0 1 (fontified t)) . -1528) (undo-tree-id454 . -1) (undo-tree-id455 . -1) (undo-tree-id456 . -1) (#("n" 0 1 (fontified t)) . -1529) (undo-tree-id457 . -1) (undo-tree-id458 . -1) (undo-tree-id459 . -1) (#("g" 0 1 (fontified t)) . -1530) (undo-tree-id460 . -1) (undo-tree-id461 . -1) (undo-tree-id462 . -1) (#("t" 0 1 (fontified t)) . -1531) (undo-tree-id463 . -1) (undo-tree-id464 . -1) (undo-tree-id465 . -1) (#("h" 0 1 (fontified t)) . -1532) (undo-tree-id466 . -1) (undo-tree-id467 . -1) (undo-tree-id468 . -1) (#("." 0 1 (fontified t)) . -1533) (undo-tree-id469 . -1) (undo-tree-id470 . -1) (undo-tree-id471 . -1) (#("(" 0 1 (fontified t)) . -1534) (undo-tree-id472 . -1) (undo-tree-id473 . -1) 1535 (t 26324 20086 129731 498000)) nil (26324 20116 336966 122000) 0 nil]) +([nil nil ((1527 . 1533)) nil (26324 20116 336953 527000) 0 nil]) +([nil nil ((nil rear-nonsticky nil 1540 . 1541) (nil fontified nil 1533 . 1541) (1533 . 1541) 1532) nil (26324 20116 336952 938000) 0 nil]) +([nil nil ((1533 . 1544)) nil (26324 20116 336952 15000) 0 nil]) +([nil nil ((1552 . 1558)) nil (26324 20116 336951 78000) 0 nil]) +([nil nil ((#(")" 0 1 (fontified t)) . -1609) (undo-tree-id449 . -1) (undo-tree-id450 . -1) 1610) nil (26324 20116 336948 56000) 0 nil]) +([nil nil ((#("y" 0 1 (fontified t)) . -1609) (undo-tree-id530 . -1) (undo-tree-id531 . -1) (undo-tree-id532 . -1) (#("y" 0 1 (fontified t)) . -1610) (undo-tree-id533 . -1) (undo-tree-id534 . -1) (#("p" 0 1 (fontified t)) . -1611) (undo-tree-id535 . -1) (undo-tree-id536 . -1) 1612 (1609 . 
1612) (t 26324 20116 346321 571000)) nil (26324 20146 384641 434000) 0 nil]) +([nil nil ((nil rear-nonsticky nil 1613 . 1614) (#(" +" 0 1 (fontified nil)) . -1696) (1609 . 1697) 1608) nil (26324 20146 384636 950000) 0 nil]) +([nil nil ((1696 . 1701) 1695) nil (26324 20146 384635 968000) 0 nil]) +([nil nil ((#("value" 0 4 (fontified t) 4 5 (fontified t rear-nonsticky t)) . 1634) (undo-tree-id524 . -4) (undo-tree-id525 . 3) (undo-tree-id526 . -1) (undo-tree-id527 . -1) (undo-tree-id528 . -5) (undo-tree-id529 . -5)) nil (26324 20146 384635 49000) 0 nil]) +([nil nil ((1634 . 1640)) nil (26324 20146 384631 842000) 0 nil]) +([nil nil ((#("v" 0 1 (fontified t)) . -1650) (undo-tree-id514 . -1) (undo-tree-id515 . -1) (#("a" 0 1 (fontified t)) . -1651) (undo-tree-id516 . -1) (undo-tree-id517 . -1) (#("l" 0 1 (fontified t)) . -1652) (undo-tree-id518 . -1) (undo-tree-id519 . -1) (#("u" 0 1 (fontified t)) . -1653) (undo-tree-id520 . -1) (undo-tree-id521 . -1) (#("e" 0 1 (fontified t)) . -1654) (undo-tree-id522 . -1) (undo-tree-id523 . -1) 1655) nil (26324 20146 384630 529000) 0 nil]) +([nil nil ((1650 . 1656)) nil (26324 20146 384624 192000) 0 nil]) +([nil nil ((#("v" 0 1 (fontified t)) . -1661) (undo-tree-id504 . -1) (undo-tree-id505 . -1) (#("a" 0 1 (fontified t)) . -1662) (undo-tree-id506 . -1) (undo-tree-id507 . -1) (#("l" 0 1 (fontified t)) . -1663) (undo-tree-id508 . -1) (undo-tree-id509 . -1) (#("u" 0 1 (fontified t)) . -1664) (undo-tree-id510 . -1) (undo-tree-id511 . -1) (#("e" 0 1 (fontified t)) . -1665) (undo-tree-id512 . -1) (undo-tree-id513 . -1) 1666) nil (26324 20146 384623 307000) 0 nil]) +([nil nil ((1661 . 1667)) nil (26324 20146 384618 87000) 0 nil]) +([nil nil ((#("v" 0 1 (fontified t)) . -1672) (undo-tree-id494 . -1) (undo-tree-id495 . -1) (#("a" 0 1 (fontified t)) . -1673) (undo-tree-id496 . -1) (undo-tree-id497 . -1) (#("l" 0 1 (fontified t)) . -1674) (undo-tree-id498 . -1) (undo-tree-id499 . -1) (#("u" 0 1 (fontified t)) . -1675) (undo-tree-id500 . 
-1) (undo-tree-id501 . -1) (#("e" 0 1 (fontified t)) . -1676) (undo-tree-id502 . -1) (undo-tree-id503 . -1) 1677) nil (26324 20146 384616 870000) 0 nil]) +([nil nil ((1672 . 1678)) nil (26324 20146 384611 500000) 0 nil]) +([nil nil ((#("v" 0 1 (fontified t)) . -1683) (undo-tree-id484 . -1) (undo-tree-id485 . -1) (#("a" 0 1 (fontified t)) . -1684) (undo-tree-id486 . -1) (undo-tree-id487 . -1) (#("l" 0 1 (fontified t)) . -1685) (undo-tree-id488 . -1) (undo-tree-id489 . -1) (#("u" 0 1 (fontified t)) . -1686) (undo-tree-id490 . -1) (undo-tree-id491 . -1) (#("e" 0 1 (fontified t)) . -1687) (undo-tree-id492 . -1) (undo-tree-id493 . -1) 1688) nil (26324 20146 384610 329000) 0 nil]) +([nil nil ((1683 . 1689)) nil (26324 20146 384604 883000) 0 nil]) +([nil nil ((#("v" 0 1 (fontified t)) . -1694) (undo-tree-id474 . -1) (undo-tree-id475 . -1) (#("a" 0 1 (fontified t)) . -1695) (undo-tree-id476 . -1) (undo-tree-id477 . -1) (#("l" 0 1 (fontified t)) . -1696) (undo-tree-id478 . -1) (undo-tree-id479 . -1) (#("u" 0 1 (fontified t)) . -1697) (undo-tree-id480 . -1) (undo-tree-id481 . -1) (#("e" 0 1 (fontified t)) . -1698) (undo-tree-id482 . -1) (undo-tree-id483 . -1) 1699) nil (26324 20146 384602 941000) 0 nil]) +([nil nil ((1694 . 1700)) nil (26324 20146 384585 295000) 0 nil]) +([nil nil ((#(")" 0 1 (fontified t)) . -1521) (undo-tree-id537 . -1) (undo-tree-id538 . -1) 1522 (t 26324 20146 392912 80000)) nil (26324 20181 3408 139000) 0 nil]) +([nil nil ((1470 . 1471) (t 26324 20181 12824 335000)) nil (26324 20224 111667 442000) 0 nil]) +([nil nil ((1558 . 1565) (t 26324 20224 149381 677000)) nil (26324 20277 29979 822000) 0 nil]) +([nil nil ((1653 . 1660)) nil (26324 20277 29975 793000) 0 nil]) +([nil nil ((1566 . 1567) (t 26324 20277 39247 633000)) nil (26324 20287 891179 960000) 0 nil]) +([nil nil ((1615 . 1616)) nil (26324 20287 891179 159000) 0 nil]) +([nil nil ((1663 . 1664)) nil (26324 20287 891178 52000) 0 nil]) +([nil nil ((1717 . 
1718)) nil (26324 20287 891174 609000) 0 nil]) +([nil nil ((#(" @test all(length(v1.policy) .== length.((v1.policy, v2.policy, v3.policy, v4.policy, v5.policy))) +" 0 3 (fontified t) 3 4 (fontified t rear-nonsticky t) 4 9 (face julia-macro-face fontified t) 9 21 (fontified t) 21 30 (fontified t) 30 43 (fontified t) 43 101 (fontified t) 101 102 (fontified t)) . 1619) (undo-tree-id720 . -4) (undo-tree-id721 . -101) (undo-tree-id722 . 75) (undo-tree-id723 . -98) (undo-tree-id724 . -26) (undo-tree-id725 . -4) (undo-tree-id726 . -34) (undo-tree-id727 . -99) (undo-tree-id728 . -99) (undo-tree-id729 . -4) (undo-tree-id730 . -4) (undo-tree-id731 . -4) (undo-tree-id732 . -4) (undo-tree-id733 . -4) (undo-tree-id734 . -27) (undo-tree-id735 . -27) (undo-tree-id736 . -27) (undo-tree-id737 . -27) (undo-tree-id738 . -27) (undo-tree-id739 . -26) (undo-tree-id740 . -26) (undo-tree-id741 . -26) (undo-tree-id742 . -26) (undo-tree-id743 . -26) (undo-tree-id744 . -102) 1645 (t 26324 20287 929220 28000)) nil (26324 20423 130576 322000) 0 nil]) +([nil nil ((#(" +" 0 1 (fontified nil)) . -2146) (2044 . 2147)) nil (26324 20423 130565 689000) 0 nil]) +([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2166) (undo-tree-id718 . -10) (undo-tree-id719 . -11)) nil (26324 20423 130565 129000) 0 nil]) +([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2170) (undo-tree-id716 . -10) (undo-tree-id717 . -11)) nil (26324 20423 130563 795000) 0 nil]) +([nil nil ((#("[1:(end-1)]" 0 4 (fontified t) 4 7 (face font-lock-keyword-face fontified t) 7 11 (fontified t)) . 2174) (undo-tree-id714 . -10) (undo-tree-id715 . -11)) nil (26324 20423 130562 372000) 0 nil]) +([nil nil ((2099 . 2101) (#("v" 0 1 (fontified t)) . -2099) (undo-tree-id582 . -1) (undo-tree-id583 . -1) (#("5" 0 1 (fontified t)) . -2100) (undo-tree-id584 . -1) (undo-tree-id585 . -1) 2101 (2088 . 
2090) (#("v" 0 1 (fontified t)) . -2088) (undo-tree-id586 . -1) (undo-tree-id587 . -1) (#("4" 0 1 (fontified t)) . -2089) (undo-tree-id588 . -1) (undo-tree-id589 . -1) 2090 (2077 . 2079) (#("v" 0 1 (fontified t)) . -2077) (undo-tree-id590 . -1) (undo-tree-id591 . -1) (#("3" 0 1 (fontified t)) . -2078) (undo-tree-id592 . -1) (undo-tree-id593 . -1) 2079 (2076 . 2077) (#("," 0 1 (fontified t)) . -2076) (undo-tree-id594 . -1) (undo-tree-id595 . -1) 2077 (#(" " 0 1 (fontified t)) . -2077) (undo-tree-id596 . -1) (undo-tree-id597 . -1) 2078 (#("v" 0 1 (fontified t)) . -1567) (undo-tree-id598 . -1) (undo-tree-id599 . -1) (undo-tree-id600 . -1) (undo-tree-id601 . -1) (undo-tree-id602 . -1) (#("1" 0 1 (fontified t)) . -1568) (undo-tree-id603 . -1) (undo-tree-id604 . -1) (undo-tree-id605 . -1) (undo-tree-id606 . -1) (undo-tree-id607 . -1) (#("." 0 1 (fontified t)) . -1569) (undo-tree-id608 . -1) (undo-tree-id609 . -1) (undo-tree-id610 . -1) (undo-tree-id611 . -1) (undo-tree-id612 . -1) (#("v" 0 1 (fontified t)) . -1570) (undo-tree-id613 . -1) (undo-tree-id614 . -1) (undo-tree-id615 . -1) (undo-tree-id616 . -1) (undo-tree-id617 . -1) (#("a" 0 1 (fontified t)) . -1571) (undo-tree-id618 . -1) (undo-tree-id619 . -1) (undo-tree-id620 . -1) (undo-tree-id621 . -1) (undo-tree-id622 . -1) (#("l" 0 1 (fontified t)) . -1572) (undo-tree-id623 . -1) (undo-tree-id624 . -1) (undo-tree-id625 . -1) (undo-tree-id626 . -1) (undo-tree-id627 . -1) (#("u" 0 1 (fontified t)) . -1573) (undo-tree-id628 . -1) (undo-tree-id629 . -1) (undo-tree-id630 . -1) (undo-tree-id631 . -1) (undo-tree-id632 . -1) (#("e" 0 1 (fontified t)) . -1574) (undo-tree-id633 . -1) (undo-tree-id634 . -1) (#("," 0 1 (fontified t)) . -1575) (undo-tree-id635 . 1) (undo-tree-id636 . -1) (undo-tree-id637 . -1) (#(" " 0 1 (fontified t)) . -1576) (undo-tree-id638 . -1) (undo-tree-id639 . -1) 1577 (#(" " 0 1 (fontified t)) . -2086) (undo-tree-id640 . -1) (undo-tree-id641 . -1) (#("v" 0 1 (fontified t)) . -2087) (undo-tree-id642 . 
-1) (undo-tree-id643 . -1) (#("2" 0 1 (fontified t)) . -2088) (undo-tree-id644 . -1) (undo-tree-id645 . -1) (#("." 0 1 (fontified t)) . -2089) (undo-tree-id646 . -1) (undo-tree-id647 . -1) (#("p" 0 1 (fontified t)) . -2090) (undo-tree-id648 . -1) (undo-tree-id649 . -1) (#("o" 0 1 (fontified t)) . -2091) (undo-tree-id650 . -1) (undo-tree-id651 . -1) (#("l" 0 1 (fontified t)) . -2092) (undo-tree-id652 . -1) (undo-tree-id653 . -1) (#("i" 0 1 (fontified t)) . -2093) (undo-tree-id654 . -1) (undo-tree-id655 . -1) (#("c" 0 1 (fontified t)) . -2094) (undo-tree-id656 . -1) (undo-tree-id657 . -1) (#("y" 0 1 (fontified t)) . -2095) (undo-tree-id658 . -1) (undo-tree-id659 . -1) 2096 (2083 . 2085) (#("v" 0 1 (fontified t)) . -2083) (undo-tree-id660 . -1) (undo-tree-id661 . -1) (#("1" 0 1 (fontified t)) . -2084) (undo-tree-id662 . -1) (undo-tree-id663 . -1) (#("." 0 1 (fontified t)) . -2085) (undo-tree-id664 . -1) (undo-tree-id665 . -1) (#("p" 0 1 (fontified t)) . -2086) (undo-tree-id666 . -1) (undo-tree-id667 . -1) (#("o" 0 1 (fontified t)) . -2087) (undo-tree-id668 . -1) (undo-tree-id669 . -1) (#("l" 0 1 (fontified t)) . -2088) (undo-tree-id670 . -1) (undo-tree-id671 . -1) (#("i" 0 1 (fontified t)) . -2089) (undo-tree-id672 . -1) (undo-tree-id673 . -1) (#("c" 0 1 (fontified t)) . -2090) (undo-tree-id674 . -1) (undo-tree-id675 . -1) (#("y" 0 1 (fontified t)) . -2091) (undo-tree-id676 . -1) (undo-tree-id677 . -1) 2092 (2066 . 2068) (#("v" 0 1 (fontified t)) . -2066) (undo-tree-id678 . -1) (undo-tree-id679 . -1) (undo-tree-id680 . -1) (undo-tree-id681 . -1) (#("1" 0 1 (fontified t)) . -2067) (undo-tree-id682 . -1) (undo-tree-id683 . -1) (undo-tree-id684 . -1) (undo-tree-id685 . -1) (#("." 0 1 (fontified t)) . -2068) (undo-tree-id686 . -1) (undo-tree-id687 . -1) (undo-tree-id688 . -1) (undo-tree-id689 . -1) (#("p" 0 1 (fontified t)) . -2069) (undo-tree-id690 . -1) (undo-tree-id691 . -1) (undo-tree-id692 . -1) (undo-tree-id693 . -1) (#("o" 0 1 (fontified t)) . 
-2070) (undo-tree-id694 . -1) (undo-tree-id695 . -1) (undo-tree-id696 . -1) (undo-tree-id697 . -1) (#("l" 0 1 (fontified t)) . -2071) (undo-tree-id698 . -1) (undo-tree-id699 . -1) (undo-tree-id700 . -1) (undo-tree-id701 . -1) (#("i" 0 1 (fontified t)) . -2072) (undo-tree-id702 . -1) (undo-tree-id703 . -1) (undo-tree-id704 . -1) (undo-tree-id705 . -1) (#("c" 0 1 (fontified t)) . -2073) (undo-tree-id706 . -1) (undo-tree-id707 . -1) (undo-tree-id708 . -1) (undo-tree-id709 . -1) (#("y" 0 1 (fontified t)) . -2074) (undo-tree-id710 . -1) (undo-tree-id711 . -1) (undo-tree-id712 . -1) (undo-tree-id713 . -1) 2075) nil (26324 20423 130554 184000) 0 nil]) +([nil nil ((#("policy" 0 6 (fontified t)) . 2102) (undo-tree-id577 . -5) (undo-tree-id578 . 1) (undo-tree-id579 . -4) (undo-tree-id580 . -4) (undo-tree-id581 . -6)) nil (26324 20423 129767 154000) 0 nil]) +([nil nil ((#("." 0 1 (fontified t)) . -2101) (undo-tree-id567 . -1) (undo-tree-id568 . -1) (undo-tree-id569 . -1) (undo-tree-id570 . -1) (undo-tree-id571 . -1) (undo-tree-id572 . -1) (undo-tree-id573 . -1) (undo-tree-id574 . -1) (undo-tree-id575 . -1) (undo-tree-id576 . -1) 2102) nil (26324 20423 129764 250000) 0 nil]) +([nil nil ((#("." 0 1 (fontified t)) . -2090) (undo-tree-id553 . -1) (undo-tree-id554 . -1) (#("p" 0 1 (fontified t)) . -2091) (undo-tree-id555 . -1) (undo-tree-id556 . -1) (#("o" 0 1 (fontified t)) . -2092) (undo-tree-id557 . -1) (undo-tree-id558 . -1) (#("l" 0 1 (fontified t)) . -2093) (undo-tree-id559 . -1) (undo-tree-id560 . -1) (#("i" 0 1 (fontified t)) . -2094) (undo-tree-id561 . -1) (undo-tree-id562 . -1) (#("c" 0 1 (fontified t)) . -2095) (undo-tree-id563 . -1) (undo-tree-id564 . -1) (#("y" 0 1 (fontified t)) . -2096) (undo-tree-id565 . -1) (undo-tree-id566 . -1) 2097) nil (26324 20423 129757 524000) 0 nil]) +([nil nil ((#("." 0 1 (fontified t)) . -2079) (undo-tree-id539 . -1) (undo-tree-id540 . -1) (#("p" 0 1 (fontified t)) . -2080) (undo-tree-id541 . -1) (undo-tree-id542 . 
-1) (#("o" 0 1 (fontified t)) . -2081) (undo-tree-id543 . -1) (undo-tree-id544 . -1) (#("l" 0 1 (fontified t)) . -2082) (undo-tree-id545 . -1) (undo-tree-id546 . -1) (#("i" 0 1 (fontified t)) . -2083) (undo-tree-id547 . -1) (undo-tree-id548 . -1) (#("c" 0 1 (fontified t)) . -2084) (undo-tree-id549 . -1) (undo-tree-id550 . -1) (#("y" 0 1 (fontified t)) . -2085) (undo-tree-id551 . -1) (undo-tree-id552 . -1) 2086) nil (26324 20423 129747 339000) 0 nil]) +([nil nil ((#("JuMP.Model" 0 10 (fontified t)) . 1421) (undo-tree-id751 . -9) (undo-tree-id752 . -9) (undo-tree-id753 . -9) (undo-tree-id754 . -9) (undo-tree-id755 . -9) (undo-tree-id756 . -10) (undo-tree-id757 . -10) 1430 (t 26324 20423 165543 962000)) nil (26328 39912 301975 923000) 0 nil]) +([nil current ((#(")" 0 1 (fontified nil)) . 1436) (#("(" 0 1 (fontified t)) . -1421) (undo-tree-id745 . 1) (undo-tree-id746 . -1) (undo-tree-id747 . -1) (undo-tree-id748 . -1) (undo-tree-id749 . -1) (undo-tree-id750 . -1) 1422) nil (26328 39912 301964 767000) 0 nil]) nil diff --git a/test/src/domains/garnet.jl b/test/src/domains/garnet.jl index 1054a5e..b0a2232 100644 --- a/test/src/domains/garnet.jl +++ b/test/src/domains/garnet.jl @@ -12,10 +12,10 @@ import HiGHS, JuMP v2 = value_iteration(g1, InfiniteH(0.95); ϵ=1e-10) v3 = value_iteration(g2, InfiniteH(0.95); ϵ=1e-10) v4 = policy_iteration(g2, 0.95) - v5 = lp_solve(g, .95, JuMP.Model(HiGHS.Optimizer)) + v5 = lp_solve(g, .95, HiGHS.Optimizer) # Ensure value functions are close - V = hcat(v1.value, v2.value[1:end-1], v3.value[1:end-1], v4.value[1:end-1], v5.value) + V = hcat(v1.value, v2.value, v3.value, v4.value, v5.value) @test map(x -> x[2] - x[1], mapslices(extrema, V; dims=2)) |> maximum ≤ 1e-6 # Ensure policies are identical @@ -25,6 +25,6 @@ import HiGHS, JuMP p4 = v4.policy p5 = v5.policy - P = hcat(p1, p2[1:end-1], p3[1:end-1], p4[1:end-1]) + P = hcat(p1, p2, p3, p4) @test all(mapslices(allequal, P; dims=2)) end diff --git a/test/src/domains/gridworld.jl 
b/test/src/domains/gridworld.jl index 797e5bc..1594db5 100644 --- a/test/src/domains/gridworld.jl +++ b/test/src/domains/gridworld.jl @@ -17,7 +17,7 @@ using MDPs.Domains v4 = policy_iteration(model_gc, 0.95) # Ensure value functions are close - V = hcat(v1.value, v2.value[1:end-1], v3.value[1:end-1], v4.value[1:end-1]) + V = hcat(v1.value, v2.value, v3.value, v4.value) @test map(x -> x[2] - x[1], mapslices(extrema, V; dims=2)) |> maximum ≤ 1e-6 # Ensure policies are identical @@ -26,6 +26,6 @@ using MDPs.Domains p3 = greedy(model_gc, InfiniteH(0.95), v3.value) p4 = v4.policy - P = hcat(p1, p2[1:end-1], p3[1:end-1], p4[1:end-1]) + P = hcat(p1, p2, p3, p4) @test all(mapslices(allequal, P; dims=2)) end diff --git a/test/src/domains/inventory.jl b/test/src/domains/inventory.jl index e693da1..aef15e8 100644 --- a/test/src/domains/inventory.jl +++ b/test/src/domains/inventory.jl @@ -36,13 +36,13 @@ import HiGHS, JuMP v2 = value_iteration(model_g, InfiniteH(0.95); ϵ = 1e-10) v3 = value_iteration(model_gc, InfiniteH(0.95); ϵ = 1e-10) v4 = policy_iteration(model_gc, 0.95) - v5 = lp_solve(model, .95, JuMP.Model(HiGHS.Optimizer)) - - # note that the IntMDP does not have terminal states, - # so the last action will not be -1 + v5 = lp_solve(model, .95, HiGHS.Optimizer) + @test all(state_count(model) .== state_count.((model_g, model_gc))) + @test all(length(v1.value) .== length.((v2.value, v3.value, v4.value, v5.value))) + #make sure value functions are close - V = hcat(v1.value, v2.value[1:(end-1)], v3.value[1:(end-1)], v4.value[1:(end-1)], v5.value) + V = hcat(v1.value, v2.value, v3.value, v4.value, v5.value) @test map(x->x[2] - x[1], mapslices(extrema, V; dims = 2)) |> maximum ≤ 1e-6 # make sure policies are identical @@ -52,6 +52,7 @@ import HiGHS, JuMP p4 = v4.policy p5 = v5.policy - P = hcat(p1, p2[1:(end-1)], p3[1:(end-1)], p4[1:(end-1)]) + @test all(length(p1) .== length.((p2, p3, p4, p5))) + P = hcat(p1, p2, p3, p4) @test all(mapslices(allequal, P; dims = 2)) end diff 
--git a/test/src/transient.jl b/test/src/transient.jl new file mode 100644 index 0000000..b60e6f8 --- /dev/null +++ b/test/src/transient.jl @@ -0,0 +1,25 @@ +using Revise +using HiGHS + +@testset "Transience - all" begin + opt = HiGHS.Optimizer + model = Domains.Gambler.RuinTransient(0.5, 20, false) # no noop + + @test anytransient(model, opt) + @test alltransient(model, opt) + val = lp_solve(model, TotalReward(), opt) + @test val.value[2] ≈ 0.5 + @test val.policy[2] == 14 +end + + +@testset "Transience - some" begin + opt = HiGHS.Optimizer + model = Domains.Gambler.RuinTransient(0.5, 20, true) + + @test anytransient(model, opt) + @test alltransient(model, opt) + val = lp_solve(model, TotalReward(), opt) + @test val.value[2] ≈ 0.5 + @test val.policy[2] == 20 +end