diff --git a/docs/src/recipes.md b/docs/src/recipes.md
index e1c0400..15eb48c 100644
--- a/docs/src/recipes.md
+++ b/docs/src/recipes.md
@@ -37,4 +37,28 @@ CSV.write(fileout, output)
 # output
 
 1
-```
+```
+
+## Making a small MDP
+
+```jldoctest
+using MDPs
+
+ε = 0.01
+# transition probabilities
+P1 = [1 0 0; 0 1 0; 0 0 1]
+P2 = [0 1 0; 1-ε 0 ε; 0 0 1]
+Ps = [P1, P2]
+
+# rewards
+R = [10 -4 0; -1 -3 0; 0 0 0] # use the same reward for both actions
+Rs = [R, R]
+
+M = make_int_mdp(Ps, Rs)
+
+state_count(M)
+
+# output
+
+3
+```
diff --git a/src/algorithms/mrp.jl b/src/algorithms/mrp.jl
index b04a1b0..f97ab39 100644
--- a/src/algorithms/mrp.jl
+++ b/src/algorithms/mrp.jl
@@ -12,10 +12,11 @@ MDP `model` and policy `π`. Also supports terminal states.
 Does not support duplicate entries in transition probabilities.
 """
 function mrp!(P_π::AbstractMatrix{<:Real}, r_π::AbstractVector{<:Real},
-              model::TabMDP, π::AbstractVector{Int})
+              model::TabMDP, π::AbstractVector{<:Integer})
     S = state_count(model)
     fill!(P_π, 0.); fill!(r_π, 0.)
     for s ∈ 1:S
+        #TODO: remove the definition of terminal states
         if !isterminal(model, s)
             for (sn, p, r) ∈ transition(model, s, π[s])
                 P_π[s,sn] ≈ 0. ||
@@ -35,10 +36,10 @@ end
 Compute the transition matrix `P_π` and reward vector `r_π` for the
 MDP `model` and policy `π`. See mrp! for more details.
 """
-function mrp(model::TabMDP, π::AbstractVector{Int})
+function mrp(model::TabMDP, π::AbstractVector{<:Integer})
     S = state_count(model)
     P_π = Matrix{Float64}(undef,S,S)
-    r_π = Vector(undef, S)
+    r_π = Vector{Float64}(undef, S)
     mrp!(P_π, r_π, model, π)
     (P_π, r_π)
 end
diff --git a/src/domains/gambler.jl b/src/domains/gambler.jl
index 1f9afb5..4568589 100644
--- a/src/domains/gambler.jl
+++ b/src/domains/gambler.jl
@@ -6,12 +6,14 @@
 mt(st, prob,rew) = (Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64}
 
 """
+    Ruin(win, max_capital)
+
 Gambler's ruin. Can decide how much to bet at any point in time. With some
-probability `p`, the bet is doubled, and with `1-p` it is lost. The
+probability `win`, the bet is doubled, and with `1-win` it is lost. The
 reward is 1 if it achieves some terminal capital and 0 otherwise.
 
-Capital = state - 1
-Bet = action - 1
+- Capital = state - 1
+- Bet = action - 1
 
 Available actions are 1, ..., state - 1.
 
diff --git a/src/models/integral.jl b/src/models/integral.jl
index 880e6c5..b82db4e 100644
--- a/src/models/integral.jl
+++ b/src/models/integral.jl
@@ -197,29 +197,32 @@ function load_mdp(input; idoutcome = nothing, docompress = false)
     IntMDP(states)
 end
 
+
+_make_reward(r::Vector{<:Number}, s, n) = repeat([r[s]], n)
+_make_reward(R::Matrix{<:Number}, s, n) = R[s,:]
+
+
 """
     make_int_mdp(Ps, rs)
 
 Build IntMDP from a list of transition probabilities `Ps` and reward vectors
-`rs` for each action in the MDP. Each row of the transition matrix represents
-the probabilities of transitioning to next states.
+`rs` for each action in the MDP. If the elements of `rs` are vectors, they are
+treated as state-action rewards; if they are matrices, they are treated as
+state-action-state rewards. Each row of a transition matrix (and of a reward
+matrix) corresponds to a source state, with one column per next state.
""" -function make_int_mdp(Ps::AbstractVector{Matrix{X}}, - rs::AbstractVector{Vector{Y}}) where {X <: Number, Y <: Number} +function make_int_mdp(Ps::AbstractVector{<:Matrix}, rs::AbstractVector{<:Array}) isempty(Ps) && error("Must have at least one action.") - length(Ps) == length(rs) || error("Dimensions must match.") + length(Ps) == length(rs) || error("Ps and rs lengths must match.") statecount = size(Ps[1])[1] - actioncount = length(Ps) states = Vector{IntState}(undef, statecount) for s ∈ 1:statecount - actions = Vector{IntAction}(undef, actioncount) - for a ∈ 1:actioncount - actions[a] = IntAction(1:statecount, Ps[a][s,:], - repeat([rs[a][s]], statecount) ) - end + actions = [ + IntAction(1:statecount, Ps[a][s,:], _make_reward(rs[a], s, statecount)) + for a ∈ eachindex(Ps,rs)] states[s] = IntState(actions) end IntMDP(states)