Commit: mrp
Marek Petrik committed Apr 2, 2024
1 parent 2fc119e commit 0835ecb
Showing 4 changed files with 48 additions and 18 deletions.
26 changes: 25 additions & 1 deletion docs/src/recipes.md
@@ -37,4 +37,28 @@ CSV.write(fileout, output)
# output
1
```
```

## Making a small MDP

```jldoctest
using MDPs
ε = 0.01
# transition probabilities
P1 = [1 0 0; 0 1 0; 0 0 1]
P2 = [0 1 0; 1-ε 0 ε; 0 0 1]
Ps = [P1, P2]
# rewards
R = [10 -4 0; -1 -3 0; 0 0 0] # use the same reward for both actions
Rs = [R, R]
M = make_int_mdp(Ps, Rs)
state_count(M)
# output
3
```
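A possible follow-up, not part of this commit's recipe: once `M` is built, the `mrp` function touched below in `src/algorithms/mrp.jl` can turn it into a Markov reward process for a fixed policy. This is only a sketch; it assumes `mrp` is reachable from `MDPs` (qualify it as `MDPs.mrp` if it is not exported).

```julia
# Sketch: assumes the MDP `M` from the recipe above is in scope.
policy = [1, 2, 2]         # one action index per state; plays the role of π
P_π, r_π = mrp(M, policy)  # policy transition matrix and reward vector
size(P_π)                  # (3, 3)
```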
7 changes: 4 additions & 3 deletions src/algorithms/mrp.jl
@@ -12,10 +12,11 @@ MDP `model` and policy `π`. Also supports terminal states.
Does not support duplicate entries in transition probabilities.
"""
function mrp!(P_π::AbstractMatrix{<:Real}, r_π::AbstractVector{<:Real},
model::TabMDP, π::AbstractVector{Int})
model::TabMDP, π::AbstractVector{<:Integer})
S = state_count(model)
fill!(P_π, 0.); fill!(r_π, 0.)
for s ∈ 1:S
#TODO: remove the definition of terminal states
if !isterminal(model, s)
for (sn, p, r) ∈ transition(model, s, π[s])
P_π[s,sn] ≈ 0. ||
@@ -35,10 +36,10 @@ end
Compute the transition matrix `P_π` and reward vector `r_π` for the
MDP `model` and policy `π`. See mrp! for more details.
"""
function mrp(model::TabMDP, π::AbstractVector{Int})
function mrp(model::TabMDP, π::AbstractVector{<:Integer})
S = state_count(model)
P_π = Matrix{Float64}(undef,S,S)
r_π = Vector(undef, S)
r_π = Vector{Float64}(undef, S)
mrp!(P_π, r_π, model, π)
(P_π, r_π)
end
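As a hedged usage sketch (not part of the commit), the in-place `mrp!` variant lets the buffers be allocated once and reused across policies; the model below is illustrative and built only with functions appearing in this diff.

```julia
using MDPs

# Sketch: a tiny two-state, two-action MDP built with make_int_mdp, then
# evaluated in place with mrp!. Assumes mrp! is reachable from MDPs
# (qualify as MDPs.mrp! if it is not exported).
P1 = [1.0 0.0; 0.0 1.0]
P2 = [0.0 1.0; 1.0 0.0]
r  = [1.0, 0.0]                      # state-action rewards, one entry per state
model = make_int_mdp([P1, P2], [r, r])

S = state_count(model)
P_π = Matrix{Float64}(undef, S, S)   # preallocated, reusable across policies
r_π = Vector{Float64}(undef, S)

policy = fill(2, S)                  # always take action 2 (plays the role of π)
mrp!(P_π, r_π, model, policy)        # fills P_π and r_π in place
```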
8 changes: 5 additions & 3 deletions src/domains/gambler.jl
@@ -6,12 +6,14 @@ mt(st, prob,rew) =
(Int(st), Float64(prob), Float64(rew))::Tuple{Int, Float64, Float64}

"""
Ruin(win, max_capital)
Gambler's ruin. Can decide how much to bet at any point in time. With some
probability `p`, the bet is doubled, and with `1-p` it is lost. The
probability `win`, the bet is doubled, and with `1-win` it is lost. The
reward is 1 if it achieves some terminal capital and 0 otherwise.
Capital = state - 1
Bet = action - 1
- Capital = state - 1
- Bet = action - 1
Available actions are 1, ..., state - 1.
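For reference, a hedged construction sketch for the domain described above (not part of the commit); the import path for `Ruin` is not shown in this diff and is an assumption.

```julia
using MDPs
# Assumption: `Ruin` is accessible from the gambler domain module; adjust the
# import/qualification to wherever the module is exported in your installation.
model = Ruin(0.7, 10)   # win probability 0.7, maximal capital 10
state_count(model)      # one state per capital level (capital = state - 1)
```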
25 changes: 14 additions & 11 deletions src/models/integral.jl
@@ -197,29 +197,32 @@ function load_mdp(input; idoutcome = nothing, docompress = false)
IntMDP(states)
end


# Expand a reward spec for state `s` into a per-next-state vector of length `n`:
# vectors give state-action rewards (repeated); matrices give their row `s` (state-action-state).
_make_reward(r::Vector{<:Number}, s, n) = repeat([r[s]], n)
_make_reward(R::Matrix{<:Number}, s, n) = R[s,:]


"""
make_int_mdp(Ps, rs)
Build IntMDP from a list of transition probabilities `Ps` and reward vectors
`rs` for each action in the MDP. Each row of the transition matrix represents
the probabilities of transitioning to next states.
`rs` for each action in the MDP. If `rs` are vectors, then they are assumed
to be state-action rewards. If `rs` are matrices then they are assumed to be
state-action-state rewards. Each row of the transition matrix represents the
probabilities of transitioning to next states, and each row of a reward matrix
holds the corresponding transition rewards.
"""
function make_int_mdp(Ps::AbstractVector{Matrix{X}},
rs::AbstractVector{Vector{Y}}) where {X <: Number, Y <: Number}
function make_int_mdp(Ps::AbstractVector{<:Matrix}, rs::AbstractVector{<:Array})

isempty(Ps) && error("Must have at least one action.")
length(Ps) == length(rs) || error("Dimensions must match.")
length(Ps) == length(rs) || error("Ps and rs lengths must match.")

statecount = size(Ps[1])[1]
actioncount = length(Ps)

states = Vector{IntState}(undef, statecount)
for s ∈ 1:statecount
actions = Vector{IntAction}(undef, actioncount)
for a ∈ 1:actioncount
actions[a] = IntAction(1:statecount, Ps[a][s,:],
repeat([rs[a][s]], statecount) )
end
actions = [
IntAction(1:statecount, Ps[a][s,:], _make_reward(rs[a], s, statecount))
for a ∈ eachindex(Ps,rs)]
states[s] = IntState(actions)
end
IntMDP(states)
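To illustrate the matrix form of `rs` documented above, a small hedged sketch (not part of the commit): each reward matrix is indexed by (state, next state), so the reward can depend on the realized transition.

```julia
using MDPs

# Sketch: state-action-state rewards, one matrix per action.
P1 = [0.5 0.5; 0.0 1.0]
P2 = [1.0 0.0; 0.3 0.7]
R1 = [0.0 1.0; 0.0 0.0]   # under action 1, reward 1 only for the 1 -> 2 transition
R2 = zeros(2, 2)          # action 2 earns nothing
M = make_int_mdp([P1, P2], [R1, R2])
state_count(M)            # 2
```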
