Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Garnet #2

Merged
merged 30 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a97a52f
Added garnet MDPs to domains
keithbadger Jun 25, 2024
641be7d
added garnet domain
keithbadger Jun 25, 2024
ae9ee73
implemented garnet MDP constructor
keithbadger Jun 26, 2024
3bcef5f
fixed transition function
keithbadger Jun 26, 2024
26841ff
added tests, fail to recognize Garnet class though
keithbadger Jul 3, 2024
057a847
updating exporting
keithbadger Jul 3, 2024
3eed2bc
dependencies and bugs fixed
Jul 3, 2024
2f604d9
committing before switch
keithbadger Jul 8, 2024
205a803
Merge branch 'garnet' of https://github.com/RiskAverseRL/MDPs.jl into…
keithbadger Jul 8, 2024
ffe7a82
added linear program method for solving infinite horizon MDPs
keithbadger Jul 9, 2024
6c0c097
added linprogsolve to exports
keithbadger Jul 9, 2024
e023389
changed linear_program_solve to require gamma as float rather than en…
keithbadger Jul 9, 2024
01d1058
Added explanation of garnet MDPs
keithbadger Jul 9, 2024
75203a3
added tests for linear program solver and fixed tests for garnet domain
keithbadger Jul 10, 2024
61d52c3
fixed linprogsolve output and added optimizer options
keithbadger Jul 10, 2024
d2d3acc
updated linear program solver
keithbadger Jul 16, 2024
f3b3ef4
fixed issues with lp_solve vs linear_program_solve function naming
keithbadger Jul 16, 2024
a1c2d66
improved documentation for lp_solve()
keithbadger Jul 16, 2024
28a21a8
added HiGHS LP dependency to the test
Jul 17, 2024
aec6dc6
Merge branch 'garnet' of github.com:RiskAverseRL/MDPs.jl into garnet
Jul 17, 2024
341ace3
fixed some bugs
Jul 17, 2024
23b406b
added HiGHS and JuMP to test dependencies and removed HiGHS for MDPs …
keithbadger Jul 18, 2024
53594d2
?
keithbadger Jul 19, 2024
0f65735
Merge branch 'garnet' of https://github.com/RiskAverseRL/MDPs.jl into…
keithbadger Jul 19, 2024
c08b0a6
removed unneeded variable from lp_solve
keithbadger Jul 19, 2024
78dbd47
added optimal policy output to lp_solve using dual variables
keithbadger Jul 22, 2024
d764c04
Merge branch 'garnet' of https://github.com/RiskAverseRL/MDPs.jl into…
keithbadger Jul 22, 2024
63b704c
Delete test/src/domains/.inventory.jl.~undo-tree~
marekpetrik Aug 16, 2024
935ab0f
removed a file that is not supposed to be in the repo
Aug 16, 2024
c4d6ca5
Merge branch 'main' into garnet
Aug 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@ version = "0.1.5"
[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
julia = "1.9"
DataFrames = "1.6.1"
DataFramesMeta = "0.14.1"
Distributions = "0.25.107"
StatsBase = "0.34.2"
julia = "1.9"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
5 changes: 5 additions & 0 deletions src/MDPs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ export mrp!, mrp, mrp_sparse
include("algorithms/policyiteration.jl")
export policy_iteration, policy_iteration_sparse

include("algorithms/linprogsolve.jl")
export lp_solve

include("simulation.jl")
export simulate, random_π
export Policy, PolicyStationary, PolicyMarkov
Expand All @@ -45,6 +48,8 @@ export Transition
module Domains
include("domains/simple.jl")
export Simple
include("domains/garnet.jl")
export Garnet
include("domains/inventory.jl")
export Inventory
include("domains/machine.jl")
Expand Down
33 changes: 33 additions & 0 deletions src/algorithms/linprogsolve.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using JuMP

# ----------------------------------------------------------------
# Linear Program Solver
# ----------------------------------------------------------------


"""
    lp_solve(model, γ, lpm)

Solve the infinite-horizon discounted MDP `model` with discount factor `γ`
by formulating and solving the primal linear program

    min Σₛ v[s]   s.t.   v[s] ≥ Σ_s′ p(s′|s,a) (r(s,a,s′) + γ v[s′])  ∀ s, a

using the JuMP model `lpm` as the linear program solver. The model `lpm`
must be constructed with an LP-capable optimizer (e.g. `Model(HiGHS.Optimizer)`).

The optimal policy is recovered from the dual variables of the Bellman
constraints: for each state, the action whose constraint carries the largest
dual value is the optimal one.

## Returns

A named tuple with fields
- `value`: the optimal state values found by `lpm`
- `policy`: the optimal deterministic action for each state

## Throws

`ArgumentError` when `γ ∉ [0, 1)`.
"""
function lp_solve(model::TabMDP, γ::Number, lpm)
    0 ≤ γ < 1 || throw(ArgumentError("γ must be in [0, 1), got $γ"))
    set_silent(lpm)
    n = state_count(model)
    @variable(lpm, v[1:n])
    @objective(lpm, Min, sum(v))
    # One Bellman constraint per (state, action); references are kept so the
    # duals can be read back after the solve to recover the policy.
    π = Vector{Vector{ConstraintRef}}(undef, n)
    for s in 1:n
        m = action_count(model, s)
        π_s = Vector{ConstraintRef}(undef, m)
        for a in 1:m
            # v[s] ≥ E[ r + γ v[s′] ] where transition yields (s′, p, r) triples
            π_s[a] = @constraint(lpm,
                v[s] ≥ sum(sp[2] * (sp[3] + γ * v[sp[1]]) for sp in transition(model, s, a)))
        end
        π[s] = π_s
    end
    optimize!(lpm)
    (value = value.(v), policy = map(x -> argmax(dual.(x)), π))
end
73 changes: 73 additions & 0 deletions src/domains/garnet.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
module Garnet

import ...TabMDP, ...transition, ...state_count, ...action_count
import ...actions, ...states

# TODO: are these reasonable or can we replace them?
import StatsBase, Distributions
# ----------------------------------------------------------------
# A Garnet MDP
# ----------------------------------------------------------------

"""
A dense tabular MDP with explicit reward and transition tables.

Fields:
- `reward[s][a]`: reward for taking action `a` in state `s`
- `transition[s][a]`: length-`S` vector of probabilities of each next state
  after taking action `a` in state `s`
- `S`: total number of states
- `A[s]`: number of actions available in state `s`
"""
struct GarnetMDP <: TabMDP
    reward::Vector{Vector{Float64}}
    transition::Vector{Vector{Vector{Float64}}}
    S::Int
    A::Vector{Int}

    # TODO: add a constructor that checks for consistency
    # (e.g. length(reward) == length(transition) == S == length(A),
    # and each transition[s][a] sums to 1)
end

"""
    make_garnet(S, A, nbranch, min_reward, max_reward)

Construct a random Garnet MDP: a tabular MDP in which the number of next
states reachable from any state-action pair is a fixed proportion `nbranch`
of the total number of states `S`. The proportion must be in [0,1] and must
round to at least one successor state.

For each state-action pair, the reward is drawn uniformly from the integers
`min_reward:max_reward`, the successor states are sampled without
replacement, and their probabilities are i.i.d. Exponential(1) draws
normalized to sum to one.

`A` may be either a vector giving the number of actions in each state, or a
single integer used for every state.

## Throws

`ArgumentError` when `nbranch ∉ [0,1]`, when it yields zero successor
states, or when `length(A) ≠ S`.
"""
function make_garnet(S::Integer, A::AbstractVector{Int}, nbranch::Number,
                     min_reward::Integer, max_reward::Integer)
    0.0 ≤ nbranch ≤ 1.0 || throw(ArgumentError("nbranch must be in [0,1]"))
    length(A) == S || throw(ArgumentError("A must have one entry per state"))

    sout = round(Int, nbranch * S)
    # With no successors every transition row would be all zeros, which is
    # not a valid probability distribution.
    sout ≥ 1 || throw(ArgumentError("nbranch too small: each state needs at least one successor"))

    dist = Distributions.Exponential(1)
    reward = Vector{Vector{Float64}}()
    transition = Vector{Vector{Vector{Float64}}}()

    for s in 1:S
        r_s = Vector{Float64}()
        p_s = Vector{Vector{Float64}}()
        for a in 1:A[s]
            push!(r_s, rand(min_reward:max_reward))
            succ = StatsBase.sample(1:S, sout; replace = false)
            w = rand(dist, sout)
            w ./= sum(w)              # normalize to a probability distribution
            row = zeros(S)
            row[succ] .= w
            push!(p_s, row)
        end
        push!(reward, r_s)
        push!(transition, p_s)
    end

    GarnetMDP(reward, transition, S, A)
end

make_garnet(S::Integer, A::Integer, nbranch, min_reward, max_reward) =
    make_garnet(S, fill(Int(A), S), nbranch, min_reward, max_reward)

"""
    transition(model, state, action)

Return the sparse transition representation for `(state, action)`: a vector
of `(next_state, probability, reward)` triples covering only the next states
with nonzero probability. The reward depends only on `(state, action)` and
is repeated in every triple.

## Throws

`ArgumentError` when `state` or `action` is out of range.
"""
function transition(model::GarnetMDP, state::Int, action::Int)
    state in 1:model.S || throw(ArgumentError("invalid state $state"))
    action in 1:model.A[state] || throw(ArgumentError("invalid action $action"))

    # Reward is per (state, action); hoist the lookup out of the loop.
    r = model.reward[state][action]
    next = Vector{Tuple{Int,Float64,Float64}}()
    for (sn, p) in enumerate(model.transition[state][action])
        iszero(p) || push!(next, (sn, p, r))
    end
    return next
end

# Total number of states in the model.
state_count(model::GarnetMDP) = model.S
# Number of actions available in state `s`.
action_count(model::GarnetMDP, s::Int) = model.A[s]

end
# Module: Garnet
Loading
Loading