From 36ec59b368cb6c12e1177bb45263dbc462b9a25c Mon Sep 17 00:00:00 2001
From: "Documenter.jl"
Date: Tue, 16 Jul 2024 21:36:19 +0000
Subject: [PATCH] build based on 06ff515

---
 previews/PR2/.documenter-siteinfo.json | 2 +-
 previews/PR2/index.html                | 4 ++--
 previews/PR2/recipes/index.html        | 2 +-
 previews/PR2/simulation/index.html     | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/previews/PR2/.documenter-siteinfo.json b/previews/PR2/.documenter-siteinfo.json
index fed0967..f3ed390 100644
--- a/previews/PR2/.documenter-siteinfo.json
+++ b/previews/PR2/.documenter-siteinfo.json
@@ -1 +1 @@
-{"documenter":{"julia_version":"1.9.4","generation_timestamp":"2024-07-16T18:44:35","documenter_version":"1.5.0"}}
\ No newline at end of file
+{"documenter":{"julia_version":"1.9.4","generation_timestamp":"2024-07-16T21:36:15","documenter_version":"1.5.0"}}
\ No newline at end of file

diff --git a/previews/PR2/index.html b/previews/PR2/index.html
index ae23c0b..5b42a94 100644
--- a/previews/PR2/index.html
+++ b/previews/PR2/index.html
@@ -1,5 +1,5 @@

MDPs.jl: Markov Decision Processes

Models

This section describes the data structures that can be used to model various types of MDPs.

MDP

This is a general MDP data structure that supports basic functions. See IntMDP and TabMDP below for models that can be used more directly to model and solve specific problems.

MDPs.MDPType

A general MDP representation with time-independent transition probabilities and rewards. The model makes no assumption that the states can be efficiently enumerated, but it assumes that there is a small number of actions.

S: state type, A: action type

source
MDPs.getnextMethod
getnext(model, s, a)

Compute next states using transition function.

Returns an object that can return a NamedTuple with states, probabilities, and transitions as AbstractArrays. This is a more-efficient version of transition (when supported).

The standard implementation is not memory efficient.

source
MDPs.transitionFunction
(sn, p, r) ∈ transition(model, s, a)

Return an iterator over next states, probabilities, and rewards.

Use getnext instead, which is more efficient and convenient to use.

source
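
As a minimal usage sketch, the iteration pattern from the signature above can be used directly; the model, state s, and action a are assumed to already exist.

for (sn, p, r) ∈ transition(model, s, a)
    println("next state $sn with probability $p and reward $r")
end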
MDPs.valuefunctionFunction
valuefunction(mdp, state, valuefunction)

Evaluate the value function for an MDP in a state.

source

Tabular MDPs

This is an MDP instance that assumes that the states and actions are tabular.

MDPs.TabMDPType

An abstract tabular Markov Decision Process which is specified by a transition function.

Functions that should be defined for any subtype for value and policy iterations to work are: state_count, states, action_count, actions, and transition.

Generally, states should be 1-based.

The methods state_count and states should only include non-terminal states.

source
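
A hedged sketch of implementing this interface for a toy two-state problem is shown below; the exact method signatures expected by MDPs.jl (for example, whether action_count and actions take the state as an argument) are assumptions, not confirmed API.

using MDPs

struct TwoStateMDP <: TabMDP end   # hypothetical toy model

MDPs.state_count(::TwoStateMDP) = 2
MDPs.states(::TwoStateMDP) = 1:2
MDPs.action_count(::TwoStateMDP, s::Int) = 1
MDPs.actions(::TwoStateMDP, s::Int) = 1:1
# transition returns an iterable of (nextstate, probability, reward) triples
MDPs.transition(::TwoStateMDP, s::Int, a::Int) =
    s == 1 ? ((2, 1.0, 1.0),) : ((1, 1.0, 0.0),)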
MDPs.save_mdpMethod
save_mdp(T::DataFrame, model::TabMDP)

Convert an MDP model to a DataFrame representation with 0-based indices.

Important: The MDP representation uses 1-based indexes, while the output DataFrame is 0-based for backwards compatibility.

The columns are: idstatefrom, idaction, idstateto, probability, and reward.

source

Integral MDPs

This is a specific MDP instance in which states and actions are specified by integers.

MDPs.IntActionType

Represents the transitions that follow an action. The arrays nextstate, probability, and reward must have the same length.

The entries of nextstate need not be unique, and each transition can have a different reward associated with it. The transitions are not aggregated, in order to allow for computing the risk of a transition; aggregating the values by state would change the risk value of the transition.

source
MDPs.IntMDPType

MDP with integral states and stationary transitions. State and action indexes are all 1-based integers.

source
MDPs.compressMethod
compress(nextstate, probability, reward)

The function combines multiple transitions to the same state into a single transition. The reward is computed as a weighted average of the individual rewards, assuming an expected-reward objective.

source
MDPs.load_mdpMethod
load_mdp(input, idoutcome)

Load the MDP from input. The function assumes 0-based indexes of states and actions, which are transformed to 1-based indexes.

The input can be any format supported by DataFrame, such as CSV.File(...) or Arrow.Table(...).

States that have no transition probabilities defined are assumed to be terminal and are set to transition to themselves.

If docombine is true, then the method combines transitions that have the same statefrom, action, and stateto. This makes risk-neutral value iteration faster, but may change the value of a risk-averse solution.

The formulation allows for multiple transitions s,a → s'. When this is the case, the transition probability is assumed to be their sum and the reward is the weighted average of the rewards.

The method can also process CSV files for MDPO/MMDP, in which case idoutcome specifies a 1-based outcome to load.

Examples

Load the model from a CSV

using CSV: File
using MDPs
filepath = joinpath(dirname(pathof(MDPs)), "..",
                    "data", "riverswim.csv")
model = load_mdp(File(filepath))  # assumed: this loading step was elided between the diff hunks
state_count(model)

# output
21
source
MDPs.make_int_mdpMethod
make_int_mdp(Ps, rs)

Build an IntMDP from a list of transition matrices Ps and reward vectors rs, one for each action in the MDP. If the elements of rs are vectors, then they are assumed to be state-action rewards. If they are matrices, then they are assumed to be state-action-state rewards. Each row of a transition matrix (and of a reward matrix) represents the probabilities of transitioning to the next states.

source
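
For illustration, a sketch of building a two-state, two-action model from per-action transition matrices and state-action reward vectors (the numbers are made up):

using MDPs

P1 = [0.9 0.1; 0.2 0.8]   # transition matrix for action 1
P2 = [0.5 0.5; 0.5 0.5]   # transition matrix for action 2
r1 = [1.0, 0.0]           # state-action rewards for action 1
r2 = [0.0, 1.0]           # state-action rewards for action 2

model = make_int_mdp([P1, P2], [r1, r2])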
MDPs.make_int_mdpMethod
make_int_mdp(mdp::TabMDP, docompress = false)

Transform any tabular MDP mdp to a numeric one. This helps to accelerate operations and value function computation. The actions are also turned into 1-based integer values.

The option docompress combines transitions to the same state into a single transition. This improves efficiency in risk-neutral settings, but may change the outcome in risk-averse settings.

The function adds one more state at the end, which represents a catch-all terminal state.

source
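
A short conversion sketch, using the Gambler's ruin domain documented below as the tabular model:

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)
int_model = make_int_mdp(model, true)   # compress duplicate transitions (risk-neutral settings only)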

Objectives

MDPs.FiniteHType

Finite-horizon discounted model. The discount factor γ can be in [0,1]. The optimal policy is Markov but time dependent.

source
MDPs.InfiniteHType

Infinite-horizon discounted objective. The discount factor γ can be in [0,1]. The optimal policy is stationary.

source
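
A hedged sketch of constructing the two discounted objectives; the constructor arguments (a discount factor for InfiniteH, and a discount factor with a horizon for FiniteH) are assumptions about the API rather than documented signatures.

using MDPs

obj_inf = InfiniteH(0.95)     # assumed constructor: InfiniteH(γ)
obj_fin = FiniteH(0.95, 50)   # assumed constructor: FiniteH(γ, T)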
MDPs.MarkovType

Objective solved by a randomized Markov non-stationary policy. In other words, the solution is time-dependent.

source
MDPs.MarkovDetType

Objective solved by a deterministic Markov non-stationary policy. In other words, the solution is time-dependent.

source

Algorithms

MDPs.value_iterationFunction
value_iteration(model, objective; [v_terminal, iterations = 1000, ϵ = 1e-3] )

Compute the value function and policy for a tabular MDP model with an objective objective. The time steps go from 1 to T+1; the last decision happens at time T.

The supported objectives are FiniteH and InfiniteH. When provided with a real number γ ∈ [0,1], the objective is treated as an infinite-horizon problem.

Finite Horizon

Uses finite-horizon value iteration for a tabular MDP model with a discount factor γ and horizon T (time steps 1 to T+1); the last decision happens at time T. Returns a vector of value functions, one for each time step.

The argument v_terminal represents the terminal value function. It should be provided as a function that maps the state id to its terminal value (at time T+1). If this value is provided, then it is used in place of 0.

Infinite Horizon

For a Bellman error ϵ, the computed value function is guaranteed to be within ϵ ⋅ γ / (1 - γ) of the optimal value function (all in terms of the L_∞ norm).

The value function computation is parallelized when parallel is true. This is also known as a Jacobi-type value iteration (as opposed to Gauss-Seidel).

Note that for the purpose of the greedy policy, minimizing the span seminorm is more efficient, but the goal of this function is also to compute the value function.

The time steps go from 1 to T+1.

source
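
As a sketch, the infinite-horizon form can be invoked with a plain discount factor, here on the Gambler's ruin domain documented below:

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)   # win probability 0.7, maximal capital 10
solution = value_iteration(model, 0.95; iterations = 10_000, ϵ = 1e-6)

The returned object bundles the computed value function and policy; its exact fields are not spelled out here.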
MDPs.value_iteration!Function
value_iteration!(v, π, model, objective; [v_terminal] )

Run value iteration using the provided v and π storage for the value function and the policy. See value_iteration for more details.

Only supports the FiniteH objective.

source
MDPs.mrp!Method
mrp!(P_π, r_π, model, π)

Save the transition matrix P_π and reward vector r_π for the MDP model and policy π. Also supports terminal states.

Does not support duplicate entries in transition probabilities.

source
MDPs.mrpMethod
mrp(model, π)

Compute the transition matrix P_π and reward vector r_π for the MDP model and policy π. See mrp! for more details.

source
MDPs.mrp_sparseMethod
mrp_sparse(model, π)

Compute a sparse transition matrix P_π and reward vector r_π for the MDP model and policy π.

This function does not support duplicate entries in transition probabilities.

source
MDPs.policy_iterationMethod
policy_iteration(model, γ; [iterations=1000])

Implements policy iteration for MDP model with a discount factor γ. The algorithm runs until the policy stops changing or the number of iterations is reached.

Does not support duplicate entries in transition probabilities.

source
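
A usage sketch mirroring the value-iteration example above (the model construction is repeated so the snippet is self-contained):

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)
solution = policy_iteration(model, 0.95; iterations = 1000)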
MDPs.policy_iteration_sparseMethod
policy_iteration_sparse(model, γ; iterations)

Implements policy iteration for MDP model with a discount factor γ. The algorithm runs until the policy stops changing or the number of iterations is reached. The value function is computed using sparse linear algebra.

Does not support duplicate entries in transition probabilities.

source

Value Function Manipulation

MDPs.make_valueMethod
make_value(model, objective)

Creates an undefined policy and value function for the model and objective.

See Also

value_iteration!

source
MDPs.bellmanMethod
bellman(model, obj, [t=0,] s, v)

Compute the Bellman operator for state s, and value function v assuming an objective obj.

source
MDPs.bellmangreedyMethod
bellmangreedy(model, obj, [t=0,] s, v)

Compute the Bellman operator and greedy action for state s, and value function v assuming an objective obj. The optional time parameter t allows for time-dependent updates.

The function uses qvalue to compute the Bellman operator and the greedy policy.

source
MDPs.greedy!Method
greedy!(π, model, obj, v)

Update policy π with the greedy policy for value function v, MDP model, and objective obj.

source
MDPs.greedyMethod
greedy(model, obj, v)

Compute the greedy action for all states and value function v assuming an objective obj and time t=0.

source
MDPs.greedyMethod
greedy(model, obj, [t=0,] s, v)

Compute the greedy action for state s and value function v assuming an objective obj.

If s is not provided, then the greedy action is computed for all states. The model must support the states function.

source
MDPs.qvalueMethod
qvalue(model, objective, [t=0,] s, a, v)

Compute the state-action value for state s, action a, and value function v for an objective.

There is no set representation for the value function.

source
MDPs.qvalues!Method
qvalues!(qvalues, model, objective, [t=0,] s, v)

Compute the state-action-values for state s, and value function v for the objective.

Saves the values to qvalues, which should be at least as long as the number of actions. Values of elements in qvalues that are beyond the action count are set to -Inf.

See qvalues for more information.

source
MDPs.qvaluesMethod
qvalues(model, objective, [t=0,] s, v)

Compute the state-action values for state s and value function v for the objective. There is no set representation of the value function v.

The function is tractable only if there is a small number of actions and transitions.

source
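
A hedged sketch of the greedy and q-value helpers above: the value function v is represented here as a plain vector indexed by state, and the objective is built with an assumed InfiniteH(γ) constructor; both choices are assumptions rather than documented requirements.

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)
v = zeros(state_count(model))               # a zero value function as a starting point
a = greedy(model, InfiniteH(0.95), 1, v)    # greedy action in state 1
qs = qvalues(model, InfiniteH(0.95), 1, v)  # q-values of all actions in state 1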

Simulation

MDPs.PolicyType

Defines a policy, which may be stationary or Markov, deterministic or randomized, or even history-dependent. The policy should support the functions make_internal and append_history, which initialize and update the internal state. The function take_action then chooses an action to take.

It is important that the tracker keeps its own internal state in order to be thread safe.

source
MDPs.TabPolicyMDType

Markov deterministic policy for tabular MDPs. The policy π has an outer array over time steps and an inner array over states.

source
MDPs.TransitionType

Information about a transition from state to nstate after taking an action. time is the time at which nstate is observed.

source
MDPs.append_historyFunction
append_history(policy, internal, transition) :: internal

Update the internal state of a policy using the transition information.

source
MDPs.cumulativeMethod
cumulative(rewards, γ)

Computes the cumulative return from rewards returned by the simulation function.

source
MDPs.make_internalFunction
make_internal(model, policy, state) -> internal

Initialize the internal state for a policy with the initial state. Returns the initial state.

source
MDPs.simulateMethod
simulate(model, π, initial, horizon, episodes; [stationary = true])

Simulate a policy π in a model and generate states and actions for the horizon decisions and episodes episodes. The initial state is initial.

The policy π can be a function, an array, or an array of arrays, depending on whether the policy is stationary, Markovian, deterministic, or randomized. When the policy is provided as a function, the parameter stationary is used.

There are horizon+1 states generated in every episode including the terminal state at T+1.

The function requires that each state and action transition to a reasonably small number of next states.

See Also

cumulative to compute the cumulative rewards

source
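
A sketch of simulating a trivial stationary policy given as a function (always take action 1), starting from state 1 for 100 decisions and 5 episodes:

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)
trajectories = simulate(model, s -> 1, 1, 100, 5; stationary = true)

The rewards recorded in the output can then be passed to cumulative together with a discount factor to obtain discounted returns; the exact layout of the returned object is not documented here.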
MDPs.take_actionFunction
take_action(policy, internal, state) -> action

Return the action to take given the internal state and the MDP state state.

source

Domains

MDPs.Domains.Gambler.RuinType
Ruin(win, max_capital)

Gambler's ruin problem. The gambler can decide how much to bet at any point in time. With probability win, the bet is doubled, and with probability 1-win it is lost. The reward is 1 when the gambler reaches the maximal capital and 0 otherwise.

  • Capital = state - 1
  • Bet = action - 1

Available actions are 1, ..., state - 1.

Special states: state=1 is broke and state=max_capital+1 is a terminal winning state.

source
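
A short construction sketch for this domain:

using MDPs

model = MDPs.Domains.Gambler.Ruin(0.7, 10)   # win probability 0.7, maximal capital 10
state_count(model)                           # number of states in the tabular model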
MDPs.transitionMethod
transition(params, stock, order, demand)

Update the inventory value and compute the profit.

Starting with stock items, order items arrive, and then demand items are sold. The sale price is collected even when a sale is backlogged (but not beyond the backlog level). Negative stock means a backlog.

Stocking costs are assessed after all the orders are fulfilled.

Causes an error when the order is too large, but no error when the demand cannot be satisfied or backlogged.

source
MDPs.Domains.Machine.ReplacementType

Standard machine replacement simulator. See Figure 3 in Delage 2009 for details.

States:

  • 1: repair 1
  • 2: repair 2
  • 3 - 10: utility states

Actions:

  • 1: Do nothing
  • 2: Repair

source
diff --git a/previews/PR2/recipes/index.html b/previews/PR2/recipes/index.html
index 6d46edb..e9fb02c 100644
--- a/previews/PR2/recipes/index.html
+++ b/previews/PR2/recipes/index.html
@@ -40,4 +40,4 @@
 # output
-3
+3

diff --git a/previews/PR2/simulation/index.html b/previews/PR2/simulation/index.html
index 381c4fe..b327818 100644
--- a/previews/PR2/simulation/index.html
+++ b/previews/PR2/simulation/index.html
@@ -1,2 +1,2 @@

Simulation

This section will be extended with documentation that discusses how to simulate history-dependent policies.
