diff --git a/docs/attacks/quantile.html b/docs/attacks/quantile.html
index 6e42fbd..c03cb29 100644
--- a/docs/attacks/quantile.html
+++ b/docs/attacks/quantile.html
@@ -180,7 +180,7 @@

Args

The function to use to form a batch from a list of elements of train_dataset or eval_dataset. Will default to [default_data_collator] if no tokenizer is provided, an instance of [DataCollatorWithPadding] otherwise.
-train_dataset (torch.utils.data.Dataset or torch.utils.data.IterableDataset, optional):
+train_dataset (Union[torch.utils.data.Dataset, torch.utils.data.IterableDataset, datasets.Dataset], optional):
The dataset to use for training. If it is a [~datasets.Dataset], columns not accepted by the model.forward() method are automatically removed.

Note that if it's a <code>torch.utils.data.IterableDataset</code> with some randomization and you are training in a
@@ -189,7 +189,7 @@ 

Args

manually set the seed of this <code>generator</code> at each epoch) or have a <code>set_epoch()</code> method that internally sets the seed of the RNGs used.
-eval_dataset (Union[torch.utils.data.Dataset, Dict[str, torch.utils.data.Dataset]), optional):
+eval_dataset (Union[torch.utils.data.Dataset, Dict[str, torch.utils.data.Dataset, datasets.Dataset]), optional):
The dataset to use for evaluation. If it is a [~datasets.Dataset], columns not accepted by the model.forward() method are automatically removed. If it is a dictionary, it will evaluate on each dataset prepending the dictionary key to the metric name.
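
Note: the widened annotations reflect that a Hugging Face `datasets.Dataset` can be passed straight to `Trainer`. A minimal sketch of the intended usage, assuming a toy text-classification setup (the model name and data are illustrative, not part of this change):

```python
from datasets import Dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

# Toy data; any datasets.Dataset works, per the updated annotation.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
raw = Dataset.from_dict({"text": ["hello world", "goodbye world"],
                         "label": [0, 1]})
# Columns model.forward() does not accept (here, "text") are removed automatically.
encoded = raw.map(lambda ex: tokenizer(ex["text"], truncation=True), batched=True)

model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="out"),
    tokenizer=tokenizer,            # batches get padded via DataCollatorWithPadding
    train_dataset=encoded,          # a plain datasets.Dataset
    eval_dataset={"dev": encoded},  # a Dict[str, Dataset] is also accepted
)
```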
diff --git a/docs/config.html b/docs/config.html
index fa45df3..9c1fb4f 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -172,7 +172,7 @@

Module mimir.config

max_substrs: Optional[int] = 20
"""If full_doc, determines the maximum number of sample substrs to evaluate on"""
dump_cache: Optional[bool] = False
-"Dump data to cache? Exits program after dumping"
+"""Dump data to cache? Exits program after dumping"""
load_from_cache: Optional[bool] = False
"""Load data from cache?"""
load_from_hf: Optional[bool] = True
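
Note: the quote change here is consistency, not semantics — a bare string literal placed directly after a dataclass field is what doc generators such as pdoc render as that field's documentation, single- or triple-quoted alike. A minimal sketch of the pattern (the class name `CacheConfig` is hypothetical):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class CacheConfig:  # hypothetical name, for illustration only
    dump_cache: Optional[bool] = False
    """Dump data to cache? Exits program after dumping"""
    load_from_cache: Optional[bool] = False
    """Load data from cache?"""

# Python discards these bare string literals at runtime; tools like pdoc
# recover them from the source and attach them to the preceding field.
```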
@@ -216,9 +216,9 @@

Module mimir.config

pre_perturb_span_length: Optional[int] = 5
"""Span length for pre-perturbation"""
tok_by_tok: Optional[bool] = False
-"""FPRs at which to compute TPR"""
-fpr_list: Optional[List[float]] = field(default_factory=lambda: [0.001, 0.01])
"""Process data token-wise?"""
+fpr_list: Optional[List[float]] = field(default_factory=lambda: [0.001, 0.01])
+"""FPRs at which to compute TPR"""
random_seed: Optional[int] = 0
"""Random seed"""
ref_config: Optional[ReferenceConfig] = None
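
Note: this hunk swaps two docstrings that were attached to the wrong fields — `fpr_list` holds the false-positive rates at which TPR is reported, while `tok_by_tok` toggles token-wise processing. For intuition, TPR at a fixed FPR is typically read off the ROC curve; a sketch using scikit-learn (an illustrative assumption, not something this diff prescribes):

```python
import numpy as np
from sklearn.metrics import roc_curve

def tpr_at_fprs(labels, scores, fpr_list=(0.001, 0.01)):
    """Interpolate the ROC curve to get the TPR at each target FPR."""
    fpr, tpr, _ = roc_curve(labels, scores)
    # roc_curve returns fpr in non-decreasing order, as np.interp requires.
    return {target: float(np.interp(target, fpr, tpr)) for target in fpr_list}
```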
@@ -400,7 +400,7 @@

Class variables

max_substrs: Optional[int] = 20
"""If full_doc, determines the maximum number of sample substrs to evaluate on"""
dump_cache: Optional[bool] = False
-"Dump data to cache? Exits program after dumping"
+"""Dump data to cache? Exits program after dumping"""
load_from_cache: Optional[bool] = False
"""Load data from cache?"""
load_from_hf: Optional[bool] = True
@@ -444,9 +444,9 @@

Class variables

pre_perturb_span_length: Optional[int] = 5
"""Span length for pre-perturbation"""
tok_by_tok: Optional[bool] = False
-"""FPRs at which to compute TPR"""
-fpr_list: Optional[List[float]] = field(default_factory=lambda: [0.001, 0.01])
"""Process data token-wise?"""
+fpr_list: Optional[List[float]] = field(default_factory=lambda: [0.001, 0.01])
+"""FPRs at which to compute TPR"""
random_seed: Optional[int] = 0
"""Random seed"""
ref_config: Optional[ReferenceConfig] = None
@@ -539,7 +539,7 @@

Class variables

var fpr_list : Optional[List[float]]

-Process data token-wise?
+FPRs at which to compute TPR

var full_doc : Optional[bool]
@@ -639,7 +639,7 @@

Class variables

var tok_by_tok : Optional[bool]

-FPRs at which to compute TPR
+Process data token-wise?

var token_frequency_map : Optional[str]