diff --git a/docs/attacks/loss.html b/docs/attacks/loss.html
index 81718cc..f9007d2 100644
--- a/docs/attacks/loss.html
+++ b/docs/attacks/loss.html
@@ -46,7 +46,7 @@
mimir.attacks.loss
mimir.attacks.min_k
Base class (for LLMs).
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
Base class (for LLMs).
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
Base class (for LLMs).
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
mimir.attacks.quantile
mimir.attacks.reference
mimir.attacks.zlib
mimir.models
Generic LM, used most often for the target model
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
Base class (for LLMs).
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
-Defines the computation performed at every call.
+Define the computation performed at every call.
Should be overridden by all subclasses.
Note
@@ -1426,7 +1436,7 @@ def _forward_unimplemented(self, *input: Any) -> None:
- r"""Defines the computation performed at every call.
+ r"""Define the computation performed at every call.
Should be overridden by all subclasses.
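(Context, not part of the diff: a minimal sketch of the nn.Module pattern this docstring describes. The subclass name and layer are hypothetical; the point is that subclasses implement forward() and callers invoke the module instance so registered hooks run.)

    import torch
    import torch.nn as nn

    class TinyScorer(nn.Module):          # hypothetical subclass, for illustration only
        def __init__(self, dim: int = 8):
            super().__init__()            # initializes internal Module state, as noted above
            self.linear = nn.Linear(dim, 1)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # the computation performed at every call
            return self.linear(x)

    scorer = TinyScorer()
    out = scorer(torch.randn(2, 8))       # call the instance, not .forward(), so hooks run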
@@ -1479,7 +1489,7 @@ Args
-def get_probabilities(self, text: str, tokens: numpy.ndarray = None)
+def get_probabilities(self, text: str, tokens: numpy.ndarray = None, no_grads: bool = True)
-
Get the probabilities or log-softmaxed logits for a text under the current model.
@@ -1505,10 +1515,10 @@ Returns
-@torch.no_grad()
-def get_probabilities(self,
+def get_probabilities(self,
                       text: str,
-                      tokens: np.ndarray = None):
+                      tokens: np.ndarray = None,
+                      no_grads: bool = True):
     """
     Get the probabilities or log-softmaxed logits for a text under the current model.
     Args:
@@ -1522,45 +1532,50 @@ Returns
     Returns:
         list: A list of probabilities.
     """
-    if self.device is None or self.name is None:
-        raise ValueError("Please set self.device and self.name in child class")
+    with torch.set_grad_enabled(not no_grads):
+        if self.device is None or self.name is None:
+            raise ValueError("Please set self.device and self.name in child class")
-    if tokens is not None:
-        labels = torch.from_numpy(tokens.astype(np.int64)).type(torch.LongTensor)
-        if labels.shape[0] != 1:
-            # expand first dimension
-            labels = labels.unsqueeze(0)
-    else:
-        tokenized = self.tokenizer(
-            text, return_tensors="pt")
-        labels = tokenized.input_ids
+        if tokens is not None:
+            labels = torch.from_numpy(tokens.astype(np.int64)).type(torch.LongTensor)
+            if labels.shape[0] != 1:
+                # expand first dimension
+                labels = labels.unsqueeze(0)
+        else:
+            tokenized = self.tokenizer(
+                text, return_tensors="pt")
+            labels = tokenized.input_ids
-    all_prob = []
-    for i in range(0, labels.size(1), self.stride):
-        begin_loc = max(i + self.stride - self.max_length, 0)
-        end_loc = min(i + self.stride, labels.size(1))
-        trg_len = end_loc - i  # may be different from stride on last loop
-        input_ids = labels[:, begin_loc:end_loc].to(self.device)
-        target_ids = input_ids.clone()
-        target_ids[:, :-trg_len] = -100
+        all_prob = []
+        for i in range(0, labels.size(1), self.stride):
+            begin_loc = max(i + self.stride - self.max_length, 0)
+            end_loc = min(i + self.stride, labels.size(1))
+            trg_len = end_loc - i  # may be different from stride on last loop
+            input_ids = labels[:, begin_loc:end_loc].to(self.device)
+            target_ids = input_ids.clone()
+            target_ids[:, :-trg_len] = -100
-        logits = self.model(input_ids, labels=target_ids).logits.cpu()
-        shift_logits = logits[..., :-1, :].contiguous()
-        probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
-        shift_labels = target_ids[..., 1:].cpu().contiguous()
-        labels_processed = shift_labels[0]
+            logits = self.model(input_ids, labels=target_ids).logits.cpu()
+            shift_logits = logits[..., :-1, :].contiguous()
+            probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+            shift_labels = target_ids[..., 1:].cpu().contiguous()
+            labels_processed = shift_labels[0]
-        del input_ids
-        del target_ids
+            del input_ids
+            del target_ids
-        for i, token_id in enumerate(labels_processed):
-            if token_id != -100:
-                probability = probabilities[0, i, token_id].item()
-                all_prob.append(probability)
-    # Should be equal to # of tokens - 1 to account for shift
-    assert len(all_prob) == labels.size(1) - 1
+            for i, token_id in enumerate(labels_processed):
+                if token_id != -100:
+                    probability = probabilities[0, i, token_id]
+                    if no_grads:
+                        probability = probability.item()
+                    all_prob.append(probability)
+        # Should be equal to # of tokens - 1 to account for shift
+        assert len(all_prob) == labels.size(1) - 1
-    return all_prob
+    if no_grads:
+        return all_prob
+    return torch.tensor(all_prob)
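(For context, a rough usage sketch of the changed signature, outside the diff proper. `target_model` and the input string are placeholders for any mimir model wrapper exposing get_probabilities(); with the default no_grads=True the method behaves as before and returns a plain list of per-token log probabilities, while no_grads=False keeps tensor values and returns a torch.Tensor.)

    # Hypothetical usage; `target_model` is a placeholder for a mimir model wrapper.
    log_probs = target_model.get_probabilities("some candidate text")   # no_grads=True (default): list of floats
    avg_ll = sum(log_probs) / len(log_probs)                            # e.g. average token log-likelihood

    scores = target_model.get_probabilities("some candidate text",
                                            no_grads=False)             # grad-enabled path: returns a torch.Tensor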
@@ -1729,7 +1744,7 @@ Returns
-
Wrapper for OpenAI API calls
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
@@ -1982,7 +1997,7 @@ Inherited members
Wrapper for reference model, specifically used for quantile regression
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.
Wrapper for reference model
-Initializes internal Module state, shared by both nn.Module and ScriptModule.
+Initialize internal Module state, shared by both nn.Module and ScriptModule.