diff --git a/docs/attacks/all_attacks.html b/docs/attacks/all_attacks.html
index 2f8af53..a6a4391 100644
--- a/docs/attacks/all_attacks.html
+++ b/docs/attacks/all_attacks.html
@@ -41,6 +41,7 @@

Module mimir.attacks.all_attacks

 REFERENCE_BASED = "ref" # Done
 ZLIB = "zlib" # Done
 MIN_K = "min_k" # Done
+MIN_K_PLUS_PLUS = "min_k++" # Done
 NEIGHBOR = "ne" # Done
 GRADNORM = "gradnorm" # Done
 # QUANTILE = "quantile" # Uncomment when tested implementation is available
@@ -122,6 +123,7 @@

Classes

 REFERENCE_BASED = "ref" # Done
 ZLIB = "zlib" # Done
 MIN_K = "min_k" # Done
+MIN_K_PLUS_PLUS = "min_k++" # Done
 NEIGHBOR = "ne" # Done
 GRADNORM = "gradnorm" # Done
 # QUANTILE = "quantile" # Uncomment when tested implementation is available
@@ -145,6 +147,10 @@

Class variables

+ var MIN_K_PLUS_PLUS
var NEIGHBOR
@@ -224,6 +230,7 @@

Subclasses

  • GradNormAttack
  • LOSSAttack
  • MinKProbAttack
+ • MinKPlusPlusAttack
  • NeighborhoodAttack
  • QuantileAttack
  • ReferenceAttack
@@ -326,6 +333,7 @@

  • GRADNORM
  • LOSS
  • MIN_K
+ • MIN_K_PLUS_PLUS
  • NEIGHBOR
  • REFERENCE_BASED
  • ZLIB
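
As the listing above shows, the member name and its string value differ; a quick sketch, assuming AllAttacks is a string-valued Enum as the rendered class variables suggest, of selecting the new attack by name:

    from mimir.attacks.all_attacks import AllAttacks

    # The enum member is MIN_K_PLUS_PLUS; its config-facing value is "min_k++"
    assert AllAttacks.MIN_K_PLUS_PLUS.value == "min_k++"
    assert AllAttacks("min_k++") is AllAttacks.MIN_K_PLUS_PLUS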
diff --git a/docs/attacks/index.html b/docs/attacks/index.html
index 66733df..7d97cc6 100644
--- a/docs/attacks/index.html
+++ b/docs/attacks/index.html
@@ -55,6 +55,10 @@

    Sub-modules

    Min-k % Prob Attack: https://arxiv.org/pdf/2310.16789.pdf

+ mimir.attacks.min_k_plus_plus

+ Min-K%++ Attack: https://github.com/zjysteven/mink-plus-plus
    mimir.attacks.neighborhood

    Neighborhood-MIA attack https://arxiv.org/pdf/2305.18462.pdf

    @@ -108,6 +112,7 @@

    Index

  • mimir.attacks.gradnorm
  • mimir.attacks.loss
  • mimir.attacks.min_k
+ • mimir.attacks.min_k_plus_plus
  • mimir.attacks.neighborhood
  • mimir.attacks.quantile
  • mimir.attacks.reference
diff --git a/docs/attacks/min_k_plus_plus.html b/docs/attacks/min_k_plus_plus.html
new file mode 100644
index 0000000..cb4bd9f
--- /dev/null
+++ b/docs/attacks/min_k_plus_plus.html
@@ -0,0 +1,165 @@
+ mimir.attacks.min_k_plus_plus API documentation
+ Module mimir.attacks.min_k_plus_plus

+ Min-K%++ Attack: https://github.com/zjysteven/mink-plus-plus
    """
    +    Min-K%++ Attack: https://github.com/zjysteven/mink-plus-plus
    +"""
    +import torch as ch
    +import numpy as np
    +from mimir.attacks.all_attacks import Attack
    +from mimir.models import Model
    +from mimir.config import ExperimentConfig
    +
    +
    +class MinKPlusPlusAttack(Attack):
    +
    +    def __init__(self, config: ExperimentConfig, model: Model):
    +        super().__init__(config, model, ref_model=None)
    +
    +    @ch.no_grad()
    +    def _attack(self, document, probs, tokens=None, **kwargs):
    +        """
+        Min-K%++ Attack.
+        Gets token log-probabilities, normalizes each with the mean and std of the
+        model's full next-token distribution, and returns the negated mean of the
+        normalized scores over the lowest-scoring k% of tokens.
    +        """
+        # Hyper-params specific to the Min-K%++ attack
    +        k: float = kwargs.get("k", 0.2)
    +        all_probs = kwargs.get("all_probs", None)
    +
+        # these are all log probabilities
    +        target_prob, all_probs = (
    +            (probs, all_probs)
    +            if (probs is not None and all_probs is not None)
    +            else self.model.get_probabilities(document, tokens=tokens, return_all_probs=True)
    +        )
+
+        # Per-position mean and variance of log p under the model's full
+        # next-token distribution: mu = E[log p], sigma = E[(log p)^2] - mu^2
+        mu = (ch.exp(all_probs) * all_probs).sum(-1)
+        sigma = (ch.exp(all_probs) * ch.square(all_probs)).sum(-1) - ch.square(mu)
+        # sigma holds the variance, hence .sqrt() to standardize each token's
+        # observed log-prob
+        scores = (np.array(target_prob) - mu.numpy()) / sigma.sqrt().numpy()
+
    +        return -np.mean(sorted(scores)[:int(len(scores) * k)])
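
To make the normalization concrete, here is a minimal self-contained sketch that recomputes the Min-K%++ score on toy tensors. The arithmetic mirrors the mu/sigma lines above; all_probs, target_prob, and the 10x4 shapes are random stand-ins for real model output, not values produced by this module.

    import numpy as np
    import torch as ch

    # Stand-in model output: log-softmax over a 4-word vocabulary at each of
    # 10 token positions of one document
    all_probs = ch.log_softmax(ch.randn(10, 4), dim=-1)
    # Log-prob assigned to the token actually observed at each position
    observed = ch.randint(0, 4, (10,))
    target_prob = all_probs[ch.arange(10), observed].tolist()

    k = 0.2
    mu = (ch.exp(all_probs) * all_probs).sum(-1)  # E[log p] per position
    var = (ch.exp(all_probs) * ch.square(all_probs)).sum(-1) - ch.square(mu)
    scores = (np.array(target_prob) - mu.numpy()) / var.sqrt().numpy()
    # Average the lowest k% of standardized scores, negated so that a larger
    # value suggests membership
    print(-np.mean(sorted(scores)[: int(len(scores) * k)]))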

    Classes

+ class MinKPlusPlusAttack(config: ExperimentConfig, model: Model)

    Ancestors

+ • mimir.attacks.all_attacks.Attack

    Inherited members

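Before the registry changes below, a hedged usage sketch: config, target_model, and document are placeholders for objects built elsewhere in a mimir experiment, not names defined by this module.

    from mimir.attacks.min_k_plus_plus import MinKPlusPlusAttack

    # config: ExperimentConfig; target_model: mimir.models.Model (assumed
    # constructed elsewhere); no reference model is needed for this attack
    attack = MinKPlusPlusAttack(config, target_model)

    # With probs=None, _attack fetches both the target-token log-probs and the
    # full-vocabulary distributions via get_probabilities(return_all_probs=True)
    score = attack._attack(document, probs=None, k=0.2)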
\ No newline at end of file
diff --git a/docs/attacks/utils.html b/docs/attacks/utils.html
index 6ec49c8..264ee55 100644
--- a/docs/attacks/utils.html
+++ b/docs/attacks/utils.html
@@ -32,6 +32,7 @@

    Module mimir.attacks.utils

 from mimir.attacks.reference import ReferenceAttack
 from mimir.attacks.zlib import ZLIBAttack
 from mimir.attacks.min_k import MinKProbAttack
+from mimir.attacks.min_k_plus_plus import MinKPlusPlusAttack
 from mimir.attacks.neighborhood import NeighborhoodAttack
 from mimir.attacks.gradnorm import GradNormAttack
@@ -43,6 +44,7 @@

    Module mimir.attacks.utils

     AllAttacks.REFERENCE_BASED: ReferenceAttack,
     AllAttacks.ZLIB: ZLIBAttack,
     AllAttacks.MIN_K: MinKProbAttack,
+    AllAttacks.MIN_K_PLUS_PLUS: MinKPlusPlusAttack,
     AllAttacks.NEIGHBOR: NeighborhoodAttack,
     AllAttacks.GRADNORM: GradNormAttack,
 }
@@ -74,6 +76,7 @@

    Functions

     AllAttacks.REFERENCE_BASED: ReferenceAttack,
     AllAttacks.ZLIB: ZLIBAttack,
     AllAttacks.MIN_K: MinKProbAttack,
+    AllAttacks.MIN_K_PLUS_PLUS: MinKPlusPlusAttack,
     AllAttacks.NEIGHBOR: NeighborhoodAttack,
     AllAttacks.GRADNORM: GradNormAttack,
 }
diff --git a/docs/models.html b/docs/models.html
index 9638c98..568f229 100644
--- a/docs/models.html
+++ b/docs/models.html
@@ -99,7 +99,8 @@

    Module mimir.models

 def get_probabilities(self,
                       text: str,
                       tokens: np.ndarray = None,
-                      no_grads: bool = True):
+                      no_grads: bool = True,
+                      return_all_probs: bool = False):
     """
         Get the probabilities or log-softmaxed logits for a text under the current model.
         Args:
@@ -127,7 +128,8 @@

    Module mimir.models

         text, return_tensors="pt")
     labels = tokenized.input_ids
-    all_prob = []
+    target_token_log_prob = []
+    all_token_log_prob = []
     for i in range(0, labels.size(1), self.stride):
         begin_loc = max(i + self.stride - self.max_length, 0)
         end_loc = min(i + self.stride, labels.size(1))
@@ -140,7 +142,7 @@

    Module mimir.models

         if no_grads:
             logits = logits.cpu()
         shift_logits = logits[..., :-1, :].contiguous()
-        probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+        log_probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
         shift_labels = target_ids[..., 1:]
         if no_grads:
             shift_labels = shift_labels.cpu()
@@ -152,17 +154,23 @@

    Module mimir.models

         for i, token_id in enumerate(labels_processed):
             if token_id != -100:
-                probability = probabilities[0, i, token_id]
+                log_probability = log_probabilities[0, i, token_id]
                 if no_grads:
-                    probability = probability.item()
-                all_prob.append(probability)
+                    log_probability = log_probability.item()
+                target_token_log_prob.append(log_probability)
+                all_token_log_prob.append(log_probabilities[0, i])
+
     # Should be equal to # of tokens - 1 to account for shift
-    assert len(all_prob) == labels.size(1) - 1
+    assert len(target_token_log_prob) == labels.size(1) - 1
+    all_token_log_prob = torch.stack(all_token_log_prob, dim=0)
+    assert len(target_token_log_prob) == len(all_token_log_prob)

     if not no_grads:
-        all_prob = torch.stack(all_prob)
+        target_token_log_prob = torch.stack(target_token_log_prob)

-    return all_prob
+    if not return_all_probs:
+        return target_token_log_prob
+    return target_token_log_prob, all_token_log_prob

 @torch.no_grad()
 def get_ll(self,
@@ -1244,7 +1252,8 @@

    Inherited members

 def get_probabilities(self,
                       text: str,
                       tokens: np.ndarray = None,
-                      no_grads: bool = True):
+                      no_grads: bool = True,
+                      return_all_probs: bool = False):
     """
         Get the probabilities or log-softmaxed logits for a text under the current model.
         Args:
@@ -1272,7 +1281,8 @@

    Inherited members

         text, return_tensors="pt")
     labels = tokenized.input_ids
-    all_prob = []
+    target_token_log_prob = []
+    all_token_log_prob = []
     for i in range(0, labels.size(1), self.stride):
         begin_loc = max(i + self.stride - self.max_length, 0)
         end_loc = min(i + self.stride, labels.size(1))
@@ -1285,7 +1295,7 @@

    Inherited members

         if no_grads:
             logits = logits.cpu()
         shift_logits = logits[..., :-1, :].contiguous()
-        probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+        log_probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
         shift_labels = target_ids[..., 1:]
         if no_grads:
             shift_labels = shift_labels.cpu()
@@ -1297,17 +1307,23 @@

    Inherited members

         for i, token_id in enumerate(labels_processed):
             if token_id != -100:
-                probability = probabilities[0, i, token_id]
+                log_probability = log_probabilities[0, i, token_id]
                 if no_grads:
-                    probability = probability.item()
-                all_prob.append(probability)
+                    log_probability = log_probability.item()
+                target_token_log_prob.append(log_probability)
+                all_token_log_prob.append(log_probabilities[0, i])
+
     # Should be equal to # of tokens - 1 to account for shift
-    assert len(all_prob) == labels.size(1) - 1
+    assert len(target_token_log_prob) == labels.size(1) - 1
+    all_token_log_prob = torch.stack(all_token_log_prob, dim=0)
+    assert len(target_token_log_prob) == len(all_token_log_prob)

     if not no_grads:
-        all_prob = torch.stack(all_prob)
+        target_token_log_prob = torch.stack(target_token_log_prob)

-    return all_prob
+    if not return_all_probs:
+        return target_token_log_prob
+    return target_token_log_prob, all_token_log_prob

 @torch.no_grad()
 def get_ll(self,
@@ -1501,7 +1517,7 @@

    Args

-def get_probabilities(self, text: str, tokens: numpy.ndarray = None, no_grads: bool = True)
+def get_probabilities(self, text: str, tokens: numpy.ndarray = None, no_grads: bool = True, return_all_probs: bool = False)

    Get the probabilities or log-softmaxed logits for a text under the current model.

    @@ -1530,7 +1546,8 @@

    Returns

    def get_probabilities(self,
                           text: str,
                           tokens: np.ndarray = None,
    -                      no_grads: bool = True):
    +                      no_grads: bool = True,
    +                      return_all_probs: bool = False):
         """
             Get the probabilities or log-softmaxed logits for a text under the current model.
             Args:
    @@ -1558,7 +1575,8 @@ 

    Returns

         text, return_tensors="pt")
     labels = tokenized.input_ids
-    all_prob = []
+    target_token_log_prob = []
+    all_token_log_prob = []
     for i in range(0, labels.size(1), self.stride):
         begin_loc = max(i + self.stride - self.max_length, 0)
         end_loc = min(i + self.stride, labels.size(1))
@@ -1571,7 +1589,7 @@

    Returns

         if no_grads:
             logits = logits.cpu()
         shift_logits = logits[..., :-1, :].contiguous()
-        probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+        log_probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
         shift_labels = target_ids[..., 1:]
         if no_grads:
             shift_labels = shift_labels.cpu()
@@ -1583,17 +1601,23 @@

    Returns

         for i, token_id in enumerate(labels_processed):
             if token_id != -100:
-                probability = probabilities[0, i, token_id]
+                log_probability = log_probabilities[0, i, token_id]
                 if no_grads:
-                    probability = probability.item()
-                all_prob.append(probability)
+                    log_probability = log_probability.item()
+                target_token_log_prob.append(log_probability)
+                all_token_log_prob.append(log_probabilities[0, i])
+
     # Should be equal to # of tokens - 1 to account for shift
-    assert len(all_prob) == labels.size(1) - 1
+    assert len(target_token_log_prob) == labels.size(1) - 1
+    all_token_log_prob = torch.stack(all_token_log_prob, dim=0)
+    assert len(target_token_log_prob) == len(all_token_log_prob)

     if not no_grads:
-        all_prob = torch.stack(all_prob)
+        target_token_log_prob = torch.stack(target_token_log_prob)

-    return all_prob
+    if not return_all_probs:
+        return target_token_log_prob
+    return target_token_log_prob, all_token_log_prob
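
Seen from the caller's side, the new flag changes only the return arity; a short sketch, assuming model is an already-loaded mimir model and text is a Python string:

    # Default behaviour is unchanged: one log-prob per (shifted) token of text
    target_log_probs = model.get_probabilities(text)

    # With the flag set, the full per-position vocabulary distribution is also
    # returned; this is the tensor Min-K%++ standardizes against
    target_log_probs, all_log_probs = model.get_probabilities(
        text, return_all_probs=True)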