Update documentation

iamgroot42 · Feb 15, 2024 · c413117 · c413117
1 parent 9d20190
commit c413117
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 83 deletions.
diff --git a/docs/data_utils.html b/docs/data_utils.html
@@ -39,6 +39,9 @@ <h1 class="title">Module <code>mimir.data_utils</code></h1>
 
 
 class Data:
+    &#34;&#34;&#34;
+    Data class to load and cache datasets.
+    &#34;&#34;&#34;
     def __init__(self, name,
                  config: ExperimentConfig,
                  presampled: str = None,
@@ -65,6 +68,9 @@ <h1 class="title">Module <code>mimir.data_utils</code></h1>
         model: str = &#34;bert&#34;,
         in_place_swap: bool = False,
     ):
+        &#34;&#34;&#34;
+        Load neighbors from cache (local or from HF)
+        &#34;&#34;&#34;
         data_split = &#34;train&#34; if train else &#34;test&#34;
         data_split += &#34;_neighbors&#34;
         filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(
@@ -92,6 +98,9 @@ <h1 class="title">Module <code>mimir.data_utils</code></h1>
         model: str = &#34;bert&#34;,
         in_place_swap: bool = False,
     ):
+        &#34;&#34;&#34;
+        Dump neighbors to local cache.
+        &#34;&#34;&#34;
         data_split = &#34;train&#34; if train else &#34;test&#34;
         data_split += &#34;_neighbors&#34;
         filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(
@@ -338,6 +347,9 @@ <h1 class="title">Module <code>mimir.data_utils</code></h1>
 
 
 def sourcename_process(x: str):
+    &#34;&#34;&#34;
+        Helper function to process source name.
+    &#34;&#34;&#34;
     return x.replace(&#34; &#34;, &#34;_&#34;).replace(&#34;-&#34;, &#34;_&#34;).lower()
 
 
@@ -398,12 +410,15 @@ <h2 class="section-title" id="header-functions">Functions</h2>
 <span>def <span class="ident">sourcename_process</span></span>(<span>x: str)</span>
 </code></dt>
 <dd>
-<div class="desc"></div>
+<div class="desc"><p>Helper function to process source name.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
 </summary>
 <pre><code class="python">def sourcename_process(x: str):
+    &#34;&#34;&#34;
+        Helper function to process source name.
+    &#34;&#34;&#34;
     return x.replace(&#34; &#34;, &#34;_&#34;).replace(&#34;-&#34;, &#34;_&#34;).lower()</code></pre>
 </details>
 </dd>
@@ -476,12 +491,15 @@ <h2 class="section-title" id="header-classes">Classes</h2>
 <span>(</span><span>name, config: <a title="mimir.config.ExperimentConfig" href="config.html#mimir.config.ExperimentConfig">ExperimentConfig</a>, presampled: str = None, name_key_mapping: dict = {'the_pile': 'text', 'xsum': 'document'})</span>
 </code></dt>
 <dd>
-<div class="desc"></div>
+<div class="desc"><p>Data class to load and cache datasets.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
 </summary>
 <pre><code class="python">class Data:
+    &#34;&#34;&#34;
+    Data class to load and cache datasets.
+    &#34;&#34;&#34;
     def __init__(self, name,
                  config: ExperimentConfig,
                  presampled: str = None,
@@ -508,6 +526,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         model: str = &#34;bert&#34;,
         in_place_swap: bool = False,
     ):
+        &#34;&#34;&#34;
+        Load neighbors from cache (local or from HF)
+        &#34;&#34;&#34;
         data_split = &#34;train&#34; if train else &#34;test&#34;
         data_split += &#34;_neighbors&#34;
         filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(
@@ -535,6 +556,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         model: str = &#34;bert&#34;,
         in_place_swap: bool = False,
     ):
+        &#34;&#34;&#34;
+        Dump neighbors to local cache.
+        &#34;&#34;&#34;
         data_split = &#34;train&#34; if train else &#34;test&#34;
         data_split += &#34;_neighbors&#34;
         filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(
@@ -739,7 +763,7 @@ <h3>Methods</h3>
 <span>def <span class="ident">dump_neighbors</span></span>(<span>self, data, train: bool, num_neighbors: int, model: str = 'bert', in_place_swap: bool = False)</span>
 </code></dt>
 <dd>
-<div class="desc"></div>
+<div class="desc"><p>Dump neighbors to local cache.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
@@ -752,6 +776,9 @@ <h3>Methods</h3>
     model: str = &#34;bert&#34;,
     in_place_swap: bool = False,
 ):
+    &#34;&#34;&#34;
+    Dump neighbors to local cache.
+    &#34;&#34;&#34;
     data_split = &#34;train&#34; if train else &#34;test&#34;
     data_split += &#34;_neighbors&#34;
     filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(
@@ -967,7 +994,7 @@ <h3>Methods</h3>
 <span>def <span class="ident">load_neighbors</span></span>(<span>self, train: bool, num_neighbors: int, model: str = 'bert', in_place_swap: bool = False)</span>
 </code></dt>
 <dd>
-<div class="desc"></div>
+<div class="desc"><p>Load neighbors from cache (local or from HF)</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
@@ -979,6 +1006,9 @@ <h3>Methods</h3>
     model: str = &#34;bert&#34;,
     in_place_swap: bool = False,
 ):
+    &#34;&#34;&#34;
+    Load neighbors from cache (local or from HF)
+    &#34;&#34;&#34;
     data_split = &#34;train&#34; if train else &#34;test&#34;
     data_split += &#34;_neighbors&#34;
     filename = self._get_name_to_save() + &#34;_neighbors_{}_{}&#34;.format(

diff --git a/docs/models.html b/docs/models.html
@@ -248,10 +248,16 @@ <h1 class="title">Module <code>mimir.models</code></h1>
         self.load_model_properties()
 
     def load(self):
+        &#34;&#34;&#34;
+        Load reference model onto GPU(s)
+        &#34;&#34;&#34;
         if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
             super().load()
 
     def unload(self):
+        &#34;&#34;&#34;
+        Unload reference model from GPU(s)
+        &#34;&#34;&#34;
         if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
             super().unload()
 
@@ -398,19 +404,6 @@ <h1 class="title">Module <code>mimir.models</code></h1>
             del label_batch
             del attention_mask
         return losses #np.mean(losses)
-
-    @torch.no_grad()
-    def get_min_k_prob(self, text: str, tokens=None, probs=None, k=.2, window=1, stride=1):
-        all_prob = probs if probs is not None else self.get_probabilities(text, tokens=tokens)
-        # iterate through probabilities by ngram defined by window size at given stride
-        ngram_probs = []
-        for i in range(0, len(all_prob) - window + 1, stride):
-            ngram_prob = all_prob[i:i+window]
-            ngram_probs.append(np.mean(ngram_prob))
-        min_k_probs = sorted(ngram_probs)[:int(len(ngram_probs) * k)]
-
-        return -np.mean(min_k_probs)
-
 
     def sample_from_model(self, texts: List[str], **kwargs):
         &#34;&#34;&#34;
@@ -464,11 +457,6 @@ <h1 class="title">Module <code>mimir.models</code></h1>
         logits = self.model(**tokenized).logits[:,:-1]
         neg_entropy = F.softmax(logits, dim=-1) * F.log_softmax(logits, dim=-1)
         return -neg_entropy.sum(-1).mean().item()
-
-    @torch.no_grad()
-    def get_zlib_entropy(self, text: str, tokens=None, probs=None):
-        zlib_entropy = len(zlib.compress(bytes(text, &#39;utf-8&#39;)))
-        return self.get_ll(text, tokens=tokens, probs=probs) / zlib_entropy
 
     @torch.no_grad()
     def get_max_norm(self, text: str, context_len=None, tk_freq_map=None):
@@ -525,6 +513,9 @@ <h1 class="title">Module <code>mimir.models</code></h1>
 
     @property
     def api_calls(self):
+        &#34;&#34;&#34;
+            Get the number of tokens used in API calls
+        &#34;&#34;&#34;
         return self.API_TOKEN_COUNTER
 
     @torch.no_grad()
@@ -763,19 +754,6 @@ <h2 class="section-title" id="header-classes">Classes</h2>
             del label_batch
             del attention_mask
         return losses #np.mean(losses)
-
-    @torch.no_grad()
-    def get_min_k_prob(self, text: str, tokens=None, probs=None, k=.2, window=1, stride=1):
-        all_prob = probs if probs is not None else self.get_probabilities(text, tokens=tokens)
-        # iterate through probabilities by ngram defined by window size at given stride
-        ngram_probs = []
-        for i in range(0, len(all_prob) - window + 1, stride):
-            ngram_prob = all_prob[i:i+window]
-            ngram_probs.append(np.mean(ngram_prob))
-        min_k_probs = sorted(ngram_probs)[:int(len(ngram_probs) * k)]
-
-        return -np.mean(min_k_probs)
-
 
     def sample_from_model(self, texts: List[str], **kwargs):
         &#34;&#34;&#34;
@@ -829,11 +807,6 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         logits = self.model(**tokenized).logits[:,:-1]
         neg_entropy = F.softmax(logits, dim=-1) * F.log_softmax(logits, dim=-1)
         return -neg_entropy.sum(-1).mean().item()
-
-    @torch.no_grad()
-    def get_zlib_entropy(self, text: str, tokens=None, probs=None):
-        zlib_entropy = len(zlib.compress(bytes(text, &#39;utf-8&#39;)))
-        return self.get_ll(text, tokens=tokens, probs=probs) / zlib_entropy
 
     @torch.no_grad()
     def get_max_norm(self, text: str, context_len=None, tk_freq_map=None):
@@ -1049,28 +1022,6 @@ <h3>Methods</h3>
     return -np.mean(all_prob)</code></pre>
 </details>
 </dd>
-<dt id="mimir.models.LanguageModel.get_min_k_prob"><code class="name flex">
-<span>def <span class="ident">get_min_k_prob</span></span>(<span>self, text: str, tokens=None, probs=None, k=0.2, window=1, stride=1)</span>
-</code></dt>
-<dd>
-<div class="desc"></div>
-<details class="source">
-<summary>
-<span>Expand source code</span>
-</summary>
-<pre><code class="python">@torch.no_grad()
-def get_min_k_prob(self, text: str, tokens=None, probs=None, k=.2, window=1, stride=1):
-    all_prob = probs if probs is not None else self.get_probabilities(text, tokens=tokens)
-    # iterate through probabilities by ngram defined by window size at given stride
-    ngram_probs = []
-    for i in range(0, len(all_prob) - window + 1, stride):
-        ngram_prob = all_prob[i:i+window]
-        ngram_probs.append(np.mean(ngram_prob))
-    min_k_probs = sorted(ngram_probs)[:int(len(ngram_probs) * k)]
-
-    return -np.mean(min_k_probs)</code></pre>
-</details>
-</dd>
 <dt id="mimir.models.LanguageModel.get_rank"><code class="name flex">
 <span>def <span class="ident">get_rank</span></span>(<span>self, text: str, log: bool = False)</span>
 </code></dt>
@@ -1129,21 +1080,6 @@ <h3>Methods</h3>
     return lls - lls_ref</code></pre>
 </details>
 </dd>
-<dt id="mimir.models.LanguageModel.get_zlib_entropy"><code class="name flex">
-<span>def <span class="ident">get_zlib_entropy</span></span>(<span>self, text: str, tokens=None, probs=None)</span>
-</code></dt>
-<dd>
-<div class="desc"></div>
-<details class="source">
-<summary>
-<span>Expand source code</span>
-</summary>
-<pre><code class="python">@torch.no_grad()
-def get_zlib_entropy(self, text: str, tokens=None, probs=None):
-    zlib_entropy = len(zlib.compress(bytes(text, &#39;utf-8&#39;)))
-    return self.get_ll(text, tokens=tokens, probs=probs) / zlib_entropy</code></pre>
-</details>
-</dd>
 <dt id="mimir.models.LanguageModel.sample_from_model"><code class="name flex">
 <span>def <span class="ident">sample_from_model</span></span>(<span>self, texts: List[str], **kwargs)</span>
 </code></dt>
@@ -1710,6 +1646,9 @@ <h3>Methods</h3>
 
     @property
     def api_calls(self):
+        &#34;&#34;&#34;
+            Get the number of tokens used in API calls
+        &#34;&#34;&#34;
         return self.API_TOKEN_COUNTER
 
     @torch.no_grad()
@@ -1829,13 +1768,16 @@ <h3>Instance variables</h3>
 <dl>
 <dt id="mimir.models.OpenAI_APIModel.api_calls"><code class="name">var <span class="ident">api_calls</span></code></dt>
 <dd>
-<div class="desc"></div>
+<div class="desc"><p>Get the number of tokens used in API calls</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
 </summary>
 <pre><code class="python">@property
 def api_calls(self):
+    &#34;&#34;&#34;
+        Get the number of tokens used in API calls
+    &#34;&#34;&#34;
     return self.API_TOKEN_COUNTER</code></pre>
 </details>
 </dd>
@@ -2009,10 +1951,16 @@ <h3>Inherited members</h3>
         self.load_model_properties()
 
     def load(self):
+        &#34;&#34;&#34;
+        Load reference model onto GPU(s)
+        &#34;&#34;&#34;
         if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
             super().load()
 
     def unload(self):
+        &#34;&#34;&#34;
+        Unload reference model from GPU(s)
+        &#34;&#34;&#34;
         if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
             super().unload()</code></pre>
 </details>
@@ -2036,18 +1984,53 @@ <h3>Class variables</h3>
 <div class="desc"></div>
 </dd>
 </dl>
+<h3>Methods</h3>
+<dl>
+<dt id="mimir.models.ReferenceModel.load"><code class="name flex">
+<span>def <span class="ident">load</span></span>(<span>self)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Load reference model onto GPU(s)</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def load(self):
+    &#34;&#34;&#34;
+    Load reference model onto GPU(s)
+    &#34;&#34;&#34;
+    if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
+        super().load()</code></pre>
+</details>
+</dd>
+<dt id="mimir.models.ReferenceModel.unload"><code class="name flex">
+<span>def <span class="ident">unload</span></span>(<span>self)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Unload reference model from GPU(s)</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def unload(self):
+    &#34;&#34;&#34;
+    Unload reference model from GPU(s)
+    &#34;&#34;&#34;
+    if &#34;llama&#34; not in self.name and &#34;alpaca&#34; not in self.name:
+        super().unload()</code></pre>
+</details>
+</dd>
+</dl>
 <h3>Inherited members</h3>
 <ul class="hlist">
 <li><code><b><a title="mimir.models.Model" href="#mimir.models.Model">Model</a></b></code>:
 <ul class="hlist">
 <li><code><a title="mimir.models.Model.forward" href="#mimir.models.Model.forward">forward</a></code></li>
 <li><code><a title="mimir.models.Model.get_ll" href="#mimir.models.Model.get_ll">get_ll</a></code></li>
 <li><code><a title="mimir.models.Model.get_probabilities" href="#mimir.models.Model.get_probabilities">get_probabilities</a></code></li>
-<li><code><a title="mimir.models.Model.load" href="#mimir.models.Model.load">load</a></code></li>
 <li><code><a title="mimir.models.Model.load_base_model_and_tokenizer" href="#mimir.models.Model.load_base_model_and_tokenizer">load_base_model_and_tokenizer</a></code></li>
 <li><code><a title="mimir.models.Model.load_model_properties" href="#mimir.models.Model.load_model_properties">load_model_properties</a></code></li>
 <li><code><a title="mimir.models.Model.to" href="#mimir.models.Model.to">to</a></code></li>
-<li><code><a title="mimir.models.Model.unload" href="#mimir.models.Model.unload">unload</a></code></li>
 </ul>
 </li>
 </ul>
@@ -2076,10 +2059,8 @@ <h4><code><a title="mimir.models.LanguageModel" href="#mimir.models.LanguageMode
 <li><code><a title="mimir.models.LanguageModel.get_entropy" href="#mimir.models.LanguageModel.get_entropy">get_entropy</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.get_lls" href="#mimir.models.LanguageModel.get_lls">get_lls</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.get_max_norm" href="#mimir.models.LanguageModel.get_max_norm">get_max_norm</a></code></li>
-<li><code><a title="mimir.models.LanguageModel.get_min_k_prob" href="#mimir.models.LanguageModel.get_min_k_prob">get_min_k_prob</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.get_rank" href="#mimir.models.LanguageModel.get_rank">get_rank</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.get_ref" href="#mimir.models.LanguageModel.get_ref">get_ref</a></code></li>
-<li><code><a title="mimir.models.LanguageModel.get_zlib_entropy" href="#mimir.models.LanguageModel.get_zlib_entropy">get_zlib_entropy</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.sample_from_model" href="#mimir.models.LanguageModel.sample_from_model">sample_from_model</a></code></li>
 <li><code><a title="mimir.models.LanguageModel.training" href="#mimir.models.LanguageModel.training">training</a></code></li>
 </ul>
@@ -2124,7 +2105,9 @@ <h4><code><a title="mimir.models.ReferenceModel" href="#mimir.models.ReferenceMo
 <ul class="">
 <li><code><a title="mimir.models.ReferenceModel.call_super_init" href="#mimir.models.ReferenceModel.call_super_init">call_super_init</a></code></li>
 <li><code><a title="mimir.models.ReferenceModel.dump_patches" href="#mimir.models.ReferenceModel.dump_patches">dump_patches</a></code></li>
+<li><code><a title="mimir.models.ReferenceModel.load" href="#mimir.models.ReferenceModel.load">load</a></code></li>
 <li><code><a title="mimir.models.ReferenceModel.training" href="#mimir.models.ReferenceModel.training">training</a></code></li>
+<li><code><a title="mimir.models.ReferenceModel.unload" href="#mimir.models.ReferenceModel.unload">unload</a></code></li>
 </ul>
 </li>
 </ul>