Update documentation
actions-user committed Feb 7, 2024
1 parent 28a1180 commit 19e7983
Showing 3 changed files with 20 additions and 14 deletions.
4 changes: 2 additions & 2 deletions docs/attacks/quantile.html
@@ -138,7 +138,7 @@ <h1 class="title">Module <code>mimir.attacks.quantile</code></h1>
# Return ll - quantile_model(doc)
tokenized = self.ref_model.tokenizer(document, return_tensors="pt")
# Shift items in the dictionary to the correct device
- tokenized = {k: v.to(self.ref_model.model.device) for k, v in tokenized.items()}
+ tokenized = {k: v.to(self.ref_model.model.device, non_blocking=True) for k, v in tokenized.items()}
quantile_score = self.ref_model.model(**tokenized)
print(quantile_score)
quantile_score = quantile_score.logits.item()
@@ -378,7 +378,7 @@ <h3>Methods</h3>
# Return ll - quantile_model(doc)
tokenized = self.ref_model.tokenizer(document, return_tensors="pt")
# Shift items in the dictionary to the correct device
- tokenized = {k: v.to(self.ref_model.model.device) for k, v in tokenized.items()}
+ tokenized = {k: v.to(self.ref_model.model.device, non_blocking=True) for k, v in tokenized.items()}
quantile_score = self.ref_model.model(**tokenized)
print(quantile_score)
quantile_score = quantile_score.logits.item()
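For orientation, the scoring pattern in this hunk (per the "# Return ll - quantile_model(doc)" comment) subtracts the reference "quantile" model's predicted threshold from the target model's log-likelihood. The sketch below restates that pattern with illustrative names; quantile_attack_score, target_ll, ref_model, and ref_tokenizer are stand-ins, not mimir's API:

import torch

# Illustrative sketch only: the attack score is the target model's
# log-likelihood minus the reference ("quantile") model's predicted
# threshold for the document, mirroring the snippet above.
@torch.no_grad()
def quantile_attack_score(document, target_ll, ref_model, ref_tokenizer):
    tokenized = ref_tokenizer(document, return_tensors="pt")
    # Move inputs to the reference model's device; non_blocking only takes
    # effect for pinned host memory and is a no-op otherwise.
    tokenized = {k: v.to(ref_model.device, non_blocking=True) for k, v in tokenized.items()}
    predicted_threshold = ref_model(**tokenized).logits.item()
    return target_ll - predicted_threshold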
6 changes: 6 additions & 0 deletions docs/attacks/zlib.html
@@ -49,6 +49,9 @@ <h1 class="title">Module <code>mimir.attacks.zlib</code></h1>
tokens=None,
**kwargs
):
+ """
+ zlib-based attack score. Performs difficulty calibration in model likelihood by normalizing with zlib entropy.
+ """
loss = kwargs.get("loss", None)
if loss is None:
loss = self.model.get_ll(document, probs=probs, tokens=tokens)
@@ -87,6 +90,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
tokens=None,
**kwargs
):
+ """
+ zlib-based attack score. Performs difficulty calibration in model likelihood by normalizing with zlib entropy.
+ """
loss = kwargs.get("loss", None)
if loss is None:
loss = self.model.get_ll(document, probs=probs, tokens=tokens)
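As context for the new docstring, the zlib calibration normalizes the model's loss by the document's compressed byte length. A minimal sketch under that standard formulation; the function and argument names are illustrative, not mimir's exact API:

import zlib

# Illustrative sketch of zlib-entropy calibration; loss is assumed to be the
# model's per-document loss (negative log-likelihood), as in the snippet above.
def zlib_attack_score(document, loss):
    zlib_entropy = len(zlib.compress(document.encode("utf-8")))
    return loss / zlib_entropy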
24 changes: 12 additions & 12 deletions docs/models.html
@@ -81,7 +81,7 @@ <h1 class="title">Module <code>mimir.models</code></h1>
except NameError:
pass
if self.config.openai_config is None:
- self.model.to(self.device)
+ self.model.to(self.device, non_blocking=True)
if self.config.env_config.compile:
torch.compile(self.model)
print(f'DONE ({time.time() - start:.2f}s)')
@@ -423,9 +423,9 @@ <h1 class="title">Module <code>mimir.models</code></h1>
# encode each text as a list of token ids
if self.config.dataset_member == 'pubmed':
texts = [t[:t.index(SEPARATOR)] for t in texts]
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
else:
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
all_encoded = {key: value[:, :prompt_tokens] for key, value in all_encoded.items()}

decoded = ['' for _ in range(len(texts))]
@@ -472,7 +472,7 @@ <h1 class="title">Module <code>mimir.models</code></h1>

@torch.no_grad()
def get_max_norm(self, text: str, context_len=None, tk_freq_map=None):
- # TODO: update like oher attacks
+ # TODO: update like other attacks
tokenized = self.tokenizer(
text, return_tensors="pt").to(self.device)
labels = tokenized.input_ids
@@ -788,9 +788,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
# encode each text as a list of token ids
if self.config.dataset_member == 'pubmed':
texts = [t[:t.index(SEPARATOR)] for t in texts]
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
else:
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
all_encoded = {key: value[:, :prompt_tokens] for key, value in all_encoded.items()}

decoded = ['' for _ in range(len(texts))]
@@ -837,7 +837,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>

@torch.no_grad()
def get_max_norm(self, text: str, context_len=None, tk_freq_map=None):
- # TODO: update like oher attacks
+ # TODO: update like other attacks
tokenized = self.tokenizer(
text, return_tensors="pt").to(self.device)
labels = tokenized.input_ids
@@ -1008,7 +1008,7 @@ <h3>Methods</h3>
</summary>
<pre><code class="python">@torch.no_grad()
def get_max_norm(self, text: str, context_len=None, tk_freq_map=None):
- # TODO: update like oher attacks
+ # TODO: update like other attacks
tokenized = self.tokenizer(
text, return_tensors="pt").to(self.device)
labels = tokenized.input_ids
@@ -1164,9 +1164,9 @@ <h3>Methods</h3>
# encode each text as a list of token ids
if self.config.dataset_member == 'pubmed':
texts = [t[:t.index(SEPARATOR)] for t in texts]
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
else:
- all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device)
+ all_encoded = self.tokenizer(texts, return_tensors="pt", padding=True).to(self.device, non_blocking=True)
all_encoded = {key: value[:, :prompt_tokens] for key, value in all_encoded.items()}

decoded = ['' for _ in range(len(texts))]
@@ -1255,7 +1255,7 @@ <h3>Inherited members</h3>
except NameError:
pass
if self.config.openai_config is None:
- self.model.to(self.device)
+ self.model.to(self.device, non_blocking=True)
if self.config.env_config.compile:
torch.compile(self.model)
print(f'DONE ({time.time() - start:.2f}s)')
@@ -1550,7 +1550,7 @@ <h3>Methods</h3>
except NameError:
pass
if self.config.openai_config is None:
- self.model.to(self.device)
+ self.model.to(self.device, non_blocking=True)
if self.config.env_config.compile:
torch.compile(self.model)
print(f'DONE ({time.time() - start:.2f}s)')</code></pre>
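A note on the recurring .to(..., non_blocking=True) change in this file: the copy is only actually asynchronous when the source tensor sits in pinned (page-locked) host memory; otherwise PyTorch falls back to an effectively synchronous transfer. A minimal, standalone illustration of the pattern (not mimir code):

import torch

# Standalone illustration of the non_blocking transfer pattern;
# all names here are local to this example.
if torch.cuda.is_available():
    device = torch.device("cuda")
    batch = torch.randn(32, 512).pin_memory()        # page-locked host memory
    batch_gpu = batch.to(device, non_blocking=True)  # asynchronous host-to-device copy
    # ... queue GPU work that does not yet read batch_gpu ...
    torch.cuda.synchronize()                         # wait for the copy before using results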
