diff --git a/docs/attacks/index.html b/docs/attacks/index.html index 5021964..fa9f341 100644 --- a/docs/attacks/index.html +++ b/docs/attacks/index.html @@ -45,7 +45,7 @@

Sub-modules

mimir.attacks.loss
-

Straight-forward LOSS attack

+

Straight-forward LOSS attack, as described in https://ieeexplore.ieee.org/abstract/document/8429311

mimir.attacks.min_k
diff --git a/docs/attacks/loss.html b/docs/attacks/loss.html index 68e8256..4f5ff2f 100644 --- a/docs/attacks/loss.html +++ b/docs/attacks/loss.html @@ -5,7 +5,7 @@ mimir.attacks.loss API documentation - + @@ -22,14 +22,15 @@

Module mimir.attacks.loss

-

Straight-forward LOSS attack

+

Straight-forward LOSS attack, as described in https://ieeexplore.ieee.org/abstract/document/8429311

Expand source code
"""
-    Straight-forward LOSS attack
+    Straight-forward LOSS attack, as described in https://ieeexplore.ieee.org/abstract/document/8429311
 """
+import torch as ch
 from mimir.attacks.blackbox_attacks import Attack
 
 
@@ -37,7 +38,11 @@ 

Module mimir.attacks.loss

def __init__(self, config, model): super().__init__(config, model, ref_model=None) + @ch.no_grad() def _attack(self, document, probs, tokens=None, **kwargs): + """ + LOSS-score. Use log-likelihood from model. + """ return self.model.get_ll(document, probs=probs, tokens=tokens)
@@ -64,7 +69,11 @@

Classes

def __init__(self, config, model): super().__init__(config, model, ref_model=None) + @ch.no_grad() def _attack(self, document, probs, tokens=None, **kwargs): + """ + LOSS-score. Use log-likelihood from model. + """ return self.model.get_ll(document, probs=probs, tokens=tokens)

Ancestors

diff --git a/docs/attacks/min_k.html b/docs/attacks/min_k.html index 79db63b..a10cec3 100644 --- a/docs/attacks/min_k.html +++ b/docs/attacks/min_k.html @@ -32,7 +32,6 @@

Module mimir.attacks.min_k

""" import torch as ch import numpy as np - from mimir.attacks.blackbox_attacks import Attack @@ -42,6 +41,9 @@

Module mimir.attacks.min_k

@ch.no_grad() def _attack(self, document, probs, tokens=None, **kwargs): + """ + Min-k % Prob Attack. Gets model probabilities and returns likelihood when computed over top k% of ngrams. + """ # Hyper-params specific to min-k attack k: float = kwargs.get("k", 0.2) window: int = kwargs.get("window", 1) @@ -87,6 +89,9 @@

Classes

@ch.no_grad() def _attack(self, document, probs, tokens=None, **kwargs): + """ + Min-k % Prob Attack. Gets model probabilities and returns likelihood when computed over top k% of ngrams. + """ # Hyper-params specific to min-k attack k: float = kwargs.get("k", 0.2) window: int = kwargs.get("window", 1) diff --git a/docs/attacks/neighborhood.html b/docs/attacks/neighborhood.html index f99417e..923d950 100644 --- a/docs/attacks/neighborhood.html +++ b/docs/attacks/neighborhood.html @@ -46,6 +46,17 @@

Module mimir.attacks.neighborhood

from mimir.attacks.blackbox_attacks import Attack +# def get_mask_model(config: ExperimentConfig, **kwargs): +# if "t5" in config.neighborhood_config.model: +# mask_model = T5Model( +# config, model_kwargs=model_kwargs, tokenizer_kwargs=tokenizer_kwargs +# ) +# elif "bert" in config.neighborhood_config.model: +# mask_model = BertModel(config) +# else: +# raise ValueError(f"Unknown model {config.neighborhood_config.model}") + + class NeighborhoodAttack(Attack): def __init__( self, @@ -58,43 +69,56 @@

Module mimir.attacks.neighborhood

self.ref_model = self._pick_neighbor_model() assert issubclass(type(self.ref_model), MaskFillingModel), "ref_model must be MaskFillingModel for neighborhood attack" + def get_mask_model(self): + return self.ref_model + + def create_fill_dictionary(self, data): + neigh_config = self.config.neighborhood_config + if "t5" in neigh_config.model and neigh_config.random_fills: + if not self.config.pretokenized: + # TODO: maybe can be done if detokenized, but currently not supported + self.ref_model.create_fill_dictionary(data) + def _pick_neighbor_model(self): # mask filling t5 model mask_model = None neigh_config = self.config.neighborhood_config env_config = self.config.env_config - if neigh_config: - model_kwargs = dict() - if not neigh_config.random_fills: - if env_config.int8: - model_kwargs = dict( - load_in_8bit=True, device_map="auto", torch_dtype=torch.bfloat16 - ) - elif env_config.half: - model_kwargs = dict(torch_dtype=torch.bfloat16) - try: - n_positions = ( - 512 # Should fix later, but for T-5 this is 512 indeed - ) - # mask_model.config.n_positions - except AttributeError: - n_positions = self.config.max_tokens - else: + + model_kwargs = dict() + if not neigh_config.random_fills: + if env_config.int8: + model_kwargs = dict( + load_in_8bit=True, device_map="auto", torch_dtype=torch.bfloat16 + ) + elif env_config.half: + model_kwargs = dict(torch_dtype=torch.bfloat16) + try: + n_positions = ( + 512 # Should fix later, but for T-5 this is 512 indeed + ) + # mask_model.config.n_positions + except AttributeError: n_positions = self.config.max_tokens - tokenizer_kwargs = { - "model_max_length": n_positions, - } + else: + n_positions = self.config.max_tokens + tokenizer_kwargs = { + "model_max_length": n_positions, + } - if "t5" in self.config.neighborhood_config.model: + print(f"Loading mask filling model {neigh_config.model}...") + if "t5" in neigh_config.model: mask_model = T5Model( self.config, model_kwargs=model_kwargs, tokenizer_kwargs=tokenizer_kwargs, ) - 
elif "bert" in self.config.neighborhood_config.model: + elif "bert" in neigh_config.model: mask_model = BertModel(self.config) else: - raise ValueError(f"Unknown model {self.config.neighborhood_config.model}") + raise ValueError(f"Unknown model {neigh_config.model}") + # if config.dataset_member in ['english', 'german']: + # preproc_tokenizer = mask_tokenizer return mask_model def load(self): @@ -124,6 +148,9 @@

Module mimir.attacks.neighborhood

return neighbors def _attack(self, document, probs, tokens=None, **kwargs): + """ + Neighborhood attack score. Looks at difference in likelihood for given document and average likelihood of its neighbors + """ # documents here are actually neighbors batch_size = kwargs.get("batch_size", 4) substr_neighbors = kwargs.get("substr_neighbors", None) @@ -1138,43 +1165,56 @@

Inherited members

self.ref_model = self._pick_neighbor_model() assert issubclass(type(self.ref_model), MaskFillingModel), "ref_model must be MaskFillingModel for neighborhood attack" + def get_mask_model(self): + return self.ref_model + + def create_fill_dictionary(self, data): + neigh_config = self.config.neighborhood_config + if "t5" in neigh_config.model and neigh_config.random_fills: + if not self.config.pretokenized: + # TODO: maybe can be done if detokenized, but currently not supported + self.ref_model.create_fill_dictionary(data) + def _pick_neighbor_model(self): # mask filling t5 model mask_model = None neigh_config = self.config.neighborhood_config env_config = self.config.env_config - if neigh_config: - model_kwargs = dict() - if not neigh_config.random_fills: - if env_config.int8: - model_kwargs = dict( - load_in_8bit=True, device_map="auto", torch_dtype=torch.bfloat16 - ) - elif env_config.half: - model_kwargs = dict(torch_dtype=torch.bfloat16) - try: - n_positions = ( - 512 # Should fix later, but for T-5 this is 512 indeed - ) - # mask_model.config.n_positions - except AttributeError: - n_positions = self.config.max_tokens - else: + + model_kwargs = dict() + if not neigh_config.random_fills: + if env_config.int8: + model_kwargs = dict( + load_in_8bit=True, device_map="auto", torch_dtype=torch.bfloat16 + ) + elif env_config.half: + model_kwargs = dict(torch_dtype=torch.bfloat16) + try: + n_positions = ( + 512 # Should fix later, but for T-5 this is 512 indeed + ) + # mask_model.config.n_positions + except AttributeError: n_positions = self.config.max_tokens - tokenizer_kwargs = { - "model_max_length": n_positions, - } + else: + n_positions = self.config.max_tokens + tokenizer_kwargs = { + "model_max_length": n_positions, + } - if "t5" in self.config.neighborhood_config.model: + print(f"Loading mask filling model {neigh_config.model}...") + if "t5" in neigh_config.model: mask_model = T5Model( self.config, model_kwargs=model_kwargs, tokenizer_kwargs=tokenizer_kwargs, ) - 
elif "bert" in self.config.neighborhood_config.model: + elif "bert" in neigh_config.model: mask_model = BertModel(self.config) else: - raise ValueError(f"Unknown model {self.config.neighborhood_config.model}") + raise ValueError(f"Unknown model {neigh_config.model}") + # if config.dataset_member in ['english', 'german']: + # preproc_tokenizer = mask_tokenizer return mask_model def load(self): @@ -1204,6 +1244,9 @@

Inherited members

return neighbors def _attack(self, document, probs, tokens=None, **kwargs): + """ + Neighborhood attack score. Looks at difference in likelihood for given document and average likelihood of its neighbors + """ # documents here are actually neighbors batch_size = kwargs.get("batch_size", 4) substr_neighbors = kwargs.get("substr_neighbors", None) @@ -1224,6 +1267,36 @@

Ancestors

Methods

+
+def create_fill_dictionary(self, data) +
+
+
+
+ +Expand source code + +
def create_fill_dictionary(self, data):
+    neigh_config = self.config.neighborhood_config
+    if "t5" in neigh_config.model and neigh_config.random_fills:
+        if not self.config.pretokenized:
+            # TODO: maybe can be done if detokenized, but currently not supported
+            self.ref_model.create_fill_dictionary(data)
+
+
+
+def get_mask_model(self) +
+
+
+
+ +Expand source code + +
def get_mask_model(self):
+    return self.ref_model
+
+
def get_neighbors(self, documents, **kwargs)
@@ -1833,6 +1906,8 @@

NeighborhoodAttack

diff --git a/docs/attacks/reference.html b/docs/attacks/reference.html index 468f340..612a83c 100644 --- a/docs/attacks/reference.html +++ b/docs/attacks/reference.html @@ -41,6 +41,9 @@

Module mimir.attacks.reference

self.ref_model.load() def _attack(self, document, probs, tokens=None, **kwargs): + """ + Reference-based attack score. Performs difficulty calibration in model likelihood using a reference model. + """ loss = kwargs.get('loss', None) if loss is None: loss = self.model.get_ll(document, probs=probs, tokens=tokens) @@ -75,6 +78,9 @@

Classes

self.ref_model.load() def _attack(self, document, probs, tokens=None, **kwargs): + """ + Reference-based attack score. Performs difficulty calibration in model likelihood using a reference model. + """ loss = kwargs.get('loss', None) if loss is None: loss = self.model.get_ll(document, probs=probs, tokens=tokens)