From 79760db38346127277c0f6b1a7e9911eb2aee081 Mon Sep 17 00:00:00 2001
From: GitHub Actions
Date: Wed, 27 Mar 2024 16:51:30 +0000
Subject: [PATCH] Update documentation

---
 ...blackbox_attacks.html => all_attacks.html} | 146 +++++++-------
 docs/attacks/gradnorm.html                    | 182 ++++++++++++++++++
 docs/attacks/index.html                       |  11 +-
 docs/attacks/loss.html                        |  10 +-
 docs/attacks/min_k.html                       |  10 +-
 docs/attacks/neighborhood.html                |  10 +-
 docs/attacks/quantile.html                    |  10 +-
 docs/attacks/reference.html                   |  10 +-
 docs/attacks/utils.html                       |  25 +--
 docs/attacks/zlib.html                        |  10 +-
 docs/models.html                              |  48 +++--
 11 files changed, 345 insertions(+), 127 deletions(-)
 rename docs/attacks/{blackbox_attacks.html => all_attacks.html} (80%)
 create mode 100644 docs/attacks/gradnorm.html

diff --git a/docs/attacks/blackbox_attacks.html b/docs/attacks/all_attacks.html
similarity index 80%
rename from docs/attacks/blackbox_attacks.html
rename to docs/attacks/all_attacks.html
index ba2426a..2f8af53 100644
--- a/docs/attacks/blackbox_attacks.html
+++ b/docs/attacks/all_attacks.html
@@ -4,7 +4,7 @@
-mimir.attacks.blackbox_attacks API documentation
+mimir.attacks.all_attacks API documentation
@@ -19,7 +19,7 @@
-Module mimir.attacks.blackbox_attacks
+Module mimir.attacks.all_attacks

Enum class for attacks. Also contains the base attack class.

@@ -36,22 +36,24 @@

Module mimir.attacks.blackbox_attacks

 # Attack definitions
-class BlackBoxAttacks(str, Enum):
+class AllAttacks(str, Enum):
     LOSS = "loss" # Done
     REFERENCE_BASED = "ref" # Done
     ZLIB = "zlib" # Done
     MIN_K = "min_k" # Done
     NEIGHBOR = "ne" # Done
+    GRADNORM = "gradnorm" # Done
     # QUANTILE = "quantile" # Uncomment when tested implementation is available


 # Base attack class
 class Attack:
-    def __init__(self, config, target_model: Model, ref_model: Model = None):
+    def __init__(self, config, target_model: Model, ref_model: Model = None, is_blackbox: bool = True):
         self.config = config
         self.target_model = target_model
         self.ref_model = ref_model
         self.is_loaded = False
+        self.is_blackbox = is_blackbox

     def load(self):
         """
@@ -105,9 +107,61 @@

Module mimir.attacks.blackbox_attacks

Classes

-
+
+class AllAttacks
+(value, names=None, *, module=None, qualname=None, type=None, start=1)
+
+

An enumeration.

+
+ +Expand source code + +
class AllAttacks(str, Enum):
+    LOSS = "loss" # Done
+    REFERENCE_BASED = "ref" # Done
+    ZLIB = "zlib" # Done
+    MIN_K = "min_k" # Done
+    NEIGHBOR = "ne" # Done
+    GRADNORM = "gradnorm" # Done
+    # QUANTILE = "quantile" # Uncomment when tested implementation is available
+
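Because AllAttacks mixes in str, its members double as the plain string names used to select attacks elsewhere in mimir. A minimal usage sketch, assuming the get_attacker() helper from mimir.attacks.utils shown later in this patch; the requested list is a hypothetical stand-in for an experiment config:

    # Hypothetical sketch: resolve attack names into AllAttacks members,
    # then into attack classes via the get_attacker() mapping.
    from mimir.attacks.all_attacks import AllAttacks
    from mimir.attacks.utils import get_attacker

    requested = ["loss", "zlib", "gradnorm"]   # assumed config values
    for name in requested:
        member = AllAttacks(name)              # str-valued Enum: lookup by value
        attack_cls = get_attacker(member)      # mapping defined in mimir/attacks/utils.py
        print(member.value, "->", attack_cls.__name__)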
+

Ancestors

+
    +
  • builtins.str
  • +
  • enum.Enum
  • +
+

Class variables

+
+
var GRADNORM
+
+
+
+
var LOSS
+
+
+
+
var MIN_K
+
+
+
+
var NEIGHBOR
+
+
+
+
var REFERENCE_BASED
+
+
+
+
var ZLIB
+
+
+
+
+
+
 class Attack
-(config, target_model: Model, ref_model: Model = None)
+(config, target_model: Model, ref_model: Model = None, is_blackbox: bool = True)
@@ -116,11 +170,12 @@

Classes

Expand source code
class Attack:
-    def __init__(self, config, target_model: Model, ref_model: Model = None):
+    def __init__(self, config, target_model: Model, ref_model: Model = None, is_blackbox: bool = True):
         self.config = config
         self.target_model = target_model
         self.ref_model = ref_model
         self.is_loaded = False
+        self.is_blackbox = is_blackbox
 
     def load(self):
         """
@@ -166,6 +221,7 @@ 

Classes

Subclasses

Methods

-
+
def attack(self, document, probs, **kwargs)
@@ -208,7 +264,7 @@

Methods

return score
-
+
def load(self)
@@ -226,7 +282,7 @@

Methods

self.is_loaded = True
-
+
def unload(self)
@@ -243,53 +299,6 @@

Methods

-
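To make the contract above concrete, here is a hypothetical subclass sketch (not a class that exists in mimir), assuming the public attack() wrapper delegates to _attack() as the GradNormAttack implementation in this patch suggests: subclasses pass the target model to __init__ and return a scalar score from _attack(), where higher means more member-like.

    # Hypothetical example: a trivial attack scoring a document by its
    # average token log-probability under the target model.
    from mimir.attacks.all_attacks import Attack
    from mimir.models import Model


    class MeanLogProbAttack(Attack):
        def __init__(self, config, target_model: Model):
            # Black-box: needs only output probabilities, no reference model.
            super().__init__(config, target_model, ref_model=None, is_blackbox=True)

        def _attack(self, document, probs, tokens=None, **kwargs):
            # probs is assumed to hold the per-token log-probabilities that
            # the runner precomputes for black-box attacks.
            return sum(probs) / len(probs)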
-class BlackBoxAttacks
-(value, names=None, *, module=None, qualname=None, type=None, start=1)
-
-

An enumeration.

-
- -Expand source code - -
class BlackBoxAttacks(str, Enum):
-    LOSS = "loss" # Done
-    REFERENCE_BASED = "ref" # Done
-    ZLIB = "zlib" # Done
-    MIN_K = "min_k" # Done
-    NEIGHBOR = "ne" # Done
-    # QUANTILE = "quantile" # Uncomment when tested implementation is available
-
-

Ancestors

-
    -
  • builtins.str
  • -
  • enum.Enum
  • -
-

Class variables

-
-
var LOSS
-
-
-
-
var MIN_K
-
-
-
-
var NEIGHBOR
-
-
-
-
var REFERENCE_BASED
-
-
-
-
var ZLIB
-
-
-
-
-
@@ -312,21 +321,22 @@

Index

  • Classes

    • -

      Attack

-
diff --git a/docs/attacks/gradnorm.html b/docs/attacks/gradnorm.html
new file mode 100644
index 0000000..092f675
--- /dev/null
+++ b/docs/attacks/gradnorm.html
@@ -0,0 +1,182 @@
+mimir.attacks.gradnorm API documentation
      +
      +
      +

      Module mimir.attacks.gradnorm

      +
      +
      +

      Gradient-norm attack. Proposed for MIA in multiple settings, and particularly experimented for pre-training data and LLMs in https://arxiv.org/abs/2402.17012

      +
      + +Expand source code + +
      """
      +    Gradient-norm attack. Proposed for MIA in multiple settings, and particularly experimented for pre-training data and LLMs in https://arxiv.org/abs/2402.17012
      +"""
      +
      +import torch as ch
      +import numpy as np
      +from mimir.attacks.all_attacks import Attack
      +from mimir.models import Model
      +from mimir.config import ExperimentConfig
      +
      +
      +class GradNormAttack(Attack):
      +    def __init__(self, config: ExperimentConfig, model: Model):
      +        super().__init__(config, model, ref_model=None, is_blackbox=False)
      +
      +    def _attack(self, document, probs, tokens=None, **kwargs):
      +        """
+        Gradient Norm Attack. Computes p-norms of loss gradients w.r.t. model parameters.
      +        """
      +        # We ignore probs here since they are computed in the general case without gradient-tracking (to save memory)
      +
+        # Hyper-params specific to the gradient-norm attack
      +        p: float = kwargs.get("p", np.inf)
      +        if p not in [1, 2, np.inf]:
      +            raise ValueError(f"Invalid p-norm value: {p}.")
      +
      +        # Make sure model params require gradients
      +        # for name, param in self.target_model.model.named_parameters():
      +        #    param.requires_grad = True
      +
      +        # Get gradients for model parameters
      +        self.target_model.model.zero_grad()
      +        all_prob = self.target_model.get_probabilities(document, tokens=tokens, no_grads=False)
      +        loss = - ch.mean(all_prob)
      +        loss.backward()
      +
      +        # Compute p-norm of gradients (for all model params where grad exists)
      +        grad_norms = []
      +        for param in self.target_model.model.parameters():
      +            if param.grad is not None:
      +                grad_norms.append(param.grad.detach().norm(p))
      +        grad_norm = ch.stack(grad_norms).mean()
      +
      +        # Zero out gradients again
      +        self.target_model.model.zero_grad()
      +
      +        return -grad_norm.cpu().numpy()
      +
      +
      +
      +
      +
      +
      +
      +
      +
      +

      Classes

      +
      +
+class GradNormAttack
+(config: ExperimentConfig, model: Model)
      +
      +
      +
      + +Expand source code + +
      class GradNormAttack(Attack):
      +    def __init__(self, config: ExperimentConfig, model: Model):
      +        super().__init__(config, model, ref_model=None, is_blackbox=False)
      +
      +    def _attack(self, document, probs, tokens=None, **kwargs):
      +        """
+        Gradient Norm Attack. Computes p-norms of loss gradients w.r.t. model parameters.
      +        """
      +        # We ignore probs here since they are computed in the general case without gradient-tracking (to save memory)
      +
+        # Hyper-params specific to the gradient-norm attack
      +        p: float = kwargs.get("p", np.inf)
      +        if p not in [1, 2, np.inf]:
      +            raise ValueError(f"Invalid p-norm value: {p}.")
      +
      +        # Make sure model params require gradients
      +        # for name, param in self.target_model.model.named_parameters():
      +        #    param.requires_grad = True
      +
      +        # Get gradients for model parameters
      +        self.target_model.model.zero_grad()
      +        all_prob = self.target_model.get_probabilities(document, tokens=tokens, no_grads=False)
      +        loss = - ch.mean(all_prob)
      +        loss.backward()
      +
      +        # Compute p-norm of gradients (for all model params where grad exists)
      +        grad_norms = []
      +        for param in self.target_model.model.parameters():
      +            if param.grad is not None:
      +                grad_norms.append(param.grad.detach().norm(p))
      +        grad_norm = ch.stack(grad_norms).mean()
      +
      +        # Zero out gradients again
      +        self.target_model.model.zero_grad()
      +
      +        return -grad_norm.cpu().numpy()
      +
      +
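A short usage sketch for the class above, assuming config is an ExperimentConfig, target_model a loaded mimir Model, document a raw text sample, and that attack() forwards its keyword arguments to _attack():

    # Hypothetical usage sketch for GradNormAttack.
    from mimir.attacks.gradnorm import GradNormAttack

    attack = GradNormAttack(config, target_model)
    attack.load()  # base-class load(); nothing attack-specific to prepare
    score = attack.attack(document, probs=None, p=2)  # p in {1, 2, inf}; default is inf

    # The score is the negated mean of per-parameter gradient norms, so larger
    # (less negative) values mean smaller gradients, i.e. more member-like.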

      Ancestors

      + +

      Inherited members

      + +
      +
      +
      +
      + +
\ No newline at end of file
diff --git a/docs/attacks/index.html b/docs/attacks/index.html
index 39c2b24..66733df 100644
--- a/docs/attacks/index.html
+++ b/docs/attacks/index.html
@@ -35,13 +35,17 @@

      Module mimir.attacks

      Sub-modules

      +
      mimir.attacks.all_attacks
      +
      +

      Enum class for attacks. Also contains the base attack class.

      +
      mimir.attacks.attack_utils

      Utility functions for attacks

      -
      mimir.attacks.blackbox_attacks
      +
      mimir.attacks.gradnorm
      -

      Enum class for attacks. Also contains the base attack class.

      +

      Gradient-norm attack. Proposed for MIA in multiple settings, and particularly experimented for pre-training data and LLMs in …

      mimir.attacks.loss
      @@ -99,8 +103,9 @@

      Index

    • Sub-modules

        +
      • mimir.attacks.all_attacks
      • mimir.attacks.attack_utils
      • -
      • mimir.attacks.blackbox_attacks
      • +
      • mimir.attacks.gradnorm
      • mimir.attacks.loss
      • mimir.attacks.min_k
      • mimir.attacks.neighborhood
diff --git a/docs/attacks/loss.html b/docs/attacks/loss.html
index f9007d2..9e2b6e0 100644
--- a/docs/attacks/loss.html
+++ b/docs/attacks/loss.html
@@ -31,7 +31,7 @@

        Module mimir.attacks.loss

 Straight-forward LOSS attack, as described in https://ieeexplore.ieee.org/abstract/document/8429311
 """
 import torch as ch
-from mimir.attacks.blackbox_attacks import Attack
+from mimir.attacks.all_attacks import Attack
 from mimir.models import Model
 from mimir.config import ExperimentConfig
@@ -82,14 +82,14 @@

        Classes

        Ancestors

        Inherited members

diff --git a/docs/attacks/min_k.html b/docs/attacks/min_k.html
index 06545d8..375ac50 100644
--- a/docs/attacks/min_k.html
+++ b/docs/attacks/min_k.html
@@ -32,7 +32,7 @@

        Module mimir.attacks.min_k

        """ import torch as ch import numpy as np -from mimir.attacks.blackbox_attacks import Attack +from mimir.attacks.all_attacks import Attack from mimir.models import Model from mimir.config import ExperimentConfig @@ -117,14 +117,14 @@

        Classes

        Ancestors

        Inherited members

diff --git a/docs/attacks/neighborhood.html b/docs/attacks/neighborhood.html
index 90cb325..211105d 100644
--- a/docs/attacks/neighborhood.html
+++ b/docs/attacks/neighborhood.html
@@ -43,7 +43,7 @@

        Module mimir.attacks.neighborhood

 from mimir.config import ExperimentConfig
 from mimir.attacks.attack_utils import count_masks, apply_extracted_fills
 from mimir.models import Model, ReferenceModel
-from mimir.attacks.blackbox_attacks import Attack
+from mimir.attacks.all_attacks import Attack


 class NeighborhoodAttack(Attack):
@@ -1278,7 +1278,7 @@

        Inherited members

        Ancestors

        Methods

        @@ -1353,10 +1353,10 @@

        Methods

        Inherited members

diff --git a/docs/attacks/quantile.html b/docs/attacks/quantile.html
index 817ee02..6e42fbd 100644
--- a/docs/attacks/quantile.html
+++ b/docs/attacks/quantile.html
@@ -40,7 +40,7 @@

        Module mimir.attacks.quantile

 from transformers import TrainingArguments, Trainer
 from datasets import Dataset
-from mimir.attacks.blackbox_attacks import Attack
+from mimir.attacks.all_attacks import Attack


 class CustomTrainer(Trainer):
@@ -388,7 +388,7 @@

        Methods

        Ancestors

        Methods

        @@ -425,10 +425,10 @@

        Methods

        Inherited members

diff --git a/docs/attacks/reference.html b/docs/attacks/reference.html
index fdc20a8..7d1dd6d 100644
--- a/docs/attacks/reference.html
+++ b/docs/attacks/reference.html
@@ -30,7 +30,7 @@

        Module mimir.attacks.reference

        """
             Reference-based attacks.
         """
        -from mimir.attacks.blackbox_attacks import Attack
        +from mimir.attacks.all_attacks import Attack
         from mimir.models import Model, ReferenceModel
         from mimir.config import ExperimentConfig
         
        @@ -95,14 +95,14 @@ 

        Classes

        Ancestors

        Inherited members

diff --git a/docs/attacks/utils.html b/docs/attacks/utils.html
index 01cd8c0..6ec49c8 100644
--- a/docs/attacks/utils.html
+++ b/docs/attacks/utils.html
@@ -26,23 +26,25 @@

        Module mimir.attacks.utils

Expand source code
-from mimir.attacks.blackbox_attacks import BlackBoxAttacks
+from mimir.attacks.all_attacks import AllAttacks
         
         from mimir.attacks.loss import LOSSAttack
         from mimir.attacks.reference import ReferenceAttack
         from mimir.attacks.zlib import ZLIBAttack
         from mimir.attacks.min_k import MinKProbAttack
         from mimir.attacks.neighborhood import NeighborhoodAttack
        +from mimir.attacks.gradnorm import GradNormAttack
         
         
         # TODO Use decorators to link attack implementations with enum above
         def get_attacker(attack: str):
             mapping = {
        -        BlackBoxAttacks.LOSS: LOSSAttack,
        -        BlackBoxAttacks.REFERENCE_BASED: ReferenceAttack,
        -        BlackBoxAttacks.ZLIB: ZLIBAttack,
        -        BlackBoxAttacks.MIN_K: MinKProbAttack,
        -        BlackBoxAttacks.NEIGHBOR: NeighborhoodAttack,
        +        AllAttacks.LOSS: LOSSAttack,
        +        AllAttacks.REFERENCE_BASED: ReferenceAttack,
        +        AllAttacks.ZLIB: ZLIBAttack,
        +        AllAttacks.MIN_K: MinKProbAttack,
        +        AllAttacks.NEIGHBOR: NeighborhoodAttack,
        +        AllAttacks.GRADNORM: GradNormAttack,
             }
             attack_cls = mapping.get(attack, None)
             if attack_cls is None:
        @@ -68,11 +70,12 @@ 

        Functions

        def get_attacker(attack: str):
             mapping = {
        -        BlackBoxAttacks.LOSS: LOSSAttack,
        -        BlackBoxAttacks.REFERENCE_BASED: ReferenceAttack,
        -        BlackBoxAttacks.ZLIB: ZLIBAttack,
        -        BlackBoxAttacks.MIN_K: MinKProbAttack,
        -        BlackBoxAttacks.NEIGHBOR: NeighborhoodAttack,
        +        AllAttacks.LOSS: LOSSAttack,
        +        AllAttacks.REFERENCE_BASED: ReferenceAttack,
        +        AllAttacks.ZLIB: ZLIBAttack,
        +        AllAttacks.MIN_K: MinKProbAttack,
        +        AllAttacks.NEIGHBOR: NeighborhoodAttack,
        +        AllAttacks.GRADNORM: GradNormAttack,
             }
             attack_cls = mapping.get(attack, None)
             if attack_cls is None:
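The "# TODO Use decorators to link attack implementations with enum above" comment in this module points at a registry pattern; one possible shape, purely illustrative (none of these names exist in mimir):

    # Illustrative sketch of decorator-based registration for the TODO above.
    from mimir.attacks.all_attacks import AllAttacks

    _ATTACK_REGISTRY = {}

    def register_attack(kind: AllAttacks):
        def decorator(cls):
            _ATTACK_REGISTRY[kind] = cls
            return cls
        return decorator

    # Usage would be e.g. @register_attack(AllAttacks.GRADNORM) on GradNormAttack,
    # after which get_attacker() could simply look the class up in _ATTACK_REGISTRY.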
        diff --git a/docs/attacks/zlib.html b/docs/attacks/zlib.html
        index aa463d0..fda63ec 100644
        --- a/docs/attacks/zlib.html
        +++ b/docs/attacks/zlib.html
        @@ -34,7 +34,7 @@ 

        Module mimir.attacks.zlib

 import torch as ch
 import zlib
-from mimir.attacks.blackbox_attacks import Attack
+from mimir.attacks.all_attacks import Attack
 from mimir.models import Model
 from mimir.config import ExperimentConfig
@@ -109,14 +109,14 @@

        Classes

        Ancestors

        Inherited members

diff --git a/docs/models.html b/docs/models.html
index b46f2ec..9638c98 100644
--- a/docs/models.html
+++ b/docs/models.html
@@ -136,10 +136,15 @@

        Module mimir.models

             target_ids = input_ids.clone()
             target_ids[:, :-trg_len] = -100
-            logits = self.model(input_ids, labels=target_ids).logits.cpu()
+            logits = self.model(input_ids, labels=target_ids).logits
+            if no_grads:
+                logits = logits.cpu()
             shift_logits = logits[..., :-1, :].contiguous()
             probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
-            shift_labels = target_ids[..., 1:].cpu().contiguous()
+            shift_labels = target_ids[..., 1:]
+            if no_grads:
+                shift_labels = shift_labels.cpu()
+            shift_labels = shift_labels.contiguous()
             labels_processed = shift_labels[0]

             del input_ids
@@ -154,9 +159,10 @@

        Module mimir.models

         # Should be equal to # of tokens - 1 to account for shift
         assert len(all_prob) == labels.size(1) - 1
-        if no_grads:
-            return all_prob
-        return torch.tensor(all_prob)
+        if not no_grads:
+            all_prob = torch.stack(all_prob)
+
+        return all_prob

     @torch.no_grad()
     def get_ll(self,
@@ -1275,10 +1281,15 @@

        Inherited members

             target_ids = input_ids.clone()
             target_ids[:, :-trg_len] = -100
-            logits = self.model(input_ids, labels=target_ids).logits.cpu()
+            logits = self.model(input_ids, labels=target_ids).logits
+            if no_grads:
+                logits = logits.cpu()
             shift_logits = logits[..., :-1, :].contiguous()
             probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
-            shift_labels = target_ids[..., 1:].cpu().contiguous()
+            shift_labels = target_ids[..., 1:]
+            if no_grads:
+                shift_labels = shift_labels.cpu()
+            shift_labels = shift_labels.contiguous()
             labels_processed = shift_labels[0]

             del input_ids
@@ -1293,9 +1304,10 @@

        Inherited members

         # Should be equal to # of tokens - 1 to account for shift
         assert len(all_prob) == labels.size(1) - 1
-        if no_grads:
-            return all_prob
-        return torch.tensor(all_prob)
+        if not no_grads:
+            all_prob = torch.stack(all_prob)
+
+        return all_prob

     @torch.no_grad()
     def get_ll(self,
@@ -1555,10 +1567,15 @@

        Returns

             target_ids = input_ids.clone()
             target_ids[:, :-trg_len] = -100
-            logits = self.model(input_ids, labels=target_ids).logits.cpu()
+            logits = self.model(input_ids, labels=target_ids).logits
+            if no_grads:
+                logits = logits.cpu()
             shift_logits = logits[..., :-1, :].contiguous()
             probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
-            shift_labels = target_ids[..., 1:].cpu().contiguous()
+            shift_labels = target_ids[..., 1:]
+            if no_grads:
+                shift_labels = shift_labels.cpu()
+            shift_labels = shift_labels.contiguous()
             labels_processed = shift_labels[0]

             del input_ids
@@ -1573,9 +1590,10 @@

        Returns

         # Should be equal to # of tokens - 1 to account for shift
         assert len(all_prob) == labels.size(1) - 1
-        if no_grads:
-            return all_prob
-        return torch.tensor(all_prob)
+        if not no_grads:
+            all_prob = torch.stack(all_prob)
+
+        return all_prob
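This hunk is what lets the new gradient-norm attack backpropagate through the returned log-probabilities: with the default no_grads=True the behaviour is unchanged (detached, CPU-side values), while no_grads=False now returns a stacked tensor that still carries the computation graph. A minimal sketch of the two call patterns, with model and text as assumed placeholders for a loaded mimir Model and a document string:

    # Default path (unchanged): detached per-token log-probabilities.
    probs = model.get_probabilities(text, no_grads=True)

    # Path used by GradNormAttack: keep the graph so a loss built from the
    # returned tensor can be backpropagated to the model parameters.
    log_probs = model.get_probabilities(text, no_grads=False)
    loss = -log_probs.mean()
    loss.backward()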