diff --git a/mimir/attacks/loss.py b/mimir/attacks/loss.py
index 6bf17e6..c1bda89 100644
--- a/mimir/attacks/loss.py
+++ b/mimir/attacks/loss.py
@@ -17,4 +17,4 @@ def _attack(self, document, probs, tokens=None, **kwargs):
         """
         LOSS-score. Use log-likelihood from model.
         """
-        return self.model.get_ll(document, probs=probs, tokens=tokens)
+        return self.target_model.get_ll(document, probs=probs, tokens=tokens)
diff --git a/mimir/attacks/min_k.py b/mimir/attacks/min_k.py
index bd81fe1..908e20c 100644
--- a/mimir/attacks/min_k.py
+++ b/mimir/attacks/min_k.py
@@ -26,7 +26,7 @@ def _attack(self, document, probs, tokens=None, **kwargs):
         all_prob = (
             probs
             if probs is not None
-            else self.model.get_probabilities(document, tokens=tokens)
+            else self.target_model.get_probabilities(document, tokens=tokens)
         )
         # iterate through probabilities by ngram defined by window size at given stride
         ngram_probs = []
diff --git a/mimir/attacks/quantile.py b/mimir/attacks/quantile.py
index 5866444..5a1e67e 100644
--- a/mimir/attacks/quantile.py
+++ b/mimir/attacks/quantile.py
@@ -90,7 +90,7 @@ def prepare(self, known_non_members):
         # Step 1: Use non-member dataset, collect confidence scores for correct label.
         # Get likelihood scores from target model for known_non_members
         # Note that these non-members should be different from the ones in testing
-        scores = [self.model.get_ll(x) for x in known_non_members]
+        scores = [self.target_model.get_ll(x) for x in known_non_members]
         # Construct a dataset out of this to be used in Huggingface, with
         # "text" containing the actual data, and "labels" containing the scores
         dataset = Dataset.from_dict({"text": known_non_members, "labels": scores})
@@ -102,7 +102,7 @@ def attack(self, document, **kwargs):
         # Step 3: Test by checking if member: score is higher than output of quantile regression model.
 
         # Get likelihood score from target model for doc
-        ll = self.model.get_ll(document)
+        ll = self.target_model.get_ll(document)
 
         # Return ll - quantile_model(doc)
         tokenized = self.ref_model.tokenizer(document, return_tensors="pt")
diff --git a/mimir/attacks/reference.py b/mimir/attacks/reference.py
index c0c50fe..a568205 100644
--- a/mimir/attacks/reference.py
+++ b/mimir/attacks/reference.py
@@ -21,6 +21,6 @@ def _attack(self, document, probs, tokens=None, **kwargs):
         """
         loss = kwargs.get('loss', None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         ref_loss = self.ref_model.get_ll(document, probs=probs, tokens=tokens)
         return loss - ref_loss
diff --git a/mimir/attacks/zlib.py b/mimir/attacks/zlib.py
index 2c2aaea..1624bef 100644
--- a/mimir/attacks/zlib.py
+++ b/mimir/attacks/zlib.py
@@ -30,6 +30,6 @@ def _attack(
         """
         loss = kwargs.get("loss", None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         zlib_entropy = len(zlib.compress(bytes(document, "utf-8")))
         return loss / zlib_entropy
diff --git a/mimir/models.py b/mimir/models.py
index 4633ff7..f43f848 100644
--- a/mimir/models.py
+++ b/mimir/models.py
@@ -67,10 +67,10 @@ def unload(self):
             pass
         print(f'DONE ({time.time() - start:.2f}s)')
 
-    @torch.no_grad()
     def get_probabilities(self,
                           text: str,
-                          tokens: np.ndarray = None):
+                          tokens: np.ndarray = None,
+                          no_grads: bool = True):
         """
         Get the probabilities or log-softmaxed logits for a text under the current model.
         Args:
@@ -84,45 +84,50 @@ def get_probabilities(self,
         Returns:
             list: A list of probabilities.
         """
-        if self.device is None or self.name is None:
-            raise ValueError("Please set self.device and self.name in child class")
+        with torch.set_grad_enabled(not no_grads):
+            if self.device is None or self.name is None:
+                raise ValueError("Please set self.device and self.name in child class")
 
-        if tokens is not None:
-            labels = torch.from_numpy(tokens.astype(np.int64)).type(torch.LongTensor)
-            if labels.shape[0] != 1:
-                # expand first dimension
-                labels = labels.unsqueeze(0)
-        else:
-            tokenized = self.tokenizer(
-                text, return_tensors="pt")
-            labels = tokenized.input_ids
-
-        all_prob = []
-        for i in range(0, labels.size(1), self.stride):
-            begin_loc = max(i + self.stride - self.max_length, 0)
-            end_loc = min(i + self.stride, labels.size(1))
-            trg_len = end_loc - i  # may be different from stride on last loop
-            input_ids = labels[:, begin_loc:end_loc].to(self.device)
-            target_ids = input_ids.clone()
-            target_ids[:, :-trg_len] = -100
-
-            logits = self.model(input_ids, labels=target_ids).logits.cpu()
-            shift_logits = logits[..., :-1, :].contiguous()
-            probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
-            shift_labels = target_ids[..., 1:].cpu().contiguous()
-            labels_processed = shift_labels[0]
+            if tokens is not None:
+                labels = torch.from_numpy(tokens.astype(np.int64)).type(torch.LongTensor)
+                if labels.shape[0] != 1:
+                    # expand first dimension
+                    labels = labels.unsqueeze(0)
+            else:
+                tokenized = self.tokenizer(
+                    text, return_tensors="pt")
+                labels = tokenized.input_ids
 
-            del input_ids
-            del target_ids
+            all_prob = []
+            for i in range(0, labels.size(1), self.stride):
+                begin_loc = max(i + self.stride - self.max_length, 0)
+                end_loc = min(i + self.stride, labels.size(1))
+                trg_len = end_loc - i  # may be different from stride on last loop
+                input_ids = labels[:, begin_loc:end_loc].to(self.device)
+                target_ids = input_ids.clone()
+                target_ids[:, :-trg_len] = -100
 
-            for i, token_id in enumerate(labels_processed):
-                if token_id != -100:
-                    probability = probabilities[0, i, token_id].item()
-                    all_prob.append(probability)
-        # Should be equal to # of tokens - 1 to account for shift
-        assert len(all_prob) == labels.size(1) - 1
+                logits = self.model(input_ids, labels=target_ids).logits.cpu()
+                shift_logits = logits[..., :-1, :].contiguous()
+                probabilities = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+                shift_labels = target_ids[..., 1:].cpu().contiguous()
+                labels_processed = shift_labels[0]
 
-        return all_prob
+                del input_ids
+                del target_ids
+
+                for i, token_id in enumerate(labels_processed):
+                    if token_id != -100:
+                        probability = probabilities[0, i, token_id]
+                        if no_grads:
+                            probability = probability.item()
+                        all_prob.append(probability)
+            # Should be equal to # of tokens - 1 to account for shift
+            assert len(all_prob) == labels.size(1) - 1
+
+            if no_grads:
+                return all_prob
+        return torch.tensor(all_prob)
 
     @torch.no_grad()
     def get_ll(self,
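
A minimal usage sketch of the new no_grads flag (illustrative only, not part of the patch). It assumes a loaded mimir model wrapper, here called target_model, and an example document string; both names are placeholders.

    # Default behaviour is unchanged: per-token log-probabilities returned as plain
    # Python floats, computed with gradient tracking disabled.
    log_probs = target_model.get_probabilities(document)

    # With no_grads=False the same values are computed under
    # torch.set_grad_enabled(True) and returned as a torch.Tensor instead of a list.
    log_prob_tensor = target_model.get_probabilities(document, no_grads=False)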