From aece367c3b06029be6aa4dc4cde35bf2db5906ca Mon Sep 17 00:00:00 2001 From: Anshuman Suri Date: Tue, 6 Feb 2024 22:44:11 -0500 Subject: [PATCH] Remove misc. file(s) --- mimir/attacks/tokenization.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 mimir/attacks/tokenization.py diff --git a/mimir/attacks/tokenization.py b/mimir/attacks/tokenization.py deleted file mode 100644 index 0c6ab6c..0000000 --- a/mimir/attacks/tokenization.py +++ /dev/null @@ -1,22 +0,0 @@ -""" - Implementation of tokenization-based attack (unpublished) -""" -from llm_di.model_utils import AlternativeTokenizationsGenerator -from llm_di.config import TokenizerConfig -import numpy as np - - -def token_attack(model, doc): - # Create config - config = TokenizerConfig(stochastic=True, num_tokenizations=100, max_tokens=2048) - # Wrap our model and tokenizer into object) - attacker = AlternativeTokenizationsGenerator(model.tokenizer, config) - - # Get alt tokenizations - alt_toks = attacker.get_tokenizations(doc) - # Covert to strings - alt_strings = model.tokenizer.batch_decode(alt_toks) - # Get scores - scores = model.get_lls(alt_strings) - # return np.mean(scores) - return np.array(scores)