diff --git a/docs/data_utils.html b/docs/data_utils.html
index a69976b..67685cb 100644
--- a/docs/data_utils.html
+++ b/docs/data_utils.html
@@ -39,6 +39,9 @@
mimir.data_utils
Helper function to process source name.
def sourcename_process(x: str):
+ """
+ Helper function to process source name.
+ """
return x.replace(" ", "_").replace("-", "_").lower()
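For illustration, the normalization maps spaces and hyphens to underscores and lowercases the result (inputs below are hypothetical):
sourcename_process("The Pile")   # -> "the_pile"
sourcename_process("Wiki-Text")  # -> "wiki_text"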
Data class to load and cache datasets.
class Data:
+ """
+ Data class to load and cache datasets.
+ """
def __init__(self, name,
config: ExperimentConfig,
presampled: str = None,
@@ -508,6 +526,9 @@ Classes
model: str = "bert",
in_place_swap: bool = False,
):
+ """
+ Load neighbors from cache (local or from HF)
+ """
data_split = "train" if train else "test"
data_split += "_neighbors"
filename = self._get_name_to_save() + "_neighbors_{}_{}".format(
@@ -535,6 +556,9 @@ Classes
model: str = "bert",
in_place_swap: bool = False,
):
+ """
+ Dump neighbors to local cache.
+ """
data_split = "train" if train else "test"
data_split += "_neighbors"
filename = self._get_name_to_save() + "_neighbors_{}_{}".format(
@@ -739,7 +763,7 @@ Methods
def dump_neighbors(self, data, train: bool, num_neighbors: int, model: str = 'bert', in_place_swap: bool = False)
Dump neighbors to local cache.
Load neighbors from cache (local or from HF)
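A minimal usage sketch of the cache round trip, assuming a Data instance named data and precomputed neighbors (argument values are illustrative, not from the source):
# Hypothetical call site; the cache filename is derived from the dataset
# name, the train/test split, the neighbor count, and the masking model.
data.dump_neighbors(neighbors, train=True, num_neighbors=25, model="bert")
cached = data.load_neighbors(train=True, num_neighbors=25, model="bert")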
diff --git a/docs/models.html b/docs/models.html
--- a/docs/models.html
+++ b/docs/models.html
mimir.models
-def get_min_k_prob(self, text: str, tokens=None, probs=None, k=0.2, window=1, stride=1)
-
@torch.no_grad()
-def get_min_k_prob(self, text: str, tokens=None, probs=None, k=.2, window=1, stride=1):
- all_prob = probs if probs is not None else self.get_probabilities(text, tokens=tokens)
- # iterate through probabilities by ngram defined by window size at given stride
- ngram_probs = []
- for i in range(0, len(all_prob) - window + 1, stride):
- ngram_prob = all_prob[i:i+window]
- ngram_probs.append(np.mean(ngram_prob))
- min_k_probs = sorted(ngram_probs)[:int(len(ngram_probs) * k)]
-
- return -np.mean(min_k_probs)
-
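To make the removed scoring logic concrete, here is a self-contained re-creation of the same arithmetic on dummy log-probabilities (a sketch, not the library's API):
import numpy as np

all_prob = [-0.5, -3.0, -0.1, -2.5, -0.2]  # stand-in for get_probabilities(text)
k, window, stride = 0.4, 1, 1
# mean log-probability of each window, taken at the given stride
ngram_probs = [np.mean(all_prob[i:i + window])
               for i in range(0, len(all_prob) - window + 1, stride)]
# keep the lowest k fraction and negate the mean: here -3.0 and -2.5 give 2.75
min_k_probs = sorted(ngram_probs)[:int(len(ngram_probs) * k)]
score = -np.mean(min_k_probs)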
def get_rank(self, text: str, log: bool = False)
-def get_zlib_entropy(self, text: str, tokens=None, probs=None)
-
@torch.no_grad()
-def get_zlib_entropy(self, text: str, tokens=None, probs=None):
- zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
- return self.get_ll(text, tokens=tokens, probs=probs) / zlib_entropy
-
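The zlib score divides the model log-likelihood by the compressed byte length of the text; a standalone equivalent, with a made-up log-likelihood standing in for get_ll(text):
import zlib

text = "some candidate text"
log_likelihood = -42.0  # would come from the model in the library
zlib_entropy = len(zlib.compress(bytes(text, "utf-8")))
score = log_likelihood / zlib_entropy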
def sample_from_model(self, texts: List[str], **kwargs)
var api_calls
Get the number of tokens used in API calls
@property
def api_calls(self):
+ """
+ Get the number of tokens used in API calls
+ """
return self.API_TOKEN_COUNTER
+def load(self)
+
Load reference model onto GPU(s)
def load(self):
+ """
+ Load reference model onto GPU(s)
+ """
+ if "llama" not in self.name and "alpaca" not in self.name:
+ super().load()
+
+def unload(self)
+
Unload reference model from GPU(s)
def unload(self):
+ """
+ Unload reference model from GPU(s)
+ """
+ if "llama" not in self.name and "alpaca" not in self.name:
+ super().unload()
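A hedged sketch of how the guarded pair might be used, where ref_model and sample are hypothetical names for an instance of this class and a candidate text:
ref_model.load()    # no-op when "llama" or "alpaca" is in the model name
ll = ref_model.get_ll(sample)
ref_model.unload()  # likewise skipped for llama/alpaca; otherwise frees GPU memory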
+Model:
@@ -2043,11 +2028,9 @@ forward
get_ll
get_probabilities
load
load_base_model_and_tokenizer
load_model_properties
to
unload
get_entropy
get_lls
get_max_norm
get_min_k_prob
get_rank
get_ref
get_zlib_entropy
sample_from_model
training
call_super_init
dump_patches
+load
training
+unload
diff --git a/docs/utils.html b/docs/utils.html
index fc83948..652a515 100644
--- a/docs/utils.html
+++ b/docs/utils.html
@@ -41,6 +41,9 @@ Module mimir.utils
def fix_seed(seed: int = 0):
+ """
+ Fix seed for reproducibility.
+ """
ch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
@@ -79,12 +82,15 @@ Functions
def fix_seed(seed: int = 0)
Fix seed for reproducibility.
def fix_seed(seed: int = 0):
+ """
+ Fix seed for reproducibility.
+ """
ch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
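Typical call site, assuming ch is torch imported under that alias (a reading consistent with ch.manual_seed above):
fix_seed(42)  # seed torch, numpy, and random before any sampling or shuffling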