From 2be80d7d23c944c90529b48c852aa7e55b7d889e Mon Sep 17 00:00:00 2001
From: Lucie
Date: Thu, 19 Oct 2023 15:16:51 +0200
Subject: [PATCH] refactor(structure): refactor file structure for pylint

---
 .../{model => }/helpers/__init__.py           |   0
 .../{model => }/helpers/embedding_metrics.py  |   9 +-
 .../{model => }/helpers/language_metrics.py   |   3 +-
 .../{model => }/helpers/llm_metrics.py        | 179 +++++++++++-------
 .../{model => }/helpers/utils.py              |   4 +-
 tests/test_embedding_metrics.py               |   2 +-
 tests/test_helpers.py                         |  12 +-
 tests/test_language_metrics.py                |   2 +-
 tests/test_llm_metrics.py                     |  55 +++---
 9 files changed, 164 insertions(+), 102 deletions(-)
 rename saga_llm_evaluation_ml/{model => }/helpers/__init__.py (100%)
 rename saga_llm_evaluation_ml/{model => }/helpers/embedding_metrics.py (90%)
 rename saga_llm_evaluation_ml/{model => }/helpers/language_metrics.py (91%)
 rename saga_llm_evaluation_ml/{model => }/helpers/llm_metrics.py (68%)
 rename saga_llm_evaluation_ml/{model => }/helpers/utils.py (92%)

diff --git a/saga_llm_evaluation_ml/model/helpers/__init__.py b/saga_llm_evaluation_ml/helpers/__init__.py
similarity index 100%
rename from saga_llm_evaluation_ml/model/helpers/__init__.py
rename to saga_llm_evaluation_ml/helpers/__init__.py
diff --git a/saga_llm_evaluation_ml/model/helpers/embedding_metrics.py b/saga_llm_evaluation_ml/helpers/embedding_metrics.py
similarity index 90%
rename from saga_llm_evaluation_ml/model/helpers/embedding_metrics.py
rename to saga_llm_evaluation_ml/helpers/embedding_metrics.py
index 6af0c38..fa06db7 100644
--- a/saga_llm_evaluation_ml/model/helpers/embedding_metrics.py
+++ b/saga_llm_evaluation_ml/helpers/embedding_metrics.py
@@ -4,7 +4,8 @@
 class BERTScore:
     def __init__(self, model_type="distilbert-base-uncased"):
         """
-        BERTScore computes a similarity score for each token in the candidate sentence with each token in the reference sentence.
+        BERTScore computes a similarity score for each token in the candidate sentence with each
+        token in the reference sentence.
         The final score is the average of the similarity scores of all tokens in the candidate sentence.
 
         Args:
@@ -20,7 +21,8 @@ def compute(self, references, predictions, **kwargs):
             predictions (list): List of candidate sentences.
 
         Returns:
-            list: List of scores for each candidate sentence. Contains a list of scores for precisions, recalls, and F1 scores.
+            list: List of scores for each candidate sentence. Contains a list of scores
+            for precisions, recalls, and F1 scores.
         """
         assert len(references) == len(
             predictions
@@ -39,7 +41,8 @@
 class MAUVE:
     def __init__(self, featurize_model_name="gpt2"):
         """
-        MAUVE score computes the difference between the candidate sentence distribution and the reference sentence distribution.
+        MAUVE score computes the difference between the candidate sentence distribution
+        and the reference sentence distribution.
         The bigger the MAUVE score, the better.
         """
         self.metric = load("mauve")
diff --git a/saga_llm_evaluation_ml/model/helpers/language_metrics.py b/saga_llm_evaluation_ml/helpers/language_metrics.py
similarity index 91%
rename from saga_llm_evaluation_ml/model/helpers/language_metrics.py
rename to saga_llm_evaluation_ml/helpers/language_metrics.py
index 490a3f2..0d887d0 100644
--- a/saga_llm_evaluation_ml/model/helpers/language_metrics.py
+++ b/saga_llm_evaluation_ml/helpers/language_metrics.py
@@ -4,7 +4,8 @@
 class BLEURTScore:
     def __init__(self, checkpoint="BLEURT-tiny"):
         """
-        BLEURT is a learnt metric that uses BERT to compute a similarity score for each token in the candidate sentence with each token in the reference sentence.
+        BLEURT is a learnt metric that uses BERT to compute a similarity score for
+        each token in the candidate sentence with each token in the reference sentence.
 
         Args:
             checkpoint (str, optional): Checkpoint to use. Defaults to BLEURT-tiny if not specified.
diff --git a/saga_llm_evaluation_ml/model/helpers/llm_metrics.py b/saga_llm_evaluation_ml/helpers/llm_metrics.py
similarity index 68%
rename from saga_llm_evaluation_ml/model/helpers/llm_metrics.py
rename to saga_llm_evaluation_ml/helpers/llm_metrics.py
index 3760bf5..d270915 100644
--- a/saga_llm_evaluation_ml/model/helpers/llm_metrics.py
+++ b/saga_llm_evaluation_ml/helpers/llm_metrics.py
@@ -4,7 +4,7 @@
 class GPTScore:
-    def __init__(self):
+    def __init__(self, model="gpt2"):
         """
         GPTScore is a metric which allows to evaluate generative models on a variety of tasks.
         GPTScore(h|d, a, S) = sum_{t=1}^m w_t * log p(h_t | h_{