Sagacify · LucieNvz · Oct 25, 2023 · Oct 19, 2023 · Oct 19, 2023 · Oct 19, 2023
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,6 +18,9 @@ tensorflow = {version = "^2.14.0", platform = "linux"}
 tensorflow-macos = {version = "^2.14.0", platform = "darwin"}
 elemeta = "1.0.7"
 torch = ">=2.0.0, !=2.0.1, !=2.1.0"
+openai = "^0.28.1"
+huggingface-hub = "^0.18.0"
+llama-cpp-python = "^0.2.11"
 en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl"}
 fr-core-news-sm = {url = "https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl"}
 

diff --git a/...m_evaluation_ml/model/helpers/__init__.py → saga_llm_evaluation_ml/helpers/__init__.py b/...m_evaluation_ml/model/helpers/__init__.py → saga_llm_evaluation_ml/helpers/__init__.py
diff --git a/...ion_ml/model/helpers/embedding_metrics.py → ...valuation_ml/helpers/embedding_metrics.py b/...ion_ml/model/helpers/embedding_metrics.py → ...valuation_ml/helpers/embedding_metrics.py
@@ -5,8 +5,8 @@ class BERTScore:
     def __init__(self, lan="en", model_type=None):
         """
         BERTScore computes a similarity score for each token in the candidate sentence with each
-        token in the reference sentence. The final score is the average of the similarity scores of
-        all tokens in the candidate sentence.
+        token in the reference sentence.
+        The final score is the average of the similarity scores of all tokens in the candidate sentence.
 
         Args:
             lan (str, optional): language to use. Defaults to "en", It may also be "fr". Depending
@@ -51,8 +51,9 @@ def compute(self, references, predictions, **kwargs):
 class MAUVE:
     def __init__(self, featurize_model_name="gpt2"):
         """
-        MAUVE score computes the difference between the candidate sentence distribution and the
-        reference sentence distribution. The bigger the MAUVE score, the better.
+        MAUVE score measures the gap between the candidate sentence distribution
+        and the reference sentence distribution.
+        The bigger the MAUVE score, the closer the two distributions, hence the better.
         """
         self.metric = load("mauve")
         self.featurize_model_name = featurize_model_name

diff --git a/...tion_ml/model/helpers/language_metrics.py → ...evaluation_ml/helpers/language_metrics.py b/...tion_ml/model/helpers/language_metrics.py → ...evaluation_ml/helpers/language_metrics.py
@@ -7,8 +7,8 @@
     AutoTokenizer,
 )
 
-from saga_llm_evaluation_ml.model.helpers.embedding_metrics import BERTScore
-from saga_llm_evaluation_ml.model.helpers.utils import (
+from saga_llm_evaluation_ml.helpers.embedding_metrics import BERTScore
+from saga_llm_evaluation_ml.helpers.utils import (
     INVALID_QUESTION,
     NO_ANS,
     filter_questions,