Merge pull request #15 from Sagacify/make-publishing-public
feat: make publishing public to PyPI
leonardo-remondini authored Jun 4, 2024
2 parents 1f16275 + 2d3b3c7 commit 094648b
Showing 21 changed files with 47 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cd.yaml
@@ -52,4 +52,4 @@ jobs:
 
       - name: Push to private PyPI registry
        if: ${{ steps.new_version.outputs.version != steps.current_version.outputs.version }}
-        run: poetry publish --repository sagacify
+        run: poetry publish
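
Aside from the diff itself: with the supplemental `sagacify` source removed from pyproject.toml (further down in this commit), `poetry publish` targets the public PyPI index by default. Publishing there still needs credentials, which this diff does not show; a minimal sketch of one way to supply them, assuming a hypothetical `PYPI_TOKEN` repository secret:

```yaml
# Hypothetical sketch, not part of this commit: Poetry reads the
# POETRY_PYPI_TOKEN_PYPI environment variable as the API token for the
# default "pypi" repository, so the publish step could authenticate like:
- name: Push to PyPI
  if: ${{ steps.new_version.outputs.version != steps.current_version.outputs.version }}
  env:
    POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
  run: poetry publish
```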
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
@@ -40,11 +40,11 @@ jobs:
       - name: Run formatter
         run: |
-          poetry run black --check saga_llm_evaluation_ml tests
+          poetry run black --check saga_llm_evaluation tests
       - name: Run linter
         run: |
-          poetry run pylint saga_llm_evaluation_ml tests
+          poetry run pylint saga_llm_evaluation tests
       - name: Run tests
         run: |
1 change: 0 additions & 1 deletion .pylintrc
@@ -98,7 +98,6 @@ disable=missing-module-docstring,
     fixme,
     unspecified-encoding,
     duplicate-code,
-    no-self-use,
     too-few-public-methods,
     attribute-defined-outside-init
 
22 changes: 11 additions & 11 deletions README.md
@@ -28,7 +28,7 @@ Each of these metrics uses the [LLAMA model](https://ai.meta.com/llama/) to eval
 ## Installation
 To install the Saga LLM Evaluation ML library, use the following command:
 
-```pip install saga_llm_evaluation_ml```
+```pip install saga-llm-evaluation```
 
 Be aware that by default the library will run pytorch on the CPU. If you want to run it on the GPU, you need to install pytorch with GPU support. You can find the instructions [here](https://pytorch.org/get-started/locally/).
 
@@ -40,7 +40,7 @@ The Scorer is a class that allows you to run multiple metrics at once. The metri
 
 
 ```python
-from saga_llm_evaluation_ml import LLMScorer
+from saga_llm_evaluation import LLMScorer
 scorer = LLMScorer(
     metrics = ["bertscore", "mauve", "bleurtscore", "q_squared", "selcheckgpt", "geval", "gptscore"],
     model = transformers.PreTrainedModel, # language model that inherits from transformers.PreTrainedModel which needs to be evaluated. Needed for SelCheck-GPT
@@ -94,7 +94,7 @@ scorer.score(
 
 ### BERTScore
 ```python
-from saga_llm_evaluation_ml import BERTScore
+from saga_llm_evaluation import BERTScore
 
 bert_score = BERTScore()
 scores = bert_score.compute(
@@ -105,7 +105,7 @@ scores = bert_score.compute(
 
 ### MAUVE
 ```python
-from saga_llm_evaluation_ml import MAUVE
+from saga_llm_evaluation import MAUVE
 mauve = MAUVE()
 scores = mauve.compute(
     references=["This is a reference sentence"],
@@ -115,7 +115,7 @@ scores = mauve.compute(
 
 ### BLEURTScore
 ```python
-from saga_llm_evaluation_ml import BLEURTScore
+from saga_llm_evaluation import BLEURTScore
 bleurt_score = BLEURTScore()
 scores = bleurt_score.compute(
     references=["This is a reference sentence"],
@@ -125,7 +125,7 @@ scores = bleurt_score.compute(
 
 ### Q-Squared
 ```python
-from saga_llm_evaluation_ml import QSquared
+from saga_llm_evaluation import QSquared
 q_squared = QSquared()
 scores = q_squared.compute(
     knowledges=["This is the text gave to the LLM as knowledge"],
@@ -135,7 +135,7 @@ scores = q_squared.compute(
 
 ### SelCheck-GPT
 ```python
-from saga_llm_evaluation_ml import SelCheckGPT
+from saga_llm_evaluation import SelCheckGPT
 selcheck_gpt = SelCheckGPT(
     model = transformers.PreTrainedModel, # language model that inherits from transformers.PreTrainedModel which needs to be evaluated.
     eval_model = transformers.PreTrainedModel, # language model that inherits from transformers.PreTrainedModel which is used to evaluate the model.
@@ -148,7 +148,7 @@ scores = selcheck_gpt.compute(
 
 ### G-Eval
 ```python
-from saga_llm_evaluation_ml import GEval
+from saga_llm_evaluation import GEval
 g_eval = GEval(
     model = transformers.PreTrainedModel, # language model that inherits from transformers.PreTrainedModel which is used to evaluate the model.
 )
@@ -175,7 +175,7 @@ scores = g_eval.compute(
 
 ### GPT-Score
 ```python
-from saga_llm_evaluation_ml import GPTScore
+from saga_llm_evaluation import GPTScore
 gpt_score = GPTScore(
     model = transformers.PreTrainedModel, # language model that inherits from transformers.PreTrainedModel which is used to evaluate the model.
 )
@@ -206,7 +206,7 @@ You can use a different LLAMA model as evaluator by using the get_llama_model fu
 The full list of quantized LLAMA models that may be used is available [here](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF).
 
 ```python
-from saga_llm_evaluation_ml import get_llama_model
+from saga_llm_evaluation import get_llama_model
 
 llama_model = get_llama_model(
     repo_id = "TheBloke/Llama-2-7b-Chat-GGUF",
@@ -219,7 +219,7 @@ You can also download the LLAMA model manually and specify the local path to the
 ```huggingface-cli download TheBloke/Llama-2-7b-Chat-GGUF llama-2-7b-chat.Q2_K.gguf --local-dir path_to_model_folder --local-dir-use-symlinks False```
 
 ```python
-from saga_llm_evaluation_ml import get_llama_model
+from saga_llm_evaluation import get_llama_model
 
 llama_model = get_llama_model(
     model_path = "path_to_model_folder",
25 changes: 11 additions & 14 deletions pyproject.toml
@@ -1,9 +1,12 @@
 [tool.poetry]
-name = "saga_llm_evaluation_ml"
+name = "saga-llm-evaluation"
 version = "0.7.2"
 description = "Versatile Python library designed for evaluating the performance of large language models in Natural Language Processing (NLP) tasks. Developed by Sagacify"
 readme = "README.md"
-authors = ["Leonardo Remondini <[email protected]>", "Lucie Navez <[email protected]>"]
+authors = [
+    "Leonardo Remondini <[email protected]>",
+    "Lucie Navez <[email protected]>",
+]
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.11"
@@ -13,15 +16,15 @@ spacy = "^3.1.3"
 evaluate = "^0.4.1"
 mauve-text = "^0.3.0"
 bert-score = "^0.3.13"
-bleurt = {git = "https://github.com/google-research/bleurt.git"}
+bleurt = { git = "https://github.com/google-research/bleurt.git" }
 torch = "2.1.1"
-tensorflow = {version = "^2.14", markers = "sys_platform == 'linux'"}
-tensorflow-macos = {version = "^2.14", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"}
+tensorflow = { version = "^2.14", markers = "sys_platform == 'linux'" }
+tensorflow-macos = { version = "^2.14", markers = "sys_platform == 'darwin' and platform_machine == 'arm64'" }
 elemeta = "1.0.7"
 huggingface-hub = "^0.18.0"
 llama-cpp-python = "^0.2.11"
-en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl"}
-fr-core-news-sm = {url = "https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl"}
+en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl" }
+fr-core-news-sm = { url = "https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl" }
 faiss-cpu = "^1.7.4"
 
 [tool.poetry.dev-dependencies]
@@ -31,12 +34,6 @@ black = "^22.10.0"
 pytest = "^7.1.3"
 pytest-env = "^0.8.1"
 
-
-[[tool.poetry.source]]
-name = "sagacify"
-url = "https://pypiserver.sagacify.com/"
-priority = "supplemental"
-
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.2"
 jupyterlab = "^4.0.7"
@@ -46,7 +43,7 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.semantic_release]
-version_variable = "saga_llm_evaluation_ml/__init__.py:__version__"
+version_variable = "saga_llm_evaluation/__init__.py:__version__"
 branch = "master"
 version_toml = "pyproject.toml:tool.poetry.version"
 commit_subject = 'chore(release): Release v{version} [skip ci]'
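
Worth noting about the rename: only the distribution name is hyphenated; the import path keeps its underscores, as the renamed source directories below show. A minimal sanity check, assuming the package has been installed from PyPI:

```python
# Minimal sketch, assuming `pip install saga-llm-evaluation` has already run.
# The PyPI distribution name is hyphenated, while the importable package
# keeps the underscored directory name shown in this diff.
from importlib.metadata import version

print(version("saga-llm-evaluation"))  # distribution name, e.g. "0.7.2"

from saga_llm_evaluation import LLMScorer  # import path stays underscored
```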
File renamed without changes.
File renamed without changes.
@@ -7,8 +7,8 @@
     AutoTokenizer,
 )
 
-from saga_llm_evaluation_ml.helpers.embedding_metrics import BERTScore
-from saga_llm_evaluation_ml.helpers.utils import (
+from saga_llm_evaluation.helpers.embedding_metrics import BERTScore
+from saga_llm_evaluation.helpers.utils import (
     INVALID_QUESTION,
     NO_ANS,
     check_list_type,
@@ -2,7 +2,7 @@
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-from saga_llm_evaluation_ml.helpers.utils import check_list_type
+from saga_llm_evaluation.helpers.utils import check_list_type
 
 # pylint: disable=consider-iterating-dictionary
 # pylint: disable=too-many-locals
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions saga_llm_evaluation_ml/score.py → saga_llm_evaluation/score.py
@@ -1,7 +1,7 @@
-from saga_llm_evaluation_ml.helpers.embedding_metrics import MAUVE, BERTScore
-from saga_llm_evaluation_ml.helpers.language_metrics import BLEURTScore, QSquared
-from saga_llm_evaluation_ml.helpers.llm_metrics import GEval, GPTScore, SelfCheckGPT
-from saga_llm_evaluation_ml.helpers.utils import (
+from saga_llm_evaluation.helpers.embedding_metrics import MAUVE, BERTScore
+from saga_llm_evaluation.helpers.language_metrics import BLEURTScore, QSquared
+from saga_llm_evaluation.helpers.llm_metrics import GEval, GPTScore, SelfCheckGPT
+from saga_llm_evaluation.helpers.utils import (
     MetadataExtractor,
     check_list_type,
     filter_class_input,
@@ -65,7 +65,7 @@ def __init__(
     ) -> None:
 
         self.config = (
-            config if config else load_json("./saga_llm_evaluation_ml/scorer.json")
+            config if config else load_json("./saga_llm_evaluation/scorer.json")
         )
         assert isinstance(metrics, list), "metrics must be a list."
         assert isinstance(self.config, dict), "config file must be a dict."
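
One caveat the diff leaves as-is: `"./saga_llm_evaluation/scorer.json"` is resolved against the current working directory, so the default config only loads when running from a repository checkout. A hedged sketch of a package-relative alternative (not what this commit does), assuming `scorer.json` ships inside the `saga_llm_evaluation` package:

```python
# Hypothetical alternative, not part of this commit: resolve scorer.json
# relative to the installed package instead of the working directory, so
# the default config also loads after a plain `pip install`.
import json
from importlib import resources  # importlib.resources.files needs Python >= 3.9

def load_default_config() -> dict:
    text = resources.files("saga_llm_evaluation").joinpath("scorer.json").read_text()
    return json.loads(text)
```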
File renamed without changes.
Empty file.
2 changes: 1 addition & 1 deletion tests/__init__.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-from saga_llm_evaluation_ml.helpers.utils import get_llama_model
+from saga_llm_evaluation.helpers.utils import get_llama_model
 
 MODULE_ROOT = os.path.abspath("/www/app/src")
 sys.path.append(MODULE_ROOT)
2 changes: 1 addition & 1 deletion tests/test_embedding_metrics.py
@@ -1,6 +1,6 @@
 import unittest
 
-from saga_llm_evaluation_ml.helpers.embedding_metrics import MAUVE, BERTScore
+from saga_llm_evaluation.helpers.embedding_metrics import MAUVE, BERTScore
 
 
 class TestBERTScore(unittest.TestCase):
2 changes: 1 addition & 1 deletion tests/test_helpers.py
@@ -1,6 +1,6 @@
 import unittest
 
-from saga_llm_evaluation_ml.helpers.utils import MetadataExtractor
+from saga_llm_evaluation.helpers.utils import MetadataExtractor
 
 
 class TestMetadataExtractor(unittest.TestCase):
7 changes: 6 additions & 1 deletion tests/test_language_metrics.py
@@ -1,6 +1,11 @@
 import unittest
 
-from saga_llm_evaluation_ml.helpers.language_metrics import BLEURTScore, QSquared
+import pytest
+
+from saga_llm_evaluation.helpers.language_metrics import BLEURTScore, QSquared
+
+# skip it for github actions, too many resources needed. Test locally
+pytest.skip(allow_module_level=True)
 
 
 class TestBLEURTScore(unittest.TestCase):
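
The module-level `pytest.skip` above disables these tests everywhere, local runs included, until the line is deleted. A hedged variant that only skips under CI (GitHub Actions exports `CI=true` on its runners), offered as a sketch rather than what this commit does:

```python
# Hypothetical variant, not part of this commit: skip the module only when
# running under CI, so the resource-heavy tests still run locally.
import os

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("CI") == "true",
    reason="Too resource-intensive for GitHub Actions runners; run locally.",
)
```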
2 changes: 1 addition & 1 deletion tests/test_llm_metrics.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from saga_llm_evaluation_ml.helpers.llm_metrics import GEval, GPTScore, SelfCheckGPT
+from saga_llm_evaluation.helpers.llm_metrics import GEval, GPTScore, SelfCheckGPT
 from tests import LLAMA_MODEL
 
 # skip it for github actions, too many resources needed. Test locally
8 changes: 4 additions & 4 deletions tests/test_score.py
@@ -2,8 +2,8 @@
 
 import pytest
 
-from saga_llm_evaluation_ml.helpers.utils import load_json
-from saga_llm_evaluation_ml.score import LLMScorer
+from saga_llm_evaluation.helpers.utils import load_json
+from saga_llm_evaluation.score import LLMScorer
 from tests import LLAMA_MODEL
 
 # skip it for github actions, too many resources needed. Test locally
@@ -27,7 +27,7 @@ def test_score_bad_arguments(self):
         knowledge = "You are a cat. You don't like dogs."
         prediction = "I am a cat, I don't like dogs."
         reference = "I am a cat, I don't like dogs, miau."
-        config = load_json("saga_llm_evaluation_ml/scorer.json")
+        config = load_json("saga_llm_evaluation/scorer.json")
 
         with self.assertRaises(AssertionError):
             self.scorer.score(False, knowledge, prediction, reference, config)
@@ -56,7 +56,7 @@ def test_score(self):
         knowledge = "Example: Eww, I hate dogs."
         prediction = "I am a cat, I don't like dogs."
         reference = "I am a cat, I don't like dogs, miau."
-        config = load_json("saga_llm_evaluation_ml/scorer.json")
+        config = load_json("saga_llm_evaluation/scorer.json")
 
         scores = self.scorer.score(
             user_prompt=user_prompt,
