feat(structure): improve code structure (second review)

fmind · Mar 16, 2024 · 20671c7 · 20671c7
1 parent f568fbc
commit 20671c7
Show file tree

Hide file tree

Showing 24 changed files with 139 additions and 134 deletions.
diff --git a/src/bikes/core/models.py b/src/bikes/core/models.py
@@ -155,7 +155,7 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSkle
     @T.override
     def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
         model = self.get_internal_model()
-        prediction = model.predict(inputs)  # np.ndarray
+        prediction = model.predict(inputs)
         outputs = schemas.Outputs(
             {schemas.OutputsSchema.prediction: prediction}, index=inputs.index
         )

diff --git a/src/bikes/io/registries.py b/src/bikes/io/registries.py
@@ -57,7 +57,7 @@ class Saver(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
     e.g., to switch between serialization flavors.
 
     Parameters:
-        path (str): model path inside the MLflow store.
+        path (str): model path inside the Mlflow store.
     """
 
     KIND: str
@@ -81,15 +81,15 @@ def save(
 
 
 class CustomSaver(Saver):
-    """Saver for project models using the MLflow PyFunc module.
+    """Saver for project models using the Mlflow PyFunc module.
 
     https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html
     """
 
     KIND: T.Literal["CustomSaver"] = "CustomSaver"
 
     class Adapter(mlflow.pyfunc.PythonModel):  # type: ignore[misc]
-        """Adapt a custom model to the MLflow PyFunc flavor for saving operations.
+        """Adapt a custom model to the Mlflow PyFunc flavor for saving operations.
 
         https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html?#mlflow.pyfunc.PythonModel
         """
@@ -134,12 +134,12 @@ def save(
 
 
 class BuiltinSaver(Saver):
-    """Saver for built-in models using an MLflow flavor module.
+    """Saver for built-in models using an Mlflow flavor module.
 
     https://mlflow.org/docs/latest/models.html#built-in-model-flavors
 
     Parameters:
-        flavor (str): MLflow flavor module to use for the serialization.
+        flavor (str): Mlflow flavor module to use for the serialization.
     """
 
     KIND: T.Literal["BuiltinSaver"] = "BuiltinSaver"
@@ -201,7 +201,7 @@ def load(self, uri: str) -> "Loader.Adapter":
 
 
 class CustomLoader(Loader):
-    """Loader for custom models using the MLflow PyFunc module.
+    """Loader for custom models using the Mlflow PyFunc module.
 
     https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html
     """
@@ -233,9 +233,9 @@ def load(self, uri: str) -> "CustomLoader.Adapter":
 
 
 class BuiltinLoader(Loader):
-    """Loader for built-in models using the MLflow PyFunc module.
+    """Loader for built-in models using the Mlflow PyFunc module.
 
-    Note: use MLflow PyFunc instead of flavors to use standard API.
+    Note: use Mlflow PyFunc instead of flavors to use standard API.
 
     https://mlflow.org/docs/latest/models.html#built-in-model-flavors
     """
@@ -298,17 +298,17 @@ def register(self, name: str, model_uri: str) -> Version:
         """
 
 
-class MLflowRegister(Register):
-    """Register for models in the MLflow Model Registry.
+class MlflowRegister(Register):
+    """Register for models in the Mlflow Model Registry.
 
     https://mlflow.org/docs/latest/model-registry.html
     """
 
-    KIND: T.Literal["MLflowRegister"] = "MLflowRegister"
+    KIND: T.Literal["MlflowRegister"] = "MlflowRegister"
 
     @T.override
     def register(self, name: str, model_uri: str) -> Version:
         return mlflow.register_model(name=name, model_uri=model_uri, tags=self.tags)
 
 
-RegisterKind = MLflowRegister
+RegisterKind = MlflowRegister
diff --git a/src/bikes/io/services.py b/src/bikes/io/services.py
@@ -81,12 +81,12 @@ def logger(self) -> loguru.Logger:
         return loguru.logger
 
 
-class MLflowService(Service):
-    """Service for MLflow tracking and registry.
+class MlflowService(Service):
+    """Service for Mlflow tracking and registry.
 
     Parameters:
-        tracking_uri (str): the URI for the MLflow tracking server.
-        registry_uri (str): the URI for the MLflow model registry.
+        tracking_uri (str): the URI for the Mlflow tracking server.
+        registry_uri (str): the URI for the Mlflow model registry.
         experiment_name (str): the name of tracking experiment.
         registry_name (str): the name of model registry.
         autolog_disable (bool): disable autologging.
@@ -96,9 +96,24 @@ class MLflowService(Service):
         autolog_log_model_signatures (bool): If True, logs model signatures during autologging.
         autolog_log_models (bool): If True, enables logging of models during autologging.
         autolog_log_datasets (bool): If True, logs datasets used during autologging.
-        autolog_silent (bool): If True, suppresses all MLflow warnings during autologging.
+        autolog_silent (bool): If True, suppresses all Mlflow warnings during autologging.
     """
 
+    class RunConfig(pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
+        """Run configuration for Mlflow tracking.
+
+        Parameters:
+            name (str): name of the run.
+            description (str | None): description of the run.
+            tags (dict[str, T.Any] | None): tags for the run.
+            log_system_metrics (bool | None): enable system metrics logging.
+        """
+
+        name: str
+        description: str | None = None
+        tags: dict[str, T.Any] | None = None
+        log_system_metrics: bool | None = None
+
     # server uri
     tracking_uri: str = "./mlruns"
     registry_uri: str = "./mlruns"
@@ -135,31 +150,25 @@ def start(self) -> None:
         )
 
     @ctx.contextmanager
-    def run(
-        self,
-        name: str,
-        description: str | None = None,
-        tags: dict[str, T.Any] | None = None,
-        log_system_metrics: bool | None = None,
-    ) -> T.Generator[mlflow.ActiveRun, None, None]:
-        """Yield an active MLflow run and exit it afterwards.
+    def run_context(self, run_config: RunConfig) -> T.Generator[mlflow.ActiveRun, None, None]:
+        """Yield an active Mlflow run and exit it afterwards.
 
         Args:
-            name (str): name of the run.
-            description (str | None, optional): description of the run. Defaults to None.
-            tags (dict[str, T.Any] | None, optional): dict of tags of the run. Defaults to None.
-            log_system_metrics (bool | None, optional): enable system metrics logging. Defaults to None.
+            run_config (RunConfig): run parameters.
 
         Yields:
             T.Generator[mlflow.ActiveRun, None, None]: active run context. Will be closed at the end of context.
         """
         with mlflow.start_run(
-            run_name=name, description=description, tags=tags, log_system_metrics=log_system_metrics
+            run_name=run_config.name,
+            tags=run_config.tags,
+            description=run_config.description,
+            log_system_metrics=run_config.log_system_metrics,
         ) as run:
             yield run
 
     def client(self) -> mt.MlflowClient:
-        """Return a new MLflow client.
+        """Return a new Mlflow client.
 
         Returns:
             MlflowClient: the mlflow client.

diff --git a/src/bikes/jobs/base.py b/src/bikes/jobs/base.py
@@ -26,13 +26,13 @@ class Job(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
 
     Parameters:
         logger_service (services.LoggerService): manage the logging system.
-        mlflow_service (services.MLflowService): manage the mlflow system.
+        mlflow_service (services.MlflowService): manage the mlflow system.
     """
 
     KIND: str
 
     logger_service: services.LoggerService = services.LoggerService()
-    mlflow_service: services.MLflowService = services.MLflowService()
+    mlflow_service: services.MlflowService = services.MlflowService()
 
     def __enter__(self) -> T.Self:
         """Enter the job context.
@@ -43,7 +43,7 @@ def __enter__(self) -> T.Self:
         self.logger_service.start()
         logger = self.logger_service.logger()
         logger.debug("[START] Logger service: {}", self.logger_service)
-        logger.debug("[START] MLflow service: {}", self.mlflow_service)
+        logger.debug("[START] Mlflow service: {}", self.mlflow_service)
         self.mlflow_service.start()
         return self
 
@@ -64,7 +64,7 @@ def __exit__(
             T.Literal[False]: always propagate exceptions.
         """
         logger = self.logger_service.logger()
-        logger.debug("[STOP] MLflow service: {}", self.mlflow_service)
+        logger.debug("[STOP] Mlflow service: {}", self.mlflow_service)
         self.mlflow_service.stop()
         logger.debug("[STOP] Logger service: {}", self.logger_service)
         self.logger_service.stop()

diff --git a/src/bikes/jobs/promotion.py b/src/bikes/jobs/promotion.py
@@ -21,8 +21,8 @@ class PromotionJob(base.Job):
 
     KIND: T.Literal["PromotionJob"] = "PromotionJob"
 
-    version: int | None = None
     alias: str = "Champion"
+    version: int | None = None
 
     @T.override
     def run(self) -> base.Locals:

diff --git a/src/bikes/jobs/training.py b/src/bikes/jobs/training.py
@@ -8,7 +8,7 @@
 
 from bikes.core import metrics as metrics_
 from bikes.core import models, schemas
-from bikes.io import datasets, registries
+from bikes.io import datasets, registries, services
 from bikes.jobs import base
 from bikes.utils import signers, splitters
 
@@ -19,9 +19,7 @@ class TrainingJob(base.Job):
     """Train and register a single AI/ML model.
 
     Parameters:
-        run_name (str): name of the run.
-        run_description (str, optional): description of the run.
-        run_tags: (dict[str, T.Any], optional): tags for the run.
+        run_config (services.MlflowService.RunConfig): mlflow run config.
         inputs (datasets.ReaderKind): reader for the inputs data.
         targets (datasets.ReaderKind): reader for the targets data.
         model (models.ModelKind): machine learning model to train.
@@ -35,9 +33,7 @@ class TrainingJob(base.Job):
     KIND: T.Literal["TrainingJob"] = "TrainingJob"
 
     # Run
-    run_name: str = "Tuning"
-    run_description: str | None = None
-    run_tags: dict[str, T.Any] | None = None
+    run_config: services.MlflowService.RunConfig = services.MlflowService.RunConfig(name="Training")
     # Data
     inputs: datasets.ReaderKind = pdt.Field(..., discriminator="KIND")
     targets: datasets.ReaderKind = pdt.Field(..., discriminator="KIND")
@@ -55,7 +51,7 @@ class TrainingJob(base.Job):
     signer: signers.SignerKind = pdt.Field(signers.InferSigner(), discriminator="KIND")
     # Register
     # - avoid shadowing the pydantic `register` function
-    registry: registries.RegisterKind = pdt.Field(registries.MLflowRegister(), discriminator="KIND")
+    registry: registries.RegisterKind = pdt.Field(registries.MlflowRegister(), discriminator="KIND")
 
     @T.override
     def run(self) -> base.Locals:
@@ -65,9 +61,7 @@ def run(self) -> base.Locals:
         logger.info("With logger: {}", logger)
         # - mlflow
         client = self.mlflow_service.client()
-        with self.mlflow_service.run(
-            name=self.run_name, description=self.run_description, tags=self.run_tags
-        ) as run:
+        with self.mlflow_service.run_context(run_config=self.run_config) as run:
             logger.info("With mlflow run id: {}", run.info.run_id)
             # data
             # - inputs

diff --git a/src/bikes/jobs/tuning.py b/src/bikes/jobs/tuning.py
@@ -7,7 +7,7 @@
 import pydantic as pdt
 
 from bikes.core import metrics, models, schemas
-from bikes.io import datasets
+from bikes.io import datasets, services
 from bikes.jobs import base
 from bikes.utils import searchers, splitters
 
@@ -18,9 +18,7 @@ class TuningJob(base.Job):
     """Find the best hyperparameters for a model.
 
     Parameters:
-        run_name (str): name of the run.
-        run_description (str, optional): description of the run.
-        run_tags: (dict[str, T.Any], optional): tags for the run.
+        run_config (services.MlflowService.RunConfig): mlflow run config.
         inputs (datasets.ReaderKind): reader for the inputs data.
         targets (datasets.ReaderKind): reader for the targets data.
         model (models.ModelKind): machine learning model to tune.
@@ -32,9 +30,7 @@ class TuningJob(base.Job):
     KIND: T.Literal["TuningJob"] = "TuningJob"
 
     # Run
-    run_name: str = "Tuning"
-    run_description: str | None = None
-    run_tags: dict[str, T.Any] | None = None
+    run_config: services.MlflowService.RunConfig = services.MlflowService.RunConfig(name="Tuning")
     # Data
     inputs: datasets.ReaderKind = pdt.Field(..., discriminator="KIND")
     targets: datasets.ReaderKind = pdt.Field(..., discriminator="KIND")
@@ -64,9 +60,7 @@ def run(self) -> base.Locals:
         logger = self.logger_service.logger()
         logger.info("With logger: {}", logger)
         # - mlflow
-        with self.mlflow_service.run(
-            name=self.run_name, description=self.run_description, tags=self.run_tags
-        ) as run:
+        with self.mlflow_service.run_context(run_config=self.run_config) as run:
             logger.info("With mlflow run id: {}", run.info.run_id)
             # data
             # - inputs

diff --git a/src/bikes/utils/searchers.py b/src/bikes/utils/searchers.py
@@ -60,17 +60,17 @@ def search(
             metric (metrics.Metric): main metric to optimize.
             inputs (schemas.Inputs): model inputs for tuning.
             targets (schemas.Targets): model targets for tuning.
-            cv (CrossValidation): structure for cross-folds strategy.
+            cv (CrossValidation): choice for cross-fold validation.
 
         Returns:
-            Results: all the results of the searcher process.
+            Results: all the results of the searcher execution process.
         """
 
 
 class GridCVSearcher(Searcher):
     """Grid searcher with cross-fold validation.
 
-    Metric should return higher values for better models.
+    Convention: metric returns higher values for better models.
 
     Parameters:
         n_jobs (int, optional): number of jobs to run in parallel.

diff --git a/src/bikes/utils/signers.py b/src/bikes/utils/signers.py
@@ -22,7 +22,7 @@ class Signer(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
     """Base class for generating model signatures.
 
     Allow to switch between model signing strategies.
-    e.g., automatic inference, manual signatures, ...
+    e.g., automatic inference, manual model signature, ...
 
     https://mlflow.org/docs/latest/models.html#model-signature-and-input-example
     """

diff --git a/src/bikes/utils/splitters.py b/src/bikes/utils/splitters.py
@@ -67,7 +67,7 @@ class TrainTestSplitter(Splitter):
     """Split a dataframe into a train and test set.
 
     Parameters:
-        shuffle (bool): shuffle dataset before splitting it.
+        shuffle (bool): shuffle the dataset. Default is False.
         test_size (int | float): number/ratio for the test set.
         random_state (int): random state for the splitter object.
     """

diff --git a/tasks/cleans.py b/tasks/cleans.py
@@ -30,7 +30,7 @@ def pytest(ctx: Context) -> None:
 
 @task
 def coverage(ctx: Context) -> None:
-    """Clean coverage tool."""
+    """Clean the coverage tool."""
     ctx.run("rm -f .coverage*")
 
 
@@ -104,7 +104,7 @@ def folders(_: Context) -> None:
 
 @task(pre=[venv, poetry, python])
 def sources(_: Context) -> None:
-    """Run all folders tasks."""
+    """Run all sources tasks."""
 
 
 @task(pre=[tools, folders], default=True)

diff --git a/tasks/formats.py b/tasks/formats.py
@@ -10,7 +10,7 @@
 
 @task
 def code(ctx: Context) -> None:
-    """Format code with ruff."""
+    """Format python code with ruff."""
     ctx.run("poetry run ruff format src/ tasks/ tests/")
 
 

diff --git a/tasks/installs.py b/tasks/installs.py
@@ -16,7 +16,7 @@ def poetry(ctx: Context) -> None:
 
 @task
 def pre_commit(ctx: Context) -> None:
-    """Run pre-commit install."""
+    """Install pre-commit hooks on git."""
     ctx.run("poetry run pre-commit install --hook-type pre-push")
     ctx.run("poetry run pre-commit install --hook-type commit-msg")
 

diff --git a/tasks/mlflow.py b/tasks/mlflow.py
@@ -18,7 +18,7 @@ def doctor(ctx: Context) -> None:
 def serve(
     ctx: Context, host: str = "127.0.0.1", port: str = "5000", backend_uri: str = "./mlruns"
 ) -> None:
-    """"""
+    """Start mlflow server with the given host, port, and backend uri."""
     ctx.run(
         f"poetry run mlflow server --host={host} --port={port} --backend-store-uri={backend_uri}"
     )