From 427dfc0f4fd6167671b1ff8a1da0f6016c4d6019 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 18:48:48 +0200 Subject: [PATCH 01/10] trigger creation of lazy experiment --- src/lightning/pytorch/trainer/call.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning/pytorch/trainer/call.py b/src/lightning/pytorch/trainer/call.py index 8e5cf8dac976b..a3ab6ad716bdb 100644 --- a/src/lightning/pytorch/trainer/call.py +++ b/src/lightning/pytorch/trainer/call.py @@ -73,6 +73,9 @@ def _call_setup_hook(trainer: "pl.Trainer") -> None: assert trainer.state.fn is not None fn = trainer.state.fn + # Trigger lazy creation of experiment in loggers + _ = trainer.loggers[0].experiment + trainer.strategy.barrier("pre_setup") if trainer.datamodule is not None: From b9e1f2dd1789c5ff0965013d41c555c01affca9e Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:12:03 +0200 Subject: [PATCH 02/10] fix --- src/lightning/pytorch/trainer/call.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lightning/pytorch/trainer/call.py b/src/lightning/pytorch/trainer/call.py index a3ab6ad716bdb..043b6135838eb 100644 --- a/src/lightning/pytorch/trainer/call.py +++ b/src/lightning/pytorch/trainer/call.py @@ -73,8 +73,9 @@ def _call_setup_hook(trainer: "pl.Trainer") -> None: assert trainer.state.fn is not None fn = trainer.state.fn - # Trigger lazy creation of experiment in loggers - _ = trainer.loggers[0].experiment + # Trigger lazy creation of experiment in loggers so loggers have their metadata available + for logger in trainer.loggers: + _ = logger.experiment trainer.strategy.barrier("pre_setup") From 14066503bfce1c21b21f7919a9b417c762dc7ffb Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:14:08 +0200 Subject: [PATCH 03/10] changelog --- src/lightning/pytorch/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index da1c9e26b41f8..3e56804266391 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -125,6 +125,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed deriving default map location in `LightningModule.load_from_checkpoint` when there is extra state ([#17812](https://github.com/Lightning-AI/lightning/pull/17812)) +- Fixed delayed creation of experiment metadata and checkpoint/log dir name when using `WandbLogger` ([#17818](https://github.com/Lightning-AI/lightning/pull/17818)) + + ## [2.0.3] - 2023-06-07 ### Changed From 54a9ab18f7403ddb0dcb5b462ed5d6cf90cd91f6 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:27:23 +0200 Subject: [PATCH 04/10] include wandb --- tests/tests_pytorch/loggers/test_all.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 2afb9408a806d..93c91456f3ccf 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -237,7 +237,7 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): assert pl_module.logger.experiment.something(foo="bar") is None -@pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES_WO_NEPTUNE_WANDB) +@pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES) @RunIf(skip_windows=True) def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0.""" @@ -262,7 +262,6 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class): callbacks=[RankZeroLoggerCheck()], ) trainer.fit(model) - assert trainer.state.finished, f"Training failed with {trainer.state}" def test_logger_with_prefix_all(tmpdir, monkeypatch): From 856a0d4d9b6af9af08d67b722b7204f659e3a852 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:28:48 +0200 Subject: [PATCH 05/10] update --- tests/tests_pytorch/loggers/test_all.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 93c91456f3ccf..a869fafc4a865 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -61,7 +61,6 @@ WandbLogger, ) ALL_LOGGER_CLASSES_WO_NEPTUNE = tuple(filter(lambda cls: cls is not NeptuneLogger, ALL_LOGGER_CLASSES)) -ALL_LOGGER_CLASSES_WO_NEPTUNE_WANDB = tuple(filter(lambda cls: cls is not WandbLogger, ALL_LOGGER_CLASSES_WO_NEPTUNE)) def _get_logger_args(logger_class, save_dir): From 4896f1f68094746f96cb9904f438f947bbd14722 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:56:18 +0200 Subject: [PATCH 06/10] add test --- tests/tests_pytorch/loggers/test_all.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index a869fafc4a865..28f213d0e653a 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -223,10 +223,17 @@ def __init__(self, lr=0.1, batch_size=1): assert logger2 == logger3, "Finder altered the logger of model" -class RankZeroLoggerCheck(Callback): - # this class has to be defined outside the test function, otherwise we get pickle error - # due to the way ddp process is launched +class LazyInitExperimentCheck(Callback): + def setup(self, trainer, pl_module, stage=None): + if trainer.global_rank != 0: + return + if isinstance(trainer.logger, MLFlowLogger): + assert trainer.logger._mlflow_client is not None + else: + assert trainer.logger._experiment is not None + +class RankZeroLoggerCheck(Callback): def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): is_dummy = isinstance(trainer.logger.experiment, DummyExperiment) if trainer.is_global_zero: @@ -238,8 +245,9 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): @pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES) @RunIf(skip_windows=True) -def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): - """Test that loggers get replaced by dummy loggers on global rank > 0.""" +def test_logger_initialization(tmpdir, monkeypatch, logger_class): + """Test that loggers get replaced by dummy loggers on global rank > 0 and that the experiment object is available + at the right time in Trainer.""" _patch_comet_atexit(monkeypatch) try: _test_logger_created_on_rank_zero_only(tmpdir, logger_class) @@ -258,7 +266,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class): accelerator="cpu", devices=2, max_steps=1, - callbacks=[RankZeroLoggerCheck()], + callbacks=[RankZeroLoggerCheck(), LazyInitExperimentCheck()], ) trainer.fit(model) From 432bec2a6455fcd8d34b40067271b95319f22b3c Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 12 Jun 2023 19:57:51 +0200 Subject: [PATCH 07/10] update test --- tests/tests_pytorch/loggers/test_all.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 28f213d0e653a..4b41f0889a47f 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -225,12 +225,14 @@ def __init__(self, lr=0.1, batch_size=1): class LazyInitExperimentCheck(Callback): def setup(self, trainer, pl_module, stage=None): - if trainer.global_rank != 0: + if trainer.global_rank > 0: return if isinstance(trainer.logger, MLFlowLogger): - assert trainer.logger._mlflow_client is not None + assert trainer.logger._mlflow_client + elif isinstance(trainer.logger, NeptuneLogger): + assert trainer.logger._run_instance else: - assert trainer.logger._experiment is not None + assert trainer.logger._experiment class RankZeroLoggerCheck(Callback): From 5d994273e1d17c230251be1a9f6532adbb3a4493 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 18:04:02 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_pytorch/loggers/test_all.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 4b41f0889a47f..85865a5d2dfb3 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -248,8 +248,8 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): @pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES) @RunIf(skip_windows=True) def test_logger_initialization(tmpdir, monkeypatch, logger_class): - """Test that loggers get replaced by dummy loggers on global rank > 0 and that the experiment object is available - at the right time in Trainer.""" + """Test that loggers get replaced by dummy loggers on global rank > 0 and that the experiment object is + available at the right time in Trainer.""" _patch_comet_atexit(monkeypatch) try: _test_logger_created_on_rank_zero_only(tmpdir, logger_class) From f7902be61cd4e8f6630d9e8be281c940f820de21 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Tue, 13 Jun 2023 11:54:36 +0200 Subject: [PATCH 09/10] ci --- tests/tests_pytorch/loggers/test_all.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 85865a5d2dfb3..1cd197ba74589 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -245,19 +245,19 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): assert pl_module.logger.experiment.something(foo="bar") is None -@pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES) +@pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES_WO_NEPTUNE) @RunIf(skip_windows=True) def test_logger_initialization(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0 and that the experiment object is available at the right time in Trainer.""" _patch_comet_atexit(monkeypatch) try: - _test_logger_created_on_rank_zero_only(tmpdir, logger_class) + _test_logger_initialization(tmpdir, logger_class) except (ImportError, ModuleNotFoundError): pytest.xfail(f"multi-process test requires {logger_class.__class__} dependencies to be installed.") -def _test_logger_created_on_rank_zero_only(tmpdir, logger_class): +def _test_logger_initialization(tmpdir, logger_class): logger_args = _get_logger_args(logger_class, tmpdir) logger = logger_class(**logger_args) model = BoringModel() From 7d6d687e9ea170b5fda039a8089a5c4e0dc596de Mon Sep 17 00:00:00 2001 From: awaelchli Date: Sun, 25 Jun 2023 16:43:40 -0700 Subject: [PATCH 10/10] fix neptune test --- tests/tests_pytorch/loggers/test_all.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 1cd197ba74589..6030b309764b7 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -125,6 +125,11 @@ def log_metrics(self, metrics, step): logger.experiment.id = "foo" logger.experiment.project_name = "bar" + if logger_class == NeptuneLogger: + logger._retrieve_run_data = Mock() + logger._run_short_id = "foo" + logger._run_name = "bar" + if logger_class == MLFlowLogger: logger = mock_mlflow_run_creation(logger, experiment_id="foo", run_id="bar")