From 9ebfd13903e28015d8008eb814ccc6ea7d7d735a Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 12 Feb 2024 18:16:36 +0100 Subject: [PATCH] Publish YAML configuration to group_logs run (#386) Co-authored-by: Evgeny Pavlov --- .../translations_parser/cli/experiments.py | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tracking/translations_parser/cli/experiments.py b/tracking/translations_parser/cli/experiments.py index c3c4fb0dd..ed1f271bc 100644 --- a/tracking/translations_parser/cli/experiments.py +++ b/tracking/translations_parser/cli/experiments.py @@ -14,6 +14,7 @@ from pathlib import Path import wandb +import yaml from translations_parser.data import Metric from translations_parser.parser import TrainingParser @@ -67,7 +68,6 @@ def parse_experiment( project=project, name=name, group=group, - config={"logs_file": logs_file}, ) ], ) @@ -133,7 +133,6 @@ def publish_group_logs( missing_run_metrics = { name: metrics for name, metrics in metrics.items() if name not in existing_runs } - for model_name, model_metrics in missing_run_metrics.items(): logger.info(f"Creating missing run {model_name} with associated metrics") publisher = WandB(project=project, name=model_name, group=group) @@ -141,22 +140,38 @@ def publish_group_logs( publisher.handle_metrics(model_metrics) publisher.close() - # Start publication of `group_logs` fake run + # Publication of the `group_logs` fake run + config = {} + config_path = Path("/".join([*prefix[:-1], "experiments", project, group, "config.yml"])) + if not config_path.is_file(): + logger.warning(f"No configuration file at {config_path}, skipping.") + else: + # Publish the YAML configuration as configuration on the group run + with config_path.open("r") as f: + data = f.read() + try: + config.update(yaml.safe_load(data)) + except Exception as e: + logger.error(f"Config could not be read at {config_path}: {e}") + publisher = WandB( project=project, group=group, name="group_logs", + notes=( + "Experiments summary for the group.\n" + "The configuration section contains `config.yaml` values, logs " + "are uploaded as artifacts and all metrics are reported in a table." + ), ) publisher.wandb = wandb.init( project=project, group=group, name="group_logs", + config=config, ) - - # Publish all evaluation metrics to a table - if publisher.wandb is None: - return if metrics: + # Publish all evaluation metrics to a table table = wandb.Table( columns=["Group", "Model", "Dataset", "BLEU", "chrF"], data=[ @@ -166,12 +181,12 @@ def publish_group_logs( ], ) publisher.wandb.log({"metrics": table}) - - # Publish logs directory content as artifacts if logs_dir.is_dir(): + # Publish logs directory content as artifacts artifact = wandb.Artifact(name=group, type="logs") artifact.add_dir(local_path=str(logs_dir.resolve())) publisher.wandb.log_artifact(artifact) + publisher.wandb.finish()