Skip to content

Commit

Permalink
Publish YAML configuration to group_logs run (#386)
Browse files — browse the repository at this point in the history
Co-authored-by: Evgeny Pavlov <[email protected]>
  • Loading branch information
vrigal and eu9ene authored Feb 12, 2024
1 parent 90d3279 commit 9ebfd13
Showing 1 changed file with 24 additions and 9 deletions.
33 changes: 24 additions & 9 deletions tracking/translations_parser/cli/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pathlib import Path

import wandb
import yaml

from translations_parser.data import Metric
from translations_parser.parser import TrainingParser
Expand Down Expand Up @@ -67,7 +68,6 @@ def parse_experiment(
project=project,
name=name,
group=group,
config={"logs_file": logs_file},
)
],
)
Expand Down Expand Up @@ -133,30 +133,45 @@ def publish_group_logs(
missing_run_metrics = {
name: metrics for name, metrics in metrics.items() if name not in existing_runs
}

for model_name, model_metrics in missing_run_metrics.items():
logger.info(f"Creating missing run {model_name} with associated metrics")
publisher = WandB(project=project, name=model_name, group=group)
publisher.open(TrainingParser(logs_iter=iter([]), publishers=[]))
publisher.handle_metrics(model_metrics)
publisher.close()

# Start publication of `group_logs` fake run
# Publication of the `group_logs` fake run
config = {}
config_path = Path("/".join([*prefix[:-1], "experiments", project, group, "config.yml"]))
if not config_path.is_file():
logger.warning(f"No configuration file at {config_path}, skipping.")
else:
# Publish the YAML configuration as configuration on the group run
with config_path.open("r") as f:
data = f.read()
try:
config.update(yaml.safe_load(data))
except Exception as e:
logger.error(f"Config could not be read at {config_path}: {e}")

publisher = WandB(
project=project,
group=group,
name="group_logs",
notes=(
"Experiments summary for the group.\n"
"The configuration section contains `config.yaml` values, logs "
"are uploaded as artifacts and all metrics are reported in a table."
),
)
publisher.wandb = wandb.init(
project=project,
group=group,
name="group_logs",
config=config,
)

# Publish all evaluation metrics to a table
if publisher.wandb is None:
return
if metrics:
# Publish all evaluation metrics to a table
table = wandb.Table(
columns=["Group", "Model", "Dataset", "BLEU", "chrF"],
data=[
Expand All @@ -166,12 +181,12 @@ def publish_group_logs(
],
)
publisher.wandb.log({"metrics": table})

# Publish logs directory content as artifacts
if logs_dir.is_dir():
# Publish logs directory content as artifacts
artifact = wandb.Artifact(name=group, type="logs")
artifact.add_dir(local_path=str(logs_dir.resolve()))
publisher.wandb.log_artifact(artifact)

publisher.wandb.finish()


Expand Down

0 comments on commit 9ebfd13

Please sign in to comment.