From 531eef7d950ca7fa2ab52a345e6338ce6dc4e5aa Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:07:15 +0100 Subject: [PATCH] Support custom details path --- src/lighteval/logging/evaluation_tracker.py | 23 ++++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index ffa901de1..c30adb436 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -186,7 +186,7 @@ def save(self) -> None: self.save_results(date_id=date_id, results_dict=results_dict) if self.should_save_details: - self.save_details(date_id, details_datasets) + self.save_details(date_id=date_id, details_datasets=details_datasets) if self.should_push_to_hub: self.push_to_hub( @@ -214,14 +214,21 @@ def save_results( with self.fs.open(output_path, "w") as f: f.write(json.dumps(results_dict, cls=EnhancedJSONEncoder, indent=2, ensure_ascii=False)) - def save_details(self, date_id: str, details_datasets: dict[str, Dataset]): - output_dir_details = Path(self.output_dir) / "details" / self.general_config_logger.model_name - output_dir_details_sub_folder = output_dir_details / date_id - self.fs.mkdirs(output_dir_details_sub_folder, exist_ok=True) - logger.info(f"Saving details to {output_dir_details_sub_folder}") + def save_details(self, date_id: str, details_datasets: dict[str, Dataset], output_path: str | None = None): + if output_path: + output_path = Path(self.output_dir) / output_path + else: + output_path = ( + Path(self.output_dir) + / "details" + / self.general_config_logger.model_name + / date_id + / f"details_{{task_name}}_{date_id}.parquet" + ) + self.fs.mkdirs(output_path.parent, exist_ok=True) + logger.info(f"Saving details to {output_path}") for task_name, dataset in details_datasets.items(): - output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet" - with self.fs.open(str(output_file_details), "wb") as f: + with self.fs.open(output_path.as_posix().format(task_name=task_name), "wb") as f: dataset.to_parquet(f) def generate_final_dict(self) -> dict: