From c45e3c8b96b0f5e99eab4d1c152198792be24251 Mon Sep 17 00:00:00 2001 From: Aoi <82735346+ryan-minato@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:31:26 +0900 Subject: [PATCH 1/3] fix: CACHE_DIR Default Value in Accelerate Pipeline (#461) --- src/lighteval/main_accelerate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index a0a01abe8..2dd78f445 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) TOKEN = os.getenv("HF_TOKEN") -CACHE_DIR: str = os.getenv("HF_HOME", "/scratch") +CACHE_DIR: str = os.getenv("HF_HOME") HELP_PANEL_NAME_1 = "Common Parameters" HELP_PANEL_NAME_2 = "Logging Parameters" From 988fa94db7cdd539f2a7a4971ed67890b0da4184 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 19 Dec 2024 14:55:05 +0100 Subject: [PATCH 2/3] Add EvaluationTracker to docs and fix its docstring (#464) * Fix definition of public in docstring * Fix push_to_tensorboard param name in docstring * Fix docstring style * Add EvaluationTracker to docs * Fix docstring style * Move docstring to class header * Add attributes to docstring * Fix style * Fix style * Fix style * Fix style * Fix style * Fix style * Fix internal links in docstring --- docs/source/package_reference/logging.mdx | 5 +- src/lighteval/logging/evaluation_tracker.py | 51 +++++++++++---------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/docs/source/package_reference/logging.mdx b/docs/source/package_reference/logging.mdx index 9fd01154e..9102755c1 100644 --- a/docs/source/package_reference/logging.mdx +++ b/docs/source/package_reference/logging.mdx @@ -1,4 +1,7 @@ -# Loggers +# Logging + +## EvaluationTracker +[[autodoc]] logging.evaluation_tracker.EvaluationTracker ## GeneralConfigLogger [[autodoc]] logging.info_loggers.GeneralConfigLogger diff 
--git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index 017055348..8cc8c09e9 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -82,16 +82,35 @@ def default(self, o): class EvaluationTracker: - """ - Keeps track of the overall evaluation process and relevant informations. + """Keeps track of the overall evaluation process and relevant information. - The [`EvaluationTracker`] contains specific loggers for experiments details - ([`DetailsLogger`]), metrics ([`MetricsLogger`]), task versions - ([`VersionsLogger`]) as well as for the general configurations of both the - specific task ([`TaskConfigLogger`]) and overall evaluation run - ([`GeneralConfigLogger`]). It compiles the data from these loggers and + The [`~logging.evaluation_tracker.EvaluationTracker`] contains specific loggers for experiment details + ([`~logging.info_loggers.DetailsLogger`]), metrics ([`~logging.info_loggers.MetricsLogger`]), task versions + ([`~logging.info_loggers.VersionsLogger`]) as well as for the general configurations of both the + specific task ([`~logging.info_loggers.TaskConfigLogger`]) and overall evaluation run + ([`~logging.info_loggers.GeneralConfigLogger`]). It compiles the data from these loggers and writes it to files, which can be published to the Hugging Face hub if requested. + + Args: + output_dir (`str`): Local folder path where you want results to be saved. + save_details (`bool`, defaults to True): If True, details are saved to the `output_dir`. + push_to_hub (`bool`, defaults to False): If True, details are pushed to the hub. + Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset, + if `public` is True else `{hub_results_org}/details__{sanitized model_name}_private`, a private dataset. 
+ push_to_tensorboard (`bool`, defaults to False): If True, will create and push the results for a tensorboard folder on the hub. + hub_results_org (`str`, *optional*): The organisation to push the results to. + See more details about the datasets organisation in [`EvaluationTracker.save`]. + tensorboard_metric_prefix (`str`, defaults to "eval"): Prefix for the metrics in the tensorboard logs. + public (`bool`, defaults to False): If True, results and details are pushed to public orgs. + nanotron_run_info ([`~nanotron.config.GeneralArgs`], *optional*): Reference to information about Nanotron models runs. + + **Attributes**: + - **details_logger** ([`~logging.info_loggers.DetailsLogger`]) -- Logger for experiment details. + - **metrics_logger** ([`~logging.info_loggers.MetricsLogger`]) -- Logger for experiment metrics. + - **versions_logger** ([`~logging.info_loggers.VersionsLogger`]) -- Logger for task versions. + - **general_config_logger** ([`~logging.info_loggers.GeneralConfigLogger`]) -- Logger for general configuration. + - **task_config_logger** ([`~logging.info_loggers.TaskConfigLogger`]) -- Logger for task configuration. """ def __init__( @@ -105,23 +124,7 @@ def __init__( public: bool = False, nanotron_run_info: "GeneralArgs" = None, ) -> None: - """ - Creates all the necessary loggers for evaluation tracking. - - Args: - output_dir (str): Local folder path where you want results to be saved - save_details (bool): If True, details are saved to the output_dir - push_to_hub (bool): If True, details are pushed to the hub. - Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset, - if `public` is True else `{hub_results_org}/details__{sanitized model_name}_private`, a private dataset. - push_results_to_tensorboard (bool): If True, will create and push the results for a tensorboard folder on the hub - hub_results_org (str): The organisation to push the results to. 
See - more details about the datasets organisation in - [`EvaluationTracker.save`] - tensorboard_metric_prefix (str): Prefix for the metrics in the tensorboard logs - public (bool): If True, results and details are pushed in private orgs - nanotron_run_info (GeneralArgs): Reference to informations about Nanotron models runs - """ + """Creates all the necessary loggers for evaluation tracking.""" self.details_logger = DetailsLogger() self.metrics_logger = MetricsLogger() self.versions_logger = VersionsLogger() From a1c610daab3f796a273ab56f78b5f5fe9614b8aa Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:03:44 +0100 Subject: [PATCH 3/3] Remove unnecessary deepcopy in evaluation_tracker (#459) * Remove unnecessary deepcopy in evaluation_tracker * Fix style --- src/lighteval/logging/evaluation_tracker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index 8cc8c09e9..6cad9189f 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -20,7 +20,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import copy import json import logging import os @@ -156,8 +155,7 @@ def save(self) -> None: date_id = datetime.now().isoformat().replace(":", "-") # We first prepare data to save - config_general = copy.deepcopy(self.general_config_logger) - config_general = asdict(config_general) + config_general = asdict(self.general_config_logger) # We remove the config from logging, which contains context/accelerator objects config_general.pop("config")