From 84ab65b5a24c92e07b4dab1f7f315cd7cb3f2541 Mon Sep 17 00:00:00 2001
From: "Jian Zhang (James)" <6593865@qq.com>
Date: Thu, 11 Jan 2024 08:39:05 -0800
Subject: [PATCH] [Enhancement] change the input argument of GSTaskTrackerAbc to be an integer (#699)

*Issue #, if available:*

*Description of changes:*

- This PR changes the input argument of `GSTaskTrackerAbc` from a `GSConfig` object to an integer, because `GSTaskTrackerAbc` only needs an integer to set its `log_report_frequency` attribute.
- Requiring a `GSConfig` object prevents users from using a task tracker to monitor a running process, because creating a `GSConfig` is not publicly supported and is very complex.
- Decoupling `GSTaskTracker` from `GSConfig` lets users construct task trackers themselves and use them with the GraphStorm programming APIs.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

---------

Co-authored-by: Ubuntu
Co-authored-by: xiang song(charlie.song)
---
 docs/source/advanced/own-models.rst              |  2 +-
 docs/source/configuration/configuration-run.rst  |  7 +------
 examples/customized_models/HGT/hgt_nc.py         |  2 +-
 examples/peft_llm_gnn/main_nc.py                 |  2 +-
 examples/peft_llm_gnn/nc_config_Video_Games.yaml |  3 ---
 python/graphstorm/gsf.py                         |  2 +-
 python/graphstorm/tracker/graphstorm_tracker.py  | 10 ++++++----
 python/graphstorm/tracker/sagemaker_tracker.py   |  7 +++++--
 8 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/docs/source/advanced/own-models.rst b/docs/source/advanced/own-models.rst
index 3b9d9eb8c5..bde3b66d46 100644
--- a/docs/source/advanced/own-models.rst
+++ b/docs/source/advanced/own-models.rst
@@ -272,7 +272,7 @@ The GraphStorm trainers can have evaluators and task trackers associated. The fo
                                   config.early_stop_strategy)
     trainer.setup_evaluator(evaluator)
     # Optional: set up a task tracker to show the progress of training.
-    tracker = GSSageMakerTaskTracker(config)
+    tracker = GSSageMakerTaskTracker(config.eval_frequency)
     trainer.setup_task_tracker(tracker)
 
 GraphStorm's `evaluators `_ could help to compute the required evaluation metrics, such as ``accuracy``, ``f1``, ``mrr``, and etc. Users can select the proper evaluator and use the trainer's ``setup_evaluator()`` method to attach them. GraphStorm's `task trackers `_ serve as log collectors, which are used to show the process information.
diff --git a/docs/source/configuration/configuration-run.rst b/docs/source/configuration/configuration-run.rst
index f0c1afa854..276e97d08d 100644
--- a/docs/source/configuration/configuration-run.rst
+++ b/docs/source/configuration/configuration-run.rst
@@ -126,11 +126,6 @@ GraphStorm provides a set of parameters to control how and where to save and res
   - Yaml: ``task_tracker: sagemaker_task_tracker``
   - Argument: ``--task_tracker sagemaker_task_tracker``
   - Default value: ``sagemaker_task_tracker``
-- **log_report_frequency**: The frequency of reporting model performance metrics through task_tracker. The frequency is defined by using number of iterations, i.e., every N iterations the evaluation metrics will be reported. (Please note the evaluation metrics should be generated at the reporting iteration. See "eval_frequency" for how evaluation frequency is controlled.)
-
-  - Yaml: ``log_report_frequency: 1000``
-  - Argument: ``--log-report-frequency 1000``
-  - Default value: ``1000``
 - **restore_model_path**: A path where GraphStorm model parameters were saved.
   For training, if restore_model_path is set, GraphStom will retrieve the model parameters from restore_model_path instead of initializing the parameters. For inference, restore_model_path must be provided.
 
   - Yaml: ``restore_model_path: /model/checkpoint/``
@@ -278,7 +273,7 @@ GraphStorm provides a set of parameters to control model evaluation.
   - Yaml: ``use_mini_batch_infer: false``
   - Argument: ``--use-mini-batch-infer false``
   - Default value: ``true``
-- **eval_frequency**: The frequency of doing evaluation. GraphStorm trainers do evaluation at the end of each epoch. However, for large-scale graphs, training one epoch may take hundreds of thousands of iterations. One may want to do evaluations in the middle of an epoch. When eval_frequency is set, every **eval_frequency** iterations, the trainer will do evaluation once. The evaluation results can be printed and reported. See **log_report_frequency** for more details.
+- **eval_frequency**: The frequency of doing evaluation. GraphStorm trainers do evaluation at the end of each epoch. However, for large-scale graphs, training one epoch may take hundreds of thousands of iterations. One may want to do evaluations in the middle of an epoch. When eval_frequency is set, every **eval_frequency** iterations, the trainer will do evaluation once. The evaluation results can be printed and reported.
 
   - Yaml: ``eval_frequency: 10000``
   - Argument: ``--eval-frequency 10000``
diff --git a/examples/customized_models/HGT/hgt_nc.py b/examples/customized_models/HGT/hgt_nc.py
index 6da88e6870..debb185858 100644
--- a/examples/customized_models/HGT/hgt_nc.py
+++ b/examples/customized_models/HGT/hgt_nc.py
@@ -335,7 +335,7 @@ def main(args):
                                   config.early_stop_strategy)
     trainer.setup_evaluator(evaluator)
     # Optional: set up a task tracker to show the progress of training.
-    tracker = GSSageMakerTaskTracker(config)
+    tracker = GSSageMakerTaskTracker(config.eval_frequency)
     trainer.setup_task_tracker(tracker)
 
     # Start the training process.
diff --git a/examples/peft_llm_gnn/main_nc.py b/examples/peft_llm_gnn/main_nc.py
index ae45ffeb1f..6fcd7237f9 100644
--- a/examples/peft_llm_gnn/main_nc.py
+++ b/examples/peft_llm_gnn/main_nc.py
@@ -62,7 +62,7 @@ def main(config_args):
         config.early_stop_strategy,
     )
     trainer.setup_evaluator(evaluator)
-    tracker = GSSageMakerTaskTracker(config)
+    tracker = GSSageMakerTaskTracker(config.eval_frequency)
     trainer.setup_task_tracker(tracker)
 
     # create train loader
diff --git a/examples/peft_llm_gnn/nc_config_Video_Games.yaml b/examples/peft_llm_gnn/nc_config_Video_Games.yaml
index 6a2d0129dd..626553d8c9 100644
--- a/examples/peft_llm_gnn/nc_config_Video_Games.yaml
+++ b/examples/peft_llm_gnn/nc_config_Video_Games.yaml
@@ -19,11 +19,8 @@ gsf:
     batch_size: 4
     dropout: 0.1
     eval_batch_size: 4
-    # eval_frequency: 100
-    #log_report_frequency: 50
     lr: 0.0001
     num_epochs: 10
-    # save_model_frequency: 300
     wd_l2norm: 1.0e-06
   input:
     restore_model_path: null
diff --git a/python/graphstorm/gsf.py b/python/graphstorm/gsf.py
index 124ed576a6..97dabd1164 100644
--- a/python/graphstorm/gsf.py
+++ b/python/graphstorm/gsf.py
@@ -656,4 +656,4 @@ def check_homo(g):
 
 def create_builtin_task_tracker(config):
     tracker_class = get_task_tracker_class(config.task_tracker)
-    return tracker_class(config)
+    return tracker_class(config.eval_frequency)
diff --git a/python/graphstorm/tracker/graphstorm_tracker.py b/python/graphstorm/tracker/graphstorm_tracker.py
index ece0955e84..a9e0c6055c 100644
--- a/python/graphstorm/tracker/graphstorm_tracker.py
+++ b/python/graphstorm/tracker/graphstorm_tracker.py
@@ -22,11 +22,13 @@ class GSTaskTrackerAbc():
 
     Parameters
     ----------
-    config: GSConfig
-        Configurations. Users can add their own configures in the yaml config file.
+    log_report_frequency: int
+        The frequency of reporting model performance metrics through task_tracker.
+        The frequency is defined by using number of iterations, i.e., every N iterations
+        the evaluation metrics will be reported.
     """
-    def __init__(self, config):
-        self._report_frequency = config.log_report_frequency # Can be None if not provided
+    def __init__(self, log_report_frequency):
+        self._report_frequency = log_report_frequency # Can be None if not provided
 
     @abc.abstractmethod
     def log_metric(self, metric_name, metric_value, step, force_report=False):
diff --git a/python/graphstorm/tracker/sagemaker_tracker.py b/python/graphstorm/tracker/sagemaker_tracker.py
index 06276943bd..3ae5fd5a8a 100644
--- a/python/graphstorm/tracker/sagemaker_tracker.py
+++ b/python/graphstorm/tracker/sagemaker_tracker.py
@@ -25,8 +25,11 @@ class GSSageMakerTaskTracker(GSTaskTrackerAbc):
 
     Parameters
     ----------
-    config: GSConfig
-        Configurations. Users can add their own configures in the yaml config file.
+    log_report_frequency: int
+        The frequency of reporting model performance metrics through task_tracker.
+        The frequency is defined by using number of iterations, i.e., every N iterations
+        the evaluation metrics will be reported.
+
     """
 
     def _do_report(self, step):
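
For reference, a minimal usage sketch of what this change enables (not part of the patch itself). It assumes `GSSageMakerTaskTracker` is importable from `graphstorm.tracker`, that the SageMaker tracker inherits the new single-integer constructor from `GSTaskTrackerAbc`, and that reporting happens when the step is a multiple of the frequency:

```python
from graphstorm.tracker import GSSageMakerTaskTracker

# With this change the tracker only needs the reporting frequency (an int),
# not a full GSConfig object, so it can be built directly in user code.
tracker = GSSageMakerTaskTracker(100)  # report metrics every 100 iterations

# log_metric() follows the GSTaskTrackerAbc signature shown in the diff above;
# step 200 is a multiple of the frequency, so it should be a reporting step.
tracker.log_metric("accuracy", 0.93, step=200)

# Attaching the tracker to a trainer works as in the examples touched above:
#     trainer.setup_task_tracker(tracker)
```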