From ebca53035da1ac790a850a58fe21176c741060eb Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Thu, 2 Nov 2023 09:29:29 -0700
Subject: [PATCH] Support extra arguments in new user tools CLI (#646)

* Support extra arguments in new user tools CLI

Signed-off-by: Partho Sarthi

* Update tests

Signed-off-by: Partho Sarthi

---------

Signed-off-by: Partho Sarthi
---
 .../src/spark_rapids_tools/cmdli/tools_cli.py | 22 +++++++++++++++----
 .../src/spark_rapids_tools/utils/util.py      |  2 +-
 .../test_tool_argprocessor.py                 |  4 ++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
index a8ed0bffb..5499bc961 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
@@ -48,7 +48,8 @@ def qualification(self,
                       global_discount: int = None,
                       gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                           QualGpuClusterReshapeType.get_default()),
-                      verbose: bool = False):
+                      verbose: bool = False,
+                      **rapids_options):
         """The Qualification cmd provides estimated running costs and speedups by migrating Apache
         Spark applications to GPU accelerated clusters.
 
@@ -98,6 +99,11 @@ def qualification(self,
                 "CLUSTER": recommend optimal GPU cluster by cost for entire cluster;
                 "JOB": recommend optimal GPU cluster by cost per job
         :param verbose: True or False to enable verbosity of the script.
+        :param rapids_options: A list of valid Qualification tool options.
+            Note that the wrapper ignores ["output-directory", "platform"] flags, and it does not support
+            multiple "spark-property" arguments.
+            For more details on Qualification tool options, please visit
+            https://nvidia.github.io/spark-rapids/docs/spark-qualification-tool.html#qualification-tool-options
         """
         if verbose:
             ToolLogging.enable_debug_mode()
@@ -118,7 +124,8 @@ def qualification(self,
         if qual_args:
             tool_obj = QualificationAsLocal(platform_type=qual_args['runtimePlatform'],
                                             output_folder=qual_args['outputFolder'],
-                                            wrapper_options=qual_args)
+                                            wrapper_options=qual_args,
+                                            rapids_options=rapids_options)
             tool_obj.launch()
 
     def profiling(self,
@@ -126,7 +133,8 @@ def profiling(self,
                   eventlogs: str = None,
                   cluster: str = None,
                   platform: str = None,
                   output_folder: str = None,
-                  verbose: bool = False):
+                  verbose: bool = False,
+                  **rapids_options):
         """The Profiling cmd provides information which can be used for debugging and profiling
         Apache Spark applications running on accelerated GPU cluster.
 
@@ -145,6 +153,11 @@ def profiling(self,
             and "databricks-azure".
         :param output_folder: path to store the output.
         :param verbose: True or False to enable verbosity of the script.
+        :param rapids_options: A list of valid Profiling tool options.
+            Note that the wrapper ignores ["output-directory", "worker-info"] flags, and it does not support
+            multiple "spark-property" arguments.
+            For more details on Profiling tool options, please visit
+            https://nvidia.github.io/spark-rapids/docs/spark-profiling-tool.html#profiling-tool-options
         """
         if verbose:
             ToolLogging.enable_debug_mode()
@@ -157,7 +170,8 @@ def profiling(self,
         if prof_args:
             tool_obj = ProfilingAsLocal(platform_type=prof_args['runtimePlatform'],
                                         output_folder=prof_args['outputFolder'],
-                                        wrapper_options=prof_args)
+                                        wrapper_options=prof_args,
+                                        rapids_options=rapids_options)
             tool_obj.launch()
 
     def bootstrap(self,
diff --git a/user_tools/src/spark_rapids_tools/utils/util.py b/user_tools/src/spark_rapids_tools/utils/util.py
index 71497eddc..e8ed7e05d 100644
--- a/user_tools/src/spark_rapids_tools/utils/util.py
+++ b/user_tools/src/spark_rapids_tools/utils/util.py
@@ -92,7 +92,7 @@ def dump_tool_usage(tool_name: Optional[str], raise_sys_exit: Optional[bool] = T
     imported_module = __import__('spark_rapids_tools.cmdli', globals(), locals(), ['ToolsCLI'])
     wrapper_clzz = getattr(imported_module, 'ToolsCLI')
     help_name = 'ascli'
-    usage_cmd = f'{tool_name} --help'
+    usage_cmd = f'{tool_name} -- --help'
     try:
         fire.Fire(wrapper_clzz(), name=help_name, command=usage_cmd)
     except fire.core.FireExit:
diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
index f1c719421..49d8f9cec 100644
--- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
+++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
@@ -166,8 +166,8 @@ def test_cluster_props_no_eventlogs_on_prem(self, capsys, tool_name):
                                        platform='onprem')
         assert pytest_wrapped_e.type == SystemExit
         captured = capsys.readouterr()
-        # Verify there is no URL in error message
-        assert 'https://' not in captured.err
+        # Verify there is no URL in error message except for the one from the documentation
+        assert 'https://' not in captured.err or 'nvidia.github.io' in captured.err
 
     @pytest.mark.skip(reason='Unit tests are not completed yet')
     def test_arg_cases_coverage(self):
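
Note on the mechanism (not part of the patch itself): both the new `**rapids_options` parameters and the change from `--help` to `-- --help` in util.py follow from how python-fire parses a command line. Fire binds `--flag value` pairs to declared parameters and routes any unmatched flag into a function's `**kwargs`; anything placed after a bare `--` separator is instead consumed by Fire itself. Once qualification and profiling accept `**rapids_options`, a plain `--help` would be swallowed into that dict, so dump_tool_usage has to pass it after the separator. Below is a minimal, self-contained sketch of that behavior; the file name and flag names are illustrative, not code from the repository.

# fire_kwargs_sketch.py
# Minimal sketch of python-fire's **kwargs flag collection.
# Names here are toy stand-ins, not taken from spark_rapids_tools.
import fire


def qualification(eventlogs: str = None, verbose: bool = False, **rapids_options):
    """Toy stand-in for ToolsCLI.qualification."""
    # Flags that match declared parameters bind to them; every other
    # --key value pair is collected into rapids_options, which a wrapper
    # could then forward to the underlying tool invocation.
    print(f'eventlogs={eventlogs} verbose={verbose}')
    print(f'rapids_options={rapids_options}')


if __name__ == '__main__':
    fire.Fire(qualification)

Running `python fire_kwargs_sketch.py --eventlogs /tmp/logs --per_sql True` would print rapids_options={'per_sql': True}, with `--per_sql` standing in for an arbitrary pass-through option. Running `python fire_kwargs_sketch.py -- --help` hands --help to Fire and prints the usage text instead of capturing it as a tool option, which is why the usage dump in util.py now builds its command as f'{tool_name} -- --help'.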