From ebca53035da1ac790a850a58fe21176c741060eb Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Thu, 2 Nov 2023 09:29:29 -0700
Subject: [PATCH] Support extra arguments in new user tools CLI (#646)

* Support extra arguments in new user tools CLI

Signed-off-by: Partho Sarthi

* Update tests

Signed-off-by: Partho Sarthi

---------

Signed-off-by: Partho Sarthi
---
 .../src/spark_rapids_tools/cmdli/tools_cli.py | 22 +++++++++++++++----
 .../src/spark_rapids_tools/utils/util.py      |  2 +-
 .../test_tool_argprocessor.py                 |  4 ++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
index a8ed0bffb..5499bc961 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/tools_cli.py
@@ -48,7 +48,8 @@ def qualification(self,
                       global_discount: int = None,
                       gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                           QualGpuClusterReshapeType.get_default()),
-                      verbose: bool = False):
+                      verbose: bool = False,
+                      **rapids_options):
         """The Qualification cmd provides estimated running costs and speedups by migrating Apache
         Spark applications to GPU accelerated clusters.
 
@@ -98,6 +99,11 @@ def qualification(self,
                 "CLUSTER": recommend optimal GPU cluster by cost for entire cluster;
                 "JOB": recommend optimal GPU cluster by cost per job
         :param verbose: True or False to enable verbosity of the script.
+        :param rapids_options: A list of valid Qualification tool options.
+            Note that the wrapper ignores ["output-directory", "platform"] flags, and it does not support
+            multiple "spark-property" arguments.
+            For more details on Qualification tool options, please visit
+            https://nvidia.github.io/spark-rapids/docs/spark-qualification-tool.html#qualification-tool-options
         """
         if verbose:
             ToolLogging.enable_debug_mode()
@@ -118,7 +124,8 @@ def qualification(self,
         if qual_args:
             tool_obj = QualificationAsLocal(platform_type=qual_args['runtimePlatform'],
                                             output_folder=qual_args['outputFolder'],
-                                            wrapper_options=qual_args)
+                                            wrapper_options=qual_args,
+                                            rapids_options=rapids_options)
             tool_obj.launch()
 
     def profiling(self,
@@ -126,7 +133,8 @@ def profiling(self,
                   eventlogs: str = None,
                   cluster: str = None,
                   platform: str = None,
                   output_folder: str = None,
-                  verbose: bool = False):
+                  verbose: bool = False,
+                  **rapids_options):
         """The Profiling cmd provides information which can be used for debugging and profiling
         Apache Spark applications running on accelerated GPU cluster.
 
@@ -145,6 +153,11 @@ def profiling(self,
             and "databricks-azure".
         :param output_folder: path to store the output.
         :param verbose: True or False to enable verbosity of the script.
+        :param rapids_options: A list of valid Profiling tool options.
+            Note that the wrapper ignores ["output-directory", "worker-info"] flags, and it does not support
+            multiple "spark-property" arguments.
+            For more details on Profiling tool options, please visit
+            https://nvidia.github.io/spark-rapids/docs/spark-profiling-tool.html#profiling-tool-options
         """
         if verbose:
             ToolLogging.enable_debug_mode()
@@ -157,7 +170,8 @@ def profiling(self,
         if prof_args:
             tool_obj = ProfilingAsLocal(platform_type=prof_args['runtimePlatform'],
                                         output_folder=prof_args['outputFolder'],
-                                        wrapper_options=prof_args)
+                                        wrapper_options=prof_args,
+                                        rapids_options=rapids_options)
             tool_obj.launch()
 
     def bootstrap(self,
diff --git a/user_tools/src/spark_rapids_tools/utils/util.py b/user_tools/src/spark_rapids_tools/utils/util.py
index 71497eddc..e8ed7e05d 100644
--- a/user_tools/src/spark_rapids_tools/utils/util.py
+++ b/user_tools/src/spark_rapids_tools/utils/util.py
@@ -92,7 +92,7 @@ def dump_tool_usage(tool_name: Optional[str], raise_sys_exit: Optional[bool] = T
     imported_module = __import__('spark_rapids_tools.cmdli', globals(), locals(), ['ToolsCLI'])
     wrapper_clzz = getattr(imported_module, 'ToolsCLI')
     help_name = 'ascli'
-    usage_cmd = f'{tool_name} --help'
+    usage_cmd = f'{tool_name} -- --help'
     try:
         fire.Fire(wrapper_clzz(), name=help_name, command=usage_cmd)
     except fire.core.FireExit:
diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
index f1c719421..49d8f9cec 100644
--- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
+++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
@@ -166,8 +166,8 @@ def test_cluster_props_no_eventlogs_on_prem(self, capsys, tool_name):
                                        platform='onprem')
         assert pytest_wrapped_e.type == SystemExit
         captured = capsys.readouterr()
-        # Verify there is no URL in error message
-        assert 'https://' not in captured.err
+        # Verify there is no URL in error message except for the one from the documentation
+        assert 'https://' not in captured.err or 'nvidia.github.io' in captured.err
 
     @pytest.mark.skip(reason='Unit tests are not completed yet')
     def test_arg_cases_coverage(self):
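
Note on the mechanism (not part of the patch itself): both the new `**rapids_options` parameters and the change from `--help` to `-- --help` in util.py follow from how python-fire parses a command line. Fire binds `--flag value` pairs to declared parameters and routes any unmatched flag into a function's `**kwargs`; anything placed after a bare `--` separator is instead consumed by Fire itself. Once qualification and profiling accept `**rapids_options`, a plain `--help` would be swallowed into that dict, so dump_tool_usage has to pass it after the separator. Below is a minimal, self-contained sketch of that behavior; the file name and flag names are illustrative, not code from the repository.

# fire_kwargs_sketch.py
# Minimal sketch of python-fire's **kwargs flag collection.
# Names here are toy stand-ins, not taken from spark_rapids_tools.
import fire


def qualification(eventlogs: str = None, verbose: bool = False, **rapids_options):
    """Toy stand-in for ToolsCLI.qualification."""
    # Flags that match declared parameters bind to them; every other
    # --key value pair is collected into rapids_options, which a wrapper
    # could then forward to the underlying tool invocation.
    print(f'eventlogs={eventlogs} verbose={verbose}')
    print(f'rapids_options={rapids_options}')


if __name__ == '__main__':
    fire.Fire(qualification)

Running `python fire_kwargs_sketch.py --eventlogs /tmp/logs --per_sql True` would print rapids_options={'per_sql': True}, with `--per_sql` standing in for an arbitrary pass-through option. Running `python fire_kwargs_sketch.py -- --help` hands --help to Fire and prints the usage text instead of capturing it as a tool option, which is why the usage dump in util.py now builds its command as f'{tool_name} -- --help'.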