Fix incorrect processing of short flags for user tools cli (#677)
* Add support for shortcut flags in user tools cli

Signed-off-by: Partho Sarthi <[email protected]>

* Add support for shortcut flags in new user tools cli

Signed-off-by: Partho Sarthi <[email protected]>

* Refactor code

Signed-off-by: Partho Sarthi <[email protected]>

* Fix warning message

Signed-off-by: Partho Sarthi <[email protected]>

* Fix default value for verbose in new cli

Signed-off-by: Partho Sarthi <[email protected]>

* Minor fix

Signed-off-by: Partho Sarthi <[email protected]>

---------

Signed-off-by: Partho Sarthi <[email protected]>
parthosa authored Dec 7, 2023
1 parent 6e8f1f5 commit 2fdf994
Showing 8 changed files with 151 additions and 32 deletions.
21 changes: 21 additions & 0 deletions user_tools/src/spark_rapids_pytools/common/utilities.py
@@ -45,6 +45,7 @@

 class Utils:
     """Utility class used to enclose common helpers and utilities."""
+    warning_issued = False

     @classmethod
     def gen_random_string(cls, str_length: int) -> str:
@@ -208,6 +209,26 @@ def gen_multiline_str(cls, *items) -> str:
     def get_os_name(cls) -> str:
         return os.uname().sysname

+    @classmethod
+    def get_value_or_pop(cls, provided_value, options_dict, short_flag, default_value=None):
+        """
+        Gets a value, or pops it from the provided options dictionary if the value is not explicitly provided.
+        :param provided_value: The value to return if not None.
+        :param options_dict: Dictionary containing options.
+        :param short_flag: Flag to look for in options_dict.
+        :param default_value: The default value to return if the short_flag is not found. Defaults to None.
+        :return: provided_value, the value popped from options_dict, or default_value.
+        """
+        if provided_value is not None:
+            return provided_value
+        if short_flag in options_dict:
+            if not cls.warning_issued:
+                cls.warning_issued = True
+                print('Warning: Instead of using short flags for arguments, consider providing the value directly.')
+            return options_dict.pop(short_flag)
+        return default_value


 class ToolLogging:
     """Holds global utilities used for logging."""
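For context, here is a minimal sketch (not part of the commit) of the helper's contract, assuming the module above is importable: an explicitly provided value wins, then a short flag popped from the leftover options, then the default.

```python
# Minimal sketch (not from the commit) of Utils.get_value_or_pop:
# resolution order is explicit value > short flag > default.
from spark_rapids_pytools.common.utilities import Utils

rapids_options = {'v': True, 'e': '/tmp/eventlogs'}

# An explicit argument wins and the short flag is left in place.
assert Utils.get_value_or_pop(False, rapids_options, 'v', True) is False
assert 'v' in rapids_options

# With no explicit value, the short flag is popped so it is not also
# forwarded to the underlying tool through **rapids_options.
assert Utils.get_value_or_pop(None, rapids_options, 'e') == '/tmp/eventlogs'
assert 'e' not in rapids_options

# With neither, the caller-supplied default applies (e.g. 24 for jvm_heap_size).
assert Utils.get_value_or_pop(None, rapids_options, 'j', 24) == 24
```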
30 changes: 25 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AWS."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -105,6 +105,15 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -150,8 +159,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -192,6 +201,17 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
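These wrappers collect unmatched keyword arguments in `**rapids_options`. The CLI front end is python-fire based, so a short flag such as `-v` that matches no declared parameter name is collected into that dict rather than binding to `verbose`. Below is a simplified, hypothetical reproduction of the bug and the fix; the stand-in functions are illustrations, not the real wrappers.

```python
# Hypothetical stand-ins illustrating the bug this commit fixes.
from spark_rapids_pytools.common.utilities import Utils

def profiling_before(verbose: bool = False, **rapids_options):
    # Bug: '-v' parses into rapids_options, so verbose stays False and the
    # stray 'v' key is later forwarded to the underlying tool as-is.
    return verbose, rapids_options

def profiling_after(verbose: bool = None, **rapids_options):
    # Fix: resolve the short flag explicitly and pop it from the dict.
    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
    return verbose, rapids_options

cli_kwargs = {'v': True}  # roughly what a stray `-v` parses to
assert profiling_before(**cli_kwargs) == (False, {'v': True})
assert profiling_after(**cli_kwargs) == (True, {})
```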
28 changes: 23 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AZURE."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -103,6 +103,14 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -146,8 +154,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -186,6 +194,16 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
13 changes: 10 additions & 3 deletions user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py
@@ -16,7 +16,7 @@

 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType


@@ -36,8 +36,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -100,6 +100,13 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
26 changes: 21 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
@@ -16,7 +16,7 @@

 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -102,6 +102,13 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -143,8 +150,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -183,6 +190,15 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
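The same block recurs in every wrapper with only the flag set varying. A table-driven helper would remove that duplication; this is a refactor sketch (hypothetical, not what the commit does):

```python
# Hypothetical refactor idea (not in this commit): drive the short-flag
# resolution from a table instead of one call per flag in each wrapper.
from spark_rapids_pytools.common.utilities import Utils

SHORT_FLAG_TABLE = {
    'verbose': ('v', False),
    'jvm_heap_size': ('j', 24),
    'eventlogs': ('e', None),
    'tools_jar': ('t', None),
    'local_folder': ('l', None),
}

def resolve_short_flags(values: dict, rapids_options: dict) -> dict:
    """Resolve each named argument through Utils.get_value_or_pop."""
    return {name: Utils.get_value_or_pop(values.get(name), rapids_options, flag, default)
            for name, (flag, default) in SHORT_FLAG_TABLE.items()}
```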
27 changes: 22 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on AWS-EMR."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, \
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -100,6 +100,14 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -140,8 +148,8 @@ def profiling(gpu_cluster: str = None,
              local_folder: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -177,6 +185,15 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
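Finally, a pytest-style sketch (a hypothetical test, not part of this commit) that pins down the two behaviors the new helper promises: short flags are consumed exactly once, and the advisory warning prints only for the first one.

```python
# Hypothetical pytest sketch for Utils.get_value_or_pop (not in the commit).
from spark_rapids_pytools.common.utilities import Utils

def test_short_flags_are_popped_and_warn_once(capsys):
    Utils.warning_issued = False          # reset the one-shot class flag
    opts = {'v': True, 'j': 30}
    assert Utils.get_value_or_pop(None, opts, 'v', False) is True
    assert Utils.get_value_or_pop(None, opts, 'j', 24) == 30
    assert opts == {}                     # both short flags consumed
    assert capsys.readouterr().out.count('Warning:') == 1

def test_explicit_value_wins():
    opts = {'j': 30}
    assert Utils.get_value_or_pop(40, opts, 'j', 24) == 40
    assert opts == {'j': 30}              # untouched when value is explicit
```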
Diffs for the remaining two changed files are not shown.
