Fix incorrect processing of short flags for user tools cli (#677)
* Add support for shortcut flags in user tools cli

Signed-off-by: Partho Sarthi <[email protected]>

* Add support for shortcut flags in new user tools cli

Signed-off-by: Partho Sarthi <[email protected]>

* Refactor code

Signed-off-by: Partho Sarthi <[email protected]>

* Fix warning message

Signed-off-by: Partho Sarthi <[email protected]>

* Fix default value for verbose in new cli

Signed-off-by: Partho Sarthi <[email protected]>

* Minor fix

Signed-off-by: Partho Sarthi <[email protected]>

---------

Signed-off-by: Partho Sarthi <[email protected]>
parthosa authored Dec 7, 2023
1 parent 6e8f1f5 commit 2fdf994
Showing 8 changed files with 151 additions and 32 deletions.
21 changes: 21 additions & 0 deletions user_tools/src/spark_rapids_pytools/common/utilities.py
@@ -45,6 +45,7 @@

 class Utils:
     """Utility class used to enclose common helpers and utilities."""
+    warning_issued = False

     @classmethod
     def gen_random_string(cls, str_length: int) -> str:
@@ -208,6 +209,26 @@ def gen_multiline_str(cls, *items) -> str:
     def get_os_name(cls) -> str:
         return os.uname().sysname

+    @classmethod
+    def get_value_or_pop(cls, provided_value, options_dict, short_flag, default_value=None):
+        """
+        Gets a value, or pops it from the provided options dictionary if the value is not explicitly provided.
+        :param provided_value: The value to return if not None.
+        :param options_dict: Dictionary containing options.
+        :param short_flag: Flag to look for in options_dict.
+        :param default_value: The default value to return if the short_flag is not found. Defaults to None.
+        :return: provided_value, the value popped from options_dict, or default_value.
+        """
+        if provided_value is not None:
+            return provided_value
+        if short_flag in options_dict:
+            if not cls.warning_issued:
+                cls.warning_issued = True
+                print('Warning: Instead of using short flags for arguments, consider providing the value directly.')
+            return options_dict.pop(short_flag)
+        return default_value


 class ToolLogging:
     """Holds global utilities used for logging."""
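For context, here is a minimal sketch (not part of the commit) of the helper's contract, assuming the module above is importable: an explicitly provided value wins, then a short flag popped from the leftover options, then the default.

```python
# Minimal sketch (not from the commit) of Utils.get_value_or_pop:
# resolution order is explicit value > short flag > default.
from spark_rapids_pytools.common.utilities import Utils

rapids_options = {'v': True, 'e': '/tmp/eventlogs'}

# An explicit argument wins and the short flag is left in place.
assert Utils.get_value_or_pop(False, rapids_options, 'v', True) is False
assert 'v' in rapids_options

# With no explicit value, the short flag is popped so it is not also
# forwarded to the underlying tool through **rapids_options.
assert Utils.get_value_or_pop(None, rapids_options, 'e') == '/tmp/eventlogs'
assert 'e' not in rapids_options

# With neither, the caller-supplied default applies (e.g. 24 for jvm_heap_size).
assert Utils.get_value_or_pop(None, rapids_options, 'j', 24) == 24
```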
30 changes: 25 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AWS."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -105,6 +105,15 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -150,8 +159,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -192,6 +201,17 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    aws_profile = Utils.get_value_or_pop(aws_profile, rapids_options, 'a')
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
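These wrappers collect unmatched keyword arguments in `**rapids_options`. The CLI front end is python-fire based, so a short flag such as `-v` that matches no declared parameter name is collected into that dict rather than binding to `verbose`. Below is a simplified, hypothetical reproduction of the bug and the fix; the stand-in functions are illustrations, not the real wrappers.

```python
# Hypothetical stand-ins illustrating the bug this commit fixes.
from spark_rapids_pytools.common.utilities import Utils

def profiling_before(verbose: bool = False, **rapids_options):
    # Bug: '-v' parses into rapids_options, so verbose stays False and the
    # stray 'v' key is later forwarded to the underlying tool as-is.
    return verbose, rapids_options

def profiling_after(verbose: bool = None, **rapids_options):
    # Fix: resolve the short flag explicitly and pop it from the dict.
    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
    return verbose, rapids_options

cli_kwargs = {'v': True}  # roughly what a stray `-v` parses to
assert profiling_before(**cli_kwargs) == (False, {'v': True})
assert profiling_after(**cli_kwargs) == (True, {})
```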
28 changes: 23 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/databricks_azure_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AZURE."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -103,6 +103,14 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -146,8 +154,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -186,6 +194,16 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
13 changes: 10 additions & 3 deletions user_tools/src/spark_rapids_pytools/wrappers/dataproc_gke_wrapper.py
@@ -16,7 +16,7 @@

 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType


@@ -36,8 +36,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -100,6 +100,13 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
26 changes: 21 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/dataproc_wrapper.py
@@ -16,7 +16,7 @@

 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
@@ -39,8 +39,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -102,6 +102,13 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -143,8 +150,8 @@ def profiling(gpu_cluster: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
              credentials_file: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -183,6 +190,15 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
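The same block recurs in every wrapper with only the flag set varying. A table-driven helper would remove that duplication; this is a refactor sketch (hypothetical, not what the commit does):

```python
# Hypothetical refactor idea (not in this commit): drive the short-flag
# resolution from a table instead of one call per flag in each wrapper.
from spark_rapids_pytools.common.utilities import Utils

SHORT_FLAG_TABLE = {
    'verbose': ('v', False),
    'jvm_heap_size': ('j', 24),
    'eventlogs': ('e', None),
    'tools_jar': ('t', None),
    'local_folder': ('l', None),
}

def resolve_short_flags(values: dict, rapids_options: dict) -> dict:
    """Resolve each named argument through Utils.get_value_or_pop."""
    return {name: Utils.get_value_or_pop(values.get(name), rapids_options, flag, default)
            for name, (flag, default) in SHORT_FLAG_TABLE.items()}
```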
27 changes: 22 additions & 5 deletions user_tools/src/spark_rapids_pytools/wrappers/emr_wrapper.py
@@ -16,7 +16,7 @@
 """Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on AWS-EMR."""
 from spark_rapids_tools import CspEnv
 from spark_rapids_pytools.cloud_api.sp_types import DeployMode
-from spark_rapids_pytools.common.utilities import ToolLogging
+from spark_rapids_pytools.common.utilities import Utils, ToolLogging
 from spark_rapids_pytools.rapids.bootstrap import Bootstrap
 from spark_rapids_pytools.rapids.diagnostic import Diagnostic
 from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, \
@@ -40,8 +40,8 @@ def qualification(cpu_cluster: str = None,
                   filter_apps: str = QualFilterApp.tostring(QualFilterApp.SAVINGS),
                   gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
                       QualGpuClusterReshapeType.get_default()),
-                  jvm_heap_size: int = 24,
-                  verbose: bool = False,
+                  jvm_heap_size: int = None,
+                  verbose: bool = None,
                   cpu_discount: int = None,
                   gpu_discount: int = None,
                   global_discount: int = None,
@@ -100,6 +100,14 @@
     For more details on Qualification tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html#qualification-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
@@ -140,8 +148,8 @@ def profiling(gpu_cluster: str = None,
              local_folder: str = None,
              remote_folder: str = None,
              tools_jar: str = None,
-             jvm_heap_size: int = 24,
-             verbose: bool = False,
+             jvm_heap_size: int = None,
+             verbose: bool = None,
              **rapids_options) -> None:
     """
     The Profiling tool analyzes both CPU and GPU generated event logs and generates information
@@ -177,6 +185,15 @@
     For more details on Profiling tool options, please visit
     https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html#profiling-tool-options
     """
+    verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
+    profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
+    gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
+    remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
+    jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', 24)
+    eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
+    tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
+    worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
+    local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
     if verbose:
         # when debug is set to true set it in the environment.
         ToolLogging.enable_debug_mode()
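Finally, a pytest-style sketch (a hypothetical test, not part of this commit) that pins down the two behaviors the new helper promises: short flags are consumed exactly once, and the advisory warning prints only for the first one.

```python
# Hypothetical pytest sketch for Utils.get_value_or_pop (not in the commit).
from spark_rapids_pytools.common.utilities import Utils

def test_short_flags_are_popped_and_warn_once(capsys):
    Utils.warning_issued = False          # reset the one-shot class flag
    opts = {'v': True, 'j': 30}
    assert Utils.get_value_or_pop(None, opts, 'v', False) is True
    assert Utils.get_value_or_pop(None, opts, 'j', 24) == 30
    assert opts == {}                     # both short flags consumed
    assert capsys.readouterr().out.count('Warning:') == 1

def test_explicit_value_wins():
    opts = {'j': 30}
    assert Utils.get_value_or_pop(40, opts, 'j', 24) == 40
    assert opts == {'j': 30}              # untouched when value is explicit
```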
Diffs for the remaining two changed files are not shown.
