From b97f25859ae3fb5f301fec83db34b176febaba1d Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Fri, 3 Nov 2023 09:20:11 -0700 Subject: [PATCH 01/10] Add unit tests to cover all cases Signed-off-by: Partho Sarthi --- .../spark_rapids_tools/cmdli/argprocessor.py | 26 +- .../tests/spark_rapids_tools_ut/conftest.py | 3 +- .../resources/worker_info.yaml | 19 ++ .../test_tool_argprocessor.py | 272 ++++++++++++++---- 4 files changed, 261 insertions(+), 59 deletions(-) create mode 100644 user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index 67624ac7a..aea23f59f 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -172,6 +172,12 @@ def validate_onprem_with_cluster_name(self): raise PydanticCustomError( 'invalid_argument', f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:') + + def validate_onprem_with_cluster_props(self): + if self.platform == CspEnv.ONPREM: + raise PydanticCustomError( + 'invalid_argument', + f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}]\n Error:') def init_extra_arg_cases(self) -> list: return [] @@ -229,12 +235,19 @@ def get_or_set_platform(self) -> CspEnv: def post_platform_assignment_validation(self, assigned_platform): # do some validation after we decide the cluster type - if self.argv_cases[1] == ArgValueCase.VALUE_A: - if assigned_platform == CspEnv.ONPREM: - # it is not allowed to run cluster_by_name on an OnPrem platform + if assigned_platform == CspEnv.ONPREM: + cluster_case = self.argv_cases[1] + eventlogs_case = self.argv_cases[2] + if cluster_case == ArgValueCase.VALUE_A: + # it is not allowed to run cluster by name on an OnPrem platform raise PydanticCustomError( 'invalid_argument', f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:') + if cluster_case == ArgValueCase.VALUE_B and eventlogs_case == ArgValueCase.UNDEFINED: + # it is not allowed to run cluster by props on an OnPrem platform without eventlogs + raise PydanticCustomError( + 'invalid_argument', + f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}] without eventlogs\n Error:') @dataclass @@ -278,6 +291,13 @@ def define_invalid_arg_cases(self): [ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] ] } + self.rejected['Cluster By Properties Cannot go with OnPrem'] = { + 'valid': False, + 'callable': partial(self.validate_onprem_with_cluster_props), + 'cases': [ + [ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED] + ] + } def define_detection_cases(self): self.detected['Define Platform from Cluster Properties file'] = { diff --git a/user_tools/tests/spark_rapids_tools_ut/conftest.py b/user_tools/tests/spark_rapids_tools_ut/conftest.py index e29cefc3c..71310cb78 100644 --- a/user_tools/tests/spark_rapids_tools_ut/conftest.py +++ b/user_tools/tests/spark_rapids_tools_ut/conftest.py @@ -39,14 +39,13 @@ def gen_cpu_cluster_props(): ('databricks_azure', 'cluster/databricks/azure-cpu-00.json') ] - all_cpu_cluster_props = gen_cpu_cluster_props() # all cpu_cluster_props except the onPrem csp_cpu_cluster_props = [(e_1, e_2) for (e_1, e_2) in all_cpu_cluster_props if e_1 != 'onprem'] # all csps except onprem csps = ['dataproc', 'dataproc_gke', 'emr', 'databricks_aws', 'databricks_azure'] all_csps = csps + ['onprem'] - +autotuner_prop_path = 'worker_info.yaml' class SparkRapidsToolsUT: # pylint: disable=too-few-public-methods diff --git a/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml b/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml new file mode 100644 index 000000000..d9aaa14d5 --- /dev/null +++ b/user_tools/tests/spark_rapids_tools_ut/resources/worker_info.yaml @@ -0,0 +1,19 @@ +system: + numCores: 32 + memory: 212992MiB + numWorkers: 5 +gpu: + memory: 15109MiB + count: 4 + name: T4 +softwareProperties: + spark.driver.maxResultSize: 7680m + spark.driver.memory: 15360m + spark.executor.cores: '8' + spark.executor.instances: '2' + spark.executor.memory: 47222m + spark.executorEnv.OPENBLAS_NUM_THREADS: '1' + spark.scheduler.mode: FAIR + spark.sql.cbo.enabled: 'true' + spark.ui.port: '0' + spark.yarn.am.memory: 640m diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 49d8f9cec..98bf203d6 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -20,11 +20,12 @@ import fire import pytest # pylint: disable=import-error +import warnings from spark_rapids_tools import CspEnv -from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel, ArgValueCase +from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel, ArgValueCase, user_arg_validation_registry from spark_rapids_tools.enums import QualFilterApp -from .conftest import SparkRapidsToolsUT, all_cpu_cluster_props, csp_cpu_cluster_props, csps +from .conftest import SparkRapidsToolsUT, autotuner_prop_path, all_cpu_cluster_props, all_csps, csp_cpu_cluster_props, csps @dataclasses.dataclass @@ -75,25 +76,42 @@ def validate_args_w_savings_disabled(tool_name: str, t_args: dict): assert t_args['filterApps'] == QualFilterApp.SPEEDUPS @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) - def test_no_args(self, tool_name): + + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) + def test_with_no_args(self, tool_name): + # should fail: cannot run with no args fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n') with pytest.raises(SystemExit) as pytest_wrapped_e: AbsToolUserArgModel.create_tool_args(tool_name) assert pytest_wrapped_e.type == SystemExit @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_cluster__name_no_hints(self, tool_name): + def test_with_cluster_name(self, tool_name): + # should fail: cannot run with cluster name only fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n') with pytest.raises(SystemExit) as pytest_wrapped_e: AbsToolUserArgModel.create_tool_args(tool_name, cluster='mycluster') assert pytest_wrapped_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp', all_csps) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) + def test_with_platform(self, tool_name, csp): + # should fail: cannot run with platform only + with pytest.raises(SystemExit) as pytest_exit_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp) + assert pytest_exit_e.type == SystemExit @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) - def test_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) + def test_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): + # should pass: cluster properties and eventlogs are provided cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' tool_args = AbsToolUserArgModel.create_tool_args(tool_name, cluster=f'{cluster_prop_file}', @@ -107,79 +125,225 @@ def test_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): self.validate_args_w_savings_disabled(tool_name, tool_args) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) - def test_no_cluster_props(self, get_ut_data_dir, tool_name): - # all eventlogs are stored on local path. There is no way to find which cluster - # we refer to. + def test_with_eventlogs(self, get_ut_data_dir, tool_name): + # should pass: eventlogs are provided tool_args = AbsToolUserArgModel.create_tool_args(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs') assert tool_args['runtimePlatform'] == CspEnv.ONPREM # for qualification, cost savings should be disabled self.validate_args_w_savings_disabled(tool_name, tool_args) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + @pytest.mark.parametrize('csp', all_csps) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) + def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): + # should pass: platform and eventlogs are provided + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + eventlogs=f'{get_ut_data_dir}/eventlogs') + assert tool_args['runtimePlatform'] == CspEnv(csp) + # for qualification, cost savings should be disabled + self.validate_args_w_savings_disabled(tool_name, tool_args) + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp', all_csps) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) - def test_onprem_disallow_cluster_by_name(self, get_ut_data_dir, tool_name): - # onprem platform cannot run when the cluster is by_name + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name, csp): + if CspEnv(csp) != CspEnv.ONPREM: + # should pass: platform, cluster name and eventlogs are provided + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') + assert tool_args['runtimePlatform'] == CspEnv(csp) + self.validate_args_w_savings_enabled(tool_name, tool_args) + else: + # should fail: onprem platform cannot run when the cluster is by name + with pytest.raises(SystemExit) as pytest_exit_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') + assert pytest_exit_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): + # should fail: defaults to onprem, cannot run when the cluster is by name with pytest.raises(SystemExit) as pytest_exit_e: AbsToolUserArgModel.create_tool_args(tool_name, cluster='my_cluster', eventlogs=f'{get_ut_data_dir}/eventlogs') assert pytest_exit_e.type == SystemExit - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform='onprem', - cluster='my_cluster') - assert pytest_wrapped_e.type == SystemExit + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp', csps) + @pytest.mark.parametrize('csp', all_csps) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_cluster_name_no_eventlogs(self, tool_name, csp): - # Missing eventlogs should be accepted for all CSPs (except onPrem) - # because the eventlogs can be retrieved from the cluster - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster='my_cluster') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + def test_with_platform_with_cluster_name(self, tool_name, csp): + if CspEnv(csp) != CspEnv.ONPREM: + # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because + # the eventlogs can be retrieved from the cluster + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster='my_cluster') + assert tool_args['runtimePlatform'] == CspEnv(csp) + self.validate_args_w_savings_enabled(tool_name, tool_args) + else: + # should fail: onprem platform needs eventlogs + with pytest.raises(SystemExit) as pytest_wrapped_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster='my_cluster') + assert pytest_wrapped_e.type == SystemExit @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp,prop_path', csp_cpu_cluster_props) + @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) - def test_cluster_props_no_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): - # Missing eventlogs should be accepted for all CSPs (except onPrem) - # because the eventlogs can be retrieved from the cluster + def test_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): + cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' + if CspEnv(csp) != CspEnv.ONPREM: + # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because + # the eventlogs can be retrieved from the cluster + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + cluster=f'{cluster_prop_file}') + assert tool_args['runtimePlatform'] == CspEnv(csp) + self.validate_args_w_savings_enabled(tool_name, tool_args) + else: + # should fail: defaults to onprem, cannot retrieve eventlogs from cluster properties + with pytest.raises(SystemExit) as pytest_wrapped_e: + AbsToolUserArgModel.create_tool_args(tool_name, + cluster=f'{cluster_prop_file}') + assert pytest_wrapped_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) + def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): + cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' + if CspEnv(csp) != CspEnv.ONPREM: + # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because + # the eventlogs can be retrieved from the cluster + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster=f'{cluster_prop_file}') + assert tool_args['runtimePlatform'] == CspEnv(csp) + self.validate_args_w_savings_enabled(tool_name, tool_args) + else: + # should fail: onprem platform cannot retrieve eventlogs from cluster properties + with pytest.raises(SystemExit) as pytest_wrapped_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster=f'{cluster_prop_file}') + assert pytest_wrapped_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp,prop_path', csp_cpu_cluster_props) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) + def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): + # should pass: platform, cluster properties and eventlogs are provided cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{cluster_prop_file}') + platform=csp, + cluster=f'{cluster_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') assert tool_args['runtimePlatform'] == CspEnv(csp) self.validate_args_w_savings_enabled(tool_name, tool_args) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) - def test_cluster_props_no_eventlogs_on_prem(self, capsys, tool_name): - # Missing eventlogs is not accepted for onPrem - with pytest.raises(SystemExit) as pytest_wrapped_e: + @pytest.mark.parametrize('tool_name', ['profiling']) + @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + def test_with_autotuner(self, get_ut_data_dir, tool_name, autotuner_prop_path): + # should fail: autotuner needs eventlogs + autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' + with pytest.raises(SystemExit) as pytest_exit_e: AbsToolUserArgModel.create_tool_args(tool_name, - platform='onprem') - assert pytest_wrapped_e.type == SystemExit - captured = capsys.readouterr() - # Verify there is no URL in error message except for the one from the documentation - assert 'https://' not in captured.err or 'nvidia.github.io' in captured.err + cluster=f'{autotuner_prop_file}') + assert pytest_exit_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['profiling']) + @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) + def test_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, autotuner_prop_path): + autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' + # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because + # the eventlogs can be retrieved from the cluster + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + cluster=f'{autotuner_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_args_w_savings_enabled(tool_name, tool_args) + + @pytest.mark.parametrize('tool_name', ['profiling']) + @pytest.mark.parametrize('csp', all_csps) + @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, autotuner_prop_path): + # should fail: autotuner needs eventlogs + autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' + with pytest.raises(SystemExit) as pytest_exit_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster=f'{autotuner_prop_file}') + assert pytest_exit_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['profiling']) + @pytest.mark.parametrize('csp', all_csps) + @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) + def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, csp, autotuner_prop_path): + # should pass: platform, autotuner properties and eventlogs are provided + autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' + tool_args = AbsToolUserArgModel.create_tool_args(tool_name, + platform=csp, + cluster=f'{autotuner_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + assert tool_args['runtimePlatform'] == CspEnv(csp) + self.validate_args_w_savings_enabled(tool_name, tool_args) - @pytest.mark.skip(reason='Unit tests are not completed yet') def test_arg_cases_coverage(self): - args_keys = [ - [ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED], - [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED], - [ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE], - [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE], - [ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A], - [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A], - [ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A] - ] - - for arg_key in args_keys: - assert str(arg_key) in triplet_test_registry + ''' + This test is to make sure that all argument cases are covered by unit tests. Ensure that a new + unit test is added for each new argument case. + ''' + arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] + arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C, ArgValueCase.IGNORE] + arg_eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] + + all_args_keys = [str([p, c, e]) for p in arg_platform_cases for c in arg_cluster_cases for e in arg_eventlogs_cases] + args_covered = set(triplet_test_registry.keys()) + args_not_covered = set(all_args_keys) - args_covered + + if args_not_covered: + # cases not covered + args_not_covered_str = '\n'.join(args_not_covered) + warnings.warn(f'Cases not covered:\n{args_not_covered_str}') + coverage_percentage = (len(args_covered) / len(all_args_keys)) * 100 + warnings.warn(f'Coverage of all argument cases: {len(args_covered)}/{len(all_args_keys)} ({coverage_percentage:.2f}%)') From 740feefd5a4bdd43b73a6486c5a1ad7f1439e17f Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 7 Nov 2023 10:35:49 -0800 Subject: [PATCH 02/10] Refactor code Signed-off-by: Partho Sarthi --- .../spark_rapids_tools/cmdli/argprocessor.py | 2 +- .../tests/spark_rapids_tools_ut/conftest.py | 6 +- .../test_tool_argprocessor.py | 276 +++++++++--------- 3 files changed, 143 insertions(+), 141 deletions(-) diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index aea23f59f..35551d4d3 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -172,7 +172,7 @@ def validate_onprem_with_cluster_name(self): raise PydanticCustomError( 'invalid_argument', f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:') - + def validate_onprem_with_cluster_props(self): if self.platform == CspEnv.ONPREM: raise PydanticCustomError( diff --git a/user_tools/tests/spark_rapids_tools_ut/conftest.py b/user_tools/tests/spark_rapids_tools_ut/conftest.py index 71310cb78..145355f24 100644 --- a/user_tools/tests/spark_rapids_tools_ut/conftest.py +++ b/user_tools/tests/spark_rapids_tools_ut/conftest.py @@ -16,7 +16,7 @@ import sys -import pytest # pylint: disable=import-error +import pytest # pylint: disable=import-error def get_test_resources_path(): @@ -39,6 +39,7 @@ def gen_cpu_cluster_props(): ('databricks_azure', 'cluster/databricks/azure-cpu-00.json') ] + all_cpu_cluster_props = gen_cpu_cluster_props() # all cpu_cluster_props except the onPrem csp_cpu_cluster_props = [(e_1, e_2) for (e_1, e_2) in all_cpu_cluster_props if e_1 != 'onprem'] @@ -47,7 +48,8 @@ def gen_cpu_cluster_props(): all_csps = csps + ['onprem'] autotuner_prop_path = 'worker_info.yaml' -class SparkRapidsToolsUT: # pylint: disable=too-few-public-methods + +class SparkRapidsToolsUT: # pylint: disable=too-few-public-methods @pytest.fixture(autouse=True) def get_ut_data_dir(self): diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 98bf203d6..3ef60c0aa 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -15,17 +15,16 @@ """Test Tool argument validators""" import dataclasses +import warnings from collections import defaultdict from typing import Dict, Callable, List -import fire import pytest # pylint: disable=import-error -import warnings from spark_rapids_tools import CspEnv -from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel, ArgValueCase, user_arg_validation_registry +from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel, ArgValueCase from spark_rapids_tools.enums import QualFilterApp -from .conftest import SparkRapidsToolsUT, autotuner_prop_path, all_cpu_cluster_props, all_csps, csp_cpu_cluster_props, csps +from .conftest import SparkRapidsToolsUT, autotuner_prop_path, all_cpu_cluster_props, all_csps @dataclasses.dataclass @@ -75,67 +74,74 @@ def validate_args_w_savings_disabled(tool_name: str, t_args: dict): # filterApps should be set to savings assert t_args['filterApps'] == QualFilterApp.SPEEDUPS - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) + @staticmethod + def create_tool_args_should_pass(tool_name: str, platform=None, cluster=None, eventlogs=None): + return AbsToolUserArgModel.create_tool_args(tool_name, + platform=platform, + cluster=cluster, + eventlogs=eventlogs) + + def validate_tool_args(self, tool_name: str, tool_args: dict, cost_savings_enabled=True, expected_platform=None): + assert tool_args['runtimePlatform'] == CspEnv(expected_platform) + if cost_savings_enabled: + self.validate_args_w_savings_enabled(tool_name, tool_args) + else: + self.validate_args_w_savings_disabled(tool_name, tool_args) + @staticmethod + def create_tool_args_should_fail(tool_name: str, platform=None, cluster=None, eventlogs=None): + with pytest.raises(SystemExit) as pytest_wrapped_e: + AbsToolUserArgModel.create_tool_args(tool_name, + platform=platform, + cluster=cluster, + eventlogs=eventlogs) + assert pytest_wrapped_e.type == SystemExit + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) def test_with_no_args(self, tool_name): # should fail: cannot run with no args - fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n') - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name) - assert pytest_wrapped_e.type == SystemExit + self.create_tool_args_should_fail(tool_name=tool_name) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) def test_with_cluster_name(self, tool_name): # should fail: cannot run with cluster name only - fire.core.Display = lambda lines, out: out.write('\n'.join(lines) + '\n') - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name, cluster='mycluster') - assert pytest_wrapped_e.type == SystemExit - + self.create_tool_args_should_fail(tool_name, cluster='my_cluster') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) def test_with_platform(self, tool_name, csp): # should fail: cannot run with platform only - with pytest.raises(SystemExit) as pytest_exit_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp) - assert pytest_exit_e.type == SystemExit + self.create_tool_args_should_fail(tool_name, platform=csp) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) def test_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): - # should pass: cluster properties and eventlogs are provided - cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{cluster_prop_file}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv(csp) - # for qualification, passing the cluster properties should be enabled unless it is - # onprem platform that requires target_platform - if CspEnv(csp) != CspEnv.ONPREM: - self.validate_args_w_savings_enabled(tool_name, tool_args) - else: - self.validate_args_w_savings_disabled(tool_name, tool_args) + # should pass: cluster properties and eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{get_ut_data_dir}/{prop_path}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, + expected_platform=csp) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) def test_with_eventlogs(self, get_ut_data_dir, tool_name): - # should pass: eventlogs are provided - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv.ONPREM - # for qualification, cost savings should be disabled - self.validate_args_w_savings_disabled(tool_name, tool_args) + # should pass: defaults to onprem, eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=CspEnv.ONPREM) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) @@ -144,12 +150,13 @@ def test_with_eventlogs(self, get_ut_data_dir, tool_name): @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): # should pass: platform and eventlogs are provided - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv(csp) # for qualification, cost savings should be disabled - self.validate_args_w_savings_disabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=csp) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @@ -164,20 +171,19 @@ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name, csp): if CspEnv(csp) != CspEnv.ONPREM: # should pass: platform, cluster name and eventlogs are provided - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster='my_cluster', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) else: # should fail: onprem platform cannot run when the cluster is by name - with pytest.raises(SystemExit) as pytest_exit_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster='my_cluster', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert pytest_exit_e.type == SystemExit + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) @@ -186,12 +192,9 @@ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, t @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): # should fail: defaults to onprem, cannot run when the cluster is by name - with pytest.raises(SystemExit) as pytest_exit_e: - AbsToolUserArgModel.create_tool_args(tool_name, - cluster='my_cluster', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert pytest_exit_e.type == SystemExit - + self.create_tool_args_should_fail(tool_name, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @@ -200,40 +203,38 @@ def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) def test_with_platform_with_cluster_name(self, tool_name, csp): - if CspEnv(csp) != CspEnv.ONPREM: + if CspEnv(csp) != CspEnv.ONPREM: # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because # the eventlogs can be retrieved from the cluster - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster='my_cluster') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster='my_cluster') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) else: # should fail: onprem platform needs eventlogs - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster='my_cluster') - assert pytest_wrapped_e.type == SystemExit + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster='my_cluster') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) def test_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' - if CspEnv(csp) != CspEnv.ONPREM: + if CspEnv(csp) != CspEnv.ONPREM: # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because # the eventlogs can be retrieved from the cluster - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{cluster_prop_file}') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{cluster_prop_file}') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) else: # should fail: defaults to onprem, cannot retrieve eventlogs from cluster properties - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{cluster_prop_file}') - assert pytest_wrapped_e.type == SystemExit + self.create_tool_args_should_fail(tool_name, + cluster=f'{cluster_prop_file}') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @@ -241,24 +242,23 @@ def test_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' - if CspEnv(csp) != CspEnv.ONPREM: + if CspEnv(csp) != CspEnv.ONPREM: # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because # the eventlogs can be retrieved from the cluster - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster=f'{cluster_prop_file}') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster=f'{cluster_prop_file}') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) else: # should fail: onprem platform cannot retrieve eventlogs from cluster properties - with pytest.raises(SystemExit) as pytest_wrapped_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster=f'{cluster_prop_file}') - assert pytest_wrapped_e.type == SystemExit - + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster=f'{cluster_prop_file}') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp,prop_path', csp_cpu_cluster_props) + @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) @@ -266,78 +266,79 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): # should pass: platform, cluster properties and eventlogs are provided cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster=f'{cluster_prop_file}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster=f'{cluster_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, + expected_platform=csp) @pytest.mark.parametrize('tool_name', ['profiling']) - @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) - def test_with_autotuner(self, get_ut_data_dir, tool_name, autotuner_prop_path): + def test_with_autotuner(self, get_ut_data_dir, tool_name, prop_path): # should fail: autotuner needs eventlogs - autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' - with pytest.raises(SystemExit) as pytest_exit_e: - AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{autotuner_prop_file}') - assert pytest_exit_e.type == SystemExit + autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' + self.create_tool_args_should_fail(tool_name, + cluster=f'{autotuner_prop_file}') @pytest.mark.parametrize('tool_name', ['profiling']) - @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) - def test_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, autotuner_prop_path): - autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' - # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because - # the eventlogs can be retrieved from the cluster - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - cluster=f'{autotuner_prop_file}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - self.validate_args_w_savings_enabled(tool_name, tool_args) + def test_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, prop_path): + autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' + # should pass: autotuner properties and eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{autotuner_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=CspEnv.ONPREM) @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) - @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) - def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, autotuner_prop_path): + def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, prop_path): # should fail: autotuner needs eventlogs - autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' - with pytest.raises(SystemExit) as pytest_exit_e: - AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster=f'{autotuner_prop_file}') - assert pytest_exit_e.type == SystemExit + autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster=f'{autotuner_prop_file}') @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) - @pytest.mark.parametrize('autotuner_prop_path', [autotuner_prop_path]) + @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) - def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, csp, autotuner_prop_path): + def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): # should pass: platform, autotuner properties and eventlogs are provided - autotuner_prop_file = f'{get_ut_data_dir}/{autotuner_prop_path}' - tool_args = AbsToolUserArgModel.create_tool_args(tool_name, - platform=csp, - cluster=f'{autotuner_prop_file}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - assert tool_args['runtimePlatform'] == CspEnv(csp) - self.validate_args_w_savings_enabled(tool_name, tool_args) + autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster=f'{autotuner_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=csp) def test_arg_cases_coverage(self): - ''' + """ This test is to make sure that all argument cases are covered by unit tests. Ensure that a new unit test is added for each new argument case. - ''' + """ arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] - arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C, ArgValueCase.IGNORE] + arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C, + ArgValueCase.IGNORE] arg_eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] - all_args_keys = [str([p, c, e]) for p in arg_platform_cases for c in arg_cluster_cases for e in arg_eventlogs_cases] + all_args_keys = [str([p, c, e]) for p in arg_platform_cases for c in arg_cluster_cases for e in + arg_eventlogs_cases] args_covered = set(triplet_test_registry.keys()) args_not_covered = set(all_args_keys) - args_covered @@ -345,5 +346,4 @@ def test_arg_cases_coverage(self): # cases not covered args_not_covered_str = '\n'.join(args_not_covered) warnings.warn(f'Cases not covered:\n{args_not_covered_str}') - coverage_percentage = (len(args_covered) / len(all_args_keys)) * 100 - warnings.warn(f'Coverage of all argument cases: {len(args_covered)}/{len(all_args_keys)} ({coverage_percentage:.2f}%)') + warnings.warn(f'Coverage of all argument cases: {len(args_covered)}/{len(all_args_keys)}') From a37d83aea46e99e311032681c1d6809df7912472 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 7 Nov 2023 13:09:00 -0800 Subject: [PATCH 03/10] Reorder tests and improve docs Signed-off-by: Partho Sarthi --- .../test_tool_argprocessor.py | 158 +++++++++--------- 1 file changed, 82 insertions(+), 76 deletions(-) diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 3ef60c0aa..713be7589 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -81,13 +81,6 @@ def create_tool_args_should_pass(tool_name: str, platform=None, cluster=None, ev cluster=cluster, eventlogs=eventlogs) - def validate_tool_args(self, tool_name: str, tool_args: dict, cost_savings_enabled=True, expected_platform=None): - assert tool_args['runtimePlatform'] == CspEnv(expected_platform) - if cost_savings_enabled: - self.validate_args_w_savings_enabled(tool_name, tool_args) - else: - self.validate_args_w_savings_disabled(tool_name, tool_args) - @staticmethod def create_tool_args_should_fail(tool_name: str, platform=None, cluster=None, eventlogs=None): with pytest.raises(SystemExit) as pytest_wrapped_e: @@ -97,19 +90,20 @@ def create_tool_args_should_fail(tool_name: str, platform=None, cluster=None, ev eventlogs=eventlogs) assert pytest_wrapped_e.type == SystemExit + @staticmethod + def validate_tool_args(tool_name: str, tool_args: dict, cost_savings_enabled, expected_platform): + assert tool_args['runtimePlatform'] == CspEnv(expected_platform) + if cost_savings_enabled: + TestToolArgProcessor.validate_args_w_savings_enabled(tool_name, tool_args) + else: + TestToolArgProcessor.validate_args_w_savings_disabled(tool_name, tool_args) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) def test_with_no_args(self, tool_name): # should fail: cannot run with no args self.create_tool_args_should_fail(tool_name=tool_name) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_with_cluster_name(self, tool_name): - # should fail: cannot run with cluster name only - self.create_tool_args_should_fail(tool_name, cluster='my_cluster') - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) @@ -118,30 +112,46 @@ def test_with_platform(self, tool_name, csp): # should fail: cannot run with platform only self.create_tool_args_should_fail(tool_name, platform=csp) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) - def test_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): - # should pass: cluster properties and eventlogs are provided - tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{get_ut_data_dir}/{prop_path}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, - expected_platform=csp) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) + def test_with_cluster_name(self, tool_name): + # should fail: cannot run with cluster name only + self.create_tool_args_should_fail(tool_name, cluster='my_cluster') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) def test_with_eventlogs(self, get_ut_data_dir, tool_name): - # should pass: defaults to onprem, eventlogs are provided + # should pass: defaults to onprem, event logs are provided tool_args = self.create_tool_args_should_pass(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs') self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=False, expected_platform=CspEnv.ONPREM) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp', all_csps) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) + def test_with_platform_with_cluster_name(self, tool_name, csp): + if CspEnv(csp) != CspEnv.ONPREM: + # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because + # the event logs can be retrieved from the cluster + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster='my_cluster') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) + else: + # should fail: onprem platform needs event logs + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster='my_cluster') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) @@ -149,15 +159,26 @@ def test_with_eventlogs(self, get_ut_data_dir, tool_name): @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): - # should pass: platform and eventlogs are provided - # for qualification, cost savings should be disabled + # should pass: platform and event logs are provided tool_args = self.create_tool_args_should_pass(tool_name, platform=csp, eventlogs=f'{get_ut_data_dir}/eventlogs') + # for qualification, cost savings should be disabled because cluster is not provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=False, expected_platform=csp) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): + # should fail: defaults to onprem, cannot run when the cluster is by name + self.create_tool_args_should_fail(tool_name, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) @@ -185,39 +206,6 @@ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, t cluster='my_cluster', eventlogs=f'{get_ut_data_dir}/eventlogs') - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) - def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): - # should fail: defaults to onprem, cannot run when the cluster is by name - self.create_tool_args_should_fail(tool_name, - cluster='my_cluster', - eventlogs=f'{get_ut_data_dir}/eventlogs') - - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp', all_csps) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_with_platform_with_cluster_name(self, tool_name, csp): - if CspEnv(csp) != CspEnv.ONPREM: - # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because - # the eventlogs can be retrieved from the cluster - tool_args = self.create_tool_args_should_pass(tool_name, - platform=csp, - cluster='my_cluster') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=True, - expected_platform=csp) - else: - # should fail: onprem platform needs eventlogs - self.create_tool_args_should_fail(tool_name, - platform=csp, - cluster='my_cluster') - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) @@ -257,6 +245,19 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, platform=csp, cluster=f'{cluster_prop_file}') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) + def test_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): + # should pass: cluster properties and eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{get_ut_data_dir}/{prop_path}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, + expected_platform=csp) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) @@ -283,6 +284,18 @@ def test_with_autotuner(self, get_ut_data_dir, tool_name, prop_path): self.create_tool_args_should_fail(tool_name, cluster=f'{autotuner_prop_file}') + @pytest.mark.parametrize('tool_name', ['profiling']) + @pytest.mark.parametrize('csp', all_csps) + @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) + @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, prop_path): + # should fail: autotuner needs eventlogs + autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster=f'{autotuner_prop_file}') + @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) @@ -297,18 +310,6 @@ def test_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, prop_pa cost_savings_enabled=False, expected_platform=CspEnv.ONPREM) - @pytest.mark.parametrize('tool_name', ['profiling']) - @pytest.mark.parametrize('csp', all_csps) - @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) - def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, prop_path): - # should fail: autotuner needs eventlogs - autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' - self.create_tool_args_should_fail(tool_name, - platform=csp, - cluster=f'{autotuner_prop_file}') - @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) @@ -329,8 +330,13 @@ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool def test_arg_cases_coverage(self): """ - This test is to make sure that all argument cases are covered by unit tests. Ensure that a new - unit test is added for each new argument case. + This test ensures that above tests have covered all possible states of the `platform`, `cluster`, + and `event logs` fields. + + Possible States: + - platform:`undefined`, `actual value`, or `ignore`. + - cluster: `undefined`, `cluster name`, `cluster property file`, `auto tuner file`, or `ignore`. + - event logs: `undefined`, `actual value`, or `ignore`. """ arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C, From dd33fa179c03b06b0a9249ab90460d2245de33b0 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 7 Nov 2023 14:00:37 -0800 Subject: [PATCH 04/10] Remove `bootstrap` from tests for initial version Signed-off-by: Partho Sarthi --- .../tests/spark_rapids_tools_ut/test_tool_argprocessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 713be7589..f70749665 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -98,7 +98,7 @@ def validate_tool_args(tool_name: str, tool_args: dict, cost_savings_enabled, ex else: TestToolArgProcessor.validate_args_w_savings_disabled(tool_name, tool_args) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) def test_with_no_args(self, tool_name): # should fail: cannot run with no args @@ -112,7 +112,7 @@ def test_with_platform(self, tool_name, csp): # should fail: cannot run with platform only self.create_tool_args_should_fail(tool_name, platform=csp) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling', 'bootstrap']) + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) def test_with_cluster_name(self, tool_name): From 80ad3e3f1ee80229cd0275cbfac9cc18cf2b9e90 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Mon, 13 Nov 2023 17:09:39 -0800 Subject: [PATCH 05/10] Update tests to fix IGNORE cases Signed-off-by: Partho Sarthi --- .../spark_rapids_tools/cmdli/argprocessor.py | 2 +- .../test_tool_argprocessor.py | 214 +++++++----------- 2 files changed, 82 insertions(+), 134 deletions(-) diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index 35551d4d3..47712583b 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -295,7 +295,7 @@ def define_invalid_arg_cases(self): 'valid': False, 'callable': partial(self.validate_onprem_with_cluster_props), 'cases': [ - [ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED] + [ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED] ] } diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index f70749665..15f26d552 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -52,6 +52,7 @@ def decorator(func_cb: Callable): triplet_test_registry[obj_k] = argv_obj argv_obj.tests.append(func_cb.__name__) return func_cb + return decorator @@ -98,66 +99,21 @@ def validate_tool_args(tool_name: str, tool_args: dict, cost_savings_enabled, ex else: TestToolArgProcessor.validate_args_w_savings_disabled(tool_name, tool_args) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) - def test_with_no_args(self, tool_name): - # should fail: cannot run with no args - self.create_tool_args_should_fail(tool_name=tool_name) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED]) def test_with_platform(self, tool_name, csp): - # should fail: cannot run with platform only + # should fail: platform provided; cannot run with platform only self.create_tool_args_should_fail(tool_name, platform=csp) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_with_cluster_name(self, tool_name): - # should fail: cannot run with cluster name only - self.create_tool_args_should_fail(tool_name, cluster='my_cluster') - - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) - def test_with_eventlogs(self, get_ut_data_dir, tool_name): - # should pass: defaults to onprem, event logs are provided - tool_args = self.create_tool_args_should_pass(tool_name, - eventlogs=f'{get_ut_data_dir}/eventlogs') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=False, - expected_platform=CspEnv.ONPREM) - - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp', all_csps) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) - def test_with_platform_with_cluster_name(self, tool_name, csp): - if CspEnv(csp) != CspEnv.ONPREM: - # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because - # the event logs can be retrieved from the cluster - tool_args = self.create_tool_args_should_pass(tool_name, - platform=csp, - cluster='my_cluster') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=True, - expected_platform=csp) - else: - # should fail: onprem platform needs event logs - self.create_tool_args_should_fail(tool_name, - platform=csp, - cluster='my_cluster') + # should fail: platform not provided; cannot run with no args + self.create_tool_args_should_fail(tool_name=tool_name) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.UNDEFINED, ArgValueCase.IGNORE]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A]) def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): # should pass: platform and event logs are provided tool_args = self.create_tool_args_should_pass(tool_name, @@ -168,27 +124,17 @@ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): cost_savings_enabled=False, expected_platform=csp) - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) - def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): - # should fail: defaults to onprem, cannot run when the cluster is by name - self.create_tool_args_should_fail(tool_name, - cluster='my_cluster', - eventlogs=f'{get_ut_data_dir}/eventlogs') + # should pass: platform not provided; event logs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=CspEnv.ONPREM) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp', all_csps) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.IGNORE, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name, csp): if CspEnv(csp) != CspEnv.ONPREM: # should pass: platform, cluster name and eventlogs are provided @@ -199,42 +145,60 @@ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, t self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) + + # should pass: event logs not provided; missing eventlogs should be accepted for + # all CSPs (except onPrem) because the event logs can be retrieved from the cluster + tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, + cluster='my_cluster') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=True, + expected_platform=csp) else: - # should fail: onprem platform cannot run when the cluster is by name + # should fail: platform, cluster name and eventlogs are provided; onprem platform + # cannot run when the cluster is by name self.create_tool_args_should_fail(tool_name, platform=csp, cluster='my_cluster', eventlogs=f'{get_ut_data_dir}/eventlogs') + # should fail: event logs not provided; onprem platform cannot run when the cluster is by name + self.create_tool_args_should_fail(tool_name, + platform=csp, + cluster='my_cluster') + + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED]) + def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): + # should fail: eventlogs provided; defaults platform to onprem, cannot run when the cluster is by name + self.create_tool_args_should_fail(tool_name, + cluster='my_cluster', + eventlogs=f'{get_ut_data_dir}/eventlogs') + + # should fail: eventlogs not provideds; defaults platform to onprem, cannot run when the cluster is by name + self.create_tool_args_should_fail(tool_name, + cluster='my_cluster') + @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) + @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) - def test_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): + def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' if CspEnv(csp) != CspEnv.ONPREM: - # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because - # the eventlogs can be retrieved from the cluster + # should pass: platform provided; missing eventlogs should be accepted for all CSPs (except onPrem) + # because the eventlogs can be retrieved from the cluster properties tool_args = self.create_tool_args_should_pass(tool_name, + platform=csp, cluster=f'{cluster_prop_file}') self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) - else: - # should fail: defaults to onprem, cannot retrieve eventlogs from cluster properties - self.create_tool_args_should_fail(tool_name, - cluster=f'{cluster_prop_file}') - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED]) - def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, prop_path): - cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' - if CspEnv(csp) != CspEnv.ONPREM: - # should pass: missing eventlogs should be accepted for all CSPs (except onPrem) because - # the eventlogs can be retrieved from the cluster + # should pass: platform not provided; missing eventlogs should be accepted for all CSPs (except onPrem) + # because the eventlogs can be retrieved from the cluster properties tool_args = self.create_tool_args_should_pass(tool_name, - platform=csp, cluster=f'{cluster_prop_file}') self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, @@ -245,25 +209,15 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, platform=csp, cluster=f'{cluster_prop_file}') - @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) - @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) - def test_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): - # should pass: cluster properties and eventlogs are provided - tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{get_ut_data_dir}/{prop_path}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, - expected_platform=csp) + # should fail: platform not provided; defaults platform to onprem, cannot retrieve eventlogs from + # cluster properties + self.create_tool_args_should_fail(tool_name, + cluster=f'{cluster_prop_file}') @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_B, ArgValueCase.VALUE_A]) def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): # should pass: platform, cluster properties and eventlogs are provided cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' @@ -275,48 +229,35 @@ def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, expected_platform=csp) - @pytest.mark.parametrize('tool_name', ['profiling']) - @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) - def test_with_autotuner(self, get_ut_data_dir, tool_name, prop_path): - # should fail: autotuner needs eventlogs - autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' - self.create_tool_args_should_fail(tool_name, - cluster=f'{autotuner_prop_file}') + # should pass: platform not provided; cluster properties and eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{get_ut_data_dir}/{prop_path}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, + expected_platform=csp) @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.UNDEFINED]) def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, prop_path): - # should fail: autotuner needs eventlogs + # should fail: platform provided; autotuner needs eventlogs autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' self.create_tool_args_should_fail(tool_name, platform=csp, cluster=f'{autotuner_prop_file}') - @pytest.mark.parametrize('tool_name', ['profiling']) - @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) - def test_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, prop_path): - autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' - # should pass: autotuner properties and eventlogs are provided - tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{autotuner_prop_file}', - eventlogs=f'{get_ut_data_dir}/eventlogs') - self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, - cost_savings_enabled=False, - expected_platform=CspEnv.ONPREM) + # should fail: platform not provided; autotuner needs eventlogs + self.create_tool_args_should_fail(tool_name, + cluster=f'{autotuner_prop_file}') @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) @pytest.mark.parametrize('prop_path', [autotuner_prop_path]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) - @register_triplet_test([ArgValueCase.IGNORE, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) - @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.IGNORE]) @register_triplet_test([ArgValueCase.VALUE_A, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) + @register_triplet_test([ArgValueCase.UNDEFINED, ArgValueCase.VALUE_C, ArgValueCase.VALUE_A]) def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool_name, csp, prop_path): # should pass: platform, autotuner properties and eventlogs are provided autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' @@ -328,20 +269,27 @@ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool cost_savings_enabled=False, expected_platform=csp) + # should pass: platform not provided; autotuner properties and eventlogs are provided + tool_args = self.create_tool_args_should_pass(tool_name, + cluster=f'{autotuner_prop_file}', + eventlogs=f'{get_ut_data_dir}/eventlogs') + self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, + cost_savings_enabled=False, + expected_platform=CspEnv.ONPREM) + def test_arg_cases_coverage(self): """ This test ensures that above tests have covered all possible states of the `platform`, `cluster`, and `event logs` fields. Possible States: - - platform:`undefined`, `actual value`, or `ignore`. - - cluster: `undefined`, `cluster name`, `cluster property file`, `auto tuner file`, or `ignore`. - - event logs: `undefined`, `actual value`, or `ignore`. + - platform:`undefined` or `actual value`. + - cluster: `undefined`, `cluster name`, `cluster property file` or `auto tuner file`. + - event logs: `undefined` or `actual value`. """ - arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] - arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C, - ArgValueCase.IGNORE] - arg_eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.IGNORE] + arg_platform_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A] + arg_cluster_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.VALUE_C] + arg_eventlogs_cases = [ArgValueCase.UNDEFINED, ArgValueCase.VALUE_A] all_args_keys = [str([p, c, e]) for p in arg_platform_cases for c in arg_cluster_cases for e in arg_eventlogs_cases] From df15aa1f476714c34d3299de30f94aa24bf41e1b Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Tue, 14 Nov 2023 09:39:37 -0800 Subject: [PATCH 06/10] Fix typo Signed-off-by: Partho Sarthi --- .../tests/spark_rapids_tools_ut/test_tool_argprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 15f26d552..1ce7f688b 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -176,7 +176,7 @@ def test_with_cluster_name_with_eventlogs(self, get_ut_data_dir, tool_name): cluster='my_cluster', eventlogs=f'{get_ut_data_dir}/eventlogs') - # should fail: eventlogs not provideds; defaults platform to onprem, cannot run when the cluster is by name + # should fail: eventlogs not provided; defaults platform to onprem, cannot run when the cluster is by name self.create_tool_args_should_fail(tool_name, cluster='my_cluster') From 48c9b23d932f8c7b3eed278e81d1deb4f96f70cf Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Wed, 15 Nov 2023 11:25:50 -0800 Subject: [PATCH 07/10] Fix cluster by prop case Signed-off-by: Partho Sarthi --- user_tools/src/spark_rapids_tools/cmdli/argprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index 47712583b..328f125b7 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -295,7 +295,7 @@ def define_invalid_arg_cases(self): 'valid': False, 'callable': partial(self.validate_onprem_with_cluster_props), 'cases': [ - [ArgValueCase.VALUE_A, ArgValueCase.UNDEFINED, ArgValueCase.UNDEFINED] + [ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED] ] } From 6064615857f4208293404ae0d74e70f0f23483fa Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Thu, 16 Nov 2023 11:33:20 -0800 Subject: [PATCH 08/10] Add comments and fix variable substitution Signed-off-by: Partho Sarthi --- .../test_tool_argprocessor.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py index 1ce7f688b..76e694c81 100644 --- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py +++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py @@ -127,6 +127,7 @@ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp): # should pass: platform not provided; event logs are provided tool_args = self.create_tool_args_should_pass(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs') + # for qualification, cost savings should be disabled because cluster is not provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=False, expected_platform=CspEnv.ONPREM) @@ -142,6 +143,7 @@ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, t platform=csp, cluster='my_cluster', eventlogs=f'{get_ut_data_dir}/eventlogs') + # for qualification, cost savings should be enabled because cluster is provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) @@ -151,6 +153,7 @@ def test_with_platform_with_cluster_name_with_eventlogs(self, get_ut_data_dir, t tool_args = self.create_tool_args_should_pass(tool_name, platform=csp, cluster='my_cluster') + # for qualification, cost savings should be enabled because cluster is provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) @@ -191,7 +194,8 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, # because the eventlogs can be retrieved from the cluster properties tool_args = self.create_tool_args_should_pass(tool_name, platform=csp, - cluster=f'{cluster_prop_file}') + cluster=cluster_prop_file) + # for qualification, cost savings should be enabled because cluster is provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) @@ -199,7 +203,8 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, # should pass: platform not provided; missing eventlogs should be accepted for all CSPs (except onPrem) # because the eventlogs can be retrieved from the cluster properties tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{cluster_prop_file}') + cluster=cluster_prop_file) + # for qualification, cost savings should be enabled because cluster is provided self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=True, expected_platform=csp) @@ -207,12 +212,12 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp, # should fail: onprem platform cannot retrieve eventlogs from cluster properties self.create_tool_args_should_fail(tool_name, platform=csp, - cluster=f'{cluster_prop_file}') + cluster=cluster_prop_file) # should fail: platform not provided; defaults platform to onprem, cannot retrieve eventlogs from # cluster properties self.create_tool_args_should_fail(tool_name, - cluster=f'{cluster_prop_file}') + cluster=cluster_prop_file) @pytest.mark.parametrize('tool_name', ['qualification', 'profiling']) @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props) @@ -223,16 +228,18 @@ def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir, cluster_prop_file = f'{get_ut_data_dir}/{prop_path}' tool_args = self.create_tool_args_should_pass(tool_name, platform=csp, - cluster=f'{cluster_prop_file}', + cluster=cluster_prop_file, eventlogs=f'{get_ut_data_dir}/eventlogs') + # for qualification, cost savings should be enabled because cluster is provided (except for onprem) self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, expected_platform=csp) # should pass: platform not provided; cluster properties and eventlogs are provided tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{get_ut_data_dir}/{prop_path}', + cluster=cluster_prop_file, eventlogs=f'{get_ut_data_dir}/eventlogs') + # for qualification, cost savings should be enabled because cluster is provided (except for onprem) self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM, expected_platform=csp) @@ -247,11 +254,11 @@ def test_with_platform_with_autotuner(self, get_ut_data_dir, tool_name, csp, pro autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' self.create_tool_args_should_fail(tool_name, platform=csp, - cluster=f'{autotuner_prop_file}') + cluster=autotuner_prop_file) # should fail: platform not provided; autotuner needs eventlogs self.create_tool_args_should_fail(tool_name, - cluster=f'{autotuner_prop_file}') + cluster=autotuner_prop_file) @pytest.mark.parametrize('tool_name', ['profiling']) @pytest.mark.parametrize('csp', all_csps) @@ -263,16 +270,18 @@ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool autotuner_prop_file = f'{get_ut_data_dir}/{prop_path}' tool_args = self.create_tool_args_should_pass(tool_name, platform=csp, - cluster=f'{autotuner_prop_file}', + cluster=autotuner_prop_file, eventlogs=f'{get_ut_data_dir}/eventlogs') + # cost savings should be disabled for profiling self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=False, expected_platform=csp) # should pass: platform not provided; autotuner properties and eventlogs are provided tool_args = self.create_tool_args_should_pass(tool_name, - cluster=f'{autotuner_prop_file}', + cluster=autotuner_prop_file, eventlogs=f'{get_ut_data_dir}/eventlogs') + # cost savings should be disabled for profiling self.validate_tool_args(tool_name=tool_name, tool_args=tool_args, cost_savings_enabled=False, expected_platform=CspEnv.ONPREM) From 33811eca9e89eba83a754f871971b5e6045da758 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Thu, 16 Nov 2023 11:34:02 -0800 Subject: [PATCH 09/10] Remove redundant post platform assignment validation Signed-off-by: Partho Sarthi --- .../spark_rapids_tools/cmdli/argprocessor.py | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index 328f125b7..b0135f124 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -168,16 +168,20 @@ def detect_platform_from_eventlogs_prefix(self): self.p_args['toolArgs']['platform'] = map_storage_to_platform[storage_type] def validate_onprem_with_cluster_name(self): - if self.platform == CspEnv.ONPREM: + # this field has already been populated during initialization + selected_platform = self.p_args['toolArgs']['platform'] + if selected_platform == CspEnv.ONPREM: raise PydanticCustomError( 'invalid_argument', f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:') - def validate_onprem_with_cluster_props(self): - if self.platform == CspEnv.ONPREM: + def validate_onprem_with_cluster_props_without_eventlogs(self): + # this field has already been populated during initialization + selected_platform = self.p_args['toolArgs']['platform'] + if selected_platform == CspEnv.ONPREM: raise PydanticCustomError( 'invalid_argument', - f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}]\n Error:') + f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}] without event logs\n Error:') def init_extra_arg_cases(self) -> list: return [] @@ -208,21 +212,24 @@ def define_extra_arg_cases(self): def build_tools_args(self) -> dict: pass - def apply_arg_cases(self): - for curr_cases in [self.rejected, self.detected, self.extra]: + def apply_arg_cases(self, cases_list: list): + for curr_cases in cases_list: for case_key, case_value in curr_cases.items(): if any(ArgValueCase.array_equal(self.argv_cases, case_i) for case_i in case_value['cases']): # debug the case key self.logger.info('...applying argument case: %s', case_key) case_value['callable']() + def apply_all_arg_cases(self): + self.apply_arg_cases([self.rejected, self.detected, self.extra]) + def validate_arguments(self): self.init_tool_args() self.init_arg_cases() self.define_invalid_arg_cases() self.define_detection_cases() self.define_extra_arg_cases() - self.apply_arg_cases() + self.apply_all_arg_cases() def get_or_set_platform(self) -> CspEnv: if self.p_args['toolArgs']['platform'] is None: @@ -230,24 +237,11 @@ def get_or_set_platform(self) -> CspEnv: runtime_platform = CspEnv.get_default() else: runtime_platform = self.p_args['toolArgs']['platform'] - self.post_platform_assignment_validation(runtime_platform) - return runtime_platform - def post_platform_assignment_validation(self, assigned_platform): - # do some validation after we decide the cluster type - if assigned_platform == CspEnv.ONPREM: - cluster_case = self.argv_cases[1] - eventlogs_case = self.argv_cases[2] - if cluster_case == ArgValueCase.VALUE_A: - # it is not allowed to run cluster by name on an OnPrem platform - raise PydanticCustomError( - 'invalid_argument', - f'Cannot run cluster by name with platform [{CspEnv.ONPREM}]\n Error:') - if cluster_case == ArgValueCase.VALUE_B and eventlogs_case == ArgValueCase.UNDEFINED: - # it is not allowed to run cluster by props on an OnPrem platform without eventlogs - raise PydanticCustomError( - 'invalid_argument', - f'Cannot run cluster by properties with platform [{CspEnv.ONPREM}] without eventlogs\n Error:') + # Update argv_cases to reflect the platform and validate post platform assignment + self.argv_cases[0] = ArgValueCase.VALUE_A + self.apply_arg_cases([self.rejected, self.extra]) + return runtime_platform @dataclass @@ -293,7 +287,7 @@ def define_invalid_arg_cases(self): } self.rejected['Cluster By Properties Cannot go with OnPrem'] = { 'valid': False, - 'callable': partial(self.validate_onprem_with_cluster_props), + 'callable': partial(self.validate_onprem_with_cluster_props_without_eventlogs), 'cases': [ [ArgValueCase.VALUE_A, ArgValueCase.VALUE_B, ArgValueCase.UNDEFINED] ] From e9abdf651d0bd20e15bff62a17d1a5463d4341db Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Thu, 16 Nov 2023 11:40:49 -0800 Subject: [PATCH 10/10] Add separate method for post validation Signed-off-by: Partho Sarthi --- user_tools/src/spark_rapids_tools/cmdli/argprocessor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index b0135f124..6605ecb3b 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -237,11 +237,14 @@ def get_or_set_platform(self) -> CspEnv: runtime_platform = CspEnv.get_default() else: runtime_platform = self.p_args['toolArgs']['platform'] + self.post_platform_assignment_validation() + return runtime_platform - # Update argv_cases to reflect the platform and validate post platform assignment + def post_platform_assignment_validation(self): + # Update argv_cases to reflect the platform self.argv_cases[0] = ArgValueCase.VALUE_A + # Any validation post platform assignment should be done here self.apply_arg_cases([self.rejected, self.extra]) - return runtime_platform @dataclass