From 8df1407d6fa3f3193890b26b342ccafcba7b4e1e Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Thu, 12 Dec 2024 14:54:50 -0600
Subject: [PATCH 1/6] Fix dataframe handling of column-types (#1458)

Signed-off-by: Ahmed Hussein (amahussein)

Fixes #1456
---
 .../tools/qualification_stats_report.py          | 15 ++++++++++++---
 .../spark_rapids_tools/tools/qualx/preprocess.py |  4 ++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py b/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
index de762013d..aedda60f7 100644
--- a/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
+++ b/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
@@ -75,7 +75,10 @@ def _read_csv_files(self) -> None:
             'toolOutput', 'csv', 'unsupportedOperatorsReport', 'fileName')
         rapids_unsupported_operators_file = FSUtil.build_path(
             qual_output_dir, unsupported_operator_report_file)
-        self.unsupported_operators_df = pd.read_csv(rapids_unsupported_operators_file)
+        # load the unsupported operators and drop operators that have no names.
+        self.unsupported_operators_df = (
+            pd.read_csv(rapids_unsupported_operators_file,
+                        dtype={'Unsupported Operator': str})).dropna(subset=['Unsupported Operator'])
 
         stages_report_file = self.ctxt.get_value('toolOutput', 'csv',
                                                  'stagesInformation', 'fileName')
@@ -84,7 +87,14 @@ def _read_csv_files(self) -> None:
 
         rapids_execs_file = self.ctxt.get_value('toolOutput', 'csv',
                                                 'execsInformation', 'fileName')
-        self.execs_df = pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file))
+        # Load the execs CSV file and drop execs that have no stages or name
+        self.execs_df = (
+            pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file),
+                        dtype={'Exec Name': str,
+                               'Exec Stages': str,
+                               'Exec Children': str,
+                               'Exec Children Node Ids': str})
+            .dropna(subset=['Exec Stages', 'Exec Name']))
         self.logger.info('Reading CSV files completed.')
 
     def _convert_durations(self) -> None:
@@ -103,7 +113,6 @@ def _preprocess_dataframes(self) -> None:
         # from this dataframe can be matched with the stageID of stages dataframe
         self.execs_df['Exec Stages'] = self.execs_df['Exec Stages'].str.split(':')
         self.execs_df = (self.execs_df.explode('Exec Stages').
-                         dropna(subset=['Exec Stages']).
                          rename(columns={'Exec Stages': 'Stage ID'}))
         self.execs_df['Stage ID'] = self.execs_df['Stage ID'].astype(int)
 

diff --git a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
index 1bdbd76b7..a47b45d73 100644
--- a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
+++ b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
@@ -461,7 +461,7 @@ def combine_tables(table_name: str) -> pd.DataFrame:
 
     # normalize WholeStageCodegen labels
     ops_tbl.loc[
-        ops_tbl['nodeName'].str.startswith('WholeStageCodegen'), 'nodeName'
+        ops_tbl['nodeName'].astype(str).str.startswith('WholeStageCodegen'), 'nodeName'
     ] = 'WholeStageCodegen'
 
     # format WholeStageCodegen for merging
@@ -1140,7 +1140,7 @@ def _is_ignore_no_perf(action: str) -> bool:
     node_level_supp['Exec Is Supported'] = (
         node_level_supp['Exec Is Supported']
         | node_level_supp['Action'].apply(_is_ignore_no_perf)
-        | node_level_supp['Exec Name'].apply(
+        | node_level_supp['Exec Name'].astype(str).apply(
             lambda x: x.startswith('WholeStageCodegen')
         )
     )
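Note on Patch 1: the dtype pins matter because pandas infers column types per file, so a report whose 'Exec Stages' column happens to hold only single stage ids comes back numeric, and the .str accessor used later in _preprocess_dataframes raises. A minimal, self-contained sketch of the failure mode (illustrative values, not from a real report):

    import io
    import pandas as pd

    csv_text = 'Exec Name,Exec Stages\nFilter,3\nProject,7\n'

    # Without an explicit dtype the column is inferred as int64, so a later
    # df['Exec Stages'].str.split(':') would raise AttributeError.
    df = pd.read_csv(io.StringIO(csv_text))
    print(df['Exec Stages'].dtype)  # int64

    # Pinning the dtype (and dropping incomplete rows), as the patch does,
    # keeps the column usable no matter what the CSV happens to contain.
    df = pd.read_csv(io.StringIO(csv_text),
                     dtype={'Exec Name': str, 'Exec Stages': str})
    df = df.dropna(subset=['Exec Stages', 'Exec Name'])
    print(df['Exec Stages'].str.split(':').tolist())  # [['3'], ['7']]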
From 75760a9044387027988c73731cc17f61122a3ea7 Mon Sep 17 00:00:00 2001
From: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 13 Dec 2024 01:57:42 +0000
Subject: [PATCH 2/6] Update dev-version by jenkins-spark-rapids-tools-auto-release-99

Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
 core/pom.xml                                    | 2 +-
 user_tools/src/spark_rapids_pytools/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/pom.xml b/core/pom.xml
index cde7561fb..7ae043493 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>rapids-4-spark-tools_2.12</artifactId>
     <name>RAPIDS Accelerator for Apache Spark tools</name>
     <description>RAPIDS Accelerator for Apache Spark tools</description>
-    <version>24.10.3-SNAPSHOT</version>
+    <version>24.10.4-SNAPSHOT</version>
     <packaging>jar</packaging>
     <url>http://github.com/NVIDIA/spark-rapids-tools</url>

diff --git a/user_tools/src/spark_rapids_pytools/__init__.py b/user_tools/src/spark_rapids_pytools/__init__.py
index f4ec6f064..13d08a4dd 100644
--- a/user_tools/src/spark_rapids_pytools/__init__.py
+++ b/user_tools/src/spark_rapids_pytools/__init__.py
@@ -16,7 +16,7 @@
 
 from spark_rapids_pytools.build import get_version, get_spark_dep_version
 
-VERSION = '24.10.3'
+VERSION = '24.10.4'
 # defines the default runtime build version for the user tools environment
 SPARK_DEP_VERSION = '350'
 __version__ = get_version(VERSION)

From a1f866fc4361124aae8ea9e1b3542cb2e4b8feb1 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:20:02 -0600
Subject: [PATCH 3/6] Deduplicate calls to aggregateSparkMetricsBySql (#1464)

Signed-off-by: Ahmed Hussein (amahussein)

Contributes to #1461

AppSparkMetricsAnalyzer was calling `aggregateSparkMetricsBySql` twice.
This code change eliminates this redundancy to save CPU time and memory
allocations.
---
 .../spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
index 0f43ae8b2..30fb10ac9 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
@@ -35,12 +35,13 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait {
   def getAggRawMetrics(app: AppBase, index: Int,
       sqlAnalyzer: Option[AppSQLPlanAnalyzer] = None): AggRawMetricsResult = {
     val analysisObj = new AppSparkMetricsAnalyzer(app)
+    val sqlMetricsAgg = analysisObj.aggregateSparkMetricsBySql(index)
     AggRawMetricsResult(
       analysisObj.aggregateSparkMetricsByJob(index),
       analysisObj.aggregateSparkMetricsByStage(index),
       analysisObj.shuffleSkewCheck(index),
-      analysisObj.aggregateSparkMetricsBySql(index),
-      analysisObj.aggregateIOMetricsBySql(analysisObj.aggregateSparkMetricsBySql(index)),
+      sqlMetricsAgg,
+      analysisObj.aggregateIOMetricsBySql(sqlMetricsAgg),
       analysisObj.aggregateDurationAndCPUTimeBySql(index),
       Seq(analysisObj.maxTaskInputSizeBytesPerSQL(index)),
       analysisObj.aggregateDiagnosticMetricsByStage(index, sqlAnalyzer))

From 9564d0b400ff49b1c39ce52ab6f508641c2b4d74 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:23:19 -0600
Subject: [PATCH 4/6] Mark RunningWindowFunction as supported in Qual tool (#1465)

Signed-off-by: Ahmed Hussein (amahussein)

Fix #1460

`RunningWindowFunction` should be marked as supported by the
qualification tool.
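For context: a running window is one whose frame grows with each row (UNBOUNDED PRECEDING to CURRENT ROW), and some Spark distributions (Databricks, as assumed here) plan such queries as a dedicated RunningWindowFunction exec rather than a plain Window. A PySpark sketch of a query with that shape (names and data are illustrative):

    from pyspark.sql import SparkSession, Window, functions as F

    spark = SparkSession.builder.appName('running-window-demo').getOrCreate()
    df = spark.createDataFrame(
        [('a', 1, 10), ('a', 2, 20), ('b', 1, 5)], ['key', 'ts', 'value'])

    # Running frame: every row aggregates from the partition start up to itself.
    w = (Window.partitionBy('key').orderBy('ts')
         .rowsBetween(Window.unboundedPreceding, Window.currentRow))
    df.withColumn('running_sum', F.sum('value').over(w)).show()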
---
 .../src/main/resources/operatorsScore-databricks-aws-a10G.csv | 1 +
 core/src/main/resources/operatorsScore-databricks-aws-t4.csv  | 1 +
 .../src/main/resources/operatorsScore-databricks-azure-t4.csv | 1 +
 core/src/main/resources/operatorsScore-dataproc-gke-l4.csv    | 1 +
 core/src/main/resources/operatorsScore-dataproc-gke-t4.csv    | 1 +
 core/src/main/resources/operatorsScore-dataproc-l4.csv        | 1 +
 .../main/resources/operatorsScore-dataproc-serverless-l4.csv  | 1 +
 core/src/main/resources/operatorsScore-dataproc-t4.csv        | 1 +
 core/src/main/resources/operatorsScore-emr-a10.csv            | 1 +
 core/src/main/resources/operatorsScore-emr-a10G.csv           | 1 +
 core/src/main/resources/operatorsScore-emr-t4.csv             | 1 +
 core/src/main/resources/operatorsScore-onprem-a100.csv        | 1 +
 core/src/main/resources/supportedExecs.csv                    | 1 +
 .../spark/rapids/tool/planparser/GenericExecParser.scala      | 3 ++-
 .../nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala   | 4 ++--
 15 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv b/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
index e5096e1a9..52eb193f2 100644
--- a/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
+++ b/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-databricks-aws-t4.csv b/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
index e5096e1a9..52eb193f2 100644
--- a/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
+++ b/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-databricks-azure-t4.csv b/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
index 66c738016..5ad387036 100644
--- a/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
+++ b/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
index 57fc5b44d..902e598a1 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
index 3459e64cb..e30f156f4 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-dataproc-l4.csv b/core/src/main/resources/operatorsScore-dataproc-l4.csv
index 422020970..0660dbdee 100644
--- a/core/src/main/resources/operatorsScore-dataproc-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-l4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
index 61d9e3f1a..8dc9faa90 100644
--- a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-t4.csv b/core/src/main/resources/operatorsScore-dataproc-t4.csv
index 10ef53900..e2eb69f60 100644
--- a/core/src/main/resources/operatorsScore-dataproc-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-emr-a10.csv b/core/src/main/resources/operatorsScore-emr-a10.csv
index 77befd12e..0d350be80 100644
--- a/core/src/main/resources/operatorsScore-emr-a10.csv
+++ b/core/src/main/resources/operatorsScore-emr-a10.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-emr-a10G.csv b/core/src/main/resources/operatorsScore-emr-a10G.csv
index 77befd12e..0d350be80 100644
--- a/core/src/main/resources/operatorsScore-emr-a10G.csv
+++ b/core/src/main/resources/operatorsScore-emr-a10G.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-emr-t4.csv b/core/src/main/resources/operatorsScore-emr-t4.csv
index 3f1296c38..c651cf976 100644
--- a/core/src/main/resources/operatorsScore-emr-t4.csv
+++ b/core/src/main/resources/operatorsScore-emr-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/operatorsScore-onprem-a100.csv b/core/src/main/resources/operatorsScore-onprem-a100.csv
index 4dc58f0c0..7cdd59978 100644
--- a/core/src/main/resources/operatorsScore-onprem-a100.csv
+++ b/core/src/main/resources/operatorsScore-onprem-a100.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
 MaxBy,1.5
 MinBy,1.5
 ArrayJoin,1.5
+RunningWindowFunctionExec,1.5

diff --git a/core/src/main/resources/supportedExecs.csv b/core/src/main/resources/supportedExecs.csv
index 06e35b026..50e47f42f 100644
--- a/core/src/main/resources/supportedExecs.csv
+++ b/core/src/main/resources/supportedExecs.csv
@@ -57,3 +57,4 @@ WriteFilesExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S
 CustomShuffleReaderExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 WindowGroupLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS
 MapInArrowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS
+RunningWindowFunctionExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
index 6295c5533..cc60904be 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
@@ -91,7 +91,8 @@ class GenericExecParser(
     ExecInfo(
       node,
       sqlID,
-      node.name,
+      // Remove trailing spaces from node name if any
+      node.name.trim,
       "",
       speedupFactor,
       duration,

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
index 4d9c59dd8..8471e8a57 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
@@ -483,8 +483,8 @@ object SQLPlanParser extends Logging {
       case "AggregateInPandas" | "ArrowEvalPython" | "AQEShuffleRead" | "CartesianProduct"
            | "Coalesce" | "CollectLimit" | "CustomShuffleReader" | "FlatMapGroupsInPandas"
            | "GlobalLimit" | "LocalLimit" | "InMemoryTableScan" | "MapInPandas"
-           | "PythonMapInArrow" | "MapInArrow" | "Range" | "Sample" | "Union"
-           | "WindowInPandas" =>
+           | "PythonMapInArrow" | "MapInArrow" | "Range" | "RunningWindowFunction"
+           | "Sample" | "Union" | "WindowInPandas" =>
         GenericExecParser(node, checker, sqlID, app = Some(app)).parse
       case "BatchScan" =>
         BatchScanExecParser(node, checker, sqlID, app).parse
From aa59d200bcf5638b7712b745a54fce59b2cf58b2 Mon Sep 17 00:00:00 2001
From: Matt Ahrens
Date: Mon, 16 Dec 2024 15:33:30 -0600
Subject: [PATCH 5/6] Adding Spark 3.5.2 support in auto tuner for EMR (#1466)

Signed-off-by: mattahrens
---
 .../scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
index 783726cd9..44ff839ba 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
@@ -760,6 +760,7 @@ class AutoTuner(
       }
     } else if (sparkVersion.contains("amzn")) {
       sparkVersion match {
+        case ver if ver.contains("3.5.2") => "352"
         case ver if ver.contains("3.5.1") => "351"
         case ver if ver.contains("3.5.0") => "350"
         case ver if ver.contains("3.4.1") => "341"
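For context on Patch 5: EMR builds of Spark report version strings that embed the upstream release plus an Amazon suffix (e.g. '3.5.2-amzn-0'; the exact suffix is an assumption here), which is why the matcher keys on contains rather than equality. A standalone Python sketch of the same lookup (a hypothetical helper, not the tool's API):

    def emr_spark_buildver(spark_version: str) -> str:
        # Map an EMR Spark version string to a buildver tag, mirroring the
        # AutoTuner match above; newest known release is checked first.
        known = [('3.5.2', '352'), ('3.5.1', '351'),
                 ('3.5.0', '350'), ('3.4.1', '341')]
        for release, buildver in known:
            if release in spark_version:
                return buildver
        raise ValueError(f'unrecognized EMR Spark version: {spark_version}')

    print(emr_spark_buildver('3.5.2-amzn-0'))  # 352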
From 4143cccefc06d6d5585b30b93e0c14f533b4e824 Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Tue, 17 Dec 2024 11:17:23 -0800
Subject: [PATCH 6/6] Make platform mandatory for qualification and profiling CLI (#1463)

Signed-off-by: Partho Sarthi
---
 .../spark_rapids_tools/cmdli/argprocessor.py |  9 +++
 .../test_tool_argprocessor.py                | 65 +++++++-------
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
index 978f683c1..87b39c40e 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
@@ -400,6 +400,15 @@ def init_extra_arg_cases(self) -> list:
     def define_invalid_arg_cases(self) -> None:
         super().define_invalid_arg_cases()
         self.define_rejected_missing_eventlogs()
+        self.rejected['Missing Platform argument'] = {
+            'valid': False,
+            'callable': partial(self.raise_validation_exception,
+                                'Cannot run tool cmd without platform argument. Re-run the command '
+                                'providing the platform argument.'),
+            'cases': [
+                [ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]
+            ]
+        }
         self.rejected['Cluster By Name Without Platform Hints'] = {
             'valid': False,
             'callable': partial(self.raise_validation_exception,

diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
index 300751242..ff4bfff35 100644
--- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
+++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
@@ -131,13 +131,9 @@ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp):
                                 cost_savings_enabled=False,
                                 expected_platform=csp)
 
-        # should pass: platform not provided; event logs are provided
-        tool_args = self.create_tool_args_should_pass(tool_name,
-                                                      eventlogs=f'{get_ut_data_dir}/eventlogs')
-        # for qualification, cost savings should be disabled because cluster is not provided
-        self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
-                                cost_savings_enabled=False,
-                                expected_platform=CspEnv.ONPREM)
+        # should fail: platform must be provided
+        self.create_tool_args_should_fail(tool_name,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs')
 
     @pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
     @pytest.mark.parametrize('csp', all_csps)
@@ -150,17 +146,19 @@ def test_with_platform_with_eventlogs_with_jar_files(self, get_ut_data_dir, tool
                                                      tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
         assert tool_args['toolsJar'] == f'{get_ut_data_dir}/tools_mock.jar'
 
-        # should pass: tools_jar is correct
-        tool_args = self.create_tool_args_should_pass(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
-                                                      tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
-        assert tool_args['toolsJar'] == f'{get_ut_data_dir}/tools_mock.jar'
+        # should fail: platform must be provided
+        self.create_tool_args_should_fail(tool_name,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs',
+                                          tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
 
         # should fail: tools_jar does not exist
-        self.create_tool_args_should_fail(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
+        self.create_tool_args_should_fail(tool_name, platform=csp,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs',
                                           tools_jar=f'{get_ut_data_dir}/tools_mock.txt')
 
         # should fail: tools_jar is not .jar extension
-        self.create_tool_args_should_fail(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
+        self.create_tool_args_should_fail(tool_name, platform=csp,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs',
                                           tools_jar=f'{get_ut_data_dir}/worker_info.yaml')
 
     @pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
@@ -230,25 +228,15 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp,
             self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
                                     cost_savings_enabled=True,
                                     expected_platform=csp)
-
-            # should pass: platform not provided; missing eventlogs should be accepted for all CSPs (except onPrem)
-            # because the eventlogs can be retrieved from the cluster properties
-            tool_args = self.create_tool_args_should_pass(tool_name,
-                                                          cluster=cluster_prop_file)
-            # for qualification, cost savings should be enabled because cluster is provided
-            self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
-                                    cost_savings_enabled=True,
-                                    expected_platform=csp)
         else:
            # should fail: onprem platform cannot retrieve eventlogs from cluster properties
            self.create_tool_args_should_fail(tool_name,
                                              platform=csp,
                                              cluster=cluster_prop_file)
 
-        # should fail: platform not provided; defaults platform to onprem, cannot retrieve eventlogs from
-        # cluster properties
-        self.create_tool_args_should_fail(tool_name,
-                                          cluster=cluster_prop_file)
+        # should fail: platform must be provided for all CSPs as well as onprem
+        self.create_tool_args_should_fail(tool_name,
+                                          cluster=cluster_prop_file)
 
     @pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
     @pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props)
@@ -266,14 +254,10 @@ def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir,
                                 cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
                                 expected_platform=csp)
 
-        # should pass: platform not provided; cluster properties and eventlogs are provided
-        tool_args = self.create_tool_args_should_pass(tool_name,
-                                                      cluster=cluster_prop_file,
-                                                      eventlogs=f'{get_ut_data_dir}/eventlogs')
-        # for qualification, cost savings should be enabled because cluster is provided (except for onprem)
-        self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
-                                cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
-                                expected_platform=csp)
+        # should fail: platform must be provided
+        self.create_tool_args_should_fail(tool_name,
+                                          cluster=cluster_prop_file,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs')
 
     @pytest.mark.parametrize('tool_name', ['profiling'])
     @pytest.mark.parametrize('csp', all_csps)
@@ -308,18 +292,15 @@ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool
                                 cost_savings_enabled=False,
                                 expected_platform=csp)
 
-        # should pass: platform not provided; autotuner properties and eventlogs are provided
-        tool_args = self.create_tool_args_should_pass(tool_name,
-                                                      cluster=autotuner_prop_file,
-                                                      eventlogs=f'{get_ut_data_dir}/eventlogs')
-        # cost savings should be disabled for profiling
-        self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
-                                cost_savings_enabled=False,
-                                expected_platform=CspEnv.ONPREM)
+        # should fail: platform must be provided
+        self.create_tool_args_should_fail(tool_name,
+                                          cluster=autotuner_prop_file,
+                                          eventlogs=f'{get_ut_data_dir}/eventlogs')
 
     @pytest.mark.parametrize('prop_path', [autotuner_prop_path])
     def test_profiler_with_driverlog(self, get_ut_data_dir, prop_path):
         prof_args = AbsToolUserArgModel.create_tool_args('profiling',
+                                                         platform=CspEnv.get_default(),
                                                          driverlog=f'{get_ut_data_dir}/{prop_path}')
         assert not prof_args['requiresEventlogs']
         assert prof_args['rapidOptions']['driverlog'] == f'{get_ut_data_dir}/{prop_path}'
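With Patch 6 applied, callers can no longer omit the platform and have it default to onprem. A minimal sketch of the now-required invocation, using the same entry point the tests above exercise (the import path is assumed from the file layout shown, and the eventlogs path is a placeholder):

    from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel

    # platform is now mandatory; leaving it out trips the
    # 'Missing Platform argument' rejection case added in argprocessor.py.
    tool_args = AbsToolUserArgModel.create_tool_args(
        'qualification',
        platform='onprem',
        eventlogs='/path/to/eventlogs')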