From 8df1407d6fa3f3193890b26b342ccafcba7b4e1e Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Thu, 12 Dec 2024 14:54:50 -0600
Subject: [PATCH 1/6] Fix dataframe handling of column-types (#1458)
Signed-off-by: Ahmed Hussein (amahussein)
Fixes #1456
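Without explicit column types, pandas reads an all-empty string column as
float64 (so later `.str` accessors raise) and mixes NaN into partially
empty ones, leaking NaN rows into downstream string operations; the qualx
changes coerce with `astype(str)` for the same reason. A minimal
standalone sketch of the failure mode (synthetic data; column names taken
from the diff below):

    import io
    import pandas as pd

    csv = io.StringIO('Exec Name,Exec Stages\nFilter,1:2\nScan,\n')

    # Without an explicit dtype the partially-empty 'Exec Stages' column
    # mixes strings with NaN floats: .str.split() yields a NaN row, and
    # the later .astype(int) on the exploded stage IDs raises.
    df = pd.read_csv(csv)
    df['Exec Stages'].str.split(':')   # -> [['1', '2'], NaN]

    # With dtype=str plus dropna, every remaining value is a real string.
    csv.seek(0)
    df = (pd.read_csv(csv, dtype={'Exec Stages': str})
          .dropna(subset=['Exec Stages']))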
---
.../tools/qualification_stats_report.py | 15 ++++++++++++---
.../spark_rapids_tools/tools/qualx/preprocess.py | 4 ++--
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py b/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
index de762013d..aedda60f7 100644
--- a/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
+++ b/user_tools/src/spark_rapids_tools/tools/qualification_stats_report.py
@@ -75,7 +75,10 @@ def _read_csv_files(self) -> None:
'toolOutput', 'csv', 'unsupportedOperatorsReport', 'fileName')
rapids_unsupported_operators_file = FSUtil.build_path(
qual_output_dir, unsupported_operator_report_file)
- self.unsupported_operators_df = pd.read_csv(rapids_unsupported_operators_file)
+ # Load the unsupported operators and drop rows that have no operator name.
+ self.unsupported_operators_df = (
+ pd.read_csv(rapids_unsupported_operators_file,
+ dtype={'Unsupported Operator': str})).dropna(subset=['Unsupported Operator'])
stages_report_file = self.ctxt.get_value('toolOutput', 'csv', 'stagesInformation',
'fileName')
@@ -84,7 +87,14 @@ def _read_csv_files(self) -> None:
rapids_execs_file = self.ctxt.get_value('toolOutput', 'csv', 'execsInformation',
'fileName')
- self.execs_df = pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file))
+ # Load the execs CSV file and drop execs that have no stages or name
+ self.execs_df = (
+ pd.read_csv(FSUtil.build_path(qual_output_dir, rapids_execs_file),
+ dtype={'Exec Name': str,
+ 'Exec Stages': str,
+ 'Exec Children': str,
+ 'Exec Children Node Ids': str})
+ .dropna(subset=['Exec Stages', 'Exec Name']))
self.logger.info('Reading CSV files completed.')
def _convert_durations(self) -> None:
@@ -103,7 +113,6 @@ def _preprocess_dataframes(self) -> None:
# from this dataframe can be matched with the stageID of stages dataframe
self.execs_df['Exec Stages'] = self.execs_df['Exec Stages'].str.split(':')
self.execs_df = (self.execs_df.explode('Exec Stages').
- dropna(subset=['Exec Stages']).
rename(columns={'Exec Stages': 'Stage ID'}))
self.execs_df['Stage ID'] = self.execs_df['Stage ID'].astype(int)
diff --git a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
index 1bdbd76b7..a47b45d73 100644
--- a/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
+++ b/user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
@@ -461,7 +461,7 @@ def combine_tables(table_name: str) -> pd.DataFrame:
# normalize WholeStageCodegen labels
ops_tbl.loc[
- ops_tbl['nodeName'].str.startswith('WholeStageCodegen'), 'nodeName'
+ ops_tbl['nodeName'].astype(str).str.startswith('WholeStageCodegen'), 'nodeName'
] = 'WholeStageCodegen'
# format WholeStageCodegen for merging
@@ -1140,7 +1140,7 @@ def _is_ignore_no_perf(action: str) -> bool:
node_level_supp['Exec Is Supported'] = (
node_level_supp['Exec Is Supported']
| node_level_supp['Action'].apply(_is_ignore_no_perf)
- | node_level_supp['Exec Name'].apply(
+ | node_level_supp['Exec Name'].astype(str).apply(
lambda x: x.startswith('WholeStageCodegen')
)
)
From 75760a9044387027988c73731cc17f61122a3ea7 Mon Sep 17 00:00:00 2001
From: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 13 Dec 2024 01:57:42 +0000
Subject: [PATCH 2/6] Update dev-version by
jenkins-spark-rapids-tools-auto-release-99
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
core/pom.xml | 2 +-
user_tools/src/spark_rapids_pytools/__init__.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/core/pom.xml b/core/pom.xml
index cde7561fb..7ae043493 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -23,7 +23,7 @@
<artifactId>rapids-4-spark-tools_2.12</artifactId>
<name>RAPIDS Accelerator for Apache Spark tools</name>
<description>RAPIDS Accelerator for Apache Spark tools</description>
- <version>24.10.3-SNAPSHOT</version>
+ <version>24.10.4-SNAPSHOT</version>
<packaging>jar</packaging>
<url>http://github.com/NVIDIA/spark-rapids-tools</url>
diff --git a/user_tools/src/spark_rapids_pytools/__init__.py b/user_tools/src/spark_rapids_pytools/__init__.py
index f4ec6f064..13d08a4dd 100644
--- a/user_tools/src/spark_rapids_pytools/__init__.py
+++ b/user_tools/src/spark_rapids_pytools/__init__.py
@@ -16,7 +16,7 @@
from spark_rapids_pytools.build import get_version, get_spark_dep_version
-VERSION = '24.10.3'
+VERSION = '24.10.4'
# defines the default runtime build version for the user tools environment
SPARK_DEP_VERSION = '350'
__version__ = get_version(VERSION)
From a1f866fc4361124aae8ea9e1b3542cb2e4b8feb1 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:20:02 -0600
Subject: [PATCH 3/6] Deduplicate calls to aggregateSparkMetricsBySql (#1464)
Signed-off-by: Ahmed Hussein (amahussein)
Contributes to #1461
AppSparkMetricsAnalyzer was calling `aggregateSparkMetricsBySql` twice.
This change eliminates the redundancy, saving CPU time and memory
allocations.
---
.../spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
index 0f43ae8b2..30fb10ac9 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAggTrait.scala
@@ -35,12 +35,13 @@ trait AppSparkMetricsAggTrait extends AppIndexMapperTrait {
def getAggRawMetrics(app: AppBase, index: Int, sqlAnalyzer: Option[AppSQLPlanAnalyzer] = None):
AggRawMetricsResult = {
val analysisObj = new AppSparkMetricsAnalyzer(app)
+ val sqlMetricsAgg = analysisObj.aggregateSparkMetricsBySql(index)
AggRawMetricsResult(
analysisObj.aggregateSparkMetricsByJob(index),
analysisObj.aggregateSparkMetricsByStage(index),
analysisObj.shuffleSkewCheck(index),
- analysisObj.aggregateSparkMetricsBySql(index),
- analysisObj.aggregateIOMetricsBySql(analysisObj.aggregateSparkMetricsBySql(index)),
+ sqlMetricsAgg,
+ analysisObj.aggregateIOMetricsBySql(sqlMetricsAgg),
analysisObj.aggregateDurationAndCPUTimeBySql(index),
Seq(analysisObj.maxTaskInputSizeBytesPerSQL(index)),
analysisObj.aggregateDiagnosticMetricsByStage(index, sqlAnalyzer))
From 9564d0b400ff49b1c39ce52ab6f508641c2b4d74 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:23:19 -0600
Subject: [PATCH 4/6] Mark RunningWindowFunction as supported in Qual tool
(#1465)
Signed-off-by: Ahmed Hussein (amahussein)
Fixes #1460
`RunningWindowFunction` should be marked as supported by the
qualification tool. The exec is now routed through `GenericExecParser`,
given a 1.5 speedup score in each platform's operatorsScore CSV, and
listed in supportedExecs.csv; node names are also trimmed so a trailing
space in the plan no longer breaks the lookup.
---
.../src/main/resources/operatorsScore-databricks-aws-a10G.csv | 1 +
core/src/main/resources/operatorsScore-databricks-aws-t4.csv | 1 +
.../src/main/resources/operatorsScore-databricks-azure-t4.csv | 1 +
core/src/main/resources/operatorsScore-dataproc-gke-l4.csv | 1 +
core/src/main/resources/operatorsScore-dataproc-gke-t4.csv | 1 +
core/src/main/resources/operatorsScore-dataproc-l4.csv | 1 +
.../main/resources/operatorsScore-dataproc-serverless-l4.csv | 1 +
core/src/main/resources/operatorsScore-dataproc-t4.csv | 1 +
core/src/main/resources/operatorsScore-emr-a10.csv | 1 +
core/src/main/resources/operatorsScore-emr-a10G.csv | 1 +
core/src/main/resources/operatorsScore-emr-t4.csv | 1 +
core/src/main/resources/operatorsScore-onprem-a100.csv | 1 +
core/src/main/resources/supportedExecs.csv | 1 +
.../spark/rapids/tool/planparser/GenericExecParser.scala | 3 ++-
.../nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala | 4 ++--
15 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv b/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
index e5096e1a9..52eb193f2 100644
--- a/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
+++ b/core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-databricks-aws-t4.csv b/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
index e5096e1a9..52eb193f2 100644
--- a/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
+++ b/core/src/main/resources/operatorsScore-databricks-aws-t4.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-databricks-azure-t4.csv b/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
index 66c738016..5ad387036 100644
--- a/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
+++ b/core/src/main/resources/operatorsScore-databricks-azure-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
index 57fc5b44d..902e598a1 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
index 3459e64cb..e30f156f4 100644
--- a/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-l4.csv b/core/src/main/resources/operatorsScore-dataproc-l4.csv
index 422020970..0660dbdee 100644
--- a/core/src/main/resources/operatorsScore-dataproc-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-l4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
index 61d9e3f1a..8dc9faa90 100644
--- a/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-serverless-l4.csv
@@ -287,3 +287,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-dataproc-t4.csv b/core/src/main/resources/operatorsScore-dataproc-t4.csv
index 10ef53900..e2eb69f60 100644
--- a/core/src/main/resources/operatorsScore-dataproc-t4.csv
+++ b/core/src/main/resources/operatorsScore-dataproc-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-emr-a10.csv b/core/src/main/resources/operatorsScore-emr-a10.csv
index 77befd12e..0d350be80 100644
--- a/core/src/main/resources/operatorsScore-emr-a10.csv
+++ b/core/src/main/resources/operatorsScore-emr-a10.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-emr-a10G.csv b/core/src/main/resources/operatorsScore-emr-a10G.csv
index 77befd12e..0d350be80 100644
--- a/core/src/main/resources/operatorsScore-emr-a10G.csv
+++ b/core/src/main/resources/operatorsScore-emr-a10G.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-emr-t4.csv b/core/src/main/resources/operatorsScore-emr-t4.csv
index 3f1296c38..c651cf976 100644
--- a/core/src/main/resources/operatorsScore-emr-t4.csv
+++ b/core/src/main/resources/operatorsScore-emr-t4.csv
@@ -293,3 +293,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/operatorsScore-onprem-a100.csv b/core/src/main/resources/operatorsScore-onprem-a100.csv
index 4dc58f0c0..7cdd59978 100644
--- a/core/src/main/resources/operatorsScore-onprem-a100.csv
+++ b/core/src/main/resources/operatorsScore-onprem-a100.csv
@@ -305,3 +305,4 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
+RunningWindowFunctionExec,1.5
diff --git a/core/src/main/resources/supportedExecs.csv b/core/src/main/resources/supportedExecs.csv
index 06e35b026..50e47f42f 100644
--- a/core/src/main/resources/supportedExecs.csv
+++ b/core/src/main/resources/supportedExecs.csv
@@ -57,3 +57,4 @@ WriteFilesExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S
CustomShuffleReaderExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
WindowGroupLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS
MapInArrowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS
+RunningWindowFunctionExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
index 6295c5533..cc60904be 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/GenericExecParser.scala
@@ -91,7 +91,8 @@ class GenericExecParser(
ExecInfo(
node,
sqlID,
- node.name,
+ // Trim leading and trailing whitespace from the node name, if any
+ node.name.trim,
"",
speedupFactor,
duration,
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
index 4d9c59dd8..8471e8a57 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/planparser/SQLPlanParser.scala
@@ -483,8 +483,8 @@ object SQLPlanParser extends Logging {
case "AggregateInPandas" | "ArrowEvalPython" | "AQEShuffleRead" | "CartesianProduct"
| "Coalesce" | "CollectLimit" | "CustomShuffleReader" | "FlatMapGroupsInPandas"
| "GlobalLimit" | "LocalLimit" | "InMemoryTableScan" | "MapInPandas"
- | "PythonMapInArrow" | "MapInArrow" | "Range" | "Sample" | "Union"
- | "WindowInPandas" =>
+ | "PythonMapInArrow" | "MapInArrow" | "Range" | "RunningWindowFunction"
+ | "Sample" | "Union" | "WindowInPandas" =>
GenericExecParser(node, checker, sqlID, app = Some(app)).parse
case "BatchScan" =>
BatchScanExecParser(node, checker, sqlID, app).parse
From aa59d200bcf5638b7712b745a54fce59b2cf58b2 Mon Sep 17 00:00:00 2001
From: Matt Ahrens
Date: Mon, 16 Dec 2024 15:33:30 -0600
Subject: [PATCH 5/6] Adding Spark 3.5.2 support in auto tuner for EMR (#1466)
Signed-off-by: mattahrens
---
.../scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala | 1 +
1 file changed, 1 insertion(+)
diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
index 783726cd9..44ff839ba 100644
--- a/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
+++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/profiling/AutoTuner.scala
@@ -760,6 +760,7 @@ class AutoTuner(
}
} else if (sparkVersion.contains("amzn")) {
sparkVersion match {
+ case ver if ver.contains("3.5.2") => "352"
case ver if ver.contains("3.5.1") => "351"
case ver if ver.contains("3.5.0") => "350"
case ver if ver.contains("3.4.1") => "341"
From 4143cccefc06d6d5585b30b93e0c14f533b4e824 Mon Sep 17 00:00:00 2001
From: Partho Sarthi
Date: Tue, 17 Dec 2024 11:17:23 -0800
Subject: [PATCH 6/6] Make platform mandatory for qualification and profiling
CLI (#1463)
Signed-off-by: Partho Sarthi
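With this change, omitting the platform argument fails validation instead
of silently defaulting to onprem. A minimal sketch of the new calling
convention, based on the API used in the updated tests below (import
paths are assumed, not taken from this patch):

    from spark_rapids_tools.cmdli.argprocessor import AbsToolUserArgModel
    from spark_rapids_tools.enums import CspEnv  # assumed import path

    # Platform is now mandatory; leaving it undefined trips the new
    # 'Missing Platform argument' rejection case added below.
    tool_args = AbsToolUserArgModel.create_tool_args(
        'qualification',
        platform=CspEnv.ONPREM,
        eventlogs='/path/to/eventlogs')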
---
.../spark_rapids_tools/cmdli/argprocessor.py | 9 +++
.../test_tool_argprocessor.py | 65 +++++++------------
2 files changed, 32 insertions(+), 42 deletions(-)
diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
index 978f683c1..87b39c40e 100644
--- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
+++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py
@@ -400,6 +400,15 @@ def init_extra_arg_cases(self) -> list:
def define_invalid_arg_cases(self) -> None:
super().define_invalid_arg_cases()
self.define_rejected_missing_eventlogs()
+ self.rejected['Missing Platform argument'] = {
+ 'valid': False,
+ 'callable': partial(self.raise_validation_exception,
+ 'Cannot run tool cmd without platform argument. Re-run the command '
+ 'providing the platform argument.'),
+ 'cases': [
+ [ArgValueCase.UNDEFINED, ArgValueCase.IGNORE, ArgValueCase.IGNORE]
+ ]
+ }
self.rejected['Cluster By Name Without Platform Hints'] = {
'valid': False,
'callable': partial(self.raise_validation_exception,
diff --git a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
index 300751242..ff4bfff35 100644
--- a/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
+++ b/user_tools/tests/spark_rapids_tools_ut/test_tool_argprocessor.py
@@ -131,13 +131,9 @@ def test_with_platform_with_eventlogs(self, get_ut_data_dir, tool_name, csp):
cost_savings_enabled=False,
expected_platform=csp)
- # should pass: platform not provided; event logs are provided
- tool_args = self.create_tool_args_should_pass(tool_name,
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- # for qualification, cost savings should be disabled because cluster is not provided
- self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
- cost_savings_enabled=False,
- expected_platform=CspEnv.ONPREM)
+ # should fail: platform must be provided
+ self.create_tool_args_should_fail(tool_name,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
@pytest.mark.parametrize('csp', all_csps)
@@ -150,17 +146,19 @@ def test_with_platform_with_eventlogs_with_jar_files(self, get_ut_data_dir, tool
tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
assert tool_args['toolsJar'] == f'{get_ut_data_dir}/tools_mock.jar'
- # should pass: tools_jar is correct
- tool_args = self.create_tool_args_should_pass(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
- tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
- assert tool_args['toolsJar'] == f'{get_ut_data_dir}/tools_mock.jar'
+ # should fail: platform must be provided
+ self.create_tool_args_should_fail(tool_name,
+ eventlogs=f'{get_ut_data_dir}/eventlogs',
+ tools_jar=f'{get_ut_data_dir}/tools_mock.jar')
# should fail: tools_jar does not exist
- self.create_tool_args_should_fail(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
+ self.create_tool_args_should_fail(tool_name, platform=csp,
+ eventlogs=f'{get_ut_data_dir}/eventlogs',
tools_jar=f'{get_ut_data_dir}/tools_mock.txt')
# should fail: tools_jar is not .jar extension
- self.create_tool_args_should_fail(tool_name, eventlogs=f'{get_ut_data_dir}/eventlogs',
+ self.create_tool_args_should_fail(tool_name, platform=csp,
+ eventlogs=f'{get_ut_data_dir}/eventlogs',
tools_jar=f'{get_ut_data_dir}/worker_info.yaml')
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
@@ -230,25 +228,15 @@ def test_with_platform_with_cluster_props(self, get_ut_data_dir, tool_name, csp,
self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
cost_savings_enabled=True,
expected_platform=csp)
-
- # should pass: platform not provided; missing eventlogs should be accepted for all CSPs (except onPrem)
- # because the eventlogs can be retrieved from the cluster properties
- tool_args = self.create_tool_args_should_pass(tool_name,
- cluster=cluster_prop_file)
- # for qualification, cost savings should be enabled because cluster is provided
- self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
- cost_savings_enabled=True,
- expected_platform=csp)
else:
# should fail: onprem platform cannot retrieve eventlogs from cluster properties
self.create_tool_args_should_fail(tool_name,
platform=csp,
cluster=cluster_prop_file)
- # should fail: platform not provided; defaults platform to onprem, cannot retrieve eventlogs from
- # cluster properties
- self.create_tool_args_should_fail(tool_name,
- cluster=cluster_prop_file)
+ # should fail: platform must be provided for all CSPs, including onprem
+ self.create_tool_args_should_fail(tool_name,
+ cluster=cluster_prop_file)
@pytest.mark.parametrize('tool_name', ['qualification', 'profiling'])
@pytest.mark.parametrize('csp,prop_path', all_cpu_cluster_props)
@@ -266,14 +254,10 @@ def test_with_platform_with_cluster_props_with_eventlogs(self, get_ut_data_dir,
cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
expected_platform=csp)
- # should pass: platform not provided; cluster properties and eventlogs are provided
- tool_args = self.create_tool_args_should_pass(tool_name,
- cluster=cluster_prop_file,
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- # for qualification, cost savings should be enabled because cluster is provided (except for onprem)
- self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
- cost_savings_enabled=CspEnv(csp) != CspEnv.ONPREM,
- expected_platform=csp)
+ # should fail: platform must be provided
+ self.create_tool_args_should_fail(tool_name,
+ cluster=cluster_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
@pytest.mark.parametrize('tool_name', ['profiling'])
@pytest.mark.parametrize('csp', all_csps)
@@ -308,18 +292,15 @@ def test_with_platform_with_autotuner_with_eventlogs(self, get_ut_data_dir, tool
cost_savings_enabled=False,
expected_platform=csp)
- # should pass: platform not provided; autotuner properties and eventlogs are provided
- tool_args = self.create_tool_args_should_pass(tool_name,
- cluster=autotuner_prop_file,
- eventlogs=f'{get_ut_data_dir}/eventlogs')
- # cost savings should be disabled for profiling
- self.validate_tool_args(tool_name=tool_name, tool_args=tool_args,
- cost_savings_enabled=False,
- expected_platform=CspEnv.ONPREM)
+ # should fail: platform must be provided
+ self.create_tool_args_should_fail(tool_name,
+ cluster=autotuner_prop_file,
+ eventlogs=f'{get_ut_data_dir}/eventlogs')
@pytest.mark.parametrize('prop_path', [autotuner_prop_path])
def test_profiler_with_driverlog(self, get_ut_data_dir, prop_path):
prof_args = AbsToolUserArgModel.create_tool_args('profiling',
+ platform=CspEnv.get_default(),
driverlog=f'{get_ut_data_dir}/{prop_path}')
assert not prof_args['requiresEventlogs']
assert prof_args['rapidOptions']['driverlog'] == f'{get_ut_data_dir}/{prop_path}'