diff --git a/user_tools/src/spark_rapids_pytools/rapids/qualification.py b/user_tools/src/spark_rapids_pytools/rapids/qualification.py index 1e7f1dd89..c9a910955 100644 --- a/user_tools/src/spark_rapids_pytools/rapids/qualification.py +++ b/user_tools/src/spark_rapids_pytools/rapids/qualification.py @@ -787,9 +787,8 @@ def _infer_clusters_for_apps(self, cluster_info_df: pd.DataFrame) -> None: if self.ctxt.get_ctxt('cpuClusterProxy') is not None or not self.ctxt.platform.cluster_inference_supported: self.logger.info('CPU cluster is already set. Skipping cluster inference.') return - cpu_cluster_cols = ['Num Executor Nodes', 'Executor Instance', 'Cores Per Executor'] - gpu_cluster_cols = ['Recommended Num Executor Nodes', 'Recommended Executor Instance', - 'Recommended Cores Per Executor'] + cpu_cluster_cols = self.ctxt.get_value('local', 'output', 'clusterInference', 'cpuClusterColumns') + gpu_cluster_cols = self.ctxt.get_value('local', 'output', 'clusterInference', 'gpuClusterColumns') # == Infer CPU clusters per app == # Drop GPU/Recommended columns to infer the CPU cluster information cpu_cluster_df = cluster_info_df.drop(columns=gpu_cluster_cols, errors='ignore') diff --git a/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml b/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml index 49ee8fe18..0a65f486c 100644 --- a/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml +++ b/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml @@ -341,6 +341,16 @@ local: dstCol: 'Estimated GPU Speedup' - srcCol: 'appDuration_pred' dstCol: 'Estimated GPU Duration' + clusterInference: + cpuClusterColumns: + - 'Num Executor Nodes' + - 'Executor Instance' + - 'Cores Per Executor' + gpuClusterColumns: + - 'Recommended Num Executor Nodes' + - 'Recommended Executor Instance' + - 'Recommended Cores Per Executor' + platform: shortName: 'qual' outputDir: qual-tool-output