diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
index f7b8acef65b8..ba79b6e3c17f 100644
--- a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
@@ -1,15 +1,14 @@
 # SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
 # SPDX-License-Identifier: Apache-2.0

 import pandas as pd
 import os
-from tabulate import tabulate
 import re


 def perf_report(original_file_path):
     df = pd.read_csv(original_file_path)
-
     filtered_df = df[
         [
             "ATTRIBUTES",
@@ -25,7 +24,6 @@ def perf_report(original_file_path):
             "OP TO OP LATENCY [ns]",
             "DEVICE FW DURATION [ns]",
             "DEVICE KERNEL DURATION [ns]",
-            "PM BANDWIDTH [ns]",
         ]
     ].copy()

@@ -43,49 +41,33 @@ def perf_report(original_file_path):

     def clean_attributes(attributes):
         attributes_list = attributes.split(";")
-        filtered_attributes = []
-        for attr in attributes_list:
-            attr = attr.strip()
-            if not any(field in attr for field in fields_to_remove):
-                filtered_attributes.append(attr)
+        filtered_attributes = [
+            attr.strip() for attr in attributes_list if not any(field in attr for field in fields_to_remove)
+        ]
         return "; ".join(filtered_attributes).strip("; ")

     filtered_df["ATTRIBUTES"] = filtered_df["ATTRIBUTES"].apply(clean_attributes)
+    filtered_df["ATTRIBUTES_BACKUP"] = filtered_df["ATTRIBUTES"]

     filtered_df["Input Shape"] = filtered_df.apply(
         lambda row: f"[{row['INPUT_0_W']}, {row['INPUT_0_Z']}, {row['INPUT_0_Y']}, {row['INPUT_0_X']}]", axis=1
     )

-    filtered_df["Cycles Count"] = filtered_df["DEVICE FW END CYCLE"] - filtered_df["DEVICE FW START CYCLE"]

     def split_attributes(attributes):
-        attr_dict = {}
-        matches = re.findall(r"'([^']+)':\s*'([^']+)'", attributes)
-        for key, value in matches:
-            attr_dict[key.strip()] = value.strip()
-
-        topology = attr_dict.get("topology", None)
-        if topology:
-            topology = topology.split("::")[-1]
-            attr_dict["topology"] = topology
-
+        attr_dict = {key: value for key, value in re.findall(r"'([^']+)':\s*'([^']+)'", attributes)}
+        attr_dict["topology"] = attr_dict.get("topology", "").split("::")[-1]
         return pd.Series(
-            {
-                "dim": attr_dict.get("dim", None),
-                "num_links": attr_dict.get("num_links", None),
-                "topology": attr_dict.get("topology", None),
-            }
+            {"dim": attr_dict.get("dim"), "num_links": attr_dict.get("num_links"), "topology": attr_dict["topology"]}
        )

     split_attrs_df = filtered_df["ATTRIBUTES"].apply(split_attributes)
-
     filtered_df = pd.concat([filtered_df, split_attrs_df], axis=1)
-
     filtered_df.drop(columns=["ATTRIBUTES", "INPUT_0_W", "INPUT_0_Z", "INPUT_0_Y", "INPUT_0_X"], inplace=True)

     filtered_df.rename(columns={"INPUT_0_LAYOUT": "Layout", "INPUT_0_DATATYPE": "Data Type"}, inplace=True)
-
     new_order = [
+        "ATTRIBUTES_BACKUP",
         "Input Shape",
         "dim",
         "num_links",
@@ -97,9 +79,7 @@ def split_attributes(attributes):
         "OP TO OP LATENCY [ns]",
         "DEVICE FW DURATION [ns]",
         "DEVICE KERNEL DURATION [ns]",
-        "PM BANDWIDTH [ns]",
     ]
-
     filtered_df = filtered_df[new_order]

     numeric_columns = [
@@ -108,25 +88,42 @@ def split_attributes(attributes):
         "OP TO OP LATENCY [ns]",
         "DEVICE FW DURATION [ns]",
         "DEVICE KERNEL DURATION [ns]",
-        "PM BANDWIDTH [ns]",
     ]
-
     filtered_df[numeric_columns] = filtered_df[numeric_columns].apply(pd.to_numeric, errors="coerce")

-    avg_values = filtered_df[numeric_columns].iloc[4:].mean().round(2).tolist()
+    def calculate_min_avg_max_by_common_runs(df):
+        group_columns = ["ATTRIBUTES_BACKUP", "Input Shape", "dim", "num_links", "topology", "Layout", "Data Type"]
+        results = []
+
+        for name, group in df.groupby(group_columns):
+            group_excluded = group.iloc[4:]
+            if not group_excluded.empty:
+                min_values = group_excluded[numeric_columns].min().round(2)
+                avg_values = group_excluded[numeric_columns].mean().round(2)
+                max_values = group_excluded[numeric_columns].max().round(2)
+
+                result_row = {
+                    **{col: f"{min_values[col]} - {avg_values[col]} - {max_values[col]}" for col in numeric_columns},
+                    **{key: value for key, value in zip(group_columns, name)},
+                }
+                results.append(result_row)

-    avg_row = pd.Series(["Average", None, None, None, None, None] + avg_values, index=filtered_df.columns)
+        return pd.DataFrame(results)

-    filtered_df = pd.concat([filtered_df, avg_row.to_frame().T], ignore_index=True)
+    average_values_by_common_run = calculate_min_avg_max_by_common_runs(filtered_df)

-    avg_df = avg_row.to_frame().T
-    avg_df.columns = filtered_df.columns
+    print_order = ["Input Shape", "dim", "num_links", "topology", "Layout", "Data Type"] + numeric_columns
+    average_values_by_common_run = average_values_by_common_run[print_order]
+
+    filtered_df.drop(columns=["ATTRIBUTES_BACKUP"], inplace=True)

     base, ext = os.path.splitext(original_file_path)
     modified_file_path = f"{base}_modified{ext}"
-
     filtered_df.to_csv(modified_file_path, index=False)
-    print(f"Filtered CSV created successfully at: {modified_file_path}")

-    return avg_df
+    averages_file_path = f"{base}_averages.csv"
+    average_values_by_common_run.to_csv(averages_file_path, index=False)
+    print(f"Averages CSV created successfully at: {averages_file_path}")
+
+    return average_values_by_common_run
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/perf_run.py b/tests/ttnn/unit_tests/operations/ccl/perf/perf_run.py
index a647d5f179dd..8e430eed00df 100644
--- a/tests/ttnn/unit_tests/operations/ccl/perf/perf_run.py
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/perf_run.py
@@ -8,23 +8,6 @@
 from perf_csv import perf_report


-def validate_results(df):
-    reference_values = {
-        "HOST DURATION [ns]": 10000,
-        "Cycles Count": 120000,
-        "OP TO OP LATENCY [ns]": 3500,
-        "DEVICE FW DURATION [ns]": 115000,
-        "DEVICE KERNEL DURATION [ns]": 110000,
-        "PM BANDWIDTH [ns]": 8000,
-    }
-
-    averages = df.iloc[-1]
-
-    for column, reference in reference_values.items():
-        if averages[column] > reference:
-            assert False, f"Output exceeds reference for {column}: {averages[column]} > {reference}"
-
-
 def run_profile_and_extract_csv():
     command = [
         "./tt_metal/tools/profiler/profile_this.py",
@@ -45,8 +28,8 @@ def run_profile_and_extract_csv():

         average_values = perf_report(csv_path)

+        print("Min - Avg - Max by Common Runs:")
         print(tabulate(average_values, headers="keys", tablefmt="pretty"))

-        validate_results(average_values)
     else:
         print("CSV path not found in the command output.")
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/test_all_gather_N300_post_commit.py b/tests/ttnn/unit_tests/operations/ccl/perf/test_all_gather_N300_post_commit.py
index eee8e840914f..5d990132d9db 100644
--- a/tests/ttnn/unit_tests/operations/ccl/perf/test_all_gather_N300_post_commit.py
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/test_all_gather_N300_post_commit.py
@@ -16,38 +16,24 @@
     "num_devices, num_links, output_shape, dim, layout",
     [
         (2, 1, [1, 1, 64, 16384], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [8, 5, 32, 768], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 32, 736], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 32, 704], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 64, 704], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 32, 736], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 1, 32, 704], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 1, 64, 704], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [4, 1, 256, 32], 0, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [8, 1, 256, 32], 0, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 1, 32, 8192], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [8, 5, 13, 512], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [8, 5, 13, 768], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [8, 8, 256, 384], 1, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 1, 64, 2048], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 32, 4096], 3, ttnn.TILE_LAYOUT),
-        # (2, 1, [1, 1, 32, 1024], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 2, 32, 4096], 3, ttnn.ROW_MAJOR_LAYOUT),
-        # (2, 1, [1, 2, 32, 1024], 3, ttnn.TILE_LAYOUT),
+        (2, 1, [8, 5, 32, 768], 3, ttnn.TILE_LAYOUT),
+        (2, 1, [1, 1, 32, 736], 3, ttnn.TILE_LAYOUT),
+        (2, 1, [1, 1, 32, 704], 3, ttnn.TILE_LAYOUT),
+        (2, 1, [1, 1, 64, 704], 3, ttnn.TILE_LAYOUT),
+        (2, 1, [1, 1, 32, 736], 3, ttnn.ROW_MAJOR_LAYOUT),
+        (2, 1, [1, 1, 32, 704], 3, ttnn.ROW_MAJOR_LAYOUT),
     ],
 )
 @pytest.mark.parametrize(
     "input_dtype",
     [
         ttnn.bfloat16,
-        # ttnn.bfloat8_b,
     ],
 )
 @pytest.mark.parametrize(
     "mem_config",
     [
         ttnn.MemoryConfig(buffer_type=ttnn.BufferType.DRAM),
-        # ttnn.MemoryConfig(buffer_type=ttnn.BufferType.L1),
     ],
 )
 @pytest.mark.parametrize("num_iters", [1])
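
For reviewers unfamiliar with the new aggregation, below is a minimal standalone sketch (not part of the patch) of what `calculate_min_avg_max_by_common_runs` does inside the reworked `perf_report`: group rows that share the same run configuration, drop the first four iterations as warm-up, and report each metric as a `min - avg - max` string. The grouping and metric columns are reduced to two here, and the DataFrame values are made up for illustration; the real code also keys the grouping on the backed-up `ATTRIBUTES` string and writes the result to a `*_averages.csv` file.

```python
import pandas as pd

# Reduced stand-ins for the patch's group_columns / numeric_columns (synthetic example).
numeric_columns = ["DEVICE KERNEL DURATION [ns]"]
group_columns = ["Input Shape", "topology"]

# Six iterations of one run configuration; the numbers are invented.
df = pd.DataFrame(
    {
        "Input Shape": ["[1, 1, 64, 16384]"] * 6,
        "topology": ["Ring"] * 6,
        "DEVICE KERNEL DURATION [ns]": [150000, 140000, 130000, 125000, 110000, 112000],
    }
)

rows = []
for name, group in df.groupby(group_columns):
    steady = group.iloc[4:]  # skip the first four iterations, as the patch does
    if steady.empty:
        continue
    min_v = steady[numeric_columns].min().round(2)
    avg_v = steady[numeric_columns].mean().round(2)
    max_v = steady[numeric_columns].max().round(2)
    rows.append(
        {
            # One output row per unique run configuration...
            **dict(zip(group_columns, name)),
            # ...with each metric rendered as a "min - avg - max" string.
            **{col: f"{min_v[col]} - {avg_v[col]} - {max_v[col]}" for col in numeric_columns},
        }
    )

print(pd.DataFrame(rows))
# The single group above yields one row whose metric column reads "110000 - 111000.0 - 112000".
```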