From cc2301bfcc9372b9e268245f2e52812de2020f1c Mon Sep 17 00:00:00 2001
From: Saad Jameel
Date: Mon, 16 Dec 2024 21:11:22 +0000
Subject: [PATCH] #0: add comments

---
 .../dataflow/reader_permute_interleaved_rm_blocked_generic.cpp | 1 +
 .../dataflow/writer_permute_interleaved_rm_blocked_generic.cpp | 1 +
 .../dataflow/writer_permute_interleaved_rm_row_invariant.cpp | 1 +
 3 files changed, 3 insertions(+)

diff --git a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/reader_permute_interleaved_rm_blocked_generic.cpp b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/reader_permute_interleaved_rm_blocked_generic.cpp
index 15b8b199c3f0..f63aaab6d09f 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/reader_permute_interleaved_rm_blocked_generic.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/reader_permute_interleaved_rm_blocked_generic.cpp
@@ -27,6 +27,7 @@ void kernel_main() {
     uint32_t end_block = get_arg_val(2);
 
     // Input shape and strides (excluding W dimension and measured in rows, not bytes)
+    // start at runtime arg 3 since address/start_block/end_block make up the first 3 args
     uint32_t input_shape[N], src_strides[N];
     for (uint32_t i = 3; i < N + 3; i++) {
         input_shape[i - 3] = get_arg_val(i);
diff --git a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_blocked_generic.cpp b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_blocked_generic.cpp
index 910b23b72d51..5af2edb379f1 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_blocked_generic.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_blocked_generic.cpp
@@ -49,6 +49,7 @@ void kernel_main() {
     const InterleavedAddrGen s0 = {.bank_base_address = dst_addr, .page_size = output_tensor_page_size};
 
     // Input shape, permutation, and destination strides
+    // start at runtime arg 3 since address/start_block/end_block make up the first 3 args
     uint32_t input_shape[N], perm[N], dest_strides[N];
     for (uint32_t i = 3; i < N + 3; i++) {
         input_shape[i - 3] = get_arg_val(i);
diff --git a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_row_invariant.cpp b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_row_invariant.cpp
index 46903375ff67..a06e5d568921 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_row_invariant.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/permute/device/kernels/dataflow/writer_permute_interleaved_rm_row_invariant.cpp
@@ -17,6 +17,7 @@ void kernel_main() {
 
     const InterleavedAddrGen s0 = {.bank_base_address = dst_addr, .page_size = page_size};
 
+    // start at runtime arg 3 since address/start_block/end_block make up the first 3 args
     uint32_t input_shape[N], perm[N], dest_strides[N];
     for (uint32_t i = 3; i < N + 3; i++) {
         input_shape[i - 3] = get_arg_val(i);