From e9de0106d53c386906a03bd2af4d7466013500a6 Mon Sep 17 00:00:00 2001
From: Shwetank Singh
Date: Sun, 24 Nov 2024 16:57:09 +0000
Subject: [PATCH] #0: testing

---
 .../ttnn/operations/conv/conv2d/conv2d.cpp  |  49 +++++---
 .../conv/conv2d/prepare_conv2d_weights.cpp  | 113 ++++++++++++++++++
 .../conv/conv2d/prepare_conv2d_weights.hpp  |  14 +++
 3 files changed, 158 insertions(+), 18 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
index af3c7ac6b79..d73e519f8fb 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -112,26 +112,39 @@ Result conv2d(
             .act_block_h_ntiles = opt_conv_op_block_config.act_block_h_ntiles,
             .shard_layout = parallel_config.shard_scheme,
         };
-        tie(weight_tensor_on_device, bias_tensor_on_device) = prepare_conv_weights(
+        // tie(weight_tensor_on_device, bias_tensor_on_device) = prepare_conv_weights(
+        //     weight_tensor,
+        //     input_tensor_post_tm.memory_config(),
+        //     input_tensor_post_tm.layout(),
+        //     "OIHW",
+        //     in_channels,
+        //     out_channels,
+        //     batch_size,
+        //     input_height,
+        //     input_width,
+        //     kernel_size,
+        //     stride,
+        //     padding,
+        //     dilation,
+        //     groups,
+        //     device,
+        //     bias_tensor,
+        //     conv_config,
+        //     weight_config
+        // );
+
+        tie(weight_tensor_on_device, bias_tensor_on_device) = prepare_conv_weights_biases_and_move_to_device(
             weight_tensor,
-            input_tensor_post_tm.memory_config(),
-            input_tensor_post_tm.layout(),
-            "OIHW",
-            in_channels,
-            out_channels,
-            batch_size,
-            input_height,
-            input_width,
-            kernel_size,
-            stride,
-            padding,
-            dilation,
-            groups,
-            device,
             bias_tensor,
-            conv_config,
-            weight_config
-        );
+            conv_config.input_channels_alignment,
+            conv_config.weights_dtype,
+            opt_conv_op_block_config.act_block_w_ntiles,
+            opt_conv_op_block_config.out_subblock_w_ntiles,
+            parallel_config,
+            device,
+            groups,
+            opt_conv_op_block_config.act_block_h_ntiles,
+            input_width);
         weight_tensor_on_device = ttnn::operations::core::to_device(weight_tensor_on_device, device, std::nullopt);
         if(bias_tensor.has_value()){
             bias_tensor_on_device = ttnn::operations::core::to_device(bias_tensor_on_device.value(), device, std::nullopt);
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp
index f0ecbb2da91..7f5d1bf700d 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp
@@ -117,6 +117,116 @@ OptimizedConvBlockConfig get_opt_block_config(
         conv_config.enable_split_reader);
 }
 
+
+
+template <typename T>
+std::pair<ttnn::Tensor, std::optional<ttnn::Tensor>> prepare_conv_weights_biases_and_move_to_device(
+    const ttnn::Tensor& weight_tensor,
+    std::optional<const ttnn::Tensor>& bias_tensor,
+    uint32_t input_channels_alignment,
+    DataType weights_bias_dtype,
+    uint32_t weight_block_h_ntiles,
+    uint32_t weight_block_w_ntiles,
+    const ParallelConfig& parallel_config,
+    T * device,
+    uint32_t groups,
+    uint32_t act_block_h_ntiles,
+    uint32_t input_width) {
+
+    validate_weight_and_bias_tensors(weight_tensor, bias_tensor);
+    ttnn::Tensor weight_tensor_;  // tensor to return
+    ttnn::Tensor bias_tensor_;
+
+    auto original_weights_shape = weight_tensor.get_shape();
+    uint32_t original_weights_out_channels = original_weights_shape[0];
+    uint32_t original_weights_in_channels = original_weights_shape[1];
+    uint32_t original_weights_window_h = original_weights_shape[2];
+    uint32_t original_weights_window_w = original_weights_shape[3];
+
+    bool is_conv1d = original_weights_window_w == 1 && input_width == 1;
+    bool is_depthwise_conv = groups == original_weights_out_channels && original_weights_in_channels == 1;
+
+    weight_tensor_ = weight_tensor;
+
+    // Convert weight tensor to 0 padded shape if groups > 1
+    if (!is_conv1d and groups > 1) {
+        weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_grouped_layout(weight_tensor_, groups, weights_bias_dtype);
+    }
+    else if (is_conv1d and groups > 1) {
+        if (is_depthwise_conv) {
+            weight_tensor_ = convert_conv_weight_tensor_to_depthwise_layout(weight_tensor_, act_block_h_ntiles, weights_bias_dtype);
+            weight_block_h_ntiles = act_block_h_ntiles;
+        }
+        else{
+            weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_grouped_layout(weight_tensor_, groups, weights_bias_dtype);
+        }
+    }
+
+    auto weights_shape = weight_tensor_.get_shape();
+    uint32_t out_channels = weights_shape[0];
+    uint32_t in_channels = weights_shape[1];
+    uint32_t window_h = weights_shape[2];
+    uint32_t window_w = weights_shape[3];
+    uint32_t out_channel_padding = tt::round_up(out_channels, 32) - out_channels;
+    tt::tt_metal::LegacyShape weights_channels_padded_shape = tt::tt_metal::LegacyShape(std::array<uint32_t, 4>(
+        {tt::round_up(out_channels, 32), tt::round_up(in_channels, input_channels_alignment), window_h, window_w}));
+    if (weights_bias_dtype == DataType::BFLOAT8_B) {
+        TT_ASSERT(weight_tensor_.get_dtype() == DataType::FLOAT32);
+        if (bias_tensor.has_value()) {
+            TT_ASSERT(bias_tensor.value().get_dtype() == DataType::FLOAT32);
+        }
+    } else {
+        // TODO: fix the need to check this. We should be able to accept any datatype and convert
+        TT_ASSERT(weight_tensor_.get_dtype() == weights_bias_dtype);
+        if (bias_tensor.has_value()) {
+            TT_ASSERT(bias_tensor.value().get_dtype() == weights_bias_dtype);
+        }
+    }
+    weight_tensor_ = ttnn::pad(weight_tensor_, weights_channels_padded_shape.to_array_4D(), tt::tt_metal::Array4D({0, 0, 0, 0}), 0);
+
+    // for conv op, pad the weights to block shape
+    if (parallel_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) {
+        weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_special_padding_tiled_layout(
+            weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype);
+    } else {
+        weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_tiled_layout(
+            weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype);
+    }
+
+    uint32_t weight_matrix_height = in_channels * window_h * window_w;
+    int32_t weight_matrix_height_padding = weight_tensor_.shape()[2] - weight_matrix_height;
+    TT_FATAL(weight_matrix_height_padding >= 0," Matrix Height Padding can't be negative");
+
+    // convert_conv_weight_tensor adds the padding to the base shape.
+    // Reshape the weights to remove padding from the base shape.
+    weight_tensor_.set_shape(
+        ttnn::Shape(std::array<uint32_t, 4>{1, 1, weight_matrix_height, out_channels},
+        std::array<std::array<uint32_t, 2>, 4>{
+            std::array<uint32_t, 2>{0, 0},
+            std::array<uint32_t, 2>{0, 0},
+            std::array<uint32_t, 2>{0, weight_matrix_height_padding},
+            std::array<uint32_t, 2>{0, out_channel_padding}
+        }));
+
+    weight_tensor_ = ttnn::operations::core::to_device(weight_tensor_, device, std::nullopt);
+    if (bias_tensor.has_value()) {
+        bias_tensor_ = bias_tensor.value();
+        auto bias_shape = bias_tensor_.get_shape();
+        TT_ASSERT(bias_shape[3] == out_channels && bias_shape[0] == 1 && bias_shape[1] == 1 && bias_shape[2] == 1);
+        tt::tt_metal::LegacyShape bias_channels_padded_shape = tt::tt_metal::LegacyShape(
+            std::array<uint32_t, 4>({1, 1, 32, tt::round_up(out_channels, weight_block_w_ntiles * 32)}));
+        bias_tensor_ = ttnn::pad(bias_tensor_, bias_channels_padded_shape.to_array_4D(), tt::tt_metal::Array4D({0, 0, 0, 0}), 0);
+        bias_tensor_ = ttnn::to_layout(
+            bias_tensor_, Layout::TILE, std::nullopt, std::nullopt, (T*)nullptr);
+        if (bias_tensor_.get_dtype() != weights_bias_dtype) {
+            bias_tensor_ = ttnn::to_dtype(bias_tensor_, weights_bias_dtype);
+        }
+        bias_tensor_ = ttnn::operations::core::to_device(bias_tensor_, device, std::nullopt);
+    }
+
+    return {weight_tensor_, bias_tensor.has_value() ? bias_tensor_ : std::optional<ttnn::Tensor>()};
+}
+
 template <typename T>
 std::pair<ttnn::Tensor, std::optional<ttnn::Tensor>> prepare_conv_weights(
     const ttnn::Tensor& weight_tensor,
@@ -175,6 +285,9 @@ std::pair<ttnn::Tensor, std::optional<ttnn::Tensor>> prepare_conv_weights(
         tensor_layout = conv_config.shard_layout.value();
     }
 
+    // std::cout << "Weight Block H TILES: " << weight_block_h_ntiles << " weight_block_w_ntiles: " << weight_block_w_ntiles << "act_block_h_ntiles: " << act_block_h_ntiles
+    //           << " act_block_h_ntiles: " << act_block_h_ntiles << " tensor_layout: "<< (int)tensor_layout << std::endl;
+
     validate_weight_tensor(weight_tensor);
     ttnn::Tensor weight_tensor_ = weight_tensor;  // tensor to return
 
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp
index 4cf349da240..d4b25eb7d80 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp
@@ -65,6 +65,20 @@ ttnn::Tensor prepare_conv_bias(
     T *device,
     std::optional<const Conv2dConfig> conv_config_);
 
+template <typename T>
+std::pair<ttnn::Tensor, std::optional<ttnn::Tensor>> prepare_conv_weights_biases_and_move_to_device(
+    const ttnn::Tensor& weight_tensor,
+    std::optional<const ttnn::Tensor>& bias_tensor,
+    uint32_t input_channels_alignment,
+    DataType weights_bias_dtype,
+    uint32_t weight_block_h_ntiles,
+    uint32_t weight_block_w_ntiles,
+    const sliding_window::ParallelConfig& parallel_config,
+    T * device,
+    uint32_t groups,
+    uint32_t act_block_h_ntiles,
+    uint32_t input_width);
+
 } // namespace conv2d
 } // namespace operations::conv
 } // namespace ttnn
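
Illustration (not part of the patch): prepare_conv_weights_biases_and_move_to_device zero-pads the weight tensor to [round_up(out_channels, 32), round_up(in_channels, input_channels_alignment), kh, kw] before converting it to tiled layout and moving it to device. The standalone C++ sketch below shows only that padding arithmetic; the concrete values and the local round_up helper are assumptions for the example (the patch itself uses tt::round_up).

    // Sketch of the channel padding computed by the new helper; values are illustrative.
    #include <cstdint>
    #include <iostream>

    // Local stand-in for tt::round_up (assumed helper, positive integers only).
    static uint32_t round_up(uint32_t value, uint32_t multiple) {
        return ((value + multiple - 1) / multiple) * multiple;
    }

    int main() {
        uint32_t out_channels = 100;             // example value
        uint32_t in_channels = 3;                // example value
        uint32_t input_channels_alignment = 16;  // e.g. conv_config.input_channels_alignment

        // Weights are padded to [round_up(out, 32), round_up(in, alignment), kh, kw].
        uint32_t padded_out = round_up(out_channels, 32);                       // 128
        uint32_t padded_in = round_up(in_channels, input_channels_alignment);   // 16
        uint32_t out_channel_padding = padded_out - out_channels;               // 28

        std::cout << padded_out << " " << padded_in << " " << out_channel_padding << "\n";
        return 0;
    }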