From 0a7ffbd8fd3028b88e2344d9133f5efe8c5677d1 Mon Sep 17 00:00:00 2001 From: Shehtab Zaman Date: Sat, 18 Mar 2023 01:29:49 -0700 Subject: [PATCH] Buildling and linking implementation - Run with new pre-commit hook --- .../distconv/distconv_layer_norm.hpp | 15 +- .../lbann/layers/regularizers/layer_norm.hpp | 24 +- .../layers/regularizers/layer_norm_impl.hpp | 92 ++++---- .../transform/distconv/distconv_gather.hpp | 3 +- .../transform/distconv/distconv_scatter.hpp | 3 +- .../distconv/distconv_layer_norm.cu | 209 ++++++++++-------- src/layers/regularizers/layer_norm.cpp | 2 +- src/layers/regularizers/layer_norm.cu | 15 +- 8 files changed, 182 insertions(+), 181 deletions(-) diff --git a/include/lbann/layers/regularizers/distconv/distconv_layer_norm.hpp b/include/lbann/layers/regularizers/distconv/distconv_layer_norm.hpp index e2f7fda48b3..e181ee28f0e 100644 --- a/include/lbann/layers/regularizers/distconv/distconv_layer_norm.hpp +++ b/include/lbann/layers/regularizers/distconv/distconv_layer_norm.hpp @@ -27,7 +27,7 @@ #ifndef LBANN_LAYERSE_REGULARIZERS_DISTCONV_LAYER_NORM #define LBANN_LAYERSE_REGULARIZERS_DISTCONV_LAYER_NORM -#if LBANN_HAS_DISTCONV +#ifdef LBANN_HAS_DISTCONV namespace distconv { template @@ -39,17 +39,13 @@ class LayerNormalization using DCTensor = tensor::Tensor; public: - LayerNormalization(Backend& backend, - Datatype epsilon, - size_t max_mini_batch_size) - : m_backend(backend), - m_epsilon(epsilon), - m_max_mini_batch_size(max_mini_batch_size) + LayerNormalization(Backend& backend, DataType epsilon) + : m_backend(backend), m_epsilon(epsilon) {} template void calculate_forward_stats(const DCTensor& input, - DC& statistics); + DCTensor& statistics); template void apply_normalization(const DCTensor& input, @@ -74,10 +70,9 @@ class LayerNormalization private: DataType m_epsilon; - size_t m_max_mini_batch_size; }; // class definition LayerNorm } // namespace distconv #endif // LBANN_HAS_DISTONV -#endif // LBANN_LAYERSE_REGULARIZERS_DISTCONV_LAYER_NORM \ No newline at end of file +#endif // LBANN_LAYERSE_REGULARIZERS_DISTCONV_LAYER_NORM diff --git a/include/lbann/layers/regularizers/layer_norm.hpp b/include/lbann/layers/regularizers/layer_norm.hpp index 13a20a043ad..e2a214459bf 100644 --- a/include/lbann/layers/regularizers/layer_norm.hpp +++ b/include/lbann/layers/regularizers/layer_norm.hpp @@ -35,19 +35,20 @@ #include #ifdef LBANN_HAS_DISTCONV -#include "lbann/utils/distconv.hpp" #include "lbann/layers/data_type_distconv_adapter.hpp" #include "lbann/layers/regularizers/distconv/distconv_layer_norm.hpp" +#include "lbann/utils/distconv.hpp" #endif // LBANN_HAS_DISTCONV namespace lbann { #ifdef LBANN_HAS_DISTCONV namespace dc { -using Shape = ::distconv::tensor::Shape; -using Backend= ::distconv::BackendDNNLib; +using Shape = ::distconv::tensor::Shape; +using Backend = ::distconv::BackendDNNLib; template -using LayerNormalization = ::distconv::LayerNormalization; +using LayerNormalization = + ::distconv::LayerNormalization; } // namespace dc template @@ -66,12 +67,10 @@ class layer_norm_distconv_adapter void setup_distributions(tensor_overlap_constraints& constraints) override; void setup_layer(size_t workspace_capacity) override; - void setup_fp_tensors() override; - void setup_bp_tensors() override; void fp_compute(); void bp_compute(); - + TensorDevType m_statistics; TensorDevType m_statistics_grad; std::unique_ptr> m_layer_norm_operator; @@ -167,14 +166,11 @@ class layer_norm_layer : public data_type_layer std::unique_ptr m_statistics_gradient; }; +LBANN_DEFINE_LAYER_BUILDER(layer_norm); -#endif // LBANN_HAS_DISTCONV - - LBANN_DEFINE_LAYER_BUILDER(layer_norm); - - // ========================================================= - // Explicit template instantiation - // ========================================================= +// ========================================================= +// Explicit template instantiation +// ========================================================= #ifndef LBANN_LAYER_NORM_LAYER_INSTANTIATE #define PROTO_DEVICE(T, Device) \ diff --git a/include/lbann/layers/regularizers/layer_norm_impl.hpp b/include/lbann/layers/regularizers/layer_norm_impl.hpp index 0cc91e17573..972cb7330a1 100644 --- a/include/lbann/layers/regularizers/layer_norm_impl.hpp +++ b/include/lbann/layers/regularizers/layer_norm_impl.hpp @@ -29,12 +29,11 @@ #include "lbann/layers/regularizers/layer_norm.hpp" -#ifdef LBANN_HAS_DISTONV +#ifdef LBANN_HAS_DISTCONV #include "lbann/layers/data_type_distconv_adapter.hpp" -#endif - -namespace lbann{ +#endif // LBANN_HAS_DISTCONV +namespace lbann { // ========================================================= // Implementation @@ -135,7 +134,6 @@ void layer_norm_layer::setup_data( m_statistics_gradient.reset(AbsDistMatrixType::Instantiate(dist)); } - #ifdef LBANN_HAS_DISTCONV // ============================================================= @@ -174,57 +172,53 @@ layer_norm_layer::get_distconv_adapter() layer_norm_distconv_adapter&>( static_cast&>(*this) .get_distconv_adapter()); +} // ============================================================= // LayerNorm DistConv Adapter implementation // ============================================================= - template - void layer_norm_distconv_adapter:: - setup_distributions(tensor_overlap_constraints & constraints) - { - data_type_distconv_adapter::setup_distributions( - constraints); - // no overlap needed - for (auto& d : this->m_prev_activations_dists) { - d.clear_overlap(); - constraints.mark_updated(d); - constraints.mark_invariant(d); - } - for (auto& d : this->m_activations_dists) { - d.clear_overlap(); - constraints.mark_updated(d); - constraints.mark_invariant(d); - } - for (auto& d : this->m_prev_error_signals_dists) { - d.clear_overlap(); - constraints.mark_updated(d); - constraints.mark_invariant(d); - } - for (auto& d : this->m_error_signals_dists) { - d.clear_overlap(); - constraints.mark_updated(d); - constraints.mark_invariant(d); - } +template +void layer_norm_distconv_adapter:: + setup_distributions(tensor_overlap_constraints& constraints) +{ + data_type_distconv_adapter::setup_distributions(constraints); + // no overlap needed + for (auto& d : this->m_prev_activations_dists) { + d.clear_overlap(); + constraints.mark_updated(d); + constraints.mark_invariant(d); } - - template - void layer_norm_distconv_adapter::setup_layer( - size_t workspace_capacity) - { - data_type_distconv_adapter::setup_layer(workspace_capacity); - auto& layer = dynamic_cast< - channelwise_fully_connected_layer&>( - this->layer()); - const auto max_mini_batch_size = - layer.get_model()->m_max_mini_batch_size_distconv; - - m_layer_norm_operator = - make_unique>(dc::get_backend(), - layer.m_epsilon, - max_mini_batch_size); + for (auto& d : this->m_activations_dists) { + d.clear_overlap(); + constraints.mark_updated(d); + constraints.mark_invariant(d); + } + for (auto& d : this->m_prev_error_signals_dists) { + d.clear_overlap(); + constraints.mark_updated(d); + constraints.mark_invariant(d); } + for (auto& d : this->m_error_signals_dists) { + d.clear_overlap(); + constraints.mark_updated(d); + constraints.mark_invariant(d); + } +} + +template +void layer_norm_distconv_adapter::setup_layer( + size_t workspace_capacity) +{ + data_type_distconv_adapter::setup_layer(workspace_capacity); + auto& layer = dynamic_cast&>( + this->layer()); + + m_layer_norm_operator = + make_unique>(dc::get_backend(), + layer.m_epsilon); +} -#endif LBANN_HAS_DISTCONV +#endif // LBANN_HAS_DISTCONV } // namespace lbann #endif // LBANN_LAYER_REGULARIZER_LAYER_NORM_IMPL_HPP_INCLUDED \ No newline at end of file diff --git a/include/lbann/layers/transform/distconv/distconv_gather.hpp b/include/lbann/layers/transform/distconv/distconv_gather.hpp index 016730d4331..77b6993e2d9 100644 --- a/include/lbann/layers/transform/distconv/distconv_gather.hpp +++ b/include/lbann/layers/transform/distconv/distconv_gather.hpp @@ -29,11 +29,12 @@ #include "distconv/base.hpp" #include "distconv/tensor/tensor.hpp" #include "distconv/tensor/tensor_mpi.hpp" -#include "lbann/layers/transform/distconv/distconv_nvshmem_vector_addressing.hpp" #include "lbann/utils/distconv.hpp" #if defined(LBANN_HAS_NVSHMEM) && defined(LBANN_HAS_DISTCONV) +#include "lbann/layers/transform/distconv/distconv_nvshmem_vector_addressing.hpp" + namespace distconv { template class Gather diff --git a/include/lbann/layers/transform/distconv/distconv_scatter.hpp b/include/lbann/layers/transform/distconv/distconv_scatter.hpp index b703f50b001..3847f4727f3 100644 --- a/include/lbann/layers/transform/distconv/distconv_scatter.hpp +++ b/include/lbann/layers/transform/distconv/distconv_scatter.hpp @@ -29,11 +29,12 @@ #include "distconv/base.hpp" #include "distconv/tensor/tensor.hpp" #include "distconv/tensor/tensor_mpi.hpp" -#include "lbann/layers/transform/distconv/distconv_nvshmem_vector_addressing.hpp" #include "lbann/utils/distconv.hpp" #if defined(LBANN_HAS_NVSHMEM) && defined(LBANN_HAS_DISTCONV) +#include "lbann/layers/transform/distconv/distconv_nvshmem_vector_addressing.hpp" + namespace distconv { template class Scatter diff --git a/src/layers/regularizers/distconv/distconv_layer_norm.cu b/src/layers/regularizers/distconv/distconv_layer_norm.cu index 6efdab532b1..b6efeef9071 100644 --- a/src/layers/regularizers/distconv/distconv_layer_norm.cu +++ b/src/layers/regularizers/distconv/distconv_layer_norm.cu @@ -26,24 +26,29 @@ #define LBANN_LAYERS_REGULARIZERS_DISTCONV_LAYER_NORM_INSTANTIATE -#include "../layer_norm_kernel.cuh" -#include "lbann/layers/regularizers/distconv/distonv_layer_norm.hpp" +#include "../layer_norm_kernels.cuh" +#include "lbann/utils/gpu/sync_info_helpers.hpp" +#include #ifdef LBANN_HAS_DISTCONV +#include "lbann/layers/regularizers/distconv/distconv_layer_norm.hpp" + +namespace distconv { + template template -void LayerNormalization ::calculate_forward_stats( +void LayerNormalization::calculate_forward_stats( const DCTensor& input, DCTensor& statistics) { - if (input_0.get_local_size() == 0) { + if (input.get_local_size() == 0) { util::MPIRootPrintStreamInfo() << "WARNING: EMPTY INPUT FOUND \n"; return; // no op for empty inputs } const auto& input_dims = input.get_local_shape(); const auto& statistics_dims = statistics.get_local_shape(); - const auto local_num_samples = input_0_dims[3]; + const auto local_num_samples = input_dims[3]; const auto global_num_samples = statistics_dims[3]; const auto local_sample_size = std::accumulate(input_dims.begin(), input_dims.end() - 1, @@ -56,10 +61,7 @@ void LayerNormalization ::calculate_forward_stats( input.get_buffer(), local_sample_size); - LocalMat local_statistics(2, - global_num_samples, - statistics.get_local_shape(), - 2); + LocalMat local_statistics(2, global_num_samples, statistics.get_buffer(), 2); El::Zero(local_statistics); auto local_means = El::View(local_statistics, El::IR(0), El::ALL); @@ -67,15 +69,20 @@ void LayerNormalization ::calculate_forward_stats( { using namespace hydrogen; - auto multisync = El::MakeMultiSync(gpu::get_sync_info(local_statistics), - gpu::get_sync_info(local_input)); + El::Zero(local_statistics); + auto local_means = El::View(local_statistics, El::IR(0), El::ALL); + auto local_vars = El::View(local_statistics, El::IR(1), El::ALL); + + auto multisync = + El::MakeMultiSync(::lbann::gpu::get_sync_info(local_statistics), + ::lbann::gpu::get_sync_info(local_input)); constexpr size_t block_size = 256; dim3 block_dims, grid_dims; block_dims.x = block_size; grid_dims.x = (local_sample_size + block_size - 1) / block_size; grid_dims.y = local_num_samples; hydrogen::gpu::LaunchKernel( - ::lbann::layer_norm_fp_sums_kernel, + ::lbann::layer_norm_fp_sums_kernel, grid_dims, block_dims, 0, @@ -93,14 +100,14 @@ void LayerNormalization ::calculate_forward_stats( template template -void LayerNormalization::apply_normalization( +void LayerNormalization::apply_normalization( const DCTensor& input, const DCTensor& statistics, DCTensor& output) { const auto& input_dims = input.get_local_shape(); const auto& statistics_dims = statistics.get_local_shape(); - const auto local_num_samples = input_0_dims[3]; + const auto local_num_samples = input_dims[3]; const auto global_num_samples = statistics_dims[3]; const auto local_sample_size = std::accumulate(input_dims.begin(), input_dims.end() - 1, @@ -113,53 +120,52 @@ void LayerNormalization::apply_normalization( input.get_buffer(), local_sample_size); - const LocalMat local_statistics(2, - global_num_samples, - statistics.get_local_shape(), - 2); + LocalMat local_statistics(2, global_num_samples, statistics.get_buffer(), 2); LocalMat local_output(local_sample_size, local_num_samples, output.get_buffer(), local_sample_size); - const auto local_means = El::View(local_statistics, El::IR(0), El::ALL); - const auto local_vars = El::View(local_statistics, El::IR(1), El::ALL); { using namespace hydrogen; - auto sync_info = gpu::get_sync_info(local_statistics); - constexpr size_t block_size = 256; - dim3 block_dims, grid_dims; - block_dims.x = block_size; - grid_dims.x = (local_num_samples + block_size - 1) / block_size; - hydrogen::gpu::LaunchKernel(layer_norm_fp_statistics_kernel, - grid_dims, - block_dims, - 0, - sync_info, - sample_size, - local_num_samples, - local_means.Buffer(), - local_means.LDim(), - local_vars.Buffer(), - local_vars.LDim()); - auto multisync = El::MakeMultiSync(gpu::get_sync_info(local_output), - gpu::get_sync_info(local_statistics), - gpu::get_sync_info(local_input)); + auto local_means = El::View(local_statistics, El::IR(0), El::ALL); + auto local_vars = El::View(local_statistics, El::IR(1), El::ALL); + + auto sync_info = ::lbann::gpu::get_sync_info(local_statistics); constexpr size_t block_size = 256; dim3 block_dims, grid_dims; block_dims.x = block_size; - grid_dims.x = (local_sample_size + block_size - 1) / block_size; + grid_dims.x = (local_num_samples + block_size - 1) / block_size; grid_dims.y = local_num_samples; - hydrogen::gpu::LaunchKernel(layer_norm_fp_output_kernel, + + hydrogen::gpu::LaunchKernel( + ::lbann::layer_norm_fp_statistics_kernel, + grid_dims, + block_dims, + 0, + sync_info, + local_sample_size, + local_num_samples, + local_means.Buffer(), + local_means.LDim(), + local_vars.Buffer(), + local_vars.LDim()); + + auto multisync = + El::MakeMultiSync(::lbann::gpu::get_sync_info(local_output), + ::lbann::gpu::get_sync_info(local_statistics), + ::lbann::gpu::get_sync_info(local_input)); + + hydrogen::gpu::LaunchKernel(::lbann::layer_norm_fp_output_kernel, grid_dims, block_dims, 0, multisync, local_num_samples, local_sample_size, - epsilon, + m_epsilon, local_input.LockedBuffer(), local_input.LDim(), local_output.Buffer(), @@ -173,7 +179,7 @@ void LayerNormalization::apply_normalization( template template -void LayerNormalization::calculate_backward_stats( +void LayerNormalization::calculate_backward_stats( const DCTensor& input, const DCTensor& output_grad, const DCTensor& statistics, @@ -181,7 +187,7 @@ void LayerNormalization::calculate_backward_stats( { const auto& input_dims = input.get_local_shape(); const auto& statistics_dims = statistics.get_local_shape(); - const auto local_num_samples = input_0_dims[3]; + const auto local_num_samples = input_dims[3]; const auto global_num_samples = statistics_dims[3]; const auto local_sample_size = std::accumulate(input_dims.begin(), input_dims.end() - 1, @@ -197,10 +203,7 @@ void LayerNormalization::calculate_backward_stats( output_grad.get_buffer(), local_sample_size); - const LocalMat local_statistics(2, - global_num_samples, - statistics.get_local_shape(), - 2); + LocalMat local_statistics(2, global_num_samples, statistics.get_buffer(), 2); LocalMat local_statistics_grad(2, global_num_samples, @@ -208,18 +211,24 @@ void LayerNormalization::calculate_backward_stats( 2); { using namespace hydrogen; + const auto local_means = El::View(local_statistics, El::IR(0), El::ALL); + const auto local_vars = El::View(local_statistics, El::IR(1), El::ALL); + + auto local_means_grad = El::View(local_statistics_grad, El::IR(0), El::ALL); + auto local_vars_grad = El::View(local_statistics_grad, El::IR(1), El::ALL); + auto multisync = - El::MakeMultiSync(gpu::get_sync_info(local_statistics_grad), - gpu::get_sync_info(local_output_grad), - gpu::get_sync_info(local_statistics), - gpu::get_sync_info(local_input)); + El::MakeMultiSync(::lbann::gpu::get_sync_info(local_statistics_grad), + ::lbann::gpu::get_sync_info(local_output_grad), + ::lbann::gpu::get_sync_info(local_statistics), + ::lbann::gpu::get_sync_info(local_input)); constexpr size_t block_size = 256; dim3 block_dims, grid_dims; block_dims.x = block_size; grid_dims.x = (local_sample_size + block_size - 1) / block_size; grid_dims.y = local_num_samples; hydrogen::gpu::LaunchKernel( - layer_norm_bp_statistics_grad_kernel, + ::lbann::layer_norm_bp_statistics_grad_kernel, grid_dims, block_dims, 0, @@ -244,15 +253,16 @@ void LayerNormalization::calculate_backward_stats( template template -void LayerNormalization::apply_grad(const DCTensor& input, - const DCTensor& output_grad, - const DCTensor& statistics, - const DCTensor& statistics_grad, - DCTensor& input_grad) +void LayerNormalization::apply_grad( + const DCTensor& input, + const DCTensor& output_grad, + const DCTensor& statistics, + const DCTensor& statistics_grad, + DCTensor& input_grad) { const auto& input_dims = input.get_local_shape(); const auto& statistics_dims = statistics.get_local_shape(); - const auto local_num_samples = input_0_dims[3]; + const auto local_num_samples = input_dims[3]; const auto global_num_samples = statistics_dims[3]; const auto local_sample_size = std::accumulate(input_dims.begin(), input_dims.end() - 1, @@ -268,15 +278,12 @@ void LayerNormalization::apply_grad(const DCTensor& input, output_grad.get_buffer(), local_sample_size); - const LocalMat local_statistics(2, - global_num_samples, - statistics.get_local_shape(), - 2); + LocalMat local_statistics(2, global_num_samples, statistics.get_buffer(), 2); - const LocalMat local_statistics_grad(2, - global_num_samples, - statistics_grad.get_buffer(), - 2); + LocalMat local_statistics_grad(2, + global_num_samples, + statistics_grad.get_buffer(), + 2); LocalMat local_input_grad(local_sample_size, local_num_samples, @@ -285,38 +292,46 @@ void LayerNormalization::apply_grad(const DCTensor& input, { using namespace hydrogen; auto multisync = - El::MakeMultiSync(gpu::get_sync_info(local_statistics_grad), - gpu::get_sync_info(local_output_grad), - gpu::get_sync_info(local_statistics), - gpu::get_sync_info(local_input)); + El::MakeMultiSync(::lbann::gpu::get_sync_info(local_statistics_grad), + ::lbann::gpu::get_sync_info(local_output_grad), + ::lbann::gpu::get_sync_info(local_statistics), + ::lbann::gpu::get_sync_info(local_input)); + const auto local_means = El::View(local_statistics, El::IR(0), El::ALL); + const auto local_vars = El::View(local_statistics, El::IR(1), El::ALL); + + auto local_means_grad = El::View(local_statistics_grad, El::IR(0), El::ALL); + auto local_vars_grad = El::View(local_statistics_grad, El::IR(1), El::ALL); + constexpr size_t block_size = 256; dim3 block_dims, grid_dims; block_dims.x = block_size; grid_dims.x = (local_sample_size + block_size - 1) / block_size; grid_dims.y = local_num_samples; - hydrogen::gpu::LaunchKernel(layer_norm_bp_input_grad_kernel, - grid_dims, - block_dims, - 0, - multisync, - sample_size, - local_num_samples, - local_sample_size, - m_epsilon, - local_input.LockedBuffer(), - local_input.LDim(), - local_output_grad.LockedBuffer(), - local_output_grad.LDim(), - local_input_grad.Buffer(), - local_input_grad.LDim(), - local_means.LockedBuffer(), - local_means.LDim(), - local_vars.LockedBuffer(), - local_vars.LDim(), - local_means_grad.LockedBuffer(), - local_means_grad.LDim(), - local_vars_grad.LockedBuffer(), - local_vars_grad.LDim()); + + hydrogen::gpu::LaunchKernel( + ::lbann::layer_norm_bp_input_grad_kernel, + grid_dims, + block_dims, + 0, + multisync, + local_sample_size, + local_num_samples, + local_sample_size, + m_epsilon, + local_input.LockedBuffer(), + local_input.LDim(), + local_output_grad.LockedBuffer(), + local_output_grad.LDim(), + local_input_grad.Buffer(), + local_input_grad.LDim(), + local_means.LockedBuffer(), + local_means.LDim(), + local_vars.LockedBuffer(), + local_vars.LDim(), + local_means_grad.LockedBuffer(), + local_means_grad.LDim(), + local_vars_grad.LockedBuffer(), + local_vars_grad.LDim()); } } @@ -354,5 +369,7 @@ void LayerNormalization::apply_grad(const DCTensor& input, ETI(float, BackendDNNLib) ETI(double, BackendDNNLib) -#endef ETI -#endif // LBANN_HAS_DISTCONV \ No newline at end of file +#undef ETI + +#endif // LBANN_HAS_DISTCONV +} // namespace distconv diff --git a/src/layers/regularizers/layer_norm.cpp b/src/layers/regularizers/layer_norm.cpp index eb48302589e..458cbaa5d56 100644 --- a/src/layers/regularizers/layer_norm.cpp +++ b/src/layers/regularizers/layer_norm.cpp @@ -25,8 +25,8 @@ //////////////////////////////////////////////////////////////////////////////// #define LBANN_LAYER_NORM_LAYER_INSTANTIATE -#include "lbann/layers/regularizers/layer_norm.hpp" #include "lbann/comm_impl.hpp" +#include "lbann/layers/regularizers/layer_norm_impl.hpp" #ifdef LBANN_HAS_DISTCONV #include "lbann/layers/data_type_distconv_adapter.hpp" diff --git a/src/layers/regularizers/layer_norm.cu b/src/layers/regularizers/layer_norm.cu index dd15451c3ff..00d3a5c4622 100644 --- a/src/layers/regularizers/layer_norm.cu +++ b/src/layers/regularizers/layer_norm.cu @@ -25,8 +25,9 @@ //////////////////////////////////////////////////////////////////////////////// #define LBANN_LAYER_NORM_LAYER_INSTANTIATE +#include "layer_norm_kernels.cuh" #include "lbann/comm_impl.hpp" -#include "lbann/layers/regularizers/layer_norm.hpp" +#include "lbann/layers/regularizers/layer_norm_impl.hpp" #include "lbann/utils/gpu/helpers.hpp" #ifdef LBANN_HAS_DISTCONV @@ -281,17 +282,15 @@ void bp_impl(lbann_comm& comm, #ifdef LBANN_HAS_DISTCONV template -void layer_norm_distconv_adapter fp_compute() +void layer_norm_distconv_adapter::fp_compute() { - auto& l = dynamic_cast< - channelwise_fully_connected_layer&>( + auto& l = dynamic_cast&>( this->layer()); lbann_comm& comm = *(l.get_comm()); auto& statistics = *l.m_statistics; assert0(dc::tensor::View(m_statistics, statistics.Buffer())); - using GPUMatType = El::Matrix; m_layer_norm_operator->calculate_forward_stats(this->get_prev_activations(), m_statistics); comm.allreduce(statistics, statistics.RedundantComm(), El::mpi::SUM); @@ -301,10 +300,9 @@ void layer_norm_distconv_adapter fp_compute() } template -void layer_norm_distconv_adapter bp_compute() +void layer_norm_distconv_adapter::bp_compute() { - auto& l = dynamic_cast< - channelwise_fully_connected_layer&>( + auto& l = dynamic_cast&>( this->layer()); lbann_comm& comm = *(l.get_comm()); @@ -313,7 +311,6 @@ void layer_norm_distconv_adapter bp_compute() assert0(dc::tensor::View(m_statistics, statistics.Buffer())); assert0(dc::tensor::View(m_statistics_grad, statistics_grad.Buffer())); - using GPUMatType = El::Matrix; m_layer_norm_operator->calculate_backward_stats( this->get_prev_activations(), this->get_prev_error_signals(),