diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index e825c3761a8e..afc6b344740b 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -3269,7 +3269,6 @@ void EnqueueProgramImpl(
     detail::CompileProgram(device, program);
     program.allocate_circular_buffers(device);
     detail::ValidateCircularBufferRegion(program, device);
-    program.call_pre_exec_callback();
     cq.hw_command_queue().enqueue_program(program, blocking);
     // Program relinquishes ownership of all global buffers its using, once its been enqueued. Avoid mem
     // leaks on device.
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index bd375cebad3a..cea2b98e4111 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -128,7 +128,7 @@ class Program_ {
 
     void invalidate_circular_buffer_allocation();
 
-    uint32_t get_max_cb_memory_usage(const Device *device) const;
+    uint32_t get_cb_memory_size() const;
     void allocate_circular_buffers(const Device *device);
 
     bool is_finalized() const;
@@ -754,18 +754,21 @@ void detail::Program_::invalidate_circular_buffer_allocation() {
 }
 
 void Program::invalidate_circular_buffer_allocation() { pimpl_->invalidate_circular_buffer_allocation(); }
-uint32_t Program::get_max_cb_memory_usage(const Device *device) const { return pimpl_->get_max_cb_memory_usage(device); }
+uint32_t Program::get_cb_memory_size() const { return pimpl_->get_cb_memory_size(); }
 
 
-uint32_t detail::Program_::get_max_cb_memory_usage(const Device *device) const{
-    uint64_t base_cb_address = device->get_base_allocator_addr(HalMemType::L1);
-    uint64_t end_cb_address = base_cb_address;
+uint32_t detail::Program_::get_cb_memory_size() const{
+
+    uint32_t total_cb_size = 0;
+    for (const auto& circular_buffer : this->circular_buffers_) {
+        if (circular_buffer->globally_allocated()) {
+            continue;
+        }
+        total_cb_size += circular_buffer->size();
 
-    for (const CircularBufferAllocator &cb_allocator : this->cb_allocators_) {
-            end_cb_address = std::max(end_cb_address, cb_allocator.get_cb_region_end());
     }
-    log_info("Base CB address: {}, End CB Address: {}", base_cb_address,end_cb_address);
-    return end_cb_address - base_cb_address;
+    log_info("Total CB Size : {}", total_cb_size);
+    return total_cb_size;
 }
 
 void detail::Program_::allocate_circular_buffers(const Device *device) {
@@ -1467,19 +1470,6 @@ const std::vector<SubDeviceId> &detail::Program_::determine_sub_device_ids(const
     return sub_device_ids->second;
 }
 
-void Program::set_pre_exec_callback(std::function<void(const Program&)> callback){
-    this->pre_exec_callback_ = std::move(callback);
-}
-
-void Program::call_pre_exec_callback(){
-    if (this->pre_exec_callback_) {
-        this->pre_exec_callback_(*this);
-    } else {
-        log_debug("No pre-exec callback set for program {}", this->get_id());
-    }
-}
-
-
 void detail::Program_::finalize(Device *device) {
     // Store the number of tensix "go signals" for use by CQ
     // CQ iterates over these to update runtime addresses, needs to know when eth begins (after tensix)
diff --git a/tt_metal/impl/program/program.hpp b/tt_metal/impl/program/program.hpp
index d7475329f334..1ea32a54cbe2 100644
--- a/tt_metal/impl/program/program.hpp
+++ b/tt_metal/impl/program/program.hpp
@@ -166,15 +166,10 @@ class Program {
 
     const std::vector<SubDeviceId> &determine_sub_device_ids(const Device *device);
 
-    void set_pre_exec_callback(std::function<void(const Program&)> callback);
-    void call_pre_exec_callback();
-    uint32_t get_max_cb_memory_usage(const Device *device) const;
+    uint32_t get_cb_memory_size() const;
 
 
    private:
-
-    std::function<void(const Program&)> pre_exec_callback_;
-
     std::unique_ptr<detail::Program_> pimpl_;
 
     friend CBHandle CreateCircularBuffer(Program &program, const std::variant<CoreCoord, CoreRange, CoreRangeSet> &core_spec, const CircularBufferConfig &config);
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
index 7f5b9391e8fe..82b8baebf130 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
@@ -265,38 +265,33 @@ operation::ProgramWithCallbacks OptimizedConvNew::create_program(const std::vect
         enable_subblock_padding,
         use_non_tile_height);
 
-    if(std::getenv("TT_DISABLE_CONV_L1_CHECK")==nullptr) {
-        tt::log_info(tt::LogOp, "Allocation Stats before Op: {}", this->pre_op_l1_allocation_size_bytes);
-        program_with_cbs.program.set_pre_exec_callback([this, arch, input_dtype, weights_dtype, output_dtype, weights_shape, has_bias, device](const Program& program) {
-            const uint32_t post_op_l1_stats = device->get_memory_allocation_statistics(tt::tt_metal::BufferType::L1).total_allocated_bytes;
-            auto actual_cb_size = program.get_max_cb_memory_usage(device);
-
-            auto [calc_output_size, calc_CB_size] = estimate_L1_usage(
-                    arch, this->memory_config.memory_layout,
-                    input_dtype, weights_dtype, output_dtype,
-                    compute_kernel_config,
-                    block_config, parallelization_config,
-                    input_tensor_shape, weights_shape, sliding_window_config.get_output_shape(),
-                    output_channels, groups, std::array<uint32_t,2>({sliding_window_config.window_hw.first, sliding_window_config.window_hw.second}),
-                    Conv2dConfig{
-                        .enable_act_double_buffer=enable_act_double_buffer,
-                        .enable_weights_double_buffer=enable_weights_double_buffer,
-                        .enable_split_reader=enable_split_reader,
-                        .enable_subblock_padding=enable_subblock_padding
-                    },
-                    has_bias, use_non_tile_height);
-            if(calc_CB_size > 0) {
-                if(calc_CB_size != actual_cb_size) {
-                    tt::log_error("Calculated CB size {} does not match with the actual CB size {}",calc_CB_size,actual_cb_size);
-                    TT_ASSERT(actual_cb_size==calc_CB_size);
-                }
-            }
-            if(calc_output_size > 0) {
-                if(post_op_l1_stats != this->pre_op_l1_allocation_size_bytes + calc_output_size) {
-                    tt::log_error(tt::LogOp, "Mismatch!! L1 Allocation Pre Op =  {}, Post Op = {} Calculated Size = {}", this->pre_op_l1_allocation_size_bytes, post_op_l1_stats,calc_output_size);
-                }
-            }
-        });
+        const uint32_t post_op_l1_stats = device->get_memory_allocation_statistics(tt::tt_metal::BufferType::L1).total_allocated_bytes;
+        auto actual_cb_size = program_with_cbs.program.get_cb_memory_size();
+
+        auto [calc_output_size, calc_CB_size] = estimate_L1_usage(
+                arch, this->memory_config.memory_layout,
+                input_dtype, weights_dtype, output_dtype,
+                compute_kernel_config,
+                block_config, parallelization_config,
+                input_tensor_shape, weights_shape, sliding_window_config.get_output_shape(),
+                output_channels, groups, std::array<uint32_t,2>({sliding_window_config.window_hw.first, sliding_window_config.window_hw.second}),
+                Conv2dConfig{
+                    .enable_act_double_buffer=enable_act_double_buffer,
+                    .enable_weights_double_buffer=enable_weights_double_buffer,
+                    .enable_split_reader=enable_split_reader,
+                    .enable_subblock_padding=enable_subblock_padding
+                },
+                has_bias, use_non_tile_height);
+    if(calc_CB_size > 0) {
+        if(calc_CB_size != actual_cb_size) {
+            tt::log_error("Calculated CB size {} does not match with the actual CB size {}",calc_CB_size,actual_cb_size);
+            TT_ASSERT(actual_cb_size==calc_CB_size);
+        }
+    }
+    if(calc_output_size > 0) {
+        if(post_op_l1_stats != this->pre_op_l1_allocation_size_bytes + calc_output_size) {
+            tt::log_error(tt::LogOp, "Mismatch!! L1 Allocation Pre Op =  {}, Post Op = {} Calculated Size = {}", this->pre_op_l1_allocation_size_bytes, post_op_l1_stats,calc_output_size);
+        }
     }
     return program_with_cbs;
 }