From 31f80a86c99959197a4ccfbde10579568b46c792 Mon Sep 17 00:00:00 2001
From: mtairum <mtairum@tenstorrent.com>
Date: Mon, 11 Nov 2024 10:53:45 +0000
Subject: [PATCH 01/69] #14934: Comment Llama3-70B frequent tests. Update
 Mixtral and LLama3-1B perf targets on CI tests

---
 .github/workflows/t3000-frequent-tests-impl.yaml          | 2 +-
 models/demos/llama3/tests/test_llama_perf.py              | 2 +-
 models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/t3000-frequent-tests-impl.yaml b/.github/workflows/t3000-frequent-tests-impl.yaml
index 67036141b0e..3ee0715eefe 100644
--- a/.github/workflows/t3000-frequent-tests-impl.yaml
+++ b/.github/workflows/t3000-frequent-tests-impl.yaml
@@ -22,7 +22,7 @@ jobs:
           { name: "t3k n300 mesh llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
           { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
-          { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
+          # { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich  # FIXME issue #14934
           { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
           { name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho
         ]
diff --git a/models/demos/llama3/tests/test_llama_perf.py b/models/demos/llama3/tests/test_llama_perf.py
index 5b3bb019538..ce0ba43a3e3 100644
--- a/models/demos/llama3/tests/test_llama_perf.py
+++ b/models/demos/llama3/tests/test_llama_perf.py
@@ -45,7 +45,7 @@ def test_llama_model_perf(mesh_device, kv_cache_len, expected_compile_time, use_
     tokenizer = Tokenizer(model_args.tokenizer_path)
 
     if "3.2-1B" in model_args.DEFAULT_CACHE_PATH:
-        expected_inference_time = 0.04
+        expected_inference_time = 0.045
     elif "3.2-3B" in model_args.DEFAULT_CACHE_PATH:
         expected_inference_time = 0.065
     elif "3.1-8B" in model_args.DEFAULT_CACHE_PATH:
diff --git a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
index 25a2af5c8b4..d02f236c35d 100644
--- a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
+++ b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
@@ -141,8 +141,8 @@ def test_mixtral_model_perf(
     "prefill_seqlen, expected_compile_time, expected_inference_time",
     (
         (128, 80, 0.23),
-        (1024, 80, 1.5),  # FIXME #12318
-        (1024 * 2, 80, 4.7),  # FIXME #12318
+        (1024, 80, 1.55),  # FIXME #12318
+        (1024 * 2, 80, 5.5),  # FIXME #12318
         # (1024*4, 80, 60),
         # (1024*8, 150, 80),
         # (1024*16, 150, 100),

From 3f12bb9bf54a140a54fe95d240f354bdcd190546 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <132954887+dvartaniansTT@users.noreply.github.com>
Date: Fri, 8 Nov 2024 18:02:59 -0800
Subject: [PATCH 02/69] [skip ci] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 452a5d2baeb..133a9edaf3c 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,7 @@ For the latest model updates and features, please see [MODEL_UPDATES.md](models/
 - [Programming Mesh of Devices](./tech_reports/Programming%20Mesh%20of%20Devices/Programming%20Mesh%20of%20Devices%20with%20TT-NN.md) (updated Sept 9th)
 - [ViT Implementation in TT-NN on GS](./tech_reports/ViT-TTNN/vit.md)  (updated Sept 22nd)
 - [LLMs Bring up in TT-NN](./tech_reports/LLMs/llms.md)  (updated Oct 29th)
+- [YOLOv4 Implementation in TT-NN on WH](./tech_reports/YoloV4-TTNN/yolov4.md)  (updated November 8th)
 ---
 
 <div align="center">

From ef7190196db4e98f5d8ec6f79f9b1fad37ca8fc2 Mon Sep 17 00:00:00 2001
From: Atul Krishnadas <atul.krishnadas@outlook.com>
Date: Mon, 11 Nov 2024 15:06:43 -0800
Subject: [PATCH 03/69] Embedding RM convertion and fused tilized recondition
 PR (#14389)

### Ticket
#13593

### Problem description
 - Fix Embedding RM conversion, PCC errors were a sweep/untilize issue

### What's changed
 - uint32 untilize provided by Naif's changes
 - Convert inputs to RM for embedding op, recondition fused tilized

### Checklist
- [ ] Post commit CI passes:
https://github.com/tenstorrent/tt-metal/actions/runs/11693157456
- [ ] T3K passes:
https://github.com/tenstorrent/tt-metal/actions/runs/11693180731

---------
---
 .../unit_tests/operations/test_embedding.py   | 53 ++++++++++++
 ttnn/CMakeLists.txt                           |  1 +
 .../kernels/dataflow/embeddings_tilize.cpp    | 24 +++---
 .../ttnn/operations/embedding/embedding.cpp   | 85 +++++++++++++++++++
 .../ttnn/operations/embedding/embedding.hpp   | 46 ++--------
 .../operations/embedding/embedding_pybind.hpp |  6 +-
 6 files changed, 160 insertions(+), 55 deletions(-)
 create mode 100644 ttnn/cpp/ttnn/operations/embedding/embedding.cpp

diff --git a/tests/ttnn/unit_tests/operations/test_embedding.py b/tests/ttnn/unit_tests/operations/test_embedding.py
index a9b6f106a1f..89dc39a0788 100644
--- a/tests/ttnn/unit_tests/operations/test_embedding.py
+++ b/tests/ttnn/unit_tests/operations/test_embedding.py
@@ -121,3 +121,56 @@ def test_moe_embedding(
     output_tensor = ttnn.to_torch(output_tensor)
 
     assert_with_pcc(torch_output_tensor, output_tensor)
+
+
+@pytest.mark.parametrize("batch_size", [1, 8, 9])
+@pytest.mark.parametrize("sentence_size", [32, 256, 512])
+@pytest.mark.parametrize("hidden_embedding_dim", [768, 4096])  # Bert_Num_Cols_768, Llama_Num_Cols
+@pytest.mark.parametrize(
+    "vocabulary_size", [512, 30522, 2048]
+)  # Bert_Position_Embeddings_512, Bert_Word_Embeddings_30528, Llama_Position_Embeddings,
+@pytest.mark.parametrize("input_mem_config", [ttnn.DRAM_MEMORY_CONFIG])
+@pytest.mark.parametrize("output_mem_config", [ttnn.DRAM_MEMORY_CONFIG])
+@pytest.mark.parametrize("layout", [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT])
+def test_embedding_tiled_input(
+    device,
+    batch_size,
+    sentence_size,
+    hidden_embedding_dim,
+    vocabulary_size,
+    input_mem_config,
+    output_mem_config,
+    layout,
+):
+    torch.manual_seed(1234)
+
+    torch_input_tensor = torch.randint(0, vocabulary_size - 1, (batch_size, sentence_size))
+    torch_weights = torch_random((vocabulary_size, hidden_embedding_dim), -0.1, 0.1, dtype=torch.bfloat16)
+    # torch_output_tensor = torch.nn.functional.embedding(torch_input_tensor, torch_weights)
+    torch_embedding = torch.nn.Embedding.from_pretrained(torch_weights)
+    torch_output_tensor = torch_embedding(torch_input_tensor)
+
+    input_tensor = ttnn.to_device(
+        ttnn.from_torch(torch_input_tensor, dtype=ttnn.uint32, layout=ttnn.ROW_MAJOR_LAYOUT),
+        device,
+        memory_config=input_mem_config,
+    )
+    weights = ttnn.to_device(
+        ttnn.from_torch(torch_weights, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT),
+        device,
+        memory_config=input_mem_config,
+    )
+
+    # output_tensor = ttnn.embedding(input_tensor, weights, memory_config=output_mem_config, layout=ttnn.ROW_MAJOR_LAYOUT)
+    output_tensor = ttnn.embedding(
+        input_tensor,
+        weights,
+        embeddings_type=ttnn.EmbeddingsType.GENERIC,  # Default embeddings type
+        dtype=ttnn.bfloat16,
+        memory_config=output_mem_config,  # Default memory config
+        queue_id=0,  # Default queue id
+        layout=layout,
+    )
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    assert_with_pcc(torch_output_tensor, output_tensor)
diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index bc2b1773cc2..8ce161317e3 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -158,6 +158,7 @@ set(ALL_TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/unary/unary.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding/device/embedding_device_operation.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding/embedding.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding_backward/embedding_backward.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding_backward/embedding_backward_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding_backward/device/embedding_backward_device_operation.cpp
diff --git a/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp b/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
index 3361b261937..a3cd6e05a01 100644
--- a/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
+++ b/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
@@ -95,21 +95,21 @@ void kernel_main() {
             uint64_t src_noc_addr;
             uint32_t token = input_l1_ptr[k];
             #if defined PADDED
-            if (token == pad_token) {
-                src_noc_addr = pad_noc_addr;
-            } else {
-                src_noc_addr = get_noc_addr(token, weights);
-            }
-            #elif defined BINARY
-            if (token == 0) {
-                src_noc_addr = zero_noc_addr;
-            } else {
-                src_noc_addr = one_noc_addr;
-            }
+                if (token == pad_token) {
+                    src_noc_addr = pad_noc_addr;
+                } else {
+                    src_noc_addr = get_noc_addr(token, weights);
+                }
+                #elif defined BINARY
+                if (token == 0) {
+                    src_noc_addr = zero_noc_addr;
+                } else {
+                    src_noc_addr = one_noc_addr;
+                }
             #else
                 #if defined BFP16
                 union { float f; uint32_t u; } u;
-                u.u = (uint32_t)input_l1_ptr[token_idx] << 16;
+                u.u = (uint32_t)input_l1_ptr[k] << 16;
                 uint32_t token_casted = static_cast<uint32_t>(u.f);
                 src_noc_addr = get_noc_addr(token_casted, weights);
                 #else
diff --git a/ttnn/cpp/ttnn/operations/embedding/embedding.cpp b/ttnn/cpp/ttnn/operations/embedding/embedding.cpp
new file mode 100644
index 00000000000..8be8ab3d8c3
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/embedding/embedding.cpp
@@ -0,0 +1,85 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttnn/operations/embedding/embedding.hpp"
+#include "ttnn/operations/core/core.hpp"
+#include "ttnn/common/constants.hpp"
+#include "ttnn/operations/embedding/device/embedding_device_operation.hpp"
+#include "ttnn/run_operation.hpp"
+#include "ttnn/operations/data_movement/unsqueeze/unsqueeze.hpp"
+
+namespace ttnn::operations::embedding{
+
+ttnn::Tensor EmbeddingOperation::invoke(
+    uint8_t queue_id,
+    const Tensor& input_tensor_arg,
+    const Tensor& weight_arg,
+    const std::optional<int>& pad_token,
+    const std::optional<ttnn::Layout>& layout,
+    EmbeddingsType embeddings_type,
+    const std::optional<const DataType> dtype,
+    const std::optional<MemoryConfig>& memory_config,
+    std::optional<Tensor> optional_output_tensor) {
+    if (pad_token.has_value()) {
+        embeddings_type = EmbeddingsType::PADDED;
+    }
+    Tensor mutable_input_tensor = input_tensor_arg;
+    Tensor mutable_weight = weight_arg;
+
+    // TODO: Add support for indices tensor in tile layout
+    // Issue #: 14915
+    TT_FATAL(input_tensor_arg.get_layout() == ttnn::ROW_MAJOR_LAYOUT, "Indices tensor must be in row major layout.");
+
+    if (mutable_weight.get_layout() == ttnn::TILE_LAYOUT) {
+        mutable_weight = ttnn::to_layout(mutable_weight, ttnn::ROW_MAJOR_LAYOUT, std::nullopt, std::nullopt, mutable_weight.device());
+    }
+    auto hidden_embedding_dim = mutable_weight.get_shape()[-1];
+    auto padded_hidden_embedding_dim = mutable_weight.get_shape().with_tile_padding()[-1];
+    auto weight = ttnn::unsqueeze_to_4D(mutable_weight);
+
+    auto batch_size = mutable_input_tensor.get_shape()[0];
+    auto sentence_size = mutable_input_tensor.get_shape()[-1];
+    auto input_tensor =
+        ttnn::reshape(mutable_input_tensor, ttnn::Shape{std::array<uint32_t, 4>{batch_size, 1, 1, sentence_size}});
+
+    // If layout is row major, OR if the input tensor is not a multiple of TILE_HEIGHT, then we cannot use tilized
+    bool fused_tilized = false;
+    if(input_tensor.get_legacy_shape()[-1] % TILE_HEIGHT == 0 &&
+        weight.get_legacy_shape()[-1] % TILE_WIDTH == 0){
+        if(layout.has_value()){
+            if(layout.value() == ttnn::TILE_LAYOUT) fused_tilized = true;
+        }
+        else if(weight_arg.get_layout() == ttnn::TILE_LAYOUT){
+            fused_tilized = true;
+        }
+    }
+
+    auto embeddings = operation::run(
+                            Embeddings{
+                                .output_mem_config = memory_config.value_or(input_tensor.memory_config()),
+                                .tilized = fused_tilized,
+                                .embeddings_type = embeddings_type,
+                                .pad_token = pad_token,
+                                .output_dtype = dtype.value_or(weight.get_dtype())},
+                            {input_tensor, weight})
+                            .at(0);
+    embeddings = ttnn::reshape(
+        embeddings, ttnn::Shape{std::array<uint32_t, 3>{batch_size, sentence_size, hidden_embedding_dim}});
+    embeddings = ttnn::to_layout(embeddings, layout.value_or(weight_arg.get_layout()), std::nullopt, std::nullopt, (Device*)nullptr);
+    return embeddings;
+}
+ttnn::Tensor EmbeddingOperation::invoke(
+    const Tensor& input_tensor_arg,
+    const Tensor& weight_arg,
+    const std::optional<int>& pad_token,
+    const std::optional<ttnn::Layout>& layout,
+    EmbeddingsType embeddings_type,
+    const std::optional<const DataType> dtype,
+    const std::optional<MemoryConfig>& memory_config,
+    std::optional<Tensor> optional_output_tensor
+    ) {
+    return invoke(DefaultQueueId, input_tensor_arg, weight_arg, pad_token, layout, embeddings_type, dtype, memory_config, optional_output_tensor);
+}
+
+}  // namespace ttnn::operations::embedding
diff --git a/ttnn/cpp/ttnn/operations/embedding/embedding.hpp b/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
index 52439fd693d..03679ffd40e 100644
--- a/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
+++ b/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
@@ -4,11 +4,8 @@
 
 #pragma once
 
-#include "ttnn/common/constants.hpp"
 #include "ttnn/operations/embedding/device/embedding_device_operation.hpp"
-#include "ttnn/run_operation.hpp"
 #include "ttnn/decorators.hpp"
-#include "ttnn/operations/core/core.hpp"
 
 namespace ttnn {
 
@@ -17,56 +14,25 @@ namespace operations {
 namespace embedding {
 
 struct EmbeddingOperation {
-    static inline Tensor invoke(
+    static ttnn::Tensor invoke(
         uint8_t queue_id,
         const Tensor& input_tensor_arg,
         const Tensor& weight_arg,
         const std::optional<int>& pad_token = std::nullopt,
-        const Layout& layout = ttnn::ROW_MAJOR_LAYOUT,
+        const std::optional<Layout>& layout = std::nullopt,
         EmbeddingsType embeddings_type = EmbeddingsType::GENERIC,
         const std::optional<const DataType> dtype = std::nullopt,
         const std::optional<MemoryConfig>& memory_config = std::nullopt,
-        std::optional<Tensor> optional_output_tensor = std::nullopt) {
-        if (pad_token.has_value()) {
-            embeddings_type = EmbeddingsType::PADDED;
-        }
-
-        auto hidden_embedding_dim = weight_arg.get_shape()[-1];
-        auto padded_hidden_embedding_dim = weight_arg.get_shape().with_tile_padding()[-1];
-        auto weight = ttnn::unsqueeze_to_4D(weight_arg);
-
-        auto batch_size = input_tensor_arg.get_shape()[0];
-        auto sentence_size = input_tensor_arg.get_shape()[-1];
-        auto input_tensor =
-            ttnn::reshape(input_tensor_arg, ttnn::SimpleShape{std::array<uint32_t, 4>{batch_size, 1, 1, sentence_size}});
-
-        bool tilized = layout == ttnn::TILE_LAYOUT;
-        auto embeddings = operation::run(
-                              Embeddings{
-                                  .output_mem_config = memory_config.value_or(input_tensor.memory_config()),
-                                  .tilized = tilized,
-                                  .embeddings_type = embeddings_type,
-                                  .pad_token = pad_token,
-                                  .output_dtype = dtype.value_or(weight.get_dtype())},
-                              {input_tensor, weight})
-                              .at(0);
-        embeddings = ttnn::reshape(
-            embeddings, ttnn::SimpleShape{std::array<uint32_t, 3>{batch_size, sentence_size, hidden_embedding_dim}});
-        return embeddings;
-    }
-
-    static inline auto invoke(
+        std::optional<Tensor> optional_output_tensor = std::nullopt);
+    static ttnn::Tensor invoke(
         const Tensor& input_tensor_arg,
         const Tensor& weight_arg,
         const std::optional<int>& pad_token = std::nullopt,
-        const Layout& layout = ttnn::ROW_MAJOR_LAYOUT,
+        const std::optional<Layout>& layout = std::nullopt,
         EmbeddingsType embeddings_type = EmbeddingsType::GENERIC,
         const std::optional<const DataType> dtype = std::nullopt,
         const std::optional<MemoryConfig>& memory_config = std::nullopt,
-        std::optional<Tensor> optional_output_tensor = std::nullopt
-        ) {
-            return invoke(DefaultQueueId, input_tensor_arg, weight_arg, pad_token, layout, embeddings_type, dtype, memory_config, optional_output_tensor);
-        }
+        std::optional<Tensor> optional_output_tensor = std::nullopt);
 };
 
 }  // namespace embedding
diff --git a/ttnn/cpp/ttnn/operations/embedding/embedding_pybind.hpp b/ttnn/cpp/ttnn/operations/embedding/embedding_pybind.hpp
index dbd2f167c5b..49fa7769122 100644
--- a/ttnn/cpp/ttnn/operations/embedding/embedding_pybind.hpp
+++ b/ttnn/cpp/ttnn/operations/embedding/embedding_pybind.hpp
@@ -40,7 +40,7 @@ void py_module(py::module& module) {
 
 
         Returns:
-            ttnn.Tensor: the output tensor.
+            ttnn.Tensor: the output tensor of layout == layout or layout of the weights tensor.
 
 
         Example:
@@ -69,7 +69,7 @@ void py_module(py::module& module) {
             const ttnn::Tensor& input_tensor,
             const ttnn::Tensor& weight,
             const std::optional<int>& padding_idx,
-            const ttnn::Layout& layout,
+            const std::optional<ttnn::Layout>& layout,
             EmbeddingsType embeddings_type,
             const std::optional<const DataType> dtype,
             std::optional<ttnn::Tensor> &optional_output_tensor,
@@ -81,7 +81,7 @@ void py_module(py::module& module) {
             py::arg("weight").noconvert(),
             py::kw_only(),
             py::arg("padding_idx") = std::nullopt,
-            py::arg("layout") = ttnn::ROW_MAJOR_LAYOUT,
+            py::arg("layout") = std::nullopt,
             py::arg("embeddings_type").noconvert() = EmbeddingsType::GENERIC,
             py::arg("dtype").noconvert() = std::nullopt,
             py::arg("output_tensor").noconvert() = std::nullopt,

From 9387e7b208e67eceebba13fc3c32f3861422737a Mon Sep 17 00:00:00 2001
From: Aswin Zayasankaran <156493059+Aswinmcw@users.noreply.github.com>
Date: Tue, 12 Nov 2024 11:50:25 +0530
Subject: [PATCH 04/69] #14406: Add CCL Perf tests to pipeline  (#14836)

### Ticket
#14406

### Problem description
Need CCL Perf to be triggered in pipeline

### What's changed
Adds CCL Perf tests to T3K perf pipeline

T3K Model Perf -
https://github.com/tenstorrent/tt-metal/actions/runs/11718145347/job/32639080900
Perf Artifact -
https://github.com/tenstorrent/tt-metal/actions/runs/11718145347/job/32639080900#step:13:33

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 .../t3000-model-perf-tests-impl.yaml          | 41 ++++++++++++++++---
 .github/workflows/t3000-model-perf-tests.yaml |  8 +++-
 .../t3000/run_t3000_model_perf_tests.sh       | 29 ++++++++++++-
 .../operations/ccl/perf/perf_csv.py           |  9 ++--
 .../ccl/perf/run_all_gather_profile.sh        | 30 +++++++++-----
 .../ccl/perf/run_reduce_scatter_profile.sh    | 30 +++++++++-----
 6 files changed, 116 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml
index 91e208c214b..c104d01fbaa 100644
--- a/.github/workflows/t3000-model-perf-tests-impl.yaml
+++ b/.github/workflows/t3000-model-perf-tests-impl.yaml
@@ -22,6 +22,7 @@ jobs:
           { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
           { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
           { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
+          { name: "t3k CCL all_gather perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
           #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
         ]
     name: ${{ matrix.test-group.name }}
@@ -45,13 +46,25 @@ jobs:
         run: |
           echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
           echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
+      - name: Download profiler build artifact
+        id: download-profiler-artifact
+        if: ${{ matrix.test-group.tracy }}
+        uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
+        continue-on-error: true
+      - name: Download build artifact
+        id: download-artifact
+        if: ${{ !matrix.test-group.tracy }}
+        uses: actions/download-artifact@v4
         with:
           name: TTMetal_build_${{ matrix.test-group.arch }}
       - name: Extract files
+        if: ${{ matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy }}
         run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
       - uses: ./.github/actions/install-python-deps
       - name: Run model perf regression tests
+        if: ${{ matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy }}
         shell: bash {0}
         timeout-minutes: ${{ matrix.test-group.timeout }}
         run: |
@@ -63,12 +76,28 @@ jobs:
           env python models/perf/merge_perf_results.py
       - name: Check perf report exists
         id: check-perf-report
-        if: ${{ !cancelled() }}
+        if: ${{ !cancelled() && (matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy) }}
         run: |
-          ls -hal
-          export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv"
-          ls -hal $PERF_REPORT_FILENAME
-          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
+          TODAY=$(date +%Y_%m_%d)
+          PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
+          PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv"
+          if [ "${{ matrix.test-group.tracy }}" == "true" ]; then
+            if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
+              echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
+              echo "perf_report_filename=$PERF_REPORT_FILENAME_CCL" >> "$GITHUB_OUTPUT"
+            else
+              echo "No CCL perf report found for today."
+              exit 1
+            fi
+          else
+            if [ -f "$PERF_REPORT_FILENAME_MODELS" ]; then
+              echo "Found Models Perf report: $PERF_REPORT_FILENAME_MODELS"
+              echo "perf_report_filename=$PERF_REPORT_FILENAME_MODELS" >> "$GITHUB_OUTPUT"
+            else
+              echo "No Models perf report found for today."
+              exit 1
+            fi
+          fi
       - name: Upload perf report
         if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
         uses: actions/upload-artifact@v4
diff --git a/.github/workflows/t3000-model-perf-tests.yaml b/.github/workflows/t3000-model-perf-tests.yaml
index 0a8759af27c..15d96746889 100644
--- a/.github/workflows/t3000-model-perf-tests.yaml
+++ b/.github/workflows/t3000-model-perf-tests.yaml
@@ -11,7 +11,13 @@ jobs:
     with:
       arch: '["wormhole_b0"]'
     secrets: inherit
+  build-artifact-profiler:
+    uses: ./.github/workflows/build-artifact.yaml
+    with:
+      arch: '["wormhole_b0"]'
+      tracy: true
+    secrets: inherit
   t3000-model-perf-tests:
-    needs: build-artifact
+    needs: [build-artifact, build-artifact-profiler]
     secrets: inherit
     uses: ./.github/workflows/t3000-model-perf-tests-impl.yaml
diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
index 70baaa85ae3..19a54d710b1 100755
--- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh
+++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -142,6 +142,25 @@ run_t3000_resnet50_tests() {
   fi
 }
 
+run_t3000_ccl_all_gather_perf_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_ccl_all_gather_perf_tests"
+
+  tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh -t t3000
+  fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_ccl_all_gather_perf_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_llm_tests() {
   # Run falcon7b tests
   run_t3000_falcon7b_tests
@@ -173,6 +192,12 @@ run_t3000_cnn_tests() {
   env python models/perf/merge_perf_results.py
 }
 
+run_t3000_ccl_tests() {
+  # Run ccl performance tests
+  run_t3000_ccl_all_gather_perf_tests
+
+}
+
 fail=0
 main() {
   # For CI pipeline - source func commands but don't execute tests if not invoked directly
@@ -219,8 +244,10 @@ main() {
     run_t3000_llm_tests
   elif [[ "$pipeline_type" == "cnn_model_perf_t3000_device" ]]; then
     run_t3000_cnn_tests
+  elif [[ "$pipeline_type" == "ccl_perf_t3000_device" ]]; then
+    run_t3000_ccl_tests
   else
-    echo "$pipeline_type is invalid (supported: [cnn_model_perf_t3000_device, cnn_model_perf_t3000_device])" 2>&1
+    echo "$pipeline_type is invalid (supported: [cnn_model_perf_t3000_device, cnn_model_perf_t3000_device, ccl_perf_t3000_device])" 2>&1
     exit 1
   fi
 
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
index 31f4636aa66..3d5cc2aaeb5 100644
--- a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import os
 import re
+import time
 
 
 def perf_report(file_path):
@@ -214,10 +215,12 @@ def calculate_bandwidth(row):
 
     averages_df = pd.DataFrame(averages_data)
 
-    averages_file_path = file_path.replace(".csv", "_averages.csv")
+    today = time.strftime("%Y_%m_%d")
+    ccl_perf_file_path = f"CCL_Perf_{today}.csv"
+    os.rename(file_path, ccl_perf_file_path)
 
-    averages_df.to_csv(averages_file_path, index=False)
+    averages_df.to_csv(ccl_perf_file_path, index=False)
 
-    print(f"Averages CSV saved to: {averages_file_path}")
+    print(f"CCL Perf report CSV saved to: {ccl_perf_file_path}")
 
     return averages_df
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh b/tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh
index 8422bde56d0..0e714429b88 100755
--- a/tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh
@@ -72,24 +72,34 @@ run_profile_and_extract_csv() {
 
     if [ -n "$csv_path" ]; then
         echo "CSV path found: $csv_path"
+        echo "Generating performance report..."
 
-        # Run the Python script to generate performance report
-        average_values=$(PYTHONPATH="$MODULE_DIR" python3 -c "
+        tmp_file="/tmp/perf_report_output.log"
+        PYTHONPATH="$MODULE_DIR" python3 -c "
+import sys
 import pandas as pd
 from perf_csv import perf_report
 from tabulate import tabulate
 
-# Generate the report and convert it to a DataFrame
-average_df = perf_report('$csv_path')
-# Print the DataFrame in a pretty table format
-print(tabulate(average_df, headers='keys', tablefmt='pretty'))
-")
+try:
+    # Generate the report and convert it to a DataFrame
+    average_df = perf_report('$csv_path')
+    # Print the DataFrame in a pretty table format
+    print('Min - Avg - Max by Common Runs:')
+    print(tabulate(average_df, headers='keys', tablefmt='pretty'))
+except Exception as e:
+    print(f'Error in performance report generation: {e}', file=sys.stderr)
+    sys.exit(1)
+" 2>&1 | tee "$tmp_file"
+
+        if grep -q "Error in performance report generation" "$tmp_file"; then
+            echo "Error: Performance report generation failed."
+            exit 1
+        fi
 
-        # Print the output
-        echo "Min - Avg - Max by Common Runs:"
-        echo "$average_values"
     else
         echo "CSV path not found in the command output."
+        exit 1
     fi
 }
 
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh b/tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh
index 23071225ac1..2f054ca348c 100755
--- a/tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh
@@ -72,24 +72,34 @@ run_profile_and_extract_csv() {
 
     if [ -n "$csv_path" ]; then
         echo "CSV path found: $csv_path"
+        echo "Generating performance report..."
 
-        # Run the Python script to generate performance report
-        average_values=$(PYTHONPATH="$MODULE_DIR" python3 -c "
+        tmp_file="/tmp/perf_report_output.log"
+        PYTHONPATH="$MODULE_DIR" python3 -c "
+import sys
 import pandas as pd
 from perf_csv import perf_report
 from tabulate import tabulate
 
-# Generate the report and convert it to a DataFrame
-average_df = perf_report('$csv_path')
-# Print the DataFrame in a pretty table format
-print(tabulate(average_df, headers='keys', tablefmt='pretty'))
-")
+try:
+    # Generate the report and convert it to a DataFrame
+    average_df = perf_report('$csv_path')
+    # Print the DataFrame in a pretty table format
+    print('Min - Avg - Max by Common Runs:')
+    print(tabulate(average_df, headers='keys', tablefmt='pretty'))
+except Exception as e:
+    print(f'Error in performance report generation: {e}', file=sys.stderr)
+    sys.exit(1)
+" 2>&1 | tee "$tmp_file"
+
+        if grep -q "Error in performance report generation" "$tmp_file"; then
+            echo "Error: Performance report generation failed."
+            exit 1
+        fi
 
-        # Print the output
-        echo "Min - Avg - Max by Common Runs:"
-        echo "$average_values"
     else
         echo "CSV path not found in the command output."
+        exit 1
     fi
 }
 

From 4c3aef1233cdad4830c86731b6401171f9a8ea21 Mon Sep 17 00:00:00 2001
From: umadevimcw <umadevi@multicorewareinc.com>
Date: Fri, 8 Nov 2024 09:55:30 +0000
Subject: [PATCH 05/69] #14879: Remove unnecessary usage of creation ops

---
 .../eltwise/binary/device/binary_composite_op.cpp  | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
index b447a6c8d67..3821e67304f 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
@@ -32,7 +32,7 @@ Tensor _hypot(const Tensor& input_a, const Tensor& input_b, const std::optional<
 
 // xlogy(x,y)=x*log(y)
 Tensor _xlogy(const Tensor& input_a, const Tensor& input_b, const std::optional<MemoryConfig>& output_mem_config) {
-    Tensor t_nan = ttnn::full_like(input_b, std::nanf(" "));
+    float t_nan = std::nanf(" ");
     Tensor result = ttnn::multiply(input_a, ttnn::log(input_b, output_mem_config), std::nullopt, output_mem_config);
     result = ttnn::where(
         ttnn::logical_or(
@@ -254,7 +254,7 @@ Tensor ExecuteDiv::invoke(const Tensor& input_a, const Tensor& input_b, bool acc
 
 Tensor _div_no_nan_overload(const Tensor& input_a, float value, const std::optional<MemoryConfig>& output_mem_config) {
     if (value == 0)
-        return ttnn::full_like(input_a, 0.0f);
+        return ttnn::zeros_like(input_a);
     else
         return ttnn::multiply(input_a, (1.0f/value));
 }
@@ -274,7 +274,7 @@ Tensor ExecuteBinaryRemainder::invoke(const Tensor& input_a, const Tensor& input
     Tensor result = ttnn::subtract(a, ttnn::multiply(b, ttnn::div(input_a, input_b, true, "floor", output_mem_config), std::nullopt, output_mem_config), std::nullopt, output_mem_config);
     result = ttnn::where(ttnn::ge(result, b), ttnn::subtract(result, b), result);
     result = ttnn::where(ttnn::ltz(b), ttnn::add(result, b), result);
-    result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), ttnn::full_like(input_a, 0.0f), result);
+    result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), 0.0f, result);
     return typecast(result, input_dtype);
 }
 
@@ -291,7 +291,7 @@ Tensor ExecuteBinaryFmod::invoke(const Tensor& input_a, const Tensor& input_b, c
     Tensor b = typecast(input_b, DataType::FLOAT32);
     Tensor div_res = typecast(ttnn::div(input_a, input_b, true, "trunc", output_mem_config), DataType::FLOAT32);
     Tensor result = ttnn::subtract(a, ttnn::multiply(div_res, b, std::nullopt, output_mem_config), std::nullopt, output_mem_config);
-    result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), ttnn::full_like(input_a, 0.0f), result);
+    result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), 0.0f, result);
     return typecast(result, input_dtype);
 }
 
@@ -303,12 +303,12 @@ Tensor _floor_div_overload(const Tensor& input_a, float value, const std::option
     auto arch = input_a.device()->arch();
     TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
     if (value == 0) {
-        Tensor t_inf = ttnn::full_like(input_a, std::numeric_limits<float>::infinity());
-        Tensor t_nan = ttnn::full_like(input_a, std::nanf(""));
+        float t_inf = std::numeric_limits<float>::infinity();
+        float t_nan = std::nanf("");
         return ttnn::where(
             ttnn::eqz(input_a, output_mem_config),
             t_nan,
-            ttnn::multiply(t_inf, ttnn::sign(input_a, output_mem_config), std::nullopt, output_mem_config));
+            ttnn::multiply(ttnn::sign(input_a, output_mem_config), t_inf, std::nullopt, output_mem_config));
     }
     Tensor temp = ttnn::multiply(input_a, (1.0f/value), std::nullopt, output_mem_config);
     return ttnn::floor(temp);

From e37271f61923e1672ebfa020b23e4388929e0754 Mon Sep 17 00:00:00 2001
From: umadevimcw <umadevi@multicorewareinc.com>
Date: Fri, 8 Nov 2024 10:32:40 +0000
Subject: [PATCH 06/69] #14880: Remove uncessary usage of creation ops

---
 .../ttnn/operations/eltwise/ternary/ternary_composite_op.cpp  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/eltwise/ternary/ternary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/ternary/ternary_composite_op.cpp
index 65887f3cd80..0db768081eb 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/ternary/ternary_composite_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/ternary/ternary_composite_op.cpp
@@ -37,7 +37,7 @@ Tensor _addcdiv(
     Tensor t_factor = ttnn::multiply(t_div, value, std::nullopt, output_mem_config);
     t_div.deallocate();
     Tensor result = ttnn::add(input_a, t_factor, std::nullopt, output_mem_config);
-    Tensor t_inf = ttnn::full_like(input_a, std::numeric_limits<float>::infinity());
+    float t_inf = std::numeric_limits<float>::infinity();
     Tensor t_nan = ttnn::full_like(input_a, std::nanf(""));
     return ttnn::where(
         ttnn::eqz(input_c, output_mem_config),
@@ -45,7 +45,7 @@ Tensor _addcdiv(
                      : ttnn::where(
                            ttnn::eqz(input_b, output_mem_config),
                            t_nan,
-                           ttnn::multiply(t_inf, ttnn::sign(input_b, output_mem_config), std::nullopt, output_mem_config)),
+                           ttnn::multiply(ttnn::sign(input_b, output_mem_config), t_inf, std::nullopt, output_mem_config)),
         result,
         output_mem_config);
 }

From 9db5fb5fd548632a5d8f335d882e03104552888d Mon Sep 17 00:00:00 2001
From: umadevimcw <umadevi@multicorewareinc.com>
Date: Mon, 11 Nov 2024 06:53:21 +0000
Subject: [PATCH 07/69] #14928: Remove unnecessary usage of creation op

---
 .../eltwise/ternary_backward/ternary_backward.cpp         | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/eltwise/ternary_backward/ternary_backward.cpp b/ttnn/cpp/ttnn/operations/eltwise/ternary_backward/ternary_backward.cpp
index e6c7a9b4a80..8badc5dff9a 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/ternary_backward/ternary_backward.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/ternary_backward/ternary_backward.cpp
@@ -41,8 +41,8 @@ std::vector<Tensor> AddcdivBackwardOperation::invoke(
     const MemoryConfig& output_mem_config) {
     std::vector<Tensor> grad_tensor;
     grad_tensor.emplace_back(grad);
-    Tensor t_inf = ttnn::operations::creation::full_like(input, std::numeric_limits<float>::infinity(), input.get_dtype(), input.get_layout(), std::nullopt, output_mem_config);
-    Tensor t_nan = ttnn::operations::creation::full_like(input, std::nanf(""), input.get_dtype(), input.get_layout(), std::nullopt, output_mem_config);
+    float t_inf = std::numeric_limits<float>::infinity();
+    float t_nan = std::nanf("");
     Tensor grad_a = ttnn::multiply(ttnn::multiply(grad, value, std::nullopt, output_mem_config), ttnn::reciprocal(tensor2, output_mem_config));
     grad_tensor.emplace_back(where(
         ttnn::eqz(tensor2, output_mem_config),
@@ -55,7 +55,7 @@ std::vector<Tensor> AddcdivBackwardOperation::invoke(
         ttnn::multiply(tmp, ttnn::reciprocal(ttnn::square(tensor2, output_mem_config), output_mem_config), std::nullopt, output_mem_config);
     grad_tensor.emplace_back(where(
         ttnn::eqz(tensor2, output_mem_config),
-        where(ttnn::eqz(grad, output_mem_config), t_nan, ttnn::neg(t_inf, output_mem_config), output_mem_config),
+        where(ttnn::eqz(grad, output_mem_config), t_nan, -t_inf, output_mem_config),
         grad_b,
         output_mem_config));
     return grad_tensor;
@@ -99,7 +99,7 @@ std::vector<OptionalTensor> WhereBackwardOperation::invoke(
 std::vector<Tensor> LerpBackwardOperation::invoke(
     const Tensor& grad, const Tensor& input, const Tensor& end, const Tensor& weight, const std::optional<MemoryConfig>& output_mem_config) {
     std::vector<Tensor> grad_tensor;
-    Tensor result_1 = ttnn::multiply(grad, ttnn::subtract(ttnn::operations::creation::full_like(weight, 1.0), weight, std::nullopt, output_mem_config), std::nullopt, output_mem_config);
+    Tensor result_1 = ttnn::multiply(grad, ttnn::rsub(weight, 1.0, output_mem_config), std::nullopt, output_mem_config);
     grad_tensor.emplace_back(result_1);
     Tensor result_2 = ttnn::multiply(grad, weight, std::nullopt, output_mem_config);
     grad_tensor.emplace_back(result_2);

From becbf96c9d885768b7b207146e997cb853bb906c Mon Sep 17 00:00:00 2001
From: umadevimcw <umadevi@multicorewareinc.com>
Date: Mon, 11 Nov 2024 07:52:58 +0000
Subject: [PATCH 08/69] #14930: Remove unnecessary usage of creation ops

---
 .../device/complex_unary_backward_op.cpp           | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/eltwise/complex_unary_backward/device/complex_unary_backward_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/complex_unary_backward/device/complex_unary_backward_op.cpp
index e39cf2ee7bb..b565bf83180 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/complex_unary_backward/device/complex_unary_backward_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/complex_unary_backward/device/complex_unary_backward_op.cpp
@@ -24,15 +24,15 @@ std::vector<ComplexTensor> _polar_bw(const ComplexTensor& grad, const ComplexTen
     std::vector<ComplexTensor> grad_tensor;
     ComplexTensor result = ttnn::polar(input, output_mem_config);
     Tensor abs_result = ttnn::abs(result, output_mem_config);
-    Tensor sgn_result_r = ttnn::where(ttnn::eqz(abs_result, output_mem_config), ttnn::zeros_like(result.real(), result.real().get_dtype(), result.real().get_layout(), std::nullopt, output_mem_config), ttnn::multiply(result.real(), ttnn::reciprocal(abs_result, output_mem_config), std::nullopt, output_mem_config), output_mem_config );
-    Tensor sgn_result_i = ttnn::where(ttnn::eqz(abs_result, output_mem_config), ttnn::zeros_like(result.imag(), result.imag().get_dtype(), result.imag().get_layout(), std::nullopt, output_mem_config), ttnn::multiply(result.imag(), ttnn::reciprocal(abs_result, output_mem_config), std::nullopt, output_mem_config), output_mem_config );
+    Tensor sgn_result_r = ttnn::where(ttnn::eqz(abs_result, output_mem_config), 0.0f, ttnn::multiply(result.real(), ttnn::reciprocal(abs_result, output_mem_config), std::nullopt, output_mem_config), output_mem_config );
+    Tensor sgn_result_i = ttnn::where(ttnn::eqz(abs_result, output_mem_config), 0.0f, ttnn::multiply(result.imag(), ttnn::reciprocal(abs_result, output_mem_config), std::nullopt, output_mem_config), output_mem_config );
     abs_result.deallocate();
     ComplexTensor sgn_result = ComplexTensor({ sgn_result_r, sgn_result_i });
     sgn_result_r.deallocate();
     sgn_result_i.deallocate();
     Tensor grad_abs = ttnn::real(ttnn::operations::complex_binary::_mul(ttnn::conj(grad, output_mem_config), sgn_result, output_mem_config), output_mem_config);
     sgn_result.deallocate();
-    ComplexTensor flip_tensor = ComplexTensor({ttnn::zeros_like(input.real(), input.real().get_dtype(), input.real().get_layout(), std::nullopt, output_mem_config), ttnn::full_like(input.imag(), 1.0f) });
+    ComplexTensor flip_tensor = ComplexTensor({ttnn::zeros_like(input.real(), input.real().get_dtype(), input.real().get_layout(), std::nullopt, output_mem_config), ttnn::ones_like(input.imag()) });
     Tensor grad_angle = ttnn::real(ttnn::operations::complex_binary::_mul(ttnn::conj(grad, output_mem_config), ttnn::operations::complex_binary::_mul(result, flip_tensor, output_mem_config), output_mem_config), output_mem_config);
     result.deallocate();
     flip_tensor.deallocate();
@@ -74,8 +74,8 @@ std::vector<ComplexTensor> _angle_bw(const Tensor& grad, const ComplexTensor& in
     const Tensor &inp_i = input.imag();
     Tensor condition_zero = ttnn::logical_and(ttnn::eqz(input.real(),output_mem_config), ttnn::eqz(input.imag(),output_mem_config), std::nullopt, output_mem_config);
     Tensor abs_squared = ttnn::reciprocal(ttnn::add(ttnn::square(inp_r, output_mem_config), ttnn::square(inp_i, output_mem_config), std::nullopt, output_mem_config), output_mem_config);
-    Tensor res_real = ttnn::where(condition_zero, ttnn::zeros_like(inp_r, inp_r.get_dtype(), inp_r.get_layout(), std::nullopt, output_mem_config), ttnn::multiply(grad, ttnn::multiply(ttnn::neg(inp_i, output_mem_config), abs_squared, std::nullopt, output_mem_config), std::nullopt, output_mem_config), output_mem_config);
-    Tensor res_imag = ttnn::where(condition_zero, ttnn::zeros_like(inp_i, inp_i.get_dtype(), inp_i.get_layout(), std::nullopt, output_mem_config), ttnn::multiply(grad, ttnn::multiply(inp_r, abs_squared, std::nullopt, output_mem_config), std::nullopt, output_mem_config), output_mem_config);
+    Tensor res_real = ttnn::where(condition_zero, 0.0f, ttnn::multiply(grad, ttnn::multiply(ttnn::neg(inp_i, output_mem_config), abs_squared, std::nullopt, output_mem_config), std::nullopt, output_mem_config), output_mem_config);
+    Tensor res_imag = ttnn::where(condition_zero, 0.0f, ttnn::multiply(grad, ttnn::multiply(inp_r, abs_squared, std::nullopt, output_mem_config), std::nullopt, output_mem_config), output_mem_config);
     condition_zero.deallocate();
     abs_squared.deallocate();
     ComplexTensor grad_result = ComplexTensor({res_real, res_imag});
@@ -99,8 +99,8 @@ std::vector<ComplexTensor> _conj_bw(const ComplexTensor& grad, const ComplexTens
 std::vector<ComplexTensor> _complex_abs_bw(const Tensor& grad, const ComplexTensor& input, const MemoryConfig& output_mem_config) {
     std::vector<ComplexTensor> grad_tensor;
     Tensor result = ttnn::abs(input, output_mem_config);
-    Tensor grad_inp_r = ttnn::where(ttnn::eqz(result, output_mem_config), ttnn::zeros_like(result, result.get_dtype(), result.get_layout(), std::nullopt, output_mem_config), ttnn::multiply(grad, ttnn::multiply(input.real(), ttnn::reciprocal(result, output_mem_config), std::nullopt, output_mem_config),std::nullopt, output_mem_config), output_mem_config );
-    Tensor grad_inp_i = ttnn::where(ttnn::eqz(result, output_mem_config), ttnn::zeros_like(result, result.get_dtype(), result.get_layout(), std::nullopt, output_mem_config), ttnn::multiply(grad, ttnn::multiply(input.imag(), ttnn::reciprocal(result, output_mem_config), std::nullopt, output_mem_config),std::nullopt, output_mem_config), output_mem_config );
+    Tensor grad_inp_r = ttnn::where(ttnn::eqz(result, output_mem_config), 0.0f, ttnn::multiply(grad, ttnn::multiply(input.real(), ttnn::reciprocal(result, output_mem_config), std::nullopt, output_mem_config),std::nullopt, output_mem_config), output_mem_config );
+    Tensor grad_inp_i = ttnn::where(ttnn::eqz(result, output_mem_config), 0.0f, ttnn::multiply(grad, ttnn::multiply(input.imag(), ttnn::reciprocal(result, output_mem_config), std::nullopt, output_mem_config),std::nullopt, output_mem_config), output_mem_config );
     ComplexTensor grad_inp = ComplexTensor({ grad_inp_r, grad_inp_i});
     result.deallocate();
     grad_inp_r.deallocate();

From f051a63760dce3772d9b86c8ad707804c07a342f Mon Sep 17 00:00:00 2001
From: Miguel Tairum <150826086+mtairum@users.noreply.github.com>
Date: Tue, 12 Nov 2024 10:01:26 +0000
Subject: [PATCH 09/69] #0: Update Mixtral target (#14947)

---
 models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
index d02f236c35d..a109eeef9d0 100644
--- a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
+++ b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py
@@ -142,7 +142,7 @@ def test_mixtral_model_perf(
     (
         (128, 80, 0.23),
         (1024, 80, 1.55),  # FIXME #12318
-        (1024 * 2, 80, 5.5),  # FIXME #12318
+        (1024 * 2, 80, 5.6),  # FIXME #12318
         # (1024*4, 80, 60),
         # (1024*8, 150, 80),
         # (1024*16, 150, 100),

From 24a6dbfeb7a219feeb7465b92dc9dd98b0b369a0 Mon Sep 17 00:00:00 2001
From: Le Quy Duong <160108926+DuongQLee@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:52:26 +0700
Subject: [PATCH 10/69] #14665: add new moreh_clip_grad_norm and test in ttnn
 (#14667)

### Ticket
Link to Github Issue:
https://github.com/tenstorrent/tt-metal/issues/14665

### Problem description
moreh_clip_grad_norm is in tt-eager deprecated.

### What's changed
Currently, moreh_clip_grad_norm codes contain some bugs that can't be
tested with normal test. I write a new test to ensure the output of the
new version in ttnn produces similar output with the deprecated version.
When the PR is approved, I will remove the deprecated
moreh_clip_grad_norm in tt-eager and the newly added test.

### Checklist
- [x] Post commit CI passes:
https://github.com/tenstorrent/tt-metal/actions/runs/11793801221
- [x] Blackhole Post commit (if applicable): NA
- [x] Model regression CI testing passes (if applicable): NA
- [x] Device performance regression CI testing passes (if applicable):
NA
- [x] New/Existing tests provide coverage for changes: 18 tests passed
---
 .../operations}/test_moreh_clip_grad_norm.py  |   8 +-
 ttnn/CMakeLists.txt                           |   8 +
 .../tt_dnn/op_library/CMakeLists.txt          |   9 +-
 .../moreh_clip_grad_norm_op.cpp               | 264 ------------------
 .../moreh_clip_grad_norm_op.hpp               | 100 -------
 .../moreh_clip_grad_norm_step1.cpp            | 223 ---------------
 .../moreh_clip_grad_norm_step2.cpp            | 160 -----------
 .../moreh_clip_grad_norm_step3.cpp            | 162 -----------
 .../tt_lib/csrc/operations/primary/module.hpp |  19 +-
 .../moreh_clip_grad_norm.cpp                  | 122 ++++++++
 .../moreh_clip_grad_norm.hpp                  |  29 ++
 .../moreh_clip_grad_norm_pybind.cpp           |  28 ++
 .../moreh_clip_grad_norm_pybind.hpp           |  13 +
 .../moreh_clip_grad_norm_step1_kernel.cpp     |   0
 .../reader_moreh_clip_grad_norm_step1.cpp     |   0
 .../writer_moreh_clip_grad_norm_step1.cpp     |   0
 ..._clip_grad_norm_step1_device_operation.cpp |  65 +++++
 ..._clip_grad_norm_step1_device_operation.hpp |  78 ++++++
 ...h_clip_grad_norm_step1_program_factory.cpp | 223 +++++++++++++++
 .../moreh_clip_grad_norm_step2_kernel.cpp     |   0
 .../reader_moreh_clip_grad_norm_step2.cpp     |   0
 .../writer_moreh_clip_grad_norm_step2.cpp     |   0
 ..._clip_grad_norm_step2_device_operation.cpp |  68 +++++
 ..._clip_grad_norm_step2_device_operation.hpp |  77 +++++
 ...h_clip_grad_norm_step2_program_factory.cpp | 162 +++++++++++
 .../moreh_clip_grad_norm_step3_kernel.cpp     |   0
 .../reader_moreh_clip_grad_norm_step3.cpp     |   0
 .../writer_moreh_clip_grad_norm_step3.cpp     |   0
 ..._clip_grad_norm_step3_device_operation.cpp |  61 ++++
 ..._clip_grad_norm_step3_device_operation.hpp |  75 +++++
 ...h_clip_grad_norm_step3_program_factory.cpp | 167 +++++++++++
 .../ttnn/operations/moreh/moreh_pybind.cpp    |   2 +
 ttnn/ttnn/operations/moreh.py                 |   1 +
 33 files changed, 1185 insertions(+), 939 deletions(-)
 rename tests/{tt_eager/python_api_testing/unit_testing/misc => ttnn/unit_tests/operations}/test_moreh_clip_grad_norm.py (94%)
 delete mode 100644 ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp
 delete mode 100644 ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp
 delete mode 100644 ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp
 delete mode 100644 ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp
 delete mode 100644 ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.hpp
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device}/kernels/moreh_clip_grad_norm_step1_kernel.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device}/kernels/reader_moreh_clip_grad_norm_step1.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device}/kernels/writer_moreh_clip_grad_norm_step1.cpp (100%)
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_program_factory.cpp
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device}/kernels/moreh_clip_grad_norm_step2_kernel.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device}/kernels/reader_moreh_clip_grad_norm_step2.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device}/kernels/writer_moreh_clip_grad_norm_step2.cpp (100%)
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_program_factory.cpp
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device}/kernels/moreh_clip_grad_norm_step3_kernel.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device}/kernels/reader_moreh_clip_grad_norm_step3.cpp (100%)
 rename ttnn/cpp/ttnn/{deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3 => operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device}/kernels/writer_moreh_clip_grad_norm_step3.cpp (100%)
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_program_factory.cpp

diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_clip_grad_norm.py b/tests/ttnn/unit_tests/operations/test_moreh_clip_grad_norm.py
similarity index 94%
rename from tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_clip_grad_norm.py
rename to tests/ttnn/unit_tests/operations/test_moreh_clip_grad_norm.py
index 6f708c1228d..c4989099cc4 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_clip_grad_norm.py
+++ b/tests/ttnn/unit_tests/operations/test_moreh_clip_grad_norm.py
@@ -10,6 +10,7 @@
 import ttnn
 from models.utility_functions import comp_allclose_and_pcc
 from loguru import logger
+from tests.ttnn.utils_for_testing import assert_equal
 
 from tests.tt_eager.python_api_testing.unit_testing.misc.test_utils import TILE_HEIGHT, TILE_WIDTH
 
@@ -31,8 +32,7 @@ def to_npu(
 ):
     if cpu_tensor is None:
         return None
-    npu_tensor = ttnn.Tensor(cpu_tensor, npu_dtype).pad_to_tile(padding_value).to(npu_layout).to(device)
-    return npu_tensor
+    return ttnn.from_torch(cpu_tensor, npu_dtype, device=device, layout=npu_layout)
 
 
 @pytest.mark.skip(reason="assertion fails during binary op input shape comparison because of different padding")
@@ -92,7 +92,7 @@ def test_moreh_clip_grad_norm(
             input_shapes.append(input_shape)
 
         cpu_total_norm = torch.nn.utils.clip_grad_norm_(cpu_inputs, max_norm, norm_type)
-        npu_total_norm = ttnn.experimental.operations.primary.moreh_clip_grad_norm_(npu_inputs, max_norm, norm_type)
+        npu_total_norm = ttnn.operations.moreh.clip_grad_norm(npu_inputs, max_norm, norm_type)
 
         expected_total_norm = cpu_total_norm
         actual_total_norm = to_cpu(npu_total_norm, [1, 1, 1, 1])
@@ -144,7 +144,7 @@ def test_moreh_clip_grad_norm(
 
 #     # Check tt behavior
 #     try:
-#         ttnn.experimental.operations.primary.moreh_clip_grad_norm_(
+#         ttnn.operations.moreh.clip_grad_norm(
 #             [to_npu(param.grad.bfloat16(), device, npu_dtype=npu_dtype)], max_norm, norm_type, error_if_nonfinite
 #         )
 #         assert not error_if_nonfinite
diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index 8ce161317e3..0a12a92e04d 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -547,6 +547,14 @@ set(ALL_TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/index_fill/index_fill_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/index_fill/device/index_fill_device_operation.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/index_fill/device/index_fill_multi_core_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/expand/expand.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/expand/expand_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/expand/device/expand_rm_program_factory.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt
index f39b5296fe5..eaa0b187c62 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt
+++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt
@@ -1,9 +1,2 @@
 # We do not use GLOB here since build system won't be able to pick up changes to the file list generated
-set(TT_DNN_SRCS
-    ${CMAKE_CURRENT_SOURCE_DIR}/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp
-    CACHE INTERNAL
-    "tt_dnn sources to reuse in ttnn build"
-)
+set(TT_DNN_SRCS CACHE INTERNAL "tt_dnn sources to reuse in ttnn build")
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp
deleted file mode 100644
index 882db9c4842..00000000000
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <cmath>
-#include <optional>
-#include <utility>
-#include <vector>
-
-#include "ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp"
-#include "ttnn/operations/creation.hpp"
-#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
-#include "ttnn/operations/eltwise/binary/binary_composite.hpp"
-
-namespace tt {
-
-namespace operations {
-
-namespace primary {
-
-namespace {
-
-inline uint32_t get_num_device_cores(Device *device) {
-    const auto num_cores_x = static_cast<uint32_t>(device->compute_with_storage_grid_size().x);
-    const auto num_cores_y = static_cast<uint32_t>(device->compute_with_storage_grid_size().y);
-    return num_cores_x * num_cores_y;
-}
-}  // namespace
-
-std::tuple<uint32_t, float, bool> get_p_decimal_p_is_negative(float ord) {
-    auto p = std::floor(ord);
-    auto decimal = ord - p;
-    const bool p_is_negative = p < 0.0f;
-    if (p_is_negative) {
-        p = -p;
-    }
-    return std::make_tuple(static_cast<uint32_t>(p), decimal, p_is_negative);
-}
-
-void MorehClipGradNormStep1::validate(
-    const std::vector<Tensor> &input_tensors,
-    const std::vector<std::optional<const Tensor>> &optional_input_tensors) const {
-    for (const auto &input : input_tensors) {
-        ttnn::operations::check_tensor(input, "moreh_clip_grad_norm_step1", "input");
-    }
-
-    const auto &tmp_pow_sum = optional_input_tensors.at(0).value();
-    ttnn::operations::check_tensor(tmp_pow_sum, "moreh_clip_grad_norm_step1", "tmp_pow_sum");
-};
-
-std::vector<ttnn::SimpleShape> MorehClipGradNormStep1::compute_output_shapes(const std::vector<Tensor> &) const { return {}; }
-
-std::vector<Tensor> MorehClipGradNormStep1::create_output_tensors(const std::vector<Tensor> &) const { return {}; }
-
-operation::ProgramWithCallbacks MorehClipGradNormStep1::create_program(
-    const std::vector<Tensor> &input_tensors,
-    const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-    std::vector<Tensor> &) const {
-    const auto &tmp_pow_sum = optional_input_tensors.at(0).value();
-    return moreh_clip_grad_norm_step1_impl(
-        input_tensors, this->norm_type, this->tile_offset_of_tmp_pow_sum, tmp_pow_sum);
-}
-
-void moreh_clip_grad_norm_step1(const std::vector<Tensor> &inputs, float norm_type, const Tensor &tmp_pow_sum) {
-    auto device = inputs.at(0).device();
-    const auto max_num_inputs = get_num_device_cores(device);
-    const auto total_num_inputs = static_cast<uint32_t>(inputs.size());
-
-    const auto num_iter = (total_num_inputs + max_num_inputs - 1) / max_num_inputs;
-
-    uint32_t tile_offset{0};
-    auto num_inputs = total_num_inputs;
-    for (uint32_t i = 0; i < num_iter; ++i) {
-        const auto num_inputs_at_this_iter = std::min(num_inputs, max_num_inputs);
-
-        std::vector<Tensor> dummy_output_tensors = {Tensor(operation::get_workers_for_op_output({tmp_pow_sum}))};
-
-        operation::launch_op(
-            [norm_type, tile_offset](
-                const std::vector<Tensor> &input_tensors,
-                const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-                const std::vector<std::optional<Tensor>> &optional_output_tensors) mutable -> std::vector<Tensor> {
-                return operation::run(
-                    MorehClipGradNormStep1{.norm_type = norm_type, .tile_offset_of_tmp_pow_sum = tile_offset},
-                    input_tensors,
-                    optional_input_tensors,
-                    optional_output_tensors);
-            },
-            std::vector<Tensor>(inputs.begin() + tile_offset, inputs.begin() + tile_offset + num_inputs_at_this_iter),
-            dummy_output_tensors,
-            {tmp_pow_sum});
-
-        if (i < (num_iter - 1)) {
-            tile_offset += num_inputs_at_this_iter;
-            num_inputs -= num_inputs_at_this_iter;
-        }
-    }
-}
-
-void MorehClipGradNormStep2::validate(const std::vector<Tensor> &input_tensors) const {
-    const auto &tmp_pow_sum = input_tensors.at(0);
-    ttnn::operations::check_tensor(tmp_pow_sum, "moreh_clip_grad_norm_step2", "tmp_pow_sum");
-
-    const auto &total_norm = input_tensors.at(1);
-    ttnn::operations::check_tensor(total_norm, "moreh_clip_grad_norm_step2", "total_norm");
-}
-
-std::vector<ttnn::SimpleShape> MorehClipGradNormStep2::compute_output_shapes(const std::vector<Tensor> &) const { return {}; }
-
-std::vector<Tensor> MorehClipGradNormStep2::create_output_tensors(const std::vector<Tensor> &) const { return {}; }
-
-operation::ProgramWithCallbacks MorehClipGradNormStep2::create_program(
-    const std::vector<Tensor> &input_tensors, std::vector<Tensor> &) const {
-    const auto &tmp_pow_sum = input_tensors.at(0);
-    const auto &total_norm = input_tensors.at(1);
-    return moreh_clip_grad_norm_step2_impl(tmp_pow_sum, this->norm_type, total_norm);
-}
-
-void moreh_clip_grad_norm_step2(const Tensor &tmp_pow_sum, float norm_type, const Tensor &total_norm) {
-    std::vector<Tensor> dummy_output_tensors = {
-        Tensor(operation::get_workers_for_op_output({tmp_pow_sum, total_norm}))};
-
-    operation::launch_op(
-        [norm_type](
-            const std::vector<Tensor> &input_tensors,
-            const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-            const std::vector<std::optional<Tensor>> &optional_output_tensors) mutable -> std::vector<Tensor> {
-            return operation::run(
-                MorehClipGradNormStep2{.norm_type = norm_type},
-                input_tensors,
-                optional_input_tensors,
-                optional_output_tensors);
-        },
-        {tmp_pow_sum, total_norm},
-        dummy_output_tensors);
-}
-
-void MorehClipGradNormStep3::validate(
-    const std::vector<Tensor> &input_tensors,
-    const std::vector<std::optional<const Tensor>> &optional_input_tensors) const {
-    for (const auto &input : input_tensors) {
-        ttnn::operations::check_tensor(input, "moreh_clip_grad_norm_step3", "input");
-    }
-
-    const auto &clip_coef_clamped = optional_input_tensors.at(0).value();
-    ttnn::operations::check_tensor(clip_coef_clamped, "moreh_clip_grad_norm_step3", "clip_coef_clamped");
-}
-
-std::vector<ttnn::SimpleShape> MorehClipGradNormStep3::compute_output_shapes(const std::vector<Tensor> &) const { return {}; }
-
-std::vector<Tensor> MorehClipGradNormStep3::create_output_tensors(const std::vector<Tensor> &) const { return {}; }
-
-operation::ProgramWithCallbacks MorehClipGradNormStep3::create_program(
-    const std::vector<Tensor> &input_tensors,
-    const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-    std::vector<Tensor> &) const {
-    const auto &clip_coef_clamped = optional_input_tensors.at(0).value();
-    return moreh_clip_grad_norm_step3_impl(input_tensors, clip_coef_clamped);
-}
-
-void moreh_clip_grad_norm_step3(const std::vector<Tensor> &inputs, const Tensor &clip_coef_clamped) {
-    auto device = inputs.at(0).device();
-    const auto max_num_inputs = get_num_device_cores(device);
-    const auto total_num_inputs = static_cast<uint32_t>(inputs.size());
-
-    const auto num_iter = (total_num_inputs + max_num_inputs - 1) / max_num_inputs;
-
-    uint32_t start_input_idx{0};
-    auto num_inputs = total_num_inputs;
-    for (uint32_t i = 0; i < num_iter; ++i) {
-        const auto num_inputs_at_this_iter = std::min(num_inputs, max_num_inputs);
-
-        auto input_tensors = std::vector<Tensor>(
-            inputs.begin() + start_input_idx, inputs.begin() + start_input_idx + num_inputs_at_this_iter);
-        std::vector<Tensor> dummy_output_tensors = {Tensor(operation::get_workers_for_op_output(input_tensors))};
-
-        operation::launch_op(
-            [](const std::vector<Tensor> &input_tensors,
-               const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-               const std::vector<std::optional<Tensor>> &optional_output_tensors) mutable -> std::vector<Tensor> {
-                return operation::run(
-                    MorehClipGradNormStep3{}, input_tensors, optional_input_tensors, optional_output_tensors);
-            },
-            input_tensors,
-            dummy_output_tensors,
-            {clip_coef_clamped});
-
-        if (i < (num_iter - 1)) {
-            start_input_idx += num_inputs_at_this_iter;
-            num_inputs -= num_inputs_at_this_iter;
-        }
-    }
-}
-
-Tensor moreh_clip_grad_norm_impl(
-    const std::vector<Tensor> &inputs,
-    float max_norm,
-    float norm_type,
-    bool error_if_nonfinite,
-    const Tensor &tmp_pow_sum,
-    const Tensor &total_norm) {
-    // Sum[|e|^p]
-    moreh_clip_grad_norm_step1(inputs, norm_type, tmp_pow_sum);
-
-    // Sum[Sum[|e|^p]]^(1/p)
-    moreh_clip_grad_norm_step2(tmp_pow_sum, norm_type, total_norm);
-
-    if (error_if_nonfinite) {
-        const auto fp32_total_norm =
-            tensor_impl::cast_vec<float>(owned_buffer::get_as<bfloat16>(total_norm.cpu())).at(0);
-        TT_ASSERT(
-            std::isfinite(fp32_total_norm),
-                "The total norm of order {} for gradients from `parameters` is non-finite, so it cannot be "
-                "clipped. To disable this error and scale the gradients by the non-finite norm anyway, set "
-                "`error_if_nonfinite=False`",
-                norm_type);
-    }
-
-    // max_norm / (total_norm + 1e-6)
-    const auto &clip_coef = ttnn::multiply(ttnn::add(total_norm, 1e-6f), (1 / max_norm));
-    // min(clip_coef, 1.0f)
-    Tensor scalar = ttnn::operations::creation::create_scalar(1.0f,inputs.at(0).get_dtype(),Layout::TILE, inputs.at(0).device());
-    const auto &clip_coef_clamped = ttnn::minimum(clip_coef, scalar);
-    scalar.deallocate();
-
-    // Inplace update inputs(inputs *= clip_coef_clamped)
-    moreh_clip_grad_norm_step3(inputs, clip_coef_clamped);
-
-    return total_norm;
-}
-
-[[maybe_unused]] Tensor moreh_clip_grad_norm(
-    const std::vector<Tensor> &inputs,
-    float max_norm,
-    float norm_type,
-    bool error_if_nonfinite,
-    const std::optional<std::reference_wrapper<const Tensor>> total_norm,
-    const MemoryConfig &output_mem_config) {
-    using namespace tt::constants;
-    // Create tmp_pow_sum[1, 1, TILE_HEIGHT, TILE_WIDTH * total_num_inputs]
-    const auto total_num_inputs = static_cast<uint32_t>(inputs.size());
-    tt::tt_metal::LegacyShape tmp_pow_sum_shape{1, 1, TILE_HEIGHT, TILE_WIDTH * total_num_inputs};
-    const auto &tmp_pow_sum =
-        create_device_tensor(tmp_pow_sum_shape, inputs.at(0).get_dtype(), Layout::TILE, inputs.at(0).device());
-
-    if (total_norm.has_value() && (total_norm != std::nullopt)) {
-        return moreh_clip_grad_norm_impl(
-            inputs, max_norm, norm_type, error_if_nonfinite, tmp_pow_sum, total_norm->get());
-    }
-
-    // Create total_norm[1, 1, 1, 1]
-    Padding padding{{{0, 0}, {0, 0}, {0, TILE_HEIGHT - 1}, {0, TILE_WIDTH - 1}}, Padding::PadValue::Zero};
-    tt::tt_metal::LegacyShape total_norm_shape{{1, 1, TILE_HEIGHT, TILE_WIDTH}, padding};
-    const auto &created_total_norm = create_device_tensor(
-        total_norm_shape, inputs.at(0).get_dtype(), Layout::TILE, inputs.at(0).device(), output_mem_config);
-
-    return moreh_clip_grad_norm_impl(inputs, max_norm, norm_type, error_if_nonfinite, tmp_pow_sum, created_total_norm);
-}
-
-}  // namespace primary
-
-}  // namespace operations
-
-}  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp
deleted file mode 100644
index 3e84fee79c3..00000000000
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include <functional>
-#include <optional>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-#include "ttnn/run_operation.hpp"
-#include "ttnn/tensor/tensor.hpp"
-#include "ttnn/tensor/tensor_impl.hpp"
-#include "ttnn/operations/eltwise/binary/binary.hpp"
-
-namespace tt {
-
-namespace operations {
-
-namespace primary {
-
-using namespace tt_metal;
-
-std::tuple<uint32_t, float, bool> get_p_decimal_p_is_negative(float ord);
-
-struct MorehClipGradNormStep1 {
-    float norm_type;
-    uint32_t tile_offset_of_tmp_pow_sum;
-
-    void validate(
-        const std::vector<Tensor> &input_tensors,
-        const std::vector<std::optional<const Tensor>> &optional_input_tensors) const;
-    std::vector<ttnn::SimpleShape> compute_output_shapes(const std::vector<Tensor> &) const;
-    std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &) const;
-    operation::ProgramWithCallbacks create_program(
-        const std::vector<Tensor> &input_tensors,
-        const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-        std::vector<Tensor> &) const;
-};
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step1_impl(
-    const std::vector<Tensor> &inputs, float norm_type, uint32_t tile_offset_of_tmp_pow_sum, const Tensor &tmp_pow_sum);
-
-void moreh_clip_grad_norm_step1(const std::vector<Tensor> &inputs, float norm_type, const Tensor &tmp_pow_sum);
-
-struct MorehClipGradNormStep2 {
-    float norm_type;
-
-    void validate(const std::vector<Tensor> &input_tensors) const;
-    std::vector<ttnn::SimpleShape> compute_output_shapes(const std::vector<Tensor> &) const;
-    std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &) const;
-    operation::ProgramWithCallbacks create_program(
-        const std::vector<Tensor> &input_tensors, std::vector<Tensor> &) const;
-};
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step2_impl(
-    const Tensor &tmp_pow_sum, float norm_type, const Tensor &total_norm);
-
-void moreh_clip_grad_norm_step2(const Tensor &tmp_pow_sum, float norm_type, const Tensor &total_norm);
-
-struct MorehClipGradNormStep3 {
-    void validate(
-        const std::vector<Tensor> &input_tensors,
-        const std::vector<std::optional<const Tensor>> &optional_input_tensors) const;
-    std::vector<ttnn::SimpleShape> compute_output_shapes(const std::vector<Tensor> &) const;
-    std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &) const;
-    operation::ProgramWithCallbacks create_program(
-        const std::vector<Tensor> &input_tensors,
-        const std::vector<std::optional<const Tensor>> &optional_input_tensors,
-        std::vector<Tensor> &) const;
-};
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step3_impl(
-    const std::vector<Tensor> &inputs, const Tensor &clip_coef_clamped);
-
-void moreh_clip_grad_norm_step3(const std::vector<Tensor> &inputs, const Tensor &clip_coef_clamped);
-
-Tensor moreh_clip_grad_norm_impl(
-    const std::vector<Tensor> &inputs,
-    float max_norm,
-    float norm_type,
-    bool error_if_nonfinite,
-    const Tensor &tmp_pow_sum,
-    const Tensor &total_norm);
-
-[[maybe_unused]] Tensor moreh_clip_grad_norm(
-    const std::vector<Tensor> &inputs,
-    float max_norm,
-    float norm_type,
-    bool error_if_nonfinite,
-    const std::optional<std::reference_wrapper<const Tensor>> total_norm,
-    const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
-
-}  // namespace primary
-
-}  // namespace operations
-
-}  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp
deleted file mode 100644
index 3ae17e72f25..00000000000
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/moreh_clip_grad_norm_step1.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <functional>
-#include <map>
-#include <optional>
-#include <utility>
-#include <vector>
-
-#include "ttnn/run_operation.hpp"
-#include "ttnn/tensor/tensor.hpp"
-#include "ttnn/tensor/tensor_impl.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp"
-#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
-#include "tt_metal/common/work_split.hpp"
-#include "tt_metal/detail/util.hpp"
-#include "tt_metal/host_api.hpp"
-
-namespace tt {
-
-namespace operations {
-
-namespace primary {
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step1_impl(
-    const std::vector<Tensor>& inputs,
-    float norm_type,
-    uint32_t tile_offset_of_tmp_pow_sum,
-    const Tensor& tmp_pow_sum) {
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Device Setup
-    ////////////////////////////////////////////////////////////////////////////
-    auto device = tmp_pow_sum.device();
-    auto program = CreateProgram();
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Parameters Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const auto num_inputs = static_cast<uint32_t>(inputs.size());
-
-    std::vector<std::pair<uint32_t, uint32_t>> origin_hw_vec;
-    origin_hw_vec.reserve(num_inputs);
-
-    for (uint32_t j = 0; j < num_inputs; ++j) {
-        const auto& input_shape_without_padding = inputs.at(j).get_legacy_shape().without_padding();
-        origin_hw_vec.emplace_back(input_shape_without_padding[2], input_shape_without_padding[3]);
-    }
-
-    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(norm_type);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Core Setup
-    ////////////////////////////////////////////////////////////////////////////
-    auto grid = device->compute_with_storage_grid_size();
-    const auto num_cores_y = grid.y;
-    const auto
-        [num_cores_to_be_used,
-         all_cores,
-         core_group_1,
-         core_group_2,
-         num_inputs_per_core_group_1,
-         num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs);
-    TT_ASSERT(core_group_2.ranges().empty());
-    TT_ASSERT(num_inputs_per_core_group_1 == 1);
-    TT_ASSERT(num_inputs_per_core_group_2 == 0);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         CircularBuffer Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const uint32_t in0_t = 1;  // input(==x)
-    const uint32_t in1_t = 1;  // one
-    const uint32_t in2_t = 1;  // decimal
-    const uint32_t in3_t = 2;  // mask_h_w
-
-    const uint32_t out0_t = 1;  // output(==y)
-
-    const uint32_t im0_t = 1;  // |x|
-    const uint32_t im1_t = 1;  // |x|^p
-    const uint32_t im2_t = 1;  // Add[|x|^p * exp(log(|x|) * decimal)]
-    const uint32_t im3_t = 1;  // log(|x|)
-    const uint32_t im4_t = 1;  // exp(log(|x|) * decimal)
-    const uint32_t im5_t = 1;  // |x|^p * exp(log(|x|) * decimal)
-
-    const auto cb_data_format = tt_metal::datatype_to_dataformat_converter(tmp_pow_sum.get_dtype());
-
-    ttnn::operations::CreateCircularBuffer(
-        program,
-        core_group_1,
-        cb_data_format,
-        {
-            {CB::c_in0, in0_t},        // input(==x)
-            {CB::c_in1, in1_t},        // one
-            {CB::c_in2, in2_t},        // decimal
-            {CB::c_in3, in3_t},        // mask_h_w
-            {CB::c_out0, out0_t},      // output(==y)
-            {CB::c_intermed0, im0_t},  // |x|
-            {CB::c_intermed1, im1_t},  // |x|^p
-            {CB::c_intermed2, im2_t},  // Add[|x|^p * exp(log(|x|) * decimal)]
-            {CB::c_intermed3, im3_t},  // log(|x|)
-            {CB::c_intermed4, im4_t},  // exp(log(|x|) * decimal)
-            {CB::c_intermed5, im5_t},  // |x|^p * exp(log(|x|) * decimal)
-        });
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      DataMovementKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto reader_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/"
-        "reader_moreh_clip_grad_norm_step1.cpp";
-    const auto writer_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/"
-        "writer_moreh_clip_grad_norm_step1.cpp";
-
-    const auto reader_kernels_id = ttnn::operations::CreateReadKernel(program, reader_kernel_file, core_group_1);
-    const auto writer_kernels_id = ttnn::operations::CreateWriteKernel(program, writer_kernel_file, core_group_1);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      ComputeKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    std::map<std::string, std::string> compute_defines{};
-    compute_defines["REDUCE_OP"] = "PoolType::SUM";
-    compute_defines["REDUCE_DIM"] = "ReduceDim::REDUCE_SCALAR";
-
-    const auto compute_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/"
-        "moreh_clip_grad_norm_step1_kernel.cpp";
-
-    const auto compute_kernels_id =
-        ttnn::operations::CreateComputeKernel(program, compute_kernel_file, {core_group_1, num_inputs_per_core_group_1}, compute_defines);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      RuntimeArgs SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto output_addr = tmp_pow_sum.buffer()->address();
-
-    uint32_t tile_offset = tile_offset_of_tmp_pow_sum;
-    for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
-        CoreCoord core = {i / num_cores_y, i % num_cores_y};
-
-        const auto& input = inputs.at(i);
-        const auto input_addr = input.buffer()->address();
-        const auto num_tiles = input.volume() / tt::constants::TILE_HW;
-        const auto [origin_h, origin_w] = origin_hw_vec.at(i);
-
-        // reader
-        const std::array reader_runtime_args{
-            input_addr,
-            static_cast<uint32_t>(ttnn::operations::is_dram(input)),
-            num_tiles,
-            *reinterpret_cast<uint32_t*>(&decimal),
-            origin_h,
-            origin_w};
-        SetRuntimeArgs(program, reader_kernels_id, core, reader_runtime_args);
-
-        // writer
-        const std::array writer_runtime_args{
-            output_addr, static_cast<uint32_t>(ttnn::operations::is_dram(tmp_pow_sum)), tile_offset};
-        SetRuntimeArgs(program, writer_kernels_id, core, writer_runtime_args);
-
-        // compute
-        const std::array compute_runtime_args{
-            num_tiles,
-            p,
-            static_cast<uint32_t>(p_is_negative),
-            origin_h,
-            origin_w,
-        };
-        SetRuntimeArgs(program, compute_kernels_id, core, compute_runtime_args);
-
-        tile_offset++;
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Callback SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    auto override_runtime_args_callback = [reader_kernels_id = reader_kernels_id,
-                                           writer_kernels_id = writer_kernels_id,
-                                           compute_kernels_id = compute_kernels_id,
-                                           num_cores_to_be_used = num_cores_to_be_used,
-                                           num_cores_y = num_cores_y](
-                                              const void* operation,
-                                              Program& program,
-                                              const std::vector<Tensor>& input_tensors,
-                                              const std::vector<std::optional<const Tensor>>& optional_input_tensors,
-                                              const std::vector<Tensor>&) {
-        const auto norm_type = static_cast<const MorehClipGradNormStep1*>(operation)->norm_type;
-
-        auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(norm_type);
-
-        auto output_buffer = optional_input_tensors.at(0).value().buffer();
-        const auto output_address = output_buffer->address();
-
-        for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
-            CoreCoord core = {i / num_cores_y, i % num_cores_y};
-
-            {
-                auto &runtime_args = GetRuntimeArgs(program, reader_kernels_id, core);
-                runtime_args[0] = input_tensors.at(i).buffer()->address();
-                runtime_args[3] = *reinterpret_cast<uint32_t*>(&decimal);
-            }
-
-            {
-                auto &runtime_args = GetRuntimeArgs(program, writer_kernels_id, core);
-                runtime_args[0] = output_address;
-            }
-
-            {
-                auto &runtime_args = GetRuntimeArgs(program, compute_kernels_id, core);
-                runtime_args[1] = p;
-                runtime_args[2] = static_cast<uint32_t>(p_is_negative);
-            }
-        }
-    };
-
-    return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_args_callback};
-}
-
-}  // namespace primary
-
-}  // namespace operations
-
-}  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp
deleted file mode 100644
index 5b7826cbb6e..00000000000
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/moreh_clip_grad_norm_step2.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <map>
-#include <optional>
-#include <utility>
-#include <vector>
-
-#include "ttnn/run_operation.hpp"
-#include "ttnn/tensor/tensor.hpp"
-#include "ttnn/tensor/tensor_impl.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp"
-#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
-#include "tt_metal/common/work_split.hpp"
-#include "tt_metal/detail/util.hpp"
-#include "tt_metal/host_api.hpp"
-
-namespace tt {
-
-namespace operations {
-
-namespace primary {
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step2_impl(
-    const Tensor& tmp_pow_sum, float norm_type, const Tensor& total_norm) {
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Device Setup
-    ////////////////////////////////////////////////////////////////////////////
-    auto device = tmp_pow_sum.device();
-    auto program = CreateProgram();
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Parameters Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const auto num_tiles = tmp_pow_sum.volume() / tt::constants::TILE_HW;
-
-    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(1.0f / norm_type);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Core Setup
-    ////////////////////////////////////////////////////////////////////////////
-    CoreCoord single_core = {0, 0};
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         CircularBuffer Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const uint32_t in0_t = 1;  // input(==tmp_pow_sum)
-    const uint32_t in1_t = 1;  // decimal
-
-    // x^p * exp(log(x) * decimal)
-    const uint32_t out0_t = 1;  // output(==total_norm)
-
-    const uint32_t im0_t = 1;  // Sum[tmp_pow_sum](==x)
-    const uint32_t im1_t = 1;  // x^p
-    const uint32_t im2_t = 1;  // log(x)
-    const uint32_t im3_t = 1;  // exp(log(x) * decimal)
-
-    const auto cb_data_format = tt_metal::datatype_to_dataformat_converter(total_norm.get_dtype());
-
-    ttnn::operations::CreateCircularBuffer(
-        program,
-        single_core,
-        cb_data_format,
-        {
-            {CB::c_in0, in0_t},        // input(==tmp_pow_sum)
-            {CB::c_in1, in1_t},        // decimal
-            {CB::c_out0, out0_t},      // output(==total_norm)
-            {CB::c_intermed0, im0_t},  // Sum[tmp_pow_sum](==x)
-            {CB::c_intermed1, im1_t},  // x^p
-            {CB::c_intermed2, im2_t},  // log(x)
-            {CB::c_intermed3, im3_t},  // exp(log(x) * decimal)
-        });
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      DataMovementKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto reader_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/"
-        "reader_moreh_clip_grad_norm_step2.cpp";
-    const auto writer_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/"
-        "writer_moreh_clip_grad_norm_step2.cpp";
-
-    const auto reader_kernels_id = ttnn::operations::CreateReadKernel(program, reader_kernel_file, single_core);
-    const auto writer_kernels_id = ttnn::operations::CreateWriteKernel(program, writer_kernel_file, single_core);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      ComputeKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto compute_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/"
-        "moreh_clip_grad_norm_step2_kernel.cpp";
-
-    const auto compute_kernels_id = ttnn::operations::CreateComputeKernel(program, compute_kernel_file, {single_core, num_tiles});
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      RuntimeArgs SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto input_addr = tmp_pow_sum.buffer()->address();
-    const auto output_addr = total_norm.buffer()->address();
-
-    // reader
-    const std::array reader_runtime_args{
-        input_addr, static_cast<uint32_t>(ttnn::operations::is_dram(tmp_pow_sum)), num_tiles, *reinterpret_cast<uint32_t*>(&decimal)};
-    SetRuntimeArgs(program, reader_kernels_id, single_core, reader_runtime_args);
-
-    // writer
-    const std::array writer_runtime_args{output_addr, static_cast<uint32_t>(ttnn::operations::is_dram(total_norm))};
-    SetRuntimeArgs(program, writer_kernels_id, single_core, writer_runtime_args);
-
-    // compute
-    const std::array compute_runtime_args{num_tiles, p, static_cast<uint32_t>(p_is_negative)};
-    SetRuntimeArgs(program, compute_kernels_id, single_core, compute_runtime_args);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Callback SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    auto override_runtime_args_callback = [reader_kernels_id = reader_kernels_id,
-                                           writer_kernels_id = writer_kernels_id,
-                                           compute_kernels_id = compute_kernels_id,
-                                           single_core = single_core](
-                                              const void* operation,
-                                              Program& program,
-                                              const std::vector<Tensor>& input_tensors,
-                                              const std::vector<std::optional<const Tensor>>&,
-                                              const std::vector<Tensor>&) {
-        const auto norm_type = static_cast<const MorehClipGradNormStep2*>(operation)->norm_type;
-
-        auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(1.0f / norm_type);
-
-        const auto input_address = input_tensors.at(0).buffer()->address();
-        const auto output_address = input_tensors.at(1).buffer()->address();
-
-        {
-            auto &runtime_args = GetRuntimeArgs(program, reader_kernels_id, single_core);
-            runtime_args[0] = input_address;
-            runtime_args[3] = *reinterpret_cast<uint32_t*>(&decimal);
-        }
-
-        {
-            auto &runtime_args = GetRuntimeArgs(program, writer_kernels_id, single_core);
-            runtime_args[0] = output_address;
-        }
-
-        {
-            auto &runtime_args = GetRuntimeArgs(program, compute_kernels_id, single_core);
-            runtime_args[1] = p;
-            runtime_args[2] = static_cast<uint32_t>(p_is_negative);
-        }
-    };
-
-    return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_args_callback};
-}
-
-}  // namespace primary
-
-}  // namespace operations
-
-}  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp
deleted file mode 100644
index 0d5696ea59a..00000000000
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/moreh_clip_grad_norm_step3.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <map>
-#include <optional>
-#include <utility>
-#include <vector>
-
-#include "ttnn/run_operation.hpp"
-#include "ttnn/tensor/tensor.hpp"
-#include "ttnn/tensor/tensor_impl.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp"
-#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
-#include "tt_metal/common/work_split.hpp"
-#include "tt_metal/detail/util.hpp"
-#include "tt_metal/host_api.hpp"
-
-namespace tt {
-
-namespace operations {
-
-namespace primary {
-
-operation::ProgramWithCallbacks moreh_clip_grad_norm_step3_impl(
-    const std::vector<Tensor>& inputs, const Tensor& clip_coef_clamped) {
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Device Setup
-    ////////////////////////////////////////////////////////////////////////////
-    auto device = inputs.at(0).device();
-    auto program = CreateProgram();
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Parameters Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const auto num_inputs = static_cast<uint32_t>(inputs.size());
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         Core Setup
-    ////////////////////////////////////////////////////////////////////////////
-    auto grid = device->compute_with_storage_grid_size();
-    const auto num_cores_y = grid.y;
-
-    const auto
-        [num_cores_to_be_used,
-         all_cores,
-         core_group_1,
-         core_group_2,
-         num_inputs_per_core_group_1,
-         num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs);
-    TT_ASSERT(core_group_2.ranges().empty());
-    TT_ASSERT(num_inputs_per_core_group_1 == 1);
-    TT_ASSERT(num_inputs_per_core_group_2 == 0);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                         CircularBuffer Setup
-    ////////////////////////////////////////////////////////////////////////////
-    const uint32_t in0_t = 1;  // input(inplace)
-    const uint32_t in1_t = 1;  // clip_coef_clamped
-
-    const uint32_t out0_t = 1;  // output(inplace)
-
-    const auto cb_data_format = tt_metal::datatype_to_dataformat_converter(inputs.at(0).get_dtype());
-
-    ttnn::operations::CreateCircularBuffer(
-        program,
-        core_group_1,
-        cb_data_format,
-        {
-            {CB::c_in0, in0_t},    // input(inplace)
-            {CB::c_in1, in1_t},    // clip_coef_clamped
-            {CB::c_out0, out0_t},  // output(inplace)
-        });
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      DataMovementKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto reader_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/"
-        "reader_moreh_clip_grad_norm_step3.cpp";
-    const auto writer_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/"
-        "writer_moreh_clip_grad_norm_step3.cpp";
-
-    const auto reader_kernels_id = ttnn::operations::CreateReadKernel(program, reader_kernel_file, core_group_1);
-    const auto writer_kernels_id = ttnn::operations::CreateWriteKernel(program, writer_kernel_file, core_group_1);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      ComputeKernel SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto compute_kernel_file =
-        "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/"
-        "moreh_clip_grad_norm_step3_kernel.cpp";
-
-    const auto compute_kernels_id =
-        ttnn::operations::CreateComputeKernel(program, compute_kernel_file, {core_group_1, num_inputs_per_core_group_1});
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      RuntimeArgs SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    const auto clip_coef_clamped_addr = clip_coef_clamped.buffer()->address();
-    for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
-        CoreCoord core = {i / num_cores_y, i % num_cores_y};
-
-        const auto& input = inputs.at(i);
-        const auto input_addr = input.buffer()->address();
-        const auto num_tiles = input.volume() / tt::constants::TILE_HW;
-
-        // reader
-        const std::array reader_runtime_args{
-            input_addr,
-            static_cast<uint32_t>(ttnn::operations::is_dram(input)),
-            clip_coef_clamped_addr,
-            static_cast<uint32_t>(ttnn::operations::is_dram(clip_coef_clamped)),
-            num_tiles};
-        SetRuntimeArgs(program, reader_kernels_id, core, reader_runtime_args);
-
-        // writer
-        const std::array writer_runtime_args{input_addr, static_cast<uint32_t>(ttnn::operations::is_dram(input)), num_tiles};
-        SetRuntimeArgs(program, writer_kernels_id, core, writer_runtime_args);
-
-        // compute
-        const std::array compute_runtime_args{num_tiles};
-        SetRuntimeArgs(program, compute_kernels_id, core, compute_runtime_args);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-    //                      Callback SetUp
-    ////////////////////////////////////////////////////////////////////////////
-    auto override_addresses_callback =
-        [reader_kernels_id = reader_kernels_id,
-         writer_kernels_id = writer_kernels_id,
-         num_cores_to_be_used = num_cores_to_be_used,
-         num_cores_y = num_cores_y](
-            const Program& program, const std::vector<Buffer*>& input_buffers, const std::vector<Buffer*>&) {
-            auto clip_coef_clamped_buffer = input_buffers.at(input_buffers.size() - 1);
-            const auto clip_coef_clamped_address = clip_coef_clamped_buffer->address();
-
-            for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
-                CoreCoord core = {i / num_cores_y, i % num_cores_y};
-
-                {
-                    auto &runtime_args = GetRuntimeArgs(program, reader_kernels_id, core);
-                    runtime_args[0] = input_buffers.at(i)->address();
-                    runtime_args[2] = clip_coef_clamped_address;
-                }
-
-                {
-                    auto &runtime_args = GetRuntimeArgs(program, writer_kernels_id, core);
-                    runtime_args[0] = input_buffers.at(i)->address();
-                }
-            }
-        };
-
-    return {.program = std::move(program), .override_addresses_callback = override_addresses_callback};
-}
-
-}  // namespace primary
-
-}  // namespace operations
-
-}  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp
index e4feea2fc70..7ba77e96656 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp
+++ b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp
@@ -7,30 +7,13 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
-#include "ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_op.hpp"
-
 namespace py = pybind11;
 
 namespace tt {
 namespace operations {
 namespace primary {
 
-
-void py_module(py::module& m_primary) {
-    m_primary.def(
-        "moreh_clip_grad_norm_",
-        &moreh_clip_grad_norm,
-        py::arg("inputs").noconvert(),
-        py::arg("max_norm").noconvert(),
-        py::arg("norm_type").noconvert() = 2.0f,
-        py::arg("error_if_nonfinite").noconvert() = false,
-        py::kw_only(),
-        py::arg("total_norm").noconvert() = std::nullopt,
-        py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
-        R"doc(
-        "Performs a moreh_clip_grad_norm operation.
-    )doc");
-}
+void py_module(py::module& m_primary) {}
 
 }  // namespace
    // primary
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.cpp
new file mode 100644
index 00000000000..df7ddd0d58c
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.cpp
@@ -0,0 +1,122 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "moreh_clip_grad_norm.hpp"
+
+#include <optional>
+
+#include "common/base_types.hpp"
+#include "common/constants.hpp"
+#include "moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.hpp"
+#include "moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.hpp"
+#include "moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.hpp"
+#include "ttnn/cpp/ttnn/operations/eltwise/binary/binary.hpp"
+#include "ttnn/cpp/ttnn/operations/eltwise/binary/binary_composite.hpp"
+#include "ttnn/operations/creation.hpp"
+#include "ttnn/tensor/shape/shape.hpp"
+#include "ttnn/tensor/tensor.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm {
+
+inline uint32_t get_num_device_cores(Device *device) {
+    const auto num_cores_x = static_cast<uint32_t>(device->compute_with_storage_grid_size().x);
+    const auto num_cores_y = static_cast<uint32_t>(device->compute_with_storage_grid_size().y);
+    return num_cores_x * num_cores_y;
+}
+
+Tensor MorehClipGradNorm::invoke(
+    const std::vector<Tensor> &inputs,
+    float max_norm,
+    float norm_type,
+    bool error_if_nonfinite,
+    const std::optional<const Tensor> &total_norm,
+    const std::optional<MemoryConfig> &memory_config,
+    const std::optional<DeviceComputeKernelConfig> &compute_kernel_config) {
+    auto device = inputs.at(0).device();
+    const auto compute_kernel_config_val =
+        init_device_compute_kernel_config(device->arch(), compute_kernel_config, MathFidelity::HiFi4);
+
+    // Loop variable
+    const auto max_num_inputs = get_num_device_cores(device);
+    const auto total_num_inputs = static_cast<uint32_t>(inputs.size());
+    const auto num_iter = (total_num_inputs + max_num_inputs - 1) / max_num_inputs;
+
+    auto tmp_pow_sum = create_device_tensor(
+        SimpleShape{tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH * static_cast<uint32_t>(inputs.size())},
+        inputs.at(0).get_dtype(),
+        Layout::TILE,
+        device,
+        memory_config.value_or(inputs.at(0).memory_config()));
+
+    // Run Step 1
+    // Sum[|e|^p]
+    uint32_t tile_offset{0};
+    auto num_inputs = total_num_inputs;
+    for (uint32_t i = 0; i < num_iter; i++) {
+        const auto num_inputs_at_this_iter = std::min(num_inputs, max_num_inputs);
+
+        ttnn::prim::moreh_clip_grad_norm_step1(
+            std::vector<Tensor>(inputs.begin() + tile_offset, inputs.begin() + tile_offset + num_inputs_at_this_iter),
+            norm_type,
+            tile_offset,
+            tmp_pow_sum,
+            memory_config,
+            compute_kernel_config_val);
+
+        if (i < (num_iter - 1)) {
+            tile_offset += num_inputs_at_this_iter;
+            num_inputs -= num_inputs_at_this_iter;
+        }
+    }
+
+    // Run Step 2
+    // Sum[Sum[|e|^p]]^(1/p)
+    auto output_total_norm = ttnn::prim::moreh_clip_grad_norm_step2(
+        tmp_pow_sum,
+        norm_type,
+        total_norm,
+        memory_config,
+        init_device_compute_kernel_config(inputs.at(0).device()->arch(), compute_kernel_config, MathFidelity::HiFi4));
+
+    if (error_if_nonfinite) {
+        const auto fp32_total_norm =
+            tensor_impl::cast_vec<float>(owned_buffer::get_as<bfloat16>(output_total_norm.cpu())).at(0);
+        TT_FATAL(
+            std::isfinite(fp32_total_norm),
+            "The total norm of order {} for gradients from `parameters` is non-finite, so it cannot be "
+            "clipped. To disable this error and scale the gradients by the non-finite norm anyway, set "
+            "`error_if_nonfinite=False`",
+            norm_type);
+    }
+
+    // max_norm / (total_norm + 1e-6)
+    auto clip_coef = ttnn::multiply(ttnn::add(output_total_norm, 1e-6f), (1 / max_norm));
+    // min(clip_coef, 1.0f)
+    Tensor scalar = creation::create_scalar(1.0f, inputs.at(0).get_dtype(), Layout::TILE, device);
+    auto clip_coef_clamped = ttnn::minimum(clip_coef, scalar);
+    scalar.deallocate();
+
+    // Run Step 3
+    // Inplace update inputs(inputs *= clip_coef_clamped)
+    uint32_t start_input_idx{0};
+    num_inputs = total_num_inputs;
+    for (uint32_t i = 0; i < num_iter; ++i) {
+        const auto num_inputs_at_this_iter = std::min(num_inputs, max_num_inputs);
+
+        auto input_tensors = std::vector<Tensor>(
+            inputs.begin() + start_input_idx, inputs.begin() + start_input_idx + num_inputs_at_this_iter);
+
+        ttnn::prim::moreh_clip_grad_norm_step3(
+            input_tensors, clip_coef_clamped, memory_config, compute_kernel_config_val);
+
+        if (i < (num_iter - 1)) {
+            start_input_idx += num_inputs_at_this_iter;
+            num_inputs -= num_inputs_at_this_iter;
+        }
+    }
+
+    return output_total_norm;
+}
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.hpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.hpp
new file mode 100644
index 00000000000..4a77ed52de9
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm.hpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "ttnn/decorators.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm {
+
+struct MorehClipGradNorm {
+    static Tensor invoke(
+        const std::vector<Tensor> &inputs,
+        float max_norm,
+        float norm_type,
+        bool error_if_nonfinite,
+        const std::optional<const Tensor> &total_norm,
+        const std::optional<MemoryConfig> &memory_config,
+        const std::optional<DeviceComputeKernelConfig> &compute_kernel_config);
+};
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm
+
+namespace ttnn {
+constexpr auto moreh_clip_grad_norm = ttnn::register_operation_with_auto_launch_op<
+    "ttnn::moreh_clip_grad_norm",
+    ttnn::operations::moreh::moreh_clip_grad_norm::MorehClipGradNorm>();
+}  // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.cpp
new file mode 100644
index 00000000000..7fbddbe6800
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "moreh_clip_grad_norm_pybind.hpp"
+
+#include "moreh_clip_grad_norm.hpp"
+#include "ttnn/cpp/pybind11/decorators.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm {
+
+void bind_moreh_clip_grad_norm_operation(py::module &module) {
+    bind_registered_operation(
+        module,
+        ttnn::moreh_clip_grad_norm,
+        "moreh_clip_grad_norm",
+        ttnn::pybind_arguments_t{
+            py::arg("inputs"),
+            py::arg("max_norm"),
+            py::arg("norm_type") = 2.0f,
+            py::arg("error_if_nonfinite") = false,
+            py::kw_only(),
+            py::arg("total_norm") = std::nullopt,
+            py::arg("memory_config") = std::nullopt,
+            py::arg("compute_kernel_config") = std::nullopt});
+}
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.hpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.hpp
new file mode 100644
index 00000000000..c3a449415a0
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.hpp
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm {
+void bind_moreh_clip_grad_norm_operation(py::module &module);
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/moreh_clip_grad_norm_step1_kernel.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/moreh_clip_grad_norm_step1_kernel.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/moreh_clip_grad_norm_step1_kernel.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/moreh_clip_grad_norm_step1_kernel.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/reader_moreh_clip_grad_norm_step1.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/reader_moreh_clip_grad_norm_step1.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/reader_moreh_clip_grad_norm_step1.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/reader_moreh_clip_grad_norm_step1.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/writer_moreh_clip_grad_norm_step1.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/writer_moreh_clip_grad_norm_step1.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/kernels/writer_moreh_clip_grad_norm_step1.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/writer_moreh_clip_grad_norm_step1.cpp
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.cpp
new file mode 100644
index 00000000000..65127d37be5
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.cpp
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "moreh_clip_grad_norm_step1_device_operation.hpp"
+
+#include "common/constants.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+#include "ttnn/tensor/tensor.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1 {
+
+void MorehClipGradNormStep1Operation::validate_inputs(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    auto input_tensors = tensor_args.inputs;
+    for (const auto& input : input_tensors) {
+        ttnn::operations::check_tensor(input, "moreh_clip_grad_norm_step1", "input");
+    }
+
+    ttnn::operations::check_tensor(tensor_args.tmp_pow_sum, "moreh_clip_grad_norm_step1", "tmp_pow_sum");
+};
+
+MorehClipGradNormStep1Operation::program_factory_t MorehClipGradNormStep1Operation::select_program_factory(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return ProgramFactory{};
+};
+
+void MorehClipGradNormStep1Operation::validate_on_program_cache_miss(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+void MorehClipGradNormStep1Operation::validate_on_program_cache_hit(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+MorehClipGradNormStep1Operation::shape_return_value_t MorehClipGradNormStep1Operation::compute_output_shapes(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return {};
+};
+
+MorehClipGradNormStep1Operation::tensor_return_value_t MorehClipGradNormStep1Operation::create_output_tensors(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return tensor_args.tmp_pow_sum;
+};
+
+std::tuple<MorehClipGradNormStep1Operation::operation_attributes_t, MorehClipGradNormStep1Operation::tensor_args_t>
+MorehClipGradNormStep1Operation::invoke(
+    const std::vector<Tensor>& inputs,
+    const float norm_type,
+    const uint32_t tile_offset_of_tmp_pow_sum,
+    const Tensor& tmp_pow_sum,
+    const std::optional<MemoryConfig>& memory_config,
+    const DeviceComputeKernelConfig& compute_kernel_config) {
+    return {
+        operation_attributes_t{
+            norm_type,
+            tile_offset_of_tmp_pow_sum,
+            memory_config.value_or(inputs.at(0).memory_config()),
+            compute_kernel_config},
+        tensor_args_t{inputs, tmp_pow_sum}};
+};
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.hpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.hpp
new file mode 100644
index 00000000000..518a4b03c96
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_device_operation.hpp
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <vector>
+
+#include "ttnn/decorators.hpp"
+#include "ttnn/device_operation.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/tensor/tensor.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1 {
+
+struct MorehClipGradNormStep1Operation {
+    struct operation_attributes_t {
+        const float norm_type;
+        const uint32_t tile_offset_of_tmp_pow_sum;
+        const MemoryConfig memory_config;
+        const DeviceComputeKernelConfig compute_kernel_config;
+    };
+
+    struct tensor_args_t {
+        const std::vector<Tensor>& inputs;
+        const Tensor& tmp_pow_sum;
+    };
+
+    using shape_return_value_t = SimpleShape;
+    using tensor_return_value_t = Tensor;
+
+    struct ProgramFactory {
+        struct shared_variables_t {
+            KernelHandle reader_kernel_id;
+            KernelHandle writer_kernel_id;
+            KernelHandle compute_kernel_id;
+            uint32_t num_cores_to_be_used;
+            size_t num_cores_y;
+        };
+
+        using cached_program_t = ttnn::device_operation::CachedProgram<shared_variables_t>;
+
+        static cached_program_t create(
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& tmp_pow_sum);
+
+        static void override_runtime_arguments(
+            cached_program_t& cached_program,
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& tmp_pow_sum);
+    };
+
+    using program_factory_t = std::variant<ProgramFactory>;
+
+    static void validate_inputs(const operation_attributes_t&, const tensor_args_t&);
+    static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&);
+    static shape_return_value_t compute_output_shapes(const operation_attributes_t&, const tensor_args_t&);
+    static tensor_return_value_t create_output_tensors(const operation_attributes_t&, const tensor_args_t&);
+    static std::tuple<operation_attributes_t, tensor_args_t> invoke(
+        const std::vector<Tensor>& inputs,
+        const float norm_type,
+        const uint32_t tile_offset_of_tmp_pow_sum,
+        const Tensor& tmp_pow_sum,
+        const std::optional<MemoryConfig>& memory_config,
+        const DeviceComputeKernelConfig& compute_kernel_config);
+};
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1
+
+namespace ttnn::prim {
+constexpr auto moreh_clip_grad_norm_step1 = ttnn::register_operation<
+    "ttnn::prim::moreh_clip_grad_norm_step1",
+    ttnn::operations::moreh::moreh_clip_grad_norm_step1::MorehClipGradNormStep1Operation>();
+}  // namespace ttnn::prim
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_program_factory.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_program_factory.cpp
new file mode 100644
index 00000000000..d51e19e546f
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/moreh_clip_grad_norm_step1_program_factory.cpp
@@ -0,0 +1,223 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vector>
+
+#include "moreh_clip_grad_norm_step1_device_operation.hpp"
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/common/work_split.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1 {
+
+std::tuple<uint32_t, float, bool> get_p_decimal_p_is_negative(float ord) {
+    auto p = std::floor(ord);
+    auto decimal = ord - p;
+    const bool p_is_negative = p < 0.0f;
+    if (p_is_negative) {
+        p = -p;
+    }
+    return std::make_tuple(static_cast<uint32_t>(p), decimal, p_is_negative);
+}
+
+MorehClipGradNormStep1Operation::ProgramFactory::cached_program_t
+MorehClipGradNormStep1Operation::ProgramFactory::create(
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& tmp_pow_sum) {
+    auto& inputs = tensor_args.inputs;
+    auto norm_type = operation_attributes.norm_type;
+    auto tile_offset_of_tmp_pow_sum = operation_attributes.tile_offset_of_tmp_pow_sum;
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      Device Setup
+    ////////////////////////////////////////////////////////////////////////////
+    auto device = tmp_pow_sum.device();
+    auto program = CreateProgram();
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Parameters Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const auto num_inputs = static_cast<uint32_t>(inputs.size());
+
+    std::vector<std::pair<uint32_t, uint32_t>> origin_hw_vec;
+    origin_hw_vec.reserve(num_inputs);
+
+    for (uint32_t j = 0; j < num_inputs; ++j) {
+        const auto& input_shape_without_padding = inputs.at(j).get_logical_shape();
+        origin_hw_vec.emplace_back(input_shape_without_padding[2], input_shape_without_padding[3]);
+    }
+
+    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(norm_type);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Core Setup
+    ////////////////////////////////////////////////////////////////////////////
+    auto grid = device->compute_with_storage_grid_size();
+    const auto num_cores_x = grid.x;
+    const auto num_cores_y = grid.y;
+    const auto
+        [num_cores_to_be_used,
+         all_cores,
+         core_group_1,
+         core_group_2,
+         num_inputs_per_core_group_1,
+         num_inputs_per_core_group_2] = tt::tt_metal::split_work_to_cores(grid, num_inputs);
+    TT_FATAL(core_group_2.ranges().empty(), "core_group_2 must be empty");
+    TT_FATAL(num_inputs_per_core_group_1 == 1, "num_inputs_per_core_group_1 must be 1");
+    TT_FATAL(num_inputs_per_core_group_2 == 0, "num_inputs_per_core_group_2 must be 0");
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         CircularBuffer Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const uint32_t in0_t = 1;  // input(==x)
+    const uint32_t in1_t = 1;  // one
+    const uint32_t in2_t = 1;  // decimal
+    const uint32_t in3_t = 2;  // mask_h_w
+
+    const uint32_t out0_t = 1;  // output(==y)
+
+    const uint32_t im0_t = 1;  // |x|
+    const uint32_t im1_t = 1;  // |x|^p
+    const uint32_t im2_t = 1;  // Add[|x|^p * exp(log(|x|) * decimal)]
+    const uint32_t im3_t = 1;  // log(|x|)
+    const uint32_t im4_t = 1;  // exp(log(|x|) * decimal)
+    const uint32_t im5_t = 1;  // |x|^p * exp(log(|x|) * decimal)
+
+    const auto cb_data_format = tt::tt_metal::datatype_to_dataformat_converter(tmp_pow_sum.get_dtype());
+
+    CreateCircularBuffer(
+        program,
+        core_group_1,
+        cb_data_format,
+        {
+            {tt::CB::c_in0, in0_t},        // input(==x)
+            {tt::CB::c_in1, in1_t},        // one
+            {tt::CB::c_in2, in2_t},        // decimal
+            {tt::CB::c_in3, in3_t},        // mask_h_w
+            {tt::CB::c_out0, out0_t},      // output(==y)
+            {tt::CB::c_intermed0, im0_t},  // |x|
+            {tt::CB::c_intermed1, im1_t},  // |x|^p
+            {tt::CB::c_intermed2, im2_t},  // Add[|x|^p * exp(log(|x|) * decimal)]
+            {tt::CB::c_intermed3, im3_t},  // log(|x|)
+            {tt::CB::c_intermed4, im4_t},  // exp(log(|x|) * decimal)
+            {tt::CB::c_intermed5, im5_t},  // |x|^p * exp(log(|x|) * decimal)
+        });
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      DataMovementKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto reader_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/"
+        "reader_moreh_clip_grad_norm_step1.cpp";
+    const auto writer_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/"
+        "writer_moreh_clip_grad_norm_step1.cpp";
+
+    const auto reader_kernel_id = CreateReadKernel(program, reader_kernel_file, core_group_1);
+    const auto writer_kernel_id = CreateWriteKernel(program, writer_kernel_file, core_group_1);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      ComputeKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    std::map<std::string, std::string> compute_defines{};
+    compute_defines["REDUCE_OP"] = "PoolType::SUM";
+    compute_defines["REDUCE_DIM"] = "ReduceDim::REDUCE_SCALAR";
+
+    const auto compute_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step1/device/kernels/"
+        "moreh_clip_grad_norm_step1_kernel.cpp";
+
+    const auto compute_kernel_id =
+        CreateComputeKernel(program, compute_kernel_file, {core_group_1, num_inputs_per_core_group_1}, compute_defines);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      RuntimeArgs SetUp
+    ////////////////////////////////////////////////////////////////////////////
+
+    const auto output_addr = tmp_pow_sum.buffer()->address();
+    auto cores = grid_to_cores(num_cores_to_be_used, num_cores_x, num_cores_y, false);
+
+    uint32_t tile_offset = tile_offset_of_tmp_pow_sum;
+    for (uint32_t i = 0; i < cores.size(); ++i) {
+        const CoreCoord& core = cores.at(i);
+
+        const auto& input = inputs.at(i);
+        const auto input_addr = input.buffer()->address();
+        const auto num_tiles = input.volume() / tt::constants::TILE_HW;
+        const auto [origin_h, origin_w] = origin_hw_vec.at(i);
+
+        // reader
+        const std::array reader_runtime_args{
+            input_addr,
+            static_cast<uint32_t>(input.buffer()->is_dram()),
+            num_tiles,
+            *reinterpret_cast<uint32_t*>(&decimal),
+            origin_h,
+            origin_w};
+        SetRuntimeArgs(program, reader_kernel_id, core, reader_runtime_args);
+
+        // writer
+        const std::array writer_runtime_args{
+            output_addr, static_cast<uint32_t>(tmp_pow_sum.buffer()->is_dram()), tile_offset};
+        SetRuntimeArgs(program, writer_kernel_id, core, writer_runtime_args);
+
+        // compute
+        const std::array compute_runtime_args{
+            num_tiles,
+            p,
+            static_cast<uint32_t>(p_is_negative),
+            origin_h,
+            origin_w,
+        };
+        SetRuntimeArgs(program, compute_kernel_id, core, compute_runtime_args);
+
+        tile_offset++;
+    }
+
+    return {
+        std::move(program), {reader_kernel_id, writer_kernel_id, compute_kernel_id, num_cores_to_be_used, num_cores_y}};
+}
+
+void MorehClipGradNormStep1Operation::ProgramFactory::override_runtime_arguments(
+    cached_program_t& cached_program,
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& tmp_pow_sum) {
+    auto& program = cached_program.program;
+    auto& reader_kernel_id = cached_program.shared_variables.reader_kernel_id;
+    auto& writer_kernel_id = cached_program.shared_variables.writer_kernel_id;
+    auto& compute_kernel_id = cached_program.shared_variables.compute_kernel_id;
+    auto num_cores_to_be_used = cached_program.shared_variables.num_cores_to_be_used;
+    auto num_cores_y = cached_program.shared_variables.num_cores_y;
+
+    const auto norm_type = operation_attributes.norm_type;
+    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(norm_type);
+    auto output_buffer = tmp_pow_sum.buffer();
+    const auto output_address = output_buffer->address();
+
+    for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
+        CoreCoord core = {i / num_cores_y, i % num_cores_y};
+
+        {
+            auto& runtime_args = GetRuntimeArgs(program, reader_kernel_id, core);
+            runtime_args[0] = tensor_args.inputs.at(i).buffer()->address();
+            runtime_args[3] = *reinterpret_cast<uint32_t*>(&decimal);
+        }
+
+        {
+            auto& runtime_args = GetRuntimeArgs(program, writer_kernel_id, core);
+            runtime_args[0] = output_address;
+        }
+
+        {
+            auto& runtime_args = GetRuntimeArgs(program, compute_kernel_id, core);
+            runtime_args[1] = p;
+            runtime_args[2] = static_cast<uint32_t>(p_is_negative);
+        }
+    }
+}
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step1
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/moreh_clip_grad_norm_step2_kernel.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/moreh_clip_grad_norm_step2_kernel.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/moreh_clip_grad_norm_step2_kernel.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/moreh_clip_grad_norm_step2_kernel.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/reader_moreh_clip_grad_norm_step2.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/reader_moreh_clip_grad_norm_step2.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/reader_moreh_clip_grad_norm_step2.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/reader_moreh_clip_grad_norm_step2.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/writer_moreh_clip_grad_norm_step2.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/writer_moreh_clip_grad_norm_step2.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/kernels/writer_moreh_clip_grad_norm_step2.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/writer_moreh_clip_grad_norm_step2.cpp
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.cpp
new file mode 100644
index 00000000000..d042ac172d0
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.cpp
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "moreh_clip_grad_norm_step2_device_operation.hpp"
+
+#include "common/constants.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+#include "ttnn/tensor/tensor.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2 {
+
+void MorehClipGradNormStep2Operation::validate_inputs(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    check_tensor(tensor_args.tmp_pow_sum, "moreh_clip_grad_norm_step2", "tmp_pow_sum");
+
+    if (tensor_args.total_norm.has_value())
+        check_tensor(tensor_args.total_norm, "moreh_clip_grad_norm_step2", "total_norm");
+};
+
+MorehClipGradNormStep2Operation::program_factory_t MorehClipGradNormStep2Operation::select_program_factory(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return ProgramFactory{};
+};
+
+void MorehClipGradNormStep2Operation::validate_on_program_cache_miss(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+void MorehClipGradNormStep2Operation::validate_on_program_cache_hit(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+MorehClipGradNormStep2Operation::shape_return_value_t MorehClipGradNormStep2Operation::compute_output_shapes(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return SimpleShape{tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH};
+};
+
+MorehClipGradNormStep2Operation::tensor_return_value_t MorehClipGradNormStep2Operation::create_output_tensors(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    if (tensor_args.total_norm.has_value()) {
+        return tensor_args.total_norm.value();
+    }
+    const auto& total_norm_shape = compute_output_shapes(operation_attributes, tensor_args);
+
+    return create_device_tensor(
+        total_norm_shape,
+        tensor_args.tmp_pow_sum.get_dtype(),
+        Layout::TILE,
+        tensor_args.tmp_pow_sum.device(),
+        operation_attributes.memory_config);
+};
+
+std::tuple<MorehClipGradNormStep2Operation::operation_attributes_t, MorehClipGradNormStep2Operation::tensor_args_t>
+MorehClipGradNormStep2Operation::invoke(
+    const Tensor& tmp_pow_sum,
+    const float norm_type,
+    const std::optional<Tensor>& total_norm,
+    const std::optional<MemoryConfig>& memory_config,
+    const DeviceComputeKernelConfig compute_kernel_config) {
+    return {
+        operation_attributes_t{norm_type, memory_config.value_or(tmp_pow_sum.memory_config()), compute_kernel_config},
+        tensor_args_t{tmp_pow_sum, total_norm}};
+};
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.hpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.hpp
new file mode 100644
index 00000000000..73d1c514d6f
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_device_operation.hpp
@@ -0,0 +1,77 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <vector>
+
+#include "common/core_coord.hpp"
+#include "ttnn/decorators.hpp"
+#include "ttnn/device_operation.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/tensor/tensor.hpp"
+#include "ttnn/tensor/types.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2 {
+
+struct MorehClipGradNormStep2Operation {
+    struct operation_attributes_t {
+        const float norm_type;
+        const MemoryConfig memory_config;
+        const DeviceComputeKernelConfig compute_kernel_config;
+    };
+
+    struct tensor_args_t {
+        const Tensor& tmp_pow_sum;
+        const std::optional<Tensor>& total_norm;
+    };
+
+    using shape_return_value_t = SimpleShape;
+    using tensor_return_value_t = Tensor;
+
+    struct ProgramFactory {
+        struct shared_variables_t {
+            KernelHandle reader_kernel_id;
+            KernelHandle writer_kernel_id;
+            KernelHandle compute_kernel_id;
+            CoreCoord single_core;
+        };
+
+        using cached_program_t = ttnn::device_operation::CachedProgram<shared_variables_t>;
+
+        static cached_program_t create(
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& total_norm);
+
+        static void override_runtime_arguments(
+            cached_program_t& cached_program,
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& total_norm);
+    };
+
+    using program_factory_t = std::variant<ProgramFactory>;
+
+    static void validate_inputs(const operation_attributes_t&, const tensor_args_t&);
+    static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&);
+    static shape_return_value_t compute_output_shapes(const operation_attributes_t&, const tensor_args_t&);
+    static tensor_return_value_t create_output_tensors(const operation_attributes_t&, const tensor_args_t&);
+    static std::tuple<operation_attributes_t, tensor_args_t> invoke(
+        const Tensor& tmp_pow_sum,
+        const float norm_type,
+        const std::optional<Tensor>& total_norm,
+        const std::optional<MemoryConfig>& memory_config,
+        const DeviceComputeKernelConfig compute_kernel_config);
+};
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2
+
+namespace ttnn::prim {
+constexpr auto moreh_clip_grad_norm_step2 = ttnn::register_operation<
+    "ttnn::prim::moreh_clip_grad_norm_step2",
+    ttnn::operations::moreh::moreh_clip_grad_norm_step2::MorehClipGradNormStep2Operation>();
+}  // namespace ttnn::prim
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_program_factory.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_program_factory.cpp
new file mode 100644
index 00000000000..03427d636ed
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/moreh_clip_grad_norm_step2_program_factory.cpp
@@ -0,0 +1,162 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vector>
+
+#include "moreh_clip_grad_norm_step2_device_operation.hpp"
+#include "tt_metal/common/work_split.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2 {
+
+std::tuple<uint32_t, float, bool> get_p_decimal_p_is_negative(float ord) {
+    auto p = std::floor(ord);
+    auto decimal = ord - p;
+    const bool p_is_negative = p < 0.0f;
+    if (p_is_negative) {
+        p = -p;
+    }
+    return std::make_tuple(static_cast<uint32_t>(p), decimal, p_is_negative);
+}
+
+MorehClipGradNormStep2Operation::ProgramFactory::cached_program_t
+MorehClipGradNormStep2Operation::ProgramFactory::create(
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& total_norm) {
+    const auto& tmp_pow_sum = tensor_args.tmp_pow_sum;
+    auto norm_type = operation_attributes.norm_type;
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      Device Setup
+    ////////////////////////////////////////////////////////////////////////////
+    auto device = tmp_pow_sum.device();
+    auto program = CreateProgram();
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Parameters Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const auto num_tiles = tmp_pow_sum.volume() / tt::constants::TILE_HW;
+
+    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(1.0f / norm_type);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Core Setup
+    ////////////////////////////////////////////////////////////////////////////
+    CoreCoord single_core = {0, 0};
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         CircularBuffer Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const uint32_t in0_t = 1;  // input(==tmp_pow_sum)
+    const uint32_t in1_t = 1;  // decimal
+
+    // x^p * exp(log(x) * decimal)
+    const uint32_t out0_t = 1;  // output(==total_norm)
+
+    const uint32_t im0_t = 1;  // Sum[tmp_pow_sum](==x)
+    const uint32_t im1_t = 1;  // x^p
+    const uint32_t im2_t = 1;  // log(x)
+    const uint32_t im3_t = 1;  // exp(log(x) * decimal)
+
+    const auto cb_data_format = tt::tt_metal::datatype_to_dataformat_converter(total_norm.get_dtype());
+
+    CreateCircularBuffer(
+        program,
+        single_core,
+        cb_data_format,
+        {
+            {tt::CB::c_in0, in0_t},        // input(==tmp_pow_sum)
+            {tt::CB::c_in1, in1_t},        // decimal
+            {tt::CB::c_out0, out0_t},      // output(==total_norm)
+            {tt::CB::c_intermed0, im0_t},  // Sum[tmp_pow_sum](==x)
+            {tt::CB::c_intermed1, im1_t},  // x^p
+            {tt::CB::c_intermed2, im2_t},  // log(x)
+            {tt::CB::c_intermed3, im3_t},  // exp(log(x) * decimal)
+        });
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      DataMovementKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto reader_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/"
+        "reader_moreh_clip_grad_norm_step2.cpp";
+    const auto writer_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/"
+        "writer_moreh_clip_grad_norm_step2.cpp";
+
+    const auto reader_kernel_id = CreateReadKernel(program, reader_kernel_file, single_core);
+    const auto writer_kernel_id = CreateWriteKernel(program, writer_kernel_file, single_core);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      ComputeKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto compute_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step2/device/kernels/"
+        "moreh_clip_grad_norm_step2_kernel.cpp";
+
+    const auto compute_kernel_id = CreateComputeKernel(program, compute_kernel_file, {single_core, num_tiles});
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      RuntimeArgs SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto input_addr = tmp_pow_sum.buffer()->address();
+    const auto output_addr = total_norm.buffer()->address();
+
+    // reader
+    const std::array reader_runtime_args{
+        input_addr,
+        static_cast<uint32_t>(tmp_pow_sum.buffer()->is_dram()),
+        num_tiles,
+        *reinterpret_cast<uint32_t*>(&decimal)};
+    SetRuntimeArgs(program, reader_kernel_id, single_core, reader_runtime_args);
+
+    // writer
+    const std::array writer_runtime_args{output_addr, static_cast<uint32_t>(total_norm.buffer()->is_dram())};
+    SetRuntimeArgs(program, writer_kernel_id, single_core, writer_runtime_args);
+
+    // compute
+    const std::array compute_runtime_args{num_tiles, p, static_cast<uint32_t>(p_is_negative)};
+    SetRuntimeArgs(program, compute_kernel_id, single_core, compute_runtime_args);
+
+    return {std::move(program), {reader_kernel_id, writer_kernel_id, compute_kernel_id, single_core}};
+}
+
+void MorehClipGradNormStep2Operation::ProgramFactory::override_runtime_arguments(
+    cached_program_t& cached_program,
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& total_norm) {
+    auto& program = cached_program.program;
+    auto& reader_kernel_id = cached_program.shared_variables.reader_kernel_id;
+    auto& writer_kernel_id = cached_program.shared_variables.writer_kernel_id;
+    auto& compute_kernel_id = cached_program.shared_variables.compute_kernel_id;
+    auto single_core = cached_program.shared_variables.single_core;
+
+    const auto norm_type = operation_attributes.norm_type;
+    auto [p, decimal, p_is_negative] = get_p_decimal_p_is_negative(1.0f / norm_type);
+
+    const auto input_address = tensor_args.tmp_pow_sum.buffer()->address();
+    const auto output_address = total_norm.buffer()->address();
+
+    {
+        auto& runtime_args = GetRuntimeArgs(program, reader_kernel_id, single_core);
+        runtime_args[0] = input_address;
+        runtime_args[3] = *reinterpret_cast<uint32_t*>(&decimal);
+    }
+
+    {
+        auto& runtime_args = GetRuntimeArgs(program, writer_kernel_id, single_core);
+        runtime_args[0] = output_address;
+    }
+
+    {
+        auto& runtime_args = GetRuntimeArgs(program, compute_kernel_id, single_core);
+        runtime_args[1] = p;
+        runtime_args[2] = static_cast<uint32_t>(p_is_negative);
+    }
+}
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step2
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/moreh_clip_grad_norm_step3_kernel.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/moreh_clip_grad_norm_step3_kernel.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/moreh_clip_grad_norm_step3_kernel.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/moreh_clip_grad_norm_step3_kernel.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/reader_moreh_clip_grad_norm_step3.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/reader_moreh_clip_grad_norm_step3.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/reader_moreh_clip_grad_norm_step3.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/reader_moreh_clip_grad_norm_step3.cpp
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/writer_moreh_clip_grad_norm_step3.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/writer_moreh_clip_grad_norm_step3.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/kernels/writer_moreh_clip_grad_norm_step3.cpp
rename to ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/writer_moreh_clip_grad_norm_step3.cpp
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.cpp
new file mode 100644
index 00000000000..823cbd24cc1
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.cpp
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "moreh_clip_grad_norm_step3_device_operation.hpp"
+
+#include "common/constants.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+#include "ttnn/tensor/tensor.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3 {
+
+void MorehClipGradNormStep3Operation::validate_inputs(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    auto input_tensors = tensor_args.inputs;
+    for (const auto& input : input_tensors) {
+        ttnn::operations::check_tensor(input, "moreh_clip_grad_norm_step3", "input");
+    }
+
+    ttnn::operations::check_tensor(tensor_args.clip_coef_clamped, "moreh_clip_grad_norm_step3", "clip_coef_clamped");
+};
+
+MorehClipGradNormStep3Operation::program_factory_t MorehClipGradNormStep3Operation::select_program_factory(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return ProgramFactory{};
+};
+
+void MorehClipGradNormStep3Operation::validate_on_program_cache_miss(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+void MorehClipGradNormStep3Operation::validate_on_program_cache_hit(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    validate_inputs(operation_attributes, tensor_args);
+};
+
+// No output
+MorehClipGradNormStep3Operation::shape_return_value_t MorehClipGradNormStep3Operation::compute_output_shapes(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return {};
+};
+
+// No output
+MorehClipGradNormStep3Operation::tensor_return_value_t MorehClipGradNormStep3Operation::create_output_tensors(
+    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
+    return tensor_args.inputs;
+};
+
+std::tuple<MorehClipGradNormStep3Operation::operation_attributes_t, MorehClipGradNormStep3Operation::tensor_args_t>
+MorehClipGradNormStep3Operation::invoke(
+    const std::vector<Tensor>& inputs,
+    const Tensor& clip_coef_clamped,
+    const std::optional<MemoryConfig>& memory_config,
+    const DeviceComputeKernelConfig compute_kernel_config) {
+    return {
+        operation_attributes_t{memory_config.value_or(inputs.at(0).memory_config()), compute_kernel_config},
+        tensor_args_t{inputs, clip_coef_clamped}};
+};
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.hpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.hpp
new file mode 100644
index 00000000000..d9bca439855
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_device_operation.hpp
@@ -0,0 +1,75 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <vector>
+
+#include "common/core_coord.hpp"
+#include "ttnn/decorators.hpp"
+#include "ttnn/device_operation.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/tensor/tensor.hpp"
+#include "ttnn/tensor/types.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3 {
+
+struct MorehClipGradNormStep3Operation {
+    struct operation_attributes_t {
+        const MemoryConfig memory_config;
+        const DeviceComputeKernelConfig compute_kernel_config;
+    };
+
+    struct tensor_args_t {
+        const std::vector<Tensor>& inputs;
+        const Tensor& clip_coef_clamped;
+    };
+
+    using shape_return_value_t = SimpleShape;
+    using tensor_return_value_t = std::vector<Tensor>;
+
+    struct ProgramFactory {
+        struct shared_variables_t {
+            KernelHandle reader_kernel_id;
+            KernelHandle writer_kernel_id;
+            uint32_t num_cores_to_be_used;
+            size_t num_cores_y;
+        };
+
+        using cached_program_t = ttnn::device_operation::CachedProgram<shared_variables_t>;
+
+        static cached_program_t create(
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& tensor_return_value);
+
+        static void override_runtime_arguments(
+            cached_program_t& cached_program,
+            const operation_attributes_t& operation_attributes,
+            const tensor_args_t& tensor_args,
+            tensor_return_value_t& tensor_return_value);
+    };
+
+    using program_factory_t = std::variant<ProgramFactory>;
+
+    static void validate_inputs(const operation_attributes_t&, const tensor_args_t&);
+    static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&);
+    static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&);
+    static shape_return_value_t compute_output_shapes(const operation_attributes_t&, const tensor_args_t&);
+    static tensor_return_value_t create_output_tensors(const operation_attributes_t&, const tensor_args_t&);
+    static std::tuple<operation_attributes_t, tensor_args_t> invoke(
+        const std::vector<Tensor>& inputs,
+        const Tensor& clip_coef_clamped,
+        const std::optional<MemoryConfig>& memory_config,
+        const DeviceComputeKernelConfig compute_kernel_config);
+};
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3
+
+namespace ttnn::prim {
+constexpr auto moreh_clip_grad_norm_step3 = ttnn::register_operation<
+    "ttnn::prim::moreh_clip_grad_norm_step3",
+    ttnn::operations::moreh::moreh_clip_grad_norm_step3::MorehClipGradNormStep3Operation>();
+}  // namespace ttnn::prim
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_program_factory.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_program_factory.cpp
new file mode 100644
index 00000000000..4b20990ce63
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/moreh_clip_grad_norm_step3_program_factory.cpp
@@ -0,0 +1,167 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vector>
+
+#include "moreh_clip_grad_norm_step3_device_operation.hpp"
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/common/work_split.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
+
+namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3 {
+
+std::tuple<uint32_t, float, bool> get_p_decimal_p_is_negative(float ord) {
+    auto p = std::floor(ord);
+    auto decimal = ord - p;
+    const bool p_is_negative = p < 0.0f;
+    if (p_is_negative) {
+        p = -p;
+    }
+    return std::make_tuple(static_cast<uint32_t>(p), decimal, p_is_negative);
+}
+
+MorehClipGradNormStep3Operation::ProgramFactory::cached_program_t
+MorehClipGradNormStep3Operation::ProgramFactory::create(
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& inputs) {
+    auto& clip_coef_clamped = tensor_args.clip_coef_clamped;
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      Device Setup
+    ////////////////////////////////////////////////////////////////////////////
+    auto device = inputs.at(0).device();
+    auto program = CreateProgram();
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Parameters Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const auto num_inputs = static_cast<uint32_t>(inputs.size());
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         Core Setup
+    ////////////////////////////////////////////////////////////////////////////
+    auto grid = device->compute_with_storage_grid_size();
+    const auto num_cores_x = grid.x;
+    const auto num_cores_y = grid.y;
+
+    const auto
+        [num_cores_to_be_used,
+         all_cores,
+         core_group_1,
+         core_group_2,
+         num_inputs_per_core_group_1,
+         num_inputs_per_core_group_2] = tt::tt_metal::split_work_to_cores(grid, num_inputs);
+    TT_FATAL(core_group_2.ranges().empty(), "core_group_2 must be empty");
+    TT_FATAL(num_inputs_per_core_group_1 == 1, "num_inputs_per_core_group_1 must be 1");
+    TT_FATAL(num_inputs_per_core_group_2 == 0, "num_inputs_per_core_group_2 must be 0");
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                         CircularBuffer Setup
+    ////////////////////////////////////////////////////////////////////////////
+    const uint32_t in0_t = 1;  // input(inplace)
+    const uint32_t in1_t = 1;  // clip_coef_clamped
+
+    const uint32_t out0_t = 1;  // output(inplace)
+
+    const auto cb_data_format = tt::tt_metal::datatype_to_dataformat_converter(inputs.at(0).get_dtype());
+
+    CreateCircularBuffer(
+        program,
+        core_group_1,
+        cb_data_format,
+        {
+            {tt::CB::c_in0, in0_t},    // input(inplace)
+            {tt::CB::c_in1, in1_t},    // clip_coef_clamped
+            {tt::CB::c_out0, out0_t},  // output(inplace)
+        });
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      DataMovementKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto reader_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/"
+        "reader_moreh_clip_grad_norm_step3.cpp";
+    const auto writer_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/"
+        "writer_moreh_clip_grad_norm_step3.cpp";
+
+    const auto reader_kernel_id = CreateReadKernel(program, reader_kernel_file, core_group_1);
+    const auto writer_kernel_id = CreateWriteKernel(program, writer_kernel_file, core_group_1);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      ComputeKernel SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    const auto compute_kernel_file =
+        "ttnn/cpp/ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_step3/device/kernels/"
+        "moreh_clip_grad_norm_step3_kernel.cpp";
+
+    const auto compute_kernel_id =
+        CreateComputeKernel(program, compute_kernel_file, {core_group_1, num_inputs_per_core_group_1});
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      RuntimeArgs SetUp
+    ////////////////////////////////////////////////////////////////////////////
+    auto cores = grid_to_cores(num_cores_to_be_used, num_cores_x, num_cores_y, false);
+    const auto clip_coef_clamped_addr = clip_coef_clamped.buffer()->address();
+    for (uint32_t i = 0; i < cores.size(); ++i) {
+        const CoreCoord& core = cores.at(i);
+
+        const auto& input = inputs.at(i);
+        const auto input_addr = input.buffer()->address();
+        const auto num_tiles = input.volume() / tt::constants::TILE_HW;
+
+        // reader
+        const std::array reader_runtime_args{
+            input_addr,
+            static_cast<uint32_t>(input.buffer()->is_dram()),
+            clip_coef_clamped_addr,
+            static_cast<uint32_t>(clip_coef_clamped.buffer()->is_dram()),
+            num_tiles};
+        SetRuntimeArgs(program, reader_kernel_id, core, reader_runtime_args);
+
+        // writer
+        const std::array writer_runtime_args{input_addr, static_cast<uint32_t>(input.buffer()->is_dram()), num_tiles};
+        SetRuntimeArgs(program, writer_kernel_id, core, writer_runtime_args);
+
+        // compute
+        const std::array compute_runtime_args{num_tiles};
+        SetRuntimeArgs(program, compute_kernel_id, core, compute_runtime_args);
+    }
+
+    return {std::move(program), {reader_kernel_id, writer_kernel_id, num_cores_to_be_used, num_cores_y}};
+}
+
+void MorehClipGradNormStep3Operation::ProgramFactory::override_runtime_arguments(
+    cached_program_t& cached_program,
+    const operation_attributes_t& operation_attributes,
+    const tensor_args_t& tensor_args,
+    tensor_return_value_t& inputs) {
+    auto& program = cached_program.program;
+    auto& reader_kernel_id = cached_program.shared_variables.reader_kernel_id;
+    auto& writer_kernel_id = cached_program.shared_variables.writer_kernel_id;
+    auto num_cores_to_be_used = cached_program.shared_variables.num_cores_to_be_used;
+    auto num_cores_y = cached_program.shared_variables.num_cores_y;
+
+    auto clip_coef_clamped_buffer = tensor_args.clip_coef_clamped.buffer();
+    const auto clip_coef_clamped_address = clip_coef_clamped_buffer->address();
+
+    for (uint32_t i = 0; i < num_cores_to_be_used; ++i) {
+        CoreCoord core = {i / num_cores_y, i % num_cores_y};
+
+        {
+            auto& runtime_args = GetRuntimeArgs(program, reader_kernel_id, core);
+            runtime_args[0] = inputs.at(i).buffer()->address();
+            runtime_args[2] = clip_coef_clamped_address;
+        }
+
+        {
+            auto& runtime_args = GetRuntimeArgs(program, writer_kernel_id, core);
+            runtime_args[0] = inputs.at(i).buffer()->address();
+        }
+    }
+}
+
+}  // namespace ttnn::operations::moreh::moreh_clip_grad_norm_step3
diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_pybind.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_pybind.cpp
index b741779c2b9..5d19227e880 100644
--- a/ttnn/cpp/ttnn/operations/moreh/moreh_pybind.cpp
+++ b/ttnn/cpp/ttnn/operations/moreh/moreh_pybind.cpp
@@ -9,6 +9,7 @@
 #include "ttnn/operations/moreh/moreh_arange/moreh_arange_pybind.hpp"
 #include "ttnn/operations/moreh/moreh_bmm/moreh_bmm_pybind.hpp"
 #include "ttnn/operations/moreh/moreh_bmm_backward/moreh_bmm_backward_pybind.hpp"
+#include "ttnn/operations/moreh/moreh_clip_grad_norm/moreh_clip_grad_norm_pybind.hpp"
 #include "ttnn/operations/moreh/moreh_cumsum/moreh_cumsum_pybind.hpp"
 #include "ttnn/operations/moreh/moreh_dot/moreh_dot_pybind.hpp"
 #include "ttnn/operations/moreh/moreh_dot_backward/moreh_dot_backward_pybind.hpp"
@@ -68,5 +69,6 @@ void bind_moreh_operations(py::module &module) {
     moreh_softmax::bind_moreh_softmax_operation(module);
     moreh_sum_backward::bind_moreh_sum_backward_operation(module);
     moreh_sum::bind_moreh_sum_operation(module);
+    moreh_clip_grad_norm::bind_moreh_clip_grad_norm_operation(module);
 }
 }  // namespace ttnn::operations::moreh
diff --git a/ttnn/ttnn/operations/moreh.py b/ttnn/ttnn/operations/moreh.py
index 21a8f0330d6..28d27466d01 100644
--- a/ttnn/ttnn/operations/moreh.py
+++ b/ttnn/ttnn/operations/moreh.py
@@ -9,6 +9,7 @@
 arange = ttnn._ttnn.operations.moreh.moreh_arange
 bmm = ttnn._ttnn.operations.moreh.moreh_bmm
 bmm_backward = ttnn._ttnn.operations.moreh.moreh_bmm_backward
+clip_grad_norm = ttnn._ttnn.operations.moreh.moreh_clip_grad_norm
 cumsum = ttnn._ttnn.operations.moreh.moreh_cumsum
 cumsum_backward = ttnn._ttnn.operations.moreh.moreh_cumsum_backward
 dot = ttnn._ttnn.operations.moreh.moreh_dot

From 274f58a0c88d0e6926fe626fe4679f7ba778ec89 Mon Sep 17 00:00:00 2001
From: Kalaivani Baskar <156762498+KalaivaniMCW@users.noreply.github.com>
Date: Tue, 12 Nov 2024 16:44:53 +0530
Subject: [PATCH 11/69] #14730: Support unequal ranked inputs for eltwise
 binary (#14803)

### Ticket
Link to Github Issue #14730
also #14731
### Problem description
Need support for unequal ranked inputs in eltwise binary

### What's changed
Added support using ttnn.reshape when inputs are of different ranks

### Checklist
- [x] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/11736842353

https://github.com/tenstorrent/tt-metal/actions/runs/11794021567/attempts/2
- [ ] Nightly FD
https://github.com/tenstorrent/tt-metal/actions/runs/11736844358

https://github.com/tenstorrent/tt-metal/actions/runs/11794025766/job/32854516936
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [x] New/Existing tests provide coverage for changes
---
 .../unit_tests/operations/eltwise/test_add.py | 27 ++++++++++++++++
 .../unit_tests/operations/eltwise/test_mul.py | 32 +++++++++++++++++++
 .../ttnn/operations/eltwise/binary/binary.cpp | 32 +++++++++++++++++--
 3 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/tests/ttnn/unit_tests/operations/eltwise/test_add.py b/tests/ttnn/unit_tests/operations/eltwise/test_add.py
index 9344e59ccf0..b52bf99d2b8 100644
--- a/tests/ttnn/unit_tests/operations/eltwise/test_add.py
+++ b/tests/ttnn/unit_tests/operations/eltwise/test_add.py
@@ -10,6 +10,33 @@
 from tests.ttnn.utils_for_testing import assert_with_pcc
 
 
+@pytest.mark.parametrize(
+    "shapes",
+    [
+        [[1, 71, 7, 7], [7, 7]],
+        [[920, 1, 256], [256]],
+    ],
+)
+def test_unequal_ranks(device, shapes):
+    torch.manual_seed(0)
+
+    torch_input_tensor_a = torch.rand(shapes[0], dtype=torch.bfloat16)
+    torch_input_tensor_b = torch.rand(shapes[1], dtype=torch.bfloat16)
+    torch_output_tensor = torch_input_tensor_a + torch_input_tensor_b
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.DRAM_MEMORY_CONFIG
+    )
+    input_tensor_b = ttnn.from_torch(
+        torch_input_tensor_b, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.DRAM_MEMORY_CONFIG
+    )
+
+    output_tensor = ttnn.add(input_tensor_a, input_tensor_b, memory_config=ttnn.DRAM_MEMORY_CONFIG)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    assert ttnn.pearson_correlation_coefficient(torch_output_tensor, output_tensor) >= 0.99988
+
+
 @pytest.mark.parametrize(
     "shapes",
     [
diff --git a/tests/ttnn/unit_tests/operations/eltwise/test_mul.py b/tests/ttnn/unit_tests/operations/eltwise/test_mul.py
index 2227226f1c4..f0b51eb314a 100644
--- a/tests/ttnn/unit_tests/operations/eltwise/test_mul.py
+++ b/tests/ttnn/unit_tests/operations/eltwise/test_mul.py
@@ -12,6 +12,38 @@
 from torch.nn import functional as F
 
 
+@pytest.mark.parametrize(
+    "shapes",
+    [
+        [[4, 12, 64, 64], [12, 1, 1]],
+        [[4, 16, 64, 64], [16, 1, 1]],
+        [[64, 3, 64, 64], [3, 1, 1]],
+        [[64, 4, 64, 64], [4, 1, 1]],
+        [[16, 6, 64, 64], [6, 1, 1]],
+        [[16, 8, 64, 64], [8, 1, 1]],
+        [[1, 1], [1, 1, 32]],
+    ],
+)
+def test_unequal_ranks(device, shapes):
+    torch.manual_seed(0)
+
+    torch_input_tensor_a = torch.rand(shapes[0], dtype=torch.bfloat16)
+    torch_input_tensor_b = torch.rand(shapes[1], dtype=torch.bfloat16)
+    torch_output_tensor = torch_input_tensor_a * torch_input_tensor_b
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.DRAM_MEMORY_CONFIG
+    )
+    input_tensor_b = ttnn.from_torch(
+        torch_input_tensor_b, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.DRAM_MEMORY_CONFIG
+    )
+
+    output_tensor = ttnn.mul(input_tensor_a, input_tensor_b, memory_config=ttnn.DRAM_MEMORY_CONFIG)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    assert ttnn.pearson_correlation_coefficient(torch_output_tensor, output_tensor) >= 0.99988
+
+
 # fmt: off
 @pytest.mark.parametrize("scalar", [3.0])
 # fmt: on
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/binary.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/binary.cpp
index 194e294de31..ff7aa1738bb 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/binary.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/binary.cpp
@@ -9,6 +9,7 @@
 #include "ttnn/device_operation.hpp"
 #include "ttnn/operations/data_movement/repeat/repeat.hpp"
 #include "ttnn/operations/eltwise/unary/unary.hpp"
+#include "ttnn/operations/data_movement/reshape_view/reshape.hpp"
 
 namespace ttnn::operations::binary {
 
@@ -99,10 +100,34 @@ inline Tensor binary_impl(
 }
 
 template <BinaryOpType binary_op_type>
-auto preprocess_inputs(const Tensor &input_tensor_a_arg, const Tensor &input_tensor_b_arg) {
+auto preprocess_inputs(const Tensor &input_tensor_a_arg, const Tensor &input_tensor_b_arg, const std::optional<Tensor> &optional_output_tensor) {
     Tensor input_tensor_a = input_tensor_a_arg;
     Tensor input_tensor_b = input_tensor_b_arg;
 
+    auto rank_a = input_tensor_a.get_shape().rank();
+    auto rank_b = input_tensor_b.get_shape().rank();
+
+    if(rank_a != rank_b){
+
+        auto max_rank = std::max(rank_a, rank_b);
+        auto min_rank = std::min(rank_a, rank_b);
+
+        if(optional_output_tensor.has_value()) {
+            auto opt_rank = optional_output_tensor.value().get_shape().rank();
+            TT_FATAL( max_rank == opt_rank,
+            "Output Tensor rank {} doesn't match input tensor rank {}.", opt_rank, max_rank );
+        }
+
+        std::vector<int32_t> shape_vector(max_rank, 1);
+        auto& reshaped_tensor = (rank_a > rank_b) ? input_tensor_b : input_tensor_a;
+        auto s_b = reshaped_tensor.get_shape();
+        for(int i=0; i < min_rank; ++i){
+            shape_vector[(max_rank - min_rank) + i] = s_b[i];
+        }
+        reshaped_tensor = ttnn::reshape(reshaped_tensor, shape_vector);
+
+    }
+
     // TODO: #7731 (Remove calls to repeat )
     auto repeat_smaller = [](const auto &first, auto &second) {
         const auto first_shape = first.get_shape();
@@ -149,8 +174,9 @@ Tensor BinaryOperation<binary_op_type>::invoke(
     std::optional<Tensor> optional_output_tensor,
     std::optional<unary::FusedActivations> activations,
     std::optional<unary::UnaryWithParam> input_tensor_a_activation) {
+
     auto [input_tensor_a, input_tensor_b] =
-        detail::preprocess_inputs<binary_op_type>(input_tensor_a_arg, input_tensor_b_arg);
+        detail::preprocess_inputs<binary_op_type>(input_tensor_a_arg, input_tensor_b_arg, optional_output_tensor);
 
     return ttnn::prim::binary(
         queue_id,
@@ -245,7 +271,7 @@ Tensor RelationalBinary<binary_op_type>::invoke(
     }
 
     auto [input_tensor_a, input_tensor_b] =
-        detail::preprocess_inputs<binary_op_type>(input_tensor_a_arg, input_tensor_b_arg);
+        detail::preprocess_inputs<binary_op_type>(input_tensor_a_arg, input_tensor_b_arg, optional_output_tensor);
 
     auto output_memory_config = memory_config.value_or(input_tensor_a.memory_config());
     DataType dtype = output_dtype.value_or(input_tensor_a.get_dtype());

From a39f99800564fa2bb02ebb231dc13cd2b8d92794 Mon Sep 17 00:00:00 2001
From: Mark O'Connor <moconnor@tenstorrent.com>
Date: Tue, 12 Nov 2024 11:27:06 +0000
Subject: [PATCH 12/69] #0: Fix double deallocate

---
 models/demos/llama3/tt/llama_attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/models/demos/llama3/tt/llama_attention.py b/models/demos/llama3/tt/llama_attention.py
index c12a1a69af5..9a51aad2a74 100644
--- a/models/demos/llama3/tt/llama_attention.py
+++ b/models/demos/llama3/tt/llama_attention.py
@@ -351,7 +351,6 @@ def forward_decode(
             dense_out_sharded, ttnn.L1_MEMORY_CONFIG
         )  # TODO: remove as soon as we have sharded support in for all CCL
 
-        ttnn.deallocate(attn_output_cat)
         ttnn.deallocate(dense_out_sharded)
 
         # All reduce

From 003993815a0b6295bc9490ca7848e215ea5ca797 Mon Sep 17 00:00:00 2001
From: Kalaivani Baskar <156762498+KalaivaniMCW@users.noreply.github.com>
Date: Tue, 12 Nov 2024 20:01:28 +0530
Subject: [PATCH 13/69] #14862: fp32 support in unary (#14899)

### Ticket
Link to Github Issue #14862

### Problem description
Provide context for the problem.

### What's changed
Enabled `preserve_fp32_precision` flag  for float32 input

### Checklist
- [x] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/11752673780
https://github.com/tenstorrent/tt-metal/actions/runs/11797734586
- [ ] Nightly fd
https://github.com/tenstorrent/tt-metal/actions/runs/11797739127
- [ ] Model perf
- [ ] Device perf
- [ ] Demo tests
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [x] New/Existing tests provide coverage for changes
---
 .../operations/eltwise/test_unary_fp32.py     | 224 ++++++++++++++++++
 .../ttnn/operations/eltwise/unary/unary.cpp   |   6 +-
 2 files changed, 227 insertions(+), 3 deletions(-)
 create mode 100644 tests/ttnn/unit_tests/operations/eltwise/test_unary_fp32.py

diff --git a/tests/ttnn/unit_tests/operations/eltwise/test_unary_fp32.py b/tests/ttnn/unit_tests/operations/eltwise/test_unary_fp32.py
new file mode 100644
index 00000000000..86b65b38028
--- /dev/null
+++ b/tests/ttnn/unit_tests/operations/eltwise/test_unary_fp32.py
@@ -0,0 +1,224 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+import ttnn
+
+import pytest
+from models.utility_functions import skip_for_grayskull
+from tests.ttnn.utils_for_testing import assert_with_pcc
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize(
+    "ttnn_function",
+    [
+        ttnn.neg,
+    ],
+)
+def test_neg_fp32(device, ttnn_function):
+    x_torch = torch.tensor([[0.00001]], dtype=torch.float32)
+    y_torch = -x_torch
+
+    x_tt = ttnn.from_torch(x_torch, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    y_tt = ttnn_function(x_tt)
+
+    tt_out = ttnn.to_torch(y_tt)
+    status = torch.allclose(y_torch, tt_out, atol=1e-10, rtol=1e-5, equal_nan=False)
+    assert status
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize(
+    "ttnn_function",
+    [
+        ttnn.sin,
+    ],
+)
+def test_sin_fp32(device, ttnn_function):
+    x_torch = torch.rand((64, 128), dtype=torch.float32)
+    y_torch = torch.sin(x_torch)
+
+    x_tt = ttnn.from_torch(x_torch, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    y_tt = ttnn_function(x_tt)
+
+    tt_out = ttnn.to_torch(y_tt)
+    status = torch.allclose(y_torch, tt_out, atol=1e-10, rtol=1e-5, equal_nan=False)
+    assert status
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize(
+    "ttnn_function",
+    [
+        ttnn.cos,
+    ],
+)
+def test_cos_fp32(device, ttnn_function):
+    x_torch = torch.rand((64, 128), dtype=torch.float32)
+    y_torch = torch.cos(x_torch)
+
+    x_tt = ttnn.from_torch(x_torch, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    y_tt = ttnn_function(x_tt)
+
+    tt_out = ttnn.to_torch(y_tt)
+    status = torch.allclose(y_torch, tt_out, atol=1e-10, rtol=1e-5, equal_nan=False)
+    assert status
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize(
+    "ttnn_function",
+    [
+        ttnn.tan,
+    ],
+)
+def test_tan_fp32(device, ttnn_function):
+    x_torch = torch.rand((64, 128), dtype=torch.float32)
+    y_torch = torch.tan(x_torch)
+
+    x_tt = ttnn.from_torch(x_torch, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    y_tt = ttnn_function(x_tt)
+
+    tt_out = ttnn.to_torch(y_tt)
+    status = torch.allclose(y_torch, tt_out, atol=1e-10, rtol=1e-5, equal_nan=False)
+    assert status
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize(
+    "ttnn_function",
+    [
+        ttnn.relu,
+    ],
+)
+def test_relu_fp32(device, ttnn_function):
+    x_torch = torch.rand((64, 128), dtype=torch.float32)
+    y_torch = torch.relu(x_torch)
+
+    x_tt = ttnn.from_torch(x_torch, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    y_tt = ttnn_function(x_tt)
+
+    tt_out = ttnn.to_torch(y_tt)
+    status = torch.allclose(y_torch, tt_out, atol=1e-10, rtol=1e-5, equal_nan=False)
+    assert status
+
+
+def run_unary_test(device, h, w, ttnn_function, pcc=0.9999):
+    torch.manual_seed(0)
+
+    torch_input_tensor = torch.rand((h, w), dtype=torch.float32)
+    golden_function = ttnn.get_golden_function(ttnn_function)
+    torch_output_tensor = golden_function(torch_input_tensor, device=device)
+
+    input_tensor = ttnn.from_torch(torch_input_tensor, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+    output_tensor = ttnn_function(input_tensor)
+    output_tensor = ttnn.to_layout(output_tensor, ttnn.ROW_MAJOR_LAYOUT)
+    output_tensor = ttnn.from_device(output_tensor)
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    assert_with_pcc(torch_output_tensor, output_tensor, pcc)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_exp(device, h, w):
+    run_unary_test(device, h, w, ttnn.exp, pcc=0.9998)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_tanh(device, h, w):
+    run_unary_test(device, h, w, ttnn.tanh, pcc=0.993)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_gelu(device, h, w):
+    run_unary_test(device, h, w, ttnn.gelu, pcc=0.9996)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_rsqrt(device, h, w):
+    run_unary_test(device, h, w, ttnn.rsqrt)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_silu(device, h, w):
+    run_unary_test(device, h, w, ttnn.silu)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_log(device, h, w):
+    run_unary_test(device, h, w, ttnn.log)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_asin(device, h, w):
+    run_unary_test(device, h, w, ttnn.asin, pcc=0.998)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_acos(device, h, w):
+    run_unary_test(device, h, w, ttnn.acos, pcc=0.998)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_atan(device, h, w):
+    run_unary_test(device, h, w, ttnn.atan)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_sinh(device, h, w):
+    run_unary_test(device, h, w, ttnn.sinh)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_asinh(device, h, w):
+    run_unary_test(device, h, w, ttnn.asinh, pcc=0.9997)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_cosh(device, h, w):
+    run_unary_test(device, h, w, ttnn.cosh, pcc=0.999)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_acosh(device, h, w):
+    run_unary_test(device, h, w, ttnn.acosh)
+
+
+@skip_for_grayskull("Unsupported dtype for Grayskull")
+@pytest.mark.parametrize("h", [64])
+@pytest.mark.parametrize("w", [128])
+def test_atanh(device, h, w):
+    run_unary_test(device, h, w, ttnn.atanh, pcc=0.997)
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
index f661b1cfedd..7a40003fa52 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
@@ -23,14 +23,14 @@ inline Tensor unary_impl(
     const std::optional<MemoryConfig>& memory_config = std::nullopt,
     const std::optional<Tensor>& optional_output_tensor = std::nullopt) {
     DataType output_dtype = (op_chain[0].op_type == UnaryOpType::TYPECAST) ? static_cast<DataType>(op_chain[0].params[1]) : input_tensor.get_dtype();
-    bool preserve_fp32_precision = (op_chain[0].op_type == UnaryOpType::TYPECAST) and (input_tensor.get_dtype() == DataType::FLOAT32);
+    auto arch = input_tensor.device()->arch();
+    bool preserve_fp32_precision = (arch != tt::ARCH::GRAYSKULL) and (input_tensor.get_dtype() == DataType::FLOAT32);
     bool fp32_dest_acc_en = preserve_fp32_precision or
                             output_dtype == DataType::UINT32 or
                             output_dtype == DataType::INT32 or
                             output_dtype == DataType::FLOAT32 or
                             input_tensor.get_dtype() == DataType::UINT32 or
-                            input_tensor.get_dtype() == DataType::INT32;  // MT: Currently only uint32/int32 is moved to
-                                                                          // DST directly, fp32 is converted to fp16b
+                            input_tensor.get_dtype() == DataType::INT32;
 
     auto output_memory_config = optional_output_tensor.has_value() ? optional_output_tensor.value().memory_config() : memory_config.value_or(input_tensor.memory_config());
     return prim::unary(queue_id, input_tensor, op_chain, output_dtype, output_memory_config, fp32_dest_acc_en, preserve_fp32_precision, optional_output_tensor);

From eedfd3847212d9f4f97fee7b5bf065bc6352bc38 Mon Sep 17 00:00:00 2001
From: Andrija Malbasa <amalbasa@tenstorrent.com>
Date: Tue, 12 Nov 2024 16:20:31 +0100
Subject: [PATCH 14/69] Angle op fix (#14129)

### Ticket
[Link to Github
Issue](https://github.com/tenstorrent/tt-metal/issues/14088)

### Problem description
ttnn.angle op incorrect implementation.

### What's changed
1. Fixed the op in
ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
2. Added nightly suite in
tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py sweep
3. Minor modficiations inside unary_complex sweeps invalidate vector
functions

### Checklist
Unit test from the issue link now fails if the mentioned code lines are
not removed/commented.
---
 .../sweeps/eltwise/unary_complex/angle/angle.py     | 13 +++++++++++--
 .../eltwise/unary_complex/angle_bw/angle_bw.py      |  2 +-
 .../sweeps/eltwise/unary_complex/polar/polar.py     |  2 +-
 .../eltwise/unary_complex/polar_bw/polar_bw.py      |  2 +-
 .../complex_unary/device/complex_unary_op.cpp       |  2 +-
 5 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
index e1e872d0585..98d7fc4660f 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
@@ -25,7 +25,7 @@
 # Each suite has a key name (in this case "suite_1" and "suite_2") which will associate the test vectors to this specific suite of inputs.
 # Developers can create their own generator functions and pass them to the parameters as inputs.
 parameters = {
-    "xfail": {
+    "nightly": {
         "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16)
         + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 16)
         + gen_shapes([1, 1], [256, 256], [1, 1], 16),
@@ -34,6 +34,15 @@
         "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
         "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
     },
+    "xfail": {
+        "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16)
+        + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 16)
+        + gen_shapes([1, 1], [256, 256], [1, 1], 16),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
 }
 
 
@@ -42,7 +51,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Inputs to eltwise binary must be tilized"
+        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is only supported on tiled layout"
     return False, None
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
index ce3dd28f636..d802c1fc9ae 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
@@ -44,7 +44,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Inputs to eltwise binary must be tilized"
+        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
     if test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is not supported on input_tensor_a"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
index 857f4d533fd..e534e272f87 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
@@ -42,7 +42,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Inputs to eltwise binary must be tilized"
+        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is only supported on tiled layout"
     return False, None
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
index 2ac0d2dec36..b8e5040cd53 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
@@ -44,7 +44,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Inputs to eltwise binary must be tilized"
+        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
     if test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is not supported on input_tensor_a"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
diff --git a/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
index 278a08bd844..f432ea54793 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
@@ -22,7 +22,7 @@ Tensor _imag(const ComplexTensor& input, const MemoryConfig& output_mem_config)
 }
 
 Tensor _angle(const ComplexTensor& input, const MemoryConfig& output_mem_config) {
-    return ttnn::neg( atan2(input[1],input[0],output_mem_config), output_mem_config );
+    return ttnn::atan2(input[0],input[1],output_mem_config);
 }
 
 Tensor _is_imag(const ComplexTensor& input, const MemoryConfig& output_mem_config) {

From b074101881de366207cb149867bda73fb1be1156 Mon Sep 17 00:00:00 2001
From: Guangyu Feng <gfeng@tenstorrent.com>
Date: Tue, 5 Nov 2024 15:34:19 -0500
Subject: [PATCH 15/69] Fix a non-c-typedef-for-linkage error

Allowing compilation to pass on Blackhole.
---
 tt_metal/hw/inc/blackhole/tensix_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tt_metal/hw/inc/blackhole/tensix_types.h b/tt_metal/hw/inc/blackhole/tensix_types.h
index b64779765ad..955677d9a94 100644
--- a/tt_metal/hw/inc/blackhole/tensix_types.h
+++ b/tt_metal/hw/inc/blackhole/tensix_types.h
@@ -57,7 +57,7 @@ typedef struct {
     uint32_t reserved_3 : 32;
 } packer_config_t;  // 16B
 
-typedef struct {
+struct fifo_ctl_t {
     uint32_t rd_ptr;
     uint32_t wr_ptr;
     uint32_t rsvd0;
@@ -67,7 +67,7 @@ typedef struct {
         return fmt::format("Fifo Control: rd_ptr(0x{:08x}) wr_ptr(0x{:08x})", rd_ptr, wr_ptr);
     }
 #endif
-} fifo_ctl_t;
+};
 
 typedef struct {
     uint32_t val[4];

From 4f13f0482b2bc7686a58f6fb4db251997f06ae56 Mon Sep 17 00:00:00 2001
From: Kartik Paigwar <132708568+kpaigwar@users.noreply.github.com>
Date: Tue, 12 Nov 2024 12:58:07 -0500
Subject: [PATCH 16/69] Add experimental fused qk ROPE (#14860)

#14540 : Added a new experimental Op `ttnn.experimental.rotary_embedding_llama_fused_qk` which
---
 .../misc/test_rotary_embedding_llama.py       | 109 ++++++--
 .../test_rotary_embedding_llama_fused_qk.py   | 137 ++++++++++
 ttnn/CMakeLists.txt                           |   4 +
 .../experimental/experimental_pybind.cpp      |   2 +
 .../rotary_embedding_llama_sharded.cpp        |   2 +-
 ...edding_llama_fused_qk_device_operation.cpp | 106 ++++++++
 ...edding_llama_fused_qk_device_operation.hpp |  32 +++
 ...bedding_llama_fused_qk_program_factory.cpp | 251 ++++++++++++++++++
 ...bedding_llama_fused_qk_program_factory.hpp |  20 ++
 .../rotary_embedding_llama_fused_qk.cpp       |  44 +++
 .../rotary_embedding_llama_fused_qk.hpp       |  34 +++
 ...rotary_embedding_llama_fused_qk_pybind.cpp |  50 ++++
 ...rotary_embedding_llama_fused_qk_pybind.hpp |  13 +
 13 files changed, 781 insertions(+), 23 deletions(-)
 create mode 100644 tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama_fused_qk.py
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.hpp

diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
index aa9db9dae89..617d72af3fb 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
@@ -11,7 +11,7 @@
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import (
     comp_pcc,
 )
-from models.utility_functions import skip_for_grayskull, skip_for_blackhole
+from models.utility_functions import skip_for_grayskull, skip_for_blackhole, nearest_32
 from models.demos.t3000.llama2_70b.tt.llama_common import precompute_freqs, freqs_to_rotation_matrix, gather_rotary_emb
 from models.demos.t3000.llama2_70b.tt.llama_rope import TtLlamaRotarySetup
 
@@ -33,22 +33,20 @@ def __init__(
         head_dim: int,
         mode: str,
         datatype=ttnn.bfloat16,
+        fuse_qk=False,
     ):
         super().__init__()
 
         self.head_dim = head_dim
         self.device = device
         self.mode = mode
+        self.fuse_qk = fuse_qk
 
         self.transformation_mat = ttnn.from_torch(
             get_rot_transformation_mat(dhead=ttnn.TILE_SIZE), device=device, layout=ttnn.TILE_LAYOUT, dtype=datatype
         )
 
-    def apply_rotary(self, x, cos, sin):
-        # n_head = 8 for Q
-        # n_head = 1 for K
-
-        compute_kernel_config = ttnn.WormholeComputeKernelConfig(
+        self.compute_kernel_config = ttnn.WormholeComputeKernelConfig(
             # math_fidelity=ttnn.MathFidelity.LoFi,
             math_fidelity=ttnn.MathFidelity.HiFi4,
             math_approx_mode=True,
@@ -56,20 +54,41 @@ def apply_rotary(self, x, cos, sin):
             packer_l1_acc=True,
         )
 
+    def apply_rotary(self, x, cos, sin):
+        # n_head = 8 for Q
+        # n_head = 1 for K
+
         rotary_output = ttnn.experimental.rotary_embedding_llama(
             x,
             cos,
             sin,
             self.transformation_mat,
             is_decode_mode=self.mode == "decode",
-            compute_kernel_config=compute_kernel_config,
+            compute_kernel_config=self.compute_kernel_config,
         )
 
         return rotary_output
 
+    def apply_fused_rotary(self, q, k, cos, sin):
+        # n_head = 8 for Q
+        # n_head = 1 for K
+        rotary_output_q, rotary_output_k = ttnn.experimental.rotary_embedding_llama_fused_qk(
+            q,
+            k,
+            cos,
+            sin,
+            self.transformation_mat,
+            compute_kernel_config=self.compute_kernel_config,
+        )
+
+        return rotary_output_q, rotary_output_k
+
     def forward(self, xq, xk, cos, sin):
-        xq = self.apply_rotary(xq, cos, sin)
-        xk = self.apply_rotary(xk, cos, sin)
+        if self.fuse_qk:
+            xq, xk = self.apply_fused_rotary(xq, xk, cos, sin)
+        else:
+            xq = self.apply_rotary(xq, cos, sin)
+            xk = self.apply_rotary(xk, cos, sin)
         return xq, xk
 
 
@@ -118,6 +137,7 @@ def run_test_rotary_embedding_llama(
     head_dim,
     max_seq_len,
     datatype=ttnn.bfloat16,
+    fuse_qk=False,
 ):
     # Prepare input
     torch.manual_seed(0)
@@ -162,30 +182,75 @@ def run_test_rotary_embedding_llama(
     pytorch_out = (torch_xq, torch_xk)
 
     # TT hardware / Modified PyTorch execution -------------------------------------------------------------
-    tt_model = TtLlamaRotary(device, head_dim, mode, datatype)
+    tt_model = TtLlamaRotary(device, head_dim, mode, datatype, fuse_qk)
 
     if mode == "decode":
         rope_setup_decode = TtLlamaRotarySetup(device, head_dim, max_seq_len)
-        cos, sin = rope_setup_decode.get_rot_mats(position_ids)
         tt_model.transformation_mat = rope_setup_decode.transformation_mat
 
         # For decode, TTNN expects inputs to be [1, batch, nh, dhead]
         inp = [x.transpose(1, 2) for x in inp]
         # inp: [seq_len, batch, n_heads, head_dim]
 
-        grid = (
-            ttnn.num_cores_to_corerangeset(batch, rope_setup_decode.core_grid, row_wise=True).bounding_box().grid_size()
-        )
-        input_mem_config = ttnn.create_sharded_memory_config(
-            shape=(1, batch, ttnn.TILE_SIZE, head_dim),
-            core_grid=ttnn.CoreGrid(y=grid.y, x=grid.x),
-            strategy=ttnn.ShardStrategy.HEIGHT,
-            orientation=ttnn.ShardOrientation.ROW_MAJOR,
-        )
+        if fuse_qk:
+            # For fused_qk, repeat the position_ids for q and k
+            position_ids = torch.concat([position_ids, position_ids])
+            cos, sin = rope_setup_decode.get_rot_mats(position_ids)
+            assert (
+                batch % 8 == 0 or batch == 1
+            ), "Batch size must be a multiple of 8 or less than 8 for fused_qk rotary embedding"
+            if batch == 1:
+                q_core_grid_start = (0, 0)
+                q_core_grid_end = (0, 0)
+                k_core_grid_start = (1, 0)
+                k_core_grid_end = (1, 0)
+            else:
+                q_core_grid_start = (0, 0)
+                q_core_grid_end = ((batch - 1) % 8, (batch // 8) - 1)
+                k_core_grid_start = (0, (batch // 8))
+                k_core_grid_end = ((batch - 1) % 8, (batch // 8) * 2 - 1)
+            q_input_mem_config = ttnn.create_sharded_memory_config(
+                shape=(nearest_32(n_heads), head_dim),
+                core_grid=ttnn.CoreRangeSet(
+                    {ttnn.CoreRange(ttnn.CoreCoord(*q_core_grid_start), ttnn.CoreCoord(*q_core_grid_end))}
+                ),
+                strategy=ttnn.ShardStrategy.HEIGHT,
+                orientation=ttnn.ShardOrientation.ROW_MAJOR,
+                use_height_and_width_as_shard_shape=True,
+            )
+            k_input_mem_config = ttnn.create_sharded_memory_config(
+                shape=(nearest_32(n_kv_heads), head_dim),
+                core_grid=ttnn.CoreRangeSet(
+                    {ttnn.CoreRange(ttnn.CoreCoord(*k_core_grid_start), ttnn.CoreCoord(*k_core_grid_end))}
+                ),
+                strategy=ttnn.ShardStrategy.HEIGHT,
+                orientation=ttnn.ShardOrientation.ROW_MAJOR,
+                use_height_and_width_as_shard_shape=True,
+            )
+            input_mem_configs = [q_input_mem_config, k_input_mem_config]
+
+        else:
+            cos, sin = rope_setup_decode.get_rot_mats(position_ids)
+            grid = (
+                ttnn.num_cores_to_corerangeset(batch, rope_setup_decode.core_grid, row_wise=True)
+                .bounding_box()
+                .grid_size()
+            )
+            input_mem_configs = [
+                ttnn.create_sharded_memory_config(
+                    shape=(1, batch, ttnn.TILE_SIZE, head_dim),
+                    core_grid=ttnn.CoreGrid(y=grid.y, x=grid.x),
+                    strategy=ttnn.ShardStrategy.HEIGHT,
+                    orientation=ttnn.ShardOrientation.ROW_MAJOR,
+                )
+                for _ in range(len(inp))
+            ]
 
         tt_inp = [
-            ttnn.from_torch(i, device=device, dtype=datatype, memory_config=input_mem_config, layout=ttnn.TILE_LAYOUT)
-            for i in inp
+            ttnn.from_torch(
+                x, device=device, dtype=datatype, memory_config=input_mem_configs[i], layout=ttnn.TILE_LAYOUT
+            )
+            for i, x in enumerate(inp)
         ]
         tt_inp += [cos, sin]  # Append cos and sin to the input list
     else:
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama_fused_qk.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama_fused_qk.py
new file mode 100644
index 00000000000..e7de947c9a6
--- /dev/null
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama_fused_qk.py
@@ -0,0 +1,137 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+from loguru import logger
+import torch
+import ttnn
+from models.utility_functions import skip_for_grayskull, skip_for_blackhole
+from tests.tt_eager.python_api_testing.unit_testing.misc.test_rotary_embedding_llama import (
+    run_test_rotary_embedding_llama,
+)
+
+
+@skip_for_blackhole("Requires eth connected devices to run, only single chip BH available. See #12349")
+@skip_for_grayskull("Requires eth connected devices to run")
+@pytest.mark.parametrize(
+    "batch, seq_len",
+    (
+        (32, 1),
+        (16, 1),
+        (8, 1),
+        (1, 1),
+    ),
+    ids=(
+        "decode_32",
+        "decode_16",
+        "decode_8",
+        "decode_1",
+    ),
+)
+@pytest.mark.parametrize(
+    "n_heads, n_kv_heads, head_dim",
+    (
+        (8, 1, 128),
+        (71, 32, 64),
+        (8, 1, 256),
+    ),
+)
+@pytest.mark.parametrize("datatype", (ttnn.bfloat16,))
+@pytest.mark.parametrize("pcc", (0.9997,))
+def test_rotary_embedding_llama_fused_qk(
+    batch,
+    seq_len,
+    n_heads,
+    n_kv_heads,
+    head_dim,
+    datatype,
+    pcc,
+    device,
+):
+    compute_grid_size = device.compute_with_storage_grid_size()
+    if compute_grid_size.x < 8 or compute_grid_size.y < 8:
+        pytest.skip(f"Requires grid size of at least {(8, 8)} to run")
+
+    run_test_rotary_embedding_llama(
+        device, batch, seq_len, pcc, n_heads, n_kv_heads, head_dim, 1, datatype, fuse_qk=True
+    )
+
+    # shift input/output tensor by creating very small tensor between loop
+    inp = torch.randn(1, 1, 32, 32)
+    test_tensor = (
+        ttnn.Tensor(
+            inp.reshape(-1).tolist(),
+            inp.shape,
+            ttnn.bfloat16,
+            ttnn.ROW_MAJOR_LAYOUT,
+        )
+        .to(ttnn.TILE_LAYOUT)
+        .to(device)
+    )
+
+
+@skip_for_blackhole("Requires eth connected devices to run, only single chip BH available. See #12349")
+@skip_for_grayskull("Requires eth connected devices to run")
+@pytest.mark.parametrize(
+    "batch, seq_len",
+    (
+        (32, 1),
+        (16, 1),
+        (8, 1),
+        (1, 1),
+    ),
+    ids=(
+        "decode_32",
+        "decode_16",
+        "decode_8",
+        "decode_1",
+    ),
+)
+@pytest.mark.parametrize(
+    "n_heads, n_kv_heads, head_dim",
+    ((8, 1, 128),),
+)
+@pytest.mark.parametrize("datatype", (ttnn.bfloat16,))
+@pytest.mark.parametrize("pcc", (0.9997,))
+def test_rotary_embedding_llama_fused_qk_with_program_cache(
+    batch,
+    seq_len,
+    n_heads,
+    n_kv_heads,
+    head_dim,
+    datatype,
+    pcc,
+    device,
+    use_program_cache,
+):
+    compute_grid_size = device.compute_with_storage_grid_size()
+    if compute_grid_size.x < 8 or compute_grid_size.y < 8:
+        pytest.skip(f"Requires grid size of at least {(8, 8)} to run")
+
+    mode = "decode" if seq_len == 1 else "prefill"
+
+    cache_tensors = []
+    for _ in range(3):
+        run_test_rotary_embedding_llama(
+            device, batch, seq_len, pcc, n_heads, n_kv_heads, head_dim, 1, datatype, fuse_qk=True
+        )
+
+        # shift input/output tensor by creating very small tensor between loop
+        inp = torch.randn(1, 1, 32, 32)
+        test_tensor = (
+            ttnn.Tensor(
+                inp.reshape(-1).tolist(),
+                inp.shape,
+                ttnn.bfloat16,
+                ttnn.ROW_MAJOR_LAYOUT,
+            )
+            .to(ttnn.TILE_LAYOUT)
+            .to(device)
+        )
+
+        cache_tensors.append(test_tensor)
+
+    num_ops = 5  # embedding + fused_qk_rope + transpose + pad + interleaved_to_sharded
+
+    assert device.num_program_cache_entries() == num_ops
diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index 0a12a92e04d..be490c58913 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -256,6 +256,10 @@ set(ALL_TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/rotary_embedding_llama_program_factory.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/rotary_embedding_llama.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/rotary_embedding_llama_pybind.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotate_half/device/rotate_half_device_operation.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotate_half/device/single_core/rotate_half_program_factory.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/rotate_half/rotate_half.cpp
diff --git a/ttnn/cpp/ttnn/operations/experimental/experimental_pybind.cpp b/ttnn/cpp/ttnn/operations/experimental/experimental_pybind.cpp
index 8f5c68b23de..9dc27bf43c9 100644
--- a/ttnn/cpp/ttnn/operations/experimental/experimental_pybind.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/experimental_pybind.cpp
@@ -26,6 +26,7 @@
 #include "ttnn/operations/experimental/paged_cache/paged_cache_pybind.hpp"
 #include "ttnn/operations/experimental/transformer/rotary_embedding/rotary_embedding_pybind.hpp"
 #include "ttnn/operations/experimental/transformer/rotary_embedding_llama/rotary_embedding_llama_pybind.hpp"
+#include "ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.hpp"
 #include "ttnn/operations/experimental/transformer/rotate_half/rotate_half_pybind.hpp"
 #include "ttnn/operations/experimental/transformer/split_query_key_value_and_split_heads/split_query_key_value_and_split_heads_pybind.hpp"
 #include "ttnn/cpp/ttnn/operations/experimental/copy/typecast/typecast_pybind.hpp"
@@ -52,6 +53,7 @@ void py_module(py::module& module) {
 
     transformer::py_bind_rotary_embedding(module);
     transformer::py_bind_rotary_embedding_llama(module);
+    transformer::py_bind_rotary_embedding_llama_fused_qk(module);
     transformer::py_bind_rotate_half(module);
 
     reduction::detail::bind_argmax_operation(module);
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp
index f6ccfba7910..e4505ad04f9 100644
--- a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp
@@ -29,7 +29,7 @@ void MAIN {
     constexpr uint32_t Ht = get_compile_time_arg_val(9); // How many rows (tiles) in n_heads dimension
 
     mm_init();
-    binary_op_init_common(rotated_in_interm_cb, cos_cb); // General Init for all binary ops
+    binary_op_init_common(rotated_in_interm_cb, sin_cb, sin_interm_cb); // General Init for all binary ops
 
     // Get the trans_mat
     cb_reserve_back(trans_mat_cb, onetile);
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.cpp
new file mode 100644
index 00000000000..7e6787ef498
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.cpp
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "rotary_embedding_llama_fused_qk_device_operation.hpp"
+#include "rotary_embedding_llama_fused_qk_program_factory.hpp"
+
+#include "tt_metal/common/constants.hpp"
+#include "tt_metal/host_api.hpp"
+
+namespace tt {
+
+namespace tt_metal {
+
+void RotaryEmbeddingLlamaFusedQK::validate(const std::vector<Tensor>& input_tensors) const {
+    using namespace tt::constants;
+    TT_FATAL(input_tensors.size() == 5, "Error");
+    const auto& q_input_tensor = input_tensors.at(0);
+    const auto& k_input_tensor = input_tensors.at(1);
+    const auto& cos = input_tensors.at(2);
+    const auto& sin = input_tensors.at(3);
+    const auto& trans_mat = input_tensors.at(4);
+
+    auto ref_device = q_input_tensor.device();
+    for (const auto& input : input_tensors) {
+        TT_FATAL(input.storage_type() == StorageType::DEVICE || input.storage_type() == StorageType::MULTI_DEVICE, "Operands to rotary embedding need to be on device!");
+        TT_FATAL(input.buffer() != nullptr, "Operands to rotary embedding need to be allocated in buffers on device!");
+        TT_FATAL(input.device() == ref_device, "Operands to rotary embedding need to be on same device!");
+        TT_FATAL((input.get_layout() == Layout::TILE), "Inputs to rotary embedding must be tilized");
+        TT_FATAL((input.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED), "inputs for RoPE must be HEIGHT_SHARDED.");
+        TT_FATAL((input.get_dtype() == DataType::BFLOAT16), "Inputs to rotary embedding must be bfloat16");
+    }
+
+    // Check for decode mode
+    TT_FATAL(q_input_tensor.get_logical_shape()[0] == 1 && k_input_tensor.get_logical_shape()[0] == 1, "rotary_embedding_llama_fused_qk currently only supports deocde mode qith seq_len=1.");
+
+    TT_FATAL(q_input_tensor.get_logical_shape()[-1] == k_input_tensor.get_logical_shape()[-1], "Q input tensor and K input tensor must have same head dimensions");
+    uint32_t head_dim = q_input_tensor.get_logical_shape()[-1];
+    TT_FATAL(head_dim <= 128 || std::get<ttnn::WormholeComputeKernelConfig>(this->compute_kernel_config).fp32_dest_acc_en == false, "If head_dim is > 128, fp32_dest_acc_en must be False");
+
+    // Check that head_dim is a multiple of 32
+    TT_FATAL(head_dim % TILE_WIDTH == 0, "Head dim must be a multiple of TILE_WIDTH");
+
+    TT_FATAL(q_input_tensor.memory_config().memory_layout == this->q_output_mem_config.memory_layout, "Q Input tensor and Q output tensor must have same memory layout");
+    TT_FATAL(k_input_tensor.memory_config().memory_layout == this->k_output_mem_config.memory_layout, "K Input tensor and K output tensor must have same memory layout");
+
+    // check that q and k have same batch size and lesser that equal to 32
+    uint32_t q_batch_size = q_input_tensor.get_logical_shape()[1];
+    uint32_t k_batch_size = k_input_tensor.get_logical_shape()[1];
+    TT_FATAL(q_batch_size == k_batch_size, "Q and K must have the equal batch size");
+    TT_FATAL(q_batch_size <= 32, "Q and K must have batch size less than or equal to 32, due to parallelization over core-grid of 64");
+    uint32_t q_num_cores = q_input_tensor.shard_spec()->grid.bounding_box().grid_size().x * q_input_tensor.shard_spec()->grid.bounding_box().grid_size().y;
+    uint32_t k_num_cores = k_input_tensor.shard_spec()->grid.bounding_box().grid_size().x * k_input_tensor.shard_spec()->grid.bounding_box().grid_size().y;
+    TT_FATAL(q_num_cores + k_num_cores <= 64, "Q and K must not exceed max core grid size of 64");
+
+    bool is_overlap = q_input_tensor.shard_spec()->grid.intersects(k_input_tensor.shard_spec()->grid);
+    TT_FATAL(!is_overlap, "Q and K must not overlap");
+
+    // Check that cos and sin have same dims
+    TT_FATAL(cos.get_logical_shape() == sin.get_logical_shape(), "Cos and Sin dims must match");
+    uint32_t cos_sin_batch_size = cos.get_logical_shape()[1];
+    TT_FATAL(cos_sin_batch_size == (q_batch_size + k_batch_size), "Cos and Sin are repeated for Q and K, so they must have the same batch size as the sum of Q and K batch sizes");
+
+    // Checks for transformation matrix
+    uint32_t trans_mat_num_cores = trans_mat.shard_spec()->grid.bounding_box().grid_size().x * trans_mat.shard_spec()->grid.bounding_box().grid_size().y;
+    TT_FATAL(trans_mat_num_cores >= (q_num_cores + k_num_cores), "Transformation matrix is repeated for Q and K must be sharded over core grid of Q and K");
+    TT_FATAL(trans_mat.shard_spec()->shape[0] == TILE_HEIGHT && trans_mat.shard_spec()->shape[1] == TILE_WIDTH, "Transformation matrix must be sharded to single tile of shape (32, 32)");
+
+}
+
+std::vector<ttnn::SimpleShape> RotaryEmbeddingLlamaFusedQK::compute_output_shapes(const std::vector<Tensor>& input_tensors) const {
+    const auto& q_input_tensor = input_tensors.at(0);
+    const auto& k_input_tensor = input_tensors.at(1);
+    auto q_shape = q_input_tensor.get_logical_shape();
+    auto k_shape = k_input_tensor.get_logical_shape();
+    return {q_shape, k_shape};
+}
+
+std::vector<Tensor> RotaryEmbeddingLlamaFusedQK::create_output_tensors(const std::vector<Tensor>& input_tensors) const {
+    const auto& q_input_tensor = input_tensors.at(0);
+    const auto& k_input_tensor = input_tensors.at(1);
+    auto output_shapes = this->compute_output_shapes(input_tensors);
+    return {create_device_tensor(
+        output_shapes[0], q_input_tensor.get_dtype(), q_input_tensor.get_layout(), q_input_tensor.device(), this->q_output_mem_config),
+        create_device_tensor(
+        output_shapes[1], k_input_tensor.get_dtype(), k_input_tensor.get_layout(), k_input_tensor.device(), this->k_output_mem_config)
+        };
+}
+
+operation::ProgramWithCallbacks RotaryEmbeddingLlamaFusedQK::create_program(
+    const std::vector<Tensor>& input_tensors, std::vector<Tensor>& output_tensors) const {
+    const auto& q_input_tensor = input_tensors.at(0);
+    const auto& k_input_tensor = input_tensors.at(1);
+    const auto& cos = input_tensors.at(2);
+    const auto& sin = input_tensors.at(3);
+    const auto& trans_mat = input_tensors.at(4);
+    auto& q_output_tensor = output_tensors.at(0);
+    auto& k_output_tensor = output_tensors.at(1);
+
+    return rotary_embedding_llama_fused_qk_multi_core_sharded(q_input_tensor, k_input_tensor, cos, sin, trans_mat, q_output_tensor, k_output_tensor, this->compute_kernel_config);
+
+}
+
+}  // namespace tt_metal
+
+}  // namespace tt
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.hpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.hpp
new file mode 100644
index 00000000000..5c1deaa919b
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_device_operation.hpp
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <functional>
+
+#include "ttnn/tensor/tensor.hpp"
+#include "ttnn/run_operation.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+
+namespace tt {
+
+namespace tt_metal {
+
+struct RotaryEmbeddingLlamaFusedQK {
+    const MemoryConfig q_output_mem_config;
+    const MemoryConfig k_output_mem_config;
+    const ttnn::DeviceComputeKernelConfig compute_kernel_config;
+
+    void validate(const std::vector<Tensor> &input_tensors) const;
+    std::vector<ttnn::SimpleShape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
+    std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &input_tensors) const;
+
+    operation::ProgramWithCallbacks create_program(
+        const std::vector<Tensor> &input_tensors, std::vector<Tensor> &output_tensors) const;
+};
+
+}  // namespace tt_metal
+
+}  // namespace tt
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.cpp
new file mode 100644
index 00000000000..7e5d610176c
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.cpp
@@ -0,0 +1,251 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vector>
+#include "rotary_embedding_llama_fused_qk_program_factory.hpp"
+#include "tt_metal/common/work_split.hpp"
+
+#include "tt_metal/common/constants.hpp"
+#include "tt_metal/detail/util.hpp"
+#include "tt_metal/host_api.hpp"
+
+namespace tt {
+
+namespace tt_metal {
+
+operation::ProgramWithCallbacks rotary_embedding_llama_fused_qk_multi_core_sharded(
+    const Tensor &q_input,
+    const Tensor &k_input,
+    const Tensor &cos,
+    const Tensor &sin,
+    const Tensor &trans_mat,
+    Tensor &q_output,
+    Tensor &k_output,
+    ttnn::DeviceComputeKernelConfig compute_kernel_config
+) {
+    Program program{};
+
+    const tt::DataFormat input_cb_data_format = tt_metal::datatype_to_dataformat_converter(q_input.get_dtype());
+    const uint32_t input_single_tile_size = tt_metal::detail::TileSize(input_cb_data_format);
+
+    const tt::DataFormat cos_cb_data_format = tt_metal::datatype_to_dataformat_converter(cos.get_dtype());
+    const uint32_t cos_single_tile_size = tt_metal::detail::TileSize(cos_cb_data_format);
+
+    const tt::DataFormat sin_cb_data_format = tt_metal::datatype_to_dataformat_converter(sin.get_dtype());
+    const uint32_t sin_single_tile_size = tt_metal::detail::TileSize(sin_cb_data_format);
+
+    const tt::DataFormat trans_mat_cb_data_format = tt_metal::datatype_to_dataformat_converter(trans_mat.get_dtype());
+    const uint32_t trans_mat_single_tile_size = tt_metal::detail::TileSize(trans_mat_cb_data_format);
+
+    const tt::DataFormat output_cb_data_format = tt_metal::datatype_to_dataformat_converter(q_output.get_dtype());
+    const uint32_t output_single_tile_size = tt_metal::detail::TileSize(output_cb_data_format);
+
+
+    std::optional<ShardSpec> q_shard_spec = q_input.shard_spec();
+    std::optional<ShardSpec> k_shard_spec = k_input.shard_spec();
+    std::optional<ShardSpec> cos_sin_shard_spec = cos.shard_spec();
+
+
+    const uint32_t batch = q_input.get_padded_shape()[1];
+    const uint32_t q_n_heads_t = q_shard_spec->shape[0] / constants::TILE_HEIGHT;
+    const uint32_t k_n_heads_t = k_shard_spec->shape[0] / constants::TILE_HEIGHT;
+
+    const uint32_t head_dim_t = q_shard_spec->shape[1] / constants::TILE_WIDTH;
+
+    tt_metal::Device *device = q_input.device();
+
+    auto [math_fidelity, math_approx_mode, fp32_dest_acc_en, packer_l1_acc, dst_full_sync_en] =
+        get_compute_kernel_config_args(device->arch(), compute_kernel_config);
+
+
+    CoreRange q_cores = q_shard_spec->grid.bounding_box();
+    uint32_t q_num_cores_x = q_cores.grid_size().x;
+    uint32_t q_num_cores_y = q_cores.grid_size().y;
+
+    CoreRange k_cores = k_shard_spec->grid.bounding_box();
+    uint32_t k_num_cores_x = k_cores.grid_size().x;
+    uint32_t k_num_cores_y = k_cores.grid_size().y;
+
+    CoreRange all_cores = cos_sin_shard_spec->grid.bounding_box();
+
+    const uint32_t num_q_input_tiles = q_n_heads_t * head_dim_t;
+    const uint32_t num_q_output_tiles = num_q_input_tiles;
+
+    const uint32_t num_k_input_tiles = k_n_heads_t * head_dim_t;
+    const uint32_t num_k_output_tiles = num_k_input_tiles;
+
+
+    // Parallelization
+
+    const uint32_t batch_per_core = 1; // TODO: To make general, add support for batch_per_core > 1
+
+    const uint32_t num_sin_cos_rows_per_core = batch_per_core;
+    uint32_t num_cos_sin_tiles = head_dim_t * num_sin_cos_rows_per_core;
+
+
+    // Set up the CBs
+    auto q_src_buffer = q_input.buffer();
+    auto k_src_buffer = k_input.buffer();
+    auto cos_buffer = cos.buffer();
+    auto sin_buffer = sin.buffer();
+    auto trans_mat_buffer = trans_mat.buffer();
+    auto q_dst_buffer = q_output.buffer();
+    auto k_dst_buffer = k_output.buffer();
+
+    uint32_t q_input_cb_index = CB::c_in0;
+    tt_metal::CircularBufferConfig cb_q_input_config =
+        tt_metal::CircularBufferConfig(
+            num_q_input_tiles * input_single_tile_size, {{q_input_cb_index, input_cb_data_format}})
+            .set_page_size(q_input_cb_index, input_single_tile_size)
+            .set_globally_allocated_address(*q_src_buffer);
+    auto cb_q_input = tt_metal::CreateCircularBuffer(program, q_cores, cb_q_input_config);
+
+    uint32_t k_input_cb_index = CB::c_in1;
+    tt_metal::CircularBufferConfig cb_k_input_config =
+        tt_metal::CircularBufferConfig(
+            num_k_input_tiles * input_single_tile_size, {{k_input_cb_index, input_cb_data_format}})
+            .set_page_size(k_input_cb_index, input_single_tile_size)
+            .set_globally_allocated_address(*k_src_buffer);
+    auto cb_k_input = tt_metal::CreateCircularBuffer(program, k_cores, cb_k_input_config);
+
+    uint32_t cos_cb_index = CB::c_in2;
+    tt_metal::CircularBufferConfig cb_cos_config =
+        tt_metal::CircularBufferConfig(num_cos_sin_tiles * cos_single_tile_size, {{cos_cb_index, cos_cb_data_format}})
+            .set_page_size(cos_cb_index, cos_single_tile_size)
+            .set_globally_allocated_address(*cos_buffer);
+    auto cb_cos = tt_metal::CreateCircularBuffer(program, all_cores, cb_cos_config);
+
+    uint32_t sin_cb_index = CB::c_in3;
+    tt_metal::CircularBufferConfig cb_sin_config =
+        tt_metal::CircularBufferConfig(num_cos_sin_tiles * sin_single_tile_size, {{sin_cb_index, sin_cb_data_format}})
+            .set_page_size(sin_cb_index, sin_single_tile_size)
+            .set_globally_allocated_address(*sin_buffer);
+    auto cb_sin = tt_metal::CreateCircularBuffer(program, all_cores, cb_sin_config);
+
+    uint32_t trans_mat_cb_index = CB::c_in4;
+    // We only take one tile of trans_mat
+    uint32_t num_trans_mat_tiles = 1;
+    tt_metal::CircularBufferConfig cb_trans_mat_config =
+        tt_metal::CircularBufferConfig(num_trans_mat_tiles * trans_mat_single_tile_size, {{trans_mat_cb_index, trans_mat_cb_data_format}})
+            .set_page_size(trans_mat_cb_index, trans_mat_single_tile_size).
+            set_globally_allocated_address(*trans_mat_buffer);
+    auto cb_trans_mat = tt_metal::CreateCircularBuffer(program, all_cores, cb_trans_mat_config);
+
+    uint32_t num_interm_tiles = head_dim_t;
+    uint32_t rotated_input_interm_cb_index = CB::c_intermed0;
+    tt_metal::CircularBufferConfig cb_rotated_input_interm_config =
+        tt_metal::CircularBufferConfig(
+            num_interm_tiles * input_single_tile_size, {{rotated_input_interm_cb_index, input_cb_data_format}})
+            .set_page_size(rotated_input_interm_cb_index, input_single_tile_size);
+    auto cb_rotated_input_interm = tt_metal::CreateCircularBuffer(program, all_cores, cb_rotated_input_interm_config);
+
+    uint32_t cos_interm_cb_index = CB::c_intermed1;
+    tt_metal::CircularBufferConfig cb_cos_interm_config =
+        tt_metal::CircularBufferConfig(
+            num_interm_tiles * input_single_tile_size, {{cos_interm_cb_index, cos_cb_data_format}})
+            .set_page_size(cos_interm_cb_index, cos_single_tile_size);
+    auto cb_cos_interm = tt_metal::CreateCircularBuffer(program, all_cores, cb_cos_interm_config);
+
+    uint32_t sin_interm_cb_index = CB::c_intermed2;
+    tt_metal::CircularBufferConfig cb_sin_interm_config =
+        tt_metal::CircularBufferConfig(
+            num_interm_tiles * input_single_tile_size, {{sin_interm_cb_index, sin_cb_data_format}})
+            .set_page_size(sin_interm_cb_index, sin_single_tile_size);
+    auto cb_sin_interm = tt_metal::CreateCircularBuffer(program, all_cores, cb_sin_interm_config);
+
+    uint32_t q_output_cb_index = CB::c_out0;  // output operands start at index 16
+    tt_metal::CircularBufferConfig cb_q_output_config =
+        tt_metal::CircularBufferConfig(
+            num_q_output_tiles * output_single_tile_size, {{q_output_cb_index, output_cb_data_format}})
+            .set_page_size(q_output_cb_index, output_single_tile_size)
+            .set_globally_allocated_address(*q_dst_buffer);
+    auto cb_q_output = tt_metal::CreateCircularBuffer(program, q_cores, cb_q_output_config);
+    uint32_t k_output_cb_index = CB::c_out1;  // output operands start at index 17
+    tt_metal::CircularBufferConfig cb_k_output_config =
+        tt_metal::CircularBufferConfig(
+            num_k_output_tiles * output_single_tile_size, {{k_output_cb_index, output_cb_data_format}})
+            .set_page_size(k_output_cb_index, output_single_tile_size)
+            .set_globally_allocated_address(*k_dst_buffer);
+    auto cb_k_output = tt_metal::CreateCircularBuffer(program, k_cores, cb_k_output_config);
+
+
+    // Set up the kernel
+    std::vector<uint32_t> q_compute_kernel_args = {
+        q_input_cb_index,
+        cos_cb_index,
+        sin_cb_index,
+        trans_mat_cb_index,
+        rotated_input_interm_cb_index,
+        cos_interm_cb_index,
+        sin_interm_cb_index,
+        q_output_cb_index,
+        head_dim_t,
+        q_n_heads_t,
+        };
+
+    auto q_rotary_embedding_kernel_id = tt_metal::CreateKernel(
+        program,
+        "ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp",
+        q_cores,
+        tt_metal::ComputeConfig{.math_fidelity=math_fidelity, .fp32_dest_acc_en=fp32_dest_acc_en, .compile_args = q_compute_kernel_args});
+
+    std::vector<uint32_t> k_compute_kernel_args = {
+        k_input_cb_index,
+        cos_cb_index,
+        sin_cb_index,
+        trans_mat_cb_index,
+        rotated_input_interm_cb_index,
+        cos_interm_cb_index,
+        sin_interm_cb_index,
+        k_output_cb_index,
+        head_dim_t,
+        k_n_heads_t,
+        };
+
+    auto k_rotary_embedding_kernel_id = tt_metal::CreateKernel(
+        program,
+        "ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama/device/kernels/compute/rotary_embedding_llama_sharded.cpp",
+        k_cores,
+        tt_metal::ComputeConfig{.math_fidelity=math_fidelity, .fp32_dest_acc_en=fp32_dest_acc_en, .compile_args = k_compute_kernel_args});
+
+    auto override_runtime_arguments_callback = [
+        cb_q_input,
+        cb_k_input,
+        cb_cos,
+        cb_sin,
+        cb_trans_mat,
+        cb_q_output,
+        cb_k_output
+    ](  const void *operation,
+        Program &program,
+        const std::vector<Tensor>& input_tensors,
+        const std::vector<std::optional<const Tensor>> &,
+        const std::vector<Tensor> &output_tensors) {
+
+        auto q_src_buffer = input_tensors.at(0).buffer();
+        auto k_src_buffer = input_tensors.at(1).buffer();
+        auto cos_buffer = input_tensors.at(2).buffer();
+        auto sin_buffer = input_tensors.at(3).buffer();
+        auto trans_mat_buffer = input_tensors.at(4).buffer();
+        auto q_dst_buffer = output_tensors.at(0).buffer();
+        auto k_dst_buffer = output_tensors.at(1).buffer();
+
+        // Update the CB globally allocated addresses here
+        UpdateDynamicCircularBufferAddress(program, cb_q_input, *q_src_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_k_input, *k_src_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_cos, *cos_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_sin, *sin_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_trans_mat, *trans_mat_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_q_output, *q_dst_buffer);
+        UpdateDynamicCircularBufferAddress(program, cb_k_output, *k_dst_buffer);
+
+
+    };
+
+    return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_arguments_callback};
+}
+
+}  // namespace tt_metal
+
+}  // namespace tt
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.hpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.hpp
new file mode 100644
index 00000000000..4894dd68718
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/device/rotary_embedding_llama_fused_qk_program_factory.hpp
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <functional>
+
+#include "ttnn/tensor/tensor.hpp"
+#include "ttnn/run_operation.hpp"
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+
+namespace tt {
+namespace tt_metal {
+
+operation::ProgramWithCallbacks rotary_embedding_llama_fused_qk_multi_core_sharded(
+    const Tensor &q_input, const Tensor &k_input, const Tensor &cos, const Tensor &sin, const Tensor &trans_mat, Tensor &q_output, Tensor &k_output, ttnn::DeviceComputeKernelConfig compute_kernel_config);
+
+}  // namespace tt_metal
+}  // namespace tt
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.cpp
new file mode 100644
index 00000000000..ab32489933e
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "rotary_embedding_llama_fused_qk.hpp"
+
+#include "device/rotary_embedding_llama_fused_qk_device_operation.hpp"
+
+namespace ttnn::operations::experimental::transformer {
+
+std::tuple<ttnn::Tensor, ttnn::Tensor> RotaryEmbeddingLlamaFusedQKOperation::invoke(
+    const Tensor &q_input_tensor,
+    const Tensor &k_input_tensor,
+    const Tensor &cos_cache,
+    const Tensor &sin_cache,
+    const Tensor& trans_mat,
+    std::optional<const ttnn::DeviceComputeKernelConfig> compute_kernel_config) {
+
+    std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({q_input_tensor, k_input_tensor, cos_cache, sin_cache, trans_mat})),
+    Tensor(operation::get_workers_for_op_output({q_input_tensor, k_input_tensor, cos_cache, sin_cache, trans_mat}))};
+    operation::launch_op(
+        [compute_kernel_config] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
+            auto& q_input_tensor = input_tensors.at(0);
+            auto& k_input_tensor = input_tensors.at(1);
+
+            auto arch = q_input_tensor.storage_type() == StorageType::DEVICE ? q_input_tensor.device()->arch() : ttnn::operations::experimental::auto_format::AutoFormat::GetDefaultDevice()->arch();
+            auto kernel_config_val = init_device_compute_kernel_config(arch, compute_kernel_config, MathFidelity::HiFi4, true, false, false);
+
+            tt::tt_metal::MemoryConfig q_output_memory_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG;
+            tt::tt_metal::MemoryConfig k_output_memory_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG;
+            if(q_input_tensor.storage_type() == StorageType::DEVICE) {
+                q_output_memory_config = q_input_tensor.memory_config();
+            }
+            if(k_input_tensor.storage_type() == StorageType::DEVICE) {
+                k_output_memory_config = k_input_tensor.memory_config();
+            }
+
+            return operation::run(
+                    RotaryEmbeddingLlamaFusedQK{q_output_memory_config, k_output_memory_config, kernel_config_val}, input_tensors);
+        }, {q_input_tensor, k_input_tensor, cos_cache, sin_cache, trans_mat}, output_tensors);
+    return {output_tensors.at(0), output_tensors.at(1)};
+}
+
+} // namespace ttnn::operations::experimental::transformer
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.hpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.hpp
new file mode 100644
index 00000000000..633dc678442
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk.hpp
@@ -0,0 +1,34 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "ttnn/decorators.hpp"
+
+#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
+
+namespace ttnn {
+namespace operations::experimental::transformer {
+
+ struct RotaryEmbeddingLlamaFusedQKOperation {
+    static std::tuple<ttnn::Tensor, ttnn::Tensor> invoke(
+        const Tensor& q_input_tensor,
+        const Tensor& k_input_tensor,
+        const Tensor& cos_cache,
+        const Tensor& sin_cache,
+        const Tensor& trans_mat,
+        const std::optional<const ttnn::DeviceComputeKernelConfig> compute_kernel_config = std::nullopt);
+ };
+
+}  // namespace operations::::experimental::transformer
+
+namespace experimental {
+
+constexpr auto rotary_embedding_llama_fused_qk = ttnn::register_operation_with_auto_launch_op<
+    "ttnn::experimental::rotary_embedding_llama_fused_qk",
+    ttnn::operations::experimental::transformer::RotaryEmbeddingLlamaFusedQKOperation>();
+
+}  // namespace experimental
+
+}  // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.cpp
new file mode 100644
index 00000000000..0086113fd2f
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.cpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "rotary_embedding_llama_fused_qk_pybind.hpp"
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "ttnn/cpp/pybind11/decorators.hpp"
+
+#include "rotary_embedding_llama_fused_qk.hpp"
+
+namespace ttnn::operations::experimental::transformer {
+
+void py_bind_rotary_embedding_llama_fused_qk(pybind11::module& module) {
+    namespace py = pybind11;
+
+    ttnn::bind_registered_operation(
+        module,
+        ttnn::experimental::rotary_embedding_llama_fused_qk,
+        R"doc(
+
+            Applies rotary embeddings to both `q_input_tensor` and `k_input_tensor` in parallel using precomputed sine and cosine values. This function is optimized for parallel execution, and both input tensors should share the same batch size and head dimensions.
+
+            Args:
+                q_input_tensor (ttnn.Tensor): The Q input tensor, with shape [1, batch, num_heads, head_dim].
+                k_input_tensor (ttnn.Tensor): The K input tensor, with shape [1, batch, num_kv_heads, head_dim].
+                cos_cache (ttnn.Tensor): Precomputed cosine values, with shape [1, 2 * batch, 32, head_dim].
+                sin_cache (ttnn.Tensor): Precomputed sine values, with shape [1, 2 * batch, 32, head_dim].
+                trans_mat (ttnn.Tensor): Transformation matrix tensor, with shape [1, 2 * batch, 32, 32].
+
+            Keyword args:
+                compute_kernel_config (DeviceComputeKernelConfig, optional): Optional configuration for the device compute kernel. Defaults to None.
+
+            Returns:
+                ttnn.Tensor, ttnn.Tensor: q and k output tensors with rotary embeddings applied.
+
+        )doc",
+        ttnn::pybind_arguments_t {
+            py::arg("q_input_tensor"),
+            py::arg("k_input_tensor"),
+            py::arg("cos_cache"),
+            py::arg("sin_cache"),
+            py::arg("trans_mat"),
+            py::kw_only(),
+            py::arg("compute_kernel_config") = std::nullopt});
+}
+
+}  // namespace ttnn::operations::experimental::transformer
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.hpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.hpp
new file mode 100644
index 00000000000..71b111ad94d
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding_llama_fused_qk/rotary_embedding_llama_fused_qk_pybind.hpp
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "pybind11/pybind_fwd.hpp"
+
+namespace ttnn::operations::experimental::transformer {
+
+void py_bind_rotary_embedding_llama_fused_qk(pybind11::module& module);
+
+}  // namespace ttnn::operations::experimental::transformer

From d1d3944a552522d441433794acda316165792be7 Mon Sep 17 00:00:00 2001
From: Andrew Fuller <afuller@tenstorrent.com>
Date: Tue, 12 Nov 2024 14:41:56 -0500
Subject: [PATCH 17/69] [skip ci] #14001: Add an ALIAS target for consuming
 TTNN (#14965)

### Ticket
#14371

### Problem description
tt-train is joining the mono repo party; it should have a proper
namespace'd target to consume

### What's changed
New ALIAS target for TTNN with a namespace.
---
 ttnn/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index be490c58913..4d286fc692c 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -654,6 +654,7 @@ if(WITH_PYTHON_BINDINGS)
 endif()
 
 add_library(ttnn SHARED ${TTNN_FINAL_SRC})
+add_library(Metalium::TTNN ALIAS ttnn)
 target_compile_options(
     ttnn
     PUBLIC

From 4624f4ea0781e9240b1565235edf6b286d548630 Mon Sep 17 00:00:00 2001
From: mtairum <mtairum@tenstorrent.com>
Date: Tue, 12 Nov 2024 19:46:01 +0000
Subject: [PATCH 18/69] #0: Disable llama test_model from all-post-commit CI
 pipeline. See issue #14474

---
 tests/scripts/run_python_model_tests.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh
index e3e6674a633..09aca8be769 100755
--- a/tests/scripts/run_python_model_tests.sh
+++ b/tests/scripts/run_python_model_tests.sh
@@ -52,9 +52,10 @@ run_python_model_tests_wormhole_b0() {
     # Llama3.2-11B  (#Skip: Weights too big for single-chip ci VM)
     llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
 
+    # FIXME Issue #14474
     # Run all Llama3 tests for 8B, 1B, and 3B weights - dummy weights with tight PCC check
-    for llama_dir in "$llama8b" "$llama1b" "$llama3b"; do
-        LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
-        echo "LOG_METAL: Llama3 tests for $llama_dir completed"
-    done
+    # for llama_dir in "$llama8b" "$llama1b" "$llama3b"; do
+    #     LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k "quick" ; fail+=$?
+    #     echo "LOG_METAL: Llama3 tests for $llama_dir completed"
+    # done
 }

From a080e2f035990d57ce5436a8affb3f052a5a1b5f Mon Sep 17 00:00:00 2001
From: Atul Krishnadas <atul.krishnadas@outlook.com>
Date: Tue, 12 Nov 2024 12:08:11 -0800
Subject: [PATCH 19/69] float32 tilize support (#14963)

### Ticket
[#14885](https://github.com/tenstorrent/tt-metal/issues/14885)
[#14570](https://github.com/tenstorrent/tt-metal/issues/14570)

### Problem description
 - Need to add support for fp32 tilize/untilize

### What's changed
 - FP32 was already supported with prior changes it seems
- I just allowed FP32 to be used for tilize/reshape, and tests seem to
check out!

### Checklist
- [ ] Post commit CI passes:
https://github.com/tenstorrent/tt-metal/actions/runs/11803430759
---
 tests/ttnn/unit_tests/test_reshape.py         | 36 ++++++++++++++++++-
 .../reshape_on_device/device/reshape_op.cpp   |  2 +-
 .../device/tilize_with_val_padding_op.cpp     |  2 +-
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/ttnn/unit_tests/test_reshape.py b/tests/ttnn/unit_tests/test_reshape.py
index 89ef751fcaa..f3ae5e8112f 100644
--- a/tests/ttnn/unit_tests/test_reshape.py
+++ b/tests/ttnn/unit_tests/test_reshape.py
@@ -343,10 +343,44 @@ def test_reshape_int(input_shape, output_shape, device):
     torch_result = torch_input_tensor.reshape(output_shape)
 
     input_tensor = ttnn.from_torch(
-        torch_input_tensor, layout=ttnn.TILE_LAYOUT, device=device, memory_config=ttnn.DRAM_MEMORY_CONFIG
+        torch_input_tensor,
+        layout=ttnn.TILE_LAYOUT,
+        device=device,
+        memory_config=ttnn.DRAM_MEMORY_CONFIG,
     )
     ttnn_output = ttnn.reshape(input_tensor, output_shape)
 
     output = ttnn.to_torch(ttnn_output)
 
     assert_with_pcc(torch_result, output, 0.9999)
+
+
+@pytest.mark.parametrize(
+    "input_shape, output_shape",
+    [
+        ((1, 1, 756, 128), (1, 27, 28, 128)),
+        ((1, 256, 16), (16, 256)),
+        ((1, 256, 1024), (1, 256, 16, 64)),
+        ((16, 16), (32, 8)),
+        ((1, 1445, 192), (1445, 192)),
+        ((1, 256), (1, 1, 256)),
+        ((16, 1, 32), (16, 1, 32)),
+    ],
+)
+def test_fp32_support(input_shape, output_shape, device):
+    torch_input_tensor = torch.randint(0, 100, input_shape)
+    torch_result = torch_input_tensor.reshape(output_shape)
+
+    input_tensor = ttnn.from_torch(
+        torch_input_tensor,
+        dtype=ttnn.float32,
+        layout=ttnn.TILE_LAYOUT,
+        device=device,
+        memory_config=ttnn.DRAM_MEMORY_CONFIG,
+    )
+
+    ttnn_output = ttnn.reshape(input_tensor, output_shape)
+
+    output = ttnn.to_torch(ttnn_output)
+
+    assert_with_pcc(torch_result, output, 0.9999)
diff --git a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/device/reshape_op.cpp b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/device/reshape_op.cpp
index 3aff0667de1..20c7045b827 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/device/reshape_op.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/device/reshape_op.cpp
@@ -17,7 +17,7 @@ void ReshapeDeviceOperation::validate(const std::vector<Tensor> &input_tensors)
     const auto& input_tensor_a = input_tensors.at(0);
     TT_FATAL(input_tensor_a.storage_type() == StorageType::DEVICE, "Operands to reshape need to be on device!");
     TT_FATAL(input_tensor_a.buffer() != nullptr , "Operands to reshape need to be allocated in buffers on device!");
-    TT_FATAL(input_tensor_a.get_dtype() == DataType::BFLOAT16, "Error");
+    TT_FATAL(input_tensor_a.get_dtype() == DataType::BFLOAT16 or input_tensor_a.get_dtype() == DataType::FLOAT32, "Error");
 
     TT_FATAL(input_tensor_a.get_layout() == Layout::TILE || input_tensor_a.get_layout() == Layout::ROW_MAJOR, "Only tile and row major reshape supported!");
 
diff --git a/ttnn/cpp/ttnn/operations/data_movement/tilize_with_val_padding/device/tilize_with_val_padding_op.cpp b/ttnn/cpp/ttnn/operations/data_movement/tilize_with_val_padding/device/tilize_with_val_padding_op.cpp
index f60e7e2bbb7..646af783bab 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/tilize_with_val_padding/device/tilize_with_val_padding_op.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/tilize_with_val_padding/device/tilize_with_val_padding_op.cpp
@@ -16,7 +16,7 @@ void TilizeWithValPadding::validate(const std::vector<Tensor>& input_tensors) co
     TT_FATAL(input_tensor_a.storage_type() == StorageType::DEVICE, "Operands need to be on device!");
     TT_FATAL(input_tensor_a.buffer() != nullptr, "Operands need to be allocated in buffers on device!");
     TT_FATAL(input_tensor_a.get_layout() == Layout::ROW_MAJOR, "Can only tilize row major data");
-    TT_FATAL(input_tensor_a.get_dtype() == DataType::BFLOAT16 or input_tensor_a.get_dtype() == DataType::UINT32, "Can only tilize bfloat16 or uint32 tensors");
+    TT_FATAL(input_tensor_a.get_dtype() == DataType::BFLOAT16 or input_tensor_a.get_dtype() == DataType::UINT32 or input_tensor_a.get_dtype() == DataType::FLOAT32, "Can only tilize bfloat16/float32 or uint32 tensors");
     TT_FATAL(input_shape.rank() >= 2, "Input tensor must be of rank >2, but its shape is {}", input_shape);
 
 
From 4f4542d67252883bbfe7149b7e11091adbf21f9f Mon Sep 17 00:00:00 2001
From: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
Date: Tue, 12 Nov 2024 12:22:51 -0800
Subject: [PATCH 20/69] Move NUM_CIRCULAR_BUFFERS to hw/inc (#14908)

### What's changed
- `NUM_CIRCULAR_BUFFERS` moved to `circular_buffer.h`
- FW Includes no longer include `common_runtime_address_map.h`
---
 .../tt_metal/perf_microbenchmark/dispatch/common.h    |  2 ++
 .../perf_microbenchmark/dispatch/test_prefetcher.cpp  |  1 +
 tt_metal/hostdevcommon/common_runtime_address_map.h   |  3 ---
 tt_metal/hw/inc/blackhole/eth_l1_address_map.h        |  2 +-
 tt_metal/hw/inc/blackhole/noc_nonblocking_api.h       |  4 ++++
 tt_metal/hw/inc/circular_buffer.h                     | 11 ++++++++++-
 tt_metal/hw/inc/dataflow_api.h                        |  1 -
 tt_metal/hw/inc/debug/sanitize_noc.h                  |  1 +
 tt_metal/hw/inc/grayskull/noc_nonblocking_api.h       |  4 ++++
 tt_metal/hw/inc/risc_attribs.h                        |  1 -
 tt_metal/hw/inc/risc_common.h                         |  1 -
 tt_metal/hw/inc/wormhole/noc_nonblocking_api.h        |  4 ++++
 tt_metal/impl/buffers/circular_buffer_types.hpp       |  3 ++-
 tt_metal/impl/device/device.cpp                       |  2 ++
 tt_metal/impl/program/program.cpp                     |  2 ++
 tt_metal/jit_build/data_format.cpp                    |  2 +-
 tt_metal/jit_build/genfiles.cpp                       |  4 +++-
 17 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
index fada32bb47c..3140eec26a9 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
@@ -13,6 +13,8 @@
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
 #include "noc/noc_parameters.h"
 
+#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
+
 extern bool debug_g;
 extern bool use_coherent_data_g;
 extern uint32_t dispatch_buffer_page_size_g;
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
index eb2fbae0807..a272a402425 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
@@ -16,6 +16,7 @@
 #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"
 #include "tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_test.hpp"
 
+#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
 
 #include "llrt/hal.hpp"
 
diff --git a/tt_metal/hostdevcommon/common_runtime_address_map.h b/tt_metal/hostdevcommon/common_runtime_address_map.h
index 3b1d25268bc..4d49751e22b 100644
--- a/tt_metal/hostdevcommon/common_runtime_address_map.h
+++ b/tt_metal/hostdevcommon/common_runtime_address_map.h
@@ -17,9 +17,6 @@
 constexpr static std::uint32_t L1_KERNEL_CONFIG_BASE = MEM_MAP_END;
 constexpr static std::uint32_t L1_KERNEL_CONFIG_SIZE = 69 * 1024;
 
-constexpr static std::uint32_t NUM_CIRCULAR_BUFFERS = 32;
-constexpr static std::uint32_t UINT32_WORDS_PER_CIRCULAR_BUFFER_CONFIG = 4;
-
 // Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
 #define NOC_0_X(noc_index, noc_size_x, x) (noc_index == 0 ? (x) : (noc_size_x-1-(x)))
 #define NOC_0_Y(noc_index, noc_size_y, y) (noc_index == 0 ? (y) : (noc_size_y-1-(y)))
diff --git a/tt_metal/hw/inc/blackhole/eth_l1_address_map.h b/tt_metal/hw/inc/blackhole/eth_l1_address_map.h
index 297846fd4bb..ef658b163f1 100644
--- a/tt_metal/hw/inc/blackhole/eth_l1_address_map.h
+++ b/tt_metal/hw/inc/blackhole/eth_l1_address_map.h
@@ -6,7 +6,7 @@
 
 #include <cstdint>
 
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h"
+#include "noc/noc_parameters.h" // L1_ALIGNMENT
 
 namespace eth_l1_mem {
 
diff --git a/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h b/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
index 3501cea7279..6fd84212e02 100644
--- a/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
@@ -9,6 +9,10 @@
 #include "noc_parameters.h"
 #include "dev_msgs.h"
 
+// Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
+#define NOC_0_X(noc_index, noc_size_x, x) (noc_index == 0 ? (x) : (noc_size_x-1-(x)))
+#define NOC_0_Y(noc_index, noc_size_y, y) (noc_index == 0 ? (y) : (noc_size_y-1-(y)))
+
 ////
 /*TODO: RT review this file, currently using wormhole b0 copy, check if any changes needed for BH*/
 constexpr uint32_t DYNAMIC_NOC_NCRISC_WR_CMD_BUF = 2; // all writes share cmd buf
diff --git a/tt_metal/hw/inc/circular_buffer.h b/tt_metal/hw/inc/circular_buffer.h
index c4e5a8a9eef..54823427734 100644
--- a/tt_metal/hw/inc/circular_buffer.h
+++ b/tt_metal/hw/inc/circular_buffer.h
@@ -4,8 +4,17 @@
 
 #pragma once
 
-#include "hostdevcommon/common_runtime_address_map.h"
+#include <cstdint>
+
+#if defined(KERNEL_BUILD) || defined(FW_BUILD)
 #include "risc_attribs.h"
+#else
+#define tt_l1_ptr
+#define tt_reg_ptr
+#endif
+
+constexpr static std::uint32_t NUM_CIRCULAR_BUFFERS = 32;
+constexpr static std::uint32_t UINT32_WORDS_PER_CIRCULAR_BUFFER_CONFIG = 4;
 
 // The command queue read interface controls reads from the issue region, host owns the issue region write interface
 // Commands and data to send to device are pushed into the issue region
diff --git a/tt_metal/hw/inc/dataflow_api.h b/tt_metal/hw/inc/dataflow_api.h
index c91e4398772..e902f27be24 100644
--- a/tt_metal/hw/inc/dataflow_api.h
+++ b/tt_metal/hw/inc/dataflow_api.h
@@ -21,7 +21,6 @@
 #include "debug/sanitize_noc.h"
 #include "debug/waypoint.h"
 #include "eth_l1_address_map.h"
-#include "hostdevcommon/common_runtime_address_map.h"
 #include "hostdevcommon/common_values.hpp"
 #include "risc_attribs.h"
 #include "third_party/umd/device/tt_silicon_driver_common.hpp"
diff --git a/tt_metal/hw/inc/debug/sanitize_noc.h b/tt_metal/hw/inc/debug/sanitize_noc.h
index 35f005b9801..75ad7e0b577 100644
--- a/tt_metal/hw/inc/debug/sanitize_noc.h
+++ b/tt_metal/hw/inc/debug/sanitize_noc.h
@@ -28,6 +28,7 @@
 #include "dev_msgs.h"
 #include "noc_overlay_parameters.h"
 #include "noc_parameters.h"
+#include "noc_nonblocking_api.h"
 
 // A couple defines for specifying read/write and multi/unicast
 #define DEBUG_SANITIZE_NOC_READ true
diff --git a/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h b/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
index 8fe58acf1a6..2fc64b5351c 100644
--- a/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
@@ -9,6 +9,10 @@
 #include "noc_parameters.h"
 #include "dev_msgs.h"
 
+// Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
+#define NOC_0_X(noc_index, noc_size_x, x) (noc_index == 0 ? (x) : (noc_size_x-1-(x)))
+#define NOC_0_Y(noc_index, noc_size_y, y) (noc_index == 0 ? (y) : (noc_size_y-1-(y)))
+
 ////
 
 constexpr uint32_t DYNAMIC_NOC_NCRISC_WR_CMD_BUF = 2; // all writes share cmd buf
diff --git a/tt_metal/hw/inc/risc_attribs.h b/tt_metal/hw/inc/risc_attribs.h
index d7c910b3f39..647a60df99a 100644
--- a/tt_metal/hw/inc/risc_attribs.h
+++ b/tt_metal/hw/inc/risc_attribs.h
@@ -18,7 +18,6 @@ union tt_uint64_t {
 #define tt_l1_ptr __attribute__((rvtt_l1_ptr))
 #define tt_reg_ptr __attribute__((rvtt_reg_ptr))
 
-
 inline __attribute__((always_inline)) uint64_t tt_l1_load(tt_uint64_t tt_l1_ptr *p)
 {
     tt_uint64_t v;
diff --git a/tt_metal/hw/inc/risc_common.h b/tt_metal/hw/inc/risc_common.h
index 66e859260cc..1794c60bd27 100644
--- a/tt_metal/hw/inc/risc_common.h
+++ b/tt_metal/hw/inc/risc_common.h
@@ -10,7 +10,6 @@
 #include <cstdint>
 
 #include "eth_l1_address_map.h"
-#include "hostdevcommon/common_runtime_address_map.h"
 #include "limits.h"
 #include "mod_div_lib.h"
 #include "noc_overlay_parameters.h"
diff --git a/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h b/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
index 48b6411911d..5eecc54540d 100644
--- a/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
@@ -9,6 +9,10 @@
 #include "noc_parameters.h"
 #include "dev_msgs.h"
 
+// Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
+#define NOC_0_X(noc_index, noc_size_x, x) (noc_index == 0 ? (x) : (noc_size_x-1-(x)))
+#define NOC_0_Y(noc_index, noc_size_y, y) (noc_index == 0 ? (y) : (noc_size_y-1-(y)))
+
 ////
 
 // Use VC 1 for unicast writes, and VC 4 for mcast writes
diff --git a/tt_metal/impl/buffers/circular_buffer_types.hpp b/tt_metal/impl/buffers/circular_buffer_types.hpp
index a0349316c05..512876f7091 100644
--- a/tt_metal/impl/buffers/circular_buffer_types.hpp
+++ b/tt_metal/impl/buffers/circular_buffer_types.hpp
@@ -12,10 +12,11 @@
 
 #include "tt_metal/common/logger.hpp"
 #include "tt_metal/common/tt_backend_api_types.hpp"
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NUM_CIRCULAR_BUFFERS
 #include "tt_metal/impl/buffers/buffer.hpp"
 #include "tt_metal/impl/tile/tile.hpp"
 
+#include "tt_metal/hw/inc/circular_buffer.h"
+
 namespace tt::tt_metal {
 inline namespace v0 {
 
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 95aaeda9372..fba7276a1c0 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -24,6 +24,8 @@
 #include "tt_metal/tools/profiler/tt_metal_tracy.hpp"
 #include "llrt/hal.hpp"
 
+#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
+
 namespace tt {
 
 namespace tt_metal {
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index 26dff0abc73..9dd1b98d811 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -29,6 +29,8 @@
 #include "tt_metal/program.hpp"
 #include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp"
 
+#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // L1_KERNEL_CONFIG_SIZE
+
 namespace tt::tt_metal {
 
 namespace {
diff --git a/tt_metal/jit_build/data_format.cpp b/tt_metal/jit_build/data_format.cpp
index 6e7d67276ad..367d727073d 100644
--- a/tt_metal/jit_build/data_format.cpp
+++ b/tt_metal/jit_build/data_format.cpp
@@ -13,7 +13,7 @@
 #include "fmt/base.h"                                  // for format_string
 #include "tt_metal/common/assert.hpp"                  // for tt_throw, TT_FATAL
 #include "tt_metal/common/base_types.hpp"              // for UnpackToDestMode
-#include "hostdevcommon/common_runtime_address_map.h"  // for NUM_CIRCULAR_B...
+#include "tt_metal/hw/inc/circular_buffer.h"
 
 namespace tt {
 
diff --git a/tt_metal/jit_build/genfiles.cpp b/tt_metal/jit_build/genfiles.cpp
index df6c4fa5b55..de1b29d8257 100644
--- a/tt_metal/jit_build/genfiles.cpp
+++ b/tt_metal/jit_build/genfiles.cpp
@@ -11,11 +11,13 @@
 
 #include "common/tt_backend_api_types.hpp"
 #include "common/utils.hpp"
-#include "hostdevcommon/common_runtime_address_map.h"
+#include "hostdevcommon/common_runtime_address_map.h" // NOC_0_X
 #include "hostdevcommon/common_values.hpp"
 #include "jit_build/build.hpp"
 #include "jit_build/settings.hpp"
 
+#include "tt_metal/hw/inc/circular_buffer.h"
+
 namespace fs = std::filesystem;
 
 using namespace std;

From ae51f42df146072f837f9937dda26a61d55b6fae Mon Sep 17 00:00:00 2001
From: Michael Chiou <mchiou@tenstorrent.com>
Date: Tue, 12 Nov 2024 10:15:00 -0800
Subject: [PATCH 21/69] #14961: skip test_with_ops on profiler_sync test

---
 tests/ttnn/tracy/test_profiler_sync.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ttnn/tracy/test_profiler_sync.py b/tests/ttnn/tracy/test_profiler_sync.py
index 4e1b629775d..dbeba8d0b9e 100644
--- a/tests/ttnn/tracy/test_profiler_sync.py
+++ b/tests/ttnn/tracy/test_profiler_sync.py
@@ -40,6 +40,7 @@ def test_with_ops(device):
     output = ttnn.matmul(a, b, memory_config=ttnn.L1_MEMORY_CONFIG, core_grid=ttnn.CoreGrid(y=8, x=8))
 
 
+@pytest.mark.skip("#14961 - Ring Buffer issue")
 @pytest.mark.parametrize("num_devices", [(8)])
 def test_all_devices(
     all_devices,

From 16123a16d93e9b8014688ad0024bc82540bd2fbb Mon Sep 17 00:00:00 2001
From: Michael Chiou <mchiou@tenstorrent.com>
Date: Tue, 12 Nov 2024 11:35:45 -0800
Subject: [PATCH 22/69] #14961: test with skip for grayskull

---
 tests/tt_metal/tools/profiler/test_device_profiler.py | 1 +
 tests/ttnn/tracy/test_profiler_sync.py                | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tt_metal/tools/profiler/test_device_profiler.py b/tests/tt_metal/tools/profiler/test_device_profiler.py
index 4132d4d90bc..af182dfc0ad 100644
--- a/tests/tt_metal/tools/profiler/test_device_profiler.py
+++ b/tests/tt_metal/tools/profiler/test_device_profiler.py
@@ -200,6 +200,7 @@ def test_dispatch_cores():
     os.environ["TT_METAL_DEVICE_PROFILER_DISPATCH"] = "0"
 
 
+@skip_for_grayskull()
 def test_profiler_host_device_sync():
     TOLERANCE = 0.1
 
diff --git a/tests/ttnn/tracy/test_profiler_sync.py b/tests/ttnn/tracy/test_profiler_sync.py
index dbeba8d0b9e..4e1b629775d 100644
--- a/tests/ttnn/tracy/test_profiler_sync.py
+++ b/tests/ttnn/tracy/test_profiler_sync.py
@@ -40,7 +40,6 @@ def test_with_ops(device):
     output = ttnn.matmul(a, b, memory_config=ttnn.L1_MEMORY_CONFIG, core_grid=ttnn.CoreGrid(y=8, x=8))
 
 
-@pytest.mark.skip("#14961 - Ring Buffer issue")
 @pytest.mark.parametrize("num_devices", [(8)])
 def test_all_devices(
     all_devices,

From a8ceec98842371bced1527d007bcad428fbe2c74 Mon Sep 17 00:00:00 2001
From: Joseph Chu <122298491+cfjchu@users.noreply.github.com>
Date: Tue, 12 Nov 2024 23:36:04 -0800
Subject: [PATCH 23/69] #14990: Address feedback in Programming Mesh of Devices
 Tech Report (#14991)

### Ticket
[Link to Github
Issue](https://github.com/tenstorrent/tt-metal/issues/14990)

### Problem description
Address feedback on typos and suggestions.

### What's changed
Some minor fixes to typos and add more description to the
line-all-gather operation.


### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 .../Programming Mesh of Devices with TT-NN.md | 29 +++++++++++++------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/tech_reports/Programming Mesh of Devices/Programming Mesh of Devices with TT-NN.md b/tech_reports/Programming Mesh of Devices/Programming Mesh of Devices with TT-NN.md
index 7fc0fbd5eb7..6f876565566 100644
--- a/tech_reports/Programming Mesh of Devices/Programming Mesh of Devices with TT-NN.md	
+++ b/tech_reports/Programming Mesh of Devices/Programming Mesh of Devices with TT-NN.md	
@@ -185,8 +185,8 @@ ttnn.Tensor([[[[ 2.00000,  2.00000,  ...,  2.00000,  2.00000],
 
 We now see that the following:
 
-- 32x32 chunk with elements of 1.0 is residing in Device 11 DRAM
-- 32x32 chunk with elements of 2.0 is residing in Device 10 DRAM
+- 32x32 chunk with elements of 1.0 is residing in Device 0 DRAM
+- 32x32 chunk with elements of 2.0 is residing in Device 1 DRAM
 
 We can also visualize this tensor distributed across our MeshDevice. The visualization will color devices that have shards resident to the device.
 
@@ -196,7 +196,7 @@ ttnn.visualize_mesh_device(mesh_device, tensor=mesh_tensor)
 >
                   DeviceMesh(rows=1, cols=2):
 ┌──────────────────────────────┬──────────────────────────────┐
-│         Dev. ID: 11          │         Dev. ID: 10          │
+│         Dev. ID: 0           │         Dev. ID: 1           │
 │            (0, 0)            │            (0, 1)            │
 │  ttnn.Shape([1, 1, 32, 32])  │  ttnn.Shape([1, 1, 32, 32])  │
 └──────────────────────────────┴──────────────────────────────┘
@@ -299,11 +299,11 @@ import ttnn
 mesh_device = ttnn.open_mesh_device(ttnn.MeshShape(2, 4), mesh_type=ttnn.MeshType.Ring)
 
 # Construct test tensor of data; 8 chunks of 32x32
-torch_tensor = torch.rand((1,1,32,128), dtype=torch.bfloat16)
+torch_tensor = torch.rand((1,1,32,256), dtype=torch.bfloat16)
 
 # Convert to ttnn.Tensor, tilize and move onto devices across mesh DRAM
 mesh_tensor = ttnn.from_torch(
-    torch_input_tensor,
+    torch_tensor,
     layout=ttnn.TILE_LAYOUT,
     device=mesh_device,
     mesh_mapper=ttnn.ShardTensorToMesh(mesh_device, dim=3),
@@ -316,11 +316,14 @@ output_tensor = ttnn.all_gather(mesh_tensor, dim=3, num_links=1)
 
 #### 5.2.2 Programming Example: All-Gather (Line)
 
-This time, we'll issue the CCL Line All-Gather operation along the cluster y-axis:
+Here we issue a Line All-Gather operation along the cluster-axis 0 (y-dimension), where the y-dimension is the height of the cluster.
+This kicks off four parallel CCL Line All-Gather operations, one for each column in the cluster. Each "line" is a list of two devices.
 
 <img src="images/image5_line_all_gather.png" style="width:500px;"/>
 
-*Figure 6: Line All-Gather execution on 2x4 MeshDevice *
+*Figure 6: Line All-Gather execution on 2x4 MeshDevice*
+
+The result tensor for each device in the column is the concatenation in `dim=3` for each device in the column. The per-device tensor shape is `[1, 1, 32, 32]` before the operation and `[1, 1, 32, 64]` after the operation.
 
 ```py
 import ttnn
@@ -328,7 +331,7 @@ import ttnn
 mesh_device = ttnn.open_mesh_device(ttnn.MeshShape(2, 4), mesh_type=ttnn.MeshType.Ring)
 
 # Construct test tensor of data; 8 chunks of 32x32
-torch_tensor = torch.rand((1,1,32,128), dtype=torch.bfloat16)
+torch_tensor = torch.rand((1,1,32,256), dtype=torch.bfloat16)
 
 # Convert to ttnn.Tensor, tilize and move onto devices across mesh DRAM
 mesh_tensor = ttnn.from_torch(
@@ -339,7 +342,15 @@ mesh_tensor = ttnn.from_torch(
 )
 
 # Execute Line All-Gather on the tensor
-output_tensor = ttnn.all_gather(mesh_tensor, dim=3, cluster_axis=0, mesh_device=mesh_device, topology=ttnn.Topology.Linear)
+output_tensor = ttnn.all_gather(
+    mesh_tensor,
+    dim=3,
+    cluster_axis=0,
+    mesh_device=mesh_device,
+    topology=ttnn.Topology.Linear,
+)
+
+ttnn.close_mesh_device(mesh_device)
 ```
 
 
From a5d9979eb38ca89d7dea9ccaf5c3c684a7f6b2d8 Mon Sep 17 00:00:00 2001
From: Nemanja Grujic <109360083+nemanjagrujic@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:05:07 +0100
Subject: [PATCH 24/69] #11512: Add sweep test for
 ttnn.transformers.attention_softmax (#14655)

---
 .github/workflows/ttnn-run-sweeps.yaml        |   4 +
 .../sweeps/pooling/global_avg_pool2d.py       | 135 ++++++++++++++++
 .../sweeps/pooling/max_pool2d.py              | 153 ++++++++++++++++++
 .../attention_softmax/attention_softmax.py    | 114 +++++++++++++
 .../attention_softmax/attention_softmax_.py   | 115 +++++++++++++
 5 files changed, 521 insertions(+)
 create mode 100644 tests/sweep_framework/sweeps/pooling/global_avg_pool2d.py
 create mode 100644 tests/sweep_framework/sweeps/pooling/max_pool2d.py
 create mode 100644 tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax.py
 create mode 100644 tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax_.py

diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml
index 20f23c51c74..92b69dae806 100644
--- a/.github/workflows/ttnn-run-sweeps.yaml
+++ b/.github/workflows/ttnn-run-sweeps.yaml
@@ -312,12 +312,16 @@ on:
           - conv2d.full.conv2d_sharding
           - conv2d.full.conv2d_sliding_window
           - conv2d.short.conv2d_short_sweep
+          - pooling.global_avg_pool2d
+          - pooling.max_pool2d
           - max_pool2d.short.max_pool2d_short_sweep
           - max_pool2d.full.max_pool2d_params
           - max_pool2d.full.max_pool2d_large_dims
           - transformer.concatenate_heads.concatenate_heads
           - transformer.split_query_key_value_and_split_heads.split_query_key_value_and_split_heads
           - transformer.split_query_key_value_and_split_heads.split_query_key_value_and_split_heads_kv_input
+          - transformer.attention_softmax.attention_softmax
+          - transformer.attention_softmax.attention_softmax_
           - data_movement.stack.stack_pytorch2
           - data_movement.repeat.repeat_pytorch2
           - data_movement.split.split_pytorch2
diff --git a/tests/sweep_framework/sweeps/pooling/global_avg_pool2d.py b/tests/sweep_framework/sweeps/pooling/global_avg_pool2d.py
new file mode 100644
index 00000000000..1d4630e9326
--- /dev/null
+++ b/tests/sweep_framework/sweeps/pooling/global_avg_pool2d.py
@@ -0,0 +1,135 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import torch
+import random
+import ttnn
+from tests.sweep_framework.sweep_utils.utils import gen_shapes
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 30
+
+random.seed(0)
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "suite_1") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 8)
+        + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 8),
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_a_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    # input_shape = test_vector["input_shape"]
+
+    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
+        return True, "bfloat8_b/bfloat4_b requires TILE_LAYOUT!"
+
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_shape,
+    input_a_dtype,
+    input_a_layout,
+    input_a_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    if input_a_layout == ttnn.ROW_MAJOR_LAYOUT and input_shape[-3] % 2 == 1:
+        input_shape[-3] += 1
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    # print(f"input_shape {input_shape} input_a_dtype {input_a_dtype} input_a_layout {input_a_layout}")
+
+    torch_output_tensor = torch.nn.functional.adaptive_avg_pool2d(torch_input_tensor_a, (1, 1))
+
+    # ttnn operates on channels-last tensors
+    if len(input_shape) == 4:
+        torch_input_tensor_a = torch.permute(torch_input_tensor_a, (0, 2, 3, 1))
+    elif len(input_shape) == 3:
+        torch_input_tensor_a = torch.permute(torch_input_tensor_a, (1, 2, 0))
+    elif len(input_shape) == 2:
+        torch_input_tensor_a = torch.permute(torch_input_tensor_a, (1, 0))
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_a_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    result = ttnn.global_avg_pool2d(input_tensor_a, memory_config=output_memory_config)
+    result = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    # ttnn operates on channels-last tensors
+    if len(input_shape) == 4:
+        output_tensor = torch.permute(result, (0, 3, 1, 2))
+    elif len(input_shape) == 3:
+        output_tensor = torch.permute(result, (2, 0, 1))
+    elif len(input_shape) == 2:
+        output_tensor = torch.permute(result, (1, 0))
+
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.99)
+    # print(f"pcc {pcc}")
+    return [pcc, e2e_perf]
+
+
+# Run sweeps locally
+# from tests.sweep_framework.framework.permutations import *
+
+# start_time = start_measuring_time()
+# for suite in parameters.keys():
+#     device_id = 0
+#     device = ttnn.open_device(device_id=device_id)
+#     suite_vectors = list(permutations(parameters[suite]))
+#     print(len(suite_vectors))
+#     for vector in suite_vectors:
+#         invalidate_res = invalidate_vector(vector)
+#         if invalidate_res[0]:
+#             print(f"Invalidated: {invalidate_res[1]}")
+#             continue
+#         try:
+#             passed, _ = run(**vector, device=device)
+#             if passed[0] != True:
+#                 print(passed)
+#         except Exception as e:
+#             print(e)
+
+#     ttnn.close_device(device)
+
+# e2e_perf = stop_measuring_time(start_time)
+# print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/pooling/max_pool2d.py b/tests/sweep_framework/sweeps/pooling/max_pool2d.py
new file mode 100644
index 00000000000..cf0a9138768
--- /dev/null
+++ b/tests/sweep_framework/sweeps/pooling/max_pool2d.py
@@ -0,0 +1,153 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import torch
+import random
+import ttnn
+from tests.sweep_framework.sweep_utils.utils import gen_shapes
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 30
+
+random.seed(0)
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "suite_1") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "xfail": {
+        "input_shape": [[4, 256, 40, 40]],  # gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16),
+        "kH": [2],
+        "kW": [2],
+        "stride": [1],
+        "padding": [0],
+        "dilation": [1],
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_a_layout": [ttnn.TILE_LAYOUT],  # ttnn.ROW_MAJOR_LAYOUT
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG],  # ttnn.L1_MEMORY_CONFIG
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    # input_shape = test_vector["input_shape"]
+
+    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
+        return True, "bfloat8_b/bfloat4_b requires TILE_LAYOUT!"
+
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_shape,
+    kH,
+    kW,
+    stride,
+    padding,
+    dilation,
+    input_a_dtype,
+    input_a_layout,
+    input_a_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    if input_a_layout == ttnn.ROW_MAJOR_LAYOUT and input_shape[-3] % 2 == 1:
+        input_shape[-3] += 1
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    # print(f"input_shape {input_shape} input_a_dtype {input_a_dtype} input_a_layout {input_a_layout}")
+
+    torch_output_tensor = torch.nn.functional.max_pool2d(
+        torch_input_tensor_a, (kH, kW), stride=(stride, stride), padding=padding, dilation=dilation
+    )
+
+    # The input tensor is expected to be in [NHW, C]
+    [N, C, H, W] = input_shape
+    torch_input_tensor_a = torch.permute(torch_input_tensor_a, (0, 2, 3, 1))
+    torch_input_tensor_a = torch.reshape(torch_input_tensor_a, [1, 1, N * H * W, C])
+
+    # print(f"bla {torch_input_tensor_a.shape}")
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_a_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    result = ttnn.max_pool2d(
+        input_tensor=input_tensor_a,
+        batch_size=N,
+        input_h=H,
+        input_w=W,
+        channels=C,
+        kernel_size=[kH, kW],
+        stride=[stride, stride],
+        padding=[padding, padding],
+        dilation=[dilation, dilation],
+        memory_config=output_memory_config,
+        applied_shard_scheme=ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+    )
+
+    result = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    # ttnn operates on channels-last tensors
+    output_tensor = torch.permute(result, (0, 3, 1, 2))
+
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.99)
+    print(f"pcc {pcc}")
+    return [pcc, e2e_perf]
+
+
+# Run sweeps locally
+# from tests.sweep_framework.framework.permutations import *
+
+# start_time = start_measuring_time()
+# for suite in parameters.keys():
+#     device_id = 0
+#     device = ttnn.open_device(device_id=device_id)
+#     suite_vectors = list(permutations(parameters[suite]))
+#     print(len(suite_vectors))
+#     for vector in suite_vectors:
+#         invalidate_res = invalidate_vector(vector)
+#         if invalidate_res[0]:
+#             print(f"Invalidated: {invalidate_res[1]}")
+#             continue
+#         try:
+#             passed, _ = run(**vector, device=device)
+#             if passed[0] != True:
+#                 print(passed)
+#         except Exception as e:
+#             print(e)
+
+#     ttnn.close_device(device)
+
+# e2e_perf = stop_measuring_time(start_time)
+# print(f"time {e2e_perf / 1000000000}s")
diff --git a/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax.py b/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax.py
new file mode 100644
index 00000000000..4abe4f402fe
--- /dev/null
+++ b/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax.py
@@ -0,0 +1,114 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+from itertools import combinations
+
+import torch
+import random
+import ttnn
+from functools import lru_cache
+from tests.sweep_framework.sweep_utils.utils import gen_shapes
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 360
+random.seed(0)
+
+
+# Does not have memory_config parameter
+parameters = {
+    "nightly": {
+        "input_shape": gen_shapes([1, 1, 1, 8], [6, 1, 256, 256], [1, 1, 1, 8], 4)
+        + gen_shapes([1, 1, 8], [6, 256, 256], [1, 1, 8], 4)
+        + gen_shapes([1, 8], [256, 256], [1, 8], 4),
+        "num_heads": [1, 2, 4, 8],
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_a_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "mask_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "mask_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "mask_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    # input_shape = test_vector["input_shape"]
+
+    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
+        return True, "bfloat8_b/bfloat4_b requires TILE_LAYOUT!"
+
+    return False, None
+
+
+def run(
+    input_shape,
+    num_heads,
+    input_a_dtype,
+    input_a_layout,
+    input_a_memory_config,
+    mask_dtype,
+    mask_layout,
+    mask_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    hidden_size = input_shape[-1]
+    head_size = hidden_size // num_heads
+
+    # Fix shape for row mayor
+    if input_a_layout == ttnn.ROW_MAJOR_LAYOUT and input_shape[-1] % 2 == 1:
+        input_shape[-1] += 1
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    torch_mask_tensor = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), mask_dtype
+    )(input_shape)
+    torch_mask_tensor = (torch_mask_tensor > 0).to(torch.float32)
+
+    # print(f"input_shape {input_shape} input_a_dtype {input_a_dtype} input_a_layout {input_a_layout}")
+
+    golden_function = ttnn.get_golden_function(ttnn.transformer.attention_softmax)
+    torch_output_tensor = golden_function(torch_input_tensor_a, head_size=head_size, attention_mask=torch_mask_tensor)
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_a_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+
+    mask_tensor = ttnn.from_torch(
+        torch_mask_tensor,
+        dtype=mask_dtype,
+        layout=mask_layout,
+        device=device,
+        memory_config=mask_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    result = ttnn.transformer.attention_softmax(input_tensor_a, head_size=head_size, attention_mask=mask_tensor)
+    output_tensor = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    # print(pcc)
+    return [pcc, e2e_perf]
diff --git a/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax_.py b/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax_.py
new file mode 100644
index 00000000000..b850630312d
--- /dev/null
+++ b/tests/sweep_framework/sweeps/transformer/attention_softmax/attention_softmax_.py
@@ -0,0 +1,115 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+from itertools import combinations
+
+import torch
+import random
+import ttnn
+from functools import lru_cache
+from tests.sweep_framework.sweep_utils.utils import gen_shapes
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+# Override the default timeout in seconds for hang detection.
+TIMEOUT = 360
+random.seed(0)
+
+
+# Does not have memory_config parameter
+parameters = {
+    "nightly": {
+        "input_shape": gen_shapes([1, 1, 1, 8], [6, 1, 256, 256], [1, 1, 1, 8], 4)
+        + gen_shapes([1, 1, 8], [6, 256, 256], [1, 1, 8], 4)
+        + gen_shapes([1, 8], [256, 256], [1, 8], 4),
+        "num_heads": [1, 2, 4, 8],
+        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_a_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "mask_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "mask_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "mask_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    # input_shape = test_vector["input_shape"]
+
+    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
+        return True, "bfloat8_b/bfloat4_b requires TILE_LAYOUT!"
+
+    return False, None
+
+
+def run(
+    input_shape,
+    num_heads,
+    input_a_dtype,
+    input_a_layout,
+    input_a_memory_config,
+    mask_dtype,
+    mask_layout,
+    mask_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    hidden_size = input_shape[-1]
+    head_size = hidden_size // num_heads
+
+    # Fix shape for row mayor
+    if input_a_layout == ttnn.ROW_MAJOR_LAYOUT and input_shape[-1] % 2 == 1:
+        input_shape[-1] += 1
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    torch_mask_tensor = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), mask_dtype
+    )(input_shape)
+    torch_mask_tensor = (torch_mask_tensor > 0).to(torch.float32)
+
+    # print(f"input_shape {input_shape} input_a_dtype {input_a_dtype} input_a_layout {input_a_layout}")
+
+    golden_function = ttnn.get_golden_function(ttnn.transformer.attention_softmax_)
+    tmp_input = torch.clone(torch_input_tensor_a)
+    torch_output_tensor = golden_function(tmp_input, head_size=head_size, attention_mask=torch_mask_tensor)
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_a_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+
+    mask_tensor = ttnn.from_torch(
+        torch_mask_tensor,
+        dtype=mask_dtype,
+        layout=mask_layout,
+        device=device,
+        memory_config=mask_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    result = ttnn.transformer.attention_softmax_(input_tensor_a, head_size=head_size, attention_mask=mask_tensor)
+    output_tensor = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    # print(pcc)
+    return [pcc, e2e_perf]

From 650c5c33628f31c947c26af26439f33b3f263d86 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nsidwell@tenstorrent.com>
Date: Wed, 13 Nov 2024 10:41:47 -0500
Subject: [PATCH 25/69] #14826: Remove misoptimizations from init code (#14861)

1) Stop wzerorange being recognized as memset. Memset is no longer
pulled in.

2) Reduce insns in data image copy. Original loop was 21 isnsn (3.5 per
word), new loop is 10 insns (3.3 per word).

3) Do not use a loop for residue. We only have to handle 0, 1 and 2
cases. A loop is more overhead.

4) Sprinkle a few more unroll-inhibiting pragmas around.

Rename init code as do_crt1, to make it clearer what it is doing.

These changes remove 436 bytes from a kernel code.
---
 tt_metal/hw/firmware/src/brisc.cc       |  3 +-
 tt_metal/hw/firmware/src/brisck.cc      |  8 +--
 tt_metal/hw/firmware/src/erisc.cc       |  8 ++-
 tt_metal/hw/firmware/src/idle_erisc.cc  |  7 +--
 tt_metal/hw/firmware/src/idle_erisck.cc |  8 +--
 tt_metal/hw/firmware/src/ncrisc.cc      |  3 +-
 tt_metal/hw/firmware/src/ncrisck.cc     | 13 ++--
 tt_metal/hw/firmware/src/trisc.cc       |  5 +-
 tt_metal/hw/firmware/src/trisck.cc      |  8 +--
 tt_metal/hw/inc/firmware_common.h       | 79 +++++++++++++------------
 tt_metal/hw/toolchain/substitutes.cpp   | 22 +++----
 tt_metal/hw/toolchain/tmu-crt0.S        | 42 ++++---------
 tt_metal/hw/toolchain/tmu-crt0k.S       |  1 +
 13 files changed, 91 insertions(+), 116 deletions(-)

diff --git a/tt_metal/hw/firmware/src/brisc.cc b/tt_metal/hw/firmware/src/brisc.cc
index c019e5a4764..51c2bcc4338 100644
--- a/tt_metal/hw/firmware/src/brisc.cc
+++ b/tt_metal/hw/firmware/src/brisc.cc
@@ -340,8 +340,7 @@ int main() {
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
 
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    l1_to_local_mem_copy((uint*)__ldm_data_start, (uint tt_l1_ptr*)MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH, num_words);
+    do_crt1((uint32_t*)MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH);
 
     mailboxes->launch_msg_rd_ptr = 0; // Initialize the rdptr to 0
     noc_index = 0;
diff --git a/tt_metal/hw/firmware/src/brisck.cc b/tt_metal/hw/firmware/src/brisck.cc
index f9f04eec011..21ab5c2bdd2 100644
--- a/tt_metal/hw/firmware/src/brisck.cc
+++ b/tt_metal/hw/firmware/src/brisck.cc
@@ -18,9 +18,6 @@
 #include "tools/profiler/kernel_profiler.hpp"
 #include <kernel_includes.hpp>
 
-extern uint32_t __kernel_init_local_l1_base[];
-extern uint32_t __fw_export_end_text[];
-
 void kernel_launch(uint32_t kernel_base_addr) {
 
 #if defined(DEBUG_NULL_KERNELS) && !defined(DISPATCH_KERNEL)
@@ -29,7 +26,10 @@ void kernel_launch(uint32_t kernel_base_addr) {
     while (c_tensix_core::read_wall_clock() < end_time);
 #endif
 #else
-    firmware_kernel_common_init((void tt_l1_ptr *)(kernel_base_addr + (uint32_t) __kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
+    extern uint32_t __kernel_init_local_l1_base[];
+    extern uint32_t __fw_export_end_text[];
+    do_crt1((uint32_t tt_l1_ptr
+                 *)(kernel_base_addr + (uint32_t)__kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
 
     if constexpr (NOC_MODE == DM_DEDICATED_NOC) {
         noc_local_state_init(NOC_INDEX);
diff --git a/tt_metal/hw/firmware/src/erisc.cc b/tt_metal/hw/firmware/src/erisc.cc
index e56b40ce1a2..664afdc89f9 100644
--- a/tt_metal/hw/firmware/src/erisc.cc
+++ b/tt_metal/hw/firmware/src/erisc.cc
@@ -36,15 +36,17 @@ uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used
 
 void __attribute__((noinline)) Application(void) {
     WAYPOINT("I");
-    rtos_context_switch_ptr = (void (*)())RtosTable[0];
 
-    // Not using firmware_kernel_common_init since it is copying to registers
+    // Not using do_crt1 since it is copying to registers???
     // TODO: need to find free space that routing FW is not using
+    extern uint32_t __ldm_bss_start[];
+    extern uint32_t __ldm_bss_end[];
     wzerorange(__ldm_bss_start, __ldm_bss_end);
 
+    rtos_context_switch_ptr = (void (*)())RtosTable[0];
+
     risc_init();
     noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);
-    wzerorange(__ldm_bss_start, __ldm_bss_end);
 
     for (uint32_t n = 0; n < NUM_NOCS; n++) {
         noc_local_state_init(n);
diff --git a/tt_metal/hw/firmware/src/idle_erisc.cc b/tt_metal/hw/firmware/src/idle_erisc.cc
index 78ccf5fd14f..554dd5952ab 100644
--- a/tt_metal/hw/firmware/src/idle_erisc.cc
+++ b/tt_metal/hw/firmware/src/idle_erisc.cc
@@ -98,13 +98,8 @@ int main() {
     conditionally_disable_l1_cache();
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    uint32_t *local_mem_ptr = (uint32_t *)__ldm_data_start;
-    uint32_t *l1_data_ptr = (uint32_t *)MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH;
+    do_crt1((uint32_t *)MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH);
     uint32_t heartbeat = 0;
-    for (int32_t i = 0; i < num_words; i++) {
-        local_mem_ptr[i] = l1_data_ptr[i];
-    }
 
     risc_init();
 
diff --git a/tt_metal/hw/firmware/src/idle_erisck.cc b/tt_metal/hw/firmware/src/idle_erisck.cc
index 756c71d0448..e7fdeb2f718 100644
--- a/tt_metal/hw/firmware/src/idle_erisck.cc
+++ b/tt_metal/hw/firmware/src/idle_erisck.cc
@@ -21,13 +21,13 @@
 
 #include <kernel_includes.hpp>
 
-extern uint32_t __kernel_init_local_l1_base[];
-extern uint32_t __fw_export_end_text[];
-
 void kernel_launch(uint32_t kernel_base_addr) {
     DeviceZoneScopedMainChildN("ERISC-KERNEL");
 
-    firmware_kernel_common_init((void tt_l1_ptr *)(kernel_base_addr + (uint32_t) __kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
+    extern uint32_t __kernel_init_local_l1_base[];
+    extern uint32_t __fw_export_end_text[];
+    do_crt1((uint32_t tt_l1_ptr
+                 *)(kernel_base_addr + (uint32_t)__kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
 
     noc_local_state_init(NOC_INDEX);
 
diff --git a/tt_metal/hw/firmware/src/ncrisc.cc b/tt_metal/hw/firmware/src/ncrisc.cc
index 99ca7a6566c..53cc38c894e 100644
--- a/tt_metal/hw/firmware/src/ncrisc.cc
+++ b/tt_metal/hw/firmware/src/ncrisc.cc
@@ -76,8 +76,7 @@ int main(int argc, char *argv[]) {
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
 
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    l1_to_local_mem_copy((uint *)__ldm_data_start, (uint tt_l1_ptr *)MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH, num_words);
+    do_crt1((uint32_t tt_l1_ptr *)MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH);
 
     risc_init();
 
diff --git a/tt_metal/hw/firmware/src/ncrisck.cc b/tt_metal/hw/firmware/src/ncrisck.cc
index 6f24d5b107b..e3c228f96fb 100644
--- a/tt_metal/hw/firmware/src/ncrisck.cc
+++ b/tt_metal/hw/firmware/src/ncrisck.cc
@@ -26,9 +26,6 @@ uint32_t noc_nonposted_writes_acked[NUM_NOCS];
 uint32_t noc_nonposted_atomics_acked[NUM_NOCS];
 uint32_t noc_posted_writes_num_issued[NUM_NOCS];
 
-extern uint32_t __kernel_init_local_l1_base[];
-extern uint32_t __fw_export_end_text[];
-
 void kernel_launch(uint32_t kernel_base_addr) {
 
   DeviceZoneScopedMainChildN("NCRISC-KERNEL");
@@ -38,11 +35,13 @@ void kernel_launch(uint32_t kernel_base_addr) {
     while (c_tensix_core::read_wall_clock() < KERNEL_RUN_TIME);
 #endif
 #else
+  extern uint32_t __kernel_init_local_l1_base[];
+  extern uint32_t __fw_export_end_text[];
+  do_crt1((
+      uint32_t tt_l1_ptr *)(kernel_base_addr + (uint32_t)__kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
 
-    firmware_kernel_common_init((void tt_l1_ptr *)(kernel_base_addr + (uint32_t) __kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
-
-    if constexpr (NOC_MODE == DM_DEDICATED_NOC) {
-        noc_local_state_init(NOC_INDEX);
+  if constexpr (NOC_MODE == DM_DEDICATED_NOC) {
+      noc_local_state_init(NOC_INDEX);
     } else {
         noc_local_state_init(NOC_0);
         noc_local_state_init(NOC_1);
diff --git a/tt_metal/hw/firmware/src/trisc.cc b/tt_metal/hw/firmware/src/trisc.cc
index 505e0bce3bf..1a1f770d1cd 100644
--- a/tt_metal/hw/firmware/src/trisc.cc
+++ b/tt_metal/hw/firmware/src/trisc.cc
@@ -77,10 +77,7 @@ int main(int argc, char *argv[]) {
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
 
-    uint tt_l1_ptr *local_l1_start_addr =
-        (uint tt_l1_ptr *)PREPROCESSOR_EXPAND(MEM_TRISC, COMPILE_FOR_TRISC, _INIT_LOCAL_L1_BASE_SCRATCH);
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    l1_to_local_mem_copy((uint *)__ldm_data_start, local_l1_start_addr, num_words);
+    do_crt1((uint32_t tt_l1_ptr *)PREPROCESSOR_EXPAND(MEM_TRISC, COMPILE_FOR_TRISC, _INIT_LOCAL_L1_BASE_SCRATCH));
 
     // Initialize GPRs to all 0s
 #pragma GCC unroll 0
diff --git a/tt_metal/hw/firmware/src/trisck.cc b/tt_metal/hw/firmware/src/trisck.cc
index 862c2964808..7e624b5767c 100644
--- a/tt_metal/hw/firmware/src/trisck.cc
+++ b/tt_metal/hw/firmware/src/trisck.cc
@@ -33,9 +33,6 @@ volatile tt_reg_ptr uint * mailbox_base[4] = {
 };
 }
 
-extern uint32_t __kernel_init_local_l1_base[];
-extern uint32_t __fw_export_end_text[];
-
 void kernel_launch(uint32_t kernel_base_addr)
 {
   DeviceZoneScopedMainChildN("TRISC-KERNEL");
@@ -44,7 +41,10 @@ void kernel_launch(uint32_t kernel_base_addr)
     ckernel::wait(KERNEL_RUN_TIME);
 #endif
 #else
-    firmware_kernel_common_init((void tt_l1_ptr *)(kernel_base_addr + (uint32_t) __kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
+  extern uint32_t __kernel_init_local_l1_base[];
+  extern uint32_t __fw_export_end_text[];
+  do_crt1((
+      uint32_t tt_l1_ptr *)(kernel_base_addr + (uint32_t)__kernel_init_local_l1_base - (uint32_t)__fw_export_end_text));
 
 #if defined(UCK_CHLKC_UNPACK)
     // Make sure DBG_FEATURE_DISABLE register is cleared before every kernel is executed
diff --git a/tt_metal/hw/inc/firmware_common.h b/tt_metal/hw/inc/firmware_common.h
index fd048640f3c..f346cffab41 100644
--- a/tt_metal/hw/inc/firmware_common.h
+++ b/tt_metal/hw/inc/firmware_common.h
@@ -14,53 +14,58 @@
 #include "hostdevcommon/kernel_structs.h"
 #include "dev_msgs.h"
 
-extern uint32_t __ldm_bss_start[];
-extern uint32_t __ldm_bss_end[];
-extern uint32_t __ldm_data_start[];
-extern uint32_t __ldm_data_end[];
-extern void (* __init_array_start[])();
-extern void (* __init_array_end[])();
-
 extern void kernel_init(uint32_t kernel_init);
 extern void kernel_launch(uint32_t kernel_base_addr);
 
-inline void l1_to_local_mem_copy(uint32_t *local_mem_addr, uint32_t tt_l1_ptr *l1_addr, int32_t len) {
-    // Cover L1 load latency of 6 cycles for the bulk of the copy
-    int32_t n = 0;
-    while (n < len - 5) {
-        uint32_t v0 = l1_addr[n + 0];
-        uint32_t v1 = l1_addr[n + 1];
-        uint32_t v2 = l1_addr[n + 2];
-        uint32_t v3 = l1_addr[n + 3];
-        uint32_t v4 = l1_addr[n + 4];
-        uint32_t v5 = l1_addr[n + 5];
-        local_mem_addr[n + 0] = v0;
-        local_mem_addr[n + 1] = v1;
-        local_mem_addr[n + 2] = v2;
-        local_mem_addr[n + 3] = v3;
-        local_mem_addr[n + 4] = v4;
-        local_mem_addr[n + 5] = v5;
-        n += 6;
-    }
-    // Could optimize this further (eg, loop of 2 or 4), probably not worth it
-    while (n < len) {
-        local_mem_addr[n] = l1_addr[n];
-        n++;
-    }
-}
-
-inline void firmware_kernel_common_init(void *init_local_l1_base) {
-
-    // Handle stuff typically done in crt0 in asm.  Easier to do in C
+// Clear bss, copy initial data image, run global constructors.
+inline void do_crt1(uint32_t tt_l1_ptr *data_image) {
+    // Clear bss.
+    extern uint32_t __ldm_bss_start[];
+    extern uint32_t __ldm_bss_end[];
     wzerorange(__ldm_bss_start, __ldm_bss_end);
 
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    l1_to_local_mem_copy((uint32_t *)__ldm_data_start, (uint32_t *)((uint8_t *)init_local_l1_base), num_words);
+    // Copy initialized data.
+    extern uint32_t __ldm_data_start[];
+    extern uint32_t __ldm_data_end[];
+    uint32_t *dst = __ldm_data_start;
+    uint32_t tt_l1_ptr *src = data_image;
+    unsigned len = __ldm_data_end - __ldm_data_start;
+#pragma GCC unroll 0
+    while (len >= 3) {
+        auto v0 = src[0], v1 = src[1], v2 = src[2];
+        // 1) Make sure the optimizer does not think this is memcpy by
+        // hiding the pointer bookkeeping in an asm.
+        // 2) The scheduler doesn't know the above loads have 6 cycle
+        // latency. We emit the 3 bookkeeping adds as a single block
+        // in the load shadow before the stores. The optimizer will
+        // not be able to move these.
+        // 3) We don't need early clobbers here because of the +r
+        // constraint -- early clobbers would pessimize.
+        asm inline(
+            "addi %0,%0,3*%3\n\t"
+            "addi %1,%1,3*%3\n\t"
+            "addi %2,%2,-3"
+            : "+r"(src), "+r"(dst), "+r"(len)
+            : "i"(sizeof(v0)));
+        dst[-3] = v0, dst[-2] = v1, dst[-1] = v2;
+    }
+    // There are 0, 1 or 2 words of residue. This is smaller than a loop.
+    // We get smaller code layout by expecting the conditions to be true.
+    if (__builtin_expect(len >= 1, true)) {
+        dst[0] = src[0];
+        if (__builtin_expect(len >= 2, true))
+            dst[1] = src[1];
+    }
 
+    // Run constructors.
+    extern void (*__init_array_start[])();
+    extern void (*__init_array_end[])();
+#pragma GCC unroll 0
     for (void (** fptr)() = __init_array_start; fptr < __init_array_end; fptr++) {
         (**fptr)();
     }
 }
+
 FORCE_INLINE
 uint32_t firmware_config_init(tt_l1_ptr mailboxes_t* const mailboxes, uint32_t core_type_index, uint32_t dispatch_class) {
 
diff --git a/tt_metal/hw/toolchain/substitutes.cpp b/tt_metal/hw/toolchain/substitutes.cpp
index 731741bc2de..f2ddf66984e 100644
--- a/tt_metal/hw/toolchain/substitutes.cpp
+++ b/tt_metal/hw/toolchain/substitutes.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+// SPDX-FileCopyrightText: © 2023, 2024 Tenstorrent Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
 
@@ -7,22 +7,18 @@
 
 using namespace std;
 
-extern "C" int atexit(void (*f)(void))
-{
-    return 0;
-}
+extern "C" int atexit(void (*f)(void)) { return 0; }
 
-extern "C" void exit(int ec)
-{
+extern "C" void exit(int ec) {
     while (1) { asm volatile ("" ::: "memory"); }
 }
 
-extern "C" void wzerorange(uint32_t *start, uint32_t *end) __attribute__((aligned(16)));
-
-extern "C" void wzerorange(uint32_t *start, uint32_t *end)
-{
-    for (; start != end; start++)
-    {
+extern "C" void wzerorange(uint32_t *start, uint32_t *end) {
+#pragma GCC unroll 0
+    while (start != end) {
         *start = 0;
+        // Prevent optimizer considering this loop equivalent to
+        // memset (start, 0, end - start) -- that's code bloat.
+        asm inline("addi %0,%0,%1" : "+r"(start) : "i"(sizeof(*start)));
     }
 }
diff --git a/tt_metal/hw/toolchain/tmu-crt0.S b/tt_metal/hw/toolchain/tmu-crt0.S
index 6ca0b611054..c19f66a573a 100644
--- a/tt_metal/hw/toolchain/tmu-crt0.S
+++ b/tt_metal/hw/toolchain/tmu-crt0.S
@@ -14,28 +14,11 @@ _start:
 	addi gp,gp,%lo(__global_pointer$)
 	.option pop
 
-	// set stack pointer
-	lui	sp, %hi(__stack_top)
-	addi	sp, sp, %lo(__stack_top)
+	// set stack pointer, reserve 16 bytes for main's arguments
+	lui	sp, %hi(__stack_top - 16)
+	addi	sp, sp, %lo(__stack_top - 16)
 
-	// Clear bss
-	lui	a0, %hi(__ldm_bss_start)
-	addi	a0, a0, %lo(__ldm_bss_start)
-	lui	a1, %hi(__ldm_bss_end)
-	addi	a1, a1, %lo(__ldm_bss_end)
-	call    wzerorange
-
-	// Run global initializers
-	lui	s2, %hi(__init_array_start)
-	addi	s2, s2, %lo(__init_array_start)
-	lui	s3, %hi(__init_array_end)
-	addi	s3, s3, %lo(__init_array_end)
-	beq	s2, s3, 2f
-1:	lw      a0, 0(s2)
-	jalr    a0
-	addi  	s2, s2, 4
-	bne	s2, s3, 1b
-2:
+	// main is responsible for the rest of crt -- clear bss, copy data image, run global constructors
 
   /* Pass in the tensix coordinates as argv[0][0] through argv[0][3].
      argc = 1, envp = NULL. In memory, we'll have
@@ -44,16 +27,15 @@ _start:
    * sp+8: s1
    * sp+c: 0
    */
-  addi    sp, sp, -16 /* (stack is aligned to 16 bytes in riscv calling convention) */
   addi    a0, sp, 8
-  sw      a0, 0(sp)
-  sw      zero, 4(sp)
-  sw      s1, 8(sp)
-  sw      zero, 12(sp)
-
-  li      a0, 1 # argc = 1
-  mv      a1, sp
-  mv      a2, zero
+  sw      a0, 0(sp) // argv[0]
+  sw      zero, 4(sp) // argv[1]
+  sw      s1, 8(sp) // argv[0][0..3]
+  sw      zero, 12(sp) // argv[0][4..7]
+
+  li      a0, 1 // argc = 1
+  mv      a1, sp // argv
+  mv      a2, zero // env
 
   call    main
   tail    exit
diff --git a/tt_metal/hw/toolchain/tmu-crt0k.S b/tt_metal/hw/toolchain/tmu-crt0k.S
index 177d79cdb84..849e31eb199 100644
--- a/tt_metal/hw/toolchain/tmu-crt0k.S
+++ b/tt_metal/hw/toolchain/tmu-crt0k.S
@@ -3,5 +3,6 @@
 .type   _start, @function
 
 _start:
+	// kernel_launch is responsible for the rest of crt -- clear bss, copy data image, run global constructors
 	tail    _Z13kernel_launchm
 	.size  _start, .-_start

From 8631caaf4d63fc59688675080c617ba8d2b7c7a1 Mon Sep 17 00:00:00 2001
From: Almeet Bhullar <abhullar@tenstorrent.com>
Date: Mon, 11 Nov 2024 22:58:08 +0000
Subject: [PATCH 26/69] UMD bump to include changes for parsing BH cluster desc
 yaml

---
 tt_metal/third_party/umd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tt_metal/third_party/umd b/tt_metal/third_party/umd
index baed1c889ff..4b5dffd5828 160000
--- a/tt_metal/third_party/umd
+++ b/tt_metal/third_party/umd
@@ -1 +1 @@
-Subproject commit baed1c889ff63e4d84b23d6d8cb3ad24a2390384
+Subproject commit 4b5dffd5828baf05d2c999f5f9217c90ead3975b

From 58063dbf6c9107ffce0154b513a9d073e20a9af3 Mon Sep 17 00:00:00 2001
From: Almeet Bhullar <abhullar@tenstorrent.com>
Date: Tue, 12 Nov 2024 00:10:23 +0000
Subject: [PATCH 27/69] #14617: Use BH cluster desc and pass physical pcie
 endpoint to device since it is dependent on BH board type

---
 tt_metal/common/metal_soc_descriptor.cpp      | 22 ++++++++++++++++++-
 tt_metal/common/metal_soc_descriptor.h        |  6 ++++-
 .../hw/inc/blackhole/noc/noc_parameters.h     |  7 ------
 .../hw/inc/grayskull/noc/noc_parameters.h     |  5 -----
 tt_metal/hw/inc/wormhole/noc/noc_parameters.h |  6 -----
 tt_metal/impl/device/device.cpp               | 14 +++++++++---
 tt_metal/jit_build/genfiles.cpp               | 15 -------------
 tt_metal/llrt/tt_cluster.cpp                  | 18 +++++++--------
 .../soc_descriptors/blackhole_140_arch.yaml   |  4 ++--
 9 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tt_metal/common/metal_soc_descriptor.cpp b/tt_metal/common/metal_soc_descriptor.cpp
index 0a3565fac3f..f69e0c728dd 100644
--- a/tt_metal/common/metal_soc_descriptor.cpp
+++ b/tt_metal/common/metal_soc_descriptor.cpp
@@ -361,6 +361,25 @@ void metal_SocDescriptor::generate_physical_routing_to_profiler_flat_id() {
 #endif
 }
 
+// TODO: This should be deleted once we switch to virtual coordinates
+void metal_SocDescriptor::update_pcie_cores(const BoardType &board_type) {
+    if (this->arch != tt::ARCH::BLACKHOLE) {
+        return;
+    }
+    switch (board_type) {
+        case DEFAULT: { // Workaround for BHs running FW that does not return board type in the cluster yaml
+            this->pcie_cores = {CoreCoord(11, 0)};
+        }
+        break;
+        case P150A: {
+            this->pcie_cores = {CoreCoord(2 , 0)};
+        }
+        break;
+        default:
+            TT_THROW("Need to update PCIe core assignment for new Blackhole type, file issue to abhullar");
+    }
+}
+
 // UMD initializes and owns tt_SocDescriptor
 // For architectures with translation tables enabled, UMD will remove the last x rows from the descriptors in
 // tt_SocDescriptor (workers list and worker_log_to_routing_x/y maps) This creates a virtual coordinate system, where
@@ -369,10 +388,11 @@ void metal_SocDescriptor::generate_physical_routing_to_profiler_flat_id() {
 // removing the harvested physical coordiniates Metal needs the true harvesting state so we generate physical
 // descriptors from virtual coordinates We also initialize additional lookup tables to translate physical coordinates to
 // virtual coordinates because UMD APIs expect virtual coordinates.
-metal_SocDescriptor::metal_SocDescriptor(const tt_SocDescriptor& other, uint32_t harvesting_mask) :
+metal_SocDescriptor::metal_SocDescriptor(const tt_SocDescriptor& other, uint32_t harvesting_mask, const BoardType &board_type) :
     tt_SocDescriptor(other) {
     this->generate_physical_descriptors_from_virtual(harvesting_mask);
     this->load_dram_metadata_from_device_descriptor();
     this->generate_logical_eth_coords_mapping();
     this->generate_physical_routing_to_profiler_flat_id();
+    this->update_pcie_cores(board_type);
 }
diff --git a/tt_metal/common/metal_soc_descriptor.h b/tt_metal/common/metal_soc_descriptor.h
index 3ff1079af42..cca59dd4e5f 100644
--- a/tt_metal/common/metal_soc_descriptor.h
+++ b/tt_metal/common/metal_soc_descriptor.h
@@ -7,6 +7,7 @@
 #include "common/tt_backend_api_types.hpp"
 #include "core_coord.hpp"
 #include "third_party/umd/device/tt_soc_descriptor.h"
+#include "third_party/umd/device/tt_cluster_descriptor.h"
 
 //! tt_SocDescriptor contains information regarding the SOC configuration targetted.
 /*!
@@ -37,7 +38,7 @@ struct metal_SocDescriptor : public tt_SocDescriptor {
     std::map<CoreCoord, int> logical_eth_core_to_chan_map;
     std::map<int, CoreCoord> chan_to_logical_eth_core_map;
 
-    metal_SocDescriptor(const tt_SocDescriptor& other, uint32_t harvesting_mask);
+    metal_SocDescriptor(const tt_SocDescriptor& other, uint32_t harvesting_mask, const BoardType &board_type);
     metal_SocDescriptor() = default;
 
     CoreCoord get_preferred_worker_core_for_dram_channel(int dram_chan) const;
@@ -73,4 +74,7 @@ struct metal_SocDescriptor : public tt_SocDescriptor {
     void load_dram_metadata_from_device_descriptor();
     void generate_logical_eth_coords_mapping();
     void generate_physical_routing_to_profiler_flat_id();
+    // This is temporary until virtual coordinates are enabled because BH chips on
+    //  different cards use different physical PCIe NoC endpoints
+    void update_pcie_cores(const BoardType &board_type);
 };
diff --git a/tt_metal/hw/inc/blackhole/noc/noc_parameters.h b/tt_metal/hw/inc/blackhole/noc/noc_parameters.h
index 0603066257d..11fde1045cf 100644
--- a/tt_metal/hw/inc/blackhole/noc/noc_parameters.h
+++ b/tt_metal/hw/inc/blackhole/noc/noc_parameters.h
@@ -342,13 +342,6 @@
    (((uint32_t)(y_end  )) << (1*NOC_ADDR_NODE_ID_BITS)) |   \
     ((uint32_t)(x_end  )))
 
-
-#define PCIE_NOC_X 11
-#define PCIE_NOC_Y 0
-
-#define PCIE_NOC1_X 5
-#define PCIE_NOC1_Y 11
-
 // BH has 64 bit address space but pipegen was not updated to support this so WH scheme of encoding addresses is used (36 bits of address followed by coordinates)
 // This means that lo and mid registers need to have the address portion while the coordinates go into hi register
 #define NOC_COORD_REG_OFFSET 0 // offset (from LSB) in register holding x-y coordinate
diff --git a/tt_metal/hw/inc/grayskull/noc/noc_parameters.h b/tt_metal/hw/inc/grayskull/noc/noc_parameters.h
index 59ab2b932cf..ad3d95e2abb 100644
--- a/tt_metal/hw/inc/grayskull/noc/noc_parameters.h
+++ b/tt_metal/hw/inc/grayskull/noc/noc_parameters.h
@@ -246,11 +246,6 @@
    (((uint64_t)(y_end))   << (NOC_ADDR_LOCAL_BITS+NOC_ADDR_NODE_ID_BITS)) |     \
    ((uint64_t)(addr)))
 
-
-
-#define PCIE_NOC_X 0
-#define PCIE_NOC_Y 4
-
 // GS address encoding is 32 bits of address followed by coordinate. First address goes into lo register, coordinates are in the mid register
 #define NOC_COORD_REG_OFFSET 0 // offset (from LSB) in register holding x-y coordinate
 
diff --git a/tt_metal/hw/inc/wormhole/noc/noc_parameters.h b/tt_metal/hw/inc/wormhole/noc/noc_parameters.h
index 87c9ca24415..1703ff54fbf 100644
--- a/tt_metal/hw/inc/wormhole/noc/noc_parameters.h
+++ b/tt_metal/hw/inc/wormhole/noc/noc_parameters.h
@@ -257,12 +257,6 @@
    (((uint64_t)(y_end))   << (NOC_ADDR_LOCAL_BITS+NOC_ADDR_NODE_ID_BITS)) |     \
    ((uint64_t)(addr)))
 
-#define PCIE_NOC_X 0
-#define PCIE_NOC_Y 3
-
-#define PCIE_NOC1_X 9
-#define PCIE_NOC1_Y 8
-
 // 36 bits of address followed by coordinate. First 32 bits of address go into lo register, remaining address bits and coordinates are in the mid register
 #define NOC_COORD_REG_OFFSET 4 // offset (from LSB) in register holding x-y coordinate
 
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index fba7276a1c0..33dbe2b4547 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -297,6 +297,16 @@ void Device::initialize_device_kernel_defines()
     } else {
         this->device_kernel_defines_.emplace("IS_NOT_POW2_NUM_L1_BANKS", "1");
     }
+
+    // TODO (abhullar): Until we switch to virtual coordinates, we need to pass physical PCIe coordinates to device
+    //  because Blackhole PCIe endpoint is dependent on board type
+    const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(this->id());
+    auto pcie_cores = soc_d.get_pcie_cores();
+    auto grid_size = this->grid_size();
+    this->device_kernel_defines_.emplace("PCIE_NOC_X", std::to_string(pcie_cores[0].x));
+    this->device_kernel_defines_.emplace("PCIE_NOC_Y", std::to_string(pcie_cores[0].y));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(NOC_0_X(NOC::NOC_1, grid_size.x, pcie_cores[0].x)));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(NOC_0_X(NOC::NOC_1, grid_size.x, pcie_cores[0].y)));
 }
 
 void Device::initialize_build() {
@@ -3401,8 +3411,6 @@ void Device::MarkAllocationsSafe() {
 
 void Device::generate_device_headers(const std::string &path) const
 {
-
-    // Basic Allocator generates number of banks which may not be power of 2, so we could just pad and alias for now
     const size_t num_dram_banks = this->num_banks(BufferType::DRAM);
     const size_t num_dram_banks_pow2 = std::pow(2, std::ceil(std::log2(num_dram_banks)));
     std::vector<CoreCoord> dram_noc_coord_per_bank(num_dram_banks);
@@ -3411,7 +3419,7 @@ void Device::generate_device_headers(const std::string &path) const
         dram_noc_coord_per_bank[bank_id] = this->dram_core_from_dram_channel(this->dram_channel_from_bank_id(bank_id));
         dram_offsets_per_bank[bank_id] = this->bank_offset(BufferType::DRAM, bank_id);
     }
-    const size_t num_l1_banks = this->num_banks(BufferType::L1); // 128
+    const size_t num_l1_banks = this->num_banks(BufferType::L1);
     const size_t num_l1_banks_pow2 = std::pow(2, std::ceil(std::log2(num_l1_banks)));
     std::vector<CoreCoord> l1_noc_coord_per_bank(num_l1_banks);
     std::vector<int32_t> l1_offset_per_bank(num_l1_banks);
diff --git a/tt_metal/jit_build/genfiles.cpp b/tt_metal/jit_build/genfiles.cpp
index de1b29d8257..a21211cbc0a 100644
--- a/tt_metal/jit_build/genfiles.cpp
+++ b/tt_metal/jit_build/genfiles.cpp
@@ -667,19 +667,4 @@ void jit_build_genfiles_bank_to_noc_coord_descriptor(
     file_stream_siec.close();
 }
 
-static string generate_noc_core_xy_range_define(const std::vector<CoreCoord>& cores) {
-    stringstream ss;
-
-    string end_of_line = " \\\n    ( \\";
-    for (const auto& core : cores) {
-        ss << end_of_line << endl;
-        ss << "    ((x) == NOC_0_X(noc_idx, noc_size_x, (uint32_t)" << core.x
-           << ") && (y) == NOC_0_Y(noc_idx, noc_size_y, (uint32_t)" << core.y << "))";
-        end_of_line = " || \\";
-    }
-    ss << ")" << endl;
-
-    return ss.str();
-}
-
 }  // namespace tt::tt_metal
diff --git a/tt_metal/llrt/tt_cluster.cpp b/tt_metal/llrt/tt_cluster.cpp
index 715ab3c974e..16b28d3f3b8 100644
--- a/tt_metal/llrt/tt_cluster.cpp
+++ b/tt_metal/llrt/tt_cluster.cpp
@@ -137,20 +137,16 @@ BoardType Cluster::get_board_type(chip_id_t chip_id) const {
 }
 
 void Cluster::generate_cluster_descriptor() {
-    this->cluster_desc_path_ = (this->target_type_ == TargetDevice::Silicon and this->arch_ == tt::ARCH::WORMHOLE_B0)
+    this->cluster_desc_path_ = (this->target_type_ == TargetDevice::Silicon)
                                    ? tt_ClusterDescriptor::get_cluster_descriptor_file_path()
                                    : "";
 
     // Cluster descriptor yaml not available for Blackhole bring up
-    if (this->arch_ == tt::ARCH::GRAYSKULL or this->arch_ == tt::ARCH::BLACKHOLE or this->target_type_ == TargetDevice::Simulator) {
+    if (this->target_type_ == TargetDevice::Simulator) {
         // Cannot use tt_SiliconDevice::detect_available_device_ids because that returns physical device IDs
         std::vector<chip_id_t> physical_mmio_device_ids;
         std::set<chip_id_t> logical_mmio_device_ids;
-        if (this->target_type_ == TargetDevice::Simulator) {
-            physical_mmio_device_ids = tt_SimulationDevice::detect_available_device_ids();
-        } else{
-            physical_mmio_device_ids = tt_SiliconDevice::detect_available_device_ids();
-        }
+        physical_mmio_device_ids = tt_SimulationDevice::detect_available_device_ids();
         for (chip_id_t logical_mmio_device_id = 0; logical_mmio_device_id < physical_mmio_device_ids.size();
              logical_mmio_device_id++) {
             logical_mmio_device_ids.insert(logical_mmio_device_id);
@@ -239,7 +235,7 @@ void Cluster::get_metal_desc_from_tt_desc(
     const std::unordered_map<chip_id_t, uint32_t> &per_chip_id_harvesting_masks) {
     for (const auto it : input) {
         chip_id_t id = it.first;
-        this->sdesc_per_chip_.emplace(id, metal_SocDescriptor(it.second, per_chip_id_harvesting_masks.at(id)));
+        this->sdesc_per_chip_.emplace(id, metal_SocDescriptor(it.second, per_chip_id_harvesting_masks.at(id), this->cluster_desc_->get_board_type(id)));
     }
 }
 
@@ -942,6 +938,9 @@ uint32_t Cluster::get_mmio_device_max_tunnel_depth(chip_id_t mmio_device) const
     uint32_t depth = 0;
     for (const auto &[assoc_mmio_device, devices] : this->devices_grouped_by_assoc_mmio_device_) {
         for (const auto &chip_id : devices) {
+            if (chip_id == assoc_mmio_device) {
+                continue;
+            }
             depth =
                 std::max(depth, uint32_t(this->cluster_desc_->get_ethernet_link_distance(chip_id, assoc_mmio_device)));
         }
@@ -960,7 +959,8 @@ uint32_t Cluster::get_mmio_device_tunnel_count(chip_id_t mmio_device) const {
 }
 
 uint32_t Cluster::get_device_tunnel_depth(chip_id_t chip_id) const {
-    return this->cluster_desc_->get_ethernet_link_distance(chip_id, this->get_associated_mmio_device(chip_id));
+    chip_id_t mmio_device_id = this->get_associated_mmio_device(chip_id);
+    return (mmio_device_id == chip_id) ? 0 : this->cluster_desc_->get_ethernet_link_distance(chip_id, mmio_device_id);
 }
 
 }  // namespace tt
diff --git a/tt_metal/soc_descriptors/blackhole_140_arch.yaml b/tt_metal/soc_descriptors/blackhole_140_arch.yaml
index f2f56ccd0c3..a9d4246d8a3 100644
--- a/tt_metal/soc_descriptors/blackhole_140_arch.yaml
+++ b/tt_metal/soc_descriptors/blackhole_140_arch.yaml
@@ -10,7 +10,7 @@ arc:
   [ 8-0 ]
 
 pcie:
-  [ 11-0 ]
+  [ 2-0, 11-0 ]
 
 dram:
   [
@@ -57,7 +57,7 @@ harvested_workers:
 
 router_only:
   [
-   1-0, 2-0, 3-0, 4-0, 5-0, 6-0, 7-0, 10-0, 12-0, 13-0, 14-0, 15-0, 16-0,
+   1-0, 3-0, 4-0, 5-0, 6-0, 7-0, 10-0, 12-0, 13-0, 14-0, 15-0, 16-0,
    8-1, 8-2, 8-3, 8-4, 8-5, 8-6, 8-7, 8-8, 8-9, 8-10, 8-11
   ]
 

From b324bece9f67e1e601bec8d566403cc17a31aa6d Mon Sep 17 00:00:00 2001
From: Almeet Bhullar <abhullar@tenstorrent.com>
Date: Tue, 12 Nov 2024 19:54:11 +0000
Subject: [PATCH 28/69] Add cluster desc yaml to gitignore - UMD will be
 updating this so it isn't exposed to client

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 14d54714f2b..b0967985a2e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ coremodel/model/release/
 
 pipegen.yaml
 device_desc.yaml
+cluster_descriptor.yaml
 .umd/
 /clean
 *coverage.txt

From 4efc0a270d5a9385349c4fcfce1a8b66d156ae15 Mon Sep 17 00:00:00 2001
From: Radomir Djogo <159184120+rdjogoTT@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:07:58 -0500
Subject: [PATCH 29/69] Increase packer precision for bfp8 formats (#14822)

### Ticket
[Link to Github
Issue](https://github.com/tenstorrent/tt-metal/issues/14032)

### Problem description
bfp8 packing is inaccurate when pack_src_format is also bpf8, since this
results in double rounding in the HW. First the gasket rounds to 7 bits,
then rounding occurs again when the mantissas are being shifted in order
to have common exponent.

### What's changed
Add a flag to compute config called `bfp_pack_precise` which toggles the
pack_src_format to either fp16 or fp32 (depending on fp32_mode_en) in
order to get more accurate output. This however will half the packer
bandwidth in the case of fp16, and reduce it to one quarter in the case
of fp32.
---
 .../eltwise/test_eltwise_typecast.py          | 170 ++++++++++++++++++
 tt_metal/impl/kernels/kernel.cpp              |   1 +
 tt_metal/impl/kernels/kernel_types.hpp        |   1 +
 tt_metal/jit_build/data_format.cpp            |  12 +-
 tt_metal/jit_build/data_format.hpp            |   2 +-
 tt_metal/jit_build/genfiles.cpp               |   5 +-
 tt_metal/jit_build/settings.cpp               |   3 +-
 tt_metal/jit_build/settings.hpp               |   1 +
 ttnn/cpp/ttnn/operations/copy.hpp             |  11 +-
 .../unary/device/unary_device_operation.cpp   |   2 +
 .../unary/device/unary_device_operation.hpp   |   1 +
 .../device/unary_device_operation_types.hpp   |   1 +
 .../unary/device/unary_program_factory.cpp    |   2 +
 .../device/unary_sharded_program_factory.cpp  |   1 +
 .../ttnn/operations/eltwise/unary/unary.cpp   |   3 +-
 15 files changed, 201 insertions(+), 15 deletions(-)

diff --git a/tests/ttnn/unit_tests/operations/eltwise/test_eltwise_typecast.py b/tests/ttnn/unit_tests/operations/eltwise/test_eltwise_typecast.py
index 287a30c7900..bc37a109a2e 100644
--- a/tests/ttnn/unit_tests/operations/eltwise/test_eltwise_typecast.py
+++ b/tests/ttnn/unit_tests/operations/eltwise/test_eltwise_typecast.py
@@ -22,6 +22,12 @@
     ttnn.L1_MEMORY_CONFIG,
 ]
 
+TILE_HEIGHT = 32
+TILE_WIDTH = 32
+
+cpu_layout = ttnn.Layout.ROW_MAJOR
+npu_layout = ttnn.Layout.TILE
+
 
 @pytest.mark.parametrize(
     "pt_input_dtype, tt_input_dtype, tt_output_dtype",
@@ -98,3 +104,167 @@ def test_run_eltwise_typecast_op(
             device,
             test_args,
         )
+
+
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_bf16_to_bfp8_b(device):
+    torch.manual_seed(0)
+    shape = [32, 32]
+
+    # bf16 --> bfp8_b by cpu.
+    torch_bf16 = torch.randn(shape, dtype=torch.bfloat16)
+    bfp8_b_by_cpu = ttnn.Tensor(torch_bf16, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b_by_cpu.to(cpu_layout).to_torch()
+
+    # bf16 --> bfp8_b by npu
+    tt_bf16 = ttnn.Tensor(torch_bf16, ttnn.bfloat16).to(npu_layout).to(device)
+    bfp8_b_by_npu = ttnn.typecast(tt_bf16, ttnn.bfloat8_b)
+    npu_version = bfp8_b_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    # print(cpu_version[0, 0:16])
+    # print(npu_version[0, 0:16])
+    assert passed
+
+
+def print_mismatches(cpu, npu, num_max_print):
+    different_indices = (cpu != npu).nonzero(as_tuple=True)
+    count = 0
+    for idx in zip(*different_indices):
+        count = count + 1
+        print(f"idx={idx} cpu={cpu[idx]} npu={npu[idx]}")
+        if count > num_max_print:
+            break
+
+
+@pytest.mark.parametrize("seed", [0, 2, 4, 6, 8])
+@pytest.mark.parametrize("scale", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
+@pytest.mark.parametrize("bias", [0, 1, 2, 4, 8, 16, 32, 64, 128])
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_bf16_to_bfp8_b_various_input(seed, scale, bias, device):
+    torch.manual_seed(seed)
+    shape = [1024, 1024]
+
+    bias = bias
+    low = bias - scale
+    high = bias + scale
+    torch_bf16 = random_tensor = torch.empty(shape).uniform_(low, high).to(torch.bfloat16)
+
+    random_signs = torch.randint(0, 2, shape) * 2 - 1
+    torch_bf16 = torch_bf16 * random_signs
+
+    # bf16 --> bfp8_b by cpu.
+    bfp8_b_by_cpu = ttnn.Tensor(torch_bf16, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b_by_cpu.to(cpu_layout).to_torch()
+
+    # bf16 --> bfp8_b by npu
+    tt_bf16 = ttnn.Tensor(torch_bf16, ttnn.bfloat16).to(npu_layout).to(device)
+    bfp8_b_by_npu = ttnn.typecast(tt_bf16, ttnn.bfloat8_b)
+    npu_version = bfp8_b_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    if not passed:
+        print_mismatches(cpu_version, npu_version, 16)
+    assert passed
+
+
+@pytest.mark.parametrize("seed", [0])
+@pytest.mark.parametrize("scale", [4])
+@pytest.mark.parametrize("bias", [2])
+# NaN becomes -Inf when converted to bfloat8_b format, skip testing
+@pytest.mark.parametrize("insert_inf, insert_nan", [[True, False]])  # , [False, True], [True, True]])
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_bf16_to_bfp8_b_with_inf_nan(seed, scale, bias, insert_inf, insert_nan, device):
+    torch.manual_seed(seed)
+    shape = [1024, 1024]
+
+    bias = bias
+    low = bias - scale
+    high = bias + scale
+
+    torch_bf16 = random_tensor = torch.empty(shape).uniform_(low, high).to(torch.bfloat16)
+    if insert_inf:
+        num_inf = torch_bf16.numel() // 8  # 16 elements are pcked into
+        inf_indices = torch.randint(0, torch_bf16.numel(), (num_inf,))
+        torch_bf16.view(-1)[inf_indices] = float("inf")
+    if insert_nan:
+        num_nan = torch_bf16.numel() // 8
+        nan_indices = torch.randint(0, torch_bf16.numel(), (num_nan,))
+        torch_bf16.view(-1)[nan_indices] = float("nan")
+    random_signs = torch.randint(0, 2, shape) * 2 - 1
+    torch_bf16 = torch_bf16 * random_signs
+
+    # bf16 --> bfp8_b by cpu.
+    bfp8_b_by_cpu = ttnn.Tensor(torch_bf16, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b_by_cpu.to(cpu_layout).to_torch()
+
+    # bf16 --> bfp8_b by npu
+    tt_bf16 = ttnn.Tensor(torch_bf16, ttnn.bfloat16).to(npu_layout).to(device)
+    bfp8_b_by_npu = ttnn.typecast(tt_bf16, ttnn.bfloat8_b)
+    npu_version = bfp8_b_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    if not passed:
+        print_mismatches(cpu_version, npu_version, 16)
+    assert passed
+
+
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_bfp8_b_to_bf16(device):
+    torch.manual_seed(0)
+    shape = [1024, 1024]
+
+    # bfp8_b --> bf16 by cpu.
+    torch_bf16 = torch.randn(shape, dtype=torch.bfloat16)
+    bfp8_b = ttnn.Tensor(torch_bf16, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b.to(cpu_layout).to_torch()
+
+    # bfp8_b --> bf16 by npu.
+    bf16_by_npu = ttnn.typecast(bfp8_b.to(device), ttnn.bfloat16)
+    npu_version = bf16_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    # print(cpu_version[0, 0:16])
+    # print(npu_version[0, 0:16])
+    assert passed
+
+
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_fp32_to_bfp8_b(device):
+    torch.manual_seed(0)
+    shape = [32, 32]
+
+    # fp32 --> bfp8_b by cpu.
+    torch_fp32 = torch.randn(shape, dtype=torch.float32)
+    bfp8_b_by_cpu = ttnn.Tensor(torch_fp32, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b_by_cpu.to(cpu_layout).to_torch()
+
+    # fp32 --> bfp8_b by npu
+    tt_fp32 = ttnn.Tensor(torch_fp32, ttnn.float32).to(npu_layout).to(device)
+    bfp8_b_by_npu = ttnn.typecast(tt_fp32, ttnn.bfloat8_b)
+    npu_version = bfp8_b_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    # print(cpu_version[0, 0:16])
+    # print(npu_version[0, 0:16])
+    assert passed
+
+
+@skip_for_grayskull("Op not supported for Grayskull, supported for wormhole_b0")
+def test_typecast_bfp8_b_to_fp32(device):
+    torch.manual_seed(0)
+    shape = [1024, 1024]
+
+    # bfp8_b --> fp32 by cpu.
+    torch_fp32 = torch.randn(shape, dtype=torch.float32)
+    bfp8_b = ttnn.Tensor(torch_fp32, ttnn.bfloat8_b).to(npu_layout)
+    cpu_version = bfp8_b.to(cpu_layout).to_torch()
+
+    # bfp8_b --> fp32 by npu.
+    fp32_by_npu = ttnn.typecast(bfp8_b.to(device), ttnn.float32)
+    npu_version = fp32_by_npu.cpu().to(cpu_layout).to_torch()
+
+    passed = torch.equal(cpu_version, npu_version)
+    # print(cpu_version[0, 0:16])
+    # print(npu_version[0, 0:16])
+    assert passed
diff --git a/tt_metal/impl/kernels/kernel.cpp b/tt_metal/impl/kernels/kernel.cpp
index 5400c40edc0..3cc42624190 100644
--- a/tt_metal/impl/kernels/kernel.cpp
+++ b/tt_metal/impl/kernels/kernel.cpp
@@ -316,6 +316,7 @@ void ComputeKernel::set_build_options(JitBuildOptions &build_options) const {
     build_options.fp32_dest_acc_en = this->config_.fp32_dest_acc_en;
     build_options.dst_full_sync_en = this->config_.dst_full_sync_en;
     build_options.unpack_to_dest_mode = this->config_.unpack_to_dest_mode;
+    build_options.bfp8_pack_precise = this->config_.bfp8_pack_precise;
 }
 
 void DataMovementKernel::generate_binaries(Device *device, JitBuildOptions &build_options) const {
diff --git a/tt_metal/impl/kernels/kernel_types.hpp b/tt_metal/impl/kernels/kernel_types.hpp
index f41cc7ebf37..3023ed589b9 100644
--- a/tt_metal/impl/kernels/kernel_types.hpp
+++ b/tt_metal/impl/kernels/kernel_types.hpp
@@ -53,6 +53,7 @@ struct ComputeConfig {
     bool fp32_dest_acc_en = false;
     bool dst_full_sync_en = false;
     std::vector<UnpackToDestMode> unpack_to_dest_mode;
+    bool bfp8_pack_precise = false;
     bool math_approx_mode = false;
     std::vector<uint32_t> compile_args;
     // Will cause CompileProgram to emit a file hlk_defines_generated.h
diff --git a/tt_metal/jit_build/data_format.cpp b/tt_metal/jit_build/data_format.cpp
index 367d727073d..2bf3fa9fd0c 100644
--- a/tt_metal/jit_build/data_format.cpp
+++ b/tt_metal/jit_build/data_format.cpp
@@ -303,6 +303,7 @@ const DataFormat get_single_pack_src_format(
     DataFormat output_format,
     DataFormat unpack_conditional_dst_format,
     bool fp32_dest_acc_en,
+    bool bfp8_pack_precise,
     bool int_fpu_en,
     tt::ARCH arch) {
 
@@ -334,7 +335,7 @@ const DataFormat get_single_pack_src_format(
         TT_FATAL(arch != tt::ARCH::GRAYSKULL, "Dest Fp32 mode is not supported for arch grayskull");
 
         if (is_bfp_format(output_format)) {
-            pack_src_format = DataFormat::Bfp8_b;
+            pack_src_format = bfp8_pack_precise ? DataFormat::Float32 : (is_exp_b_format(output_format) ? DataFormat::Bfp8_b : DataFormat::Bfp8);
         } else if(is_exp_b_format(output_format) || (output_format == DataFormat::Float32)) {
             pack_src_format = output_format;
         } else if(output_format == DataFormat::Float16){
@@ -374,7 +375,7 @@ const DataFormat get_single_pack_src_format(
             }
             pack_src_format = unpack_conditional_dst_format;
         } else if (is_bfp_format(output_format)) {
-            pack_src_format = is_exp_b_format(output_format) ? DataFormat::Bfp8_b : DataFormat::Bfp8;
+            pack_src_format = bfp8_pack_precise ? (is_exp_b_format(output_format) ? DataFormat::Float16_b : DataFormat::Float16) : (is_exp_b_format(output_format) ? DataFormat::Bfp8_b : DataFormat::Bfp8);
         } else {
             pack_src_format = output_format;
         }
@@ -390,7 +391,7 @@ const DataFormat get_single_pack_src_format(
         DataFormat pack_src_format_tmp = output_format;
 
         if (is_bfp_format(output_format)) {
-            pack_src_format_tmp = is_exp_b_format(output_format) ? DataFormat::Bfp8_b : DataFormat::Bfp8;
+            pack_src_format_tmp = bfp8_pack_precise ? (is_exp_b_format(output_format) ? DataFormat::Float16_b : DataFormat::Float16) : (is_exp_b_format(output_format) ? DataFormat::Bfp8_b : DataFormat::Bfp8);
         }
 
         if (pack_src_format_tmp != DataFormat::Float32) {
@@ -413,6 +414,7 @@ std::vector<DataFormat> get_pack_src_formats(
     DataFormat output_formats[NUM_OPERANDS],
     DataFormat unpack_conditional_dst_format,
     bool fp32_dest_acc_en,
+    bool bfp8_pack_precise,
     bool int_fpu_en,
     tt::ARCH arch
 ) {
@@ -421,14 +423,14 @@ std::vector<DataFormat> get_pack_src_formats(
     std::vector<DataFormat> pack_src_formats;
     DataFormat pack_src_format;
     for (int i = 0; i < NUM_OPERANDS; i++) {
-        pack_src_format = get_single_pack_src_format(input_formats[i], pack_output_format, unpack_conditional_dst_format, fp32_dest_acc_en, int_fpu_en, arch);
+        pack_src_format = get_single_pack_src_format(input_formats[i], pack_output_format, unpack_conditional_dst_format, fp32_dest_acc_en, bfp8_pack_precise, int_fpu_en, arch);
         pack_src_formats.push_back(pack_src_format);
     }
 
     // Intermediates
     for (int i = 0; i < NUM_OPERANDS; i++) {
         //Intermediates can be inputs & outputs to same op, provide same format per operand id
-        pack_src_format = get_single_pack_src_format(intermed_formats[i], intermed_formats[i], unpack_conditional_dst_format, fp32_dest_acc_en, int_fpu_en, arch);
+        pack_src_format = get_single_pack_src_format(intermed_formats[i], intermed_formats[i], unpack_conditional_dst_format, fp32_dest_acc_en, bfp8_pack_precise, int_fpu_en, arch);
         pack_src_formats.push_back(pack_src_format);
     }
     return pack_src_formats;
diff --git a/tt_metal/jit_build/data_format.hpp b/tt_metal/jit_build/data_format.hpp
index c4ab84f2679..ed1e718c61b 100644
--- a/tt_metal/jit_build/data_format.hpp
+++ b/tt_metal/jit_build/data_format.hpp
@@ -62,7 +62,7 @@ const DataFormat get_single_pack_src_format(DataFormat input_format, DataFormat
 
 std::vector<DataFormat> get_unpack_src_formats(DataFormat input_formats[NUM_OPERANDS], DataFormat param_formats[NUM_OPERANDS], DataFormat intermed_formats[NUM_OPERANDS]);
 std::vector<DataFormat> get_unpack_dst_formats(DataFormat input_formats[NUM_OPERANDS], DataFormat param_formats[NUM_OPERANDS], DataFormat intermed_formats[NUM_OPERANDS], DataFormat output_formats[NUM_OPERANDS], DataFormat unpack_conditional_dst_format, bool fp32_dest_acc_en, std::vector<UnpackToDestMode> unpack_to_dest_mode, bool int_fpu_en = false);
-std::vector<DataFormat> get_pack_src_formats(DataFormat input_formats[NUM_OPERANDS], DataFormat param_formats[NUM_OPERANDS], DataFormat intermed_formats[NUM_OPERANDS], DataFormat output_formats[NUM_OPERANDS], DataFormat unpack_conditional_dst_format, bool fp32_dest_acc_en, bool int_fpu_en = false, tt::ARCH arch = tt::ARCH::GRAYSKULL);
+std::vector<DataFormat> get_pack_src_formats(DataFormat input_formats[NUM_OPERANDS], DataFormat param_formats[NUM_OPERANDS], DataFormat intermed_formats[NUM_OPERANDS], DataFormat output_formats[NUM_OPERANDS], DataFormat unpack_conditional_dst_format, bool fp32_dest_acc_en, bool bfp8_pack_precise, bool int_fpu_en = false, tt::ARCH arch = tt::ARCH::GRAYSKULL);
 std::vector<DataFormat> get_pack_dst_formats(DataFormat input_formats[NUM_OPERANDS], DataFormat param_formats[NUM_OPERANDS], DataFormat intermed_formats[NUM_OPERANDS], DataFormat output_formats[NUM_OPERANDS]);
 
 }
diff --git a/tt_metal/jit_build/genfiles.cpp b/tt_metal/jit_build/genfiles.cpp
index a21211cbc0a..cc8aee4e951 100644
--- a/tt_metal/jit_build/genfiles.cpp
+++ b/tt_metal/jit_build/genfiles.cpp
@@ -272,7 +272,7 @@ static void emit_unpack_data_formats(
 }
 
 static std::pair<std::vector<DataFormat>, std::vector<DataFormat>> generate_pack_data_formats(
-    tt_hlk_desc& desc, DataFormat unpack_conditional_dst_format, bool fp32_dest_acc_en, const tt::ARCH arch) {
+    tt_hlk_desc& desc, DataFormat unpack_conditional_dst_format, bool fp32_dest_acc_en, bool bfp8_pack_precise, const tt::ARCH arch) {
     vector<DataFormat> src_formats = tt::get_pack_src_formats(
         desc.input_buf_dataformat_arr,
         desc.param_buf_dataformat_arr,
@@ -280,6 +280,7 @@ static std::pair<std::vector<DataFormat>, std::vector<DataFormat>> generate_pack
         desc.output_buf_dataformat_arr,
         unpack_conditional_dst_format,
         fp32_dest_acc_en,
+        bfp8_pack_precise,
         false,
         arch);
 
@@ -399,7 +400,7 @@ static void generate_data_format_descriptors(JitBuildOptions& options, const tt:
 
     vector<DataFormat> pack_src_formats_all_cbs, pack_dst_formats_all_cbs;
     tie(pack_src_formats_all_cbs, pack_dst_formats_all_cbs) =
-        generate_pack_data_formats(desc, unpack_conditional_dst_format, options.fp32_dest_acc_en, arch);
+        generate_pack_data_formats(desc, unpack_conditional_dst_format, options.fp32_dest_acc_en, options.bfp8_pack_precise, arch);
 
     // equalize "upack src" and "pack dst" data format vectors
     // both "unpack src" and "pack dst" refer to data in L1, "unpack src" == L1, and "pack dst" == L1
diff --git a/tt_metal/jit_build/settings.cpp b/tt_metal/jit_build/settings.cpp
index d73874be35b..d5f18dc2036 100644
--- a/tt_metal/jit_build/settings.cpp
+++ b/tt_metal/jit_build/settings.cpp
@@ -12,7 +12,8 @@ namespace tt::tt_metal
 
     JitBuildOptions::JitBuildOptions(const JitBuildEnv& env) :
       build_env(env),
-      fp32_dest_acc_en(false) {}
+      fp32_dest_acc_en(false),
+      bfp8_pack_precise(false) {}
 
     void JitBuildOptions::set_name(const string& n)
     {
diff --git a/tt_metal/jit_build/settings.hpp b/tt_metal/jit_build/settings.hpp
index 363fba4df57..f5cac441b4f 100644
--- a/tt_metal/jit_build/settings.hpp
+++ b/tt_metal/jit_build/settings.hpp
@@ -26,6 +26,7 @@ class JitBuildOptions {
     // We can keep for future WH support, otherwise not used in GS
     bool fp32_dest_acc_en;
     std::vector<UnpackToDestMode> unpack_to_dest_mode;
+    bool bfp8_pack_precise;
 
     bool dst_full_sync_en;
 
diff --git a/ttnn/cpp/ttnn/operations/copy.hpp b/ttnn/cpp/ttnn/operations/copy.hpp
index ab75133a5f3..9991455a4fa 100644
--- a/ttnn/cpp/ttnn/operations/copy.hpp
+++ b/ttnn/cpp/ttnn/operations/copy.hpp
@@ -23,18 +23,19 @@ inline Tensor copy_impl(
     const std::vector<ttnn::operations::unary::UnaryWithParam>& op_chain,
     const std::optional<MemoryConfig>& memory_config = std::nullopt,
     const std::optional<Tensor>& optional_output_tensor = std::nullopt) {
-    DataType output_dtype = (op_chain[0].op_type == ttnn::operations::unary::UnaryOpType::TYPECAST) ? static_cast<DataType>(op_chain[0].params[1]) : input_tensor.get_dtype();
-    bool preserve_fp32_precision = (op_chain[0].op_type == ttnn::operations::unary::UnaryOpType::TYPECAST) and (input_tensor.get_dtype() == DataType::FLOAT32);
+    DataType output_dtype = (op_chain[0].op_type == unary::UnaryOpType::TYPECAST) ? static_cast<DataType>(op_chain[0].params[1]) : input_tensor.get_dtype();
+    auto arch = input_tensor.device()->arch();
+    bool preserve_fp32_precision = (arch != tt::ARCH::GRAYSKULL) and (input_tensor.get_dtype() == DataType::FLOAT32);
     bool fp32_dest_acc_en = preserve_fp32_precision or
                             output_dtype == DataType::UINT32 or
                             output_dtype == DataType::INT32 or
                             output_dtype == DataType::FLOAT32 or
                             input_tensor.get_dtype() == DataType::UINT32 or
-                            input_tensor.get_dtype() == DataType::INT32; // MT: Currently only uint32/int32 is moved to
-                                                                          // DST directly, fp32 is converted to fp16b
+                            input_tensor.get_dtype() == DataType::INT32;
+    bool bfp8_pack_precise = (op_chain[0].op_type == unary::UnaryOpType::TYPECAST && output_dtype == DataType::BFLOAT8_B);
 
     auto output_memory_config = optional_output_tensor.has_value() ? optional_output_tensor.value().memory_config() : memory_config.value_or(input_tensor.memory_config());
-    return prim::unary(queue_id, input_tensor, op_chain, output_dtype, output_memory_config, fp32_dest_acc_en, preserve_fp32_precision, optional_output_tensor);
+    return prim::unary(queue_id, input_tensor, op_chain, output_dtype, output_memory_config, fp32_dest_acc_en, preserve_fp32_precision, bfp8_pack_precise, optional_output_tensor);
 }
 }  // namespace detail
 
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.cpp
index 255ca459504..179077d0507 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.cpp
@@ -192,6 +192,7 @@ UnaryDeviceOperation::invoke(
     const MemoryConfig& output_memory_config,
     bool fp32_dest_acc_en,
     bool preserve_fp32_precision,
+    bool bfp8_pack_precise,
     const std::optional<Tensor>& preallocated_output) {
     return {
         operation_attributes_t{
@@ -200,6 +201,7 @@ UnaryDeviceOperation::invoke(
             .output_memory_config = output_memory_config,
             .fp32_dest_acc_en = fp32_dest_acc_en,
             .preserve_fp32_precision = preserve_fp32_precision,
+            .bfp8_pack_precise = bfp8_pack_precise,
         },
         tensor_args_t{.input = input, .preallocated_output = preallocated_output}};
 }
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.hpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.hpp
index 30cb9296c91..a8bdafcf64b 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.hpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation.hpp
@@ -46,6 +46,7 @@ struct UnaryDeviceOperation {
         const MemoryConfig& output_memory_config,
         bool fp32_dest_acc_en,
         bool preserve_fp32_precision,
+        bool bfp8_pack_precise,
         const std::optional<Tensor>& preallocated_output);
 };
 
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation_types.hpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation_types.hpp
index 95d100a9c85..3c9ce09fb75 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation_types.hpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_device_operation_types.hpp
@@ -18,6 +18,7 @@ struct operation_attributes_t {
     const MemoryConfig output_memory_config;
     const bool fp32_dest_acc_en = false;
     const bool preserve_fp32_precision = false;
+    const bool bfp8_pack_precise = false;
 };
 
 struct tensor_args_t {
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_program_factory.cpp
index dccef6c39d6..ab8166c1f4c 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_program_factory.cpp
@@ -101,6 +101,7 @@ UnaryProgramFactory::cached_program_t UnaryProgramFactory::create(
             .math_fidelity = MathFidelity::HiFi4,
             .fp32_dest_acc_en = args.fp32_dest_acc_en,
             .unpack_to_dest_mode = unpack_to_dest_mode,
+            .bfp8_pack_precise = args.bfp8_pack_precise,
             .math_approx_mode = math_approx_mode,
             .compile_args = compute_kernel_args_group_1,
             .defines = unary_defines});
@@ -119,6 +120,7 @@ UnaryProgramFactory::cached_program_t UnaryProgramFactory::create(
                 .math_fidelity = MathFidelity::HiFi4,
                 .fp32_dest_acc_en = args.fp32_dest_acc_en,
                 .unpack_to_dest_mode = unpack_to_dest_mode,
+                .bfp8_pack_precise = args.bfp8_pack_precise,
                 .math_approx_mode = math_approx_mode,
                 .compile_args = compute_kernel_args_group_2,
                 .defines = unary_defines});
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_sharded_program_factory.cpp
index e2f771f37f6..b693504d98a 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_sharded_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_sharded_program_factory.cpp
@@ -130,6 +130,7 @@ UnaryShardedProgramFactory::cached_program_t UnaryShardedProgramFactory::create(
             .math_fidelity = MathFidelity::HiFi4,
             .fp32_dest_acc_en = args.fp32_dest_acc_en,
             .unpack_to_dest_mode = unpack_to_dest_mode,
+            .bfp8_pack_precise = args.bfp8_pack_precise,
             .math_approx_mode = math_approx_mode,
             .compile_args = compute_kernel_args_group_1,
             .defines = unary_defines});
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
index 7a40003fa52..e68ec9535d6 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
@@ -31,9 +31,10 @@ inline Tensor unary_impl(
                             output_dtype == DataType::FLOAT32 or
                             input_tensor.get_dtype() == DataType::UINT32 or
                             input_tensor.get_dtype() == DataType::INT32;
+    bool bfp8_pack_precise = (op_chain[0].op_type == UnaryOpType::TYPECAST && output_dtype == DataType::BFLOAT8_B);
 
     auto output_memory_config = optional_output_tensor.has_value() ? optional_output_tensor.value().memory_config() : memory_config.value_or(input_tensor.memory_config());
-    return prim::unary(queue_id, input_tensor, op_chain, output_dtype, output_memory_config, fp32_dest_acc_en, preserve_fp32_precision, optional_output_tensor);
+    return prim::unary(queue_id, input_tensor, op_chain, output_dtype, output_memory_config, fp32_dest_acc_en, preserve_fp32_precision, bfp8_pack_precise, optional_output_tensor);
 }
 
 }  // namespace detail

From fb36091bff705eee5e2ea5b0f48b2e7babd482f5 Mon Sep 17 00:00:00 2001
From: Michael Chiou <156848643+ttmchiou@users.noreply.github.com>
Date: Wed, 13 Nov 2024 11:07:16 -0800
Subject: [PATCH 30/69] Revert "Angle op fix (#14129)"

This reverts commit eedfd3847212d9f4f97fee7b5bf065bc6352bc38.
---
 .../sweeps/eltwise/unary_complex/angle/angle.py     | 13 ++-----------
 .../eltwise/unary_complex/angle_bw/angle_bw.py      |  2 +-
 .../sweeps/eltwise/unary_complex/polar/polar.py     |  2 +-
 .../eltwise/unary_complex/polar_bw/polar_bw.py      |  2 +-
 .../complex_unary/device/complex_unary_op.cpp       |  2 +-
 5 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
index 98d7fc4660f..e1e872d0585 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle/angle.py
@@ -25,20 +25,11 @@
 # Each suite has a key name (in this case "suite_1" and "suite_2") which will associate the test vectors to this specific suite of inputs.
 # Developers can create their own generator functions and pass them to the parameters as inputs.
 parameters = {
-    "nightly": {
-        "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16)
-        + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 16)
-        + gen_shapes([1, 1], [256, 256], [1, 1], 16),
-        "input_a_dtype": [ttnn.bfloat16],
-        "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
-        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
-        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
-    },
     "xfail": {
         "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16)
         + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 16)
         + gen_shapes([1, 1], [256, 256], [1, 1], 16),
-        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_a_dtype": [ttnn.bfloat16],
         "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
         "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
         "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
@@ -51,7 +42,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
+        return True, "Inputs to eltwise binary must be tilized"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is only supported on tiled layout"
     return False, None
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
index d802c1fc9ae..ce3dd28f636 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/angle_bw/angle_bw.py
@@ -44,7 +44,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
+        return True, "Inputs to eltwise binary must be tilized"
     if test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is not supported on input_tensor_a"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
index e534e272f87..857f4d533fd 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar/polar.py
@@ -42,7 +42,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
+        return True, "Inputs to eltwise binary must be tilized"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is only supported on tiled layout"
     return False, None
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
index b8e5040cd53..2ac0d2dec36 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_complex/polar_bw/polar_bw.py
@@ -44,7 +44,7 @@
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor"
+        return True, "Inputs to eltwise binary must be tilized"
     if test_vector["input_a_dtype"] == ttnn.bfloat8_b:
         return True, "bfloat8_b is not supported on input_tensor_a"
     if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
diff --git a/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
index f432ea54793..278a08bd844 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/complex_unary/device/complex_unary_op.cpp
@@ -22,7 +22,7 @@ Tensor _imag(const ComplexTensor& input, const MemoryConfig& output_mem_config)
 }
 
 Tensor _angle(const ComplexTensor& input, const MemoryConfig& output_mem_config) {
-    return ttnn::atan2(input[0],input[1],output_mem_config);
+    return ttnn::neg( atan2(input[1],input[0],output_mem_config), output_mem_config );
 }
 
 Tensor _is_imag(const ComplexTensor& input, const MemoryConfig& output_mem_config) {

From fc6f73ef11f63b10a93232ac8c08a75b057ee8d5 Mon Sep 17 00:00:00 2001
From: Arik  Yaacob <ayaacob@tenstorrent.com>
Date: Wed, 13 Nov 2024 14:50:43 -0600
Subject: [PATCH 31/69] use do_crt1 like other cores (#15009)

### Problem description
There were compilation errors on BH due to unresolved variables

### What's changed
Changed the initialization of slave_idle_erisc to match that of other
cores, using do_crt1

### Checklist
- [x] Post commit CI passes
- [x] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 tt_metal/hw/firmware/src/slave_idle_erisc.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tt_metal/hw/firmware/src/slave_idle_erisc.cc b/tt_metal/hw/firmware/src/slave_idle_erisc.cc
index b0cc1723c5d..452877eb8d6 100644
--- a/tt_metal/hw/firmware/src/slave_idle_erisc.cc
+++ b/tt_metal/hw/firmware/src/slave_idle_erisc.cc
@@ -54,9 +54,7 @@ int main(int argc, char *argv[]) {
     conditionally_disable_l1_cache();
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
-
-    int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
-    l1_to_local_mem_copy((uint *)__ldm_data_start, (uint tt_l1_ptr *)MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH, num_words);
+    do_crt1((uint32_t *)MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH);
 
     risc_init();
 

From b15475fcb96a8e674c37dcb7c49c019d89c3a630 Mon Sep 17 00:00:00 2001
From: Samarth Agarwal <sagarwal@tenstorrent.com>
Date: Wed, 13 Nov 2024 15:51:25 -0500
Subject: [PATCH 32/69] Fixed incorrect mem size for DebugIErisc (#15021)

---
 tt_metal/impl/debug/watcher_device_reader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tt_metal/impl/debug/watcher_device_reader.cpp b/tt_metal/impl/debug/watcher_device_reader.cpp
index f3c7f8529be..a07ec3ae064 100644
--- a/tt_metal/impl/debug/watcher_device_reader.cpp
+++ b/tt_metal/impl/debug/watcher_device_reader.cpp
@@ -63,7 +63,7 @@ static uint32_t get_riscv_stack_size(const CoreDescriptor &core, uint32_t type)
         case DebugBrisc: return MEM_BRISC_STACK_SIZE;
         case DebugNCrisc: return MEM_NCRISC_STACK_SIZE;
         case DebugErisc: return 0; // Not managed/checked by us.
-        case DebugIErisc: return MEM_BRISC_STACK_SIZE;
+        case DebugIErisc: return MEM_IERISC_STACK_SIZE;
         case DebugSlaveIErisc: return MEM_BRISC_STACK_SIZE;
         case DebugTrisc0: return MEM_TRISC0_STACK_SIZE;
         case DebugTrisc1: return MEM_TRISC1_STACK_SIZE;

From 9974e3cff667d245daccabd80afaa9c5cf9dae02 Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <mbahnas@tenstorrent.com>
Date: Sat, 9 Nov 2024 21:20:40 +0000
Subject: [PATCH 33/69] #0: Yolov4 real demo traced

---
 .../wormhole/yolov4/test_yolov4_performant.py |  22 ++-
 models/demos/yolov4/tests/yolov4_perfomant.py | 149 +++++++++++++++---
 2 files changed, 145 insertions(+), 26 deletions(-)

diff --git a/models/demos/wormhole/yolov4/test_yolov4_performant.py b/models/demos/wormhole/yolov4/test_yolov4_performant.py
index cf7daddc4db..7774e07922d 100644
--- a/models/demos/wormhole/yolov4/test_yolov4_performant.py
+++ b/models/demos/wormhole/yolov4/test_yolov4_performant.py
@@ -4,12 +4,15 @@
 
 import pytest
 import ttnn
+import time
+import torch
 
 from models.utility_functions import run_for_wormhole_b0
 from models.demos.yolov4.tests.yolov4_perfomant import (
     run_yolov4_inference,
     run_yolov4_trace_inference,
     run_yolov4_trace_2cqs_inference,
+    Yolov4Trace2CQ,
 )
 
 
@@ -66,10 +69,27 @@ def test_run_yolov4_trace_2cqs_inference(
     enable_async_mode,
     model_location_generator,
 ):
-    run_yolov4_trace_2cqs_inference(
+    yolov4_trac2_2cq = Yolov4Trace2CQ()
+
+    yolov4_trac2_2cq.initialize_yolov4_trace_2cqs_inference(
         device,
         batch_size,
         act_dtype,
         weight_dtype,
         model_location_generator,
     )
+    for iter in range(0, 10):
+        input_shape = (1, 3, 320, 320)
+        torch_input_tensor = torch.randn(input_shape, dtype=torch.float32)
+        n, c, h, w = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+
+        t0 = time.time()
+        output = yolov4_trac2_2cq.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
+        t1 = time.time()
+        print("TIME", t1 - t0)
+
+    yolov4_trac2_2cq.release_yolov4_trace_2cqs_inference()
diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index d35644da15c..35e46d60073 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -44,16 +44,16 @@ def run_yolov4_inference(
         model_location_generator=model_location_generator,
     )
 
-    tt_inputs_host, input_mem_config = test_infra.setup_l1_sharded_input(device)
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
 
     # # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     test_infra.run()
     test_infra.validate()
     test_infra.dealloc_output()
 
     # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     test_infra.run()
     test_infra.validate()
     test_infra.dealloc_output()
@@ -61,7 +61,7 @@ def run_yolov4_inference(
     # More optimized run with caching
     if use_signpost:
         signpost(header="start")
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     test_infra.run()
     if use_signpost:
         signpost(header="stop")
@@ -83,10 +83,10 @@ def run_yolov4_trace_inference(
         weight_dtype,
         model_location_generator=model_location_generator,
     )
-    tt_inputs_host, input_mem_config = test_infra.setup_l1_sharded_input(device)
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
 
     # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     shape = test_infra.input_tensor.shape
     dtype = test_infra.input_tensor.dtype
     layout = test_infra.input_tensor.layout
@@ -95,36 +95,36 @@ def run_yolov4_trace_inference(
     test_infra.dealloc_output()
 
     # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     test_infra.run()
     test_infra.validate()
 
     # Capture
-    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
     test_infra.dealloc_output()
     trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    tid = ttnn.begin_trace_capture(device, cq_id=0)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
     test_infra.run()
     tt_image_res = ttnn.allocate_tensor_on_device(
         shape,
         dtype,
         layout,
         device,
-        input_mem_config,
+        self.input_mem_config,
     )
-    ttnn.end_trace_capture(device, tid, cq_id=0)
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
     assert trace_input_addr == ttnn.buffer_address(tt_image_res)
 
     # More optimized run with caching
     if use_signpost:
         signpost(header="start")
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 0)
-    ttnn.execute_trace(device, tid, cq_id=0, blocking=True)
+    ttnn.execute_trace(device, self.tid, cq_id=0, blocking=True)
     if use_signpost:
         signpost(header="stop")
     test_infra.validate()
 
-    ttnn.release_trace(device, tid)
+    ttnn.release_trace(device, self.tid)
     test_infra.dealloc_output()
 
 
@@ -142,7 +142,7 @@ def run_yolov4_trace_2cqs_inference(
         weight_dtype,
         model_location_generator=model_location_generator,
     )
-    tt_inputs_host, sharded_mem_config_DRAM, input_mem_config = test_infra.setup_dram_sharded_input(device)
+    tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = test_infra.setup_dram_sharded_input(device)
     tt_image_res = tt_inputs_host.to(device, sharded_mem_config_DRAM)
     op_event = ttnn.create_event(device)
     write_event = ttnn.create_event(device)
@@ -154,7 +154,7 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
     shape = test_infra.input_tensor.shape
     dtype = test_infra.input_tensor.dtype
     layout = test_infra.input_tensor.layout
@@ -168,7 +168,7 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
     ttnn.record_event(0, op_event)
     test_infra.run()
     test_infra.validate()
@@ -178,21 +178,21 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
     ttnn.record_event(0, op_event)
     test_infra.dealloc_output()
     trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    tid = ttnn.begin_trace_capture(device, cq_id=0)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
     test_infra.run()
-    input_tensor = ttnn.allocate_tensor_on_device(
+    self.input_tensor = ttnn.allocate_tensor_on_device(
         shape,
         dtype,
         layout,
         device,
-        input_mem_config,
+        self.input_mem_config,
     )
-    ttnn.end_trace_capture(device, tid, cq_id=0)
-    assert trace_input_addr == ttnn.buffer_address(input_tensor)
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
 
     # More optimized run with caching
     if use_signpost:
@@ -203,12 +203,111 @@ def run_yolov4_trace_2cqs_inference(
         ttnn.record_event(1, write_event)
         ttnn.wait_for_event(0, write_event)
         # TODO: Add in place support to ttnn to_memory_config
-        input_tensor = ttnn.reshard(tt_image_res, input_mem_config, input_tensor)
+        self.input_tensor = ttnn.reshard(tt_image_res, self.input_mem_config, self.input_tensor)
         ttnn.record_event(0, op_event)
-        ttnn.execute_trace(device, tid, cq_id=0, blocking=False)
+        ttnn.execute_trace(device, self.tid, cq_id=0, blocking=False)
     ttnn.synchronize_devices(device)
 
     if use_signpost:
         signpost(header="stop")
 
-    ttnn.release_trace(device, tid)
+    ttnn.release_trace(device, self.tid)
+
+
+class Yolov4Trace2CQ:
+    def __init__(self):
+        ...
+
+    def initialize_yolov4_trace_2cqs_inference(
+        self,
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator,
+    ):
+        self.test_infra = create_test_infra(
+            device,
+            device_batch_size,
+            act_dtype,
+            weight_dtype,
+            model_location_generator=model_location_generator,
+        )
+        self.tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = self.test_infra.setup_dram_sharded_input(
+            device
+        )
+        self.tt_image_res = self.tt_inputs_host.to(device, sharded_mem_config_DRAM)
+        self.op_event = ttnn.create_event(device)
+        self.write_event = ttnn.create_event(device)
+        # Initialize the op event so we can write
+        ttnn.record_event(0, self.op_event)
+
+        # First run configures convs JIT
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        shape = self.test_infra.input_tensor.shape
+        dtype = self.test_infra.input_tensor.dtype
+        layout = self.test_infra.input_tensor.layout
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+        self.test_infra.dealloc_output()
+
+        # Optimized run
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+
+        # Capture
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.dealloc_output()
+        trace_input_addr = ttnn.buffer_address(self.test_infra.input_tensor)
+        self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+        self.test_infra.run()
+        self.input_tensor = ttnn.allocate_tensor_on_device(
+            shape,
+            dtype,
+            layout,
+            device,
+            self.input_mem_config,
+        )
+        ttnn.end_trace_capture(device, self.tid, cq_id=0)
+        assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+
+        self.device = device
+
+        # More optimized run with caching
+        # if use_signpost:
+        #    signpost(header="start")
+
+    def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
+        tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        # TODO: Add in place support to ttnn to_memory_config
+        self.input_tensor = ttnn.reshard(self.tt_image_res, self.input_mem_config, self.input_tensor)
+        ttnn.record_event(0, self.op_event)
+        ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
+        ttnn.synchronize_devices(self.device)
+        return self.test_infra.output_tensor
+
+        # if use_signpost:
+        #    signpost(header="stop")
+
+    def release_yolov4_trace_2cqs_inference(self):
+        ttnn.release_trace(self.device, self.tid)

From 063114a1aafa6c9444bc4c7a021a2d9decfc2854 Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <mbahnas@tenstorrent.com>
Date: Sat, 9 Nov 2024 22:19:34 +0000
Subject: [PATCH 34/69] #0: yolov4 web demo

---
 .../wormhole/yolov4/test_yolov4_performant.py |  2 +-
 models/demos/yolov4/tests/yolov4_perfomant.py | 11 +++
 .../demos/yolov4/tests/yolov4_test_infra.py   |  7 +-
 .../yolov4/web_demo/client/requirements.txt   |  3 +
 .../client/run_on_client_YOLOv4_Metal         |  2 +
 models/demos/yolov4/web_demo/client/yolov4.py | 96 +++++++++++++++++++
 .../yolov4/web_demo/server/fast_api_yolov4.py | 46 +++++++++
 .../yolov4/web_demo/server/run_uvicorn.sh     |  2 +
 .../yolov4/web_demo/server/test_model.py      | 27 ++++++
 9 files changed, 193 insertions(+), 3 deletions(-)
 create mode 100644 models/demos/yolov4/web_demo/client/requirements.txt
 create mode 100755 models/demos/yolov4/web_demo/client/run_on_client_YOLOv4_Metal
 create mode 100644 models/demos/yolov4/web_demo/client/yolov4.py
 create mode 100755 models/demos/yolov4/web_demo/server/fast_api_yolov4.py
 create mode 100755 models/demos/yolov4/web_demo/server/run_uvicorn.sh
 create mode 100644 models/demos/yolov4/web_demo/server/test_model.py

diff --git a/models/demos/wormhole/yolov4/test_yolov4_performant.py b/models/demos/wormhole/yolov4/test_yolov4_performant.py
index 7774e07922d..049b979e0dd 100644
--- a/models/demos/wormhole/yolov4/test_yolov4_performant.py
+++ b/models/demos/wormhole/yolov4/test_yolov4_performant.py
@@ -76,7 +76,7 @@ def test_run_yolov4_trace_2cqs_inference(
         batch_size,
         act_dtype,
         weight_dtype,
-        model_location_generator,
+        model_location_generator=None,
     )
     for iter in range(0, 10):
         input_shape = (1, 3, 320, 320)
diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index 35e46d60073..1841288c300 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -311,3 +311,14 @@ def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
 
     def release_yolov4_trace_2cqs_inference(self):
         ttnn.release_trace(self.device, self.tid)
+
+    def run_traced_inference(self, torch_input_tensor):
+        ##
+        ## Add more pre-processing
+        ##
+        n, c, h, w = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+        return self.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
diff --git a/models/demos/yolov4/tests/yolov4_test_infra.py b/models/demos/yolov4/tests/yolov4_test_infra.py
index 5fdcf28aa2e..1bd1a670ac0 100644
--- a/models/demos/yolov4/tests/yolov4_test_infra.py
+++ b/models/demos/yolov4/tests/yolov4_test_infra.py
@@ -19,8 +19,11 @@
 )
 
 
-def load_yolov4_weight(model_location_generator):
-    model_path = model_location_generator("models", model_subdir="Yolo")
+def load_yolov4_weight(model_location_generator=None):
+    if model_location_generator == None:
+        model_path = "models"
+    else:
+        model_path = model_location_generator("models", model_subdir="Yolo")
     if model_path == "models":
         if not os.path.exists("tests/ttnn/integration_tests/yolov4/yolov4.pth"):  # check if yolov4.th is availble
             os.system(
diff --git a/models/demos/yolov4/web_demo/client/requirements.txt b/models/demos/yolov4/web_demo/client/requirements.txt
new file mode 100644
index 00000000000..282195275da
--- /dev/null
+++ b/models/demos/yolov4/web_demo/client/requirements.txt
@@ -0,0 +1,3 @@
+opencv-python==4.6.0.66
+streamlit==1.26.0
+streamlit-webrtc==0.47.0
diff --git a/models/demos/yolov4/web_demo/client/run_on_client_YOLOv4_Metal b/models/demos/yolov4/web_demo/client/run_on_client_YOLOv4_Metal
new file mode 100755
index 00000000000..acb66b33bab
--- /dev/null
+++ b/models/demos/yolov4/web_demo/client/run_on_client_YOLOv4_Metal
@@ -0,0 +1,2 @@
+#!/bin/bash
+streamlit run yolov4.py -- "$@"
diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
new file mode 100644
index 00000000000..9665b9f87c2
--- /dev/null
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -0,0 +1,96 @@
+import time
+import io
+import json
+import argparse
+import cv2
+import requests
+import streamlit as st
+from streamlit_webrtc import VideoProcessorBase, webrtc_streamer
+
+
+class VideoProcessor(VideoProcessorBase):
+    def __init__(self):
+        ...
+
+    def cv2_plot_results(self, bgr_image, selected_classes, prob, boxes):
+        for selected_class, p, [xmin, ymin, xmax, ymax] in zip(selected_classes, prob, boxes):
+            xmin = int(xmin)
+            ymin = int(ymin)
+            xmax = int(xmax)
+            ymax = int(ymax)
+            cv2.rectangle(bgr_image, (xmin, ymin), (xmax, ymax), (45, 200, 200), 2)
+            p = int(p * 100)
+            text = f"{selected_class}:{p}%"
+            font = cv2.FONT_HERSHEY_COMPLEX
+            fontScale = 1
+            fontColor = (255, 255, 255)
+            thickness = 1
+            lineType = 2
+            text_color_bg = (127, 50, 127)
+            text_size, _ = cv2.getTextSize(text, font, fontScale, thickness)
+            text_w, text_h = text_size[0], text_size[1]
+            cv2.rectangle(
+                bgr_image,
+                (xmin - 2, ymin - 2),
+                (xmin + text_w + 2, ymin + text_h + 2),
+                text_color_bg,
+                -1,
+            )
+            cv2.putText(
+                bgr_image,
+                text,
+                (xmin, ymin + text_h),
+                font,
+                fontScale,
+                fontColor,
+                thickness,
+            )
+        return bgr_image
+
+    def transform(self, frame):
+        t0 = time.time()
+        pil_image = frame.to_image()
+        t1 = time.time()
+        buf = io.BytesIO()
+        pil_image.save(buf, format="JPEG")
+        byte_im = buf.getvalue()
+        file = {"file": byte_im}
+        # Argument Parser to grab namespace_id of server pod from user
+        parser = argparse.ArgumentParser(description="YOLOv4 script")
+        parser.add_argument("--api-url", type=str, help="URL for the object detection API", required=True)
+        args = parser.parse_args()
+        apiurl = args.api_url
+        url = f"{apiurl}/objdetection_v2"
+        r = requests.post(url, files=file)
+        data = json.loads(r.content).replace("\n", " ").replace("  ", "")
+        data = json.loads(data)
+        selected_classes, selected_scores, selected_boxes = (
+            data["labels"],
+            data["scores"],
+            data["bboxes"],
+        )
+        t3 = time.time()
+        bgr_image = frame.to_ndarray(format="bgr24")
+        image_final = self.cv2_plot_results(bgr_image, selected_classes, selected_scores, selected_boxes)
+        t4 = time.time()
+        print()
+        print(f" IMG-IN | WH | Post | Total time: ")
+        print(f" {(t1-t0):.3f} | {(t3-t1):.3f} | {(t4-t3):.3f} || {(t4-t0):.3f} ")
+
+        return image_final
+
+
+st.sidebar.image("TT.png", use_column_width=True)
+st.sidebar.image("GS.png", use_column_width=True)
+
+webrtc_streamer(
+    key="example",
+    video_transformer_factory=VideoProcessor,
+    media_stream_constraints={
+        "video": {
+            "width": {"min": 640, "ideal": 800, "max": 1920},
+            "height": {"min": 360, "ideal": 450, "max": 900},
+            "frameRate": {"min": 1, "ideal": 20, "max": 40},
+        }
+    },
+)
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
new file mode 100755
index 00000000000..c64d90a33bf
--- /dev/null
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -0,0 +1,46 @@
+import json
+from fastapi import FastAPI, File, UploadFile
+from io import BytesIO
+from PIL import Image
+from models.demos.yolov4.tests.yolov4_perfomant import Yolov4Trace2CQ
+
+import cv2
+import numpy as np
+
+app = FastAPI(
+    title="YOLOv4 object detection",
+    description="Inference engine to detect objects in image.",
+    version="0.0",
+)
+
+
+@app.get("/")
+async def root():
+    return {"message": "Hello World"}
+
+
+@app.on_event("startup")
+async def startup():
+    device_id = 0
+    device = ttnn.CreateDevice(device_id=device_id)
+    global model
+    model = Yolov4Trace2CQ()
+    model.initialize_yolov4_trace_2cqs_inference(
+        device,
+        batch_size=1,
+        act_dtype=DataType.BFLOAT16,
+        weight_dtype=DataType.BFLOAT16,
+        model_location_generator=None,
+    )
+
+
+@app.on_event("shutdown")
+async def shutdown():
+    model.release_yolov4_trace_2cqs_inference()
+
+
+@app.post("/objdetection_v2")
+async def objdetection_v2(file: UploadFile = File(...)):
+    contents = await file.read()
+    response = model.run_traced_inference(Image.open(BytesIO(contents)))
+    return json.dumps(response, indent=4)
diff --git a/models/demos/yolov4/web_demo/server/run_uvicorn.sh b/models/demos/yolov4/web_demo/server/run_uvicorn.sh
new file mode 100755
index 00000000000..8876da3fa68
--- /dev/null
+++ b/models/demos/yolov4/web_demo/server/run_uvicorn.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+TT_BACKEND_TIMEOUT=0 /home/dvartanians/Metal/tt-metal/python_env/bin/uvicorn --host 0.0.0.0 --port 7000 fast_api_yolov5:app
diff --git a/models/demos/yolov4/web_demo/server/test_model.py b/models/demos/yolov4/web_demo/server/test_model.py
new file mode 100644
index 00000000000..2ac7b7b056c
--- /dev/null
+++ b/models/demos/yolov4/web_demo/server/test_model.py
@@ -0,0 +1,27 @@
+"""
+Model pytests for yolov5
+"""
+import pytest
+import os
+from PIL import Image
+from yolov5_320 import startup_pybuda, clear_pybuda, YoloV5Handler
+
+MLDATA = "/mnt/mldata"
+
+
+@pytest.mark.skipif(
+    not os.path.isdir(MLDATA),
+    reason="Skipping test as we are not in a TT devtools environment.",
+)
+def test_model():
+    startup_pybuda()
+    model = YoloV5Handler()
+    model.initialize()
+    response = model.handle(Image.open("puppy.jpg"))
+    print("the response is: ", response)
+    assert response["labels"][0] == "dog"
+    print("test_model PASSED")
+
+
+if __name__ == "__main__":
+    test_model()

From 2a40f0385110d8bf48bf53db4b7d41bb1258b067 Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <mbahnas@tenstorrent.com>
Date: Sun, 10 Nov 2024 00:45:47 +0000
Subject: [PATCH 35/69] #0: segformer trace_2cq

---
 .../demo/test_segformer_performant.py         |  97 +++++
 .../segformer/tests/segformer_perfomant.py    | 335 ++++++++++++++++++
 .../segformer/tests/segformer_test_infra.py   | 226 ++++++++++++
 3 files changed, 658 insertions(+)
 create mode 100644 models/demos/segformer/demo/test_segformer_performant.py
 create mode 100644 models/demos/segformer/tests/segformer_perfomant.py
 create mode 100644 models/demos/segformer/tests/segformer_test_infra.py

diff --git a/models/demos/segformer/demo/test_segformer_performant.py b/models/demos/segformer/demo/test_segformer_performant.py
new file mode 100644
index 00000000000..e982e663d2e
--- /dev/null
+++ b/models/demos/segformer/demo/test_segformer_performant.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+import ttnn
+import time
+import torch
+
+from models.utility_functions import run_for_wormhole_b0
+from models.demos.segformer.tests.segformer_perfomant import (
+    run_segformer_inference,
+    run_segformer_trace_inference,
+    run_segformer_trace_2cqs_inference,
+    SegformerTrace2CQ,
+)
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+def test_run_segformer_inference(
+    device, use_program_cache, batch_size, act_dtype, weight_dtype, model_location_generator
+):
+    run_segformer_inference(device, batch_size, act_dtype, weight_dtype, model_location_generator)
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
+def test_run_segformer_trace_inference(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    enable_async_mode,
+    model_location_generator,
+):
+    run_segformer_trace_inference(
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator,
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize(
+    "device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920, "num_command_queues": 2}], indirect=True
+)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
+def test_run_segformer_trace_2cqs_inference(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    enable_async_mode,
+    model_location_generator,
+):
+    segformer_trac2_2cq = SegformerTrace2CQ()
+
+    segformer_trac2_2cq.initialize_segformer_trace_2cqs_inference(
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=None,
+    )
+    for iter in range(0, 10):
+        input_shape = (1, 3, 320, 320)
+        torch_input_tensor = torch.randn(input_shape, dtype=torch.float32)
+        n, c, h, w = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        # torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        # tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+
+        t0 = time.time()
+        output = segformer_trac2_2cq.execute_segformer_trace_2cqs_inference(tt_inputs_host)
+        t1 = time.time()
+        print("TIME", t1 - t0)
+
+    segformer_trac2_2cq.release_segformer_trace_2cqs_inference()
diff --git a/models/demos/segformer/tests/segformer_perfomant.py b/models/demos/segformer/tests/segformer_perfomant.py
new file mode 100644
index 00000000000..01d9950285b
--- /dev/null
+++ b/models/demos/segformer/tests/segformer_perfomant.py
@@ -0,0 +1,335 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+import torch
+import ttnn
+from models.utility_functions import (
+    is_wormhole_b0,
+)
+from models.demos.segformer.tests.segformer_test_infra import create_test_infra
+
+try:
+    from tracy import signpost
+
+    use_signpost = True
+except ModuleNotFoundError:
+    use_signpost = False
+
+
+def buffer_address(tensor):
+    addr = []
+    for ten in ttnn.get_device_tensors(tensor):
+        addr.append(ten.buffer_address())
+    return addr
+
+
+# TODO: Create ttnn apis for this
+ttnn.buffer_address = buffer_address
+
+
+def run_segformer_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+
+    # # First run configures convs JIT
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # Optimized run
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    if use_signpost:
+        signpost(header="stop")
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+
+def run_segformer_trace_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+
+    # First run configures convs JIT
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    shape = test_infra.input_tensor.shape
+    dtype = test_infra.input_tensor.dtype
+    layout = test_infra.input_tensor.layout
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # Optimized run
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+
+    # Capture
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.dealloc_output()
+    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    test_infra.run()
+    tt_image_res = ttnn.allocate_tensor_on_device(
+        shape,
+        dtype,
+        layout,
+        device,
+        self.input_mem_config,
+    )
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(tt_image_res)
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 0)
+    ttnn.execute_trace(device, self.tid, cq_id=0, blocking=True)
+    if use_signpost:
+        signpost(header="stop")
+    test_infra.validate()
+
+    ttnn.release_trace(device, self.tid)
+    test_infra.dealloc_output()
+
+
+def run_segformer_trace_2cqs_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+    tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = test_infra.setup_dram_sharded_input(device)
+    tt_image_res = tt_inputs_host.to(device, sharded_mem_config_DRAM)
+    op_event = ttnn.create_event(device)
+    write_event = ttnn.create_event(device)
+    # Initialize the op event so we can write
+    ttnn.record_event(0, op_event)
+
+    # First run configures convs JIT
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    shape = test_infra.input_tensor.shape
+    dtype = test_infra.input_tensor.dtype
+    layout = test_infra.input_tensor.layout
+    ttnn.record_event(0, op_event)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    print("2")
+
+    # Optimized run
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    ttnn.record_event(0, op_event)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    print("3")
+
+    # Capture
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    ttnn.record_event(0, op_event)
+    test_infra.dealloc_output()
+    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    test_infra.run()
+    self.input_tensor = ttnn.allocate_tensor_on_device(
+        shape,
+        dtype,
+        layout,
+        device,
+        self.input_mem_config,
+    )
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+
+    print("4")
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    for iter in range(0, 2):
+        ttnn.wait_for_event(1, op_event)
+        ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+        ttnn.record_event(1, write_event)
+        ttnn.wait_for_event(0, write_event)
+        # TODO: Add in place support to ttnn to_memory_config
+        # self.input_tensor = ttnn.reshard(tt_image_res, self.input_mem_config, self.input_tensor)
+        self.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, op_event)
+        ttnn.execute_trace(device, self.tid, cq_id=0, blocking=False)
+    ttnn.synchronize_devices(device)
+
+    if use_signpost:
+        signpost(header="stop")
+
+    ttnn.release_trace(device, self.tid)
+
+
+class SegformerTrace2CQ:
+    def __init__(self):
+        ...
+
+    def initialize_segformer_trace_2cqs_inference(
+        self,
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator,
+    ):
+        self.test_infra = create_test_infra(
+            device,
+            device_batch_size,
+            act_dtype,
+            weight_dtype,
+            model_location_generator=model_location_generator,
+        )
+        self.tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = self.test_infra.setup_dram_sharded_input(
+            device
+        )
+        self.tt_image_res = self.tt_inputs_host.to(device, sharded_mem_config_DRAM)
+        self.op_event = ttnn.create_event(device)
+        self.write_event = ttnn.create_event(device)
+        # Initialize the op event so we can write
+        ttnn.record_event(0, self.op_event)
+
+        # First run configures convs JIT
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        shape = self.test_infra.input_tensor.shape
+        dtype = self.test_infra.input_tensor.dtype
+        layout = self.test_infra.input_tensor.layout
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+        self.test_infra.dealloc_output()
+
+        # Optimized run
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+
+        # Capture
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.dealloc_output()
+        trace_input_addr = ttnn.buffer_address(self.test_infra.input_tensor)
+        self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+        self.test_infra.run()
+        self.input_tensor = ttnn.allocate_tensor_on_device(
+            shape,
+            dtype,
+            layout,
+            device,
+            self.input_mem_config,
+        )
+        ttnn.end_trace_capture(device, self.tid, cq_id=0)
+        assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+
+        self.device = device
+
+        print("4")
+
+        # More optimized run with caching
+        # if use_signpost:
+        #    signpost(header="start")
+
+    def execute_segformer_trace_2cqs_inference(self, tt_inputs_host=None):
+        tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        # TODO: Add in place support to ttnn to_memory_config
+        # self.input_tensor = ttnn.reshard(self.tt_image_res, self.input_mem_config, self.input_tensor)
+        self.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
+        ttnn.synchronize_devices(self.device)
+        return self.test_infra.output_tensor
+
+        # if use_signpost:
+        #    signpost(header="stop")
+
+    def release_segformer_trace_2cqs_inference(self):
+        ttnn.release_trace(self.device, self.tid)
+
+    def run_traced_inference(self, torch_input_tensor):
+        ##
+        ## Add more pre-processing
+        ##
+        n, c, h, w = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+        return self.execute_segformer_trace_2cqs_inference(tt_inputs_host)
diff --git a/models/demos/segformer/tests/segformer_test_infra.py b/models/demos/segformer/tests/segformer_test_infra.py
new file mode 100644
index 00000000000..cc8224dd327
--- /dev/null
+++ b/models/demos/segformer/tests/segformer_test_infra.py
@@ -0,0 +1,226 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from loguru import logger
+import os
+import pytest
+import torch
+import torchvision
+from PIL import Image
+import requests
+import math
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from ttnn.model_preprocessing import preprocess_model_parameters, ParameterDict, ParameterList
+import ttnn
+
+from models.demos.segformer.tt.ttnn_segformer_for_semantic_segmentation import (
+    TtSegformerForSemanticSegmentation,
+)
+from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
+from models.demos.segformer.reference.segformer_for_semantic_segmentation import (
+    SegformerForSemanticSegmentationReference,
+)
+from tests.ttnn.integration_tests.segformer.test_segformer_model import (
+    create_custom_preprocessor as create_custom_preprocessor_model,
+)
+from tests.ttnn.integration_tests.segformer.test_segformer_decode_head import (
+    create_custom_preprocessor as create_custom_preprocessor_deocde_head,
+)
+from models.utility_functions import skip_for_grayskull
+
+from models.utility_functions import (
+    is_wormhole_b0,
+    is_grayskull,
+    divup,
+)
+
+
+def create_custom_preprocessor(device):
+    def custom_preprocessor(model, name, ttnn_module_args):
+        parameters = {}
+        if isinstance(model, SegformerForSemanticSegmentationReference):
+            parameters["segformer"] = {}
+            segformer_preprocess = create_custom_preprocessor_model(device)
+            parameters["segformer"] = segformer_preprocess(model.segformer, None, None)
+            parameters["decode_head"] = {}
+            deocde_preprocess = create_custom_preprocessor_deocde_head(device)
+            parameters["decode_head"] = deocde_preprocess(model.decode_head, None, None)
+
+        return parameters
+
+    return custom_preprocessor
+
+
+def move_to_device(object, device):
+    if isinstance(object, ParameterDict):
+        for name, value in list(object.items()):
+            if name in ["sr", "proj", "dwconv", "linear_fuse", "classifier"]:
+                continue
+            object[name] = move_to_device(value, device)
+        return object
+    elif isinstance(object, ParameterList):
+        for index, element in enumerate(object):
+            object[index] = move_to_device(element, device)
+        return object
+    elif isinstance(object, ttnn.Tensor):
+        return ttnn.to_device(object, device)
+    else:
+        return object
+
+
+def load_segformer_torch_model(device, model_location_generator=None):
+    torch_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+    config = torch_model.config
+    reference_model = SegformerForSemanticSegmentationReference(config=config)
+    state_dict = torch_model.state_dict()
+    new_state_dict = {}
+    keys = [name for name, parameter in reference_model.state_dict().items()]
+    values = [parameter for name, parameter in state_dict.items()]
+    for i in range(len(keys)):
+        new_state_dict[keys[i]] = values[i]
+
+    reference_model.load_state_dict(new_state_dict)
+    reference_model.eval()
+
+    parameters = preprocess_model_parameters(
+        initialize_model=lambda: reference_model, custom_preprocessor=create_custom_preprocessor(device), device=None
+    )
+    parameters = move_to_device(parameters, device)
+
+    for i in range(4):
+        parameters["decode_head"]["linear_c"][i]["proj"]["weight"] = ttnn.to_device(
+            parameters["decode_head"]["linear_c"][i]["proj"]["weight"], device=device
+        )
+        parameters["decode_head"]["linear_c"][i]["proj"]["bias"] = ttnn.to_device(
+            parameters["decode_head"]["linear_c"][i]["proj"]["bias"], device=device
+        )
+
+    return reference_model, config, parameters
+
+
+class SegformerTestInfra:
+    def __init__(
+        self,
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=None,
+    ):
+        super().__init__()
+        torch.manual_seed(0)
+        self.pcc_passed = False
+        self.pcc_message = "Did you forget to call validate()?"
+        self.device = device
+        self.batch_size = batch_size
+        self.act_dtype = act_dtype
+        self.weight_dtype = weight_dtype
+        self.model_location_generator = model_location_generator
+        reference_model, config, self.parameters = load_segformer_torch_model(device)
+        self.ttnn_segformer_model = TtSegformerForSemanticSegmentation(config, self.parameters)
+
+        processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+        self.inputs = processor(images=image, return_tensors="pt")
+        self.torch_output_tensor = reference_model(self.inputs.pixel_values)
+
+    def run(self):
+        self.output_tensor = self.ttnn_segformer_model(
+            self.input_tensor,
+            output_attentions=None,
+            output_hidden_states=None,
+            return_dict=None,
+            parameters=self.parameters,
+        )
+
+    def setup_l1_sharded_input(self, device, torch_input_tensor=None):
+        if is_wormhole_b0():
+            core_grid = ttnn.CoreGrid(y=8, x=8)
+        else:
+            exit("Unsupported device")
+        num_devices = 1 if isinstance(device, ttnn.Device) else device.get_num_devices()
+
+        """
+        # torch tensor
+        torch_input_tensor = self.torch_input_tensor if torch_input_tensor is None else torch_input_tensor
+        n, c, h, w = torch_input_tensor.shape
+        # sharded mem config for fold input
+        num_cores = core_grid.x * core_grid.y
+        shard_h = (n * w * h + num_cores - 1) // num_cores
+        grid_size = core_grid
+        grid_coord = ttnn.CoreCoord(grid_size.x - 1, grid_size.y - 1)
+        shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
+        shard_spec = ttnn.ShardSpec(shard_grid, (shard_h, 16), ttnn.ShardOrientation.ROW_MAJOR, False)
+        input_mem_config = ttnn.MemoryConfig(
+            ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec
+        )
+        """
+
+        torch_input_tensor_permuted = torch.permute(self.inputs.pixel_values, (0, 2, 3, 1))
+        # tt_inputs_host = ttnn.from_torch(
+        #     torch_input_tensor_permuted,
+        #     dtype=ttnn.bfloat16,
+        #     memory_config=ttnn.L1_MEMORY_CONFIG,
+        #     device=device,
+        #     layout=ttnn.TILE_LAYOUT,
+        # )
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor_permuted, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT)
+        input_mem_config = ttnn.DRAM_MEMORY_CONFIG
+
+        return tt_inputs_host, input_mem_config
+
+    def setup_dram_sharded_input(self, device, torch_input_tensor=None, mesh_mapper=None, mesh_composer=None):
+        tt_inputs_host, input_mem_config = self.setup_l1_sharded_input(device)
+        dram_grid_size = device.dram_grid_size()
+        dram_shard_spec = ttnn.ShardSpec(
+            ttnn.CoreRangeSet(
+                {ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(dram_grid_size.x - 1, dram_grid_size.y - 1))}
+            ),
+            [
+                divup(tt_inputs_host.volume() // tt_inputs_host.shape[-1], (dram_grid_size.x * dram_grid_size.y)),
+                16,
+            ],
+            ttnn.ShardOrientation.ROW_MAJOR,
+            False,
+        )
+        sharded_mem_config_DRAM = ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.DRAM, dram_shard_spec
+        )
+        sharded_mem_config_DRAM = ttnn.DRAM_MEMORY_CONFIG
+
+        return tt_inputs_host, sharded_mem_config_DRAM, input_mem_config
+
+    def validate(self, output_tensor=None):
+        output_tensor = self.output_tensor if output_tensor is None else output_tensor
+        output_tensor = ttnn.to_torch(self.output_tensor.logits)
+        output_tensor = torch.permute(output_tensor, (0, 3, 1, 2))
+        h = w = int(math.sqrt(output_tensor.shape[-1]))
+        final_output_tensor = torch.reshape(output_tensor, (output_tensor.shape[0], output_tensor.shape[1], h, w))
+
+        valid_pcc = 0  # 0.985
+        self.pcc_passed, self.pcc_message = assert_with_pcc(
+            self.torch_output_tensor.logits, final_output_tensor, pcc=valid_pcc
+        )
+
+        logger.info(f"Segformer , PCC={self.pcc_message}")
+
+    def dealloc_output(self):
+        ttnn.deallocate(self.output_tensor.logits)
+
+
+def create_test_infra(
+    device,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator=None,
+):
+    return SegformerTestInfra(
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator,
+    )

From 311b3550208bd2ea9feede6ff60354ab4f0333ef Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <mbahnas@tenstorrent.com>
Date: Mon, 11 Nov 2024 21:54:06 +0000
Subject: [PATCH 36/69] #0: yolov4 web demo

---
 models/demos/yolov4/tests/yolov4_perfomant.py         |  8 ++++----
 .../demos/yolov4/web_demo/server/fast_api_yolov4.py   | 11 +++--------
 models/demos/yolov4/web_demo/server/run_uvicorn.sh    |  2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index 1841288c300..c5b35660c0e 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -221,10 +221,10 @@ def __init__(self):
     def initialize_yolov4_trace_2cqs_inference(
         self,
         device,
-        device_batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator,
+        device_batch_size=1,
+        act_dtype=ttnn.bfloat16,
+        weight_dtype=ttnn.bfloat16,
+        model_location_generator=None,
     ):
         self.test_infra = create_test_infra(
             device,
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index c64d90a33bf..b8e93e42d19 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -3,6 +3,7 @@
 from io import BytesIO
 from PIL import Image
 from models.demos.yolov4.tests.yolov4_perfomant import Yolov4Trace2CQ
+import ttnn
 
 import cv2
 import numpy as np
@@ -22,16 +23,10 @@ async def root():
 @app.on_event("startup")
 async def startup():
     device_id = 0
-    device = ttnn.CreateDevice(device_id=device_id)
+    device = ttnn.CreateDevice(device_id, l1_small_siz=24576, trace_region_size=1617920, num_command_queues=2)
     global model
     model = Yolov4Trace2CQ()
-    model.initialize_yolov4_trace_2cqs_inference(
-        device,
-        batch_size=1,
-        act_dtype=DataType.BFLOAT16,
-        weight_dtype=DataType.BFLOAT16,
-        model_location_generator=None,
-    )
+    model.initialize_yolov4_trace_2cqs_inference(device)
 
 
 @app.on_event("shutdown")
diff --git a/models/demos/yolov4/web_demo/server/run_uvicorn.sh b/models/demos/yolov4/web_demo/server/run_uvicorn.sh
index 8876da3fa68..b91f8b19e84 100755
--- a/models/demos/yolov4/web_demo/server/run_uvicorn.sh
+++ b/models/demos/yolov4/web_demo/server/run_uvicorn.sh
@@ -1,2 +1,2 @@
 #!/bin/bash
-TT_BACKEND_TIMEOUT=0 /home/dvartanians/Metal/tt-metal/python_env/bin/uvicorn --host 0.0.0.0 --port 7000 fast_api_yolov5:app
+uvicorn --host 0.0.0.0 --port 7000 models.demos.yolov4.web_demo.server.fast_api_yolov4:app

From 57d3a4221e1b428314088591dc3942fc6dcef81f Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <mbahnas@tenstorrent.com>
Date: Mon, 11 Nov 2024 21:57:01 +0000
Subject: [PATCH 37/69] #0: yolov4 web demo

---
 models/demos/yolov4/web_demo/server/fast_api_yolov4.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index b8e93e42d19..85c61f2db43 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -23,7 +23,7 @@ async def root():
 @app.on_event("startup")
 async def startup():
     device_id = 0
-    device = ttnn.CreateDevice(device_id, l1_small_siz=24576, trace_region_size=1617920, num_command_queues=2)
+    device = ttnn.CreateDevice(device_id, l1_small_size=24576, trace_region_size=1617920, num_command_queues=2)
     global model
     model = Yolov4Trace2CQ()
     model.initialize_yolov4_trace_2cqs_inference(device)

From 68dae5fc9165b135b0166134a363980ad966ac9b Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 00:26:30 +0000
Subject: [PATCH 38/69] #0: wip yolov4 web demo

---
 models/demos/yolov4/tests/yolov4_perfomant.py | 89 ++++++++++++++++++-
 .../yolov4/web_demo/server/fast_api_yolov4.py | 35 +++++++-
 2 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index c5b35660c0e..8a990e307be 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -9,6 +9,8 @@
     is_wormhole_b0,
 )
 from models.demos.yolov4.tests.yolov4_test_infra import create_test_infra
+from models.demos.yolov4.demo.demo import YoloLayer
+
 
 try:
     from tracy import signpost
@@ -293,6 +295,22 @@ def initialize_yolov4_trace_2cqs_inference(
         # if use_signpost:
         #    signpost(header="start")
 
+    def get_region_boxes(self, boxes_and_confs):
+        print("Getting boxes from boxes and confs ...")
+        boxes_list = []
+        confs_list = []
+
+        for item in boxes_and_confs:
+            boxes_list.append(item[0])
+            confs_list.append(item[1])
+
+        # boxes: [batch, num1 + num2 + num3, 1, 4]
+        # confs: [batch, num1 + num2 + num3, num_classes]
+        boxes = torch.cat(boxes_list, dim=1)
+        confs = torch.cat(confs_list, dim=1)
+
+        return [boxes, confs]
+
     def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
         tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
         ttnn.wait_for_event(1, self.op_event)
@@ -304,7 +322,54 @@ def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
         ttnn.record_event(0, self.op_event)
         ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
         ttnn.synchronize_devices(self.device)
-        return self.test_infra.output_tensor
+        output = self.test_infra.output_tensor
+
+        output_tensor1 = ttnn.to_torch(output[0])
+        output_tensor1 = output_tensor1.reshape(1, 40, 40, 255)
+        output_tensor1 = torch.permute(output_tensor1, (0, 3, 1, 2))
+
+        output_tensor2 = ttnn.to_torch(output[1])
+        output_tensor2 = output_tensor2.reshape(1, 20, 20, 255)
+        output_tensor2 = torch.permute(output_tensor2, (0, 3, 1, 2))
+
+        output_tensor3 = ttnn.to_torch(output[2])
+        output_tensor3 = output_tensor3.reshape(1, 10, 10, 255)
+        output_tensor3 = torch.permute(output_tensor3, (0, 3, 1, 2))
+
+        n_classes = 80
+
+        yolo1 = YoloLayer(
+            anchor_mask=[0, 1, 2],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=8,
+        )
+
+        yolo2 = YoloLayer(
+            anchor_mask=[3, 4, 5],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=16,
+        )
+
+        yolo3 = YoloLayer(
+            anchor_mask=[6, 7, 8],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=32,
+        )
+
+        y1 = yolo1(output_tensor1)
+        y2 = yolo2(output_tensor2)
+        y3 = yolo3(output_tensor3)
+
+        output = self.get_region_boxes([y1, y2, y3])
+
+        return output
+        # return self.test_infra.output_tensor
 
         # if use_signpost:
         #    signpost(header="stop")
@@ -316,8 +381,26 @@ def run_traced_inference(self, torch_input_tensor):
         ##
         ## Add more pre-processing
         ##
-        n, c, h, w = torch_input_tensor.shape
-        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        print("\n\n\n")
+        print("type of torch_input_tensor: ", type(torch_input_tensor))
+        print("unszqueeze: ")
+        torch_input_tensor = torch_input_tensor.unsqueeze(0)
+        try:
+            n, h, w, c = torch_input_tensor.shape
+            print("we are inside try")
+            print("n: ", n)
+            print("c: ", c)
+            print("h: ", h)
+            print("w: ", w)
+
+        except:
+            h, w, c = torch_input_tensor.shape
+            n = 1
+            print("n: ", n)
+            print("c: ", c)
+            print("h: ", h)
+            print("w: ", w)
+        # torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
         torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
         tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
         tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index 85c61f2db43..7f6d363bfef 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -7,6 +7,7 @@
 
 import cv2
 import numpy as np
+import torch
 
 app = FastAPI(
     title="YOLOv4 object detection",
@@ -24,6 +25,7 @@ async def root():
 async def startup():
     device_id = 0
     device = ttnn.CreateDevice(device_id, l1_small_size=24576, trace_region_size=1617920, num_command_queues=2)
+    ttnn.enable_program_cache(device)
     global model
     model = Yolov4Trace2CQ()
     model.initialize_yolov4_trace_2cqs_inference(device)
@@ -34,8 +36,37 @@ async def shutdown():
     model.release_yolov4_trace_2cqs_inference()
 
 
+# @app.post("/objdetection_v2")
+# async def objdetection_v2(file: UploadFile = File(...)):
+#    contents = await file.read()
+#    response = model.run_traced_inference(Image.open(BytesIO(contents)))
+#    return json.dumps(response, indent=4)
+#
+#
+#
+
+
+def process_request(output):
+    # Convert all tensors to lists for JSON serialization
+    # output_serializable = {'output': [tensor.tolist() for tensor in output['output']]}
+    output_serializable = {"output": [tensor.tolist() for tensor in output]}
+    return output_serializable
+
+
 @app.post("/objdetection_v2")
 async def objdetection_v2(file: UploadFile = File(...)):
     contents = await file.read()
-    response = model.run_traced_inference(Image.open(BytesIO(contents)))
-    return json.dumps(response, indent=4)
+
+    # Load and convert the image to RGB
+    image = Image.open(BytesIO(contents)).convert("RGB")
+    image = np.array(image)
+    image = torch.from_numpy(image)
+    # Perform object detection
+    # response = model.do_detect(image)
+    response = model.run_traced_inference(image)
+
+    print("response in fastapi is:", response)
+
+    # Convert response tensors to JSON-serializable format
+    output = process_request(response)
+    return output

From f3b42a10301d2f11678ae503f4c54eb4a4428a53 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 00:32:50 +0000
Subject: [PATCH 39/69] #0: wip yolov4 web demo

---
 models/demos/yolov4/web_demo/client/yolov4.py | 286 +++++++++++++++---
 1 file changed, 237 insertions(+), 49 deletions(-)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index 9665b9f87c2..be36d2a2109 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -6,50 +6,211 @@
 import requests
 import streamlit as st
 from streamlit_webrtc import VideoProcessorBase, webrtc_streamer
+import torch
+import numpy as np
+
+
+from torch import nn
+import cv2
+import time
+import numpy as np
+import math
 
 
 class VideoProcessor(VideoProcessorBase):
     def __init__(self):
-        ...
-
-    def cv2_plot_results(self, bgr_image, selected_classes, prob, boxes):
-        for selected_class, p, [xmin, ymin, xmax, ymax] in zip(selected_classes, prob, boxes):
-            xmin = int(xmin)
-            ymin = int(ymin)
-            xmax = int(xmax)
-            ymax = int(ymax)
-            cv2.rectangle(bgr_image, (xmin, ymin), (xmax, ymax), (45, 200, 200), 2)
-            p = int(p * 100)
-            text = f"{selected_class}:{p}%"
-            font = cv2.FONT_HERSHEY_COMPLEX
-            fontScale = 1
-            fontColor = (255, 255, 255)
-            thickness = 1
-            lineType = 2
-            text_color_bg = (127, 50, 127)
-            text_size, _ = cv2.getTextSize(text, font, fontScale, thickness)
-            text_w, text_h = text_size[0], text_size[1]
-            cv2.rectangle(
-                bgr_image,
-                (xmin - 2, ymin - 2),
-                (xmin + text_w + 2, ymin + text_h + 2),
-                text_color_bg,
-                -1,
-            )
-            cv2.putText(
-                bgr_image,
-                text,
-                (xmin, ymin + text_h),
-                font,
-                fontScale,
-                fontColor,
-                thickness,
-            )
-        return bgr_image
-
-    def transform(self, frame):
+        self.frame_count = 0
+
+    def post_processing(self, img, conf_thresh, nms_thresh, output):
+        # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
+        # num_anchors = 9
+        # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
+        # strides = [8, 16, 32]
+        # anchor_step = len(anchors) // num_anchors
+
+        print("in post_processing the output type is: ", type(output))
+        print("in post_processing some of the output[1] is: ", output[1][:5])
+        # [batch, num, 1, 4]
+        box_array = output[0]
+        # [batch, num, num_classes]
+        confs = output[1].float()
+
+        t1 = time.time()
+
+        if type(box_array).__name__ != "ndarray":
+            box_array = box_array.cpu().detach().numpy()
+            confs = confs.cpu().detach().numpy()
+
+        num_classes = confs.shape[2]
+
+        # [batch, num, 4]
+        box_array = box_array[:, :, 0]
+
+        # [batch, num, num_classes] --> [batch, num]
+        max_conf = np.max(confs, axis=2)
+        max_id = np.argmax(confs, axis=2)
+
+        t2 = time.time()
+
+        bboxes_batch = []
+        for i in range(box_array.shape[0]):
+            argwhere = max_conf[i] > conf_thresh
+            l_box_array = box_array[i, argwhere, :]
+            l_max_conf = max_conf[i, argwhere]
+            l_max_id = max_id[i, argwhere]
+
+            bboxes = []
+            # nms for each class
+            for j in range(num_classes):
+                cls_argwhere = l_max_id == j
+                ll_box_array = l_box_array[cls_argwhere, :]
+                ll_max_conf = l_max_conf[cls_argwhere]
+                ll_max_id = l_max_id[cls_argwhere]
+
+                keep = self.nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
+
+                if keep.size > 0:
+                    ll_box_array = ll_box_array[keep, :]
+                    ll_max_conf = ll_max_conf[keep]
+                    ll_max_id = ll_max_id[keep]
+
+                    for k in range(ll_box_array.shape[0]):
+                        bboxes.append(
+                            [
+                                ll_box_array[k, 0],
+                                ll_box_array[k, 1],
+                                ll_box_array[k, 2],
+                                ll_box_array[k, 3],
+                                ll_max_conf[k],
+                                ll_max_conf[k],
+                                ll_max_id[k],
+                            ]
+                        )
+
+            bboxes_batch.append(bboxes)
+
+        t3 = time.time()
+
+        print("-----------------------------------")
+        print("       max and argmax : %f" % (t2 - t1))
+        print("                  nms : %f" % (t3 - t2))
+        print("Post processing total : %f" % (t3 - t1))
+        print("-----------------------------------")
+
+        return bboxes_batch
+
+    def load_class_names(self, namesfile):
+        class_names = []
+        with open(namesfile, "r") as fp:
+            lines = fp.readlines()
+            for line in lines:
+                line = line.rstrip()
+                class_names.append(line)
+        return class_names
+
+    def nms_cpu(self, boxes, confs, nms_thresh=0.5, min_mode=False):
+        x1 = boxes[:, 0]
+        y1 = boxes[:, 1]
+        x2 = boxes[:, 2]
+        y2 = boxes[:, 3]
+
+        areas = (x2 - x1) * (y2 - y1)
+        order = confs.argsort()[::-1]
+
+        keep = []
+        while order.size > 0:
+            idx_self = order[0]
+            idx_other = order[1:]
+
+            keep.append(idx_self)
+
+            xx1 = np.maximum(x1[idx_self], x1[idx_other])
+            yy1 = np.maximum(y1[idx_self], y1[idx_other])
+            xx2 = np.minimum(x2[idx_self], x2[idx_other])
+            yy2 = np.minimum(y2[idx_self], y2[idx_other])
+
+            w = np.maximum(0.0, xx2 - xx1)
+            h = np.maximum(0.0, yy2 - yy1)
+            inter = w * h
+
+            if min_mode:
+                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
+            else:
+                over = inter / (areas[order[0]] + areas[order[1:]] - inter)
+
+            inds = np.where(over <= nms_thresh)[0]
+            order = order[inds + 1]
+
+        return np.array(keep)
+
+    def plot_boxes_cv2(self, bgr_img, boxes, savename=None, class_names=None, color=None):
+        img = np.copy(bgr_img)
+        colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
+
+        def get_color(c, x, max_val):
+            ratio = float(x) / max_val * 5
+            i = int(math.floor(ratio))
+            j = int(math.ceil(ratio))
+            ratio = ratio - i
+            r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
+            return int(r * 255)
+
+        width = img.shape[1]
+        height = img.shape[0]
+        for i in range(len(boxes)):
+            box = boxes[i]
+            x1 = int(box[0] * width)
+            y1 = int(box[1] * height)
+            x2 = int(box[2] * width)
+            y2 = int(box[3] * height)
+            bbox_thick = int(0.6 * (height + width) / 600)
+            if color:
+                rgb = color
+            else:
+                rgb = (255, 0, 0)
+            if len(box) >= 7 and class_names:
+                cls_conf = box[5]
+                cls_id = box[6]
+                print("%s: %f" % (class_names[cls_id], cls_conf))
+                classes = len(class_names)
+                offset = cls_id * 123457 % classes
+                red = get_color(2, offset, classes)
+                green = get_color(1, offset, classes)
+                blue = get_color(0, offset, classes)
+                if color is None:
+                    rgb = (red, green, blue)
+                msg = str(class_names[cls_id]) + " " + str(round(cls_conf, 3))
+                t_size = cv2.getTextSize(msg, 0, 0.7, thickness=bbox_thick // 2)[0]
+                c1, c2 = (x1, y1), (x2, y2)
+                c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
+                cv2.rectangle(img, (x1, y1), (int(np.float32(c3[0])), int(np.float32(c3[1]))), rgb, -1)
+                img = cv2.putText(
+                    img,
+                    msg,
+                    (c1[0], int(np.float32(c1[1] - 2))),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7,
+                    (0, 0, 0),
+                    bbox_thick // 2,
+                    lineType=cv2.LINE_AA,
+                )
+
+            img = cv2.rectangle(img, (x1, y1), (int(x2), int(y2)), rgb, bbox_thick)
+        if savename:
+            print("save plot results to %s" % savename)
+            cv2.imwrite(savename, img)
+        return img
+
+    # def transform(self, frame):
+    def recv(self, frame):
+        #        self.frame_count += 1
+        #        if self.frame_count % 999 != 0:
+        #            return frame # Skip frame processing
         t0 = time.time()
         pil_image = frame.to_image()
+        # resize on the client side
+        new_size = (320, 320)
+        pil_image = pil_image.resize(new_size)
         t1 = time.time()
         buf = io.BytesIO()
         pil_image.save(buf, format="JPEG")
@@ -62,16 +223,39 @@ def transform(self, frame):
         apiurl = args.api_url
         url = f"{apiurl}/objdetection_v2"
         r = requests.post(url, files=file)
-        data = json.loads(r.content).replace("\n", " ").replace("  ", "")
-        data = json.loads(data)
-        selected_classes, selected_scores, selected_boxes = (
-            data["labels"],
-            data["scores"],
-            data["bboxes"],
-        )
+
+        if r.status_code == 200:
+            try:
+                # Get the JSON response as a dictionary
+                response_dict = r.json()
+                output = [torch.tensor(tensor_data) for tensor_data in response_dict["output"]]
+                print("\n\n\n")
+                # print("response_dict: ", response_dict)
+                print("\n\n\n")
+                # st.write(response_dict)  # Display the dictionary response in Streamlit
+            except ValueError:
+                st.error("Failed to parse JSON. The response is not in JSON format.")
+        else:
+            st.error(f"Request failed with status code {r.status_code}")
+
+        # print("\n\n\n\n\n content in r is: ", r.content)
+        # r = json.loads(r.content).replace("\n", " ").replace("  ", "")
+        # output = json.loads(r.content)
+        # output = response_dict["output"]
         t3 = time.time()
         bgr_image = frame.to_ndarray(format="bgr24")
-        image_final = self.cv2_plot_results(bgr_image, selected_classes, selected_scores, selected_boxes)
+        conf_thresh = 0.6
+        nms_thresh = 0.5
+        boxes = self.post_processing(bgr_image, conf_thresh, nms_thresh, output)
+        namesfile = "coco.names"
+        class_names = self.load_class_names(namesfile)
+        import random
+
+        random_number = random.randint(1, 100)
+
+        save_name = "ttnn_prediction_demo" + str(random_number) + ".jpg"
+        # save_name = None
+        image_final = self.plot_boxes_cv2(bgr_image, boxes[0], save_name, class_names)
         t4 = time.time()
         print()
         print(f" IMG-IN | WH | Post | Total time: ")
@@ -88,9 +272,13 @@ def transform(self, frame):
     video_transformer_factory=VideoProcessor,
     media_stream_constraints={
         "video": {
-            "width": {"min": 640, "ideal": 800, "max": 1920},
-            "height": {"min": 360, "ideal": 450, "max": 900},
-            "frameRate": {"min": 1, "ideal": 20, "max": 40},
+            #            "width": {"min": 640, "ideal": 800, "max": 1920},
+            #            "height": {"min": 360, "ideal": 450, "max": 900},
+            "width": {"min": 320, "ideal": 400, "max": 960},
+            "height": {"min": 180, "ideal": 225, "max": 450},
+            # "frameRate": {"min": 0.01, "ideal": 0.2, "max": 0.4},
+            # "frameRate": {"ideal": 5, "max": 10},
         }
     },
+    async_processing=True,  # Use asynchronous processing for long tasks
 )

From 7f4c679c4851e5a9e40786f3543a1911ab963b5a Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 04:01:21 +0000
Subject: [PATCH 40/69] #0: wip fixed accuracy issue in yolov4 web demo

---
 models/demos/yolov4/tests/yolov4_perfomant.py |  5 ++++-
 .../yolov4/web_demo/server/fast_api_yolov4.py | 19 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index 8a990e307be..54f384fce2f 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -384,7 +384,7 @@ def run_traced_inference(self, torch_input_tensor):
         print("\n\n\n")
         print("type of torch_input_tensor: ", type(torch_input_tensor))
         print("unszqueeze: ")
-        torch_input_tensor = torch_input_tensor.unsqueeze(0)
+        # torch_input_tensor = torch_input_tensor.unsqueeze(0)
         try:
             n, h, w, c = torch_input_tensor.shape
             print("we are inside try")
@@ -400,7 +400,10 @@ def run_traced_inference(self, torch_input_tensor):
             print("c: ", c)
             print("h: ", h)
             print("w: ", w)
+
+        print("the shape of the input tensor before permute is: ", torch_input_tensor.shape)
         # torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        print("the shape of the input tensor after permute is: ", torch_input_tensor.shape)
         torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
         tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
         tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index 7f6d363bfef..21f092c964b 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -8,6 +8,7 @@
 import cv2
 import numpy as np
 import torch
+import time
 
 app = FastAPI(
     title="YOLOv4 object detection",
@@ -60,11 +61,25 @@ async def objdetection_v2(file: UploadFile = File(...)):
     # Load and convert the image to RGB
     image = Image.open(BytesIO(contents)).convert("RGB")
     image = np.array(image)
-    image = torch.from_numpy(image)
+    print("\n\n\n\n the shape of numpy image is: ", image.shape)
+    if type(image) == np.ndarray and len(image.shape) == 3:  # cv2 image
+        print("we are inside len image = 3")
+        # image = torch.from_numpy(image.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
+        image = torch.from_numpy(image).float().div(255.0).unsqueeze(0)
+    elif type(image) == np.ndarray and len(image.shape) == 4:
+        print("we are inside len image = 4")
+        # image = torch.from_numpy(image.transpose(0, 3, 1, 2)).float().div(255.0)
+        image = torch.from_numpy(image).float().div(255.0)
+    else:
+        print("unknow image type")
+        exit(-1)
+    # image = torch.from_numpy(image)
     # Perform object detection
     # response = model.do_detect(image)
+    t1 = time.time()
     response = model.run_traced_inference(image)
-
+    t2 = time.time()
+    print("the inference on the sever side took: ", t2 - t1)
     print("response in fastapi is:", response)
 
     # Convert response tensors to JSON-serializable format

From 5da89f655fbd68571f47d0e8da6691cbb6561fca Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 04:53:22 +0000
Subject: [PATCH 41/69] #0: some changes on the client side

---
 models/demos/yolov4/web_demo/client/yolov4.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index be36d2a2109..4df32cf8961 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -272,13 +272,11 @@ def recv(self, frame):
     video_transformer_factory=VideoProcessor,
     media_stream_constraints={
         "video": {
-            #            "width": {"min": 640, "ideal": 800, "max": 1920},
-            #            "height": {"min": 360, "ideal": 450, "max": 900},
             "width": {"min": 320, "ideal": 400, "max": 960},
-            "height": {"min": 180, "ideal": 225, "max": 450},
-            # "frameRate": {"min": 0.01, "ideal": 0.2, "max": 0.4},
-            # "frameRate": {"ideal": 5, "max": 10},
+            # "height": {"min": 180, "ideal": 225, "max": 450},
+            "height": {"min": 320, "ideal": 400, "max": 960},
+            "frameRate": {"min": 1, "ideal": 50, "max": 60},
         }
     },
-    async_processing=True,  # Use asynchronous processing for long tasks
+    # async_processing=True  # Use asynchronous processing for long tasks
 )

From 8b9d4923879e1d3c8a2bc50090a3b3bc0b515e93 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 05:07:04 +0000
Subject: [PATCH 42/69] #0: some changes on the client side

---
 models/demos/yolov4/web_demo/client/yolov4.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index 4df32cf8961..30039f5d825 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -8,6 +8,7 @@
 from streamlit_webrtc import VideoProcessorBase, webrtc_streamer
 import torch
 import numpy as np
+import av
 
 
 from torch import nn
@@ -261,7 +262,8 @@ def recv(self, frame):
         print(f" IMG-IN | WH | Post | Total time: ")
         print(f" {(t1-t0):.3f} | {(t3-t1):.3f} | {(t4-t3):.3f} || {(t4-t0):.3f} ")
 
-        return image_final
+        # return image_final
+        return av.VideoFrame.from_ndarray(image_final, format="bgr24")
 
 
 st.sidebar.image("TT.png", use_column_width=True)

From 4ea84aeae32fc0704e7f7e1331fd2281b7da8eda Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 05:43:22 +0000
Subject: [PATCH 43/69] #0: some cleanup

---
 .../yolov4/web_demo/server/fast_api_yolov4.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index 21f092c964b..6127957883b 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -37,19 +37,8 @@ async def shutdown():
     model.release_yolov4_trace_2cqs_inference()
 
 
-# @app.post("/objdetection_v2")
-# async def objdetection_v2(file: UploadFile = File(...)):
-#    contents = await file.read()
-#    response = model.run_traced_inference(Image.open(BytesIO(contents)))
-#    return json.dumps(response, indent=4)
-#
-#
-#
-
-
 def process_request(output):
     # Convert all tensors to lists for JSON serialization
-    # output_serializable = {'output': [tensor.tolist() for tensor in output['output']]}
     output_serializable = {"output": [tensor.tolist() for tensor in output]}
     return output_serializable
 
@@ -61,21 +50,13 @@ async def objdetection_v2(file: UploadFile = File(...)):
     # Load and convert the image to RGB
     image = Image.open(BytesIO(contents)).convert("RGB")
     image = np.array(image)
-    print("\n\n\n\n the shape of numpy image is: ", image.shape)
     if type(image) == np.ndarray and len(image.shape) == 3:  # cv2 image
-        print("we are inside len image = 3")
-        # image = torch.from_numpy(image.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
         image = torch.from_numpy(image).float().div(255.0).unsqueeze(0)
     elif type(image) == np.ndarray and len(image.shape) == 4:
-        print("we are inside len image = 4")
-        # image = torch.from_numpy(image.transpose(0, 3, 1, 2)).float().div(255.0)
         image = torch.from_numpy(image).float().div(255.0)
     else:
         print("unknow image type")
         exit(-1)
-    # image = torch.from_numpy(image)
-    # Perform object detection
-    # response = model.do_detect(image)
     t1 = time.time()
     response = model.run_traced_inference(image)
     t2 = time.time()

From 741b5cce3db2ca45ea1e12f050e0f0573b34a98a Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 05:54:30 +0000
Subject: [PATCH 44/69] #0: some cleanup

---
 models/demos/yolov4/tests/yolov4_perfomant.py | 27 +------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index 54f384fce2f..75a479c0801 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -378,32 +378,7 @@ def release_yolov4_trace_2cqs_inference(self):
         ttnn.release_trace(self.device, self.tid)
 
     def run_traced_inference(self, torch_input_tensor):
-        ##
-        ## Add more pre-processing
-        ##
-        print("\n\n\n")
-        print("type of torch_input_tensor: ", type(torch_input_tensor))
-        print("unszqueeze: ")
-        # torch_input_tensor = torch_input_tensor.unsqueeze(0)
-        try:
-            n, h, w, c = torch_input_tensor.shape
-            print("we are inside try")
-            print("n: ", n)
-            print("c: ", c)
-            print("h: ", h)
-            print("w: ", w)
-
-        except:
-            h, w, c = torch_input_tensor.shape
-            n = 1
-            print("n: ", n)
-            print("c: ", c)
-            print("h: ", h)
-            print("w: ", w)
-
-        print("the shape of the input tensor before permute is: ", torch_input_tensor.shape)
-        # torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
-        print("the shape of the input tensor after permute is: ", torch_input_tensor.shape)
+        n, h, w, c = torch_input_tensor.shape
         torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
         tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
         tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)

From 59662905ac5845a3328981b77d0527de073fc52d Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 06:06:31 +0000
Subject: [PATCH 45/69] #0: some cleanup on clinet and sever side

---
 models/demos/yolov4/web_demo/client/yolov4.py | 22 -------------------
 .../yolov4/web_demo/server/fast_api_yolov4.py |  1 -
 2 files changed, 23 deletions(-)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index 30039f5d825..7f70e9acc0d 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -23,17 +23,7 @@ def __init__(self):
         self.frame_count = 0
 
     def post_processing(self, img, conf_thresh, nms_thresh, output):
-        # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
-        # num_anchors = 9
-        # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
-        # strides = [8, 16, 32]
-        # anchor_step = len(anchors) // num_anchors
-
-        print("in post_processing the output type is: ", type(output))
-        print("in post_processing some of the output[1] is: ", output[1][:5])
-        # [batch, num, 1, 4]
         box_array = output[0]
-        # [batch, num, num_classes]
         confs = output[1].float()
 
         t1 = time.time()
@@ -202,11 +192,7 @@ def get_color(c, x, max_val):
             cv2.imwrite(savename, img)
         return img
 
-    # def transform(self, frame):
     def recv(self, frame):
-        #        self.frame_count += 1
-        #        if self.frame_count % 999 != 0:
-        #            return frame # Skip frame processing
         t0 = time.time()
         pil_image = frame.to_image()
         # resize on the client side
@@ -230,19 +216,11 @@ def recv(self, frame):
                 # Get the JSON response as a dictionary
                 response_dict = r.json()
                 output = [torch.tensor(tensor_data) for tensor_data in response_dict["output"]]
-                print("\n\n\n")
-                # print("response_dict: ", response_dict)
-                print("\n\n\n")
-                # st.write(response_dict)  # Display the dictionary response in Streamlit
             except ValueError:
                 st.error("Failed to parse JSON. The response is not in JSON format.")
         else:
             st.error(f"Request failed with status code {r.status_code}")
 
-        # print("\n\n\n\n\n content in r is: ", r.content)
-        # r = json.loads(r.content).replace("\n", " ").replace("  ", "")
-        # output = json.loads(r.content)
-        # output = response_dict["output"]
         t3 = time.time()
         bgr_image = frame.to_ndarray(format="bgr24")
         conf_thresh = 0.6
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index 6127957883b..a950d0316a2 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -61,7 +61,6 @@ async def objdetection_v2(file: UploadFile = File(...)):
     response = model.run_traced_inference(image)
     t2 = time.time()
     print("the inference on the sever side took: ", t2 - t1)
-    print("response in fastapi is:", response)
 
     # Convert response tensors to JSON-serializable format
     output = process_request(response)

From 34cbf07041a63f5e41c07b0cd4c4b6892fd72b1e Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 06:14:38 +0000
Subject: [PATCH 46/69] #0: remove segformer files

---
 .../demo/test_segformer_performant.py         |  97 -----
 .../segformer/tests/segformer_perfomant.py    | 335 ------------------
 .../segformer/tests/segformer_test_infra.py   | 226 ------------
 3 files changed, 658 deletions(-)
 delete mode 100644 models/demos/segformer/demo/test_segformer_performant.py
 delete mode 100644 models/demos/segformer/tests/segformer_perfomant.py
 delete mode 100644 models/demos/segformer/tests/segformer_test_infra.py

diff --git a/models/demos/segformer/demo/test_segformer_performant.py b/models/demos/segformer/demo/test_segformer_performant.py
deleted file mode 100644
index e982e663d2e..00000000000
--- a/models/demos/segformer/demo/test_segformer_performant.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import ttnn
-import time
-import torch
-
-from models.utility_functions import run_for_wormhole_b0
-from models.demos.segformer.tests.segformer_perfomant import (
-    run_segformer_inference,
-    run_segformer_trace_inference,
-    run_segformer_trace_2cqs_inference,
-    SegformerTrace2CQ,
-)
-
-
-@run_for_wormhole_b0()
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype",
-    ((1, ttnn.bfloat16, ttnn.bfloat16),),
-)
-def test_run_segformer_inference(
-    device, use_program_cache, batch_size, act_dtype, weight_dtype, model_location_generator
-):
-    run_segformer_inference(device, batch_size, act_dtype, weight_dtype, model_location_generator)
-
-
-@run_for_wormhole_b0()
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920}], indirect=True)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype",
-    ((1, ttnn.bfloat16, ttnn.bfloat16),),
-)
-@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
-def test_run_segformer_trace_inference(
-    device,
-    use_program_cache,
-    batch_size,
-    act_dtype,
-    weight_dtype,
-    enable_async_mode,
-    model_location_generator,
-):
-    run_segformer_trace_inference(
-        device,
-        batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator,
-    )
-
-
-@run_for_wormhole_b0()
-@pytest.mark.parametrize(
-    "device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920, "num_command_queues": 2}], indirect=True
-)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype",
-    ((1, ttnn.bfloat16, ttnn.bfloat16),),
-)
-@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
-def test_run_segformer_trace_2cqs_inference(
-    device,
-    use_program_cache,
-    batch_size,
-    act_dtype,
-    weight_dtype,
-    enable_async_mode,
-    model_location_generator,
-):
-    segformer_trac2_2cq = SegformerTrace2CQ()
-
-    segformer_trac2_2cq.initialize_segformer_trace_2cqs_inference(
-        device,
-        batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator=None,
-    )
-    for iter in range(0, 10):
-        input_shape = (1, 3, 320, 320)
-        torch_input_tensor = torch.randn(input_shape, dtype=torch.float32)
-        n, c, h, w = torch_input_tensor.shape
-        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
-        # torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
-        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
-        # tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
-
-        t0 = time.time()
-        output = segformer_trac2_2cq.execute_segformer_trace_2cqs_inference(tt_inputs_host)
-        t1 = time.time()
-        print("TIME", t1 - t0)
-
-    segformer_trac2_2cq.release_segformer_trace_2cqs_inference()
diff --git a/models/demos/segformer/tests/segformer_perfomant.py b/models/demos/segformer/tests/segformer_perfomant.py
deleted file mode 100644
index 01d9950285b..00000000000
--- a/models/demos/segformer/tests/segformer_perfomant.py
+++ /dev/null
@@ -1,335 +0,0 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-import torch
-import ttnn
-from models.utility_functions import (
-    is_wormhole_b0,
-)
-from models.demos.segformer.tests.segformer_test_infra import create_test_infra
-
-try:
-    from tracy import signpost
-
-    use_signpost = True
-except ModuleNotFoundError:
-    use_signpost = False
-
-
-def buffer_address(tensor):
-    addr = []
-    for ten in ttnn.get_device_tensors(tensor):
-        addr.append(ten.buffer_address())
-    return addr
-
-
-# TODO: Create ttnn apis for this
-ttnn.buffer_address = buffer_address
-
-
-def run_segformer_inference(
-    device,
-    device_batch_size,
-    act_dtype,
-    weight_dtype,
-    model_location_generator,
-):
-    test_infra = create_test_infra(
-        device,
-        device_batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator=model_location_generator,
-    )
-
-    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
-
-    # # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    test_infra.run()
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-    # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    test_infra.run()
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-    # More optimized run with caching
-    if use_signpost:
-        signpost(header="start")
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    test_infra.run()
-    if use_signpost:
-        signpost(header="stop")
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-
-def run_segformer_trace_inference(
-    device,
-    device_batch_size,
-    act_dtype,
-    weight_dtype,
-    model_location_generator,
-):
-    test_infra = create_test_infra(
-        device,
-        device_batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator=model_location_generator,
-    )
-    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
-
-    # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    shape = test_infra.input_tensor.shape
-    dtype = test_infra.input_tensor.dtype
-    layout = test_infra.input_tensor.layout
-    test_infra.run()
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-    # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    test_infra.run()
-    test_infra.validate()
-
-    # Capture
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
-    test_infra.dealloc_output()
-    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
-    test_infra.run()
-    tt_image_res = ttnn.allocate_tensor_on_device(
-        shape,
-        dtype,
-        layout,
-        device,
-        self.input_mem_config,
-    )
-    ttnn.end_trace_capture(device, self.tid, cq_id=0)
-    assert trace_input_addr == ttnn.buffer_address(tt_image_res)
-
-    # More optimized run with caching
-    if use_signpost:
-        signpost(header="start")
-    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 0)
-    ttnn.execute_trace(device, self.tid, cq_id=0, blocking=True)
-    if use_signpost:
-        signpost(header="stop")
-    test_infra.validate()
-
-    ttnn.release_trace(device, self.tid)
-    test_infra.dealloc_output()
-
-
-def run_segformer_trace_2cqs_inference(
-    device,
-    device_batch_size,
-    act_dtype,
-    weight_dtype,
-    model_location_generator,
-):
-    test_infra = create_test_infra(
-        device,
-        device_batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator=model_location_generator,
-    )
-    tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = test_infra.setup_dram_sharded_input(device)
-    tt_image_res = tt_inputs_host.to(device, sharded_mem_config_DRAM)
-    op_event = ttnn.create_event(device)
-    write_event = ttnn.create_event(device)
-    # Initialize the op event so we can write
-    ttnn.record_event(0, op_event)
-
-    # First run configures convs JIT
-    ttnn.wait_for_event(1, op_event)
-    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
-    ttnn.record_event(1, write_event)
-    ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
-    shape = test_infra.input_tensor.shape
-    dtype = test_infra.input_tensor.dtype
-    layout = test_infra.input_tensor.layout
-    ttnn.record_event(0, op_event)
-    test_infra.run()
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-    print("2")
-
-    # Optimized run
-    ttnn.wait_for_event(1, op_event)
-    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
-    ttnn.record_event(1, write_event)
-    ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
-    ttnn.record_event(0, op_event)
-    test_infra.run()
-    test_infra.validate()
-    test_infra.dealloc_output()
-
-    print("3")
-
-    # Capture
-    ttnn.wait_for_event(1, op_event)
-    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
-    ttnn.record_event(1, write_event)
-    ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
-    ttnn.record_event(0, op_event)
-    test_infra.dealloc_output()
-    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
-    test_infra.run()
-    self.input_tensor = ttnn.allocate_tensor_on_device(
-        shape,
-        dtype,
-        layout,
-        device,
-        self.input_mem_config,
-    )
-    ttnn.end_trace_capture(device, self.tid, cq_id=0)
-    assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
-
-    print("4")
-
-    # More optimized run with caching
-    if use_signpost:
-        signpost(header="start")
-    for iter in range(0, 2):
-        ttnn.wait_for_event(1, op_event)
-        ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
-        ttnn.record_event(1, write_event)
-        ttnn.wait_for_event(0, write_event)
-        # TODO: Add in place support to ttnn to_memory_config
-        # self.input_tensor = ttnn.reshard(tt_image_res, self.input_mem_config, self.input_tensor)
-        self.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, op_event)
-        ttnn.execute_trace(device, self.tid, cq_id=0, blocking=False)
-    ttnn.synchronize_devices(device)
-
-    if use_signpost:
-        signpost(header="stop")
-
-    ttnn.release_trace(device, self.tid)
-
-
-class SegformerTrace2CQ:
-    def __init__(self):
-        ...
-
-    def initialize_segformer_trace_2cqs_inference(
-        self,
-        device,
-        device_batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator,
-    ):
-        self.test_infra = create_test_infra(
-            device,
-            device_batch_size,
-            act_dtype,
-            weight_dtype,
-            model_location_generator=model_location_generator,
-        )
-        self.tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = self.test_infra.setup_dram_sharded_input(
-            device
-        )
-        self.tt_image_res = self.tt_inputs_host.to(device, sharded_mem_config_DRAM)
-        self.op_event = ttnn.create_event(device)
-        self.write_event = ttnn.create_event(device)
-        # Initialize the op event so we can write
-        ttnn.record_event(0, self.op_event)
-
-        # First run configures convs JIT
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        shape = self.test_infra.input_tensor.shape
-        dtype = self.test_infra.input_tensor.dtype
-        layout = self.test_infra.input_tensor.layout
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.run()
-        self.test_infra.validate()
-        self.test_infra.dealloc_output()
-
-        # Optimized run
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.run()
-        self.test_infra.validate()
-
-        # Capture
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.dealloc_output()
-        trace_input_addr = ttnn.buffer_address(self.test_infra.input_tensor)
-        self.tid = ttnn.begin_trace_capture(device, cq_id=0)
-        self.test_infra.run()
-        self.input_tensor = ttnn.allocate_tensor_on_device(
-            shape,
-            dtype,
-            layout,
-            device,
-            self.input_mem_config,
-        )
-        ttnn.end_trace_capture(device, self.tid, cq_id=0)
-        assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
-
-        self.device = device
-
-        print("4")
-
-        # More optimized run with caching
-        # if use_signpost:
-        #    signpost(header="start")
-
-    def execute_segformer_trace_2cqs_inference(self, tt_inputs_host=None):
-        tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        # TODO: Add in place support to ttnn to_memory_config
-        # self.input_tensor = ttnn.reshard(self.tt_image_res, self.input_mem_config, self.input_tensor)
-        self.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, self.op_event)
-        ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
-        ttnn.synchronize_devices(self.device)
-        return self.test_infra.output_tensor
-
-        # if use_signpost:
-        #    signpost(header="stop")
-
-    def release_segformer_trace_2cqs_inference(self):
-        ttnn.release_trace(self.device, self.tid)
-
-    def run_traced_inference(self, torch_input_tensor):
-        ##
-        ## Add more pre-processing
-        ##
-        n, c, h, w = torch_input_tensor.shape
-        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
-        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
-        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
-        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
-        return self.execute_segformer_trace_2cqs_inference(tt_inputs_host)
diff --git a/models/demos/segformer/tests/segformer_test_infra.py b/models/demos/segformer/tests/segformer_test_infra.py
deleted file mode 100644
index cc8224dd327..00000000000
--- a/models/demos/segformer/tests/segformer_test_infra.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-from loguru import logger
-import os
-import pytest
-import torch
-import torchvision
-from PIL import Image
-import requests
-import math
-from tests.ttnn.utils_for_testing import assert_with_pcc
-from ttnn.model_preprocessing import preprocess_model_parameters, ParameterDict, ParameterList
-import ttnn
-
-from models.demos.segformer.tt.ttnn_segformer_for_semantic_segmentation import (
-    TtSegformerForSemanticSegmentation,
-)
-from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
-from models.demos.segformer.reference.segformer_for_semantic_segmentation import (
-    SegformerForSemanticSegmentationReference,
-)
-from tests.ttnn.integration_tests.segformer.test_segformer_model import (
-    create_custom_preprocessor as create_custom_preprocessor_model,
-)
-from tests.ttnn.integration_tests.segformer.test_segformer_decode_head import (
-    create_custom_preprocessor as create_custom_preprocessor_deocde_head,
-)
-from models.utility_functions import skip_for_grayskull
-
-from models.utility_functions import (
-    is_wormhole_b0,
-    is_grayskull,
-    divup,
-)
-
-
-def create_custom_preprocessor(device):
-    def custom_preprocessor(model, name, ttnn_module_args):
-        parameters = {}
-        if isinstance(model, SegformerForSemanticSegmentationReference):
-            parameters["segformer"] = {}
-            segformer_preprocess = create_custom_preprocessor_model(device)
-            parameters["segformer"] = segformer_preprocess(model.segformer, None, None)
-            parameters["decode_head"] = {}
-            deocde_preprocess = create_custom_preprocessor_deocde_head(device)
-            parameters["decode_head"] = deocde_preprocess(model.decode_head, None, None)
-
-        return parameters
-
-    return custom_preprocessor
-
-
-def move_to_device(object, device):
-    if isinstance(object, ParameterDict):
-        for name, value in list(object.items()):
-            if name in ["sr", "proj", "dwconv", "linear_fuse", "classifier"]:
-                continue
-            object[name] = move_to_device(value, device)
-        return object
-    elif isinstance(object, ParameterList):
-        for index, element in enumerate(object):
-            object[index] = move_to_device(element, device)
-        return object
-    elif isinstance(object, ttnn.Tensor):
-        return ttnn.to_device(object, device)
-    else:
-        return object
-
-
-def load_segformer_torch_model(device, model_location_generator=None):
-    torch_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-    config = torch_model.config
-    reference_model = SegformerForSemanticSegmentationReference(config=config)
-    state_dict = torch_model.state_dict()
-    new_state_dict = {}
-    keys = [name for name, parameter in reference_model.state_dict().items()]
-    values = [parameter for name, parameter in state_dict.items()]
-    for i in range(len(keys)):
-        new_state_dict[keys[i]] = values[i]
-
-    reference_model.load_state_dict(new_state_dict)
-    reference_model.eval()
-
-    parameters = preprocess_model_parameters(
-        initialize_model=lambda: reference_model, custom_preprocessor=create_custom_preprocessor(device), device=None
-    )
-    parameters = move_to_device(parameters, device)
-
-    for i in range(4):
-        parameters["decode_head"]["linear_c"][i]["proj"]["weight"] = ttnn.to_device(
-            parameters["decode_head"]["linear_c"][i]["proj"]["weight"], device=device
-        )
-        parameters["decode_head"]["linear_c"][i]["proj"]["bias"] = ttnn.to_device(
-            parameters["decode_head"]["linear_c"][i]["proj"]["bias"], device=device
-        )
-
-    return reference_model, config, parameters
-
-
-class SegformerTestInfra:
-    def __init__(
-        self,
-        device,
-        batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator=None,
-    ):
-        super().__init__()
-        torch.manual_seed(0)
-        self.pcc_passed = False
-        self.pcc_message = "Did you forget to call validate()?"
-        self.device = device
-        self.batch_size = batch_size
-        self.act_dtype = act_dtype
-        self.weight_dtype = weight_dtype
-        self.model_location_generator = model_location_generator
-        reference_model, config, self.parameters = load_segformer_torch_model(device)
-        self.ttnn_segformer_model = TtSegformerForSemanticSegmentation(config, self.parameters)
-
-        processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-        image = Image.open(requests.get(url, stream=True).raw)
-        self.inputs = processor(images=image, return_tensors="pt")
-        self.torch_output_tensor = reference_model(self.inputs.pixel_values)
-
-    def run(self):
-        self.output_tensor = self.ttnn_segformer_model(
-            self.input_tensor,
-            output_attentions=None,
-            output_hidden_states=None,
-            return_dict=None,
-            parameters=self.parameters,
-        )
-
-    def setup_l1_sharded_input(self, device, torch_input_tensor=None):
-        if is_wormhole_b0():
-            core_grid = ttnn.CoreGrid(y=8, x=8)
-        else:
-            exit("Unsupported device")
-        num_devices = 1 if isinstance(device, ttnn.Device) else device.get_num_devices()
-
-        """
-        # torch tensor
-        torch_input_tensor = self.torch_input_tensor if torch_input_tensor is None else torch_input_tensor
-        n, c, h, w = torch_input_tensor.shape
-        # sharded mem config for fold input
-        num_cores = core_grid.x * core_grid.y
-        shard_h = (n * w * h + num_cores - 1) // num_cores
-        grid_size = core_grid
-        grid_coord = ttnn.CoreCoord(grid_size.x - 1, grid_size.y - 1)
-        shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
-        shard_spec = ttnn.ShardSpec(shard_grid, (shard_h, 16), ttnn.ShardOrientation.ROW_MAJOR, False)
-        input_mem_config = ttnn.MemoryConfig(
-            ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec
-        )
-        """
-
-        torch_input_tensor_permuted = torch.permute(self.inputs.pixel_values, (0, 2, 3, 1))
-        # tt_inputs_host = ttnn.from_torch(
-        #     torch_input_tensor_permuted,
-        #     dtype=ttnn.bfloat16,
-        #     memory_config=ttnn.L1_MEMORY_CONFIG,
-        #     device=device,
-        #     layout=ttnn.TILE_LAYOUT,
-        # )
-        tt_inputs_host = ttnn.from_torch(torch_input_tensor_permuted, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT)
-        input_mem_config = ttnn.DRAM_MEMORY_CONFIG
-
-        return tt_inputs_host, input_mem_config
-
-    def setup_dram_sharded_input(self, device, torch_input_tensor=None, mesh_mapper=None, mesh_composer=None):
-        tt_inputs_host, input_mem_config = self.setup_l1_sharded_input(device)
-        dram_grid_size = device.dram_grid_size()
-        dram_shard_spec = ttnn.ShardSpec(
-            ttnn.CoreRangeSet(
-                {ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(dram_grid_size.x - 1, dram_grid_size.y - 1))}
-            ),
-            [
-                divup(tt_inputs_host.volume() // tt_inputs_host.shape[-1], (dram_grid_size.x * dram_grid_size.y)),
-                16,
-            ],
-            ttnn.ShardOrientation.ROW_MAJOR,
-            False,
-        )
-        sharded_mem_config_DRAM = ttnn.MemoryConfig(
-            ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.DRAM, dram_shard_spec
-        )
-        sharded_mem_config_DRAM = ttnn.DRAM_MEMORY_CONFIG
-
-        return tt_inputs_host, sharded_mem_config_DRAM, input_mem_config
-
-    def validate(self, output_tensor=None):
-        output_tensor = self.output_tensor if output_tensor is None else output_tensor
-        output_tensor = ttnn.to_torch(self.output_tensor.logits)
-        output_tensor = torch.permute(output_tensor, (0, 3, 1, 2))
-        h = w = int(math.sqrt(output_tensor.shape[-1]))
-        final_output_tensor = torch.reshape(output_tensor, (output_tensor.shape[0], output_tensor.shape[1], h, w))
-
-        valid_pcc = 0  # 0.985
-        self.pcc_passed, self.pcc_message = assert_with_pcc(
-            self.torch_output_tensor.logits, final_output_tensor, pcc=valid_pcc
-        )
-
-        logger.info(f"Segformer , PCC={self.pcc_message}")
-
-    def dealloc_output(self):
-        ttnn.deallocate(self.output_tensor.logits)
-
-
-def create_test_infra(
-    device,
-    batch_size,
-    act_dtype,
-    weight_dtype,
-    model_location_generator=None,
-):
-    return SegformerTestInfra(
-        device,
-        batch_size,
-        act_dtype,
-        weight_dtype,
-        model_location_generator,
-    )

From f88308e26fc6602f896d651fdbeb2e307735f6f1 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 06:23:08 +0000
Subject: [PATCH 47/69] #0: add requirements.txt on the server side

---
 models/demos/yolov4/web_demo/server/requirements.txt | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 models/demos/yolov4/web_demo/server/requirements.txt

diff --git a/models/demos/yolov4/web_demo/server/requirements.txt b/models/demos/yolov4/web_demo/server/requirements.txt
new file mode 100644
index 00000000000..38db3a7b1a0
--- /dev/null
+++ b/models/demos/yolov4/web_demo/server/requirements.txt
@@ -0,0 +1,8 @@
+fastapi==0.85.1
+uvicorn==0.19.0
+python-multipart==0.0.5
+transformers==4.20.1
+yolov5==7.0.11
+pytest==6.2.4
+
+-f https://download.pytorch.org/whl/cpu/torch_stable.html

From 8827492a626e3fb462889b851d7d4811b2d72773 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 06:32:23 +0000
Subject: [PATCH 48/69] #0 remove not intended file

---
 .../yolov4/web_demo/server/test_model.py      | 27 -------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 models/demos/yolov4/web_demo/server/test_model.py

diff --git a/models/demos/yolov4/web_demo/server/test_model.py b/models/demos/yolov4/web_demo/server/test_model.py
deleted file mode 100644
index 2ac7b7b056c..00000000000
--- a/models/demos/yolov4/web_demo/server/test_model.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""
-Model pytests for yolov5
-"""
-import pytest
-import os
-from PIL import Image
-from yolov5_320 import startup_pybuda, clear_pybuda, YoloV5Handler
-
-MLDATA = "/mnt/mldata"
-
-
-@pytest.mark.skipif(
-    not os.path.isdir(MLDATA),
-    reason="Skipping test as we are not in a TT devtools environment.",
-)
-def test_model():
-    startup_pybuda()
-    model = YoloV5Handler()
-    model.initialize()
-    response = model.handle(Image.open("puppy.jpg"))
-    print("the response is: ", response)
-    assert response["labels"][0] == "dog"
-    print("test_model PASSED")
-
-
-if __name__ == "__main__":
-    test_model()

From 64c956e2452e7fb7e1414120cedd81891a6884a5 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 06:40:19 +0000
Subject: [PATCH 49/69] #0: add missing SPDX license headers

---
 models/demos/yolov4/web_demo/client/yolov4.py          | 3 +++
 models/demos/yolov4/web_demo/server/fast_api_yolov4.py | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index 7f70e9acc0d..11daa2bb051 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
 import time
 import io
 import json
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index a950d0316a2..7f03d989e8a 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
 import json
 from fastapi import FastAPI, File, UploadFile
 from io import BytesIO

From 6ff0eebabf7baeb410c6eddc22216d381f554597 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 20:51:32 +0000
Subject: [PATCH 50/69] #0: make web demo modules independant of pytest modules

---
 .../wormhole/yolov4/test_yolov4_performant.py |  24 +-
 .../yolov4/test_yolov4_performant_webdemo.py  |  95 +++++
 models/demos/yolov4/tests/yolov4_perfomant.py | 221 ++--------
 .../yolov4/tests/yolov4_perfomant_webdemo.py  | 385 ++++++++++++++++++
 .../yolov4/web_demo/server/fast_api_yolov4.py |   2 +-
 .../yolov4/web_demo/server/requirements.txt   |   3 -
 6 files changed, 508 insertions(+), 222 deletions(-)
 create mode 100644 models/demos/wormhole/yolov4/test_yolov4_performant_webdemo.py
 create mode 100644 models/demos/yolov4/tests/yolov4_perfomant_webdemo.py

diff --git a/models/demos/wormhole/yolov4/test_yolov4_performant.py b/models/demos/wormhole/yolov4/test_yolov4_performant.py
index 049b979e0dd..cf7daddc4db 100644
--- a/models/demos/wormhole/yolov4/test_yolov4_performant.py
+++ b/models/demos/wormhole/yolov4/test_yolov4_performant.py
@@ -4,15 +4,12 @@
 
 import pytest
 import ttnn
-import time
-import torch
 
 from models.utility_functions import run_for_wormhole_b0
 from models.demos.yolov4.tests.yolov4_perfomant import (
     run_yolov4_inference,
     run_yolov4_trace_inference,
     run_yolov4_trace_2cqs_inference,
-    Yolov4Trace2CQ,
 )
 
 
@@ -69,27 +66,10 @@ def test_run_yolov4_trace_2cqs_inference(
     enable_async_mode,
     model_location_generator,
 ):
-    yolov4_trac2_2cq = Yolov4Trace2CQ()
-
-    yolov4_trac2_2cq.initialize_yolov4_trace_2cqs_inference(
+    run_yolov4_trace_2cqs_inference(
         device,
         batch_size,
         act_dtype,
         weight_dtype,
-        model_location_generator=None,
+        model_location_generator,
     )
-    for iter in range(0, 10):
-        input_shape = (1, 3, 320, 320)
-        torch_input_tensor = torch.randn(input_shape, dtype=torch.float32)
-        n, c, h, w = torch_input_tensor.shape
-        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
-        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
-        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
-        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
-
-        t0 = time.time()
-        output = yolov4_trac2_2cq.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
-        t1 = time.time()
-        print("TIME", t1 - t0)
-
-    yolov4_trac2_2cq.release_yolov4_trace_2cqs_inference()
diff --git a/models/demos/wormhole/yolov4/test_yolov4_performant_webdemo.py b/models/demos/wormhole/yolov4/test_yolov4_performant_webdemo.py
new file mode 100644
index 00000000000..b4940fbd2ab
--- /dev/null
+++ b/models/demos/wormhole/yolov4/test_yolov4_performant_webdemo.py
@@ -0,0 +1,95 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+import ttnn
+import time
+import torch
+
+from models.utility_functions import run_for_wormhole_b0
+from models.demos.yolov4.tests.yolov4_perfomant_webdemo import (
+    run_yolov4_inference,
+    run_yolov4_trace_inference,
+    run_yolov4_trace_2cqs_inference,
+    Yolov4Trace2CQ,
+)
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+def test_run_yolov4_inference(device, use_program_cache, batch_size, act_dtype, weight_dtype, model_location_generator):
+    run_yolov4_inference(device, batch_size, act_dtype, weight_dtype, model_location_generator)
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
+def test_run_yolov4_trace_inference(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    enable_async_mode,
+    model_location_generator,
+):
+    run_yolov4_trace_inference(
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator,
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize(
+    "device_params", [{"l1_small_size": 24576, "trace_region_size": 1617920, "num_command_queues": 2}], indirect=True
+)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype",
+    ((1, ttnn.bfloat16, ttnn.bfloat16),),
+)
+@pytest.mark.parametrize("enable_async_mode", (False, True), indirect=True)
+def test_run_yolov4_trace_2cqs_inference(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    enable_async_mode,
+    model_location_generator,
+):
+    yolov4_trac2_2cq = Yolov4Trace2CQ()
+
+    yolov4_trac2_2cq.initialize_yolov4_trace_2cqs_inference(
+        device,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=None,
+    )
+    for iter in range(0, 10):
+        input_shape = (1, 3, 320, 320)
+        torch_input_tensor = torch.randn(input_shape, dtype=torch.float32)
+        n, c, h, w = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.permute(0, 2, 3, 1)
+        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+
+        t0 = time.time()
+        output = yolov4_trac2_2cq.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
+        t1 = time.time()
+        print("TIME", t1 - t0)
+
+    yolov4_trac2_2cq.release_yolov4_trace_2cqs_inference()
diff --git a/models/demos/yolov4/tests/yolov4_perfomant.py b/models/demos/yolov4/tests/yolov4_perfomant.py
index 75a479c0801..d35644da15c 100644
--- a/models/demos/yolov4/tests/yolov4_perfomant.py
+++ b/models/demos/yolov4/tests/yolov4_perfomant.py
@@ -9,8 +9,6 @@
     is_wormhole_b0,
 )
 from models.demos.yolov4.tests.yolov4_test_infra import create_test_infra
-from models.demos.yolov4.demo.demo import YoloLayer
-
 
 try:
     from tracy import signpost
@@ -46,16 +44,16 @@ def run_yolov4_inference(
         model_location_generator=model_location_generator,
     )
 
-    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+    tt_inputs_host, input_mem_config = test_infra.setup_l1_sharded_input(device)
 
     # # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     test_infra.run()
     test_infra.validate()
     test_infra.dealloc_output()
 
     # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     test_infra.run()
     test_infra.validate()
     test_infra.dealloc_output()
@@ -63,7 +61,7 @@ def run_yolov4_inference(
     # More optimized run with caching
     if use_signpost:
         signpost(header="start")
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     test_infra.run()
     if use_signpost:
         signpost(header="stop")
@@ -85,10 +83,10 @@ def run_yolov4_trace_inference(
         weight_dtype,
         model_location_generator=model_location_generator,
     )
-    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+    tt_inputs_host, input_mem_config = test_infra.setup_l1_sharded_input(device)
 
     # First run configures convs JIT
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     shape = test_infra.input_tensor.shape
     dtype = test_infra.input_tensor.dtype
     layout = test_infra.input_tensor.layout
@@ -97,36 +95,36 @@ def run_yolov4_trace_inference(
     test_infra.dealloc_output()
 
     # Optimized run
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     test_infra.run()
     test_infra.validate()
 
     # Capture
-    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.input_tensor = tt_inputs_host.to(device, input_mem_config)
     test_infra.dealloc_output()
     trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    tid = ttnn.begin_trace_capture(device, cq_id=0)
     test_infra.run()
     tt_image_res = ttnn.allocate_tensor_on_device(
         shape,
         dtype,
         layout,
         device,
-        self.input_mem_config,
+        input_mem_config,
     )
-    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    ttnn.end_trace_capture(device, tid, cq_id=0)
     assert trace_input_addr == ttnn.buffer_address(tt_image_res)
 
     # More optimized run with caching
     if use_signpost:
         signpost(header="start")
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 0)
-    ttnn.execute_trace(device, self.tid, cq_id=0, blocking=True)
+    ttnn.execute_trace(device, tid, cq_id=0, blocking=True)
     if use_signpost:
         signpost(header="stop")
     test_infra.validate()
 
-    ttnn.release_trace(device, self.tid)
+    ttnn.release_trace(device, tid)
     test_infra.dealloc_output()
 
 
@@ -144,7 +142,7 @@ def run_yolov4_trace_2cqs_inference(
         weight_dtype,
         model_location_generator=model_location_generator,
     )
-    tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = test_infra.setup_dram_sharded_input(device)
+    tt_inputs_host, sharded_mem_config_DRAM, input_mem_config = test_infra.setup_dram_sharded_input(device)
     tt_image_res = tt_inputs_host.to(device, sharded_mem_config_DRAM)
     op_event = ttnn.create_event(device)
     write_event = ttnn.create_event(device)
@@ -156,7 +154,7 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
     shape = test_infra.input_tensor.shape
     dtype = test_infra.input_tensor.dtype
     layout = test_infra.input_tensor.layout
@@ -170,7 +168,7 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
     ttnn.record_event(0, op_event)
     test_infra.run()
     test_infra.validate()
@@ -180,21 +178,21 @@ def run_yolov4_trace_2cqs_inference(
     ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
     ttnn.record_event(1, write_event)
     ttnn.wait_for_event(0, write_event)
-    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, input_mem_config)
     ttnn.record_event(0, op_event)
     test_infra.dealloc_output()
     trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
-    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    tid = ttnn.begin_trace_capture(device, cq_id=0)
     test_infra.run()
-    self.input_tensor = ttnn.allocate_tensor_on_device(
+    input_tensor = ttnn.allocate_tensor_on_device(
         shape,
         dtype,
         layout,
         device,
-        self.input_mem_config,
+        input_mem_config,
     )
-    ttnn.end_trace_capture(device, self.tid, cq_id=0)
-    assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+    ttnn.end_trace_capture(device, tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(input_tensor)
 
     # More optimized run with caching
     if use_signpost:
@@ -205,181 +203,12 @@ def run_yolov4_trace_2cqs_inference(
         ttnn.record_event(1, write_event)
         ttnn.wait_for_event(0, write_event)
         # TODO: Add in place support to ttnn to_memory_config
-        self.input_tensor = ttnn.reshard(tt_image_res, self.input_mem_config, self.input_tensor)
+        input_tensor = ttnn.reshard(tt_image_res, input_mem_config, input_tensor)
         ttnn.record_event(0, op_event)
-        ttnn.execute_trace(device, self.tid, cq_id=0, blocking=False)
+        ttnn.execute_trace(device, tid, cq_id=0, blocking=False)
     ttnn.synchronize_devices(device)
 
     if use_signpost:
         signpost(header="stop")
 
-    ttnn.release_trace(device, self.tid)
-
-
-class Yolov4Trace2CQ:
-    def __init__(self):
-        ...
-
-    def initialize_yolov4_trace_2cqs_inference(
-        self,
-        device,
-        device_batch_size=1,
-        act_dtype=ttnn.bfloat16,
-        weight_dtype=ttnn.bfloat16,
-        model_location_generator=None,
-    ):
-        self.test_infra = create_test_infra(
-            device,
-            device_batch_size,
-            act_dtype,
-            weight_dtype,
-            model_location_generator=model_location_generator,
-        )
-        self.tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = self.test_infra.setup_dram_sharded_input(
-            device
-        )
-        self.tt_image_res = self.tt_inputs_host.to(device, sharded_mem_config_DRAM)
-        self.op_event = ttnn.create_event(device)
-        self.write_event = ttnn.create_event(device)
-        # Initialize the op event so we can write
-        ttnn.record_event(0, self.op_event)
-
-        # First run configures convs JIT
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        shape = self.test_infra.input_tensor.shape
-        dtype = self.test_infra.input_tensor.dtype
-        layout = self.test_infra.input_tensor.layout
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.run()
-        self.test_infra.validate()
-        self.test_infra.dealloc_output()
-
-        # Optimized run
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.run()
-        self.test_infra.validate()
-
-        # Capture
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
-        ttnn.record_event(0, self.op_event)
-        self.test_infra.dealloc_output()
-        trace_input_addr = ttnn.buffer_address(self.test_infra.input_tensor)
-        self.tid = ttnn.begin_trace_capture(device, cq_id=0)
-        self.test_infra.run()
-        self.input_tensor = ttnn.allocate_tensor_on_device(
-            shape,
-            dtype,
-            layout,
-            device,
-            self.input_mem_config,
-        )
-        ttnn.end_trace_capture(device, self.tid, cq_id=0)
-        assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
-
-        self.device = device
-
-        # More optimized run with caching
-        # if use_signpost:
-        #    signpost(header="start")
-
-    def get_region_boxes(self, boxes_and_confs):
-        print("Getting boxes from boxes and confs ...")
-        boxes_list = []
-        confs_list = []
-
-        for item in boxes_and_confs:
-            boxes_list.append(item[0])
-            confs_list.append(item[1])
-
-        # boxes: [batch, num1 + num2 + num3, 1, 4]
-        # confs: [batch, num1 + num2 + num3, num_classes]
-        boxes = torch.cat(boxes_list, dim=1)
-        confs = torch.cat(confs_list, dim=1)
-
-        return [boxes, confs]
-
-    def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
-        tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
-        ttnn.wait_for_event(1, self.op_event)
-        ttnn.copy_host_to_device_tensor(tt_inputs_host, self.tt_image_res, 1)
-        ttnn.record_event(1, self.write_event)
-        ttnn.wait_for_event(0, self.write_event)
-        # TODO: Add in place support to ttnn to_memory_config
-        self.input_tensor = ttnn.reshard(self.tt_image_res, self.input_mem_config, self.input_tensor)
-        ttnn.record_event(0, self.op_event)
-        ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
-        ttnn.synchronize_devices(self.device)
-        output = self.test_infra.output_tensor
-
-        output_tensor1 = ttnn.to_torch(output[0])
-        output_tensor1 = output_tensor1.reshape(1, 40, 40, 255)
-        output_tensor1 = torch.permute(output_tensor1, (0, 3, 1, 2))
-
-        output_tensor2 = ttnn.to_torch(output[1])
-        output_tensor2 = output_tensor2.reshape(1, 20, 20, 255)
-        output_tensor2 = torch.permute(output_tensor2, (0, 3, 1, 2))
-
-        output_tensor3 = ttnn.to_torch(output[2])
-        output_tensor3 = output_tensor3.reshape(1, 10, 10, 255)
-        output_tensor3 = torch.permute(output_tensor3, (0, 3, 1, 2))
-
-        n_classes = 80
-
-        yolo1 = YoloLayer(
-            anchor_mask=[0, 1, 2],
-            num_classes=n_classes,
-            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
-            num_anchors=9,
-            stride=8,
-        )
-
-        yolo2 = YoloLayer(
-            anchor_mask=[3, 4, 5],
-            num_classes=n_classes,
-            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
-            num_anchors=9,
-            stride=16,
-        )
-
-        yolo3 = YoloLayer(
-            anchor_mask=[6, 7, 8],
-            num_classes=n_classes,
-            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
-            num_anchors=9,
-            stride=32,
-        )
-
-        y1 = yolo1(output_tensor1)
-        y2 = yolo2(output_tensor2)
-        y3 = yolo3(output_tensor3)
-
-        output = self.get_region_boxes([y1, y2, y3])
-
-        return output
-        # return self.test_infra.output_tensor
-
-        # if use_signpost:
-        #    signpost(header="stop")
-
-    def release_yolov4_trace_2cqs_inference(self):
-        ttnn.release_trace(self.device, self.tid)
-
-    def run_traced_inference(self, torch_input_tensor):
-        n, h, w, c = torch_input_tensor.shape
-        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
-        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
-        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
-        return self.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
+    ttnn.release_trace(device, tid)
diff --git a/models/demos/yolov4/tests/yolov4_perfomant_webdemo.py b/models/demos/yolov4/tests/yolov4_perfomant_webdemo.py
new file mode 100644
index 00000000000..75a479c0801
--- /dev/null
+++ b/models/demos/yolov4/tests/yolov4_perfomant_webdemo.py
@@ -0,0 +1,385 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+import torch
+import ttnn
+from models.utility_functions import (
+    is_wormhole_b0,
+)
+from models.demos.yolov4.tests.yolov4_test_infra import create_test_infra
+from models.demos.yolov4.demo.demo import YoloLayer
+
+
+try:
+    from tracy import signpost
+
+    use_signpost = True
+except ModuleNotFoundError:
+    use_signpost = False
+
+
+def buffer_address(tensor):
+    addr = []
+    for ten in ttnn.get_device_tensors(tensor):
+        addr.append(ten.buffer_address())
+    return addr
+
+
+# TODO: Create ttnn apis for this
+ttnn.buffer_address = buffer_address
+
+
+def run_yolov4_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+
+    # # First run configures convs JIT
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # Optimized run
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    if use_signpost:
+        signpost(header="stop")
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+
+def run_yolov4_trace_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+    tt_inputs_host, self.input_mem_config = test_infra.setup_l1_sharded_input(device)
+
+    # First run configures convs JIT
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    shape = test_infra.input_tensor.shape
+    dtype = test_infra.input_tensor.dtype
+    layout = test_infra.input_tensor.layout
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # Optimized run
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.run()
+    test_infra.validate()
+
+    # Capture
+    test_infra.input_tensor = tt_inputs_host.to(device, self.input_mem_config)
+    test_infra.dealloc_output()
+    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    test_infra.run()
+    tt_image_res = ttnn.allocate_tensor_on_device(
+        shape,
+        dtype,
+        layout,
+        device,
+        self.input_mem_config,
+    )
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(tt_image_res)
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 0)
+    ttnn.execute_trace(device, self.tid, cq_id=0, blocking=True)
+    if use_signpost:
+        signpost(header="stop")
+    test_infra.validate()
+
+    ttnn.release_trace(device, self.tid)
+    test_infra.dealloc_output()
+
+
+def run_yolov4_trace_2cqs_inference(
+    device,
+    device_batch_size,
+    act_dtype,
+    weight_dtype,
+    model_location_generator,
+):
+    test_infra = create_test_infra(
+        device,
+        device_batch_size,
+        act_dtype,
+        weight_dtype,
+        model_location_generator=model_location_generator,
+    )
+    tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = test_infra.setup_dram_sharded_input(device)
+    tt_image_res = tt_inputs_host.to(device, sharded_mem_config_DRAM)
+    op_event = ttnn.create_event(device)
+    write_event = ttnn.create_event(device)
+    # Initialize the op event so we can write
+    ttnn.record_event(0, op_event)
+
+    # First run configures convs JIT
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    shape = test_infra.input_tensor.shape
+    dtype = test_infra.input_tensor.dtype
+    layout = test_infra.input_tensor.layout
+    ttnn.record_event(0, op_event)
+    test_infra.run()
+    test_infra.validate()
+    test_infra.dealloc_output()
+
+    # Optimized run
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    ttnn.record_event(0, op_event)
+    test_infra.run()
+    test_infra.validate()
+
+    # Capture
+    ttnn.wait_for_event(1, op_event)
+    ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+    ttnn.record_event(1, write_event)
+    ttnn.wait_for_event(0, write_event)
+    test_infra.input_tensor = ttnn.to_memory_config(tt_image_res, self.input_mem_config)
+    ttnn.record_event(0, op_event)
+    test_infra.dealloc_output()
+    trace_input_addr = ttnn.buffer_address(test_infra.input_tensor)
+    self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+    test_infra.run()
+    self.input_tensor = ttnn.allocate_tensor_on_device(
+        shape,
+        dtype,
+        layout,
+        device,
+        self.input_mem_config,
+    )
+    ttnn.end_trace_capture(device, self.tid, cq_id=0)
+    assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+
+    # More optimized run with caching
+    if use_signpost:
+        signpost(header="start")
+    for iter in range(0, 2):
+        ttnn.wait_for_event(1, op_event)
+        ttnn.copy_host_to_device_tensor(tt_inputs_host, tt_image_res, 1)
+        ttnn.record_event(1, write_event)
+        ttnn.wait_for_event(0, write_event)
+        # TODO: Add in place support to ttnn to_memory_config
+        self.input_tensor = ttnn.reshard(tt_image_res, self.input_mem_config, self.input_tensor)
+        ttnn.record_event(0, op_event)
+        ttnn.execute_trace(device, self.tid, cq_id=0, blocking=False)
+    ttnn.synchronize_devices(device)
+
+    if use_signpost:
+        signpost(header="stop")
+
+    ttnn.release_trace(device, self.tid)
+
+
+class Yolov4Trace2CQ:
+    def __init__(self):
+        ...
+
+    def initialize_yolov4_trace_2cqs_inference(
+        self,
+        device,
+        device_batch_size=1,
+        act_dtype=ttnn.bfloat16,
+        weight_dtype=ttnn.bfloat16,
+        model_location_generator=None,
+    ):
+        self.test_infra = create_test_infra(
+            device,
+            device_batch_size,
+            act_dtype,
+            weight_dtype,
+            model_location_generator=model_location_generator,
+        )
+        self.tt_inputs_host, sharded_mem_config_DRAM, self.input_mem_config = self.test_infra.setup_dram_sharded_input(
+            device
+        )
+        self.tt_image_res = self.tt_inputs_host.to(device, sharded_mem_config_DRAM)
+        self.op_event = ttnn.create_event(device)
+        self.write_event = ttnn.create_event(device)
+        # Initialize the op event so we can write
+        ttnn.record_event(0, self.op_event)
+
+        # First run configures convs JIT
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        shape = self.test_infra.input_tensor.shape
+        dtype = self.test_infra.input_tensor.dtype
+        layout = self.test_infra.input_tensor.layout
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+        self.test_infra.dealloc_output()
+
+        # Optimized run
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.run()
+        self.test_infra.validate()
+
+        # Capture
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(self.tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        self.test_infra.input_tensor = ttnn.to_memory_config(self.tt_image_res, self.input_mem_config)
+        ttnn.record_event(0, self.op_event)
+        self.test_infra.dealloc_output()
+        trace_input_addr = ttnn.buffer_address(self.test_infra.input_tensor)
+        self.tid = ttnn.begin_trace_capture(device, cq_id=0)
+        self.test_infra.run()
+        self.input_tensor = ttnn.allocate_tensor_on_device(
+            shape,
+            dtype,
+            layout,
+            device,
+            self.input_mem_config,
+        )
+        ttnn.end_trace_capture(device, self.tid, cq_id=0)
+        assert trace_input_addr == ttnn.buffer_address(self.input_tensor)
+
+        self.device = device
+
+        # More optimized run with caching
+        # if use_signpost:
+        #    signpost(header="start")
+
+    def get_region_boxes(self, boxes_and_confs):
+        print("Getting boxes from boxes and confs ...")
+        boxes_list = []
+        confs_list = []
+
+        for item in boxes_and_confs:
+            boxes_list.append(item[0])
+            confs_list.append(item[1])
+
+        # boxes: [batch, num1 + num2 + num3, 1, 4]
+        # confs: [batch, num1 + num2 + num3, num_classes]
+        boxes = torch.cat(boxes_list, dim=1)
+        confs = torch.cat(confs_list, dim=1)
+
+        return [boxes, confs]
+
+    def execute_yolov4_trace_2cqs_inference(self, tt_inputs_host=None):
+        tt_inputs_host = self.tt_inputs_host if tt_inputs_host is None else tt_inputs_host
+        ttnn.wait_for_event(1, self.op_event)
+        ttnn.copy_host_to_device_tensor(tt_inputs_host, self.tt_image_res, 1)
+        ttnn.record_event(1, self.write_event)
+        ttnn.wait_for_event(0, self.write_event)
+        # TODO: Add in place support to ttnn to_memory_config
+        self.input_tensor = ttnn.reshard(self.tt_image_res, self.input_mem_config, self.input_tensor)
+        ttnn.record_event(0, self.op_event)
+        ttnn.execute_trace(self.device, self.tid, cq_id=0, blocking=False)
+        ttnn.synchronize_devices(self.device)
+        output = self.test_infra.output_tensor
+
+        output_tensor1 = ttnn.to_torch(output[0])
+        output_tensor1 = output_tensor1.reshape(1, 40, 40, 255)
+        output_tensor1 = torch.permute(output_tensor1, (0, 3, 1, 2))
+
+        output_tensor2 = ttnn.to_torch(output[1])
+        output_tensor2 = output_tensor2.reshape(1, 20, 20, 255)
+        output_tensor2 = torch.permute(output_tensor2, (0, 3, 1, 2))
+
+        output_tensor3 = ttnn.to_torch(output[2])
+        output_tensor3 = output_tensor3.reshape(1, 10, 10, 255)
+        output_tensor3 = torch.permute(output_tensor3, (0, 3, 1, 2))
+
+        n_classes = 80
+
+        yolo1 = YoloLayer(
+            anchor_mask=[0, 1, 2],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=8,
+        )
+
+        yolo2 = YoloLayer(
+            anchor_mask=[3, 4, 5],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=16,
+        )
+
+        yolo3 = YoloLayer(
+            anchor_mask=[6, 7, 8],
+            num_classes=n_classes,
+            anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+            num_anchors=9,
+            stride=32,
+        )
+
+        y1 = yolo1(output_tensor1)
+        y2 = yolo2(output_tensor2)
+        y3 = yolo3(output_tensor3)
+
+        output = self.get_region_boxes([y1, y2, y3])
+
+        return output
+        # return self.test_infra.output_tensor
+
+        # if use_signpost:
+        #    signpost(header="stop")
+
+    def release_yolov4_trace_2cqs_inference(self):
+        ttnn.release_trace(self.device, self.tid)
+
+    def run_traced_inference(self, torch_input_tensor):
+        n, h, w, c = torch_input_tensor.shape
+        torch_input_tensor = torch_input_tensor.reshape(1, 1, h * w * n, c)
+        tt_inputs_host = ttnn.from_torch(torch_input_tensor, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT)
+        tt_inputs_host = ttnn.pad(tt_inputs_host, [1, 1, n * h * w, 16], [0, 0, 0, 0], 0)
+        return self.execute_yolov4_trace_2cqs_inference(tt_inputs_host)
diff --git a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
index 7f03d989e8a..19732cbc074 100755
--- a/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
+++ b/models/demos/yolov4/web_demo/server/fast_api_yolov4.py
@@ -5,7 +5,7 @@
 from fastapi import FastAPI, File, UploadFile
 from io import BytesIO
 from PIL import Image
-from models.demos.yolov4.tests.yolov4_perfomant import Yolov4Trace2CQ
+from models.demos.yolov4.tests.yolov4_perfomant_webdemo import Yolov4Trace2CQ
 import ttnn
 
 import cv2
diff --git a/models/demos/yolov4/web_demo/server/requirements.txt b/models/demos/yolov4/web_demo/server/requirements.txt
index 38db3a7b1a0..a1eb55ce21b 100644
--- a/models/demos/yolov4/web_demo/server/requirements.txt
+++ b/models/demos/yolov4/web_demo/server/requirements.txt
@@ -1,8 +1,5 @@
 fastapi==0.85.1
 uvicorn==0.19.0
 python-multipart==0.0.5
-transformers==4.20.1
-yolov5==7.0.11
-pytest==6.2.4
 
 -f https://download.pytorch.org/whl/cpu/torch_stable.html

From 7768b644afa6a9ca0946d2a420b1a6bddc0e1a0e Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 21:24:57 +0000
Subject: [PATCH 51/69] #0: cleanup on the client side and add License

---
 models/demos/yolov4/web_demo/client/yolov4.py | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/models/demos/yolov4/web_demo/client/yolov4.py b/models/demos/yolov4/web_demo/client/yolov4.py
index 11daa2bb051..5fc4ea6c692 100644
--- a/models/demos/yolov4/web_demo/client/yolov4.py
+++ b/models/demos/yolov4/web_demo/client/yolov4.py
@@ -1,24 +1,23 @@
-# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
 
 # SPDX-License-Identifier: Apache-2.0
+
 import time
 import io
+import math
 import json
+import random
 import argparse
 import cv2
 import requests
-import streamlit as st
-from streamlit_webrtc import VideoProcessorBase, webrtc_streamer
 import torch
-import numpy as np
 import av
+import streamlit as st
+import numpy as np
 
 
 from torch import nn
-import cv2
-import time
-import numpy as np
-import math
+from streamlit_webrtc import VideoProcessorBase, webrtc_streamer
 
 
 class VideoProcessor(VideoProcessorBase):
@@ -231,12 +230,11 @@ def recv(self, frame):
         boxes = self.post_processing(bgr_image, conf_thresh, nms_thresh, output)
         namesfile = "coco.names"
         class_names = self.load_class_names(namesfile)
-        import random
 
-        random_number = random.randint(1, 100)
+        # random_number = random.randint(1, 100)
+        # save_name = "ttnn_prediction_demo" + str(random_number) + ".jpg"
+        save_name = None
 
-        save_name = "ttnn_prediction_demo" + str(random_number) + ".jpg"
-        # save_name = None
         image_final = self.plot_boxes_cv2(bgr_image, boxes[0], save_name, class_names)
         t4 = time.time()
         print()

From c3936c58b2f4380327c5889e62f3623edf08d5b1 Mon Sep 17 00:00:00 2001
From: Dalar Vartanians <dvartanians@tenstorrent.com>
Date: Tue, 12 Nov 2024 21:26:11 +0000
Subject: [PATCH 52/69] add instructions on how to run the web demo

---
 models/demos/yolov4/web_demo/README.md | 33 ++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 models/demos/yolov4/web_demo/README.md

diff --git a/models/demos/yolov4/web_demo/README.md b/models/demos/yolov4/web_demo/README.md
new file mode 100644
index 00000000000..9bef3e340c0
--- /dev/null
+++ b/models/demos/yolov4/web_demo/README.md
@@ -0,0 +1,33 @@
+# Yolov4 Demo
+
+## How to run web demo demo
+
+- ssh into the server specifying the port:
+  ```
+  ssh -L 7000:localhost:7000 user@IP.ADDRESS
+  ```
+
+- After building metal, once you activate your python env. pip install the requirements on the server side:
+  ```
+  pip install -r models/demos/yolov4/web_demo/server/requirements.txt
+  ```
+
+- From the server run:
+  ```
+  source models/demos/yolov4/web_demo/server/run_uvicorn.sh
+  ```
+
+- git clone metal repo locally/on client side as well.
+  ```
+  cd models/demos/yolov4/web_demo/client
+  ```
+- you may create a python virtual env and pip install the client side requirements.
+
+  ```
+  pip install -r models/demos/yolov4/web_demo/client/requirements.txt
+  ```
+- on the client side run:
+  ```
+  source run_on_client_YOLOV4 --api-url http://IP.ADDRESS:7000
+  ```
+a browser should automatically open and you will see the live object detection demo using your local camera.

From 9a2baa3532b9143b8e9bc34becbfbe75167b9461 Mon Sep 17 00:00:00 2001
From: Bill Teng <135061747+TT-billteng@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:52:51 -0800
Subject: [PATCH 53/69] [skip ci] Update CODEOWNERS (#15023)

### What's changed
Removing myself on CODEOWNERS for certain files

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 CODEOWNERS | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/CODEOWNERS b/CODEOWNERS
index 9f7c869b4a7..fb11d4c9911 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -23,16 +23,15 @@ MANIFEST.in @tt-rkim
 setup.py @tt-rkim
 pyproject.toml @tt-rkim @TT-billteng
 requirements*.txt @tt-rkim @TT-billteng @ttmchiou
-setup_hugepages.py @tt-rkim @TT-billteng
+setup_hugepages.py @tt-rkim
 
-scripts/docker @TT-billteng
-scripts/build_scripts/ @tt-rkim @vtangTT @TT-billteng
-cmake/ @tt-rkim @vtangTT @TT-billteng @afuller-TT
-build_metal.sh @tt-rkim @vtangTT @TT-billteng
+scripts/build_scripts/ @tt-rkim @vtangTT
+cmake/ @tt-rkim @vtangTT @afuller-TT
+build_metal.sh @tt-rkim @vtangTT
 
 Makefile @tt-rkim
-/CMakeLists.txt @tt-rkim @vtangTT @TT-billteng @blozano-tt @afuller-TT
-tests/CMakeLists.txt @tt-rkim @vtangTT @TT-billteng @blozano-tt @afuller-TT
+/CMakeLists.txt @tt-rkim @vtangTT @blozano-tt @afuller-TT
+tests/CMakeLists.txt @tt-rkim @vtangTT @blozano-tt @afuller-TT
 
 # Testing scripts and infra
 
@@ -176,9 +175,9 @@ tests/device_perf_tests/stable_diffusion/test_perf_stable_diffusion.py @esmalTT
 tests/ttnn/integration_tests/unet @esmalTT @uaydonat @mywoodstock
 tests/nightly/wh_b0_only_eth/experimental/functional_unet @esmalTT @uaydonat @mywoodstock
 scripts/profiler/ @mo-tenstorrent
-scripts/docker @ttmchiou @TT-billteng @tt-rkim
+scripts/docker @ttmchiou @tt-rkim
 
-dockerfile @ttmchiou @TT-billteng @tt-rkim
+dockerfile @ttmchiou @tt-rkim
 
 tt_metal/CMakeLists.txt @abhullar-tt @pgkeller @aliuTT @tt-aho @tt-dma @tt-asaigal @ubcheema @blozano-tt
 ttnn/CMakeLists.txt @ayerofieiev-tt @dmakoviichuk-tt @yan-zaretskiy

From 268f4298a13d630e49a51e8c542c764729d39416 Mon Sep 17 00:00:00 2001
From: Denys Makoviichuk <dmakoviichuk@tenstorrent.com>
Date: Wed, 13 Nov 2024 14:21:21 -0800
Subject: [PATCH 54/69] Added tt-train to the tt-metal monorepo (#14875)

### Problem description
We decided to move tt-train inside of the monorepo
### What's changed
Added tt-train.

### TODO List:
- [x] Add removed files because git-lfs didn't work: tokenizer,
Shakespeare.
- [x] Return back 3rd_party deps: wandbcpp and tokenizers
- [x] CI
- [x] building integration

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes

---------

Co-authored-by: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
Co-authored-by: Andrew Fuller <afuller@tenstorrent.com>
Co-authored-by: Roman Furko <rfurko@tenstorrent.com>
---
 .gitattributes                                |   2 +
 .../workflows/all-post-commit-workflows.yaml  |  14 +
 .github/workflows/build-artifact.yaml         |   5 +-
 .../tt-train-post-commit-wrapper.yaml         |  27 ++
 .github/workflows/tt-train-post-commit.yaml   |  81 ++++
 .gitmodules                                   |   6 +
 CMakeLists.txt                                |   4 +
 CODEOWNERS                                    |   4 +
 build_metal.sh                                |  21 +-
 cmake/project_options.cmake                   |   1 +
 tt-train/.clang-format                        | 146 +++++++
 tt-train/.clang-tidy                          |  30 ++
 .../workflows/builld_and_test_all.yaml        |  97 +++++
 tt-train/.github/workflows/pull_request.yaml  |  90 ++++
 .../.github/workflows/run_precommit_all.yaml  |  44 ++
 tt-train/.gitignore                           |  51 +++
 tt-train/.vscode/launch.json                  |  27 ++
 tt-train/.vscode/settings.json                |   3 +
 tt-train/3rd_party/tokenizers-cpp             |   1 +
 tt-train/3rd_party/wandb-cpp                  |   1 +
 tt-train/CMakeLists.txt                       |  59 +++
 tt-train/LICENSE                              | 214 ++++++++++
 tt-train/PULL_REQUEST_TEMPLATE.md             |  22 +
 tt-train/README.md                            |  75 ++++
 tt-train/build_all.sh                         |  13 +
 tt-train/cmake/CPM.cmake                      |  26 ++
 tt-train/cmake/compilers.cmake                |  63 +++
 tt-train/cmake/dependencies.cmake             |  63 +++
 tt-train/cmake/fetch_boost.cmake              |  27 ++
 tt-train/cmake/fetch_cli11.cmake              |   5 +
 tt-train/cmake/fetch_msgpack.cmake            |  25 ++
 tt-train/images/nano-gpt-training-example.png | Bin 0 -> 116000 bytes
 tt-train/init_repo.sh                         |  18 +
 tt-train/scripts/install_cmake_3_30.sh        |  16 +
 tt-train/sources/CMakeLists.txt               |   2 +
 tt-train/sources/examples/CMakeLists.txt      |   5 +
 .../examples/graph_capture/CMakeLists.txt     |   6 +
 .../sources/examples/graph_capture/main.cpp   | 119 ++++++
 .../examples/graph_capture/visualize_graph.py |  19 +
 .../examples/linear_regression/CMakeLists.txt |   6 +
 .../examples/linear_regression/main.cpp       |  89 ++++
 .../sources/examples/mnist_mlp/CMakeLists.txt |  15 +
 tt-train/sources/examples/mnist_mlp/main.cpp  | 183 ++++++++
 .../sources/examples/mnist_mlp/models.cpp     |  46 ++
 .../sources/examples/mnist_mlp/models.hpp     |  27 ++
 .../mnist_mlp/pytorch_mnist_bfloat16.py       |  78 ++++
 tt-train/sources/examples/mnist_mlp/utils.cpp |  33 ++
 tt-train/sources/examples/mnist_mlp/utils.hpp |  64 +++
 .../sources/examples/nano_gpt/CMakeLists.txt  |  12 +
 .../sources/examples/nano_gpt/chat_demo.py    | 115 +++++
 .../examples/nano_gpt/data/shakespeare.txt    |   3 +
 tt-train/sources/examples/nano_gpt/eval.sh    |  13 +
 tt-train/sources/examples/nano_gpt/main.cpp   | 366 ++++++++++++++++
 tt-train/sources/examples/nano_gpt/models.cpp |  94 +++++
 tt-train/sources/examples/nano_gpt/models.hpp |  52 +++
 tt-train/sources/examples/nano_gpt/runner.sh  |  23 +
 tt-train/sources/examples/nano_gpt/utils.cpp  |  37 ++
 tt-train/sources/examples/nano_gpt/utils.hpp  |  54 +++
 .../examples/sample_app/CMakeLists.txt        |   6 +
 tt-train/sources/examples/sample_app/main.cpp | 104 +++++
 .../examples/simple_cnn/CMakeLists.txt        |   6 +
 tt-train/sources/examples/simple_cnn/main.cpp |  21 +
 tt-train/sources/ttml/CMakeLists.txt          | 131 ++++++
 .../sources/ttml/autograd/auto_context.cpp    |  51 +++
 .../sources/ttml/autograd/auto_context.hpp    |  59 +++
 .../sources/ttml/autograd/autocast_tensor.cpp |  42 ++
 .../sources/ttml/autograd/autocast_tensor.hpp |  32 ++
 .../ttml/autograd/clip_gradient_norm.cpp      |  30 ++
 .../ttml/autograd/clip_gradient_norm.hpp      |  23 +
 tt-train/sources/ttml/autograd/graph.cpp      |  65 +++
 tt-train/sources/ttml/autograd/graph.hpp      |  46 ++
 .../sources/ttml/autograd/graph_utils.hpp     |  28 ++
 .../sources/ttml/autograd/module_base.cpp     |  80 ++++
 .../sources/ttml/autograd/module_base.hpp     |  50 +++
 tt-train/sources/ttml/autograd/tensor.cpp     | 135 ++++++
 tt-train/sources/ttml/autograd/tensor.hpp     |  63 +++
 .../ttml/core/compute_kernel_config.cpp       |  45 ++
 .../ttml/core/compute_kernel_config.hpp       |  19 +
 tt-train/sources/ttml/core/debug.hpp          |  19 +
 tt-train/sources/ttml/core/device.cpp         |  28 ++
 tt-train/sources/ttml/core/device.hpp         |  28 ++
 tt-train/sources/ttml/core/not_null.hpp       |  60 +++
 tt-train/sources/ttml/core/system_utils.cpp   |  21 +
 tt-train/sources/ttml/core/system_utils.hpp   |  11 +
 tt-train/sources/ttml/core/template_utils.hpp |  12 +
 .../sources/ttml/core/tt_tensor_utils.cpp     | 331 +++++++++++++++
 .../sources/ttml/core/tt_tensor_utils.hpp     |  40 ++
 .../sources/ttml/core/ttnn_all_includes.hpp   |  60 +++
 tt-train/sources/ttml/core/ttnn_fwd.hpp       |  21 +
 .../data/tokenizers/gpt2-tokenizer.json       |   3 +
 tt-train/sources/ttml/datasets/dataloader.hpp | 101 +++++
 .../sources/ttml/datasets/dataset_base.hpp    |  45 ++
 .../sources/ttml/datasets/dataset_subset.hpp  |  38 ++
 tt-train/sources/ttml/datasets/generators.cpp |  55 +++
 tt-train/sources/ttml/datasets/generators.hpp |  23 +
 .../ttml/datasets/in_memory_dataset.hpp       |  38 ++
 .../ttml/datasets/in_memory_token_dataset.cpp |  35 ++
 .../ttml/datasets/in_memory_token_dataset.hpp |  35 ++
 tt-train/sources/ttml/datasets/utils.cpp      |  37 ++
 tt-train/sources/ttml/datasets/utils.hpp      |  49 +++
 .../sources/ttml/init/cpu_initializers.cpp    |  78 ++++
 .../sources/ttml/init/cpu_initializers.hpp    |  39 ++
 .../sources/ttml/init/tensor_initializers.cpp |  77 ++++
 .../sources/ttml/init/tensor_initializers.hpp |  24 ++
 .../sources/ttml/modules/dropout_module.cpp   |  23 +
 .../sources/ttml/modules/dropout_module.hpp   |  22 +
 .../sources/ttml/modules/embedding_module.cpp |  48 +++
 .../sources/ttml/modules/embedding_module.hpp |  21 +
 tt-train/sources/ttml/modules/gpt_block.cpp   |  59 +++
 tt-train/sources/ttml/modules/gpt_block.hpp   |  39 ++
 .../ttml/modules/layer_norm_module.cpp        |  30 ++
 .../ttml/modules/layer_norm_module.hpp        |  27 ++
 .../sources/ttml/modules/linear_module.cpp    |  38 ++
 .../sources/ttml/modules/linear_module.hpp    |  30 ++
 .../ttml/modules/multi_head_attention.cpp     |  42 ++
 .../ttml/modules/multi_head_attention.hpp     |  28 ++
 .../ttml/modules/multi_layer_perceptron.cpp   |  41 ++
 .../ttml/modules/multi_layer_perceptron.hpp   |  31 ++
 .../ttml/modules/single_head_attention.cpp    |  39 ++
 .../ttml/modules/single_head_attention.hpp    |  25 ++
 tt-train/sources/ttml/ops/binary_ops.cpp      | 104 +++++
 tt-train/sources/ttml/ops/binary_ops.hpp      |  20 +
 tt-train/sources/ttml/ops/dropout_op.cpp      |  43 ++
 tt-train/sources/ttml/ops/dropout_op.hpp      |  12 +
 tt-train/sources/ttml/ops/embedding_op.cpp    |  42 ++
 tt-train/sources/ttml/ops/embedding_op.hpp    |  13 +
 tt-train/sources/ttml/ops/layernorm_op.cpp    |  77 ++++
 tt-train/sources/ttml/ops/layernorm_op.hpp    |  13 +
 tt-train/sources/ttml/ops/linear_op.cpp       | 143 +++++++
 tt-train/sources/ttml/ops/linear_op.hpp       |  29 ++
 tt-train/sources/ttml/ops/losses.cpp          | 115 +++++
 tt-train/sources/ttml/ops/losses.hpp          |  22 +
 .../sources/ttml/ops/multi_head_utils.cpp     |  85 ++++
 .../sources/ttml/ops/multi_head_utils.hpp     |  14 +
 .../ttml/ops/scaled_dot_product_attention.cpp | 158 +++++++
 .../ttml/ops/scaled_dot_product_attention.hpp |  23 +
 tt-train/sources/ttml/ops/unary_ops.cpp       | 109 +++++
 tt-train/sources/ttml/ops/unary_ops.hpp       |  18 +
 tt-train/sources/ttml/optimizers/adamw.cpp    | 232 ++++++++++
 tt-train/sources/ttml/optimizers/adamw.hpp    |  63 +++
 .../ttml/optimizers/optimizer_base.cpp        |  25 ++
 .../ttml/optimizers/optimizer_base.hpp        |  36 ++
 tt-train/sources/ttml/optimizers/sgd.cpp      |  98 +++++
 tt-train/sources/ttml/optimizers/sgd.hpp      |  44 ++
 .../ttml/serialization/msgpack_file.cpp       | 398 ++++++++++++++++++
 .../ttml/serialization/msgpack_file.hpp       |  80 ++++
 .../ttml/serialization/serialization.cpp      | 174 ++++++++
 .../ttml/serialization/serialization.hpp      |  35 ++
 .../sources/ttml/tokenizers/bpe_tokenizer.cpp |  86 ++++
 .../sources/ttml/tokenizers/bpe_tokenizer.hpp |  31 ++
 .../ttml/tokenizers/char_tokenizer.cpp        |  58 +++
 .../ttml/tokenizers/char_tokenizer.hpp        |  47 +++
 .../tokenizers/char_tokenizer_trainer.cpp     |  30 ++
 .../tokenizers/char_tokenizer_trainer.hpp     |  15 +
 .../ttml/tokenizers/tokenizer_base.hpp        |  30 ++
 tt-train/sources/ttml/ttml.cpp                |  10 +
 tt-train/sources/ttml/ttml.hpp                |   9 +
 .../ttml/ttnn_fixed/trivial_ttnn_ops.cpp      |  57 +++
 .../ttml/ttnn_fixed/trivial_ttnn_ops.hpp      |  17 +
 tt-train/tests/3rd_party/tokenizers_test.cpp  |  63 +++
 tt-train/tests/3rd_party/xtensor_test.cpp     |  29 ++
 tt-train/tests/CMakeLists.txt                 |  22 +
 tt-train/tests/autograd/autograd_tensor.cpp   |  44 ++
 tt-train/tests/autograd/autograd_test.cpp     | 105 +++++
 .../autograd/clip_gradient_norm_test.cpp      |  73 ++++
 .../autograd/module_base_parameters_test.cpp  | 111 +++++
 tt-train/tests/core/tensor_utils_test.cpp     | 214 ++++++++++
 tt-train/tests/datasets/dataloader_test.cpp   | 140 ++++++
 tt-train/tests/datasets/generators_test.cpp   |  79 ++++
 .../datasets/in_memory_token_dataset_test.cpp |  79 ++++
 tt-train/tests/datasets/random_split_test.cpp |  84 ++++
 tt-train/tests/datasets/utils_test.cpp        |   9 +
 .../model/linear_regression_full_test.cpp     |  58 +++
 tt-train/tests/model/model_names_test.cpp     |  75 ++++
 tt-train/tests/ops/embedding_op_test.cpp      | 119 ++++++
 tt-train/tests/ops/layer_norm_op_test.cpp     | 102 +++++
 tt-train/tests/ops/linear_op_test.cpp         | 116 +++++
 tt-train/tests/ops/unary_ops_test.cpp         |  58 +++
 tt-train/tests/optimizers/adamw_test.cpp      |  70 +++
 .../serialization/msgpack_serializer_test.cpp | 241 +++++++++++
 .../serialization/tensor_serializer_test.cpp  |  89 ++++
 tt-train/tests/test_data/tokenizer.json       |   3 +
 .../tests/tokenizers/bpe_tokenizer_test.cpp   |  37 ++
 .../tests/tokenizers/char_tokenizer_test.cpp  |  62 +++
 .../char_tokenizer_trainer_test.cpp           |  81 ++++
 .../ttnn_fixed/trivial_ttnn_ops_test.cpp      | 230 ++++++++++
 186 files changed, 10634 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/tt-train-post-commit-wrapper.yaml
 create mode 100644 .github/workflows/tt-train-post-commit.yaml
 create mode 100644 tt-train/.clang-format
 create mode 100644 tt-train/.clang-tidy
 create mode 100644 tt-train/.github/workflows/builld_and_test_all.yaml
 create mode 100644 tt-train/.github/workflows/pull_request.yaml
 create mode 100644 tt-train/.github/workflows/run_precommit_all.yaml
 create mode 100644 tt-train/.gitignore
 create mode 100644 tt-train/.vscode/launch.json
 create mode 100644 tt-train/.vscode/settings.json
 create mode 160000 tt-train/3rd_party/tokenizers-cpp
 create mode 160000 tt-train/3rd_party/wandb-cpp
 create mode 100644 tt-train/CMakeLists.txt
 create mode 100644 tt-train/LICENSE
 create mode 100644 tt-train/PULL_REQUEST_TEMPLATE.md
 create mode 100644 tt-train/README.md
 create mode 100755 tt-train/build_all.sh
 create mode 100644 tt-train/cmake/CPM.cmake
 create mode 100644 tt-train/cmake/compilers.cmake
 create mode 100644 tt-train/cmake/dependencies.cmake
 create mode 100644 tt-train/cmake/fetch_boost.cmake
 create mode 100644 tt-train/cmake/fetch_cli11.cmake
 create mode 100644 tt-train/cmake/fetch_msgpack.cmake
 create mode 100644 tt-train/images/nano-gpt-training-example.png
 create mode 100755 tt-train/init_repo.sh
 create mode 100755 tt-train/scripts/install_cmake_3_30.sh
 create mode 100644 tt-train/sources/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/graph_capture/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/graph_capture/main.cpp
 create mode 100644 tt-train/sources/examples/graph_capture/visualize_graph.py
 create mode 100644 tt-train/sources/examples/linear_regression/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/linear_regression/main.cpp
 create mode 100644 tt-train/sources/examples/mnist_mlp/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/mnist_mlp/main.cpp
 create mode 100644 tt-train/sources/examples/mnist_mlp/models.cpp
 create mode 100644 tt-train/sources/examples/mnist_mlp/models.hpp
 create mode 100644 tt-train/sources/examples/mnist_mlp/pytorch_mnist_bfloat16.py
 create mode 100644 tt-train/sources/examples/mnist_mlp/utils.cpp
 create mode 100644 tt-train/sources/examples/mnist_mlp/utils.hpp
 create mode 100644 tt-train/sources/examples/nano_gpt/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/nano_gpt/chat_demo.py
 create mode 100644 tt-train/sources/examples/nano_gpt/data/shakespeare.txt
 create mode 100755 tt-train/sources/examples/nano_gpt/eval.sh
 create mode 100644 tt-train/sources/examples/nano_gpt/main.cpp
 create mode 100644 tt-train/sources/examples/nano_gpt/models.cpp
 create mode 100644 tt-train/sources/examples/nano_gpt/models.hpp
 create mode 100755 tt-train/sources/examples/nano_gpt/runner.sh
 create mode 100644 tt-train/sources/examples/nano_gpt/utils.cpp
 create mode 100644 tt-train/sources/examples/nano_gpt/utils.hpp
 create mode 100644 tt-train/sources/examples/sample_app/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/sample_app/main.cpp
 create mode 100644 tt-train/sources/examples/simple_cnn/CMakeLists.txt
 create mode 100644 tt-train/sources/examples/simple_cnn/main.cpp
 create mode 100644 tt-train/sources/ttml/CMakeLists.txt
 create mode 100644 tt-train/sources/ttml/autograd/auto_context.cpp
 create mode 100644 tt-train/sources/ttml/autograd/auto_context.hpp
 create mode 100644 tt-train/sources/ttml/autograd/autocast_tensor.cpp
 create mode 100644 tt-train/sources/ttml/autograd/autocast_tensor.hpp
 create mode 100644 tt-train/sources/ttml/autograd/clip_gradient_norm.cpp
 create mode 100644 tt-train/sources/ttml/autograd/clip_gradient_norm.hpp
 create mode 100644 tt-train/sources/ttml/autograd/graph.cpp
 create mode 100644 tt-train/sources/ttml/autograd/graph.hpp
 create mode 100644 tt-train/sources/ttml/autograd/graph_utils.hpp
 create mode 100644 tt-train/sources/ttml/autograd/module_base.cpp
 create mode 100644 tt-train/sources/ttml/autograd/module_base.hpp
 create mode 100644 tt-train/sources/ttml/autograd/tensor.cpp
 create mode 100644 tt-train/sources/ttml/autograd/tensor.hpp
 create mode 100644 tt-train/sources/ttml/core/compute_kernel_config.cpp
 create mode 100644 tt-train/sources/ttml/core/compute_kernel_config.hpp
 create mode 100644 tt-train/sources/ttml/core/debug.hpp
 create mode 100644 tt-train/sources/ttml/core/device.cpp
 create mode 100644 tt-train/sources/ttml/core/device.hpp
 create mode 100644 tt-train/sources/ttml/core/not_null.hpp
 create mode 100644 tt-train/sources/ttml/core/system_utils.cpp
 create mode 100644 tt-train/sources/ttml/core/system_utils.hpp
 create mode 100644 tt-train/sources/ttml/core/template_utils.hpp
 create mode 100644 tt-train/sources/ttml/core/tt_tensor_utils.cpp
 create mode 100644 tt-train/sources/ttml/core/tt_tensor_utils.hpp
 create mode 100644 tt-train/sources/ttml/core/ttnn_all_includes.hpp
 create mode 100644 tt-train/sources/ttml/core/ttnn_fwd.hpp
 create mode 100644 tt-train/sources/ttml/data/tokenizers/data/tokenizers/gpt2-tokenizer.json
 create mode 100644 tt-train/sources/ttml/datasets/dataloader.hpp
 create mode 100644 tt-train/sources/ttml/datasets/dataset_base.hpp
 create mode 100644 tt-train/sources/ttml/datasets/dataset_subset.hpp
 create mode 100644 tt-train/sources/ttml/datasets/generators.cpp
 create mode 100644 tt-train/sources/ttml/datasets/generators.hpp
 create mode 100644 tt-train/sources/ttml/datasets/in_memory_dataset.hpp
 create mode 100644 tt-train/sources/ttml/datasets/in_memory_token_dataset.cpp
 create mode 100644 tt-train/sources/ttml/datasets/in_memory_token_dataset.hpp
 create mode 100644 tt-train/sources/ttml/datasets/utils.cpp
 create mode 100644 tt-train/sources/ttml/datasets/utils.hpp
 create mode 100644 tt-train/sources/ttml/init/cpu_initializers.cpp
 create mode 100644 tt-train/sources/ttml/init/cpu_initializers.hpp
 create mode 100644 tt-train/sources/ttml/init/tensor_initializers.cpp
 create mode 100644 tt-train/sources/ttml/init/tensor_initializers.hpp
 create mode 100644 tt-train/sources/ttml/modules/dropout_module.cpp
 create mode 100644 tt-train/sources/ttml/modules/dropout_module.hpp
 create mode 100644 tt-train/sources/ttml/modules/embedding_module.cpp
 create mode 100644 tt-train/sources/ttml/modules/embedding_module.hpp
 create mode 100644 tt-train/sources/ttml/modules/gpt_block.cpp
 create mode 100644 tt-train/sources/ttml/modules/gpt_block.hpp
 create mode 100644 tt-train/sources/ttml/modules/layer_norm_module.cpp
 create mode 100644 tt-train/sources/ttml/modules/layer_norm_module.hpp
 create mode 100644 tt-train/sources/ttml/modules/linear_module.cpp
 create mode 100644 tt-train/sources/ttml/modules/linear_module.hpp
 create mode 100644 tt-train/sources/ttml/modules/multi_head_attention.cpp
 create mode 100644 tt-train/sources/ttml/modules/multi_head_attention.hpp
 create mode 100644 tt-train/sources/ttml/modules/multi_layer_perceptron.cpp
 create mode 100644 tt-train/sources/ttml/modules/multi_layer_perceptron.hpp
 create mode 100644 tt-train/sources/ttml/modules/single_head_attention.cpp
 create mode 100644 tt-train/sources/ttml/modules/single_head_attention.hpp
 create mode 100644 tt-train/sources/ttml/ops/binary_ops.cpp
 create mode 100644 tt-train/sources/ttml/ops/binary_ops.hpp
 create mode 100644 tt-train/sources/ttml/ops/dropout_op.cpp
 create mode 100644 tt-train/sources/ttml/ops/dropout_op.hpp
 create mode 100644 tt-train/sources/ttml/ops/embedding_op.cpp
 create mode 100644 tt-train/sources/ttml/ops/embedding_op.hpp
 create mode 100644 tt-train/sources/ttml/ops/layernorm_op.cpp
 create mode 100644 tt-train/sources/ttml/ops/layernorm_op.hpp
 create mode 100644 tt-train/sources/ttml/ops/linear_op.cpp
 create mode 100644 tt-train/sources/ttml/ops/linear_op.hpp
 create mode 100644 tt-train/sources/ttml/ops/losses.cpp
 create mode 100644 tt-train/sources/ttml/ops/losses.hpp
 create mode 100644 tt-train/sources/ttml/ops/multi_head_utils.cpp
 create mode 100644 tt-train/sources/ttml/ops/multi_head_utils.hpp
 create mode 100644 tt-train/sources/ttml/ops/scaled_dot_product_attention.cpp
 create mode 100644 tt-train/sources/ttml/ops/scaled_dot_product_attention.hpp
 create mode 100644 tt-train/sources/ttml/ops/unary_ops.cpp
 create mode 100644 tt-train/sources/ttml/ops/unary_ops.hpp
 create mode 100644 tt-train/sources/ttml/optimizers/adamw.cpp
 create mode 100644 tt-train/sources/ttml/optimizers/adamw.hpp
 create mode 100644 tt-train/sources/ttml/optimizers/optimizer_base.cpp
 create mode 100644 tt-train/sources/ttml/optimizers/optimizer_base.hpp
 create mode 100644 tt-train/sources/ttml/optimizers/sgd.cpp
 create mode 100644 tt-train/sources/ttml/optimizers/sgd.hpp
 create mode 100644 tt-train/sources/ttml/serialization/msgpack_file.cpp
 create mode 100644 tt-train/sources/ttml/serialization/msgpack_file.hpp
 create mode 100644 tt-train/sources/ttml/serialization/serialization.cpp
 create mode 100644 tt-train/sources/ttml/serialization/serialization.hpp
 create mode 100644 tt-train/sources/ttml/tokenizers/bpe_tokenizer.cpp
 create mode 100644 tt-train/sources/ttml/tokenizers/bpe_tokenizer.hpp
 create mode 100644 tt-train/sources/ttml/tokenizers/char_tokenizer.cpp
 create mode 100644 tt-train/sources/ttml/tokenizers/char_tokenizer.hpp
 create mode 100644 tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.cpp
 create mode 100644 tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.hpp
 create mode 100644 tt-train/sources/ttml/tokenizers/tokenizer_base.hpp
 create mode 100644 tt-train/sources/ttml/ttml.cpp
 create mode 100644 tt-train/sources/ttml/ttml.hpp
 create mode 100644 tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.cpp
 create mode 100644 tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.hpp
 create mode 100644 tt-train/tests/3rd_party/tokenizers_test.cpp
 create mode 100644 tt-train/tests/3rd_party/xtensor_test.cpp
 create mode 100644 tt-train/tests/CMakeLists.txt
 create mode 100644 tt-train/tests/autograd/autograd_tensor.cpp
 create mode 100644 tt-train/tests/autograd/autograd_test.cpp
 create mode 100644 tt-train/tests/autograd/clip_gradient_norm_test.cpp
 create mode 100644 tt-train/tests/autograd/module_base_parameters_test.cpp
 create mode 100644 tt-train/tests/core/tensor_utils_test.cpp
 create mode 100644 tt-train/tests/datasets/dataloader_test.cpp
 create mode 100644 tt-train/tests/datasets/generators_test.cpp
 create mode 100644 tt-train/tests/datasets/in_memory_token_dataset_test.cpp
 create mode 100644 tt-train/tests/datasets/random_split_test.cpp
 create mode 100644 tt-train/tests/datasets/utils_test.cpp
 create mode 100644 tt-train/tests/model/linear_regression_full_test.cpp
 create mode 100644 tt-train/tests/model/model_names_test.cpp
 create mode 100644 tt-train/tests/ops/embedding_op_test.cpp
 create mode 100644 tt-train/tests/ops/layer_norm_op_test.cpp
 create mode 100644 tt-train/tests/ops/linear_op_test.cpp
 create mode 100644 tt-train/tests/ops/unary_ops_test.cpp
 create mode 100644 tt-train/tests/optimizers/adamw_test.cpp
 create mode 100644 tt-train/tests/serialization/msgpack_serializer_test.cpp
 create mode 100644 tt-train/tests/serialization/tensor_serializer_test.cpp
 create mode 100644 tt-train/tests/test_data/tokenizer.json
 create mode 100644 tt-train/tests/tokenizers/bpe_tokenizer_test.cpp
 create mode 100644 tt-train/tests/tokenizers/char_tokenizer_test.cpp
 create mode 100644 tt-train/tests/tokenizers/char_tokenizer_trainer_test.cpp
 create mode 100644 tt-train/tests/ttnn_fixed/trivial_ttnn_ops_test.cpp

diff --git a/.gitattributes b/.gitattributes
index e69de29bb2d..a305078387a 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tt-train/sources/examples/nano_gpt/data/shakespeare.txt filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/all-post-commit-workflows.yaml b/.github/workflows/all-post-commit-workflows.yaml
index f07f6b7dcbb..e4817e6e4fd 100644
--- a/.github/workflows/all-post-commit-workflows.yaml
+++ b/.github/workflows/all-post-commit-workflows.yaml
@@ -168,6 +168,20 @@ jobs:
     with:
       arch: ${{ matrix.test-group.arch }}
       runner-label: ${{ matrix.test-group.runner-label }}
+  tt-train-cpp-unit-tests:
+    needs: build-artifact
+    secrets: inherit
+    strategy:
+      fail-fast: false
+      matrix:
+        test-group: [
+          { arch: wormhole_b0, runner-label: N150 },
+          { arch: wormhole_b0, runner-label: N300 },
+        ]
+    uses: ./.github/workflows/tt-train-post-commit.yaml
+    with:
+      arch: ${{ matrix.test-group.arch }}
+      runner-label: ${{ matrix.test-group.runner-label }}
   profiler-regression:
     needs: build-artifact-profiler
     uses: ./.github/workflows/run-profiler-regression.yaml
diff --git a/.github/workflows/build-artifact.yaml b/.github/workflows/build-artifact.yaml
index 8dcdcc6a551..f0dad00701a 100644
--- a/.github/workflows/build-artifact.yaml
+++ b/.github/workflows/build-artifact.yaml
@@ -124,6 +124,7 @@ jobs:
             -v /home/ubuntu/.ccache-ci:/home/ubuntu/.ccache
             -v /mnt/MLPerf/ccache:/mnt/MLPerf/ccache
             -e ARCH_NAME=${{ matrix.arch }}
+            -e CARGO_HOME=${{ github.workspace }}/.cargo
             -w ${{ github.workspace }}
           run: |
             set -eu # basic shell hygiene
@@ -136,7 +137,7 @@ jobs:
             # NOTE: may be inaccurate if we have >1 build runner on the same machine, using the same local cache
             ccache -z
 
-            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-tests --build-programming-examples --enable-ccache"
+            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-all --enable-ccache"
             echo "${{ inputs.tracy }}"
             if [ "${{ inputs.tracy }}" = "true" ]; then
               build_command="$build_command --enable-profiler"
@@ -150,7 +151,7 @@ jobs:
           cat build/ccache.stats >> $GITHUB_STEP_SUMMARY
           echo '```' >> $GITHUB_STEP_SUMMARY
       - name: 'Tar files'
-        run: tar -cvf ttm_${{ matrix.arch }}.tar build/lib ttnn/ttnn/*.so build/programming_examples build/test build/tools runtime
+        run: tar -cvhf ttm_${{ matrix.arch }}.tar ttnn/ttnn/*.so build/lib ttnn/ttnn/*.so build/programming_examples build/test build/tools build/tt-train runtime
       - name: 'Upload Artifact'
         uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/tt-train-post-commit-wrapper.yaml b/.github/workflows/tt-train-post-commit-wrapper.yaml
new file mode 100644
index 00000000000..e6585e47922
--- /dev/null
+++ b/.github/workflows/tt-train-post-commit-wrapper.yaml
@@ -0,0 +1,27 @@
+name: "[post-commit] tt-train C++ tests"
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+jobs:
+  static-checks:
+    uses: ./.github/workflows/all-static-checks.yaml
+    secrets: inherit
+  build-artifact:
+    uses: ./.github/workflows/build-artifact.yaml
+    secrets: inherit
+  tt-train-cpp-unit-tests:
+    needs: build-artifact
+    secrets: inherit
+    strategy:
+      fail-fast: false
+      matrix:
+        test-group: [
+          { arch: wormhole_b0, runner-label: N150 },
+          { arch: wormhole_b0, runner-label: N300 },
+        ]
+    uses: ./.github/workflows/tt-train-post-commit.yaml
+    with:
+      arch: ${{ matrix.test-group.arch}}
+      runner-label: ${{ matrix.test-group.runner-label}}
diff --git a/.github/workflows/tt-train-post-commit.yaml b/.github/workflows/tt-train-post-commit.yaml
new file mode 100644
index 00000000000..4fcc31a66d7
--- /dev/null
+++ b/.github/workflows/tt-train-post-commit.yaml
@@ -0,0 +1,81 @@
+name: "[internal] tt-train C++ tests impl"
+
+on:
+  workflow_call:
+    inputs:
+      arch:
+        required: true
+        type: string
+      runner-label:
+        required: true
+        type: string
+      timeout:
+        required: false
+        type: number
+        default: 20
+  workflow_dispatch:
+    inputs:
+      arch:
+        required: true
+        type: choice
+        options:
+          - wormhole_b0
+      runner-label:
+        required: true
+        type: choice
+        options:
+          - N150
+          - N300
+      timeout:
+        required: false
+        type: number
+        default: 20
+
+jobs:
+  models:
+    strategy:
+      # Do not fail-fast because we need to ensure all tests go to completion
+      # so we try not to get hanging machines
+      fail-fast: false
+      matrix:
+        test-group: [
+          {name: tt-train, cmd: ctest --no-tests=error --output-on-failure},
+        ]
+    name: ${{ matrix.test-group.name }} ${{ inputs.arch }} ${{ inputs.runner-label }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ inputs.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+      TEST_DATA_DIR: ${{ github.workspace }}/tt-train/tests/test_data
+    runs-on:
+      - ${{ inputs.runner-label }}
+      - cloud-virtual-machine
+      - in-service
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+      - uses: ./.github/actions/prepare-metal-run
+        with:
+          arch: ${{ inputs.arch }}
+      - name: ${{ matrix.test-group.name }} tests
+        timeout-minutes: ${{ inputs.timeout }}
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          export PYTHONPATH=$TT_METAL_HOME
+          cd $TT_METAL_HOME
+          cp ./build/tt-train/3rd_party/wandb-cpp/libwandbcpp.so build/lib/
+          find ./build -type f -name "*.tcl" -o -name "*.cmake" -exec sed -i "s|/home/ubuntu/[^/]*/_work/tt-metal/tt-metal/build_Release|${TT_METAL_HOME}/build|g" {} +
+          cd $TT_METAL_HOME/build/tt-train
+          ldd tests/ttml_tests || true
+          ${{ matrix.test-group.cmd }}
+      - uses: ./.github/actions/slack-report
+        if: ${{ failure() }}
+        with:
+          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
+          owner: U07ASPTGJTS # Denys
+      - name: Generate system logs on failure
+        uses: ./.github/actions/generate-system-logs
+        if: ${{ failure() }}
diff --git a/.gitmodules b/.gitmodules
index ab121e423f3..a304dbb3332 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -28,3 +28,9 @@
 [submodule "tt_metal/third_party/tt_llk_blackhole"]
 	path = tt_metal/third_party/tt_llk_blackhole
 	url = https://github.com/tenstorrent/tt-llk-bh.git
+[submodule "tokenizers-cpp"]
+	path = tt-train/3rd_party/tokenizers-cpp
+	url = https://github.com/mlc-ai/tokenizers-cpp.git
+[submodule "3rd_party/wandb-cpp"]
+	path = tt-train/3rd_party/wandb-cpp
+	url = https://github.com/yhisaki/wandb-cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c2b8357f3f..34ee4355dd3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -331,3 +331,7 @@ add_custom_target(
 )
 
 include(packaging)
+
+if(BUILD_TT_TRAIN)
+    add_subdirectory(tt-train)
+endif()
diff --git a/CODEOWNERS b/CODEOWNERS
index fb11d4c9911..88e215bc7ed 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -181,3 +181,7 @@ dockerfile @ttmchiou @tt-rkim
 
 tt_metal/CMakeLists.txt @abhullar-tt @pgkeller @aliuTT @tt-aho @tt-dma @tt-asaigal @ubcheema @blozano-tt
 ttnn/CMakeLists.txt @ayerofieiev-tt @dmakoviichuk-tt @yan-zaretskiy
+
+
+# tt-train
+tt-train/** @dmakoviichuk-tt @rfurko-tt
diff --git a/build_metal.sh b/build_metal.sh
index a3653d8c1ed..b021b6ed543 100755
--- a/build_metal.sh
+++ b/build_metal.sh
@@ -21,6 +21,7 @@ show_help() {
     echo "  --build-metal-tests              Build metal Testcases."
     echo "  --build-umd-tests                Build umd Testcases."
     echo "  --build-programming-examples     Build programming examples."
+    echo "  --build-tt-train                 Build tt-train."
     echo "  --release                        Set the build type as Release."
     echo "  --development                    Set the build type as RelWithDebInfo."
     echo "  --debug                          Set the build type as Debug."
@@ -49,13 +50,15 @@ build_ttnn_tests="OFF"
 build_metal_tests="OFF"
 build_umd_tests="OFF"
 build_programming_examples="OFF"
+build_tt_train="OFF"
 build_static_libs="OFF"
 unity_builds="ON"
+build_all="OFF"
 
 declare -a cmake_args
 
 OPTIONS=h,e,c,t,a,m,s,u,b:,p
-LONGOPTIONS=help,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-static-libs,disable-unity-builds,release,development,debug,clean
+LONGOPTIONS=help,build-all,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-tt-train,build-static-libs,disable-unity-builds,release,development,debug,clean
 
 # Parse the options
 PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTIONS --name "$0" -- "$@")
@@ -101,8 +104,12 @@ while true; do
             build_umd_tests="ON";;
         --build-programming-examples)
             build_programming_examples="ON";;
+        --build-tt-train)
+            build_tt_train="ON";;
         --build-static-libs)
             build_static_libs="ON";;
+        --build-all)
+            build_all="ON";;
         --disable-unity-builds)
 	    unity_builds="OFF";;
         --release)
@@ -225,6 +232,10 @@ if [ "$build_programming_examples" = "ON" ]; then
     cmake_args+=("-DBUILD_PROGRAMMING_EXAMPLES=ON")
 fi
 
+if [ "$build_tt_train" = "ON" ]; then
+    cmake_args+=("-DBUILD_TT_TRAIN=ON")
+fi
+
 if [ "$build_static_libs" = "ON" ]; then
     cmake_args+=("-DBUILD_SHARED_LIBS=OFF")
 fi
@@ -235,6 +246,14 @@ else
     cmake_args+=("-DTT_UNITY_BUILDS=OFF")
 fi
 
+if [ "$build_all" = "ON" ]; then
+    cmake_args+=("-DTT_METAL_BUILD_TESTS=ON")
+    cmake_args+=("-DTTNN_BUILD_TESTS=ON")
+    cmake_args+=("-DTT_UMD_BUILD_TESTS=ON")
+    cmake_args+=("-DBUILD_PROGRAMMING_EXAMPLES=ON")
+    cmake_args+=("-DBUILD_TT_TRAIN=ON")
+fi
+
 # Create and link the build directory
 mkdir -p $build_dir
 ln -nsf $build_dir build
diff --git a/cmake/project_options.cmake b/cmake/project_options.cmake
index 756f24d127a..926ea730760 100644
--- a/cmake/project_options.cmake
+++ b/cmake/project_options.cmake
@@ -17,6 +17,7 @@ option(TT_METAL_BUILD_TESTS "Enables build of tt_metal tests" OFF)
 option(TTNN_BUILD_TESTS "Enables build of ttnn tests" OFF)
 option(ENABLE_CCACHE "Build with compiler cache" FALSE)
 option(TT_UNITY_BUILDS "Build with Unity builds" ON)
+option(BUILD_TT_TRAIN "Enables build of tt-train" OFF)
 ###########################################################################################
 
 if(CMAKE_CXX_CLANG_TIDY AND TT_UNITY_BUILDS)
diff --git a/tt-train/.clang-format b/tt-train/.clang-format
new file mode 100644
index 00000000000..87469336a0c
--- /dev/null
+++ b/tt-train/.clang-format
@@ -0,0 +1,146 @@
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+AccessModifierOffset: -4
+AlignAfterOpenBracket: AlwaysBreak
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands:   true
+AlignTrailingComments: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: None
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+# AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: AfterColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: true
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Regroup
+IncludeCategories:
+  - Regex:           '^<ext/.*\.h>'
+    Priority:        2
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
+IndentPPDirectives: None
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Never
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+  - Language:        TextProto
+    Delimiters:
+      - pb
+      - PB
+      - proto
+      - PROTO
+    EnclosingFunctions:
+      - EqualsProto
+      - EquivToProto
+      - PARSE_PARTIAL_TEXT_PROTO
+      - PARSE_TEST_PROTO
+      - PARSE_TEXT_PROTO
+      - ParseTextOrDie
+      - ParseTextProtoOrDie
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+ReflowComments:  true
+SortIncludes:    true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        4
+UseTab:          Never
+...
diff --git a/tt-train/.clang-tidy b/tt-train/.clang-tidy
new file mode 100644
index 00000000000..a5ddf6e0843
--- /dev/null
+++ b/tt-train/.clang-tidy
@@ -0,0 +1,30 @@
+Checks: "*,
+        -abseil-*,
+        -altera-*,
+        -android-*,
+        -fuchsia-*,
+        -google-*,
+        -llvm*,
+        -modernize-use-trailing-return-type,
+        -zircon-*,
+        -readability-else-after-return,
+        -readability-static-accessed-through-instance,
+        -readability-avoid-const-params-in-decls,
+        -cppcoreguidelines-non-private-member-variables-in-classes,
+        -misc-non-private-member-variables-in-classes,
+        -include-what-you-use,
+        -cppcoreguidelines-avoid-magic-numbers,
+        -readability-magic-numbers,
+        -misc-include-cleaner,
+        -easily-swappable-parameters,
+        -misc-no-recursion
+"
+WarningsAsErrors: ''
+HeaderFilterRegex: ''
+FormatStyle:     none
+
+CheckOptions:
+  - key: readability-identifier-length.IgnoredVariableNames
+    value: 'x|y|z|i|j|k|t|it|a|b'
+  - key: readability-identifier-length.IgnoredParameterNames
+    value: 'x|y|z|t|a|b'
diff --git a/tt-train/.github/workflows/builld_and_test_all.yaml b/tt-train/.github/workflows/builld_and_test_all.yaml
new file mode 100644
index 00000000000..57f73d62cea
--- /dev/null
+++ b/tt-train/.github/workflows/builld_and_test_all.yaml
@@ -0,0 +1,97 @@
+name: Build Tests
+
+on:
+  workflow_call:
+  workflow_dispatch:
+  merge_group:
+
+jobs:
+  run-tests:
+    runs-on: ["n150"]
+    env:
+      ARCH_NAME: wormhole_b0
+      TT_METAL_HOME: ${{ github.workspace }}/3rd_party/tt-metal
+      PYTHONPATH: ${{ github.workspace }}/3rd_party/tt-metal
+    steps:
+      - name: Setup Telemetry
+        uses: catchpoint/workflow-telemetry-action@v2
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          lfs: true
+
+      - name: LFS pull
+        run: |
+          git submodule foreach --recursive git lfs pull
+
+      # actions/checkout runs `git clean -ffdx && git reset --hard HEAD` before fetching
+      # but `build`, `build_Release`, `built` (contains compiled kernels) dirs are not removed because they are in .gitignore
+      # Keep things hermetic - wiping those folders
+      - name: Clear Metal Repo Artifacts
+        run: |
+          rm -rf ${{ env.TT_METAL_HOME }}/build
+          rm -rf ${{ env.TT_METAL_HOME }}/build_Release
+          rm -rf ${{ env.TT_METAL_HOME }}/built
+
+      - name: Get submodule commit hash
+        id: submodule-commit
+        run: |
+          commit=$(git -C 3rd_party/tt-metal rev-parse HEAD)
+          echo "Submodule commit hash: $commit"
+          echo "commit=$commit" >> $GITHUB_OUTPUT
+
+      - name: Restore Metal Build Cache
+        id: restore-cache
+        uses: actions/cache/restore@v4
+        with:
+          path:
+            ${{ env.TT_METAL_HOME }}/build_Release
+          key: metal-build-${{ steps.submodule-commit.outputs.commit }}
+
+      # Making sure that valid `build` symlink exists
+      # It can't be cached by actions/cache
+      - name: Restore Metal build -> build_Release symlink
+        if: steps.restore-cache.outputs.cache-hit == 'true'
+        run: |
+          ln -nsf ${{ env.TT_METAL_HOME }}/build_Release ${{ env.TT_METAL_HOME }}/build
+
+      - name: Build Metal
+        if: steps.restore-cache.outputs.cache-hit != 'true'
+        run: |
+          cd 3rd_party/tt-metal
+          ./build_metal.sh -b Release
+          cd ../../
+
+      # See https://github.com/marketplace/actions/cache#cache-limits
+      - name: Update Metal Build Cache
+        if: steps.restore-cache.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          path:
+            ${{ env.TT_METAL_HOME }}/build_Release
+          key: metal-build-${{ steps.submodule-commit.outputs.commit }}
+
+      - name: Build
+        run: |
+          cmake -DCMAKE_BUILD_TYPE=Release -B build -GNinja
+          cmake --build build --config Release --clean-first
+
+      - name: Tests
+        run: |
+          cd build
+          ctest --no-tests=error --output-on-failure
+
+  # Status check requires job with exact same name
+  # This approach allows to further extend this workflow to allow for a multijob run
+  validate-pr:
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    needs: [run-tests]
+    steps:
+      - run: |
+          test_result="${{ needs.run-tests.result}}"
+          if [[ $test_result == "success" ]] ; then
+            exit 0
+          else
+            exit 1
+          fi
diff --git a/tt-train/.github/workflows/pull_request.yaml b/tt-train/.github/workflows/pull_request.yaml
new file mode 100644
index 00000000000..d06e0bf2a8b
--- /dev/null
+++ b/tt-train/.github/workflows/pull_request.yaml
@@ -0,0 +1,90 @@
+name: Pull Request Validation
+
+on:
+  pull_request:
+
+jobs:
+  validate-pr:
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Run pre-commit hooks (check only)
+        id: tests-step
+        run: |
+          python3 -m venv venv
+          source venv/bin/activate
+          pip install pre-commit
+          pre-commit install
+          pre-commit run --all-files
+          if ! git diff --exit-code; then
+            echo "Pre-commit hooks made changes, please commit them."
+            exit 1
+          fi
+
+  clang-tidy:
+    runs-on: ubuntu-24.04
+    # container: ghcr.io/tenstorrent/tt-metal/tt-metalium/ubuntu-22.04-amd64:latest
+    env:
+      ARCH_NAME: wormhole_b0
+      TT_METAL_HOME: ${{ github.workspace }}/3rd_party/tt-metal
+      PYTHONPATH: ${{ github.workspace }}/3rd_party/tt-meta
+    permissions:
+      pull-requests: write
+      contents: write
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}
+        fetch-depth: 0
+        submodules: "recursive"
+
+    - name: Set safe directory for Git
+      run: git config --global --add safe.directory $GITHUB_WORKSPACE
+
+    - name: Fetch base branch
+      run: |
+        git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}"
+        git fetch --no-tags upstream "${{ github.event.pull_request.base.ref }}"
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clang-tidy-17 libc++-17-dev libc++abi-17-dev
+        sudo DEBIAN_FRONTEND=noninteractive apt-get install python3-dev python3-numpy
+        source ~/.bashrc
+        # sudo ln -s $(which clang-tidy-17) /usr/local/bin/clang-tidy
+
+    - name: Prepare compile_commands.json
+      run: |
+        cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_DISABLE_PRECOMPILE_HEADERS=ON
+
+    - name: Create results directory
+      run: |
+        mkdir clang-tidy-result
+
+    - name: Analyze
+      run: |
+        git diff -U0 "$(git merge-base HEAD "upstream/${{ github.event.pull_request.base.ref }}")" | python3 /usr/bin/clang-tidy-diff-17.py -p1 -path build -export-fixes clang-tidy-result/fixes.yml -j4
+      timeout-minutes: 10
+      continue-on-error: true
+
+    - name: Run clang-tidy-pr-comments action
+      uses: platisd/clang-tidy-pr-comments@837ad8077b1f554dab31a8a43e8bb12c89d2f144
+      with:
+        # The GitHub token (or a personal access token)
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        # The path to the clang-tidy fixes generated above
+        clang_tidy_fixes: clang-tidy-result/fixes.yml
+        # Optionally set to true if you want the Action to request
+        # changes in case warnings are found
+        request_changes: true
+        # Optionally set the number of comments per review
+        # to avoid GitHub API timeouts for heavily loaded
+        # pull requests
+        suggestions_per_comment: 10
+      continue-on-error: true
+
+    - name: Final step to signal success
+      run: echo "The job passes even if 3rd party action fails."
diff --git a/tt-train/.github/workflows/run_precommit_all.yaml b/tt-train/.github/workflows/run_precommit_all.yaml
new file mode 100644
index 00000000000..3e16b9e8692
--- /dev/null
+++ b/tt-train/.github/workflows/run_precommit_all.yaml
@@ -0,0 +1,44 @@
+name: Reformat Code in PR
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: write
+  pages: write
+
+jobs:
+  format-pr:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python environment
+        run: |
+          python3 -m venv venv
+          source venv/bin/activate
+          pip install pre-commit
+
+      - name: Run pre-commit hooks
+        run: |
+          source venv/bin/activate
+          pre-commit install
+          pre-commit run --all-files || true
+
+      - name: Configure git
+        run: |
+          git config --global user.name "GitHub Action"
+          git config --global user.email "action@github.com"
+
+      - name: Check for changes
+        id: check_changes
+        run: |
+          git diff --quiet || echo "changes_detected=true" >> $GITHUB_ENV
+
+      - name: Commit and push changes
+        if: env.changes_detected == 'true'
+        run: |
+          git add --all
+          git commit -m "[auto] Pre-commit run on all files"
+          git push
diff --git a/tt-train/.gitignore b/tt-train/.gitignore
new file mode 100644
index 00000000000..dd0b5ded9db
--- /dev/null
+++ b/tt-train/.gitignore
@@ -0,0 +1,51 @@
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+debug/
+build/
+
+# cpm
+.cpmcache/
+# cache
+.cache/
+
+Testing/
+
+venv/
+
+.envrc
+
+wandb/
+
+cluster_descriptor.yaml
+
+!data/
diff --git a/tt-train/.vscode/launch.json b/tt-train/.vscode/launch.json
new file mode 100644
index 00000000000..b12145a9478
--- /dev/null
+++ b/tt-train/.vscode/launch.json
@@ -0,0 +1,27 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "cppdbg",
+            "request": "launch",
+            "name": "Debug Google Tests",
+            "program": "${command:cmake.launchTargetPath}",
+            "args": [
+                "${cmake.testArgs}"
+            ],
+            "cwd": "${workspaceFolder}",
+            "environment": [],
+            "stopAtEntry": false,
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "miDebuggerPath": "gdb-14.2",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                }
+            ]
+        }
+    ]
+}
diff --git a/tt-train/.vscode/settings.json b/tt-train/.vscode/settings.json
new file mode 100644
index 00000000000..f89ed5f1d98
--- /dev/null
+++ b/tt-train/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "editor.formatOnSave": true
+}
diff --git a/tt-train/3rd_party/tokenizers-cpp b/tt-train/3rd_party/tokenizers-cpp
new file mode 160000
index 00000000000..5de6f656c06
--- /dev/null
+++ b/tt-train/3rd_party/tokenizers-cpp
@@ -0,0 +1 @@
+Subproject commit 5de6f656c06da557d4f0fb1ca611b16d6e9ff11d
diff --git a/tt-train/3rd_party/wandb-cpp b/tt-train/3rd_party/wandb-cpp
new file mode 160000
index 00000000000..368cd07f89f
--- /dev/null
+++ b/tt-train/3rd_party/wandb-cpp
@@ -0,0 +1 @@
+Subproject commit 368cd07f89f497df20a66936fbfae3956f151af4
diff --git a/tt-train/CMakeLists.txt b/tt-train/CMakeLists.txt
new file mode 100644
index 00000000000..38b5c784d61
--- /dev/null
+++ b/tt-train/CMakeLists.txt
@@ -0,0 +1,59 @@
+cmake_minimum_required(VERSION 3.18..3.30)
+include(cmake/compilers.cmake)
+
+if(DEFINED ENV{CMAKE_C_COMPILER} AND DEFINED ENV{CMAKE_CXX_COMPILER})
+    message(STATUS "Setting C and C++ compiler from environment variables")
+    set(CMAKE_C_COMPILER $ENV{CMAKE_C_COMPILER})
+    set(CMAKE_CXX_COMPILER $ENV{CMAKE_CXX_COMPILER})
+endif()
+
+if(CMAKE_CXX_COMPILER AND CMAKE_C_COMPILER)
+    message(STATUS "Using specifed C++ compiler: ${CMAKE_CXX_COMPILER}")
+    message(STATUS "Using specifed C compiler: ${CMAKE_C_COMPILER}")
+else()
+    message(STATUS "No C or C++ compiler specified, defaulting to Clang-17")
+    FIND_AND_SET_CLANG17()
+endif()
+
+project(ml-framework-cpp)
+CHECK_COMPILERS()
+
+message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG=DEBUG")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DDEBUG=DEBUG")
+set(CMAKE_CXX_FLAGS_CI "-O3 -DDEBUG=DEBUG")
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS ON)
+
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    find_library(LIBC++ c++)
+    find_library(LIBC++ABI c++abi)
+    if(NOT LIBC++ OR NOT LIBC++ABI)
+        message(
+            FATAL_ERROR
+            "libc++ or libc++abi not found. Make sure you have libc++ and libc++abi installed and in your PATH"
+        )
+    endif()
+    # making it global settings for now
+    add_compile_options(-stdlib=libc++)
+    add_link_options(-stdlib=libc++)
+endif()
+
+message(STATUS "c++ Standard: ${CMAKE_CXX_STANDARD}")
+
+include(cmake/dependencies.cmake)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# 3rd party projects
+add_subdirectory(3rd_party/tokenizers-cpp)
+add_subdirectory(3rd_party/wandb-cpp)
+# ttml projects
+
+add_subdirectory(sources)
+include(CTest)
+enable_testing()
+add_subdirectory(tests)
diff --git a/tt-train/LICENSE b/tt-train/LICENSE
new file mode 100644
index 00000000000..f7ad2d329b0
--- /dev/null
+++ b/tt-train/LICENSE
@@ -0,0 +1,214 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright (c) 2024 Tenstorrent AI ULC
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+-------------------------------------------------------------------------------
+
+Third-Party Dependencies:
+
+The following separate and independent dependencies are utilized by this
+project and are included in a distributed build of a Python Wheel and
+are subject to their own license terms listed as follows:
+
+- sfpi-gcc - [License available here](https://github.com/tenstorrent-metal/sfpi-rel-temp/blob/master/LICENSE) [and here](https://github.com/tenstorrent-metal/sfpi-rel-temp/blob/master/compiler/LICENSE)
+
+The following dependencies are utilized by this project but are not explicitly
+distributed as part of the software:
+
+- yaml-cpp - [License available here](https://github.com/jbeder/yaml-cpp/blob/master/LICENSE)
+- Doxygen - [License available here](https://github.com/jbeder/yaml-cpp/blob/master/LICENSE)
+- boost - [License available here](https://www.boost.org/LICENSE_1_0.txt)
+- glog - [License available here](https://github.com/google/glog/blob/v0.4.0/COPYING)
+- icu - [License available here](http://www.unicode.org/copyright.html#license)
+- tokenizers-cpp - [License available here](https://github.com/mlc-ai/tokenizers-cpp?tab=Apache-2.0-1-ov-file)
+- xtensor - [License available here](https://github.com/xtensor-stack/xtensor?tab=BSD-3-Clause-1-ov-file)
+- xtl - [License available here](https://github.com/xtensor-stack/xtl?tab=BSD-3-Clause-1-ov-file)
+- msgpack - [License available here](https://github.com/msgpack/msgpack-c?tab=License-1-ov-file)
+- cli11 - [License available here](https://github.com/CLIUtils/CLI11?tab=License-1-ov-file)
diff --git a/tt-train/PULL_REQUEST_TEMPLATE.md b/tt-train/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000000..c9a4cb3dd54
--- /dev/null
+++ b/tt-train/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,22 @@
+### Description
+Please provide a brief overview of the changes introduced in this pull request, including the rationale behind them.
+
+### Changes Made
+- [ ] **New Feature:** Describe the new feature and its purpose.
+- [ ] **Improvement:** Summarize the enhancements implemented.
+- [ ] **Bug Fix:** Detail the issue that was addressed.
+- [ ] **Refactor:** Outline any significant code refactoring efforts.
+
+### Testing
+- [ ] Unit tests added or updated
+- [ ] Manual testing conducted
+
+Include details about the tests performed and their outcomes to ensure coverage and functionality.
+
+### Review Checklist
+- [ ] No breaking changes introduced
+- [ ] All tests pass successfully
+- [ ] Code complies with project style guidelines
+
+### Additional Context
+Share any relevant information, links, or context that may assist reviewers in understanding the changes or any dependencies involved.
diff --git a/tt-train/README.md b/tt-train/README.md
new file mode 100644
index 00000000000..5cefa8568f5
--- /dev/null
+++ b/tt-train/README.md
@@ -0,0 +1,75 @@
+# tt-train: CPP ML training framework
+
+## Overview
+This repository contains a high-performance training framework developed in C++ designed to efficiently leverage the computational capabilities of Tenstorrent hardware. The framework is optimized to accelerate model training tasks, providing a seamless interface for training deep learning models on Tenstorrent's advanced hardware architecture.
+
+# Install
+1. Initialize and update submodules
+```
+git submodule update --init --recursive
+```
+2. Install CMake 3.30
+3. Run setup script to configure env variables, direnv, clang-tidy and clang-format.
+```
+source ./init_repo.sh
+```
+4. Navigate to `tt-metal` folder and follow repository instructions to build it
+
+
+# Building the project:
+You have two options for building the project:
+
+## 1. VSCode
+* Install the [CMake](https://marketplace.visualstudio.com/items?itemName=twxs.cmake) and [direnv](https://marketplace.visualstudio.com/items?itemName=mkhl.direnv) extensions for VSCode.
+* Use UI to build all targets.
+
+## 2. Terminal
+### Debug
+```
+cmake -DCMAKE_BUILD_TYPE=Debug -B build -GNinja
+cmake --build build --config Debug --clean-first
+```
+### Release
+```
+cmake -DCMAKE_BUILD_TYPE=Release -B build -GNinja
+cmake --build build --config Release --clean-first
+```
+
+
+# Run
+## MNIST
+### Training
+```
+# Navigate to the root directory of the repository
+./build/sources/examples/mnist_mlp/mnist_mlp --model_path mnist_mlp.msgpack --num_epochs 10
+```
+### Evaluation
+```
+# Navigate to the root directory of the repository
+./build/sources/examples/mnist_mlp/mnist_mlp --model_path mnist_mlp.msgpack -e 1
+```
+
+## NanoGPT Shakespeare
+### Training
+```
+# Navigate to the root directory of the repository
+TT_METAL_LOGGER_LEVEL=FATAL ./build/sources/examples/nano_gpt/nano_gpt --model_path nano_gpt.msgpack --data_path sources/examples/nano_gpt/data/shakespeare.txt
+```
+
+Training loss example from [wandb project](https://wandb.ai/tenstorrent-ml/tt_train_nano_gpt):
+![NanoGPT training wandb chart](./images/nano-gpt-training-example.png)
+
+
+### Evaluation
+```
+# Navigate to the root directory of the repository
+TT_METAL_LOGGER_LEVEL=FATAL ./build/sources/examples/nano_gpt/nano_gpt --model_path nano_gpt.msgpack -e 1 --data_path sources/examples/nano_gpt/data/shakespeare.txt
+
+```
+
+# Contributing
+* Create a new branch.
+* Make your changes and commit them.
+* Add new tests and run existing ones
+* Open a pull request (PR).
+* Ensure the PR is approved by at least one code owner before merging.
diff --git a/tt-train/build_all.sh b/tt-train/build_all.sh
new file mode 100755
index 00000000000..a3c50e57276
--- /dev/null
+++ b/tt-train/build_all.sh
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Initialize submodules
+git submodule update --init --recursive
+# Run setup script to configure env variables, direnv, clang-tidy and clang-format
+chmod +x init_repo.sh
+source ./init_repo.sh
+
+# Build project
+cmake -DCMAKE_BUILD_TYPE=Release -B build -GNinja
+cmake --build build --config Release --clean-first
diff --git a/tt-train/cmake/CPM.cmake b/tt-train/cmake/CPM.cmake
new file mode 100644
index 00000000000..842ed293327
--- /dev/null
+++ b/tt-train/cmake/CPM.cmake
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: MIT
+#
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors
+
+set(CPM_DOWNLOAD_VERSION 0.39.0)
+set(CPM_HASH_SUM "66639bcac9dd2907b2918de466783554c1334446b9874e90d38e3778d404c2ef")
+
+if(CPM_SOURCE_CACHE)
+    set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+elseif(DEFINED ENV{CPM_SOURCE_CACHE})
+    set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+else()
+    set(CPM_DOWNLOAD_LOCATION "${PROJECT_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+endif()
+
+# Expand relative path. This is important if the provided path contains a tilde (~)
+get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
+
+file(
+    DOWNLOAD
+        https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
+        ${CPM_DOWNLOAD_LOCATION}
+    EXPECTED_HASH SHA256=${CPM_HASH_SUM}
+)
+
+include(${CPM_DOWNLOAD_LOCATION})
diff --git a/tt-train/cmake/compilers.cmake b/tt-train/cmake/compilers.cmake
new file mode 100644
index 00000000000..f86a298d5e7
--- /dev/null
+++ b/tt-train/cmake/compilers.cmake
@@ -0,0 +1,63 @@
+function(FIND_AND_SET_CLANG17)
+    find_program(CLANGPP_17 clang++-17)
+    find_program(CLANG_17 clang-17)
+
+    if(NOT CLANGPP_17 OR NOT CLANG_17)
+        message(FATAL_ERROR "Clang-17 not found. Make sure you have clang-17 and clang++-17 installed and in your PATH")
+    endif()
+
+    set(CMAKE_CXX_COMPILER "${CLANGPP_17}" PARENT_SCOPE)
+    set(CMAKE_C_COMPILER "${CLANG_17}" PARENT_SCOPE)
+endfunction()
+
+function(CHECK_COMPILERS)
+    message(STATUS "Checking compilers")
+
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "17.0.0" OR CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL "18.0.0")
+            message(WARNING "Only Clang-17 is tested right now")
+        endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.0.0")
+            message(FATAL_ERROR "GCC-12 or higher is required")
+        elseif(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL "13.0.0")
+            message(WARNING "Only GCC-12 is tested right now")
+        endif()
+    else()
+        message(FATAL_ERROR "Unsupported compiler: ${CMAKE_CXX_COMPILER_ID} ! Only Clang and GCC are supported")
+    endif()
+endfunction()
+
+function(ADJUST_COMPILER_WARNINGS)
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        target_compile_options(
+            compiler_warnings
+            INTERFACE
+                -Wsometimes-uninitialized
+                -Wno-c++11-narrowing
+                -Wno-error=local-type-template-args
+                -Wno-delete-non-abstract-non-virtual-dtor
+                -Wno-c99-designator
+                -Wno-shift-op-parentheses
+                -Wno-non-c-typedef-for-linkage
+                -Wno-deprecated-this-capture
+                -Wno-deprecated-volatile
+                -Wno-deprecated-builtins
+                -Wno-deprecated-declarations
+        )
+    else() # GCC-12 or higher
+        target_compile_options(
+            compiler_warnings
+            INTERFACE
+                -Wno-deprecated
+                -Wno-attributes
+                -Wno-stringop-overread
+                -Wno-stringop-overflow
+                -Wno-maybe-uninitialized
+                -Wno-missing-requires
+                -Wno-narrowing
+                -Wno-non-template-friend
+                -Wno-error=non-template-friend
+        )
+    endif()
+endfunction()
diff --git a/tt-train/cmake/dependencies.cmake b/tt-train/cmake/dependencies.cmake
new file mode 100644
index 00000000000..8972da32891
--- /dev/null
+++ b/tt-train/cmake/dependencies.cmake
@@ -0,0 +1,63 @@
+set(ENV{CPM_SOURCE_CACHE} "${PROJECT_SOURCE_DIR}/.cpmcache")
+
+############################################################################################################################
+# Boost
+############################################################################################################################
+
+include(${PROJECT_SOURCE_DIR}/cmake/fetch_boost.cmake)
+fetch_boost_library(core)
+fetch_boost_library(smart_ptr)
+fetch_boost_library(container)
+
+############################################################################################################################
+# yaml-cpp
+############################################################################################################################
+
+CPMAddPackage(
+    NAME yaml-cpp
+    GITHUB_REPOSITORY jbeder/yaml-cpp
+    GIT_TAG 0.8.0
+    OPTIONS
+        "YAML_CPP_BUILD_TESTS OFF"
+        "YAML_CPP_BUILD_TOOLS OFF"
+        "YAML_BUILD_SHARED_LIBS OFF"
+)
+
+############################################################################################################################
+# googletest
+############################################################################################################################
+
+CPMAddPackage(
+    NAME googletest
+    GITHUB_REPOSITORY google/googletest
+    GIT_TAG v1.13.0
+    VERSION 1.13.0
+    OPTIONS
+        "INSTALL_GTEST OFF"
+)
+
+############################################################################################################################
+# boost-ext reflect : https://github.com/boost-ext/reflect
+############################################################################################################################
+
+CPMAddPackage(NAME reflect GITHUB_REPOSITORY boost-ext/reflect GIT_TAG v1.1.1)
+
+############################################################################################################################
+# fmt : https://github.com/fmtlib/fmt
+############################################################################################################################
+
+CPMAddPackage(NAME fmt GITHUB_REPOSITORY fmtlib/fmt GIT_TAG 11.0.1)
+
+############################################################################################################################
+# magic_enum : https://github.com/Neargye/magic_enum
+############################################################################################################################
+
+CPMAddPackage(NAME magic_enum GITHUB_REPOSITORY Neargye/magic_enum GIT_TAG v0.9.6)
+
+CPMAddPackage(NAME xtl GITHUB_REPOSITORY xtensor-stack/xtl GIT_TAG 0.7.7 OPTIONS "XTL_ENABLE_TESTS OFF")
+
+CPMAddPackage(NAME xtensor GITHUB_REPOSITORY xtensor-stack/xtensor GIT_TAG 0.25.0 OPTIONS "XTENSOR_ENABLE_TESTS OFF")
+
+include(${PROJECT_SOURCE_DIR}/cmake/fetch_msgpack.cmake)
+
+include(${PROJECT_SOURCE_DIR}/cmake/fetch_cli11.cmake)
diff --git a/tt-train/cmake/fetch_boost.cmake b/tt-train/cmake/fetch_boost.cmake
new file mode 100644
index 00000000000..4987d256c45
--- /dev/null
+++ b/tt-train/cmake/fetch_boost.cmake
@@ -0,0 +1,27 @@
+include(${PROJECT_SOURCE_DIR}/cmake/CPM.cmake)
+
+function(fetch_boost_library BOOST_PROJECT_NAME)
+    CPMAddPackage(
+        NAME boost_${BOOST_PROJECT_NAME}
+        GITHUB_REPOSITORY boostorg/${BOOST_PROJECT_NAME}
+        GIT_TAG boost-1.85.0
+        OPTIONS
+            "BUILD_SHARED_LIBS OFF"
+    )
+
+    get_target_property(BOOST_INTERFACE_LINK_LIBRARIES boost_${BOOST_PROJECT_NAME} INTERFACE_LINK_LIBRARIES)
+
+    if(NOT BOOST_INTERFACE_LINK_LIBRARIES STREQUAL BOOST_INTERFACE_LINK_LIBRARIES-NOTFOUND)
+        foreach(BOOST_INTERFACE_LINK_LIBRARY IN ITEMS ${BOOST_INTERFACE_LINK_LIBRARIES})
+            if(
+                NOT TARGET
+                    ${BOOST_INTERFACE_LINK_LIBRARY}
+                AND BOOST_INTERFACE_LINK_LIBRARY
+                    MATCHES
+                    "^Boost::([a-z0-9_]+)$"
+            )
+                fetch_boost_library(${CMAKE_MATCH_1})
+            endif()
+        endforeach()
+    endif()
+endfunction()
diff --git a/tt-train/cmake/fetch_cli11.cmake b/tt-train/cmake/fetch_cli11.cmake
new file mode 100644
index 00000000000..acba911019f
--- /dev/null
+++ b/tt-train/cmake/fetch_cli11.cmake
@@ -0,0 +1,5 @@
+include(FetchContent)
+
+FetchContent_Declare(CLI11 GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git GIT_TAG v2.4.2)
+
+FetchContent_MakeAvailable(CLI11)
diff --git a/tt-train/cmake/fetch_msgpack.cmake b/tt-train/cmake/fetch_msgpack.cmake
new file mode 100644
index 00000000000..9218abd9372
--- /dev/null
+++ b/tt-train/cmake/fetch_msgpack.cmake
@@ -0,0 +1,25 @@
+include(FetchContent)
+
+# Declare should be defined in the global scope
+FetchContent_Declare(
+    msgpack
+    GIT_REPOSITORY https://github.com/msgpack/msgpack-c.git
+    GIT_TAG
+        cpp-6.1.0 # You can specify a version tag or branch name
+)
+
+FetchContent_GetProperties(msgpack)
+FetchContent_Populate(msgpack)
+
+set(MSGPACK_BUILD_EXAMPLES OFF CACHE INTERNAL "")
+set(MSGPACK_BUILD_TESTS OFF CACHE INTERNAL "")
+set(MSGPACK_BUILD_DOCS OFF CACHE INTERNAL "")
+set(MSGPACK_ENABLE_CXX ON CACHE INTERNAL "")
+set(MSGPACK_USE_BOOST OFF CACHE INTERNAL "")
+set(MSGPACK_BUILD_HEADER_ONLY ON CACHE INTERNAL "")
+set(MSGPACK_ENABLE_SHARED OFF CACHE INTERNAL "")
+set(MSGPACK_ENABLE_STATIC OFF CACHE INTERNAL "")
+set(MSGPACK_CXX20 ON CACHE INTERNAL "")
+set(MSGPACK_NO_BOOST ON CACHE BOOL "Disable Boost in msgpack" FORCE)
+
+FetchContent_MakeAvailable(msgpack)
diff --git a/tt-train/images/nano-gpt-training-example.png b/tt-train/images/nano-gpt-training-example.png
new file mode 100644
index 0000000000000000000000000000000000000000..96b9019f75aa6a84b2d76e36bff72c50bf09132d
GIT binary patch
literal 116000
zcmeFZc{r5q|37T6NE9VxDTN5xvrh?Smt-fhWGB19B%x%NW$ZiI#xBdCBw5G48_B*7
z#x^sU=NjFg`*+{J-|>9z`;X^1p5u6)uMRVpS+4WE-sgLHy<X@0>b<7A5;Y|gB@q!3
z_2WkmpAZq9p(i3Dd2^Nmc;)cYI);eoJjg~~Uh}cMJiF#ACo3C!OCq92@8fjO=xKLc
zNi%%%_$4t}&RZne#oNR=Z~I6d3A`zIdlSShNbx3=#rUc7hB+~FJx|scae2nORE6?`
zwl{Ao_^+&YCe!!1n_`f@GgBCpob`$8;uI8&BFb=i!xcTBl|(fEo+_vOI&rsP$8FUo
z4D2Kn8N@=)v)kxf>&we$iMl|zgPlzZ(ENsuQr(Cg4sX|g=`0#U#NJPvPql;brez}|
zg3YVskQ31^G`7u?yua2;>6UXB^p4?&OWq~jPM7*iD4nry?XTD!iC?SEZRbFV3QU8g
zE2GlSi4K9Udt9M<Xig-ONQ+xsP@@N>P?YmkMT72)TIb?3Vlm=9yZ6;t1zc%)m$JC$
zL0U~W6U6uMs_l4w@45<Nwvn=dik!^y9f{V)=}zcX{&taf;BPC@TC{fZNB3`Pj>(5T
zskF#Erwkr?myQUKAoZb@qVfuurDeW<?M`oanCA8hW7c#2Z_Ys(+7HR6yx#3yH(~Fl
z3Dq9Ak5LcP!8}+K%DGk(D%T#%bJXc46SZ)&Oa}|Ad}|aZn`iRzaypsdW%I5FTIM&u
zt<p*@EPV?qvRAxp>2l+YeNKg*_fb4;J7wE-Vr9SLyYfueiYf$py4Vs0YHq813x7I$
zOs>D3*?IJw&o}Lc|2>BF3e9fyt9=(2Bi~<5`~bRoH77-C<Q6^Gw@+@ek0?pZ<6gKF
z-V`^mj7+9-m7i~v_*RMuaw#nLGAA~QQudRUe?rRw3!@Y0Tntj+o%@pT=4=ab!0U4s
zWqE<`*_q!>_^g6|rdDuW-Fi)LL2}*ssURsw484rGS2(G{PmOnP*&)tGud{T|QeMZq
zN9mZb+z-yG)fBvOMmp#o&D%gDF9GeaOEf+zWE$)o*bq<FV&AJy{KY#b5Cl!x45OIE
zji+JuEgae}T4Js~JbyFzQs8@!&(}9Ufafl5t<n@x^?sE(v$#E-w)L@3-Rt3J^=0;{
zG<Kf&6Z(aheZ+f#I(3imf()NUhum68o!JU5#JR{!(cb%2+DF5D5STX2-lf&XhoD4r
zZ8Z1%49U-|$iAno*-0BJ*S5bDy}G~Z7|63@gimC%EM5u3ES}##T2DvI?~|8N<Hs)<
zzGi9ZHRmcvk7*UN-Yw@WCbnyf=-B?b>XW5sOo}wWygd(M>6bgAme%*?M5o%NG^93j
z<0#*1&!b+pK)Injmo2R>-yPrjz;5xX%{SjnmBE@pmuP;5VPS1eE$mlAz;d<!(&7p6
zYYlA(1Rsqu(xW57N&noJJ4&sz{Gt)UM6`4PZ8`AZR!Els*<pO&YpEWhL?Wu^R7-Ot
zQvu>zXYR9~e;D8ub^d$na^+9Sgf|_}B@;=NL5zBo&jQ?^bBvLAyq<n8S8zt@%VTE>
zBeH}q?#>LQY3f@fyKg*y3cq2$`8MJ%E$KJ<@(0&w8QumR-njpW?9zvu95#<QZzx*)
zaI@fl5`0H#<a%<ntp(ews}k>1lqRAlf5_)Bdx!fz5d0Xtn(@6rRPvf-roG6uaHaN4
z=<>~~v#BBcilZ4i%luUrWZs)Rgk`fVQ&rKBCNkxPK+PhqQq4Wr(xZ$Icw7$EyZtOe
z$E;e9!?#zr{6g^=+&elKir#N5vy>Ow=$vI`&!xW?_&Pc(wI$g|-W)3T4ZQ6{Ci&&v
zPtev4=W{R4Lf>}0z4SoBjCq!JR%A*g^{wZ_8(NVs<#%6$1)+&*DT+o9jWnj@rSIEw
z$#U*TKe|a8L)sBSukb6H^3koF>Kx-7&VkEMxK>E@!|jzluAhn3{$c(~ur8%B);GmB
z=qT(6n)SFm%S$VZo22t;=cUeP${Y)qa$|R(g$mV~Sk1Xl<3kcdqC*C&HmfqL%%Y;A
zuA&;%S4E>I?5jm>!Na|uWS-(44d#sx-m7|=6jqa1lj`xtBY8hqJ>PKfc;)sk3D(?4
z^qBsb_&88fe)@505x-`7u1TgzzQ4k8E`>s?a$t#|N%o^`qq*1LZ-uj}h2~XrD#hkY
zXjeW!mo2LG$nph`oGsxlkuFg%kgD#rbGM1DDya^vBA-B4)xJz0@b7N9dgc6uKty8K
zSm(W-<)Q1VW6RF&74DsG9&1%AyQ{Wq^{bq#>is2sGI^7kAuIZ8E_<)`;+Ho@8!``U
z-fh$F)6PZ($=ik)G47Z^3YwamG@GA7B2V0pH7HaA*#oZyT2ky$98kN+gIlk*b_6q@
z*I?d1Kg_r)^HHwKZTTeAGfa~MoUNB_qi7gr5RMH~3zHAK_1!L9gYh778G4v(kz6by
z;W*&Xd3R6rGPG30LCCLn$g%=j*Dcz8#R>+E)-Ql|+gIu*k6XJkqsh?bXqm+WzJe=v
zKAis$ccteFJU%Zzn=jvRL=Rb>rB_~DQT)s>wc5wY;YF&2%uGG9zDsc?*iPF*`-Lka
z-f_)Zs-vcJKZ&J3!ZP7SEpoF$#ujCjZfR-;=^%D@_N0|4_I$~vcy&=iRl3-AOid|?
zW3s%j{QGuhY?}64?Q(7SVApJXt9Gyev%kloxV89_(NamTr_#&Fv&>r3SuWq~7{Clo
zDnVV<(Yw)KIIP3%uVOlfzvwL2f7y#?Zgr@d^sTk3g>ABKJ~=QraHLU*6p2)hOuobN
zOvLWuS_;<};TG5UB`rjx$)*Xp2@Za;<FVVjja-=)?X>#nJT#&e>DTS&C7bLg=?iVL
zX-f8$I_f_1^4IZ?JmNbdKbbsAFW(p&UpHP)Anqj|cr8zik}yeOOrZ<Vd;aLT&2z&j
z56uUC^%-ZrEPjE05&Z(c?s$FhfjSUEt?~&$$ayAL%#^L8Q=-yXSXsQYftS%?{g%?j
z{fh(dzV|OLm-X{1&U~l}rB@nJ3gC7e3N66(i}lB5MSi1wk@bSdJnehLyHr-NSj0u~
z>yFk_4TrA}Z*6<h+0$8^>v_-g{?q&6_o!Hs*my4YFh*Ic?};Bef;+P2gXy0rn|IcH
zY%0J1LN|Ov)m??}F<h|MYU-oQQaf`5f(2s#<~tekEve1hs+_7+8&fZ>MjjJWNpZLk
zJOMEaTWqF1q<ThKA0+#A;j1;HEMu2&y^x=fu7!s+#xl-&@7=vD_blD<%yH(*nWcvd
z_J|dPTIY1EQm;yv)SZzlOYaOhlT=fL%8h@q-;Q=u-ASsleF76Ps^??tU`v?M%pir#
z8(P*>m|D<K`BCvw%BiZS_&yIEq2K;E@@Ze2LEY~ez0vNZh%Xbw0PR+};+iLy@+Ga@
zn^Q>G&kB9#gFf2Iz4rnX{^EkWdT+|ihtv-V49%&@BDF4Q+l}LBR{W(M6}9(1@2ln)
zeNh-@hM!aqsV1&O7=8Abt$lKTB}izgVy8mj!{G=1BgtQ*qRsmPy?RqM-~2j)bHC;O
z$`y~t@J*H**?ITxwDT&$3Jg1p^y_yv3eOewCN#u58L1ib8BUjTnya-MbXBX2`s*iD
zZ~7>J+R~J>JA>j-qf{fN<3F1-J+imISA?gFy%(FFxK>eD+F4)dOXZ?68=r4TSDSVm
zzGt{6*6Lt6+2qLJgc`fpkexe`UGJ0M-+N2+Wq5HOSa-FwtAW&0d`NP{5t&_^Z!a*$
zJBjl(ooHglwH}DGJ!dJ9dFfuZLp!Hw!}3JDy0lids44q;UFNa#*zBSr>btyBN`$?f
ztw+_b#Le<s?VsA0SWr$pwyRRtCHh^3(DQSP=XyLf!X?~GZsp}}xE-#zTE9W0NOs9i
zy7ewi^`lLeXiT3Q7c?*XmzNHg?De4>o7rq#or)UfT|UiLEG>NB*V<@@9U4n`i*Bt#
z?WXJ)U`i%V&C$+tBRxaOEVX2{^^QwOfA{>Y-lc|0)Wyv><Y=SmNz3y6VKxuQ`ie8^
z-u~S!v$gEcN}g<Jwq~yw{JhFiWBzXb0zx%i$Ft6jW#htT(~<8wx|bn_AswuIGK8(%
z$=&D*gkbQ)6H<A#=(t@S{F;K(d89pL4O>$uP>=f3(CC>yli+{%uxyTFZEjrSaeA6x
z+ELK1WshzHEPUo`^Yj{G4VAE!Bi#lgqTo%6CT>~VAnJ=He%^v+bT8gH%IAK!(_IMq
zDQC98sHHVUTo8mu3Ce7t7CDK)wO{8LhDINevnO2uHFANJBCZvaT8p;TJu!K=b*=37
zqMeV4<e|8N_u|p@&2&R{U3YT1T0Duqo3A~~?I<sRQm9+%J+@L)BjN#$&k~UlGZB#k
zN5sIdEHU$+$BM+<M5L#$lK^Cd?vG=_r+)uL0Z)X#PM>eVMs5Sg*K8hYJO!SCZxDWn
z?*k9qzn=-m$4|2EAk~P7Ktzup-hb*vygq%Vk$!l*eLLjBz{4bZaxKmK>*d!y+Jdvb
zfBS|sG&lDP4}T65^mu_%UWfCBlE;l{K3%6INyrk3KIwJkw??h6cO#*2@aFXRanBN+
zr{`jKx5Nw-?8<f`B^-X`j$WB+A}xrBnC!nj1keU3l7(L9>3;AJA0*}gW&C*mnuz2q
z`+s|gCLs%r7P<3J!xEDy^#1pW0q?$}XV(nNF(dwmA<xoszq;^WCk0FyltUC?ev_K&
zzuAH?q1T{j*MG7E*$wiu^c*&N|IH4kRw4^cA^q>>I?GOb8<cVNaVyDxT_s><?3(k}
z{>iEVkBCXy9x+|{Cu0x=+*tajg*^L^l9pR>pzI%wah6`<%s*L`{mE;PU`#OiKN$mX
zF^PY)>i-)q_R%flp6jA2RM<KKh$~P$pW*lZ|1Dx3vG)~daoF@{1ywmrK6_?hlS^hd
zVyeB`gsrsF`0Ish`jhPG9K=~<e16$~1GAE!K3_ZI&)dw=_3m$G44|NWIbM<LhC&)A
ziQ0`2ff3WU+)Yc*x&HlA^=Cg9<CKLok4|tXwZySP9p06J+z(YYgNjVzjz$dfBMN_8
zK_2zz!23d;n?|1s3u7?or4LsU;68gi=gzD)#oRmZM)KFFe8R73>TcRW9_rJOY%`em
zKKQ*KcK7-tRm^+UgNuK0#*A6=s+Ob0WGB^vu{zf!iss|n?xwBjbi02Wx>KdaeOmWb
zK-O`5p}$>?af8Q#y9guo-=ez@L{e#uxKdK9Jj<i7+C(9FZjA4*ANW}Px~m>GPttad
zj`K%_WtT<EYht5Ry^(y;K>5E67J#Mib1L%3uH+8@YqBy5Gxgh)v^E{`lepRamw5x<
zSiSSJVw1NHwEaP#>F#sVGZ%`5uQC7qBeVq<uZlaSV9FMg>_)onUwaPuQNQlx|C<4B
zkQoj8k%eA%!QOrJw@3zk(=me%#e)PtUETQWH;4kfNJ>{_N!q+ls{ZasPXvAzmZ?(8
zXCyui+yATIyaq`V>$<ZAm`g(puKz7cJ~24!=y3*J*Hq<E1pjS#_O#bU^^qXKG`W($
zyUh^QXP!H;Uun5HFK5gA)xzww8ZA20N*PxX>CgT?`YO56<@m>DXX#@@`&s|$CbaLk
zeim+WXof9m@cw=DJ(5z&uAIAMp^w{MM*c11e4}&LIk?FE9i4yqZ=*-~5bFk6KDkQL
zmSbL}^p_<iCi`{nb0O|5{TybH@^7O*g1;{MDs=mNfcX=_368%RX8ZTDY#-SK{cZFd
zlNMlE=!5@%w=C>R&nK#EtdRNX{z;YHa@np+y+Y4hi15V~{xE(?D^4<6ZW+P?3`Af0
zSDpciXbGf|O%xWe4W_W4w69bf`c;t#yZ@^|E4y3It@v%IrHdZpL+Ckzvk6`NSEqda
z^7%6z9ZQqAR+CIUL(Kx+yAwtG_CH2)RT3&Fr;g0BWOUA6F1j_Oh#;2Pw~^iSxVC@z
zoke5Jr#@V{-gY!8?rRa?4l85-%1;ZNiK*Xyk7P^F&d5}D2*@(2&d*eKUY%)%=)dKW
z6a!nDJBP)w@&vQsL|6|B&~m+cs}+u0zQ0_jPlExh_-~>VL{4=`dXbuTuagJ<q@N;8
zQeeiS^&z$Cy2Yp(_~#-Gox87x(aC`gTeghVV^fF3f~61{r6XCJx(5R5N3~ZxjP_lJ
zijEok7EaPrm1V@UQdVCy0heeqbMD`SV!?A_<^nR<_X_k}AKB<$h#FI2jtgb*d#0%I
zCFRWqRMVnZx)`Xa>gNwuQ<+j*GcLTuT)I<2!mpwY*R5-%DyAio6frh3_%)%EV53nW
zY}{1;Ii5k#vq#u9n2^h_hx#_TaXH%g>g9@}PDts>h?CaZQU>cJ3D>;3Hp@s>nH;rb
zF_&l2D}(tTN=GZLHT2DbE}NIgXJapx%_=?RUTntl6e@k;t+T(HFam1ZH*H^|NjeJ1
z;;%IO*c7=lXt^LVj&6&%?G?7MQ|0w}872dHBQ_GI>v@`bWH|phCw78<Rw86$HDob2
zg=1v>us;-;9X?{N?JMGG4$%lX#GTd2T3B)O2^NYlbCSg_b3pH!J+SIGuq(L*)QfK4
z3t%)*d%H$WA*uQ~x;{3KTAwnJknz;Df1wCExvOPJPO6Z~F#IebZqfpAnfW8R+?rR(
z#eiSjuDp8g&kN;F!|md~NW`}p^lI#((%9wf3X6`I?ngj<zat-VA#SlJ*?JK(^U}~*
zX|!5<Fi)*V63ZmEA_-hj&K-j)?#j<v*@{B;V|U*hC{3<PerejM9yQ7Gv#|9dW~RI>
zOvDqyTjh41DFwsD98d1F#5gk2Y$DNEyY!x#ZX@vs?GA-g+xPvb5vbOkgfJ{Y?nd!f
zmnXb{5Q;&`KL=UBN2-_Hhhr}yJASo&5a#G-*CK5z=yL=>2&d+du2i-C_!EyAKhOTC
zyY>S$8Zvty0er<?saKmWaP2(R^<+^ys|P>26}<*CNcc(Po*xcnkn(-)%_eEPVPBsk
zZia|1n6pWkyq|OX(cs<Bg@wwyoh#<*J`1)9{Kl^yte9^GIxJ8E$;b8JBjiGWxj$R{
zjlUI$Aj;F4LM3yfID}9kq9lpoxyvDJ{@h~hM&ONV%T)J~4yg-p_fIG3WyVUAYd#oG
zIQ9`f(};_jZqFnC7`xY18#*2WuP!XjuGKO2+Z#rgwtmIZm6=6-;H}CcDZnxtyBs`y
zACM&@x5J_Wp41&y*G|-%s^owh4r=N|X0px|JRhD>RAL(G4X%84qd44nadjAova05P
z6=ipfK5GIvhihf5JR88BVj~6XlBtsJF=7rA-wdobr|N|Kux=|zIC!2<Cy+S_z^~nj
zf*k#lUmUi^!(UT}ou#)sD5(PVr!l{MU!Bfu<6st9-i-e;Lw?dSRzpS6Zst(s;5UOE
z(J`}WXE}%i36|1t{2fVH{KhdB?Excy;04V95;4uQ!Drnw`*`Er8Zd7P>g}5|M@n*l
zVHhwGnQ{*)DO9Q5!^WI^iNQy^SJ&`vl(M9QHT{+~sSb1aR5OU!8;9M>nj%;S!4UX=
zHUzMAPs?6YM;y(*HqpX%^1|Mkd||R4u=a4Qo%Vq8z8Jj9T^Oi7;hFlCM2I<w3|7Ar
z!mqWK6rMLPx1aKZfdbQh%_=S?^O>9Y24)>UUfYiBt!y8MggG#S{m0Th+p6T4J<=Ss
zP5TIN=9ufBa|$ge;|;m}z2~H4xdniADR@r?Ti7zjFg2;vYU{qt^<xOE!OZ<o$c($U
z6C%X-_#jfe`x8ITdK&PY_6dBY6T(FbQL~*+vwt}Gg6#_HgtuhUPsE3=i}gSu-G}}g
z+!8=}r7R$Tnl|*lSt}XjtX9}Y{a7%ylG|i8<5<}(Q&856QHNDKhe=n#w0ir&G%8F)
z@=O<nvimsKnpnzf8RP`1OuSF;D&DPP>9hZcha~yrVPy01c%QfAxC3S~kvsxF2jJ8R
zhku3Cc=TF=DcjCB+n<dVNaVTviaDz7oUHvz^VawR!5o_-kMkq<<&^Zl*}SN2x057W
zm)Jl#Y#rhv8ypzyUm(02j7*t-EzRhdt+45)9FT>&e0&-BH#~O}0Kfp6<dnNo9Pq~*
zmpL_80?cE!C%1C1kt#@ufw5;uIUoh}MpmP<qNyZ4_8UXFDhzaF6Nvrcn*N{80FlUa
zs4A%`DhZ#?`=Cp}Ku!Mw16i_{J^w<&DCJSC3KdcSuoXwLWBsS%&%Z*wmG&o{nxXD_
z>Zv>qlQnTYVw0*9)piM<spFN_U5SD(x`p@aJvMllof^xl?8k*rlFdgu9~#lUQUrFU
z@4XP$Bgtn_^`qee)tUx7k7nhPc6$U1+QaI57~?>Rq4@aA>~Nut^<iadm!Kzkbb>Pv
z!{zMVnd{>WGp?SeYL8@d3U&%?{rQUeW2fbSYjt5^=Ua=mYq3cRL9T66G?SfI0M1~A
z%H7u>3$14E{~Pb4<$id|0rY<KKW8Bz`hcgDVXQI}Z{OFT3CYEwc6!ABydRM4bS<&F
zD^N?OeQH+eduWMuHNOom`<ZT64%#aPYH8B?NUyo@hf-6oy7<(}3V1JaKa=z1rw%A&
z6=1#^O#V)iLMBaed|^=@dej~Pf8vBO<O~rMJ`dR5>WttFE%*1gS&V;Y2b!nzO56hV
zT_bP+_EZv;1pYW6U*F3|1GO-`(Z{(zOJN5nH<l%X0QDPpMabmpS6b<#H$uwX-Nwtz
zT5pW@7qO@N?mTXI-@PV_TjvO87Vq_4RBJu=mOONVnfh|V9sNs1FVvDmI^Mpoe^5se
zD#=ehW}7@9;?JTo7MHN!SoVUi{jOHKRr}+NmyvGurp7=e)cF^cP>9xRHi*Guq*xys
zy<DOaa-J~;fEkP9*6E_P*Q>iE-Im#<*2@BquVS$SDgqq{IedzKUaju5WrDF#dMi>I
ze}vAeRj$^??QdA-r+QSdc{@B0P^Njx<9BqjrR`){sJp(BgWh;rYL*oN8;ER{`M$8T
z3dHE@zu=M|16~KecWT1qbtj8!?Ai+Y1)BrdY$rlB@JlVyN5%hmPq(u7ilMz(noFY7
zEkDKV(rn$*lCrOqS!*w8p-xF6HZk78S*3&Z?skkKFQZ--`~W~u9BaW-?4d)(RcYou
zWmW2{b4JmOM&fGe8{4BUt2&`=iR%teyCmN61;>7HATWe|puYSQPv)j0hVQMUNVw+c
z6g?Yg^hWDt54i8H-lxW9Il41VMIo_I16AOZLYodM4E#pi=p{r#jl<-36RyvuvJDr@
z0+a=vFo}7f+<|tOb)1X|w&}^6hb%?vCm<ct7JlRUrvh6i%;KthrahnH_k{z@gDa3o
z!qhGR0@goG4Tuv$&J7Lb5>qXt3YkuG<e&}+-GH%xr)Y0?{%{oFn~P*py47=Vrtrk2
zT>`RJQ3^3_6o??uR~ytnI}qUU0AB&enFun9B=U~%-TW{PD|4v3+PrB!<99$kY24BK
zfwDcu?VkGC1Pk!q@U!;PSc<P6p{`xJ4yHOfab7YFG$tQ-3G%{(%D+n6w0CRaL(bpW
z3h2LP-Oqwgz@qk;ZBxzP+Dv$YgI*YXecJ3Pcz%G18_VLAZgNW8FaEppo+>Q=L)`yI
zPx%+A=}$HL{|hpg1c`N;jFIfITjMqF`Pqs|eyOE98j>%zf3>|Doj&M1!{&E$(y_97
zUB(`flbrE~GUrVY8@xT4DxyGwWI_6;F#DE(<~mc^{2A3Vr?TRc+ouwo>cR>rA9x{P
z;Bun4Q`!;gm_~?Y${gL31L(I<#KMl%mp9}Utx?97K$(0sBjed=3kP>Ciz8K~8^s(9
zx`LVQpO1iN+Y;teAZf|t-={s1oPMKv9B8QcZ*V`PLr~7DO#j5>A8T4BUwrqf?l&Lo
zI5{4TS7(um_Xwd{HZG%Bubi-zF#J@Qv^d;~>N=RgvsW&4NBjGYMpcg*s5~=;Yv_-(
zNf7;3WVz4%7B?SXGm#`<xQ~%70Hfx>vAI360tratK33U{@}ujFkYV0S#u(h={KN;C
zPh6~*;q}g4CABKjjsAYPPGiq0+Y#`7-IC=uZw8AR7hH)r^p~)3;D08sbvk*&zfNAg
zVVxh4oW)Wb55J-<lS^uOHZCyqH9V(Yo_QM_UOVHGFm=%KNdL2QCM4Rsd8d=N3!0zK
z*^Hb1F448wh-1~Sa*lS}m{17bY%=V9W%b)xqn`f8DZ7vQPL)!27$}!4bkxWbJ_Vj3
zt?25Nw>TWPzH&=*sEEZ0zHtHba_Zi=&(`yCR3vV(+Uojm(DV3IW5V74kH!SDp?dlt
z<7zSSvC{ndEXRCXg2_ID(hu^J7}vGk&YG&9C;IcJLydU)sIh0plv}Gi)UxsZsf3!L
z`P;aeN^kd0+1mqho?DvITR&UpyI@T*uDz1-9(!r&pWRlTcrVW?y;5cMpBKcOgs^oV
zsflA!&rg`6i^X_LKWD91&0Ww(AO7ahuX#_ki<3WLvgQOy94$A69SHm|4Kl01DEmtq
zr$yl;@u=onRnYG0uu2nl)z13>elWk+@APQyd-%c(zTQY|+c@`daK5AB0EK@!a!a$D
zc87uh?lK~NGfPR<tItlA;2&4Gtuj6XP}z0Cw9=}l!t+TN7xh8>9sL`RN0RZAP6LIO
zT?REq@WZ8aNW5{#1-gwH?MS&B0AdX_LcMKHpT8XSqGw0`g6yLRiPd*MYTWfRX6$>!
zkrVMs8-`A!g8-N)XzM;5>Jju*QzrdWQ_khnEC!YLF=@K{qiyTs1??mqOpBSE58t^i
zU>HmW)O--C>%5*w{|_RzLqB|L@ZL+)2Ns<lZ&xg_%5U63W4GJq<_lg#ilbsFLfuP-
zDN8O^QnZ{o#nv(Or{pDmel|}V2r`fl`6?6l1n;ql--bK%?yQP7{r={($_UHnh-)rm
zW&`DO%9-~J&LhDy$2(JKfgkf7+N#OJ&(`=zu?GpHCr4cRmT_qkYvIfX@27m%9Ck1a
zblqs#UE43!BkNp-PStU`%NveO_me7P#OHz-Ok-S6yi1Em8fvE?C}T4<A#@B`sL!pr
zTL~u7`Uew5u<q5?J@FGIS&T)aI4s)e7zJTWZrE(nMKTG0_eHz!$@*Eus)@yO(8zVu
z4;X~WZO?1(bQWdLAJfQwjOR1>0YYr=4ywoT8%tX6)A^$J#AkdE_|Mw1EOJOakBtEZ
z?A{u02<xk|T~lz$b643@{XXXrAx=O33Aq9Wt`w+b=)I&a*o!b>DM0WY&4#e4`3+@*
zJ@p5RR`w@cBm0&v*B1~U$Su?MOX}rkM@oM!O1@CI_uQuC@Q_ZU^V{QC)#)PMD#vMq
zk@Zo5%FS~SncgCe(Qe-n@yB+e5Nhw?INe7&!y<LZy$y}nEmEimps9>kh7>m=eRdM&
zBI5QpCM`EU(41z$mCL6qmUobnu|!z0$CN`rD6=~s)-ApFR}r=!AW>>9bEqAcv@7?g
z+uP!Z6l>ENd9%Q_+X)}Uqn?iUG>CpmP4nKDkT^J^+GtPFJ+@lu!Pl7>cwIMguK$^(
zGT)J(7B6=)d+_SljQUyrUTn%u&7*ZRVxg}!va=XJvaF>a@T(*LB)t(cGHF#OYd9Cv
z#ZSXh8KK=?G)D!FXjkTSkBUB+;dPj->R9h;V#LgZ%-1tJ(jV8@IX`KSJvk8{w;N;q
zqTs7Y$PbU+6TE$8>rXwI+acfqY1;!k&~TZ%{>SdS0#BuntF(na7!SYBKbeuU*kyJ~
zI$lmt$Gf&x50o08IBy0-({wh&nm7k*%fv^$Om7^gqV@}dhHOumUg70XT#;Up^L+wy
z6#1+&K0o4gjd+SmE0motrvI*8+`ntrYzHWTP`jR7;TF*xa>`}(_|}gceq)+AyHa8+
zHsMT)GK|?+fZ)NVdCzp97=WQgOF1^^V@t~wgJ*-8onoX=P3^v$L*?1)Rf87@9GOw>
zMA&;}K{p4k+db{x?vQmqN!@j^H!c&r>Cp70FkO7cUKj;Q{?QGU1B&2`$PB;C&b!w6
zEqR*$Hi^MB@G)|CX-7ps4DtPk&Cldn9?c<V4JWPa8S{<mRUV;}N`xQya{n9Wm;;B=
zJhkK`r)qyevo|(Q%q*VW{jd50aFdaXd)1>#UHIcURkTSus(kLkq<8K5iggWoeh`tk
z=MLsTH7o5VH>>{kD_~Phk28+_Kl)Ne6*=WO`n4ydtC#&DQri}RS=IefsU9Mp+L-+Y
zlp6ItyOBF}i-o*uYM;W5MqWqC?!U!U<jXuT@MQJ<8Bl?9Y)JI!={;y(<$HoUC@oHr
zeG#mhE<1f^K*@R8m0`ne;x^Y~{E1z1WYCE138>$(cC@f4X=7C<)kl{}>ez8EJPdl=
z%}XdoKvFv0;9Qa6T9yD_(2OAoq6jrmHFocDw^9Y<jXlQ|@1SD%v%L+`B}BRawggdb
zQdSU)^gzcaJ9ZnvJ?916-y`c4^U;du58jk*^ni~egKYsk&|$B#v?5F`x;*$v7pAii
z07msn&pr5Hz&BEZv~{+^<<0KkkGAF_cS4T$#qUp3A!Kjy7MHz&xb%W`1F|+B!J>u?
z2VW8&_tegGy0oXWH+uzDrivk7%1huIFIHqRboM3+YO6~fhpB$K>MI+<gx^M-Ni_Vq
zjFvrd$K2w4Sv~3FK4iBODU1+f+oSwZiI8mN|8@CPTu?aOLLmDKS^!-SFgLbsVaB^w
zPPvRuQz)P^b|Cp9M(^(dTN@a!sn5ke-z}*NMCV-~68B|~mgkBChkzaVaqk}y6$le>
z2a{cKT6MR7pWw2d8|BE$8Z~2EQAkFC>4ePQJe_5&v1fU5Jh-tv*a$pL4Mo%<eY3so
zA6Zd2o!0FC6XF43GW7!Hr<$0{LDeAX{OjA}oe;SV0fXWdx7KRiE(+#iC*+#^`i8@k
zNV(0c63%$JWQ-Qx8$m^nub0*wvOb6OYu_kOXV@mBl?8nLlMzEtV^`kf0>3d#DM_15
z@Fu@?iht5{xHo+3mr9Q?-Cp38V$Ug`%5$;~9C&eS-OULdR`C4&h96U3b(|P;27Bc_
zVjyX*&#a(L`en}p)<2^i7oQcRN_oa}v4QW|NwA$>$iQ_11(o#qlPW43W<-I?`9*(*
zi%;M1^g@sf*>v4E{E^FGdKDX_S+rPBG1TsC<kn%i2#xSaKhZt@x_S;G(L43{yPTYQ
zMnU8xchxiWnb44HZW-5HccV|Hld(QyvN$W-ZBm*s(+Ii!<zg}4-;^``_kOtF5{V2U
z^(h+wBfCU+l4l0%c=&7u;4I>Fd?q5HqZ|>E&tx&X+GtEpb2>X@?BHPW*w}N17hC2n
zMU8wFb->qWR?)%POvT5u$DC#$(vu`QS)5h-c-nKu&m!Tv{6o_3zOg0?2iRrt2l9xr
zJtOi^pvpyE^P9h@-_W=)GPw{x=_rmCn|e-KcQR2_5#PMMX!xqrCO_HRM`vzXOTaqb
z)O&b13o+mK_(S)XKnlzFaq%TnukvMA);-GaS^~|qSl`E!@w%rFVgP^;dYmkLe}OAe
zBAUf^)CHae{Q~j)hBe(SyK9r9Q(r6_9BMDZh3?)P)}=0p%=)~9#90j4!K|Ck+u;V9
z{G{r3cDjUW1&1*%ByIIT`UVLW2M@`nfKP(55D7yzAse-^XvA^8Hv;l|DFHPB*!fpQ
zN!&*eQg;kJ_f5?q!`5rAQ>NWsBE>$(!wKCE(t)JX;KQ@t+qtW>-H@O(lWMQ;`b&vf
zJ<aN){Kl%w1+^5z)_6?*f!txs6p7pBls9x7JJoXE(4N2jntM)hlTUiLFHZ5Ga*)5_
zeW@!te6*`2U~Nnx8n(yCz3O^%&pSqP1<eI(;FQwbomo_pL3(Y(sW#(ZeVq}m{n*pp
zjb5C^Bh%3v6Vg5UZwUgi=8e<XZUv_PyJZ4MwJ`)_MyxZ3#IZ)LIer%8{XBTr1&GOb
zjtg?#(%3JsXQrYVo35QhpLWVTJ(Js6Az!7N`}gJ3O}$Xlo))Wv_Xx?M`d+{<_t6<{
zPw@2ubpVnvz&CEwou<(GKT@cyz%qf?X}%%}A`5kjWaQvVGxEdra;haI@-B%?QK>H(
zJ4Z_&cUM}~$Z~0=CoHEEz;s;~)o|0vq9NX1->ye=A$YB{7n8RN^!dy$8al=<HSS&E
zH%io3!a0vH=6CT+(R6eViqFYno<>Mx^=zi=tC!Jq)y59m1|)5*5z+?VBk{#Gvlj7k
zxS_d$O!<&*Yvd_&$&n`@FSp&l=DYz=xt?eS@{1yBXs5RUWm)>{>1{aE7S;|HJ&Y@k
zC)Z^Y&47gX+{#0iK85L34eIEcTu=7#9PGcXo}rguz^9(!H9S1pdCY;g!_s-=GgrRG
z{~k;EH%TfSeBDdfmCF^-e3Wsox=?w4stCKEEBU}rHeEJf;2x$K>fNoS^yB&&HhOz=
zS;!VZQ1GN2JtdDF0{!-pUDB_S{`<|nX)=asTWKoWEu{Fk6=ADHg`Q;bgfmJ=E}yvU
zRL6#6^}3aKLj|W#KI69-#>c%)2QP82Zm6dFU>SwuwbdoH_K|jo#yLtVUvz29gWtfM
z_B5)l{XMGgcu*hCMba%a)5zGT-G(6`qc+t@_`(UeD82IPkgGl^#X&XD*oA&a<JX&|
z&ULau<_do1$Ht{utWt7?OLM5<xN&Eddm{p%_!<%$L$N*Jlhe(NC_q3Xboclm&iO<T
z2yhNiZL%&{gj{b(G<?5`6UJ{tgxn62ta(-&C0v&}S;3op=mZ9{1XG`Ad&Zp<IR`r~
zKh6wUrw71OwrsNCOb+7dkP}255VA#>1&^*VRt#Q%>g?7O``D$$YG51I8xTk+27}DG
zrP+u&iGBqj6=<e?)40{Eh06(x%vZX8R~;TgvhMsV3B5^8?=He`+al1E$R5J#@^n$G
zKttx_o!jcr?Mtw$q8SuHPEC6?_RZ5Z(YSH<<uoDJMT8Y*+&cAo!$w7=H>}nHzF5E3
z1NK}04)>fkL*nq-2?ixKAg$I>nFFM*KW2fUQUx-$f+9to;=_Skzmv?UH^}KU$uJP`
z;be>Sit(+2SXtd)i+Ce}jfv{6u!9+0Qx2Li-#*_FLkHV?V|i+JXz#BHrk%Sh!WJKT
z#Jj3S#pmR3Mi<bbk5lX2R_<s>0|_05_nz->f9RAv0Vu`Y(tfT3xAQv;ogxL%y<2a3
zWa7^x11hrk43BZ6S9w??igjh!Zn$_aOBOKP(QG@LC7zV#;_-g5x{s9p-Z^z2J3Y8h
z?733}k$ReCnEo0~zY&BtdhxyNj`>&`_#J-pwgF;Y(k*ehMA3am4mYhpP2k}Y$6=M-
z(=HSa<#k6hpYjtY1|d~x{s3h_9Z?BavVJ~rT$yf2z#RQj5$GCkRZ2VvXA+IEPWQjD
zG#=^xY4|AMm&c?d2S;!^A5faQb{D=UJ4Q=w7B_`zFGV)uSyzxHhE@{4fyKZd+R+jQ
z1Ut3w<+Bu_Yb6b(2Q8Q_YwFU5O^^ik2S>NwZoDw$aF7`4q-guRcBwCOahxH0#!Gk&
zN_SGG64(?-BlDo@$Ar~diN&NtO}zgxHWay|*t~)1Z)Z#1SgTbKxCg7(1RuTUTF6r2
z7e)|G0dhxYi#tW)bC+%@R427K@*_s92+6AC6q_CEKlk*5Ae?-4E>hNFu??ePK%&6%
z0iIjbG@4}uyEJXP6;GhTK!?dqZvud;I|!dSBh3K0ShxZGze5=<Y(TZ3(lv5v>$^~E
zTm5Ev+RBZmiekt;B=X)U6}@Q;K|?At13$u7kH*aS8?tN2q3stx6!Mk~-B0u0adF@;
zwFOb%7Jwg_TCri-sL88?hYWOKCtwu39NKQX{HjD>plQq3&n^$ugfrQn#Dj2JBc@Kq
z{q63{4Em7TZAEAiFI5S!I9Zzy*Vc{UMI2II?MoYis6-~jrZ0ggL1a$u;i7+6b$Y7P
z<^*luCw=ZfLdu{ge-mKdBx|eZzS1T-Gvo{IzKY)H1{y9EN^1^GTIZ{@)jRJ%0O6rq
z{DG)mSkf-fw+7T_oo6QAmij>FNQ&3ar>t4CrWFm`vDV4{=$x?H?#L$}s^MvmX@y={
zG#eCC4S%eiQJ}B}wWdiJHDOEj)WfP$`_)kVE?`(PLeIyoKQ}749f%<riLiX11ZYAU
zL~Jl8ipd$+4txbb{SH8t5Sly(&PKr58W-5}MKeYcXk>N@j3BL$o2F`o$AcMra^gnm
zb%PNb=-c_J{7#LS_~S(Hy&0zJK7$@$oe~~wu-FJm>7Q7E@F%YTc0Rw$Hx_~T&69w{
z3~cx2%^eP^0ES@~C2>DT58R@uGuY_lWn$LES4U`&`3&A&zRs0?Ser7LWE;pRY}Kh&
zJGE-uc9^s*eD^3eIKG_?tlG<`yl2}m2{&x;-5VZ-@?LV6fv2v{hOpiMd(XUb7|2=d
z?3813F7rii)FS!JE7FUWXLQGKBIKbpc%RACHK!vD^#Ek0HI2ki1Vkv4N0-}x&x8;}
zZl^tOJb(7}0NNP9m?R4y03P(6^(w~Hd%nEgbv3@puK0R8n`fAP%?hXfOMW{LwRkn>
zvDXyW&Y*@Ily-2_2=-C>u!R)POB3SliL$Y^wl>+@FJAJgeK;(IKFpUp-lo~9r91hR
zev2SvWOxxuB?;={m4hiTgj3HR=to``bn}#$6=-*1NV;PqOQ^joz~(Q(fVjqt0|L7n
zr+gW3I6P|FH6`Ji3-hEJygXSODLV2=M`x!;e8#I**;wnDer|r1!yqTh2dfIAzHALv
zNU1?B8n-P&C_+l5{l1?c_ltygZ`h^z{}SwfQ+A1y+IsKm%d!dUYNS4*s`%};up+^S
zubpa||Eq6^8xjD>4Ak>YiCej!46E|ihRR=<+!+P%hKu#r1iD6SdHL?XqhO(_LAdbo
z5gK7~ycdr7Y8SaVt<c=ZDB4B{re9i0*Z4zNe9G!BuPvGSUhhc~6ON=hM<tQH-{pH+
z$=%MLM$(|m(aNn;EJjKxjhjLX{c5DL2p4^p0N!kb6NA5&o|YThg5=X3IoO|$4$6=v
z@64sx1t`WJ7=EwgN7HGRVFIKopj&nfbmrsqX=_oA5~%edA9o-^M-_2)`8HuRB9h_L
zI?(iI9bnf?Cs}wxdWm~vNxzkFqJV*mzeT`a!91?huiWCvsFfO?Ogq7ac3D*Ri1GEC
z4Db;yhM3Hj(C$0Zw6uPOEOeaJ_q2VOn=jziC*pJTDr=2d1hpIF_q$tmZS8CY+o=tn
zQg)}ynJUU2My=rdA!E7*KuYLz>9hHLPC&3WgFa38Uz?n(Dj6xZAd)t$q}><|0j?VS
zR$~Vwdv{Oe*;8Se@ZijOx}#|JC!GT#vaW^Msp+=dvB-GwSTla2qW3j%hxk{f(<}S*
zyY)A3^Jzxw=?njP0GXYd3I0EQ%>UEJ{M#!}-TnWW_Hr*0Duz4pLCt*8S4xtUD;L$C
zm{oZ7U&3sRw&gWXI02o{pp2N)<fQhe3<|_0mM&wnNX~Abz#2dQE<5%24D9I$@Gj#f
z+4soQJF(5_w~}dAIC!gUOfLB;gSpb+C_3+r?Q=rAKiw_+O6_vY@1fv3snSR32bQrL
z;4Y++Lt?1x;A7wvEpex*2J*j#WLJJ&FQA;ag&Fb-nk?reL>}eu$a`|HewP>+_a#vP
za2fG1S?HCzrQYd@M@y^@90>_nl4He%ck<~mw^ZUgqvWWg970O;SB=#b@Z5K_T@sgX
z0;i${kX~8Xl-V~DlD4x~tq5Yf!j+6oUZIyk_7U~42Ny(I9})#r{Fb-lH*UAK5OUKI
z30*Trzx24hC_21m#+q+@o937!A>Zr^hd5k7&VR3R9XzEt06z$|S$@@V5dF1cp5`LU
zLm)Nm5Lz$Mn<5qf=sinWNm$J@Btw1|G<l!L4*6Ynj$*t4@SN&XNv6D5gPDTRgZYrk
z7vmB)<@1Gkz;l|vVslZ(mC2T9G@ItS(FN5YL`GTh7TxPvj@O|7{eA>))z|f~yS2)u
zAL?NP2%_)64F@*A`@-^$yEE{Fq!>WL)-0;E*|2KNSds22)d&B~@-iUoa;-sLnY4ur
zV^!Dcr>poet(h))@uNs<?z~?w`+-$*<~vPrPJbpiO28?2yGsT}vo?!%g<o$2lbQHU
z?rr3Rm*s$KYHXQDS$FV8`tm1l3@KrEQFmUL9;?gZW-OqGdx(KdSG#fZCD{sOhR%cb
z@|Uxh2=21eb7&*vuw%4R^@@2#IHS_OQc2EcB<rzN>VXMHozquDM+}*lP+a$=6N=gI
znbN14-=38H7*}5tZ+y}i0X^>hFw>b`>&uS22G2ZlY(Uo7^qU;dnK+cF?~I^LyR2Vu
zbqNSBb}^AkbzPBvmQY$$6>Hh`Einvi9ixgdt0``@l^Se!gikE(DopxgH*X4$q&Fqs
zs<i4&;GdrCR78FjpRyfJ*%t`ACNSuq#L>Wdx3ALVdZp14UeYu3yp*qB<xy%vQyi>a
zjm~e=940E&>3F7uw`5WbQ6(@R{$blr?pGRJ4MK7RgVa0N{a#oL@ms5d^<b(NO6`99
zoW@O9Ss@b{n)W8TKN3O|A<JTcn}MY+<b^w!)s0?c{(&{z|0te*Y|zA{duuSiJ2nnJ
zS6Auj?~Y0ZuM0_qVmMdIM|$|t&wS8~C}}XHLN!@LsebpRHFm5~L7Ohkslv}`99KYH
z=~MRd%O!6F4tnJORTZ<+U}{l*Ae=up=08&%9?)Da8;K4mnh}<3Cj@nR(?P87U91wi
zve9-Xu3O^sNbs;6#CE57)c<ZNuZPi>x3D!wE!SIHZ!jMgGMVv9`r@%OY_Zc^IP^rZ
zHyz3Y&a9HHC<^bJ!~fjF$6kh`y+@CWAn|tbI~RahE-(h_u0AfU(vX(h3cLld#sPt!
zcwBmYWIc@E9d;t3c@Q*uXe&RRPf6G!0Qvvc2>d;PK4wrTc|5;9r>XBf1%I5oR;p6}
zt01e2MJ%t%9A^_`JhE>19GiOFB)^Y#4NFZPLt@X)*NEGkW68KK61wLrRu}Gh3+r3Q
zZt5GRY?8n8G}*5?r)W@XXZEJ%lU(l=YGz6B^@;oWj#eh_xdO6@H)>6f;&%<3i>~cQ
z-q)Y(r+Hy=sL0x|Cr?Mau}pqT5{_MF*f}Ci@BD(V1@-}?$pooFciE&i9U5Kiys&#*
zIs=~!4WY8$5$7&S`OGeF1AU#?+?k|}p_1_K6iJPEeq*bz1KN0aY?a|Bojh?SBBZo=
zmFoWaWHCGQ2fTbYIsI@SmdlYFEdlz(&{D5yXtpY7zgDG|0RsAj-qTz6{EC`y9t{^=
zTWhjXh1%`sJ7|2iz!kG|4MrzAh06F(JH)tt_0VoNSO~IfPpz#s@*LsTuXy>X3N@s>
z%Lrw5nC=n*uVf<T7eZtco|Pm&MpaAtw5>hC>{qS~6nnprSP#2j(aFR+-eXD9#^-4$
zFMH6tz-SURRtIJlNKCb(l%oEk4(Gv*Sw4%v<VC$GA)cPr4HKNTiscN?#o6*ErTfXu
z?2h&Byjg#qad`Ze@zue?i*Cfn2xBL-6fR+=8(}nP6~6Gpl|lvP&ms&rX?*!U#l2+?
z>F}l@35}}e;!JFD-&`lp`eG5A7rp>vaInU1?M}0)OkOB?h^v0?zlX{og{Py5o6^x;
z_>(shdHE(X4<&ufnp0)H=9Cz{d%%m?X(1=C_t!M?I(nAE<JCO%)b=G`kv34m2%cq6
z*vntPaL4J<AMFOTw?Nan9P-mjr~h=(%mJQ2E$aAPi<D)Hu1b+u@^5`vX0w>De(^!9
zQ|`p_#rSjQO&#azjTQ=(3H882xnl=9ROhD}AqesY-Lg%8zU~%_jCoOa-tKBKb1&m9
zIWJu`8|Q2@eGZh+tyFxs%$&upwrNt-(r0w77`pmh)6Ycfd2y#?>*>bibHPH)Xylad
z=1UBA(B{=noRrmrN7KD)8vZt=1Ie@G-agecp)7_SwsxK|)#9s*rtpk|6PE)cc;FER
zmHb25*dUo`fmq;9f0Rp*SWm7R?h2=-PHcMq@v5<(=tx7MU40#O5i(*}PG)(!rlQu9
zd^Xq+#>2$4lH=g+TQVUMI|!T9-?bHyTA-reQ*Qho&fLL|)G_hdE1)o&@s~B;toy|R
zT_S&|lqRG1%rw7p`suGx^GLZB2{S2s>Rn1TxQ`>wZ$xa-1Czj}v6q&M$&=V*vY+G?
z7X1w0P8rR$L)5&Q!c2}WLP1*A#mx&mLPHIEpGvcrqF5VHvgu}DJ6Z63a+T`Wonlz+
zz_NCS9`6`pChz+E5|;yKZsT)of32s0v;v#J_~TN0nJUwfa(~P$VxbKmk>J1PO$=u!
znCPGg#p=uZ%jD}GlwKVn+J%B;1$8G@CE(w1-cu+TGZ;B-G3$hNl|>pc7S7HC8+dm4
zT#oo3-J?aEqU%+$5ymXm>Zx|6Bh#G?H^$X{+CG_V`?fqz?vza4FRSz>+-Pb}xUKMo
z=W}oQKZ<1lH}Pr{6XR;r6ezkLhCRyI%-$@*sC|(Fav$06WOge>ChTGW<z=tw2A^dp
zSnl@3^4|WN6UbO(gBkN3cf)G<Il83U;&Y86@9rIJ<*O3iJF3Zqs!e3+1%7Te`s`rU
z<mWmKg(Xeup3EMG3)f<+X1}2#r+-XghiZm-nFQ`(8Bw7n`-`i$rSO@|GvdVW{2fu`
z@0m)Eg^qRU3|9GkE=c&Hi=BH*84g5+9R(f^)3&kgj&B341EU{myC^|>9%Y^;*qU>q
zG_nq8y@m#xZ)==-L&t|O?n*O*vCE+bdf4Ojh~l}vz|pw-NQe7WsX)T9i=R*FYcdQh
zDAL{Ik)V#%sC&ib%6iz2n>tSKENE|Hu@x)$&;JE*x4j_5vn^eHKR(|l*$8f23c=1#
zr#FH;!p<KWdrEA5>^b@nx-Yw~x8@BVpE23m^yA<Z88yUVx!||9TJhd;T$;PJ+s&hs
zRN?p5Z##_$NuV~(pu9Q`83nomX<ls+`ns1_FGn%392AeMZWTVD<_}@p$*GH5e<!v7
zV_g}x)i@VZYaxSrR)AF?m0I!G4PAgW7G0cle|bA3y)M^y-mm|sC|V)V)p1&8ZHaU6
z^(n*TBd|3CkL1ckkYLDLMj$z45V@O*UUQ#O&D>08^d7wdth(vlA-_7#?)yXmJAW`-
zgv8fJ07GQoS`;?zi1qZpmS}j`{(zK|`Hn)+omLv}uTxjHkHGsPwL3$swuhP59}S_Y
z+as_htUIOV6JC=@=n`xNy)E+cJcZJJjrbSCH|l$(V*{3f9{R0Qj!zGYYL6Ocz=|m*
zGM`r>kbIXTxsT#QG2cscO{ec2iJ}|7zCe`JnXBM3biW(BuXbe-#|MW4_w;mXJCWiV
z8GKiUgCIMOW~&g^T<XCr%C4Wj(+MWWnBD0)i!~XXWtun?Mx)ggDs%Fn_uNLZ=snxR
z=je4g2*etHlx(zYJYJvYSu`at?-+-Be_+%FKG{=`FQTSz^=IAPuN+M|39a*a=45Ty
zeS4<-<VS<$=CLl^&XKFI_A<5lW0JOUlh2ZEQu(_)6Q)adJ_<Wc4L7*ks8l{?26ig6
zj!lXBf_%tVpg1{O$8_+lA9!Jk)%%;7Mp99XD6pCPbTtRDKJ@bm%vKsL*O26mU2o@A
z_vtqZC<yp9rrVqVK{hpDrZZ}Fj+Zhul*Lgz-DLtyiPT#Jbs*ykA^Y<+^ysT*B+7q1
z58MW-34m#)h<X7@%^V<PsZM+{H>pCm7>UU`)lL-;Jx6QGIbd1CUY`UU4jIpd?w@a)
zreR8~3pcmo5=72Z#k=jHBYh^MTYK0<j8HObn;`<$oq`-S034*+R7^kiXdQ=ne^EPw
zl6Whsdv~?=0;Cixx>l3#Gsz6+Tb6SiYctxQpL({1?k@Cx)%IQ>sR-T+2S;|7C<JcU
z;!>QZVw<+#<U<-SF#)13iTgEP*o69t|M&%o2Ix+!y@37kvt8SJa2{1IKac5{HWgPA
zJ?1=esviZIdOpK<G)OqCZn(IY*MevNkV=MYCwv&WuLygy_S2Y;DP<Rv%`~=oluw+6
z_&RjkG@}4vdz8Qs+;%Q75J!o-cA&Sm@pw-xcX9(BzrNc(WmvKZ)C7qX)9|7gPt%j$
zMqB4kx@*94pof<3ucBq$I~llFcU&*{e{E=6F+WE|FZy|yNEdczh^Sv3EB(-q|JP&P
zb#`N)8sS=y-%dp2{Cct{gN!WG*4pJzztN3fex*B(8n|F@)-_sG;$6b6-$7?j+vS7)
zkZTB>5eCi7+*G;3{p4Wy=%Lnu*1>QJb)!5{z^gyPu8Z;OJz$|N$alz9CRWNoTJnt4
z)^BRns{*TZx7|09BU8Wndm$JA0kxfxqXSQBgC~<^NhP7yBoiLh@YTz2>LcmkQQ1Wd
zujqvqm_0KCexYf6d0?{Gpep0?T>C0D0lJE1*V}s|DX6Ah2mLdlL1}atlD5d1H0}d$
z1noKi^p;DWX6NmlDKSrLF6dh0)l_{|yy0M*?Wl6?GWY6b$SDN9B-(=XN=9<Fpz4*S
z>Q<`~deE}QXF_4#wO0Nk4Wj6@kP-m`F8B04##mx~QPvPCv;yhzaYn7dNV$G22Vy%7
zwIb6HP1t`S+{FU)stWu^cFX0Df}Qj?X&+e5Ksjd^9&gORhx|5N)o}$7s*FuK*M$5s
zC1cHlg%m}gB{hKZH`{;K($PwL5tLy@PI>zdK;u~$#jRqzFQg)QMDK;ONsW`yU{k6|
zeQUi^1l`k&Jiekik0j>tHFtH5WnK;97HbN8z(b#7`4o5Kv-iinT}G=ncETH?K-<~k
z5D{IwAeou%=wvx=9MD_n`jxfor%sYj$}VLKBpn;W(Cx8T`i9wp#uzz@1+++3JKR*t
z{f1(Gc^Z9ze7qKLOQDnf^CeR)Sj;eF-kF74{RG+224gTOZSd9@M>JT%WVPK1dPC`%
z^CHvzbAz8<UeMiLb$GK^Q_<?gvN}ytUEbW#Na%O`Kg;$Lh9CbIUvC}{W&6I5KeiCr
zmrB;MRF<Obj3q^7D<NwVvM2j8maI{TvWKx`-<Pq=PDt4qjD0Z1*v4Rt`Q1}b&-1*$
zuixwQ&-}r-UH5gL*KuygdE5qsm+%hKE)_y|-WE(}KY7a|vd}wg73q`f6Z#y2xYJVc
z2>XQKOX8k^f4cxMX09p!1_o*yddp%el4eKl$F^r*T6p8SobMCfH>L*OnC#z+@i$r>
zG@4fsELokhE0f=?Uvs=9HQkU{KYgPv+8^n(Sp@H+Yc)5n#hxNEoTL%AkJe!sgCfkB
z`q$>*M=J^$j#9u}G^XU0gRz3m#oL7~HQAr@%8pH-G|@iYGb(PKpXWrhn_+L+03(H-
ziJVB^i7algz9%!@SW*-)%z%hwHjgd!=^uGu@8o?9M|OXs%nrB#4KT5M#!k4F)%L?z
zEa18?H>;3vvvH5siQSj_;m2h!sz=-z_uT8*5@O&75Qlo>S!eHy?lUwO0)FknhU0UB
z_Zsu+`3#>i2HV-vQ2t22F(qBHrdfI;gEOdxyZUm?g$Li_%|C7(pY%uVf(YXFU0=g<
zdW@U{e9^7l>g?!m?aZo9@kK#dXOPD+bUx9O>x-@Og09W5Sh`kl{w@2t&hL_CiN1(=
zTC|D$3VaGGuYmj7qm}{jie}%S5Cn4-&h75M@pT1$uuw77d&$|{1|A3Qe~ce1>EB0e
zevOxu0I`RsDfY@wJc5=Haf$Tvf<ph!aH}H1RC!8+PVTbVA;%_n24lo^h6BPmmx)}{
z^<N4R(P@8UOy($&i-vJNVZR%V-TKIAid~0S%58Bx!g|MiO<EoUBfJ94?PjsLo*aZK
zr~CTY&jByoXFaz}-HQMo+M4E1v34%6`v-diG)H^|<D}@;)8I=k;|Waq*iX36wr2qd
zrie7c+LmP9?y)uKDZ`oh`n{o5kZ#(#-kmK9guErVt@`Kh);O)SnwnPOXE)Jd`C@Tq
zekZP9*l-9=UUzc^@Q2Y+@SNq1hqDOqRTFL!*ELWjvPCNW^cxPEYP!Nyre*5rxS{B4
z0z|4o?Z1^FLOJQjRWftPshA&Zyjpe>Ffae|9<X_DSGI4gg3UZMO*WI9E(gz^4$?4g
z?$075mf6tt_JhW^KgAPjEVZa!kX69Bt3JqkAj1Wcw#~cpVHe~!e2>0fA3oiOYUXI-
zQW!Lba_`+v+lw5}|7-LhyLQ5FN%V@&h_P!lhr~~l`rIL@>X8_;XI?R$Y6u6kcjU??
zAvPbaWg3>TZGJRcy%^{u`H1o_1@1Q_yFi5@H`KFqssK<80gNZZ(W}7Is`QKp&I{3e
z8vO3FfBHeuj7y*KquVYXTA42c4o(+CGfuC&5D<7jV1<-(63mIR^ei>NwEwK#OAs?x
zx+`pJxJj+19xpTPQC8Cg!~2UiZh~}bFB-X?!d!ME18GR!sIeEGYt<x<Lw@+9-|(@4
z$V@omfaCa56+5(??YWC_`}GmW(8E51unDjc8s)orcXv&M`<oGm+hBw5d!I3V?7C}b
z$D6NL#*ey9)id*J0u@s%pk$QywPvwpo-Yy%uqaoz`KTP5Xr>aHU{9+)_R3EitiOCn
zcVW+CWJ_Yxy__}E=*4kxDH(A0)1%-3@SJ?=oZ@O%deN7y#zT?hjWMuK4?6+C^|7`#
zxf`~#*fo(c_Y9=m$V-r8U_%X7o#q^UI6U1HYkQ-0cFz87*f*b^i-1A65e-7+Z-cmW
zZd^H2os>LC?Tzrx;G`1Iwah9=7~!lM=Ww|5$;;DuhnbuYYpDB}vyE5hP>eui`n}QT
zRWh*zTG=-L#Pu$ri2bhF8hEcH)RT3rtctz&bnc0^(XGQgM6(TDw9g5RYwnW_64M;0
zXYTxPZvS$;r(j=<`S*|GX1)0al*b)+3CAvT2o6tRi<`ea#MSnB6lwI7maM>rzHsER
zHWoiC6}XO2(jBCc5wrtBiCc{eX}WVyN?MbypGfY1FQEHu5Su)Qa_#X(s3IjrDoZ1Z
zVw5U?f`q~5#-~P~pD3ST{ZCKE*Pcc8xu74$v5BO96EiD6-=yLKr11(P+%Hz1z*22q
zOFGS{D+UBa%9QQPr{2a-_yA|WF1qPJVJ}AQx-SUPb5gZhXbPU|iRj4KcXFA&!!}@k
zC7{{McYUsL)ea^NimF!5giufka2!*;Km((@$<2w=AV#5ar#mzN;W$yL<(cNa=c?}+
zJx6Df?;;EcBGUTRTO+Loj<eWwEVg7E93ZXOu+=!fW|Na^fF0c1P{cFY8qQ)jEXUm8
z8_2^AMYQuWcT<BlvbojQ^Dy6j)?nZ|3+jpWbe?8ySpiP*JU-iPdqc4~TT|v&Fj7Mi
z`DIJa_oH{n8?y!If__WGkQ}!Wbg%?x0{s3G05Dt)6%AX)0T0Y)5enjWk&5Tof`hWg
z!9LJzCnKyQtlu~1j{7Wj@Nj)~hR{`n+qkSVnt`D#Ehxq9BTml4XH%-Ta0rXr?r`cv
z9qjrLDbO`%5&cD75k5&O7a)C0PDRh`^6ANvUt0>@Er3jH1rNBX%5~s0COz2rDd)2+
z=H+$VrFw`bvLo6<|2xt&u~{So?_60wSTMJBYt!5QVp`KCcu&)r+D`DC<@i)}-saZq
zc#dg!f`^B4+3$k|rSjhR9<z)>v@+uMMsscCrUr4Vb4+5i&B$Tsx8t*Fz6^2c%)nkW
zn2LyEV1>>gY)$=2$!y}BiW^K};v)uX()B$K2sdbC#;29>0#*JGd{1XWzqE=N-Hsph
z+_;?TO=vtkv~eX>I|s)YSC|JFSxqBBi)#|HSa}DqHzL=$+4;TyQ?a0|Q?efvb0snE
zGca^j?dJUK@Hf!xGFSo++ZlO5fljEl@dgd?dR{w_LMV_HFz~!^sOwlJ@<RFklnx&K
z;`z8Fx9&8H^{ihe-yD64R788=YsSD!J^B-1pT?2i2K2(vAgiAT>=?WJbuiKZ%POH$
zUGvp=XlxX0TC=rRYB<_})<SPxUf~kjaJlcJHOeNmAqg1Sc-549#+i38y8b@~z!_eE
zYbtsjfEfcD(*h4wRCa4BP3h-2_*nY<=jttEu#)Di)Nz3r65#qbXtx?>E{*q0ERTXO
z-|d$&o6U>i`<Y?0q3Bw72((LCepA|ow*$*Bk{*#eOnxXk*UzBNcVoi0JIkNB>M>N(
z{rKSJq@p`aqG&Z4q+b(T4mF~+H$J;p-8v#L^a1GWu+3(%bhAV0MZZ(yXLAwBY}Tee
z)e;8f-Kdx-55QnWz5|s`r0F2ku)*74cO9y2pIY6#^#GQ5d}IzYG>leCp`f~eM6A^l
zCu`IHxN#<Olau$(vu?0au>NI*GT`DHiW@EaC*(gK0}=D@xh2CjMhgXUKT`4tMcTQ9
z+(3VrzVll^Pr-S31Et<?Te!b7v10h#fL&o+Ewhq>>iq1b0HTAK`)3en-Yj#N;88~W
z9&Jads84Ye(AZ%y{~aHmSaz{pR}bW=cthtAN{>kk=1NqfN$h+yc{GHKP<+vp_T@fz
zfN3<phx<=UQ*s6!k~dw8>6Ap}h2|@N=$c=fzfMthm*G67T2D4wh~NKyV{;>`r-4|?
z-1*&$=>q9HRFINWF<qbMk?0f-0{UW+osCLA<zq5uPIsWgJsIxeS55*=Rc@pJ4V)aM
zdVIhGK=@jWNcuxb{zLlPqPT{4#{M7w;vEGQQ~%+-_$?rjCicVCL;q@fK1&x*oa+4#
zhk*F2U%&rXEvx7(JCFL`?JQt}fTj0QGYGSJs1tcMzW?`V<db}*i@}|@4(e63#lqhI
z`RkmUIS(u|ICj4_0uMd<?Z55(-~Sp&YyOoTE4&d@9rD_C-6r(2A}#x`)df{DsXus@
znBXtD-?u<Ki=zMZy#8F^A04jjlSC0_+fgCA0(CNxiNE?zEj8k>l3r(xrF>#PK|5t{
zkM?r-5YF9pIv_rLA`4>W@-xDmLp~#G17RLMK20cg6d~Z?a4py&2-dnHjy$qE6Ch_=
z1`!|{)6U0-nQL-)A(k&QS5Ei%`*8CzYvxjJegnkD;@^wOd_#T`z)eZ;kfa?2dk>u<
z?P`)2E@lD7;j&wLU^=)Gv`Z!E?=R<@1U;<NA0j_j1NY&wUYrGj839X*KUey7O`a#e
zi`#mi&rHDs$4e=hG5G<~J5ZX^MDaIS9#BQH=HnRy-4B!Jf8P&K@p=)I-}*GFiKD9C
z2+MkS#%>#J9guO-zv+wHa8C3_H#UekHx8WLg8xd_n_iUaJe=O~vE!CEKgqzOgJ-El
zY5eJG5fXo{2W%26FHm;>*zqvakmasqoFs`zv8gtuf_}m>q7i@pqP+?8^l$Aq9fKgF
z;PQs8%#iW2n2PZp#rqMrDzE?lh;M#V`E~4);zl~oNa_+8vijYVy)oT6E>HE0IbS=%
zMkH6Hr(7D(r}jJctUIfsL0a6lf7CVB8G5mvFTn4<uABzL##<q7W}`*X6Qcc69x#E!
z4Rg3?Ka_K#pJ=VPLDtBH=RVPEEb`ip_P%t<2F?QR8J~!G;UCKhdKpyQnytD4G99UX
zJ9l4`12*1rJb@kRhl(Wj|C%lljkyRDy+DHJ^|)O>Q+qIlIWVm+2O!|ru>qJu`9HD^
zFMHO56Jes`ho%la#O%K8Pj>&y#D0JLBQ7U)q%VmnMb#`*K3b-0pk0crM?H5c>J4!q
z_TGLUR$-mZ^3qAz9(}J9?35E_{skSp7o6efD`5`uf_sczE&Tj+GLYaxq}MY6m{75C
zaL`nUswpq2V*AfiTM>Cih^C7cmj)e9)k^JQR71xBIlzO5^EJif`;@RTy<$gpd%4o?
zK9AoNBj6xDr=I*#S_->9?$Lg{@ET@;v(NV?k37de<p1X@zmMec?*h(UPSClWguDTW
z=7x8@Mhh6f6Rz79a8lmCH~@82?MWI=7VXx1=g4Ns;X<bYsBmbR|D%zz&O~*JUn|7A
z&uW`a#9T~tngDK9GdH6Mj;vziuql-v^{y>(8mh%Q%hk;grr@QD@L%u&0qnIH(IDD$
zwVdFGc(Q#`fiOuss&)K+n*2v)g@{=cXD3@BD0ts-{jq|(dCN$U(#;II{zrn(0!Ubj
zmfWYzOvle511g~Gwe+k1=oY$=JM6ILIi^X5I+gvv>|*-5B==BdEr~#c2$jbDSIZI5
zXf*ikI9huu=9_JkmI*!Y%ZD0V0#eIAhnD0aCy#9X$9CAN2){11FD-1^FnX!udGzJu
z!&U!|7XJfdE`)LQ^`#~G!c*{jGP;xWd8~yko=a#8u5t9;8E-5E-}MuH&%u6x)qo(?
zFpr^#jOrneVfS5455G1^-MwIxNq`5@DR}moK$5DLqG6On@#RC)p*aZ1EM3_Nm<i(>
zb96G>j;8bnZTkko$xgWDu}b`>T-yZiG7y=^qbR?4yf=02+Z1jG#Bmi-k?|+ygC!2b
zH&3uAL(<Vq>GL9Af@qAz2x3)Nmc$$g8V5?oe_QCwmqF8KAusSnSIZYm4SG*$0FwXX
zH6IJhtE;u`wQ!w1;_>VGznldj?q%*m3_koEZm#86ZY{$G4x4x;hcAXz_m3PFS_<j6
z#+&gbRW~0<_y=USLd<*pNBq;)0~!LfWc)9+KF6n9%lTQx!GT#@+#Ny~HCEdrJai4U
zoDL5)@lJYyJKX^&k*D}|1Tev{wkmE;x(9`}#ykvOAyfn)k6=fMiS52bM6X~Jx*K&g
zBtl%p#O_cP8f9Pp+5aFTv6i!}zSZ-UT^%7~!Ef1b84;5K@~8iMBVXZtd5X8HfH@ZM
zJ^ZW#n=nD!P>!(s$O}yzJP{7fe!vZgZh)F7oSz*<YCz+2z$di}23+0dtxBQWvut%Y
z_5U`epg1OVe`SlTE&DcZ#V(l3{_&LYd)W6@VCxSjpy;R?&c^02e|O?RXaC3f{|hSr
zNJ(-ueG0q3au9RDT}?#)N5Ts^7g)D_BA))$&iK>Pgl0R_t53QApi90fkJ;UqwallU
z_NV7U%Hm-qbttz%X-ACn4M!Vuyzi*@F{t^`%{uwRsp33TB3jC=K49tG{u}^oZqnoc
zpyv9_g<uov|6K+ZFrjh!+0h)|JP!OVfzy6-xBS3tDY67m#zZBz*%tzEQ-A*P42mVs
z)Z^@V`_!qT5IUK050!}*L@p8FU;q}72!r4|340)g6ZZ67xV~CTgSM=Ig`#t<oo4W3
z#kwhg7RiIXx*z{5=9y{u+dJl8&Ix+5ERxpr>7tLfUaRHz4Eg$R)lC~{=NIF3UdwaI
z*xZvZ=Shra96KGcus5ONo^yW51Q_YlCa<?W?5FfEayRe8|M3Z7tS_$s0)QX%MU2%`
z0f{iHsMU%FW~sF(JI`<TGE(s-wOc743~o&$43LuhXjR<1_s=L%@aS68QN>{77{Z2V
zST{coutuIUofxYn<dg>;*p}i*RTnP*xm)0tDQI4nd?Acn6e-lIa?jWbX%I_GQMx&k
zyNUQxUtUendAz0^S>>N5*%06;wS<@wx7lh^Y3%U%ch`AIfkE1YXwGRhHs8}HS!Y!O
zIKbTF-_JxQmO=H)y}u*MF|HRs9nz9^F@coSw#*D?Jx67XH8My(M>Yvc{!*6vwe!G7
zF5mBr@Y7|JA-uZ1#RcGA7vEL+$4NfU8@y$}zj3PX0N0xKfq!wHYkIl{(pSU0J^FVg
zW9Fkqb+0}Q>}(xx_ZLrv-6_-Goc8A5Q`AZ_Q0L$f>TXuK30K^Ne>j@HwO>I*Elz{q
zsCCsoW4uAkYQXJxs7Ex>%ub4G{Sn#0mN54TqS&T_K_77fWit6O$=<M$;WYPnPPovP
zPfg997}P*O(tcV`e`{FRsIy7LV7O)E9}U?2TR{|asO3Dp!C18@_9#H(^5Ure%vh}n
zbjjJ0tV`NJ=*&b7`2NWCpyw&k`m?x{@rJUk%QiIs-W8Qz>2DI4%22^AwuI9gVtp1-
zjb<$#ZB{f*&O_Siva@cMpSDb?nd2`pO#dU{ca!{<-pNj0!51rN>F1nKPj)J~BQj;V
zA%YR?wWB?3Qt9RvYvggs7ywL%o{8Sh1nW^pq;DgDO-x=y@iN(4KtOg)-w=0e931-o
zW6d9{1okF^tsIjKK4-t)s-g>*a+;AYPA<T>Df1KEIwbGT(a=Ln3tK{vK;O@|Ra%aY
zuM?$q(^IAU<)w6;U8ikx4>TI?`~h&l*C&yV*nAg<g3g)czBCwOpIk9sHHV*xbScvr
zbzgFTZDo&|0n}YoL>)@oa2DEuo`~Z-K%qT)960|V2{Bl?`0$q>oO~8uZQ+?#+b%9O
zS~M%RC98{SM@WgwrAyb}XY+;5FK%>B$B>EqxtSmpip?jHn$5-o*v)aPBkP0}2hnTu
zQ7LHn*8=VU3&qt&6<v-sc-SfG2yRo0sQEb;D%yX`Zc>Vvab+)_1%!A*YqX7L4JL)2
z()_y|fE8GqUR9=BVsGp;UNlb>8QvJUx_4xIT^K_t)5ih@>ov|CjVWh+Cg3}-nt3Ml
zZ8ZBP?1Rqp-^~6;(pkrj^cXrZ0oUAb7*qGbm`u5q2^9CA;>xZ44}`K-VfsFyS+yFk
zL$gYaVIHW12dGa<6{FD+u+yB9E!U#9@&80h`Lw^;1fxgUBneo0nh%dv#bP!Mn<#9r
z2|GrxJHHr%bH$8E+BMeiRdw8%J-{*xrkWO4OkZ>HP~-U9)dEk7l%nPM;9iw=S^BKF
zaX$6l2G_M?XK$mb*@4ml4xI2gd}hG)X&YN|7_9ZOdrJ$w=+K;9hu>@BInxBr+2jv*
zQ%9WkS&o<7`dG|5{Au(>Md=@LG5<03k7a@5Hnn%73P-XwS~BnRPXHFxRs~T}%qu9b
z24%X?Y0lK@qFw_fF-x8CmS(>l95*#f)x*CvHvs&WX6hx6Eb<D<5(5=qB?U_~D~=8f
zZRw1j0P}|j33iHVj#XdO)|&z6*lf92dC#^GXChnf_|Gi@jT(*L@sx(m2j^njtHQ2i
zif|s4xo%tTF#%71ucZ7&ejVC2n}6clc_p6fI-@8XM0h`0=Vr35viYPRhtRZ*P*_a`
zT<@ImYK9FVeSgSR=~130QJ>JG@OPr~u}1sBY@)b+!=D5Y(AN|vDH*HQsw%UX9WuZk
z!3<sl<oPua^R+dw(b)&Mjtht%owgif0>1uRB3jKo2QLI1alwGF7!_20uzdR;-uWxO
zXJlPZnr(!vDcoCVD|}j<!zSX3Yg^JO5HO{m1lq1=9PkYp=lJTur}ZS(3__BB`H*di
zi@2MUF;&?AXD7aC{oV<Wy!%ZgxsdV!x+6Po14mSLg3VA29Wx#U$YX-Is()n)#E(ec
zd3qLi-=`$$VyeZ-<`PxbW**undybhe%X9{&!lgP0noq#a8IYGyJE#r8Y;@>&?mHB9
ze}O3I>E-|gk9x)B2Nv!bPV0*weA#Q2$NwY!%@ayms|ak2YJSO96;hA5F)xSSob^3-
zT^I-ONAI2EHW6n!pp?)Gx3lM{k@xj)x}l3x%{-6D|MAbh<Tona(5W^a2ywrC1QGjM
zl$>m9V_<S>N8FWw&@9P{ezKxa|A)hpZe$qgc%=%iiU-dOfc1DpDOHfu(V6~pqX0Ac
z!!Jbs`IF>2W-!6WBAIi4tTed{JX*PVw9|3N6y2loSTnC~#TL~Q#`ed&l}yPUGVAWg
zl*;g$3-lKC(SsfBtgWXhWV3)hS_7aom15#&D@pL)NvcGDlVrn$AL;~otMhkw_$m}g
zuiB>*eE_J*H~cTuy!Z_@_7k-;#RhFe7d4vvcy;eVbNMLuOx@#_THXFigdh!b4Tc^N
zhoq<fN1-%GmimxPZC2+-alhXI{>V^aNb2$n1z5pj&}n<qIWtJUgaoYi{a%xRZK{pL
zcXmc0SwYbP&O2k^d!MU5?O*s&kaK+B<`0nm@|e<p2SH2OK?zuZuU7y`yyqc2_a?u+
zlEn!e&v~%Itxa4HaDG8c0#$h{;R=&lAeh7(rJLDZ|KQBmg1~bu2AEQvQ4P!XXI_a%
zoBa$$h$HKe;yhSY(DUuHYNpmu`;1)3-oA7DyabDTOTaK@03Lm`K}|S|Is6qLQt`Wn
zK3AP^uR(#XI%I<Nh??zzi~oi_N>6ECg01au86zktFG06PP-}W`iCec_To5~yOZ4vD
zb>Y0`xN@udy}0nV4ez-F3dK#A6Y^YQ{*`zI`ebi{vYc|0AXxy51z0awjWxAFSeoCK
z!vxx*$SgFT=w`c_r-y7A{sCeCV+H_7ea!pq(B+2`jqW`u47GBaXIJ>>=h%_3^8LE-
z!%;3E8Q^A5_#Kb}VNn-LdC=mu*~X;7=^v#(bbgjP<6e?=m0xFZ<iN{4nxHAQPlQd>
zp&W2wUcW;02Y-SZ^Fnv?Z|B!V{|Wb$zEL=oSeti*S(fR%dyHK_uBoAc-2)H05+|O<
zCm4k*@E@OB6o3XazT_Y5tg>?n*Z}|=f9dMKaYZN%Ibbz$A~TC2k+qH(_xEG?{NzMJ
zLSF5}>4Aq?%R~-p&#`Foci}?=;7bChTGt&8^sOIwOhN-JIIRAj)cI{XLbktMlLohk
z=jW(v!W_F#FX}2kOHyg6wEztF35fcpgjlx(vxH-!Cm>0`nSX=+kDnE4Q8LWT_pN}K
znyRY#hA!rk;nWm4bXyAwhcrjQ(^!kkH31TK#$adM`SDuFDzVfX9S&6e3@uRE#J3By
z%m2q`18SMS5GH%u9CAKyYs5zke$alPYrglZ_=0gKf8YSp4Yg9Gh&&EXd=Y79jyq`x
zed~c^7SxuQUAYjc5q;6|kH-yK1~h&*r%GJ4lx)Zvdv$(d1+Cp-DHl9U0kV>awtMAX
zu)=#F-L(M;Iyz6d;)yzx)4v$>p*1muI3gpSh-i~{|Fb>-VDB6K2IS%JSyKc+zY074
zQh07-@tt&`W7AO$d_xafb&{pFcaAI|V^fie1%K@qec{(O&-^;-4{qdFWduXAK8xMH
zu(<1PZ~YPoih)kzu}qa&Z|ts!#9(vlWk-;mBS$6vIBA8rDo<jK^Vh0?uE*Yse;-|t
z+u5Xmy%-lY2=HJ&wls)2H=Z$BBc`1WUxTM(8>6oY_lxLMlRT?&M;QpFn4j;be-~g%
z@HKsyS$F9z`yW5^wE_U;(b=O8S=?NA$bqYV;|h&^!jqvNLy|hx213>)dE9=F2aj8l
z83Bj&eljF}iWqr)=Kmd<<^z#c#SQKM@f7F&2BGXS==}a^5btZdwUrN?QBa=3a)$_|
zo9D;JB;NpjX3tG-YR|PyYhMhCM(IK`f?+?8;=eqUngtGdSkUhweBuH84uJNg<%$+W
zIcsY&%=04iG`3cCb#GUx;)~<pyOD0q8=5HA$lH#aY@I-S3(Y2sv{GO$ytRnO<0@)B
zERf?9^29XkH1T2Pf8rnFaXXWF81Z-U7bupFlNuGR6C6z0B@J=3YNiP}TO*DM)62i|
z_?pY6+W4!v?Zc-X57Hp_YR;u*D2Pct9K|2O)_if<+-c4@YXq~95hHQ9^7rY0lAbE*
zVwc6n+vg}%yB?;d(7GH+7TT<_MoxDJgg$=-I%Tx3MPyvqQPhA%XtIE7i3Rz44**5@
z>79>6BXm2@9y23nl?`e94{-4?-1}vM=kK-B!tSspVDOqxIDT~TN852rf_&4(bWGAO
z9w#DkbrxAfxza7hY|h`%<3zst-Do~kiLV4!^QwxB()hEK4Mf<uVF2m6SFAJ8R{G5@
zzD-G&5<$PwJ~zR3O}{LjQ|P!%hjy#O|M8QFJ+v5=BcX-J<D{>3ju;}^fF*Gf4oFY(
z8?>Q!GYylj+W%8Oqsr0#dV*^;7@C?WXFNo<l&^RIS*K0nr*B2X@=}Ff^eYAI<-&5|
z6{Vl2nP&>xS8ay&Lf<yLPFMQbYSw}UX@0h7XcN&6z8Cy^rdBzq4?Yy<Vkyvf9PD#H
zF&*_L>c}MU>!R9&<zg}}A|<|W8y3b^9vb#^C2J}D(^FuS<bu@JHI!_%Ly}g`t1<u%
z=4veUlYsjJ%HMWkcdIkpCs&(IpMm--2P9a;%$-XvaT1PCLv{D?j+?kEjwo5tU(&fa
zfOGg8K{K4*1GfJ7zqSt0PJW?NuKzM8My#;Bcu46d_hi=vYdu`~(bS$_(DQU440<+E
zRd(uq3P?j2>K@(#*ycwI{mJ5(@TZl--yCy%%ML0ZO^kowI6+OQi7#Xq?Dkii=c*=-
z4?8!D4re{7N;@FVXB~x4r%|+AXdXAwI4*^aN#&J8X;#G&%DSKQ0TjgHY;DT*k=Dmq
z=bBGD$)DCedrLdh6z=w(@x~QLLZ4jxGGVHbsZQ?89yp~dj)zbA`CwQC_<D_)GU`D=
z))RZ7!uu38cb=P18$}64+8x0PO0p92!ROviKBuYVJD@@l<ZZU(fL9PpxZ12Xx}x#B
z3WBXOuHSv<ss<!n(Nbvny%w8qz8M9%XAbm&CpP2jY7H_nvJ}0L%wte<`kG<c2Uj+3
z7cLZuwLFoIalIwU;UL!=OMyw_;L>z%l~o-)KC}nVvFSgs_iJ!NrO9^f(}N7e6cnEh
zPLvc)>y%v;v1-=0dWUW$l=;{!KE$-;Xg@)>(B!k~o13_AEy5EY98Y?W?>S&?yLORc
zT2BDIS#$JogUF4T4XL_BXh}1bn8G?EW(z4X=sZLs=<vfVVRjgK1u{9Dm$wyFMXg3F
zO8$Adc0kky^XN|R<0~QZH9gfmm2f?^dRH3)oUT5+Q(EY<XV0zp+dJ999pY%-4>|);
zhBxt78tJpo$$4AeewOQpE$`BAGrJQ=#X!QRL>}=MBM<4F%zCN_ut2@@?b6waf_TD(
z&qG^)Jj}zVPd&J)yR6g41ipWMVOV<&B4qxET|7fxw5s=x+jvOUZ1n3;%i_3xlPocr
z<tr={pHGflW15Pb>NmYH=A65OTK0|&#nY(R+eKUr%{QG28y4DaSlT-^Hd^N8?S|kd
zI_He6ZZamH#4&T}qg#($9<J(3PW#K$%!=u|bR{DVv6oY5{kB&P#ynm5NXf6Pq_;l3
z7j%;UDgEMJZ38Nj{oV9jtEoXr<y>5W>F!3bded56!1dTCplQ3n>Gi;|qV9}a77Y#9
z;~iczd&kP+Y5b3x!u^Z<NqEd9%$lileg(nrF>?t#**2#6u%{4_HM>z6a)BER**b#t
z+qGA5cXgB9#P4N~kKNHeF?z$)yLeNoxzOx9F8vg&V=ZF%_|Z4llIQOXGH~j%vd0vd
zqda(s>cMd5vDtBoVI`p*B3ux=A}@$>ynWP%o4Jtg>FJ24#OhPV%uKREi)33mVrKWg
zOgkW0M^iY&MoP!$3ZwWTlZ7^4(%kDq_^cV@v|bxqCpPCiUtzm*%q8K(*O0>$dAIE-
z_B6V#Fy%>Ep{2%~O4|OWett+2d&To(4G-?Smm!9<j}Q1r|L6IZL@WJzlKP-M{t~gl
zatFONj?~N|_au!5Mei##`R(O%XV9ImXq$CDf>PLG^=ZA0kON&XWgyt^yhj^m+TigB
zoUjG?Vv8<wZl$Km;YQcTP3wVPCGPKxij5t0aNj~5Qv}qkG0|M}iVB)ZwrLUF=yiqa
zM7@kr(|AhNWlCqHS|V57>elxe4xa5#+4t$#5AK;JRakw(QxrSD<FQtd`o$*+oYgE$
z@Y*NqCZh)*$nJ*Qjd6>FW=XA#5ztt|V|Gr0b0|ZUI?`6f#U@H8O=gA0z2}M8g&1w$
zc59|c9wVihw#FeX3?GZ2_VFE4|8Lp6tFyOwo0VdYMyvZWlGj(0p;5;u?y#_B{-(?5
z29Go!ec$`jol?DnS5$7IYt9b+u>bBqGe95H*|lGzu!0{N@FbfUD;t`5h#8)1)F>GD
z*4mhe+(p;eEK>WzOWC$&H63^2H(mwP2+L;}-R$bGWsP}je~p`jDRRYf=Dq|KWYV)j
zO`Q3y%Az;5+NCtLYiA)53M^pf%rMIZwOcc>dU2*~7n~k3A!t9P7z_265BTa`VcFx;
ze+U}f`u};3F9WFNlNBAL`{JWB^&8g_6}Y%=`-CRp#+Z=jRo`F6BQzz1VG@uCF~5(~
ztDj%S@LTn12;CJzT%CP=v?q|Z@+O7na8Snhyf__wDB<<ArNHK86~)u7lahAcg)Yh=
zt{n4A&FMz#;SYpDyiQ=;8RC^!&;aWT{Eq7=KjoH181J#xrDoHG8!1$Y-m7I>x6=c7
zNCJE&r)5p71#j!ds|J+Bs&4y-u)lcJu>XSdc)R8qz1G?&AIsiGa1uv$vmpNwRhMhP
zp4iB{<{4`{ahCLVgI8Fbo7OI1+AYyVrF$OwH0$=3Uj*VlY$yx8@oJo(q#V=J93PYm
zV6VX|78TY=8IR8>@r<sqW_PD7!M(y)Mr%Rz@&-E+ug5Lg6m59k=@NFUrV6+sz01Zx
zoC<d}$3j-0KM>8QlefKGm3XmtDJp;vn@mGKEf!-6pP$F&yQhP}Mo+@fW40Yzl4e<y
zY2_A0qVY((o8;5iAztGa*aB@Ezb*w9S2Jx%NJOd1a((@*b)t)>Py^&r`~8cjyU2!p
z&=Q2|LkY4-HEel<Vt8Yy*k??k|HDTs!&Ore>aL~;))DP0oVpNO$H(U1So;Tfm5MIw
zB+TAZxw$^DL|utF^ZJuf6|9Kuqjs#dVeLhT(3N!G5JhhjYsSb=iG3)KkGvjM;xWCm
zCg$g-Rd4cyq^v*=j|AcQHe5dUz}eUx`#UMx%r}t^<JuyVt}T+9o?Qi&W5#6<rEr}n
zPrvGz$yTSd%u_J(4vKY~%)J}rantpyz3uWT;g$u1@?4NtyxUkZL3h&iT)K&%X@#j~
z%-1UV)*&}F!?D?bC-TLD2M1(VBbVb$Upw$bL?16D-0i`8Et<!VavHRpXU+?iA1op)
z$W@JYP0Rf}DBSx{%7Z>G`O(ktB9F@r%3GeIA;jl~u*vW%Ia6Al?_*>se7^pCPhh`c
z8y4FV@XjD+W~X6#A+ab0d;wLO7~`eatl$!6bRQk;U3YNyW5{;y9&)?p!2!D9)c^GH
z#)=?F9XKgwMLZ9O6c+s@Slb6keKqLk#B~+61DGGOPIzELS3iBZG31lpLHCI?!ZL*Z
z?n@-Mqr%rsl1O`t;lp)1Mb2aQURm%MTVu(~mT)KY(G2jR>1{LSV{SY21jT+sIS645
zumt64)VR&QP%X*E2!wk+I4o6gAzabi);37ps(!Px@%gk3u|a=NA#;54<%OD_1^a#-
z_rV>GV7$yi32s3yqfijzxuc0UCZ|WRPHYrOPFiOs({ugMR@|=j)_9igEnCEU%zws9
za}nC78IPIE7CS|NF&|FnG)H=VWKW#<1Wa_@4hzQ2+3lRiiP@#~fc79p#)LB^<##h)
z33)LsLFqd|-}|eMJvkw-Zh8!<H5Oq4<dsTl+lC?jKUFUD#MHc~7kDxF26*9fi-aZG
zf8^O4bUGeK_Z$y_32Z0`f+Wql9<1R_CngY-hm#sqYO$wYy32zGUkccJTFU0Wc`3jw
z$F3n{CdAp~Gr8;c&Q*B|lxA3AoJC7>i@qPm)SLMSOx3Qck^a1wZo}2)nuiaL9pB#f
z*KC~`F4&_V-SrV0fA48H*CZc|US7gBTeT}J$pymwn+!>`9W}O_DEr>BKcBzqkO#xS
zg`0GOs7OmE_L~C&)WG7X>v&W1#{r1O$=MCFXezbMybUZ<wT9f)2V49s72nPl2!-&;
zv;4ew+!vH7>T8?ibnG1ECEn2R8crqit=E+E0`#f((=e;KCEE)sx*3AtxVS|4=E3qO
zD?5!UT-ZkIQ&msyINM9t82=4|fP)!y@b0h8DkDjwxLE`jbZE=fJ;hY49{}UYO$k`e
z(Fkq--R6R8=1oQnSJsb~KMe3O-wg7<%TPmM<?X=px(_01)wd*``{kjtQ}1Km(el!}
z{PLMIa6Xc#f$Ln6d!Fb*x(oGhjr`U_2NXjR(@)BepAF~Ptt%^xS>!oehjj-(@v`S_
zW7av~#Lc6$$1*TxZ+Y&;X2$qyP}3uKZg8$o*f^%!IydB0Y)y2IszO_>+e@Mwtb3BC
zRiVvawpuNkKD@893%@3&E`Np4;mrM`2L~=Tj|+cp2KHL~8~_L%WlpDm#pqPSbl6lF
z4@nV5)JQ^*zw{Q9K+Un0BUEE_f!&dl#x~WRHe=2{CD6>eUajzZ|ED~{y&!qFW=qt~
zQo>^;p+#k!OQIecl7uzt@=Ia|R<&XODE!-}eIRfh%N103Uby(_x?^uty3IZ-=@}eY
zw8}(V^@YXIke2N7HM2)bq!qOtEk((demvyD>Q6#!L)ouVWe!y<wj<t*_P9&DK(%B!
zwLrfvmybw|!3Wh;WX>LHDlEnNV)_|eM>SH&Ro0%%tk!NhUrg;S;JLK>5o?`Xa|-7l
zx;V5bYAP3wGuSFr<;9L$*p$hksa1QI>Z}J(rr#}gEO$p`-1fD9bLHy;avpz>7moS1
zaqBY4o882Y02X7C2zKs*_w1LqeWROMD-@qKMa<pXi_*3U2j16g;%ie@{C2Q5$A>#$
z&Ysksot93~%97LLNDbVSk&MP>K4E*(NM}59i7T3$tKYQ+Z)SlW@NRU!Ifpd2=cDbM
zYA`;aw}?evxU-d#^3MM-dSZa9wx$&ED)3!s6XIgWB1K5*=}DSQIL$h{%PTFHrn*yQ
zQGfhO+BN#Frah{3kc^k+o@uw|Ym4`xUS7cwa~|kVgBXgeOUv~kh^<BkdiTyL<B*fS
z64{GiM`SPl)oo<6=g%vi0Y0gbRrOuD>{w>rJNXKxY{BvU=T+RSRky;i_G#ivl@(c*
z5S`5`A(iwqWM?%5WpW-R%p`bgbe$&#Dv46TF#P+k7mYXKaBjJpyTY<ru!tsNS}|=L
zrZdpE?eQhU8v8-V!A3B?pZ|X&&I^n}#AB1RLq6bWXB%ea%g^GtOHcdUuuR_TEb{n8
z7H!4#_&wp>4f!800oNMtwdL<xc~JRU{uyXxgLC?pynL{z`~lpWmVtVk&2MF$e(b??
zKV{#+xJv%xkPmlccV{4lL*g_S=(B0|(<65?xgB{({gWyM`PzNo92)9@_Cd03t_({r
zG*q+}+vW=<Q282fzgs{PxiX4RUD`&zM3<NF<X>6u*EE&nqjEocX@P<-Jz5di?BgU#
zGskTwRZ>RxS?=^(RvF=0w)uX`yevDJ$4r{L?PO<o?HL<t=L&>XenPeUT9uu^r|VL7
z<#ab2oK+%z8ihQcvuEXXX8guxOMp8+YnpxmHtb)ggj*K2*f*|ucL%C`&7|dHoD1~Q
z{fnzaO#V2%;iwXE&O+!8_kL_hU10d6m_O7^{adu}^GZ^giTVdN7&%R$2+F;HE%1X`
z%tYeDHFFcBhYxP-f;FgrY~1`l>L~Rq(op408DS|%fOdDZFQdrmI5)Tj&n79x#8dpr
zIDWG%(%v)Nywv(Yw@cJU8+6{Qv%T@G^w(h8PJ#V-lCULYp#rP+rREa4WY}$!Gfiy_
zh3@g3WMVgN1^U%5cZLt5M<98Qc2o}QJm(wCDpiD3=&#V<AW^1nrxp|X8qd@@4QQAo
zK+djU6#Zq=izmq0M8M_jS5MJ73asoC7K!{ML!grb^TMcZ4tP{c|Nafl?ZU?0{hs~P
zdh>is%!4zFHOBis2}hNy-b1%rIuM*kV$E1IHY|i0J%L_5F(BM+wTrMau`dmOKIcuY
zeW_<U6*Dq{xwd*S6pMC8DA<<r`VCRit~*j0dtGVCYgXKWlGxwzT3FFH9rzm9+7U3a
zJ0@jfgx{P%p1+`&?z?R2zLz^>KU|*5JA;*T!k3@pf&|E5ZlcuWbWnBXC;TBg1<@3#
zI-TVjIa!)B8c$z~K_c4RI<{TPl9h*^TFI#1zt4T^K|7qbcUBTAijA(F5$*RH9OxGp
z-s58azk<GLZqnx?8e4LtJ37Ty9Y>praIe9SpR9<7X!WV%;tX(7aDpskXLMU8F?(9?
z)bBGzc8SeVVov?(*TAPU<n(FHF-Bs3>MxOPOY}InMXMYGlgIRHEURYHU1I^OkThKO
zToIjZYO}t6>L!a9m%8uVr3E9#X_h|zGu@|q=R(U})bFJ=*BhzC^TwTK_k}l>Ua%?T
z(cO85O>s@X;hsTs6(O4Bx>5FA3lcl8JUd%FQ+AN+5b1@fn9&-<8Kj~*8YlH?#fwTs
zX0CSm{S_AF^Rt=FkXVLv<^>^e0UOaN;5`mJ?M6vFIo>$0xF_oQ+A>j-voXoP5|b6>
znKg~ctWC3dCqhn3B^$dhx&Ge6tFArj?2DVdA~`X6;DXav%Vg70JCqxBQ303YWWMMI
z%=k7RG&SzI2Y(Kcrnu?WUACpGz?KXTIm2eUy!}wM_wgqETQJ{;bby-^A$Ns=XV#sn
zKPJ!5wkdYu357GrO9IMI4XF(mcrgAW=j~?(f3_F(QBrQNL|<uD`bhQ9B>-Zu{LFOv
ztBkun_*n)O#^tFpt=Ud5hx^<Nw%K%tdAzT7I3^SHhL*%$Yd%B0AyN6r<=Uoy^9>tn
zksq-)0LJs9`U+G-r+r7+>eFv(H$NJ#Pr&Ob@1i`^=e=!*yjLM@r^TX>m&nPL9Kd|N
zSlX+8Xfd7p3r9yd$#=ITp9Gb0=5dWISUU>uycy0XBiOdU1=n<n<K}P6s1d;HWg2}p
z+&%_?XUE;>tP8mYM<j7~$$tb!1on(V_*R*NhXzr#+X5ZKP-?Xuop%^zHNgu~`Dz&&
zyXu#Ptj1h%pLE55<g!tpDbkzqIuWxMx?(LZOJ0+_;rqYg4+-CoN~({)%yH_m*;e~7
z5brQV@=h5n-H6}!+f`>{46ydXrzzS+H8KAbc-;pUw+)_1s@DFz^Y*HgJ$=`~2862Y
z9i`jh&z{?m8^+=C)g#xCA_WKXe0=)En##t{#<?5U^2}SScE*)CEd=RD=0JJeuVZXs
zhQ3t1#i?Q%5s#|JEJ2Fzf?s{4`WZpaR2?H=ru6!a!^c}EDx8In!|PN@b$0|c?G_iL
znv~~R-zW^nQ30Sm8AWTBDjiDA{YML6vf~{6#scTt@B}M{Na=v>Ze*6~i$#liuZSeF
zl97AaRB9m~o7K@$M`H+hN;CS^cUk1pZZ3e{AM9D@Y^dU+$kE0OQQSy-3g2Dc;;T%(
zRP*mf$M@dd3<*^3wcu6`wP%_TP~qrsWu*2M<|7Bn_F+7U-;Xf(RUgW*xYBV8c}DAT
zI^Rh6Bd-uXlO5_rXO6Hz3YP<Cl&PFoiK5+)Z&As-+qhSVv88w6EWMJO&dF~6FTPis
zP~A=yW(9jaZG&xmWtP$l)~8-gl=O0aKQU*_bDQzot*a!)ev}J4RAnk&njIZ_aj_tc
zkj48*VT&e^n)z$L7{<Gp>Q)rXUPQt{vA04@>)LJJ^R#0)H@Y#gH+9rz;O#UHmlZnR
zHc{Eb^f%j;E+VfL`n??c>_6Lw)ZUHw2~R6}dlZ?Nt6;9LQZX+7CeK&XLUiW~+;5`I
zu1I0gH=Ey^p~L>-+{pHF_&pF)Q|=D59zJg-ySXzG<`&y53?ZDRawGR@z^pP3vJ*$D
zR9#)ZSlYJ=M;uq~d``8bE~RZP<)eYs{O}U8Ym+G|WjJ-L!emClZ}e_+JC}Upid>Ku
zTDyXM|07;8wsA>$yjU;6A{pzYF0`svoCIj7-&3hL|7BC{@3Q5uRQks$0|V~iYdGU1
zS7hm|4N`ClT&q>pZ#4zJ!jcNMPQ4Y7Vll-<t#-sj`3iWz^ZU$-GdEGJ);h=P(UMmi
z>-}k2?Cm8XhlwNmWZQ{(6ydN;ZTXiyWZ2p(UfXT)^whXIk=w5<Zg0J%f2jVbS1CQ>
zUbvT)5?ykLrTk{~;KP?)k1-uqIXU4Y(t|d3Vplh>TcHPE%GN9_U2Yzl`gAeC%Yr{Q
z9z&+@Wn^(KF>O1x-TyUJDpgrpX!V3X6C}xidk>z6b@w|w2uzCGF5uz$#={aH4Ncz#
zj@~5U9L`m1jH_53XG`iBdNycPF5$P4+SmClXRl6g4^k&Ldzu*!Npu&feGR;!bpCOw
ze7v{v{lK(W%%xND{w|HY^7l5tYK*ch8ciS+n2~?x>OmW9<!gk4YtP4qX7lB8Mvl}U
z?}rnlv@egA%HUk8JRvbUA(YpnyA;{QrFzQ{T>cZWr6aeKsoFVn1bfZBl-oHTJz_e#
z%oPck9<g%Ldiye=A$w;yVP|(0BY&DTafj)QG}~s6=dPk>3cRlcPl`6go>%q|X-%2y
zHwaf%yY^b4(bBraMqxr4D~NIpKhBt2WWQ1#L#}<9rRsaId)~rTu02H+#vE)(c1GFR
zCy}vJR0Z#Js;NFHVDLku8U9k;AwRlLtGwRYRGI4QY`26aVtVnX1qq`89rOzBBgHpr
zHHNf>76rt3j$XXtMa^77h0PZ&m!57XzG=-yl!n1&@E!UZ;#!ZI6HKQ0J$snFv-gOV
z)E;G(n_|zq_D41q0ndBGO6;Es`EOI(_!}KD66V9l06JD;hZ!nG`yu@64ZHbVHzF)d
z4LT~*8CH}o$2Fo16MM7d4NFE-HHF$6H-H$>X;LwA5{nKu-j~9eQK&ByP9FItr9rsW
z7lVp4FFiLe0A(D;9&dksYW)MAE;Sk=)#?9;P#i-2DSTfgX$tZXJa^%|dN0jKDBR{r
z&4NcTpoRVB-k2du<Y<nAn`5}#6Ta0QMTHZZ_SO8RYWlEYG%(#dL-Au?z`k0Y^OYu0
zdCX)=s<5Rz<cfnG8@QOv_u`vTlAyQE#j$}qs;qIk-O9ZULD!#z3k_C<XI*Goj;W+p
zi&dOhFE1%m1a)2N8~5G#@lmcU)<>^~sw->a+Bm7R<R)(fS{X{wP^gw@cthTk$=Y={
zM|;{Y-6c?sDhXER_p|sXJDr{qM?JyRx>eGP^IJ;_&qM987lxjkbL?|%>P%ZKIFje)
zUxlfCZ?<RK<>7DlRbT>zO`BvfS4MA;n4SIVz7wnk(kUv1epGqc$A63Q;CpAXz`L!P
zheB-40>e}{nledtS#Wx#`3g_shnc1?Uou-)S1U#2xQr}&f3NX+<nb9=C5xH<+|(h!
z80o#*_SM$w8$4Y;WD)1Q0l$DJr9Bl`a2D~O*lIbj0^Tb*KX%fS&2@v<n;v8-f$6TZ
zwKne;sfJ!{kCx(_BD&k>r6p#O;?WF|wc%L`G2nN@g1TP_Lcwvx1vd0Q=3f(w4jnwd
z3-Y!=L0%r&Z5nlM;Qwv7aPkHsj>|``XdC&Ic#Md3Z9TXM{kdC7w7d6;@gm~p0BFro
ztMTGPWN48WefNTO`d0MReP46Eyf0@rD8OQcLR}58*z`S%yU9zf%ZDUvf7Tx43e2Z*
zd4`^#JQ6tO-Ie_G<BJ&RD9AgBz8<9++K>Y~-F@S;5d_t&F<a2GaxfR=<-(Os8gu%6
z>%Oa|PMgtkU2bjpJunl@?X1#U@7N-{Ur{!B*|TZOzBG<s;Si-B!bc?^t3+k-Hbmie
z1=PAkFG|M&Sz|X8gQuzx<8^wa)l6q0Di_)rRFbHtB|G#(Oy%pdR#k=|VjFFfW8-_&
zkDSW~&HW71JY1V0V8#j%U;8J~sn$Hw55rH<tTHEA+{mS>+wuJ>DU-NnetSV^ujfzp
z-CJehd}eFXXeolYuL9i&T-8_XTcTU<bbv{|YUlSZu1K{qYMDe0v7l$Iy)v?Ho4rMz
z#b>=Y<f_z4ZQ#<jm3)Yc=?UBOs)iN}E>63Qk!q_N9aU722;WO53TrtUyUh2T^R}B0
zPe!IzaZ#3admD)Zr+J6zmVO~m2KQbEMybyEh(jW{<9i?c6xhkxTos)PKahr;fA{wG
zee<Vf#%2789&*|jJI1dzngGtqbM~Jaen-R=mZ*hq|L>#!^mlITHjWAYG%A=r=%j7r
zdDlC|;;8`SW&G=>2^%AxRy#f{DWm)>5W%7H+?Zv<sE<W^&vI-1rU$g&mp}CtI&<C5
zJYzSIxkwSngstJyGNd8=p;$g9DlP_vUs?Z>0ruhr`NL>==cw(rIgqHAQ5y#XHN9em
zXXfW^ObE0;F=J34Tyu{`J;X;qUO3@L^m%q9=s`R`Pw}P#=RzNu^L;=Sh<RZ-mTjb`
z_N&UCkXIltizyerZ#tXYcdMQn8JNd1tfGLB2<ZVprD%N4?Ww_-ywVvhd#9#%$LkjM
z1~pPfI$y%G7)+$U<mlA5ES0DMCsuzJNi~5wlI`SCshGC#$r#7!*J9+C>LM8c<qA=b
zorsMw8xr^ZjM`f-g5Q5=68Z~m5If_+Bqrdc!J8v^P&}jO8+>Z$8#+zhTB1Qec@J4$
z9xpHuftt0(4jPpTLJo`RTz!D-E}5Zx>(q@iNJrR5Gl%h+bd15fi$!x6K?=3uHq%Ey
z)RXrfVSoK~=@7{JUCQp9O?N;2oc|x|czimyx}(!6ODHAOMvnv-Gp6IRmbf1wH=31J
z)VQd}@KNv*n+P?pwHP2Ke<h}el}4JftGHB{(S*0M?>6#^3EjcWI}MdsYVwVQzIq#*
ztN=(pY)=&ln@@4*8aRMWYq3LL(fV@=EQ5jHllbi2%!pBrv{9H&C-*aCO1^BgHPk>v
zD1&EktAkkRUeIv-uOkXNc>C9;wD%d->9FsRUam-nMvsh=NXaYjE?Z8cN_0ej6)ex%
zK&kE3C4~ED9;miBJf)M-nE2F#bW!=~r23SL`|ZqNg{ZOqkt3DWu$Icr{tNGry&dZ6
z6C*eYR5ihFAy{E5p2o4ZXCYS*lvVD`CNIEn*8L|xyWYsYnN@(S&pR1GGPm>En<cAX
zJk2u-M1i*T<pTvBcZ}}6NsZ0xHSHI{qoR~0x@u|a8T50fdb6*m_BJA_q}V?+92fVD
z*^52A*&}h1Rbn3ouwN&Uz1)RQ0;=_-6Ut%@S_c$ZD%iXRE$9ctADOA_m`iE>NY9TS
zO#pE|FTD;=VIV!IZEea=2d{CCZ0^OjD<(cTx4DjVYPz_<+V~3Xv{Coq&GzR~pbmpc
zEn8jFX{ApEYgCj8#Bs{HV^t>}+1oNni>$~DozD{@k?>hI)ZEaNy2C@ItidxZHt}L<
zpun8?l5_QQCNf-)*gGYA<BuALvh~M#l^1?4w><o7cmEiqXw~b$9>9CucRvLln=JWO
z%)U2eB3|cCl2Maz`Jxec5dEo`1P5QRqGZT)tqSLhhW6h6a5vE6VeDHTR%J8ptd7O|
zhbGm$Z5(X8y8Nuo>A3+3O}+L0CoRx-Qs!_gpL~mH<Jp5pA(Z0sKV33*aQ(Hn#r&Re
z{z5Jxh)|~hAL+9apk1YWXdDAi{XjXBI}Upt=(F{~leK9FtKfkbzA>&Q_*tiO)^z{*
zoAY6DmL!i7E`7=P_<;s;K1BXpw<_q$PZ~Qw*CBfC__e2ehSpRiG5$e1T>x64?XYAZ
z)DBm@;88i3=auUK>4i-}T<ia*rF<u|gYQ@Ssv_9z5wUE+&mtnu2(a+znO-wI`8ar)
z&w~28xa}Sy_U=-z*A*5rlU=_zCV^O1D&?H?D31|BqdmH%Y>c)!k7xLK$_?^`9~$9m
zcj71qggnh(npI8Ak78}EdL1sP?FMU4-NA6o&<s|P$5Ag+3=>P`P0i2GYEj^t-;XXF
zYzJOk7o;pJ7@um&xC-OmzSF3VetK9h@JPu&aQ^W5xYWW;$7=lRvycn+xlQC2W_bBK
zkJDrGoZqDuVh4|7WSTm(;Cf39>Lsd!v+3bFen?hLr8HT|w522(sD3jWnrl4wS$HdJ
za_W*vbc4q|1BZ<oc^1e=gc1G}M<~3m?K{ArO_f-$GjueT=G<x!4?labYyx!uH+0@R
zBffubLNZ#U@4Zu0qfS~smct?9xg>E{+DBgfxE7U+MgepQ8~L{Z`OQflV^^BQEVOlE
zISx-n>$aO#agNA^=`$^dPtPd-bPhG^UOJyMQzs~pN_j+?UJdaiX#1uat@toSeqB1T
z<hsSxhjf}U7pa=z+sO8`N!~7<>~}j3&lvju=z8n0DA(={RHX)qp`<&966uf}KtLn~
zX^@l#=|*5A1_T781&8kLlx_*>Qo2(@LEyZ=-uu(vIp;4guM3!Wo@d3q?sc#A9$%NV
zwD0aj|8Narv^*diQrK%mU$i{PB;rb0{aLgm{oeTDq;@o1FU(_jOKY_^ib!x`68u0P
z{<A)C@6?ivt#CCVyWC@^gW2rc+^=I^|7IFKgr-Rfk8mg+9FP4D@sfbzu%S*VRR(>7
zz}-Fh$a;(OJh+dDBS4?kKzpU#AL*!pcj=-w0hTwVtEr$Z$O^HA2<9V^oL8$9!5@Ao
zceJdpvN>YAW?@sMueEYxh5zUoj85kq4*Dz)lUh<%PJlRQ!=uUgC(RtMgZpS!94i7^
zdTu#ot+m}`wxoI8Q7xT%RVDW$)X=@<eS~`9%OYxmfHhf8=&*3<xD2a-ThHA$F-z`J
z%ALy|+ZNo!hZUQxJ=Aab@r2#>#YYspW3^;=j+3YoaXz`s{M%Mx9|52G59NS3HswKu
zKd)syt{2K@?W|S&XQCqS6VS5H=hOlJH&@27ocuAY_>-=D{>8;u__?%xoJ+lqkGiL%
zkuOWeGfKGNar4FMW9LJTJ)WgDGN(197yPE*f^gvbfdLx2+WxMq3{1knD<UO+PkVM{
z9`}7U!-rR%G*!q6_xnBa-nVu5>g?;rHOoJ26YUXK^l{9>cSbI+R2OL6eZN+tcoh2U
z2F0;9c+JATeFK=n3Eg_zB%MLWl;LW2<bFVEpRZGawnxUHfZ@v_kwW9e4C=@**^9WZ
zH`Ata_MIW%{EAawVN}0qpYcP*@|(BEmGw^si0(;=u@^HTg568_mnk$FO@becE+|wV
zVD}{u;VKR{GdR`*0Zwr5Ccp{H?LTXj*`vEZqJY<`ZA>+YRYgJ^m}J{`%(i6K^CQJ7
z?AI94x4fhy%CwrS6$(b(K{u@xW{Otg^YdvK?b-52Cal3U80|O>jgB|&ozB_cd8CeC
z^Oc`BU$iOzh>FPN`A$bq;K^YfEXKJD)K*ez6a4puYMtK$`ieQXFD!mUE<<4LVdAXC
z+=(dp`j35e)>?FZrX;cM5q>Zu{dV|9=naG5^EZPajyz3w?}jg!SsGIg;td6$H%OZ1
z1liwBg!WD}z;!8|jeZHx$~!t@-TwLt)$Y^*EG>xt>yY0dB}+VSU?@enF-}uC2zxsn
zwXi{Rks9QAb}ea8%JR)59M1ScO2(cZt2MtzfmXKqtr~egB-GrFcN;67LBZ{W?s#&}
zZWAp7BaecIGx$+{6YSvIgr|&OKQz8yRH$GM_}xOnv99;LEsiVF-omo^zs-9R!KQ$w
zBIa)GTV$o)7ON9Z({Ekrz|U4sOP1R;Q<|kdP7P;olf3+*Cqx3K)zL5JdL;c*iyjB$
zQX~G29K0ShV`pW@_x&fGLdqH52{Qhmrom&AeYmunt^u^>`L?^D!D*4DWMftZ5aduw
z!?)H&cHB`S*tV-tp`2E|jw$8gTJm6e#_>5leBn^G@kT~111sTr3d%U{Y#)SN-CkHV
zR|BbzAPL)%^IfA#Oozq~5p()FS`)~^t+<Lr6C|zkeB2L2CTe1{&5O_ULphWyCyMvn
z;9r)=tZbvv5jEz;lxKv4N`#B1bTpf@<Zd-wNoqWlaP7724{`h+ezxL}rv~Q=>a%2Z
zDR8c)S^l)}vTn|B`qXFBr0?Udg%^)+Ie4OLWen%<$<RNgZZCJm#kaY8yk$&hydeN^
zI_fbx4Y4i1ExiVW(iKuPFy*Nb!I>YOmx?kAlmYC7db&|pmP0A5dw;8rH1^m8BH8xA
zkb*Y<%kXA}MkfP;_+^{YH9ixk!@aZcyQ5$#Aq%FFRL^0Y70zBTIvml=o{hRp4-GYw
zb3>T?8ta>4;_Lf8n)`(M3B+G;R5vx!L51=tbhGBij|~(DAZ}8^*Vle1S9Co>(l&lN
zmu*~KYpRvbYNytC-FrKeyWzr`CcOdnWoQmHA5jlx<`ejTGr3X!a1}jY^)A!eORUwQ
zD%~PP=o*w`OY<S?M3o6}lHORksW|!IvOGTFmXQ!xnx7xlb&~fBq_sazxEl5xRv6$}
zyPt?Nz4sG~Yom%NGu-F5ji6G5x!ty>Gh~b{H-G-e{^-_kn#l5_Y{d)Q5_IV>Y!2z9
z7)ik#8A`vA-PJ6y1~g9WZn=*z<uFf38bi$<zQ`<58k@8EWKVZxxJcF9!<CH75q+0y
z1pII5WU$@DwCAlM;_Qn<(?$tA*E`g{zrcqh>pi1ExfG}5%4vmWOc#TZpYNjLZfNUK
z66;d;_LA)!7_AR6y&Q?V8n$-2$0sU)fkr?kIhn502M?a#l@Ha0;zS}5cpJCZ#r7%1
z#m^ashX<$RKTV&Wm2J${K4(;C;YGa*m_fqZ_)v#bwlj+0BNnb8W#L!qjX_2$T<A^#
z@T2H-=dH@+k%#W8z7`yhZ#Z__TaKN>#Q+ZxY`@j^#9GbT{{PDHq&_kHTLRWe2&hwX
zxv3}4?dqwyAc$k-5O#qd8%jw_RQI-#%ti_Pi(>1PIP?fll`EW8VrArLi$9{VRvg2g
z!K^#GQb|yD5t>`i%0B*K;Wm=uppN-aHUf80qR#PmY$Xs7AtN?l+d*~0ANW0lu^lPv
zO79^wYppPy=%6YmBaw6mJDGl>4ttQ5m*g7{mPNI6(`U7>c?|VxOvI#hebM-DcR%N&
zj@_i+t86GUg9`KndQzG<XZf#&G@cCiO<~|wNmrCL*UWA}(cmr;-*rMZ{wpC*r2sVS
zA}Y=ku>C%Hx2+^eM3OGaSqNJSnsJAzcJ*Pe5TGJO8vP~rKYrJ*4A&lAx*u&(DeN-@
z`~q)Gua#3DXmlJ2yH>W;EO0Xu)6o(j<<6POkE|4B6%2mh&$}_D3wBA!s-D~9uSE46
zA<FFw$$&TSZ0FSI{#i&}Yd=PCZadY1XS%bDfu^eKNo6M^I8jA}6`~><gvApP%A;Ng
zIWC|0XypRu=}E;u9<7cpQVL>3OS2Y<fo|TXOQA*XqzlERQIb%5;?2VUR{PC)i+Q-Y
zK8ey<w|9C$i*1~PcG7xG!cB<c{x?K{8>6f0A*@0Qr4P!9uC=%~Or!Yq$K;6JbPJOt
zgbA#!@r~%WpYQ(Z7A|X9BWl}Ao_Rd4EEwWHG?fpamFxN-b1aY$T$*0hi$pEIS{2gN
z-3Uwbse-u-BZ6p^jsFmZivI#l0t!=%5Rrk?5CqYRsFpb1Q-dBoC})qP%F%oIs9zc3
zvrOePnaS!(M_V6no|x%>PfSMfF>4Y&^;cZIjvH0~lmvyZvOb;=wK>=_5lGzWrK6`o
zV(7}1)Y?>`Odib%f0^Avwn+<UY7{wu7d!lCx#q5*pCD;+slt+M=&YRWnfaj9;|aD;
zQ<At`ONihTxUta?M;)3J#nr*MC4j331mviO+PsY)J<;uB3bl&SVZH8gS^n#PR31#y
z+00v}BSx%d|A)m@_C;<Njh^KX*g*tyohlbR34s~$GEzW|$gaJwcT!#wRLc_??WF6j
zrgWkVQ^Z3+F7ki8F>fbr;|o=WS+6psCP@;H`fgN68j}5aW8!4EIM7m!K@=<rCYUZe
z6!$As<0#X1?Gr|X_b=yQe-Q$eeH{1+vGVJ!NE(Kofbs9#q2YxFdtB%p9(2>wOH(ce
z=?088HYPPfe_QFJ)7yi6z1wJ}8qv?kM)u(BNq6n&+l@|ok?VjCGolA~_R2s|jeCd<
zi&EL%n&QDL6=`NdZq(g37NyLOJOYw_MvF{w_=(&db=s!rA%Mx_R}sOl?K#%NgE+Tg
z=Zay>9xh2SU#F$A`)`CEi5s^>X4PCAVCl<T`i$&VM*laQ9m|Ms3-&~t<&4>$S<IBg
z&c*Tgq%Ys#DKckN9c#*&5^FGwwyE0|Zc>TfA<Iw8CT+IH^^QFtly0*sj1s;(_43aE
zwnF_K4<3(_kxa9C#GZ!Zgm{+ZQuXY}TX6Rg*$%?I7-{86_H~)?FIjI>52mA+&L(j=
z?6my+v@ukn_5Uz#Z$K3UxA-nvkvpE@;u9?=Z@tKC!zohhHZBW}cZhnd7uRwyo6n@$
z(9SVs?g7y`CoLD~Fr`ed%Pt+k+yy>D2_Q)lGO+)+?&)xGN+@pm0$~#&f_QzyQ&tZL
zda$h?3qI+7qn!OR?34ew7zzGL)i#3oolTa-Pnqk8iUi2fa8bq4O3&@XcS$c20bQ-f
z*aj^^69paR56`E*=ZJf+WU0Lff98vbFyI0X>j2~=qOlPgND-+w;?m&flN{j+H08!_
z@^hSq839r2t|X!Qb-IaDU%-gx{pIx4)P{+`)Jw}u4ecYbh%BBUdPl*lXpcWW#GLG&
z9E|o!VP!-bF+3_s^uZyBYjy=4p~<?;d^X)f`FvS4p`zBEQm@)hXOo^_#$W$@%WB43
z(;E+=sWel3foOk#e;q;plePNQV))%K*p3s<QNPTs+sZ`n;LvRpwB%=yO%3!g__{nc
zO|NRi;mdYSGW3o+1U`8G`~6So*C8nj4VTG8R|LI-jS|jPTtJ78ty9b{B<DRtdy`%n
zn`9HOmF`t@LAXtqHPNIZo5Oi6vXB=N(7S(JP4H)3*qxNqHpS>!SMx=LW?u^aLqu$f
zUHFrp-ZDUp%eRSHH7Q&K>^+L>)Z*WrUj5KV#MUP+S63vgUrYp0yL84uf?fm^Mp2Y-
ztuKtu|9wNCLGFTVi3^Xv8<<S&SZA)POK4+?v$5eR29S{*cZ#3*Mk!lkE*9Om3-58s
z1*xN2^4tGH+--FC<3W~%WFgwBW<fPK3CCkQlg30hR+{mqizG}Az)Tj?<-_&vSVM}-
zx*+9(u(i!-<$edgT**HUyNd2R9RX(^4JNCrC!d@-J{*w(V!e>E#fMu^!rh)fIDhq$
zvOes!7rMu){%GRFSTt|_VcFJoF*KoY+vU=$FSh&EG5Xqbc9WujvMSZ}<^4Yb25_<B
z(YQCQRR?RZbm_U5N9zL<;GYXJ8rHtu^dKX;FZvi?7D!`v5|m>&O8jcEU`8G2oQixN
zAnE?ogKUHN4b)?UI+VeOaZ-OgcP*Nq8Wb1y(G2BnvLt0pfIXjhi|voGpyuCocUf(~
z()Fn-<5XNlqYO$w3Ae0uEY+-kqvSlkon`T!*hNAJ0K6TqMe9-CUAh0)-QU4AkNXGu
z#>Gj_5bco0E>R^RI%kDTEx-Tc>Z%Nbbiuejyh67}-f&?PI5rp^nStdlO%Poxq<V@A
z7n)>FRPgoG#e<{9#$NtBnXyx!DAL2HI;lKbWDQLs3rC22pY65sI_Mx8TL_{mI+=P!
z#cMrTGArM>mf|`3V^FL(TwoDYIRwR6*)?zpiV(Zh+^Ja#U%lF2XzNP}Ujlq*ND)XF
zuiw8VdgMFtHdxZO*O`{!f)aj*y@pCkZq=nPCkaOOR4c9<&%v_*`Gacv_2m)C|9g2H
z@01IIb~LO%{NHM}SNgL(%Jhq9VT&s5ovhTV^Op>KeOdC__3SrVR`c{(zusHpe?t?R
z2Pi=-@wuSDAEyT>VwvOGROR1GR988+%@-%cUKkH+K51KIe(1kSy*$dcy!^EmQRzC`
zo1N;DcX?9X%p3{%nythSTg=NZ%EK01#;H8p%nvFdcbPP_{E{qad9*%^=zkbS#;-`>
zwcoM{_Wc>4?R#<jfYOb^X-EQbKe)GpQ0Lk4-Zz7trt@9Pcf8}EnQ85A4@{enK^HS<
z_o$I1=r4C&o1crFPYpV7cqG|WaejNNlKv#urh1Og^{VV~f3o0no5thu%{NHc%FSnE
z#l8QXEdU(JQ8D>B=be~YtlWnvNn8>7lZ-+VqF%FB$}ft7()#IOX<^2ULsYgmePE?c
zjvdpL8xQ&KkioFUj05h`b(M%0SeoFOVoT-we?)_u;AbxIZ*nnR*;&jmQ!q_J11nf5
zB@5^TSZTVL0CksKvT8<7I6u}s8k4Gq_muEgsoqs}8(B%CSxLZDl<S`DxPa9-8?Rw2
zGRj^G?}YAiJ^L0EiWD_uKalD@7`bqYIQ^G+PRTj@HZ0Wa{Gz(2?9IOqQN`99k4EG-
zTQN9zU<h1oDr&wI_f?XBEmSh=mnDHglS2r}u}B)p{u(8b;~0;v!s=%nkr8|g=2#!{
z<JX8Cu(lxv-p6C8aZ3?m^ST;}-1%70?>Xt=s?@~?7rRsC*{<VNc<|G2zRe!HwQFH<
z#;hKY@g(Kt%ALkA2>rbsO1~$#@S-5vHyE<bS8vZ5p@E*zDJ5Lz#|SL%eS=<?dOb!&
zPKfTu7h0wd$YML=43X8k&GwcDY)>Du)DKmj18^j)w<LS2G?Veq*e!l8apxpq(p;`8
zej5>-?*DLG)BsARN?1QQ(TvLlv`59KxVwO-DxUW~^*-(-iyp21_C@8<=NEd;ro4l~
zdLq+3Y3TU+&w=5(j~aLBhd=vWG}xG;dk3yu2fB|%g?4$pME#Fz=$?%@uc$Qc0Ey<s
zitFd!nlLV9_y^R-cBlxq8Jp_d%((EqDYq|zYdKfv%~#LQcdAqUC-h_#sejM=UU@qY
zOB3aO(9joz-OFGFHBXhl%J&7_CNSm!ARzl*IiXLsnRe$8<xG~^(gu349IeD>l4&tY
z^*mdVcix!FYv1qeYwBeN-!x<@>9nlxAvbJwHOcPbT>hbOm-3ik(97lS_nW{ggH^A7
zuZgLrih4U7Wmt;f-Z^;ZwD!@J5qstZl=Jm>LJJ^#TwR=M1W|gJCuww8)eqn}+@T$c
z0`b6=F+~XcpexelpNGQ2q{%+f)A7jJo=OYmr_2-=9^cTcmb~DOaFf#oU4|(ibGlp*
zLHysQEq*Tj379UTgfmaH>)16G7S$|jA9oNLbXIsAYfJ4kU(H)inJz6WKI_RPcODH%
zjQl<R9(BwQ={*12WmPF+-V3(4{@!>j1O02Gm35yFDYV@k%>{OyW%IfK97x$9{c;bn
zUS(+6Ej14abcBCSnnWv%es2jFGq^k?LL10|8~@&K17qc4sd8=X+&}ucnO{7*+!H;f
zD$kx-=e$LT6{YN7P2U;#@C<m{y`Tg)j_;2F{>{eId2SDRwo)lQVb-@!EuycfS7win
zzAELruYJ@gteUpWLI>CZwS)1VUu&twe6#j#C4GKqlsWqDD-QzH0oeZWyJ4W{h4as^
zWOfal<X!C6r5%KwZMgb=O);m$UdARLE-p`-HLlJknn#TzM6!U3YkD^K=D`>4FIv;b
z(H12H43QQ48GU%2L=%*%5tJgTPr?%IF>9)b^xYO>Czb5;3#+(bMe>$1tS`*2B>9&&
zWT?DNn#4f$I(spv=gF=<zbRm^IE1+*%qD0FUVjmvbln({W^ekr$Xnb<J8=?c;A5eW
z{rTr2O3G<XjuMcjYe59SPy|b)3v`xY1@DmONLflINV$FQ{niJ_l7t@KZtb?9^lqQ`
zg}29X?1yWH>=i36oC4@B);=x2m_CDA%v<@L{Q(UDpz_HYx%~xDB~fiRv7L~2id)7i
ztm)LV0-2u<u}-U(3j`o!g(2N+Dq)TcpWjRDPVsFmfiK;ThM7hKz@lm5EGhP_!wJF<
zB+h=X@^@{a<zVF7RFP=&E<rKab42o96hy@_@8iOaZ4*BMshRWjbXrE5R|_6m6{%O<
zM%gqh?I@1?>otKB-uF&F)DvR#u70;J|DIfsHxh#%Ps==fGReAn&gJ?3bOMh>lm)7m
z<I%88{!Z15;;6zUa3y(P6(1|SqY|5c=}*;uJg%c;u*T%pIN2FSVthg@6|X2aIrAED
zB|b`@hbll6v5R;9zVQBKcq3?G%ys#_3~Aca&Yq-HhhFADY9v(ZP)i1Yq}OcsE%;Hl
zRm82A;<ku>yk&4T{Ss|(kPSvF6i&<uo%24OGb@&l%xeOK5&fQ+O@bpPsop0Em+a@I
zD(~aSRuf*Ol=OY}Ly{ziA|5teoXi9kJPFo-MWZxr71bA^b3A_e`HoJdJ&9GDKEk*w
zT%EdN|FcW&N-X#J^F<E_{F5GJ8ll%l`<^IJ?>_cwwdS8497cJ>dy~cyW6jOYDi~}f
zZ-h=~9gi_+4Tvd@{JL^sEpKrq7#qA@r%Z{Q`#HPXJE5jO_ZK#Mm{Qfi!uxM%a`Fkd
zK$PywxC|L(d&1w(;mSEtF!N4IIA|$!Z+)w{*)|aki!|e5?K0lQX--^D((f@11KzbR
zLVd>pUexEnc<K+I)SV`+UV-sYj;6`T-JeOpFmDCl^O<jXfD^}^F?_5EqHTR(41OLt
zZ{T}TIox%8-%Gq?(d#LThs`(OmEW6(eH{Gs<za8Pu&vG(z`hXLRlpD{?p)U<v1VZ+
z0Ntip1S^UG#a3}bI#ZG`5VBxtB;z(w?YGL?!7fya4(u}#8AfeQ_mhMF2HK+(z$B(~
z5RbRG@aKk=*m@_oHk%5jK7Y=Y=Sm_<p4`v4_2E`UByPeT1j)T?slEs*>WC42lfDh_
z0&IY(d47xw*OX&#J1ih`9cX+>6H(@Lw2{>Z$A@#-MxrgXo19G%Y*sz%EyO!?ny#=2
zB-ZuZsT|9U(<g<74W&{`i~oM3tNH4qGXaPlhZl5?t_lKgF*N>M3}u|9hQ2Yn5_j8t
z(=AS{6bYML!lLwvD)FTgzAMrBnH8sfB!>ApCL-`rA~mZ-nH;pn07sM>Pf_?l3O|ZK
zUm`;EEQhAFBn1e`m073n!`4>Mck7F3C`N>lcSNZP*7_5SL6FGD)a88hZpMz!_QQ}k
zQKGn65g;gD9ks1q+KW{{iLokzO2Ti24MkhSMPGY>-UR`xzs8=eTsphgf?7OA;WF8>
zJ@E@b7V$VA_hmbLoNbPyJOOtZr3oTW6Tk0=EK{Q$vj@CV%@NDr`d$y~2As9%5k1Zc
z_MFR%%{yW#t|ARr4*5t|>cQ%7KAw`|yFgI3eEZ<Vqw`mbLMRB#J#iezDEr-mi<bF-
z6W$0HoFnh^6LsH<Edyy0*YMKUb96oKcl;R8zcG6m0P5{vM<jnI0Q;#}6_f_Hx)PoT
z_{TUlM&qs$A=8YsfyD~GBhF8ouSA}t^#1I=wa7jJFg04~1f*N2;iWy`E%9+Q-%Gdg
z-~!-%{aMFBkpV%ZmSFpK*V)%W2fbCk&2Zfr0W@mc*NO*oUB?k<Gr)7#(-6Pbd*fsV
z5NDrwam)!2@ne$1YlRTm1$`CSXUy_RNqyg5YUsP%@bYy!fIa58P<_$3vB&xR#J>Y-
zA^gqctElsF3r`4qZtZIuv24l4Nav4Zo0x)|9E<Yh`<&~Q{G8_=@sdr=oQuvF`Q2qY
z*>2}?t>>ObzYDT_=m?^K!s;X8WmdNj=^;v=gYGp=(M3ZbG0#@J9EI9BD0VgL#(eQ>
z(3fvH2*<#`6bGJDPmd8S?B++~#DM2RE>ZF3W&P#ZmN9*A^VNmj(gzeMT6;LXTQH5<
zI*=T*`R1IS<0g@3P&QzjdVwS?{i*K=-Z5%Xq((1c6dvVB6fb*2(0Uo76$Qk?>DEM?
ztOxo^nq(%aUWalIp(??w=(YE~Bs8bcJt3{<=<R9rZrrpc?5gv|k(}^l0Ux(ozfVu)
z0Vj#`Ut=jMrn*>{L9jF&*%y2!mr@^w?eT#a-8z6I{9zj3OV&Dv`-?zZ<f4DVcT6r^
z<T@BI3W=$Tw@eny*rIkMkzw4da;}Y1ivoSHOxPq7B@Su9upiA)p)m~Mi`3sobqV3v
zh~r1A#criqa5>O>`V<OTL`9k4ZwO>)F~<8gj7H`4h;w`EEs?&t>{lK@^Kh!QI+%#z
z9(3%_6N?;$Lm9MB>U;_;WCVr6iAcBUySP1$%q8t-Y#^^1s}LBpV~wjg<3!7%`ca=<
zV?zMyb6lJnj0k>!2GcJ&0r<5?y|_~jGm<K2^)yP7wV291oZ(4cTpDP{1s+XJHt%n8
z@bi2exW!h=%#J9zCuy!MlyhhE<zIXvP|4~c-w+mr0&B*d!Uc`QyTg<(XDl43sgXtk
z6L>cCcJ4ZdjL&JFPKhd18roREXbIm-u@ItrZqgw~V~=JhJ?vA0^tv5190POb@ig01
z-A$>#mzhe<Xb2XZk(4;L#bRS%CF17YbFSYSpJFXjgUBPFM(y7L(bEgNiJ!5R_j8Rb
z4=f-+O4PyvNGj!k?RXx;?@RQ5M#7U)j-`juHblaBq#>Ga9{z54WpcsV2j8K={S>aV
zrIz>Ibo}DmYxei0+_aJeayON6-$=c6r3cI*2U7*oRx6pK4Z~7+O8vsVU~&z-HmOji
zG567Ro`fz!udzZXxCd}+-3tKy@-L_4hwkNElsqsrlo*}&`Z17a(CowM>?Y)&?9WdN
zOid}nCQ>7QB8P-+QBB<W0;tjna7YPL!l(=B7qsstpnwf<+H4f4hb6og>qLI^bK?g^
z@2vp{!L~?22e%8hg=SK)4YsUKXG7K{1;$s-x4)?5hwlj&@qV`snvxca%%DC1h;fT&
zk&yyoAmh+Dg4vjrX_xaFXWczekanoO>v<{A6%cp7q10Qxq5e9@i9N<hQ|B7E|Dw;|
zdD;OuY=n6PljB=5mKUqoPl6-xd-t>QDw~vdnY&4a6*A$yz~IosIuRT=MayoRyFA^4
z&<5qiQ|(Wpf7V5Ja3N%O-M>KGN4$Wxmzd3|MIT26e{`p1*6j)azw)y+F@sDIGR95s
zSZsA9w3B?nR@UN%JQ0ebac~6x&^#f=`cUL^^=jK6R(9nKJRv17Sf-vk?)%>d<h*iA
zuX2Xd{Y$(~2&Cs#JK`Q$9RkUYR1(rlN^ke1`#FyK{-i`}lkdK5Ta)Wty=$?*21wfN
z!`R`pWR8lkIF*o7j<(5r>AX&;fLy;PP(O7{<9O_Emk;u#yrU+^#lv3?CSz0;T9Coa
zU}!Zz+57elM~vEc_TrnAm=gA0lWCkDU%AYC1;}EuKg{jARFzeZOb6Qfuo&T^A<2Px
zH|TD!=xYaZ>ecTSXqfa160{9#k%yE4VJh@!2Z4{?QzJ#=0gDj>L0#xxv&eQYVB77L
z0miIRI6?&Hn}$^4IC+(LB`65kqm*4q&14xj>XY5M8Q4g>CKN*Zo8#o3^H;4`A5FV^
zfa1D$TM4p5{h>67vFI0&&ZmPV+*-JaP0U2_TBGjqM^;hiMFRnlT7N<C6%$PxP+O}x
zUQF7kegWp<_<=amIT+QJkFOv6c~rr5&N&YK!8RSiYsH0@usud7%hKbwaS-vNMeWE&
z$PQ<@?o5?vYo3v>GS2!W9RX}j5-egO+<ZW>f5A9?p#_LtK5<@`azIB|cGd3L$_ejJ
zPkgo6W)Vcu#slx4M``0ZG3p$fDygd1-11l@8nl#d(%WG^N*}_2fPik>ojCt-tUGA8
z^sL;(9oa`0CEpaHnS4JrN<AC_7ZMGFb0V-McWXU_+-xVWHnLMEXx3~Wu}r#RW-Nx0
za1XQ%>o*b((=eZoQHpvmN@`v#VQo}ed^KCgiFG3*qer3iK?aNaFD$!VzH-`rKZ`~h
z{VhJzw#hBI5HD7#!C|W;Ig^@RS4Gd@N;WoCxq`DL(Z4iLgdpG))88dQ>2S*N(hB>P
z^3@ybZU<{=XYpyny~jzjIlQqr@W(q9(Lx&;(W+w|k|4xYoy*SV*xS?C?Nc+>?=-@}
zpvZgF_i<BjfFW@)W~qciu<Bi8hTwt_d?r#ZiWCj%DUYLd!~9hiD!1=`W|utv{ggwD
z1T;a}<US_2KKRVQWZ#c~AFxNsCzq~fCA~6*A_PAu14LZrxuCAJx;fjq>st}_5tB(u
zCvi^B^kEF+?|$gf`+?ey1Dua0=pD81B_R8g&-+nxd!x5AHAtrdb%CQ76yhJ&CFqLZ
z3JT~)_6y!p#sd|IoJ_XixKSUP6sy9(od@G|Es_NET%eMr@W&5d%hC||p4NZMO}*HD
z#VI1#RMGgj91l(`uy-n06(4h7pZqhh%Sv9Y+buHlvo>B+v;YUl`+gLa6lamvc5w*W
zkZ6R$(SU=EBp4<HGW|_y_)QI`i<-5xcvYUG5&1#6OlD(q7VcpHkf*Y!sbPx^dI53v
z_TN}eiwvh`J<v|;5NtpnD6_Bek?mKyIhAO}r1KLVu(anD<}A3^J3Nf;^CyMXxU!1c
z+L;L(j{KVjYa`HLWt`Nb=-7?^5o>bAUf)W)Nz|5p@liTe@b9o?t^{)0n`fWhd(=Z1
zFvnz51<cknO-Fvx>DzTY=L6cc*Gn+ENZL`H=iQ06(2{{!nWKx*Ue@CH+*1t$@65Jr
z5%VJrO~c3ypt}FI9<=UiJ{Y`Kk!@tqs5FP$vuu*3b&p}<$0L~umNA`&K3WPsjxRoV
z4W^H)^|4X8^{VrFARnUq;872`E-X!;!;y6!fxV|>4BV+<M1wzY;aBP{NQEz$2wdmg
zUpX&^Fuho?#`w+)Lpv>si!FSfAGX)YJt&+GkRSJt7ehJ5kLp_8CX>L(NZqo+{XuJS
z!pZr(sWYNR?I20aS%*G$-nQbjVtGj2(h38?pvbB2k~fNj5x^EX2XP+*vpe&~`-)L2
zkOfU|N@FgV8-(#~Y{PEd)-&SSDmTbtV%z0gxs1#LX$QAVzhI>IjJ~s(Sl?d3a8NF5
zup?piZNE@+*BQ5~gK@GZC2<x!cVME|WKtba0Sr`zIEGz3SRm1jK(-TbLkW0O4XCY4
z$>botv(in2BNK6S6>P1jbg&%Hc}lnMcb6=Ho&oidebWmAZ|5K9rh8rR1KdCoPr1e9
z8s@&rccPPG_$G`E_PEe7Yz+1Zm&VqaIx7>PQRnv?1gy4^L*Z~*sFw0kYIOkG37hx@
z;vnhFFJ8WI8Umnccr{y*Dbt4oPYGBzccG$wGlzCQ7<V}B+r$Z>`?JMOeQ!YO)2qS7
zFKh$4uEVu*5JKsq@C$%?w&gYJr#qurW0{uE^w`U4c;2EcvJ@t^scif8$VWNId@$**
zdr;2E)um_2`cqG8RIskv+n-Ri4%ri=3uPa(g1xlR7eRFmhZS98GsRzmHxN<$+qpQd
z>PYU$*Ql8g%K1(fF#5@`$ZTn0f^r=AB2A1Gxw~TY)GKW8K90wjVohfW)jYs9DQG@T
zqLvDlG59Ht&JnU72c*<85#3cT=*!lCd_qAwr15BF<PBKk?6{^e7K%R$^BA!5hEj4w
zSZi58Mu3W1JbL7s-)WsRn%i%3!j~-pu;&m?D^Tw^CvlHSJfg|i1Zt*Nt@`7e$V$RE
zL)1_-%-}2i-lh+rMhpi+U@HfXvM5*wmWzUmZ71)RJB2<;O~Bl(h!gI(+OM*2UZBw$
z$~Y;BuMgiJefIPnH9@JJ$^4$<h=;esw#|L6ao72F$+NiT-<eZj!i(1_r5cRgPqvY)
zm>PobC=Cqo&GgGdR%@#k-g*H1z+_v}A<&)}@QI)&$p3@@v@%$CX$Xk4-0qeKMoNuV
zmLvlzM=|<ktrO*T5$s*7jMNR?4Gz=g8au2B6MnbeT}{dls!}&?ikV)Rkxcpd@+`xY
zFQzBU|4ByX#h~Cq#@aoBm#3zsYp`PT|JbGUptMjY=*6&1>JV7cjvFBdl)LtgO0&Y~
ziND(Uzj<Xd>Qg=!yV&@0ws3g%OQ>36=GbI=#<etWcoS%3dbQ*+?t=hF2Z&-y;(<$p
zN&bfzh~Ob#H*s(TGc3jFl*j9I&UN4tb|MT)2XW!1$k<L>6*m%e<Kcj!D1CpwUP4E0
z#qx}qLWf}i<kfe!D$QeO^R2883kxu`=OAN{67OR_;(H0`#q=2-xbS_R&Urgf`Nj{2
z@(KH$%89&7p#rAv!i<<T;;7DUTCtUw=a`1Z_`)D&A?HoC3$kh{0pws+SXPHS(k}*d
zsqZQ>6s8Sl-v`ur79x07-7bzNuq-TsC_3WzqhL&=t){ec@nh;5dSlz^O!}spk6XvW
z$@m|x?f}bAj_lt}(bG44x2O0lk24XLwBLAVD#$)*sVhPQFt-ZyfX&UyOzcB^E14X9
zK|<J(RFttT@-@6oK8;_f3{x%zzI7Ek1{=Rlx!C|$v&g<sCA#$QqK3m>;8l^hhKdXe
z9xblK%}jDIqgbo+saL7Ih8_|SV_%ua3w58h4_t!)Sr%sB_2#(IP~~=&Zo9~j*VIf$
zXUf_O27<?Fb4;&9EvPygIAVI;`u}b#0(A}cJzzL9pg!SvB@P&M4xg|D$Ns9|Z^Jda
zYWHAY3xB*jtWONy@=mRq@PtZ(x3_qGMg+7#(nlh;5ju^M%kJ`HG9VAMF=(o7T^KA4
zEI%d530(8#zN+wIFdb%yU(g0?VMWwI_dS|`pv`^aX^*X!d;NP&=ZZ8(^uj;J4Sc=J
z<49zQ4&0D70!o3_U8=nW^^rKSzj7PpD<m{*9?uUgkGBC<Q0<zSyO%p03yXwPlZkhK
zevW&-<Na$Tu9NjX40Zg@J+xd1q6TRAB~}_4@Z*U51Pjw!8|jQkG`WW(*d6D5uP!{m
zllTO|g4+v0jzL2yAR^1uAQEmf;#Puo@JWk~Rbk~Qz(%}s0zjrQ*mI_m!LOv9GHlWS
zAHh_L#z1-4YK%XD2byWae$18(ZOt&+nX#=ee7`h(FJT=|3IH0<I^(GeWA$9LB%`i5
zxh#&NW_oZR(0=@e)3GmG3w-AW%BqFd`vf=-PsVlZc)RYVk<7d1wDn<INwYv9t&+qU
zBzO(7rP=hL34q(5O)kdbpB<`LNbf!s+)hq^Gt~Flet>s^XN}~?qUM4B-6Bt%zWt~t
z{?&)TPh0!2zCqE~zr&}Y@1FTfF+EudVpdyqQ2*xJlVlCIa@tvTS%vMH;9Ad<ZHp6I
zdPMvZ^79X%m@x5lPzReQWN|OVjdii)Z9D<U{@Z<$OL?v7+1Z_~syOwVZvXrXdV<Gg
zb3s4ij-brbhm*cTul~?5T}xXEVYj4;nI~l0B{e^kGqP;b+xFX&vA(kNb%f>;X4Bc`
z7QirG<u%F(ZP{D59ks1zS{>!sD%Q(#)O_dKnd6ZYlNHen5~k3ctEHC%x+)v&W5TGJ
zUM*$ha3y}dC(z*_D}5-g@#jA56z!j-Enr4G*>r%+nsGar1UCXWwYeQ2F(=&kNKG&9
z8lmdDP?PV#4s2@{P|JxZx&`DwuRvf<H(%$SY4!HEKN7fK<$YBW)7w)xi=kmFHH(bf
zPn><-MbY*eP}6hWv2Us2;scF&WEOM{jsL>AN8pLo+J`s)dY=-BhTo(+a&l;28Rb1v
znRM%Ts}&Bhe{#Nz|10`#-ei6p1=-KU8Kl9CkDVS)iyu*wGd4^{;;^`0)5O$XJM^$c
zphs;B>H`?vd7PCYEuUX21IZU>$L00yy)`c+-UK8+G5_Mh<aS7_#?dY2B?EL<%z`~5
zeHROLV9Erl1J(LC9_ca>;k+vsqFq9-3O?iGpPMeuww&XmQ+{f`JTAu4diU+FE|To$
zdlO72>e4w{^twnoHkBU)srFw!qa{EjtxR|BkwXV))r#m}`uslVVui(_CMZBvA`KOh
z6C(hFkoJJ^o#<ARA|{7-L<P@}ORh}VuBqVv)i1z*c+db&SK+hwVmm)if?H+6uOUfr
zj@(c0!=CKKmn7+OftS;v7VKa5dU%3U?%WYRlarEAOOMFf3_6(D2wL=1d;fk=_(wzC
zY9iSYrHV+!=_sc<g1S$5xd65ppTwBM$*)wP{WPV7Uldwm`5FEhAW)Ld(L3l1EPD>9
zDHrbPab>*_6W;a(m{2FpZ|({|`*_lnT1{wmH%H_-3JG=XTeRC)-5OiSKnV4>Jh@ZM
z+4n%`<VeC_XGJC*uuVr_W8EJ{iBfHSPDsLPmFhtiV5i@ImkYesg3Z416$SNq)-x6F
zv!mR@&ixId_X~+AEdg8I*o}!i;{ZKvn;2E}7$Ez!{gRPi2Ohk}{As`nyz^zi4IEC&
z%W@Q0FqSR*SXQq9i!zPxwSh@N)e-Zd_m{_1Uk+yMnr(_|wZAF2;kDwE{(SQjr6-x!
zw6^NC0<hd0*u?>?N-$eYJ)X>wK^CID_l}m}t1tnI>#1vNp;fh1LssI(X+~c0GgXHh
z+&@()1sT~5{h|+yu6JFAJ~0N^um38{YvvyHo*OnY*|Wn8^)XBSizCf_73)}qxC$4Y
zYoHI7CNs_dy_ys~$`Ge((LfigTUI>v`1Q%Qo#Y_93WbmpKCk)Eo#%}JT|X)be#Z-5
z9Tc*Hp}&Wh2x{p6;&((q?Y@8ZIWzoquaouQ*O~zr5rr^SoI%s;`4CNOU}XWNq1`@1
zf8s3w&??QO#9cQB)xeAjm(UQX_w;R1AJ%-o=iEi%8jn*9#7N)5UF%yNt6vmO4iN&<
zyr-EJ^WG8M5v8d6&Jdt<Gv<iIR4h)u-72lS(zV(>%b)yD&2sb2#aXGvp`|Ua7EWe%
zUl>z*WvmtecsvgKK~3H8`Jivg2+fEsrzcyufP2lEpU<OrJf^)o&TAh}^A47nScFSr
zEBd+QbAkFWv)6IBLSx>RaL!{*3lO~~RYcHEm%&7jasAh~<N?@9f-X%J9s}&`_@oBJ
zB!qaW0sRTLRF29vZiszuSt<?Z?G`4#Rq7!Klyg85H)Rm-`0i<zDxr~Tqy(ACPYzQH
zN5|^o8T}VGoVCZz7u4`xjcOo3$f=f+ay<R31FTJTck-1{I>VGv$$17=ozZEnOb}lN
zn@j3UwaPJ|^aAt%zc3X)3Mm|KPdyLfD4YlHCfS)K*++DrA8F~bmx!MV4W@Y42*ZpL
zB{sgejlZj}qx~UWh{p(8?lb4l?u!C+yyt$DvS-<AEdo`F-)qQ^s#i2;x>9`%V+C@2
zFGYA2WPAYYQWm=Bj^5Gtu>P4IKp&e$h{1H{e9m_soN&jGg^UpOqu)OD=g{jgvWr-M
z+stDKkgL*Qd)tb0<WS)gsxuS!9<@%hKh(RoH}?w^smw;Tgv~2S3FTzpeut50XNN_z
zq^oBz2lzf~gfcQ&NUf<){WIvW>j_Z06b9XCs&)rSY<~#x7A@Z{o%E3DdvLQvWmOTG
zR2pJ-pzL&&Rpb1teyP{{6=P_4v4M{1jiO|19?;Kr&9GevyQDUDHe{sjb*Op@$Nr6s
z#JO>CbQB!EX}3$3#uf&0Mp|zCS~dTtL-0;0fSpfCzl{r=yB^r`o<o!HAE2vcpZS+C
zk@X%Q0)m?KF{!L-Ezg9-BNy3b1e-h~EZGynx7JIIm?WP4G3|YlcNht8B9h-J{d=nF
zy7&7UI2LR2aSEd>+Q5~60%JccLV@KB)RG}_>s@l1s(&1(U+rDLL|7WS0~HfGuabF`
z0{#7vU{vNAn9&P>NDz}IVTxxDw!KSoxkNc`LnPvfRaJm{Gwfx>bSp8H-WK&_jDqu#
zvi&70tkAd->$;Ky)Alx}wRNo;fCO!}>0KV6_aBxlRe4p?vC3?zNl5cx1tS*a-VGi!
zBGmm$DR%<D#v2Wu5XSd!<s68v3OsC^tH!UlH5G;4js+1_o|{w&zqcF;N`d>6JN_wZ
z1XNnWw<J8@UNWEaY87#;`@AXzAU4griqubt;Qc5Z78r7Lhz}=5xiAf*26u+^2zvq(
zD!T~cj~KCk_>(f@J$GQNXanv6aoHSY7KnK%9|jLH%uLfU@q$dPe2Tto*C%}d?of_Z
z-ak<qD$K6Ap7$WVo%it3*%nQBpOHr0lKf}9{nSYNX6>@1g<eT09MF1E)u8ABrPROC
z7CQW-7U{#5q2_(vN>C@a-K4ad)hcwo77i#d;$$J4(%-WsR?8gm;JYM7e=BeSVLSXF
z3n?ryL<MY$0!?WJS{Y<p$`PcaNKj7eeDW-vxb**Tecpu!U|RPrrHc^RY3~UCp4*W6
zRC=q-dnOd|p*l1=ab;&ItCC4ExezgbBhN_`czs3FIQHfN9Y+T+*CIFiVC0LsJ)qJN
zKm-du3J-hxobp2enhZU`9CwQlfdBr52c$Qizm14rMwsuV`khO!{{l%=NQ7UhDnk1v
z#N>c&D%Xk^D{DY~`{a$NK}hb@P6$TdxUyaSU<xlruE6Jz7tv-I)5hh`{*LXNz@#;I
z>2Q=aCm_Q80p+g%e|-B1i#c>|&B;cy@@DmJo1&qWf|^|$mk6ffJ_<ha8CXjV*tJcU
z+v)I&s}okY*qiQ=3{=e)lHoE)E)+YmRctIWHi&{l51~!Ov$zdle5m7QeCQvocFa3*
zLZ}L6D=_Ww)`LD&mQIHc(5nS1-xnb$)R_?H;3C{H{=jKX(kpkE_CG_;Hw7#l!|v7D
zan<>3Td_1t-I=5-S%EA|T~5g#Zh9T{HbZZ>L7$~$yKrlh1e%s{T8MnerLO2u`W=`N
z>GCl0+B_s7Mn3(1zJ^*8nLzLpZvp?|6fxoADnP%lQM(ogjtEL~-XcA^NW}w7lVG6P
z#GxS1ia~d+@F{xA*Mn_EZuY8J0nq<iw`)4-z56t3M_hwDCtB@j3CjG(9bR`(t~mia
z#_Q#1`G%Mo+8#}UqzpHTlgS+JNuwXYR^79Aa$C7!Zv!IKVmQ(>&8{tZf{}pp&H6d8
zo~oa8Ob&t?ydV9CiwsD9Qh}vyA&5zWe2h6QBaEZzBdmA_f3o$JI7Jk{_y{v%hi!fZ
zu*q7{7dx~%ZIzE1Mlclle=Y0Qe?QIN+HJvUf+f?^2Mf&?WYIoQTxANUksHmq<S48k
z1LcE1Ys4lY0;EUL^K?2={w1ppDz8D~p^-$vJ4(2C=$}AoK=LyK!P^oHNAE78SXR#k
zn*3N4*T5p-9!bt2j9uev8Ei7_{#l~uaIc4#M;W$(PQuNAwiV+)2RZG%3(o@%l1`r~
zm$)l{_mA7b@jJZTi1ZAInUmh4A`65-Rn&C(F7kHMT>F$&i(oGwn@j|GKBGdszTlw=
z6!$(~F2vvo5f?B`5P&8L?6JuV6E=U>_o%9n?wYmC=mOMaDENO|7pdPH9C)M0L-Q<r
zTI^cpEu8_D1Z*9<z9G(B(cdc*12sP2j&;!&XJn>U!nt&=&f9#|$ubi=GqE1my$arc
z7SosQh`Oo+3nvYkx(psUcNzZ~g|4pv;2LlCk@fpNT!mA<d&Fk=jMlFq>1gxWv5V}Z
zPMQkI)ceCk6Tu+y_g%+q9MYl6JarFZm5;tUP$3|c5w^_<sb02$RHMEYT6lkjbw4~D
z2*BIvxmmweb~;tY1U0`=mLfR~;6pc$TPZo<BJ=n;EhTfk^HOK<W)$MzI83)K>S}6)
z2KVY8z|YkD046TWA>}hlXA%5czc2ME1r#j8<UTTa)2t&it2~cdWxm0{(9&)0iL@pD
zk&^Z}`;ng{oABpU3Q|k`WG+}UpY|Amp(7@I_z*YPg~j-(m@j1*zEwU>3}=EqIlA;I
z=5?#_-=Pja2OL=j0@eeF_!o2uCY2}Y9;1N|rX6OT?_bY#K)ko<`dP})Cru4}>l+Jh
z7SkL@-M$at$+Wi|D~w&hL#>QmmZO7y8S>XO7xlf4f2N|GM8(B>zJgK}%V)f;dTFdO
z44F-INpm$>w{yb`sf_4Z-z$&eFM1;-$UES{PZC}x{NZ^kG=M<;ZP-^e!!YW0mSsXp
zAwZ$7@K~Q8BUm~~`PmPELi`l=)*)N*V{08r|F1Vib6pmE>Lh~`FR}`*GhZUGWz@P#
zPs8f6;j_@$yNdH<V52pU>U(`SXO1S#f3Y7p4#?eqacWr*Z5C1eU^PJnjp=_<bzsye
zxO~Fjk!PeNhJ*Mev4~{>|B;0+tWHo6<ZQf!;j~fq8%EpN5zl2CgU0>DS(1qqQ3wot
zd=5$dBMd;uuJx|&!UWNnL-e)abm6+DOX*u|_mvCa_1{)=UjXM?`H4itp&&7)1Pis1
zzzWgj@~3CsrH!Px?ZHhAw&-R~$+oF>3*L$EkRKkemT<kza|fK^Ve0O+p8<`e^l<90
z9-{SJ)Ds<+;~l4U?!VUc5lG%SHMeWz)^fe{+Z=QB+i#NZuAv8alr%0T#H&?{5PStm
z7a$qeEYVYZSZAn|Op#_P^c}9ck$%$Lh7sUUEej*{;gvW>kN@!PU~w%_umV%99p(RT
zD84z%c=5>P%wQKc7E3-QEaDM)o6;Ga4I>BF<eU^>_k{S!V4Q{z#-dP)H-T8fab_#H
ze?*?GX!WCO%A}CiUl*7IzE}RrH-G-hJ`bI9x?JXT=w-lySs1@fR|qaq?~%Mg6(*}(
z-VAK9HXjR{kYY0MgQ$<BN~dE4m)!0qB1L9mqB-ZrW<37c6i2ty#K!vUhpN>2bGZ_<
zpk2ypw9I$^r!Pf)Z{r^{(&b%bV*{9W`^Ptet6AuqEjo-(D_MwC_dZ4QsUp4Dq^PQ2
zk~#)32=+*z@FWA%Lr3Q{k4Ld{R7js}`<i}klJR<;kXPDAyj9zi_W1Pp&oCJ*{t-9+
z;DGITqhRm|LsxI?-rOoTeKOALeMPDJTTm`SdV+Ef$YN&MmyeILDVfJjb|sxtVoc`b
zc1(M25Ng~lLggRX?JH~L;gGHtYx`6e{C7@QCb(NV2yD$W21aaWbDuM%YIoxUFu4JV
zNBnmF@-9>HgKYB(z<HBjzWzOE`@u;4A#Cvt0JS7Oct1cahvSaP16j2ClF90G10bAk
zOg#Rj@#G1IG!l~7f8hqG!vHY;7&qWVvm|MAB-8N$0+OK{gC-B`?8%_x$<QMjz21<j
zc%+P3Vl@hk3wBb^nb&jlsbsn{PhTvQ3Rt&4sR}mz+aJV{xq#xC6G>LBCtN%ot1tLB
z#&|s8i5f`SJjK23;etf1vD7ipv~w!Lgq%gCW0~g4xIZ#gi`YwtI2@>MPiKa1mTPhZ
ziekN7V*0~^r^D`aw=;h=%M<+OP4q*t{WBDmaVHf5M4%N0^S=E$;!P-o6x<O%Va#x1
zAq_r#<Ko-D#5fekRW!3YZp!wbsOul_oer2&eny-TVNA{+BRIf;C?U6}0CN`G*VtR&
zFYFC49V^&swiSjU9BCh4O6kAeOs|5l&E&aEK6eds6H#QDwt*xCJ^tV~$M*~~q1*H_
zhmwgvn(Egp1Fyo>{$JSA_?J#CyFc>XS2w0d#2C#=e^dW2<cWvUyzvCy#qlv+^y<(G
zf0dwbYq~O2+uZNhz|#&*V36eOWF%Gseb%s&ZC@30I)KZm{SE<i*bVZ+Rx5xr?OcQ8
zD_eU%{ySzaPcNNS*7~JiPQ6h#q9l&nDNnnKxj5$!N2>jBg5y67KpfDy_}o@Qj2|&d
zp1RS#k4c<MsVE9NG$W3M4)x$gHxRrX3P_<i8Qq~Oe#Jz<S#)%~!sT0_U~oUD%jCoB
zrr#b_z>vTa*I%J*6z%SI(xmsLyD>5X=e5n07;b)>z{6`e57L{GNAi9xHaRXrup;nC
zKmGYNd{{CEfIR<I#ra`M6W`WX`O+KRJMeV>rABg4ce(eZk=#kV)#+<+hvge!P{??M
zhQ3f7a?hMNiZS((f{0t6)N?}UpxRN88}t1HCWqzs1$@AoR{86st3GB%`S@3ev>Wr6
z@vG*m-R8~ntHlF?%fCagiOhGjJV&OBW_c#w;*)1E*EC`e_1bVT6^8r<3{7@0scx+T
zBn=CeHrt9)*K(136uo4>MJtms&gh}#Bvl=j3^O9hsoISnC^;`nqp6}kVzThG?;r3J
zSwC6Rhd-n(_^TGi@pHO8`>Z9=i7LLn5`w5NXa#O;b+9xy=CiXWWk7^FNauUb`5a#0
zVmsQ#`6<FzwyB9WI!IYnssCeXUKbXnmz-}sg(-bKdn7WjAhtr(#n+(r(Cf>D6v=<y
zGenOJZvZ6B{ZB2cS6oPf9y-tT$eWa&OkToVngtfncsEkxU(pj}`dU+z4LuEy%pQ8|
z`C)~VMUC}`3@oi7_r8edv-Is>vR~z_eqgG*s$5^#+o~{NdGmFOG@J64e)$w5zyn}|
z2uY&6|GTp5(j@qe#GS!@!%(^1fR3tjuN~UxwY`C=9TSnJ8;w&Mcn6!>D%OzMfRC!Z
ztT;^`P~7y$bCMPV`^rl>1H<rF%tK@!(@hM<{jPhRHq7|IRG5VI1&26GuRu4$lrbFB
zJ!*FRb)WQEjqx7=l!wT#;fX?G(@d;dvAyH!XB!IUt`H%b?YtlNQ;DKn41Ghp<tg($
zgf}AI%8>}}&SwCjQ!w8gEG=KOmz7+X8+hTb{rh=mnEX;Do6rH}ap8?4G5q65v2z+G
zl@^A|LA!jKSaaU8?+vGd{s2h;zyRJ+ek1a8sgY+E_-;lLTM5cv@^#5)Y|T^Z9h!Tg
zth3FR0jCe`#F%FUZ#_6vdg4z`I`U9(I@5Zs3{}U>i2`p3m@Sv?cx*cc^erUy>O~++
z+_DQl>n!8EcWBEU4XElH&1(x@hZY50{CnK}?4TGq;W5$~8>ReL4EV40O_DRn`K{$_
z5W=g{#Or7_Eo9jiKV*|4l!ZM_cM!yQal=^5CMQXGHK)L~fV(2K98p{j{J%_!6ccWS
zN&YQN2FlLig)ee}L=^>>RxuU(E80(5TDXwn0GL0<+lA@~C`yK`%vf%=mGzfTx7Q|_
zYWBvlZwy7Zra-AO7FWfzILCQK^^b7S1{1dVVh#klJ{UfFufEoz#_4W48{g0Ugico4
zbEoBLx{{UweXbtMNcKt2=>NiGqMtbKNbf|~2la{c^zxb3velIgUDo1#Snin_=3ab@
zkmUf)jwqB9U}~5M($mAjyJ8V+#`O->De8ke8pSt-%0E+w*Jj)CBL7sCTukyE<*~Vu
zYZN?WsKoE@n}prs2~3(@&9~S0D4SnVk}V*q^BXw@j+;wN#i0m+8ce_MJ71{k9}AgK
z35_t^WY&ixd{#(e9&1eQhU;i0ug`d>XoXX2<4@BAp2Fx3tPCI+=p(ORPn?YS=hHsV
z`imzlKcAj#s;Xf-L`5EV(a}7?^BumxJc7=_H8(@tSHg?xL<E*1>upZXGJtK7*YoFJ
z1pz|%e$85G7yrisFn85>X;)QZTUr4P0Z)zc_5Qg>|6r%`?IxpqEDs=SLyaeU$nt?#
z*V^_GY|H;Ix&oY73l5H_;$cQxF);LPS^~U@$S&qqhM4~4W$3_Hwl+M2Zf%Qx1^m(Y
zBqqk33ndD-ktdd)BR{_hi->y$No-+pl&9($W?XTj3}fON#-PM!I1X8^Y#=agcvLLG
z66it0YdP@t3G0+qRl9mefrmN;^Zzb(8ta}GdI5c7jc|f2GBu(o=vl>#w30(@<NG=e
zey+h9Kii6j1_pv!=8ouMGz8wjgjeiQbwo>SW>4s5?ZxJ8cM;gMdA$z%8(V$xq-B6U
z@X-iq)lzm)ImdsI5C}-0Xa9M3Rt78Da`dagXywMKodqcTL87Uq8AHjF2ro5-#7Aw8
z_HM`-c(NT%P+Z4=^70AlmF3xIL?`ISWp2!ARwc8Wmk%7i=BV|WS%Y)Uw?2iLID?U&
zyhvb+K}OLkr~9H_*hO5+n8EemCr85HM65sE`EK!yEWM7McH0D^!Bmhmv;IAcF=j(i
z)#V493GM&G*Hs2Y*)?r!5JVakX@Lbvm5`S1?i7#~kj@2B1X;R4x*J?N7f`wp=~B9p
z77+aI1$N<i-uHWdv+UmI%*-{{Tr>AM^WR}UMrP%8<3*|+Glxe0j|Aik4_1|P_T|D^
z)@q-<(VZQWyjEq?^T=??iXhM+xCxSkcKRl*Ap9kC_CeEy^pQ3A!5*=O`_DUCwW+eB
ziJmxI>@olqVlPn29-MromMrRMT>r`}?vPkQ-oo*OdO1J*6tc>`sayWg0JdTpJ(B9h
z21MQfSG?&2coY3rd`a?sx(82PJ{mmOd775R0J;?vK~Y&kv(R9cPRtrUwww-{xt_|k
zl1=|Dp?|<{Xf8|WK>(rnAF8Q`?$Q!SRooNMZbPB7Btg1Nw?eXR;eESnFg7DT(iPS!
zv_#rp{!zm8u8o_>cGIELYy}ZM@y=7=m>{R_oX1MDeaAWZx3tSK?<<4?LI!W~vM|rG
zE*<=gh;NbPc-xFM@k{I->$$h29^tQeQndVI*^3{~0(teLY8dZ45XsJ{dF47W1qB@T
zc?%df?mBxZ`3ISXKrmTu@gz^C1<ghawQq7iU}79PwAfK7S;(XFGfypLToel}6(e>g
z3!^O2Ok-d)m+WHq&}=r)r9cpsB(|%&)TyB@xzyND&O~=mf4Q;VNZ)Rop!(dPFc8Ks
zHiTR!o|NeT2mZZ)i+~kSV11qodaTJHtDHk_T$W&ehJ7)GyGFjeVY4A32kG$I?(w;x
zeu~6pRfOVAW>TPkR9me<D6d(E4MLAZu&N6qCsaCs%l;-z&Mx~CNo*sCDOC4PcN;0H
zN723)Jdmcp6Kw34h@ld@@>d>$%CC2K?YVq2<s((^6W|D>_kF@7%gdfZDvt@>dk4^4
zZfBsjpwIPf7pQ3pPdxT3SHC7EC;%+oBqJJ$l_sL7-=Dml_L-)^5^iCSWGJr1=@T7&
zRBi~hnPU^)QP{rzMWLeMJ1km>X7LB3f}JomT);C~c|q7lBN-MeGbpOyA(_6Te@ZNX
z7vteK9G>Fy%0|QoCHR56$MLlPx{!nz>#Xi3{-ytgoE>ljK@H#a_w2HA29|?Xfj+bp
zc;uWjq}6JTpM*gveH6!XP0<3-w%?YFF7hM+OUu;Gv?5^2Z+%qw7V)19)k{>uS(Acd
zi-6@-y}v{yI@3{w_T@|)RTMNMxIJgDIh7TZYgOWhu(hqhC~&_#$x~Hp)OeL*MbDJ{
zTj5P&j0k92TG@$4|BgcQ@6cBkHcZ}}?G%o?X_Nk^niDkm(&A#r%{!$(qtQ^)VN6(j
zeo%E3+~~lDb;_E~S+5=N${ujAiI4YM@?!B(nk+-L?9M0PxEbX25?xQ|=Q<+idMzfN
zDQwmQwe7L$s^Ojc*l)j(w5zp#kSI3%HWDGc%P+W`CH~x3DcnJm{ciTX6`Jj~>AtKm
z9#Jx1-u_XF{8<GX4`b{$!Os^b8)R`^#ilj6yN09cpqX!z53{Vsa(iN_T+IyDXs|@@
zuqVT1DVL(0sd7%jm7=x)+EYb8@dY-=2e5W2zNn6}V1{bmO=~dL(8T7cmaRQKv295F
z>^*k?obWX*TnaVPC~EQgHw_*mF%pO({+1W;H&ng!TPnTvMMnYEC?*ZU(4(X^@mM54
z88{d=3nX&a^>fs;CynLHJHb*Q0koOvzEB=mI2B<nD5x2axQ!(`+<fkyY=WY%<ue~s
zR#H<75#5`<RbJ+wg(g#Y%2YiCkm35WKsbqc6S#N54Qvskee<mY4?WkblzbXa=y(jf
z8M=(TQV^~W%-VV%bf{NN@F_S`1Ir0G^GdAD(kKSjW490jYXt)*v<84KA5vxo5X9G|
zrLO5AkR64*08&;2Ljz3PE010O)EbCXnxAsH6Fa$j`&0oFV$P>`If^7S5q!!8LO|z9
z$6(M#=0{Ohd59~m$ZTTYXH>rLT|olrx&BO*B}dy5H1WRf(W<Dt*+)m3=4F0|w^|3;
zBiem<5@cISR7pBVj@_y(T{V~2t45y?99<mAF#;&0{#$|hR`PB(s*x%|rE_Gso*$hA
z+V6$NvKiBxhH>|zg$sFeG1B_1X_%l2Tc{;eFW=U9I36|P(kr~x_H8yJY&+zccdCk6
zDO}G>IWMM9`9;^D`HonP31_W@^IP{myWx)kIkR~G@MbK}K<~3gMcQMuM?sD9TT9QX
zC(1J(78yN=bZ!NyvTrG1knFXRFKj9_Mt^A-nUPet@n@I=-8Bk#k!m5wujRzw>h;$r
zekaQwo$bQwDx*mg-qBDQpfUlp#f!y|bf&Wj^%X^NC&TBzMqk!82Cu|36nEHc8+N48
zS58b(;Tl*(%!%`{m)ocNk1_V)-nSWtp(O){4R(CsEGv-zdTh|dRu9J8Q~cwx*UhLJ
zNW@it#|f>Ep5NtO>yyy7tB?4Eh(7+jl5#^#=LPpil#ch-ShvKeiR30{Pz?Opl4^z@
zf4oIGy#0gzrO{oP_#DnAK>~H*K??_HRozuP@dUn5-Cg|Uoyyfg$vHEc{MN?qWhKV7
z1l9b;B0JeGJiNJsxSGP9y)`o?50k?~KdTh;-?Q6yW$*G<5Dt)gp<E|Jxq`}%auusW
z`)X*SX&}M8sf2JHAf4$=X0qjku|@2wkJTQQZt>oRxK8xNFYuVXu-*)xHV%^ACpFzt
zTL{<RpK8o&1(hqDyk8ni%353!A$m*{e@Go}-YGs}uV2*LGiqM9?^nm3MO}jR_U!k@
zaXew(MStF4Nfz0>N(hMg(q3AapETqy&eFI2R5+P7lHiK;pEhGBx7gc|zbEW`gaajk
z?#w5>PAb&L;oK#UO9KuPB~>0n>9uvA-e~&vfwa@Tr>2maPKI_cP4e~Wa!!BAGZi;0
zVqad4`|z>B%;CNaz2XPxu!F!@RL`Wp(M&b}Oe<eHM~y4%L7<h9w83T{#mki5%rZUA
z>MRcWuBq4py>YXt##dA>=0qzrGqLP94cuE1+m$6&?;#XTO-K7$TIPi@t%ww1`xj_}
zM7XI^W`4xTi4BW9iNH;i2Q;rO=-2#wMzf_xS-vl+di+-8`(0jl;%sGX;mXzL(BnsO
z*Z!wkVxmB0qi@@@6RI%{U&AF55GWF7%&5+BvfN@|MJG-{%h>A%(ok>TQYJl*FO!*Z
zaS^a)aMWx+(kcPFo?7T=a%jS6iVON(x9TBEvc<B2EY)D$pP{MP;Pq$8uw+96ab0r*
zx*?T!jfZqkSzFs=>~WNiMl+^A*nkZrXKmE~4p*>0;S+F8-Kwf|NWi^@*hx(3mkhK;
zv63Urvrl7qU)o>Ex6H3AjRvg@4v%W6mn50oF4mBH2ovmU3cY7D68*mMSnJ#K+_`&$
z>o~{o1&z{PT97lapw1aD{EG$EuVJ5>sit;Fp>Bo%rz12avXtRqSaD=^mJ3$N8Xy|~
zQ84CrTe^(}Sl#N|U%A>cM>@K_u!`a1UKM81foD-xoJ4b@T@Qgf%dejL*qhz!Hn;Uz
zVxwOlO6CV@WR)ANnLiGWg80_Dy9FweTHK1yed)br!Xh8n{4QnK<8T6))RO^?ZiwJL
zt!!*QNzd77>h5M$t1=etYkL|!YLe^VT2_kF@r_4bj-INl3-+PLYOH;zmLt8Q8x<X~
zOJpdXo^R)kN1H1ew_wrnuql#xFw0o&tBc^VAO}oS^AHa(O?b**XeLhOpYRl?k%N-L
z`~mZLri_)foe4VJ{?So2J>BN!TGBm=c)QTw@yxJ7ZPJMW5AaQTr2D=2p(UKF6qhwV
zuyx4rfh`*v5k-J&NU=LK*#4SNL+)87ZqwiX4H#imSe#tRri<P_B{i<LM1lM0{Af0K
ztxQE<y~^@7B)ze0q&#@vLen{cFcWW^L+{2J7{H_FjplW;IWQic)r$SIwX%rDC*@A~
zqm_z~dE#Vi#~wacx2nI3lN=8w=RW;uVXw`wHl_lDnWW}MC4$wa+O!(JwRvaT{|U>!
zokQ;*`;<-38?958Xy{|DWnjQU{kfS?JaegnHD@8COYg&yaYq>f^4GPRgw!F;18f>n
zcwaX+tv?@`awp$o0-5DacExdoLkHR_>NsO(Ys`vmHG%k_Ah^+_pg*>%*>+Tx?>^zD
z2wCy}d=4^jU$|$E(m>PJSkdab*r(O=Ky-^Y*D_i6@kVOW@Rx}Ry*v4jBB_K6dL8;{
zh4m(C(Togs|BCzXlZCJ3`?ke*;^Q|>wrTSzD`BkJHtYB6ZfoS2WegS=;v_&7xlZm!
zNr28wUr&yfNBzWjbYPkLzOazG#623;^*J3qtKpBO8#c8|3YBFkYHMRCW>yJ_H$7D4
z{;V;P$ZVUobSFPCRkSfrJ;u@8%^n$)pqCtydI1d|W;;^Zf?4GDY)^;llhEWLOb#r|
z(uY64Cg`31GV6Ecm}<w{3L4pXYq_$8^pgDw`Z1c}x7P?{X<;rlRew)(?-OJ!vQMfI
z$sW3t3s&CrZ3D+>R0*VrQO8=o%eI<JjU5P}=y4a%;5_y_GL7r=?TY>BjfRNHTV$WJ
zqtovSha5Le&$Z?S0KoPuVVv7FcKuaYnLWA+Q8tm;RID*Q2(wC$Y$_RKaDdsAB}>R(
zCn>1kR@@H^ywXgJr1vP}h#o>WUd27vR+EEUO9Vjc+U=^+1IdP)dkk~nhHu?$v*FVt
z!0S(EaSP-kEru&?J+q0kW^5b9M3~z!C0@dLaJCcMUi#5#AVtB#0y>8j^t0>Unw#i5
zFeTRBpJ;aUY3}O}B273cI9gy-l*9Y^8~7A07|bOxH){5fYY<kZPq}exajql&OM(iX
zra2)|%XIeZw#m^QaJg5^?JJ0Nvy1SPx~P=JOZeN&4Hj$!^-!uXN`9UhZ4zRi`h1T8
z)b0ox?I>t#kM{t@2Sw?3<}*=dN2o;>z+&F%FMYXdG^H@9ic6e{Xa0?f4e^~=Z|nO|
zzqcQOKqPAsZ!&hvBGtTwOTV)Ta_vJir+=z;!u~9$CaD=iATc+Zcysb`+Ubq*z!tY$
z51wnOFye6OJf`>F6BOF-Z?;jWi!zvTWqx;_wSNqv&f?WZzSie_ymB<p(zWbWN^H*3
z7gf2O8KJ2g(eZ4QuvY8XiLDOz4{)&3^4cm|6SFq}Zmoja8{8+o#)Yh0E4EASaHc?t
zhbQT4QEbJqF;?bh&m==z@bt@7q)o=3TxlD4B-_u{`~`rt+V<CznMH%68qTCQng-W*
zJ`DD20Os&AAG^CY-jV>xbTtqY>7gWb9n~~_@vBo<n90F$u&-cX56q=rTDLC8WKjEs
zgKo${y3j$ddVtCQ>%H?I@?=A$$FCh!shE%b=CkGgn#4Bc(3<7FC3BfWHmst_4u_n2
zsD&ZZ*0v2zxN^rUrFf~SnAF_GzO_{GNU{`;&%26e_*@F$GvihbM|yM%+T%<3yh0-f
z4}PcP!uUl@O-Xvm+Zu2zo<0`k<EnMq9e%uOZ^d#jQ;>D9IShJ2Tr)5N<1d|Yno8^#
zU6kH@6;kT8oi!CYKI#Rwg2}ZZpF5UJPGeD{C}_!LM1WkLOp}i%f+K1LH^wTlNb@l@
zCto>-x(@-~u2+h7?C;iek8HZBWb)&<jc-_WYV;u+&I3T`?+cxU{vZ(g)vf;y{dxwR
z;Qb~lKEML}+q}?@uA|85rumBSJe5h&7n=%>pP|kg@YQH1uZ_|+-3nTdnV$O9{2kB0
zEz#^j(7t%{AweXtmgrII98v<7{CF6>QIX|VOlO|-v4TZAyJ65nq^)Q1>J->&#|M^N
zqFFQ_6CHM&77(rbM))-bmXZ?S(r^*F@7P}59eAhTjV8Q%U#09%aihcYZ_cONudHx+
z`+@Wr-E&*=^7d1mZGUZHOjCSmq*^3Fy}(g*f&!izTgKjof>K<|j+JlsPswBZohqTe
z{=;;A_1MpKO*H-u$gSQ(d(H5g;b50jH8z$k%}v00St;duQCV5yH!45hY372yCTl;H
z7y`UGFa71&WUo+3u($lI@xG{ORf6)^$0I5sxSN#z!%hDEoNpwr&(^+a>no9714~aG
z#ndq28(TTUfp>b+(IN8y?l*p#$;aPlo0#ejvocqYev^vkv|#54?35;ft_Z}}+N%(b
zde1QCIevht+#fK+yQgZPcI^<!e(62OQ&g=8xxa!+l>DQ|32>UGMu*+A&|A!Mo4EQ`
z(m5HZA#TByccj?jWzZou0)6eQ31yO;rBI9P+THw-LO9|8fI|zl*vp>-k2%QC@5=uD
zb(Hn>{rY1T-h<t+EKKqX^g+2^;Cr4nlqr-Ge-Ym`uP&b~FchpSZG57^p*f<uxV<$(
zH1l(k+2wHaYk$8kYy<y#b?n%f3k$^u&b|c#e5xcurh7g1$2c{l$pM)d(;qms7d8B2
zNUtJo%Zw8ZdATlc)@)Q1V|w#N7HPA4eMMJjU91$CH6^e~e>vAC^nSx5+@P9wDcriI
zua`1qo-Er&kD76HesAu9bHw#ew4n9)q5Ll0+iaf!972y&HT=EjxFKUzcf$MND6EH%
zo9Fir9VH(uq^t7(a6uC*$Z*!ClkMM-!@FJSoY?$2K<hL`nmZl}te7%D!*y%EJ>F42
z8dmh(I2y0b;t0Cm4dsoT&ZuJ)rJ!;o*#51uuxnvxr!?ERug{F24nN*pPia$_PgVWS
zP8M8rf7UW3S{u|sY_4R^x2rsoZyOpj1Ce2SEhGy%*(1FBKE6_Kapu{kPBxfZg4h&}
zr=>vUB}wScd<4X^<j<M#>2$HDcQ0(h?{~(nG;78P)fD}F=OU*gcQ^U=l*p<a!xHLT
zY+Qbs{FYQ&!DX1IlA)6j%5OVhnHH{PVC&m$ZJfbxRyt6qZZf7N;Y1jqz^6X)%TQ=f
zl5NjR0Xien_Al0*vKG&(0(mr*UA8%scd_{@OOm1wTNO9^>XZTw?#g>79*#$FHkerK
zFAu@-NAL`P2)1DsT(offxzChGeqI~CtuMt5){@?OZeei5&;_jN>+r*<&;fW((^<x3
zW0UpsO^01kd`GvlDN8Gn_VZ5K(Zs%Cjni($UE)I_KPT-u=8}FW0shfqLz_hD63qh-
zuc9{({bv63XA%xj6Uf>lD?|R5k3HjNO+H)S;Nnv4<O}~`uzA<ef&S?T)YvNW=j*uM
zw#E)^4LPZJIjMt6)a^-vMCWkMP`Lk?oz)mq`asb{GY8Zd^v2$|E_S@$9_y^e2pkgK
zWYmzRFUM=i>jCI@()~#>i9`jw*rYecIo@pUq|zs!UU1IfFAK*}`i-QywPaUOt7R(E
z?<v?aa3&)P<{vp|uW4QG@D_*>UWGJqM@bpoF_1PWP7^oc%@-J4nzbCDIBNMz3^QkN
z5ReD2>gGuB*<Y1UsB9EHF`eof6e)xgaISL#{%PN0OgaW2;2lnScaQ|UAm+4~*wYBz
z_x;N-4={0q!Z@Pf(obsbOgoVshh4x{HzvyLh3hlLbm8NzNDcHhm&2N0sR&_!8)XZk
zxv7crd=fic${~haL?OJ$t@w!BOY!MMUS*t4C#krMF)*z8Rmz<?hdfyymHC4#Pr?>d
zo=PXosOJ86`c4A!yJm@Fmb)DR9;(38I#M~&|5e$BNy-DP?Oz@(dG66w9fVqv9lmpe
zDba_=JwvDQOP=(+CVOX2mDJg0*ZNB`$3k0+OFRt`vCw?G^LyqJ^cGH43C2Sz?LeJ{
zUt@35SGIqj_#<d92Zg7TX9rWo-0EYJ7Uzr|+Da#HPOd?GjhJq>w(qTehF%F;wLU5a
zCNKID<doufgBOaU$46WHBRgS6#$d(?I9jg1$2}``;((o&|IeL{csp-9nz2`zv2k2A
z{!3*W5IeF}yCXU@W$5Rk96Ah-b$DHI(Kh>+mRj!0%6c5YGK<~c>w{{Xx1a<IZBlq4
zK66#k7E;uD3O4cj0IeyTm^L_G5=0cET-PPgUq0WXjg1B_Wet7W)6M;avl82(yr=Eh
z)pSk+7?6XgEV$+etxs2z9CH)k;q`po1j%c3A2BusD?*|uy0{Uk1E^{8&-_?wLXXzW
zehM5~vo}aX2warF10C1EP^|mil8{gp#8d0&vbns85qkJsxGD?8X6s6WuFEP}X31Me
zxV_0R?v@<Kf^-ND8qaZxkx$xEr0pdS@3kB9g-Hc8OUrEeZ%FM-`=6XXALFD+FYBT9
z1B#G^l3_;>B$4;4DIY<H@I<C6oY+f0rNP_pUKyNcx<NEtp0IzTBmarpajcarAMD8X
z*&a=zhm*^%hT4DRrTSeg08>uH10&n}ZMJyHZ93q5a^)Z%G%CL0n|`7|U}5%*V2boF
zaPg+c$#%K?{(|<4%@?X_7=;IMAt@;wF_53*Eq{D)q_B$d!@|d_7KYVa$LjnF0|ZGC
zgmQ&4SR!rE;nPY~5&E14CB|G4rfi1y%+>gARgTKKuxKrKdS%Jjr%V%d+>{pOsq=m}
zeGpUj5V~8lfwTIE-R-!Ey)T=D`*36_Oqb7h8A||BxdLJOBkRoNgAO4TGgU#sYLn?$
z)Ua$th=NpjivH@Sp33|z5R^4Cn{Q7shui8TIwFjr$wp-d;n{EN9j>N-D55%&%QUQ)
zE6ClQH5;dm9|yuu<W2sz^^+yj7xFToW*HJlz)89pop<~0h*Zn;*r+mm$Bls<KTR-{
zv*0NHPn_vuFE(*@Vy+>R<Ekvb4QpF}NkF~{!-is_W;X!V%eg<AM=2jg9{N?z4Suu&
zL<l@QUAL20bibRXdlq;-^yKH`du9raq&GB~g1YaUJ6}k4B{DzaZWNZPvD8LhG2ql&
zXd7_yTDF6IX<OKdveS_J6I8cbXY5Ks#h?yJ+#3}Yd|^$x?%L(z1?=PNNEzBKXl7K!
zwazO@bR&Bse)X)LtMR+jWgp(S#VE|Y2|ier4nOo$l|9^OSA*~kQ9&S2-aq%|*teCG
zi)Tp;&xyT`9*;BjfjJAgiTrGyp(*N}Djgmy@r+;kv%hBT*u_jmfeK)W-Lo3BAV`3_
zPfCA9!m*_C(pZ>38;m#ASL#q@A!ThW7Y+@;acyuFgp>-Xe5#)mey0__4KaKO^+qbA
zqK}dQmw@un&w&R|BH`Gu7x6~$n35{;5WQ+{ujZtSJ(G0m*l<4QhOj(+Q_#w~jvB^7
zt+}Ul*$c{#8x8k*KOpKgBeb~G9}y6}6;E2C8yidcGbTTFVBgwUL(QY?x|i(^cdAJb
zlihsumm;`CO~*g&;Q@uD1=C*07Ho7!ArtRlxPNrk_wJ=VG(F!#cunkB(r9BYzq+<D
z89(WMeC%K4zM8E}--SIihfiP_SWy={v24&JZ2WvD51eIyf&3CPtXA5D5gXmWT4RIw
zmOC8BT1$4@6O2oFrNc|235@Y_c%;rJMb;J`kdcvAiPWk#lPMWE!%>Yp<@zTk&QoRD
z^UmqFA=6#7P2ApUFAyQ(nTpfvCNuV(cW|e6>dpd?z!VGP9!HU~0HkdGoB3~&CaWs;
zxAO^q52r}Gyd-VY>=z!>FTY#WdVWJ>w}0?3*Pj6C&GgvA3fGH+)GANpxrnkf(~8mX
zW^Ts0!&0f@OI5fSK#_5BO1eZFAn<L7Xr<zU`U3VgY<rcP<U<fofbEOh$kmd)(LM`W
z#X{U{etQpS+&O-7nCVxh%k6EAY`B8-<okfVOD|&+)u@wG3rBkwrN|km-PNmGbmvQV
zd<a5hMDYx`K0U{+`Ktr{Aw#|JJFe=FMX~KmGus7A10AELFRYh<%N3%$563p-lwwK_
zJE?)r&;r!Kb<dnCGTqf|-fMc&d1CZW`pauasq^3lwr(~le7KE^$*HlB;A|55cGmb)
zYWmi**nM9H=czv`3UYBzE796Qa3v2687p->HrJN^P98hk$nOugB;?3<KQxVv_>v-p
zlYKQ~W+^9f4Cim_PDeu@0=eEeU8Rzh%f9}YP~f|V-WAyKc<)e)Ds(^u9rBz8&vyP}
zVF5AijV6cnCMp%1*_y(eq`Z42H<k4f{|`{Y5|Jc$-3cw_0w?c2<bl%DCM>liXp|@@
zY0-glEV{W(iB!uZ$LyXye;V&#MtDq}>k?nPP+0zy;yfh+Y;{t&py^<tv;nPVgOGj4
zf?BmO%zzS;7BFQ&yj*yljC51)jiuuLPw}H0eQ;@P|63>8snUP4=&XAOm|=yu1?k@z
zZA1_<l&co-bh7o#!XWn>F2wv`oB{4}r<6%s5RfM-o|jAV?R_7WkZ0M2X2P`p++j4Q
zJf7RF$oW|`ac>~Tr(bF&k0Xqg7(uDYKF8N<2n>koIKTSTBF5EeF=Ni>6HkB_B?Fr8
z#?>{{RJaYXm<Srpt@N(h824AWA<VnCGoI#q>%!SS6MCMCB(>{{-P<~nfJCQ+4v_p!
zzQ3z&OAWX7?J9=nyGtz>RMCglx{BOXQ(IREx*`a1kdg(a$Pbw&sNEfA2|Zp_&iZk-
z2StSQqw~Y2FIema<n@Q^lp=Vi(nDbEl@EZY3ayHrD4hj6rv7l<hGz2K6+zO59|Wka
zN7JEWrbJ^;opt>oOh+ClC@6pbCKyTxAZ|ootwpae6jk2PBqtB1akhJK^;0l0S(_yZ
z1s$rL(qa&$DnFVj4xTVylJNJA7r(H=A3b`Barft2k|vdcckgDuiCpnSFMHc?i|j7z
z=P(quM`Xcv36C09ql2rPNAKsF4;2i#4aAvuO&!AQx|CVXzGvxnb(f#;<jjv-+1u|j
zK27SuSJh$(nAf}OI{Y?8qec#P&}BX-y*zBdUU<YStGizH7>vH*z7IL5uB&nTS|X#y
z+q}wQoZGgU96we%Av@9CPIqD)KkgS`sf`Xpa4!_k-Rs}@vIoapVZias8=_+4(7K?O
zgR~s<v8a==QLDxwzUkRJ?^W(w2`+f5Xt^w<_qWObhe_#B3hhTU)QMFUO6zm=994`I
z7dgJ~AYO<wi!6IRDkpoX9fNXx-j`ps%8CyMvTa9HLb#HG)}e%B*+W>s>amj}S%czn
zq@5|fqR2bmJu!TAf^BIrn=@0nxkZcww-RBp1QS0REGloOzX9?NriExtLp*=d?5!&e
z+O1g1Ed|qXiDJOtECpL^@U_u8a-NbUt3{~s90~V>cR{aL@g+cvZQlL1<(8}bcaXsG
zwq6TYjwjR5<Vp6PX&v?oFwK<AW!$hrD6iJ8yEnWrXRNbo;eIl5<dB@<_WNP@^3#J%
z+Nobe%gc?$4QTiX*YOYKcc-8~sqVPNG_?<#q&hSa=;KXRGad)a-#vnOO@AKBTw5fN
z-!E+tV~bnoo~rw~^TltFEv}3OlNXtzf&ABd%d^0a6s@V(Ov0x9s5PUSYiEK4!d2`p
z|G|(xrfmC9P;8+SiFs-ePibG%pJ9hR`*_~bI&s}ysLDJQ@z8$uUVg1wZh#*vVu9&0
zFlAYS!@&%3g#wu=PVgg3Y0xs7aAG3hBh|%<K3kfLB>NRUR_(<ti+*>Z+Xq_?^lBp!
zpYu3BG{niRY7V707v3I(>MBhvNEHtb6?gx#;+TVGmcIFhhBOSur?-nw2TaK0z1Jm~
z$kCdfrG6+;d!<MF*WedeAc$v|3a`|oHLA>~z3SNQWOQe4Ter60D9vy;dhwj-+(W<e
zcO#D1h!jW9-XMFW(2_4e5f`AG&}^pN=d+<+)4{5l$HN5b`>g3r2C}g@X?%#~0jjYa
znq4|@7X_w$6IY;l--mQ2G(ri`ZL=dFw0C$tc)4`7!(hA~8bcAfyRv9V&<W>P^(~`z
zkKGw?7LN&&`yQW@&9b|%-rh}8w_~sRrD&P^rmX;Q(SfZ|-C|W?o6WD`e(Ie?bmaff
z`@J<`?K9OlOcC8b$jU3EOs+_1F@XiVP+R6(9QJzvT!~1RoW<+LTgp)W`m5-=n#FLJ
zW{pMXebO_R@pL9<o^6Z>(j(d|`}sA{;*(q)YR6PeoZQ2&K3>!JRGg<Cwj>?xb(J0B
z2l3>0`;#C%4M?kBX?i1o)1x{5lx{rw0!5W%zfT|mY9?`AW_-KSZ&A2aE3K>C+;qVT
z!YO)<u6~x*<@_6Ua)jRxyvX?{?FXgoT6~U87Z+ZH&T%L%WOqb)={doK>IYwo4g*<>
z2U-`<&_qS{7n8|FL2^o}56nxqvLXg5SYY{NWq=du@QrbAxPQuh$TVVbYELuUps@ly
zG32KSLV5@4Fx*9orJ)b9F%`eTZFSnbhV^JogH|811aTnp-1)45;`;-h%?7e7a;cYe
zpZZ?3Hv-m+kDnQD21u)o`c_3HIzt^xZAe|09m>Gf78V{%&sD~tGk4Ej(6foGSJ|dV
zG_wQxy__1)fCi6>?SMD~>;{!tp=NWGW(;+BachFzj*324L`DEg2X)yq=m*<_K9IOp
zt?h&XlhGaaTHkIV9h(2;%j-%#ruT0%4e+k9>rViutoOzj|JXIai`?S7cHl+gN@rmo
z2KlDS#~J9Z+h~JftJP?4AaiOW=Bex6acO3Lud|<z&X@><;6g0&^%F>eA<4K;8UgbA
z%7z3ktJ%NFv(+M~_bwS#kgRt=L;*mbfr#1uS!}Te*{QXL`mG1ebo5l%1g@*55stL;
zAv(nrS5!X`lp+3+p7RP<vZ_nL`tT<?|9H9Yj;3OHTh)P%AU567hDUfugZ9~*L@6Y>
z>$&e3_J$QVi)Z2#yS_zAD{JXr_%azY^VO7@S-!sa1=Qc=`oIztTA^|n#p;WZEa)Aq
z4ZrsrcGu{#P40@l>&s|BB8veT7LG;wJjnxHTnIpNOL5F+FsPH&97dZ&iHg^AuX61S
zEAGBfkQbM`#ECkuU6BEGf2He^_ctJn1VFWb=jMQR6%B7{=g|`}`l{Ij%<~Ydr@%&~
z+LY*K!GBMUXDb8Ba9gsf4%cows5Y>tqMxu<m=XtNpqBRYI!r`KS|{>i>{t9s>HiW?
zg5;f7Pyt{{koo?NFfTz)YN?WCg;xf2xE_c7ot!>k<aiyw*mY>mv2gFOQQub)0vnV1
zYdmY7iiz}s&TF3Gzv)<g#yOrPx1u#EJT7L#L~D9d(+56?Qxq4T%#7~Axh&hequG^)
zgnQ}Nt&EN>qIR(Etb{=n%9)_Q{}lTi6=2{5)gBM%y*%Sh^(JVu)Qg8Vl~`q@LGuZ6
z;o7-1g3trHFuB*=%q`cdn0&tR#wsa<C_;|ZtS`FF^S*Brq<{rgg9WfB7ARj~pb7Iu
zeJB)GTi2Y9UjUD<s*h9`>buxSEv<y7;G*gxl}VKAPf(SlALU3&gKPxkeZy(RL6JZG
z*K@m_XEOT1yiQTmyaZavk_6bfC=?nzpK3PS`}e=tOcgabB=i1V%{Mab$N)G0<8u)H
z3854j0bB?a7otxz)|LaK7U^HmeimRlU-;gyz{_wz4tQgPN~MQnLNzGXqd%KPvZE!M
zc)>D%AzTOH#em6bvEf-56s&YqGWF4(lm<`GvHRT+CH)Z78wuw2zhb{Ho7IP3Q=ALF
z66oq^s2u%QBH+7iU-#2y;;pcx6JF#7{^wV<Egk_EOKJ#;ldYBOW+|6}_Qyy9>2q)B
zRgx&KLfM{gw2}R+NT8WQCSiWXs}@ukiq<9qEPy6Cl}-+ubtK)8laiM%ze}AwTwWj<
zkSKD$lz(4%Q;0}0Rwgyx^nzSt-NU=mXT6oU*cm)1w4Wa351iQ1C!j$_&(64_Zmd-U
zwNWTO2b@W^^3cW<Nv{p$CL}%Z46ewXr<&4$2UMr=p!eKM9EgRKQ9W-jc$d&e5di5v
zT%>#QC+S`VGIZ46F9Cs{$1Bn-*7>QR6(LNl$S))lNwTAmH|X;Mnovzj{)a@&hM5NY
z@(utDc7O2FSJvNM?E~*tm=?0JrW~eSS_Md!UzWyf$&X(al(91&4L1kau^V+6E0Tz?
za)mhSknd9frQ$VYGHezmqw1CT*6T$9s4WWY-oV>9lZ!B;xaz@=T`8&T7Y0GO{^xqt
zsF;L6UytigbL_|yD#qGt5sy+mh_evY|4VCB3|i;K)Eu;?aa>4|d4wbAP)sG4GFdG^
zRomQ7SKH?3Fk<lIk;we*OIyKk)#Zu2WTNho7e!T%XzM*3h&v;?uHWY!CTm?TIdS^K
zq4yeyie)~kz8BY5LIu9dN)(0?I1odQ)T2+f>ynggK@aaDw-4p20Tqy~p6fb`-vh5`
z4n;G8O@k_dGbX#{yJ9=Hw#4UKDT5XbjZiz7_k<@sC8nR$tRv8;S>UA(4mf>K@H8-R
zBwy34&=0m*om5lndUsYl)_ZOc><rC96SmWxzr$>7<d=uN@R}Bt5V@l76iF^J9l>Fj
z!a>GwmwnPHY@~AAVm>Dq2J2p_UAIpffgNXd8jx&XTUnNLBTv4U!JR>zwZ$jKi&zFd
zn<qI<xi`PgDN0vA@QaoMID4&+L?9b=fLiEdgG*6wCzzBQkoE{qaD~b@@^}xuw4Gos
z&*6E%Z*TTLu!C;Lla4>XgZLV_r?Kxo^?7LX`QT6b<N1l|N%QrZZJF(&sM^-?zbE^e
zd@AjKT~^ws#6Zm<@%GJ4I`1tq6JM8E=<b!tUobE-W{>O7lJ9XPXKj279%=KYpG+Qh
zi@y~HKy^W<gPrcStNtPhM>@w2J_RqrbYxi6WSlaxq~2^3DMw%alzi|?30=$zkKHLe
zSw5Z*337{m{?lt_C}69=QTz4DPf}hzlE1%OqJeWqEwnX<bb$v#^Jz@ksrr>tWR4#c
zu7dnt6i;g4VE~k;#>?F&mJ)!vEKZUv@-jt!$N@Bb0S1~dPo&a?F8vl4f?iRh`)4|;
zD|V`;KMpvocD{+}Bt7NnqvU%n0c!r+$@oZ9(%-%UKzy7W#@Z`of>5PU*>;(=BTzL?
zr1C<M30l+epFS`P=k@@>s8dymoBxRQ`ryGQ45SW$;)#vj+wZG!m{CB3r|4qYSrvo}
zfpKNZO+=`=ch#)h*O`+X_9}Lw>-<9hAfinK#NGpTeU`3E^H$6oXG<%3{5%5)kHXd3
zfjJXsKqhxvhUC`o3k44=OW@4>xp@K|Wz~)zoqu+9J?gpV03yHT-b7Qj%CjEmr5_hm
zLP5}fd46bHh94JF3EYx5M7JUL8n$d80ZKXgG{q#i^=9xsq3hQX>+WnIbx>OoMIgKq
zK=Wgq-Y{TYDNtG9({5hWvNP)1+S;y+L3OoGYieF|%gbx?1BLFj2YcmKY=urulEAr|
zC6wF+u*1=%PZP%_>20O-8}5#Usl4ynqBq>jCyPj378@Lo9*Ayg_8fh7OzAn<C4JVC
zXqQ$VAaTji2jsund2|zRcAzyCnR~CBHxo?^`Rj&FdYn|r#y&2raDVSNBg@&&eKh3p
zQjiB(;lZC6mt*y_;L+Wbm)8NpgIe7vSM5+c<hE+?_hrKGM#V1RCZg~`Z#!9%viaYZ
zqWiG@+A)XcCLSJ}-aXN|FsHBH_kvtoL*~`%_OkBmDqTuworIpMy1N0U_3$K$HP*%%
zyZK6W%8qNwy(-yi@JaM(9z_eJO8&T>>tc*CrL&W~C^RMdEX70^8Rx*)eWiHQi0vai
zWuN%XK6`H|PbBY%BYF5M7`&AJ*l3}<JSH`W_gkF(;NNE!)JwCV0=L=byWlj_jmeck
zR;x+3ovMvwrlXBIk1SL273b9@0mjE+7exN)RiG;haJOK*%6ozU`%s-#SvCwtkPf}o
z^f`mhK%ltJ%Dfne0iKEKoD`YhQ>_q;scRJ<w?D2kn%v1Kmn#0r<lI&meKhsg{RCCv
z<hZuTd?L)hg{1m<^~NxUvuN|MO56d0J2F#WMKD}c!jXy=F1*+&^7jA*v{UUm1&dd$
zB*RXevjgO8NLzmndArEL1Fp~|+jze|*4mX%p1g>&^y>(?Rm#eJ!O^`3IA%D1-nr*U
zefy&*cgfej=>l<3^TBZbDsQRjn~N4Bf!|wdyypP5)dtXM_2uXy3^NI{(~v&7;+#!i
z^NW6iBoHXqn}q;?u!W5d>3Y0BnEHFfAkq#XP#5>fX3&zo$RoV%&6dXL4}|fN8>pB0
zV*_^YX{PR#QHc+SIglA<?t#PMLWZsScX-?`$1~UMz2=UeDy6hRtg%`2G*eS1{R154
zs)#HrtUpP?4@B<jPuP34oknx4)<LP{C@KJb@RY<=BY<!a8Hz7;P*3X=L)F*VxR6Cy
z(Z1YNY7<+6jRbUKK+g^FQ)JARXKZSC6FBF%t6Yl2_Rymf*h^zfo*k@W!;Us1d;0<4
z6vp@!5gi@gwm6q%gP{TY1Au9;R;}{pH=~<hd=(P_NztwX@(%f1m`(G!)h6&%2p~QQ
zPZI8e^}xsb=kV#ta)uK3hH3DC;!~hGSZ>GGG=C3Ww_^NH1L+?=agQL^@zk#3gkAqM
zZOxm&al0Y@YrKF$F`%Y@VL@*p0HJ`Gt)}yfJWYQd-cKH&H6`a=x4smH!`C^IP5OUS
zu>fZy4q$H$_b+!M6bfMC!5fZpK!85+8I~CwdAL#4G0u7r>ax(>3G+D(&}@1Q0qF7{
z$&u8fDk|bJr*;F`YQrE$<f7$1yj_0IAP81m>vyLd1Opji;yJob(C0i@J+M8OOZ=CM
z2*ef^fMQtL>FxuQhZ9eYt|Pw-#nbGJNU@Y)hS6`oGPl0WZDT%|#$#8@4*mRrj^tiy
zsK@|n^)hTZMd}?OfcElN6lU0k0@lq@nNztqS+3zwBRwO@7^n+(cJvl~pDe<5C_?0<
zo@$7pdl}Xm-U0U5nr+wV8#*%2?MR>RF%Iypk?{_jeZ64A;B6QGCvo?0AV0-V76eol
zzzA)GIEr2jT009m6<$<i5DQoL#IoA<Mb_j$-GgXb0hW?5<pmIz&RkfL;t_8<yrQTK
zmdlgKOW>(0Bq`6S?e$gm)w}5SwBwwnOOYX52rUo05m37ePsKld#|b>Oa3D{&(by7A
zti@Q=%JYoNg#z3kf$U%O8(tR$tUvyGKVHrIY3e2Cn8jW+qohS^;yyt){_r`kE-iT?
zb2>ycad1IG7HO8=Tku?uEaxObkmZ~L7aL1_>wLe%y_t*6_)e8XgIl_D@ZVA(!<Qlq
z8!tm}H%l@|N1SfZSn}d$<UXANQY3d(A>2LgcR!I>4NoZvQ^LFLb_s|iuzq;yqqS2$
zxTdG>0aX+T*{A)_?}4i&8GuWxcZNBF+AX`jbFmxk@o2%t5}8X?!oXU@sGEcWlu`W<
z$rwG2PGyp<NVCK}BIr7Beez<r`Y#tR!{Qs%*~9*%waEIjUJijVUT4L@lejf`F}l%b
z6_@zO;(LBoB5`q$w~B43VZv($OgR5^ANv|&SV~4;%q$$>Q0LmTI%)6R>phYA;@LB#
zbvzre0O(xxN`XlVzKbM2Ev2{aoL6g8Sgcir<n^B>fK9wE%6?%hY@k$gIb_}ux^JiZ
zfcH$c<X0;z&U=JE_VXj*#Rmxiqrz*=+X#I_a35ckmza51h=@i1ni-$@*DPenyu<)N
zgu`2zkwetW(Dk!8U0Dl`Eb^~O&yZYjAlk4c*G<8}Y)bT0ZY9KlDEkfoN45;i42m#?
zLM-GOFXnPgd`^Xo1?B%J$krHbmP6RMx}OiwTa0{PPjY&X<f&LDQl}C5+P^6D8V)4l
z%>bLo{(#7kglH%v|79a`iW%!IzwjBdt67s1G(FTCIgtoedYwp1f;1oh&_D57Kmhvu
z>%+T?u!j;8qrpn6kxY{ppT)ew6O)x*7<8slJac}~v%I`KYU})GtqCak41+1@tA9R}
zq+?_tHE@WlE{*lvywrG<JI}S~x@$6vu+c?2d!c>hF9R{wG72G?qjuh;Ey&G$X$Rlx
zDyj{KFQ^@szX5J5-eg%ZbAcg1cmezxO?Z<Spj1SzTbRNl`7xIP`x1^Yx}=w+_&fg;
z&a`KF#*17`{UTacLjRNNkNznHqIRqka8Ti4`%XYxo+2SOOX76D^CSvlfSZ?%{@A#t
z9@&UTKqXCuWoRSSZ;4#g5PmY}m6l`oqYmy7jfZ9t4DH8dK9n71FVu|max6-YCLSB@
z7j)DfWBcmRD<NnQiYGi-pDPnr+5?s0MN{@ofR@0OdaI8QCWf$H-kLBFZ#plz)?X@V
zUHr2(E*5cS=3PPA@E7~(8`wAyQxSAi&(Evw>Vg}JkCKOZTtyJGeJEEe&oEI-e{C2Z
zOWk%iS8hzOcc!CJ;X4aAGY|P)Oc&JGVE+KF!^j`5pAlAbsN+vognVpVA9;Eexi_tF
zTf_mEE63+?My@(VT&jWqXM!uTi!hh*s&$}C(_T?Sk!pVU+vnVqU;1C)u}g{Mz3C#r
zlD^OkC$)>!wjy#qz;lNHCb*mzd&5tWaX9={=?TCEHgn7a)^jBbj@-HXGF+Se5rcXF
zN0T@+fT)WoIeiEHA3&X1CH~lw-~yGqC{ZD}0IaN|36GSv1ALPS7p-X?!<IYg$Og1)
zfGYvo5{h*nd@ql17PZ%>BE;0|HGoC&k|wwHJE^Fu4kdKVSTX7NeWri^P*2_4R_qa6
zu}YLj!ZbhR^pT?}1}wR%3bhl+trlTeF()g|Dbjz<`LAENyF6SJmsc&zbOxUI?E()V
zTRgpu+~6$9GkP-3RTgH?wl_-*Dbjqp+Z_-CzDo!vk&5J}-7r@>T^`GHmrbEqg|zz!
z#oH5l+5`J2nga%UeeeqNv<C(RD?zk&=70W7oLrvdXC0uEw_BmS_F+9&A}^0ewuH7L
zwhy_gdNtHDHFOd4)2kR7O+Uf>BH_xZy*IDa8BjaurSk7VS4!~xIW9sX{sP0V2A^TL
z0sy$DirYDc2Uc$j#RguPAwklKacO?LxCCAd9{HOSQ7~=bKJfbtX*a$UjTaTIlsq&S
zxu{>1(N0OOavN(gDNx^!Ji9qXG{L?~HSNpxRfja?0yeiL-r_>Gt8A_X5b3E~>J8g#
z1hP_Iq`VwB5}whLmT=<2E%A|TFsUKDW26P#pMEZMYPRVB4)@5S8S(os^Ui!2^FMr;
zzPuHxop}!6N{sg|Ju;S>=qaFR`J`_%0Y|utaucIWsCwIm$qUGqsB6F*yN>3#LpuoR
z4E`n$t!DvSXsF!7r8^%!(^vmW*3QJaAr2I}4JJrOs;34*5uvS(USH7;lgn$#H)FEX
zRo0(dGa!-^r;F6RD}vAiU$USrwh1wSi%b4Xp?=O%?ss!am<(l?EI@>?KgAg-rGz7Y
zEr}T5owHv0T->&`K;($C77OmP&Zwq1(kFvv7M6vf!cTACxLn1?$<30OM4)^rQjQZh
zrVRmMmc_dljg)~$RwO6CRv6E5k~q(#3sPM;zU;Z<^S~Yd@P9kLtMBAk?aJ2TXO>zJ
zv9Rs>r2YL*ED|9ih-BX^!M__9Nsf8m43+_TuLJX3qH2-4KjP9-gywDwYij8@^_k9H
zGZsSp583DXbN1*5IMx{-v+&nnfkpGFGC}h5S)J2G{7Ed3JM!}3Id!-DD^=7Pmfw`D
zGLkZG<;qc%N8n{hkTDowBb)-^AJSRTyo=#;@o#CsubjzcSVBbxfG)~br~WVG|G8dp
zf_}$}27m2U%5ds^!S7i}5beA!)UvenwHMd07rev;u&^^>`bcx|VF+lWZP?+E)@b1%
zDi8fTLEy@R-&c|1F4^Uj3{d;Qga$V|q5ZBF&Jp?fMPcYr<4*uQUpr3yYK*lHaJvX-
zta~?rW>27gOoud%o_b)ZRhm}F;$DQ%3?LI!4Y(nm4Di}4alnMYSdAc;y1mKBCS789
z{OEt`)VmXpMVTOM;XMfvwJTqQpB8*=xFBiFR7sftmDcO~H|?KeCU`Ci8V#Yq{6|xB
z{>)_a^tn#Jn7{`Wu%?|Z*g<enU75+OlTgR*la2y1ZC*k34?bh2fxnDGX({P_ihn5!
zQ%t|%Fc*D<uumu7Kd67zrB2Rgi*d(C^Fp~nf?d$2n@H^P_dIRpup6I=)+Sre5(Wka
zN*A_%kNFKz(07mlZuc{e3vx8+r^eT)=Z|^eLP-zKDCvXpjKM`%#`W#P)T1qqI}<s}
z)Jo50dR4=JBY~K|9AHVq3{!p1qH37tZMmzQ!VfXc*FGYi!}9<R2LMZo(E~(k|N8$K
zKZ2{J+ciN}PP+Oa8J!4h3PB!37#dK)1#2vyOM>-j|Hh}hzVa%zHAhV;B?Tg2=A+K_
zlskxOtFjTK%$A&TUosEX;DXiv#5pUudOykjw>U`=fq%1_o%%hD^F8&vL36eiaFln_
zc@B|giT2W;s$yhy7yP~m^G3NfSlgZ|%T%*i;wdc4fv_Hb65r)g_b~Xp)SY9FV%-l1
zOdY)<s)YD(C+T@XFbeRZ(}pL!AecOOTlpzO)zA)xhjGC(Zb|~#<h}N%0}Kqk=i~NV
zEB=0&4Q=8=2SBTii(!oq%ZKew0fIri<p%>`+4(&A-e_A+YDb{zLE7qfSWwYpJ#~9V
zfX?9L8V9ZCwpg^*uF{OX<~pAB)v4*uy`OF|wl_yE`(gOP=qy)VIOQd$>i;9%JY1((
zF?jvRw8C}BP{&$Kz`9~Z^c8Yph)Sm_<gBaaZM!5d^t4?R$J^muU*LOH!{N&X+YH57
z%Nm&KH7XWX>z2dV5)k?cbeBcK{49e%3Pzz$U95};zA~O<P_>G)k3P?!0}~E}EnvN%
z4?F=ShAhYl$WPBG-xpM;nnn=NB&$<8rOUksXhVrXy|bp=xw*OKJ#(Aogh0nL@<rfh
z`mdEsAaY<8P+IlFi-85!vy3ysbrr~#>_CaU02A#SGODot^@`~7Gu3mT0*vb6iq`8$
z)DHbDqC0pJAc&$vESrF&F~V2L0AM){4;KHmSMkYlHG=XE<{6WfEX5l?Q{Z-?yJVM1
zmo@H<nHoBSNrC_;#L_{hDJ8k#-?C3!;tiwnr9XN2Aqb#3ZHT~C2y;NDJIf>BARK^a
zzKVsZdT%1W@GU<dLi<Vhui6im2)T9ZjQLF{_jSZ1aeW+|%AQro8FXB58*B^1d61QM
z)Pp*5Gdo?ux3=GSc3u(TKx~WNdLcM4|H9?8VMvx7ZB!CWfZB1OJ~*+V!&2?24I_Wq
z`19tY3pv}f{R~>$7a!Q)1Ad{8-h1l)1p6OWGs-W3nccQ?{Mk@vE+()$3E?9FL%>@i
z8eVfhqD%;0iai`f6Bg%eHwN5VosSXe)hO4$0bhMizC=U*Kk_xkz1Qxn>}}G)^LQwU
zXu;ak7pW$SoUt(D{|Jk?L}5%SJj0t?!;08mF7M<+{Utj@DX|ueoqh2T;F560APouM
zAtVeRW{E}R`&HNj0}*2~eD^0fygpFgM0!r|l`}53`Vu<dK=s9jM8#CsKY1d90&_oa
zb;8(bAAkDIH5@wR^4`K>(H6L8wg9N=7KPQbL%sB=v`Pc|z@>d%+AY1gPhL@^+_lHe
z;bJLCz0^3Sr#LORtpfjNpybO7R7)$KVEfPtlpsQd*#QwLR}hgx?mwNtpZ6e)`xxHr
z&zbmqDNWxa11yF4Mt=VJBd<BPd>veCF$HVb;3pG=XM)aa5FB}UzjL~c3(v6JXgQwf
z=@AKI70~d?yj1GVm#VIY>vzYA(ap<!K_(;TNANV#`%vLu8fjeJ4ND44ak}m)7hL2k
z8HVSYlG6(45nhB)IaO7mCQNWgXMc%}lQ0w_@eT)>Yde|FY=JR>`(H!mjfiw(@+17U
z9$*w`PxS(j>p_6g&&%z%uTE7{_uPL~CC^tUXP4NXOaL7a>c7Obmh&bp_8dq8RsRs@
zQq4^mi6CAJw(7AELD#`JjgpIAL%rI$k8QjKrgu7Is(+g-!lqkqvAhfm$bz9I5rdVo
zE-IC;@b>i2f-Wr)@6T}^hOsw^aeU~u1lC~pbpeVBGh0TwR=o*aHCXV{H=)V>68irJ
z4#f^9ke_P%<tEg2BOY+y9`Fl8cFyL#&Sq)K{$rMA!7&7!iKmR$p69lfJkmb%8c=mu
zzpv&K{;Vvi83AOeS&Pg|mcTtvo;ipdvyCOXxg?;;F43tR3b8LqwFyz{4U%|&@h^6=
zXSI5@ZuN`6x1S%*7QMP<h|ZVzdE2)ocsqwn+A!06fHxVMec`L%e^l;;fYy(w+!+c2
zj&~lg$7B_km8K7VXb%nwt{wFAo?YU&_(@M*)YFoO<-F?<mbsZ%P*A`V)iR}1L!hUA
z^En{SE-;E>%g;+vFGp8?VV)NRn`ZmA(}7EvE-I6I6z5A_PC9D;C9)s!lY3TPh&=<x
zvvO2^Uftd{`><fts#VJO4$@CC;^A{QFacWv(l0jM1f94AkM?JAUG4lovLC_6mG*{4
z>?L02Btu>tbYm)}3m|a1p8Fi%JDK2B(BJ<VdIZ_3>oX#@r3M3C={e-KtP2!SV@3oA
z!0<WkQiP<BW*(n55bssl64a3BRsKMK7@e>xa|djL<K^Jt5bX^dn1-0u;s#n*!0bmy
z0kF~V0VzWcrfJaLjKai$vN&drciyLub}h492(AbSj=bmLnBVZ<<Sz{lO7N>}EB);2
z$?Nn3Fk08rR75q3a#ir0uL2yeQ_vv~-amTG`A?1~I?j_pY#a8zz`Jv#kyC$G3Nb|N
ziHY_xu)p6YnWjDI=o>n4U~-oUIRjQCdS#w^*zL7v(!K(!7#XX@?;0*nTG%;LQR%C(
zF8UfxAxz)j7TheMi7IYT0!C%(;&{y@5w-wiE4f6#j4Q>dnLa3AzFjkXqzsMEVDVgl
zyl7~`><<}#CUri>b6Po`C$!Dh|Bvql=p3Pzl#(lR7tz|2sum!k8bDk4ln#;he~7}A
zYO}3)f+@`5z|euzoe@#*j7!71XL6I8gSyyxtHU`&)Yn2Rz=C<#R162==a$Kg6v@4X
z&LYpJr2U4$3pD~Sp88|~w5H(tZQmC$gJu1)Y-43ici(t>T}PISTX3Y=_expSm?MTb
zzcv8wo-?o;H5c43ZL#T0aFfl&CV}3`us`&VVX;NTYm(-fa%S?sGMIB#YNJEUp^%;!
zhBr>IU_84uQk1!JR!}U4F6{vlnkH<Scp~!=C>G1aTrj8cM}Mreek#+2^VN$0b5rB3
zUY~AW$BMl3gF$RaOa2~beAYevph|fnB;b)TD*@RN7^(~(HsDe>wvGK?)Gt711r2VZ
z8o)$6nlQwZ$$6g!Q)<S422&hMZ8QikgeEb4F10HmVukwe+8viyKBicA?jS-Dcn|EX
z@t}`{%6lSc7Ge>VV~5tJ&c!3nCsgu}UP}Z_Cu1WK<Qet(9BF#Yf79L2E$}2kDlr@z
z@_7R2nh37qEpn!z95UYV)jo@<T#kRn)Dkaca4soT<-M_BI?R!9&adsr;r<V(mx9Xg
z#igK%purzA{IudPIyf8p1u<FaNse=l7Tv&nn_;H5+cP^|>sfT3AApW9-u|aI_#fQ~
zR2-m*1?-AFDjEHXoMsJgWwn&z(}fV$%R~lls`I+ZHthe)Z$!0!u8$4}(<O+54i6L%
z^RN7lr=$=Tvg4&M(G#G)Zw=g0wcwwGsrUSyaCe+OQMJ@=2y1B4c(F|YAVWR3wd+Jg
z;9LH>Gp<{-0$f~n#@G$+r{~|v{vyemrsX%4&Ja5rdTFqkAN`EELq9adh!Uc9IBS(!
z>RTE_W!H$x^8#d=m*F{-qpALUM*IrusRvy26TgHa`-wPg%BUsDq=wwUwZz6ipCDsH
zs|44-|K<szd_uX(aMn848ar#9|Mw*3d|%kXYgtt0o|(SfN>1d*hMYzu<x65Zr-%6b
zK<<KfMV+NmhezcX^2J{q>txQdWZ7r+AvEMGsJ{Vyy07QM=qDhoSQ`S;H2XzI$Zm?-
zq%upN`C0wdbBR~#DUhVR_5N2whTTP(28**2H2S8eeUjdXdx;ty7x|*jce2yzIKO|3
z4TNr<n$eT%j)ku|vOz&?6U8>#kz}*s`sjXd(GjlB2}lIOG9ZUQmkE?dctbxISJA5q
zpO$bqpn9>TON9Q)d&=mLcdXkN?9TWdQJA54<{&CAoc~H6cSI}|pq2vijdMWto4SC8
zg7`IWxPl7|4Zs6S=Pa;R{vB94zHeid@Qy1ij*8IYb?oA|(=J!3CVr<wEcaYeFshiO
zV-fU`&_<mrCOlp#d?LL-4C79YzT8o6G{%S`A|<0-KO{R{Xk<Dc5Xd8@grs*SR39EJ
z-YVHmG2^dhydfq_3keL*oPmqTIp}K$&;jHZ%>N_otHYXH|Njq&3daCJ1W6GjrKB4{
zT0&Y7rXXF?wSg#-64KJ$Ap)a^K}dI(5+lYyVgokf_YBbE_fx<B&UJYmuI=9QyyI0j
zFy@O&?*ZBWUq0S!Do5SmWuYH8GLX9*@^T6QPj7OO2Uh(3&srwk`_EREck90~^+H|(
zjOmGwW`px*of|q@hk&wYA*v}<2;cbw67wfqUICzHw}0EuXg=Sr7MHu`hZ!4~QW)8j
zxmB>7B}4nGrbB*>AAkKCKlqdrUluXkc~oOz=1@1^{3_ez2J>&P{L4>oPvN{Ck_ZI;
zGm%o1y*Cs}8KV`X(wWrVv(4R3C+}Wp?*8eY3=J2YfaD*E8zpe-E??K?%k|C-e^ny<
zLWVmcHOL*UTv^@zNG5WXV$xqI){Xd6j8E%-VSEkwS>LQ$fNk|5y&t=C@fIO<xx(c?
zK=I3dXLy+aps4W3!gjnckKxDDlT{{Gy_f4<_Bfr<S?Sp?Ki#J>98I+5F5S7Upbx!#
z-9703`tRlpj{^P@K=_-Pl&2I|1gp;gc#MG7##F-i?u&k$>4*Fu7FxdS9zmh~!`OU9
z_PEE?zqrf0RCvSA1_AK+f1&&rUva-69xP(Ku;^F-%mlk6oU;GCJ--2&-<$Kz97(^5
zo|+mbi>Q#;cl~(3cI<a&DvQ1_{`Pz7&>MjUW}!|^$RFQ68j!?2PT(n2Ob@4=I@J^A
zjw^1BB>~To5`Rz7i(g2GbUg4ho~cTOPd7H$?f<KDiCj-1%g2mqUHLZr?&D`ypnBqz
zYlTGrOzsKS{@K~OU!3PwWWCUT*X`UFr}EM2vhg>~>1(&9D?W}=$E|ZsAFjs+|J4At
z&T^f(N~_mFLy|!&+j@qJK9xS}525$|cnL+$=`xXc52%KR4?W0SRTb*j=S|k_>W}%e
z?Og$&sd7B=I(w#<A8@k@oB(X1L2pB)EZC7Rnc)HO)B?o$0T)HgL<p`KFuK(JLQOSZ
z<3I#}w{ZKo`r+fejwG8{)kfBuvc7K0C8CzX=2bwQwCb3p_ZVNYBwuu3#$#1}aqu0)
zUTt_!k(Hh3Z2I~1;`Bv)w?fnslM2{5=KV7_dT!pQ{o+IrIyHD5*_K*o(qpgkW>i5_
zy`_-aon3e07YL95I_nw&Od0wo@SBCamHemagu?a*s8tnV>m)-Uk4IYzonc{g<2MWN
zwBfYEI%e-)|KgN=3G{S=9SxRIYtd1oEDh>fnV(qpsLi7_Ew6|DV-YdI^^@QHY#Bhh
z|Jw4N@e$zH?GNRhoAx(lpMgesVuXj;%bKONa&-gUPKz^uqU}7-jDxYR{SUf!n^NIX
zIR&XB`v*;1Fj2cUy`A0<J(L6$yGe^Ws}Jq(B^7WWomN{TbwG{-h~ELxQ;rB;e=q<e
z;p9e*Jx;FKF7}0hs#WPGwJO{7-+qQZA)wl#8}Tm*N`7(re8c7G;59sGrCGEPyQ;7k
zb&9gYW6Es{cgHtr&_yN|6u)WgADhOf17MQEp?Y0hxUCQSKexVRfrt2RAGvNk3qRUM
zIb^c5q!qCxF$+R?HGh>3X$kzorOL~mN>%FEFnwSS;&oZN>mdPrEDDeBgPqexh4>Hs
zvy20C<F?+fZiGnPR3J;t;QhUIKRr4FFGd02>em@u_7hFgl3U{Bbhv8o5&KOx_<^@B
zn*<xQl({5ntg>y9>i6I}#PG`imGGYiS4vmKTNdsGggl;W-Rnm2>WoCe!k~M7S0wJh
z!JGcA&XDcjOWyt&WQ*GnHP4!^#J^6n!ua+BT-VbrYk|7URF%EcyZL?~dhX&MKTUwr
zAKbFB*e|iflW%4zMM_qrF90Q(y8GL8@#Cl~o1vD2ySJ>fk`}b2Cg?b(Vb;`J?-;3u
zFa9nm+c=5~r$F@ibBpAAiiaOnLskL>ng=t{*G@r8c_Ib{l@KGvOUl57$tz0bm-g>#
z)m{Jn$V(FeyUX4Co)1iK-BqX@bLJUlk0^CK|K5SZ(z3%IRCOs>$|@pQ9{Jnpc>0I{
z{tuk0wDG?RFO~&t@iXb;p|*(^9?PjHUo@Q6K~e+fA>`KaPS1*v;f$d5^C3EE(EZDQ
z)`iWXY0sq;cnAkf9MaI=&LTBtdacahaqSn}mE6WzvR6f#0g3#%Mmr;&+jr}4htAd!
znBaGz3S_wA2wkwt?uOFVMt9j(Dn@;N)5a%k8cuU%a8&?MJ#%=|^H32alIAB~kJ;3o
z*Xd8E%0mxt7-X`=68t{mUk<w9;aH|Qvv^297&Dm0hx1H`V3xDGfR7lvTvfh5>VqVQ
zqbD6^$9M`vHk--1LO};AGtg3YB~~}N8%?f6+T_kr+Aj2giF{CI#$mxb(!gc83cr^7
zPybSL4*N&}$t>I^<RamYTzX+#jIgPfrg3LEz*Cpa?5unplGg*J!Ss2#PG=RI4ge@R
zl2`xJrc0anK^(hwsSCI`UU*)E1?bIBKs%Z7NsJNVU&Qy@C(U6yb3eZl^ASUq*)Ua#
zN;2hy<`)X2Z^e!keYf7?3X9Hz13Sbz(tLwmHCW2=>7+NU=I_rvb+&YIG5pdDz8&E9
z6D4ZVZ{7+TNRV_zF(lrTkN&Nm#@qtHMag{i)&|tb24%baEO?s}{_4_<2Espc2eRMU
zE3}nA1lo^X+U1W%eZ75mbo=`x4`DMMSu}M96Irz#C^a$gU|djSE}Ax{cBlLdwLGED
zTGd!PicNr>x+3-8cWPM`zz$GYdJ9cLdDTyUtIRq^#GrpTkYU;?0*Nl4{H+&E_dY3B
zVF`Z>B95yLbt{u_A>ZxcD$)==5G2fVxCqdC|Gd~VE50ORA)1QaSC%QKUw=!*5*a-e
z>dn*Y^8?bAHnA8abyEweH9z><eF*+FQceOtu&B37T2#9KC-Ajz!*%C#2!CY(xRyU2
zZ+}o8dWRhaK-nt~XvtIym{@-`!sWzYaz^_TIb&*-JxQDzwo8M%7%kyl%J1Ou^_W*3
zJXZi;QTOdgd^22XG!k2Yc5NMQ0)%OR9(bB8=+0ke3Kpv=!8!!#V;w9z`r7VLcFJk5
z-^P8wZTfrKd4DDc>jqTnE-n;xW&0Czc=!+!%fdT%%t2qc7xdI=bd^{$*|0Tyl)nHd
z`9EqL%n+xaQRNgQ?CkO}LGSm1%xAXZq<E#a7$f)sY2EVoN2*H&FVce}JwKgWcc435
zR(OBzs&M+NYPW|<tkAF5xRInm$iK)PDM+2E<aF>@4>gsPQ6;Y)HEI2t1wT-p4MYf$
z1piDvJD0R`{$(TyWcX~H@0AZ~leAnXc$=bbO##S;b>iLr7^Mf4zb2bLQlL>n2|ku4
zF@M?8O2ZU;Vkc(>XffYk6Q?qhmo=hAoq_A*)#x)Yp-p?+@T1)(&~)Dj3OiDbR+}Ba
zJ?^$|356i*ho(&vgVWYZQlSWX>%3Xn94484>2wu*3i0Ew)Kd<S)u?ey<waG5)X@yL
zgJpckyqf3)jqkua>WR{ZxeMFjqsN^A7*TSm?E(6o@JDSFkY8W`<psZO*cQs_79eEM
zYI;9C`~7Re4-6~7dmt2)d<~lc2|st^y1sNe2R_4eeAb;i%)?2ry_rqSiR)x$RJ-=7
z>f@U?Y0v&%2?@?@V(u9oSvm;lvD~gaKe7IJQ80Bd6SZuBm&D+&=?}!Vfvc8H)I_Z~
z*_<Zrs^7aeVI|<vX$M^k*>mM^-@~%w$+f2AqhQv7?)CPsE)mu@F4DSeSkrz805qQ)
z2Pvn<lsaRJ!yi!B*&?6K{o+;GwG*349%w6H)M(|G%kPskQNwD71915OuuYyfz<r&v
zHHl_Rk{3iz_Co7NbtI11SQ6T#P|dEcuCONp<Jp)VAuw}&(Od6f3Wuq5o8kLfkRqSo
zZ^t(loLOawn){{MiRjmShDH@|KQl1RLq4B(ND#+IQEg{ZG=37_`>et+xaQR!-szht
z7tg!EI6Hckv$SzrPVL@A8g^!v5^M7lL$^D&YuuiG?Fxyd^e0HM)p9?dPC$@EZYwI<
ztJ;Kmv&Qsykc8h6;WDE-j%VAj!Ko)3Z}4VNX1-d#^e%%AA4&S6DtjZA(p5nmU_vVD
zbNTN?NOsEyp9Mvx1DgzA|G<*IPqhl2+>z|=KB4N?Lx7e>q(N4yIdsLCpRsAUBfZM@
z$J&lQi*LV|GE<X4mA44jv)qi3aC0s%ze$MCi|@8o*1eFz!6nPPoWT2;6$o*f_{g^K
z%Ig<u01rA__;3>XvxV;&$+EIa$C~EG(@G-C+kCnA`(p%oe-l-fNdVwH$!yRMq5$OF
z{<{rJ=im;<Xj6FQV5&*#Pk${Ad7W86f0#xqQO}azE>3P@_trABGeL@$&%8H$fb#yt
z*<-gYcM4eC%fh)F-Y!}(L~~;*BOf2WJZ12Np(j=p+E)?;tJg?43z8lnJPCI-Q>dUk
zh*o*6R>l4cr`slgP6JA+jQ&4qZ#76XFs?F#2a{$@U^_Vy(F&<Ya|`eCy3>#w{<H_m
zGQ2?n0eB>lDamJJvtbxB^IvzAaJ%&ga6dSb6{(yJ6@8Zc3tx?7I+?neA5~d@2*O!N
zimC~FaDpYv8l1&=SZJ8dRk=U!nki%~Kk>HOZwA8kmn%tb;mzx4i^Ww7S($z-ih95{
z(+~dny1qM@CG{!Yq3o;_^0?c<*lq?hBE=_#t*}U#k2p4VWid?dZUSSR;dTz*@gj3U
z*aYXEeOKr2eiut^s@rBq`+CTUTHJ)^a;Wh?MpuL?gkYMJx!>higdkbTsnebg!9;TQ
zW*ZS73#{}qg;=iIo}InvKg`C9sO)Qim(HgR;2M{gMFDS2Rm(X73{zih+>pq=U_q|S
zGutd{LnXQ4xMgJ{wqI+T7a(GBIrsF$uYu|U0}f0~{*=*fusRr*l_`}wYu|L}wq4t6
zWWDZMcCLFDGW*rM(Fc>KE(G|#j(4!2qeX0&f)r+RW%m-d_$ZA_Ut@PpeCb2d5MRm0
z$MietIZ7Wl{oo(IUQJ0=K3KCjw!;W#gARvj;S9uV?xK%g-N_I(l;M(>mp=dW?mJ7y
z%{vT8bb?1Ew$9BhX>U-WWCB+EOYjdg@DpL+ZZYy2AY)4=c(Y!i7XWyKZpnWdaV%}U
zgi^87R=q=RYHEGk5dZ;Kel{fWR{lM7Mqandy(9~fDBcAfw15(Q58Bk_6@2EYeLJVN
zQ+Jamf=YJAD_2;Z*K4_x8pRe3XX{!Ub{pyWL3>{o;XPs-$@^mKH5<GZ$(uVU77B}!
zY2!!@oQgeAMcOT}E<#XMj(YMDJ48-Vt+SrKe>J4eYJS=`NMF0$VwCZ~_IEl)$3K>>
zFkQEX4wipBTQ@avCOKEshrmxYL__^CZkjv~dQ~-AU2?|OAJ^KyhCaN*B5V2!uhVxq
z73_dfe}d5b`SqApo33{XbRwSM6@#dLH`htKBKU+&nSkaD5a1#ZNaqza{bVQ6-{jHM
zW$0f7no2FXS`YRF?kM`6Ex9<^4WPc1@NxCN!UhD=TwR8V4lSSab0qr)coXgi<;_;!
zzxFGWWdDpi*oJ_H!c+ZC>|9)Ec<~@ltn~;<e2m_;oRKKEaE#b5FzoVnm)-kaU=0_?
zLS=VRjrebgfiNLuCUQ?qw-+3IHun*pNE9MM{<|lj69BbZZby3U$_<X+K1lqX@scij
zN`i>iGfVVARF@Zb<#rb<zL4VQ5W1vjFRTdV=teo7y+Hp$h%+wZku2XUNMJm8egJHF
z33l#HWsZL-5}u(=JUCMWfE2Nk;iNry3I9He{v+)PGbRDXxlyH-<h<E*&U$0BgOkVb
zQzRkI9&p|8Uxm|`V4ohAIpf#jYee~b>!QtD$W&NE#sKN*M^_l2XT2BPa`z+USrz+o
zIuh|@aTzGGy4`opQg@k^XC9J%$_o5LH9-2dM;0zCJKpfdT$#8ROnr9U#HT5;Gj&`E
z?|+2KUP(tuvzD{Ia+qAnO8|(7TuIyd1j_3_Sm;IG<fj4WQKG_R-5AGo<M#@6a@4?Z
zzFN=UT)(Cd@OQ%^^%B1%rtY704#L-q2roMm?np~lW`e=NEit7rKCmmr6TKs`Hc(5I
zAD>Ry0Kk>_hbkLD&rVg@P7Lv}O5J#P<5$>~T5--%j)TO94A4te!63}pfOzBmS?7xd
zhS<Ae$hoWxYzU);G~soI^g7LslqJvrCc(0E<lWpEHmeZ>G``R|<Acv6aiLAET`$H6
z>PUP(UUF@#H57Ya^Y@5a^8A^Cr;ibMl1`1-!$1G4`9_|bu#4;jn?${+ABrov7%F@A
zDqX;nNCA8;q5md;Q26lep<^FGog!<8fBl3~VZ;xBC-UrjLUhJ>X~dp>sn{oc-lQSj
zdy8oK-9;Q9wylyLnm0=r#sw<zgC);m!@iyD4o@Xdwexe}Vqe6g+jk8WLSzZGnfqdY
z)&mhI>}^r2PdB`fn)<nOfVlbux9=0geYM}dGxo?ocp{Pn2f*E<OU`a&r!NQU*_BY(
z!Tmq*cc;ko#s7s&cd?QLUCRgJk)EfnEd$uUQ<-<wrl1#li{9G8GsSdW+BQCZom1w(
zZ{`gBiT|a#vyk_bP9u=<;v|n=txbRyW0tjo&3Zco9zkwWsn095sQzxpRsg8+knc+-
zKAykXdHn>LbVhu(A1rfrR{P82%Q#FPU{xwMfHc1Zo(hQnn#?PQ8$r(fEnK?K%zCG9
zMwOj?f{C;Dxd+GBzW(3fHt;J`;~9?bn^afj6mhgj4uc>8{8^Rt7{TXa1dxL5(+Fd~
zO_GyvF!;af!MXPE`5J2(NJ#BP#O@n?j!UfMSeB9Lsj8q?h^CUAL}q6;*8&R5!3E1T
zrff7Smh^S=owPbhuus8RK|FQbZkz`Q(9;3$I^mA$t=ua{kfl(8ky(ok%v+SZUDB#b
zutH@qc-2~V_=~-*R^GuG%&q^|S+Yr;2=ilj`^cO|k?o`PY6+Uhx|hD(!%=0+$<nMD
z30S&jjbr`qw-Xo0yq*`L$;HMS>$VS6DhoB)l;o6{)&UVJ!h(g$A~yIExwmz|vKHwn
z1k-l`c%V*8!E^EPa-97o`d{RNE`gttNeJ)k6k4+Axi3C!3^BjX$r>T6y3*HI13S`z
zTVP$I6I~C-Bu_iO-?KlxN28H{5LxesXh?q&H5Doe>0Pc0zOSY{LG)P=TrD8=07PBn
zM||@7oifHVm;MiWc8kD^@w>fXNml$5dOJ0F`g#MQ49bw4RdLhc!9th}@1rv|Y#BEl
zV~zdJQ`0}IeR`?G1)lgag`_yqHaZv^6(xHyN~||wY*6GjCgu!H8e#xMpJ#5R(*F*c
zo%i!(dW!f`N7atzRli)=CB$NtSfsZUe61W;&M3uB3PwcWe=ZpIg!$=Kw+)6A3Y0>h
z+SFC>TfjsB6}EGDnWnAge2`*%FYwf}X`SNg>3^Me!M|*62uVY3yuHfZ525|iB0z9K
zUOL)OC&~uKwqVTznyquSTkf3loTT=W-R!XH0EXZz0>q`pBXkgv8Z(AlxT9=5Q&k(s
zS2caU7I#=~@tm-RV4<z4K_zqj(xQ<srZI%17-OlmOB<onI<S}h@D9FnvvD}RcNeVe
zO>9FF>F3d1Wgc8owjYkz`7P?*t`GD#aSjln>w}2IeDM1;2Q=SuZN2TBWelmGYuncT
z$F{P$(cuPYT82@meQKUZU~7a2@JAd;PJ$rJ?a}~iHn32owpP^D)uErz`Fd{}_?z8a
zAnZJ`5X;_*Bvq}$-rJ2aRm{$5-7N1bhuG;Hv>fb#cOSEuIHb-YqsdKieVO(CDcE}7
zdzNw;2Dw|;AhYYLL@`T3$>JBIrqwtW2#X;PzBA$^R2-5v1lKFS`9)#W)Od~-pU(ux
zpYvP*3KGp%c8X-iFCTB&2E6(tO_<;J;cGdWAts2Xe%hUuj6H)FUA%^>O&obc04n{9
zoJExQrY{o9qy87WX_!0JUEdSfLb~j%B;mEXee7Z8@eIsd8+q91Y-&FHRSxGF(*DOY
zMBNCSg)rW*(Yg%}|9jR(9c;5i3)xH89W%u;47kNaODa<Za7x=B2FsL}7geq_@XxSA
z%5mNwAR7UP+jmR+t4Z-Z=xOdnKSDfm8k0HZZKo&?dwO?P6t_|ZU=F+osmvRxg>Fr=
zfSp?Mkm?73v37i6v)ra##jtC_4Prb=1KwWe6KuuJ>pZ$=oQ~Mb-_ZVfARgxyMqV;X
ze`lpE2G%uhSwPh>63^R6xmDofG^*S-34k~iTriOyD(|{Jt@sSj-Rn2Lp7eQuF+NEF
z;;sDfpXhWVM#8)Nc-Nk-dmos5dH8hUZ`QPp$b=oDw1hD-Z&j*^OHAdpV$X}3ZLwQM
zSolIk=y1Up86aHd)vUygRIGn+?#gHIKi|K={{YVS{2HNE4j0PoTeUi<gUOcDT%A3e
zJI7d!`RZfSx;l}n{n1pRQCw$f{XZBP$N!mT?x&OEXEdh?J46wL5=n}*TW%a&R(%i^
z;H_J_&3s)JQYk5Pvk+~dJY|;*c3Gk)trO)s#j9h03IoUx;(nB50V`fU9;apSx?$fN
zGXs(YMtok#I1?Uxm4iBXc?1~LJfi)eDNj@`XFSicLHLvv@gg6jGjjnDtJ^e7)1*&x
zO|Y^0)qvC6dAi5}l&oji>53E(q4IMyh>)R=!Y_fx=JwumNrsFY6XdJ+A~#JpYtiew
z>`LM{+tirqV1*$HdTTntEwo9eVN`u|{|ri&&;OfC85oqL(<Sg34o3p$Lhf)lo2Mt1
z2B!gkRF7<eSNZN%L6p7=$~9Jht^A|`0Poa^B_a9$Y&d5!8}kqxZo`tVZ7cA?2JGW}
zxU$D^KM}zadtLPo>9iw1SM|){dXm_y-kfH4Tn$@1n>EIPArY7Ub!ydj#FylZDatZQ
zog4^&?U~F^*og%9vr}o+d}E2#-?_u$Y*KEzbV@kgh(E<<xz;7fQf_{^7ZNB>_%`*W
zdIE}bZ*4P)5T-#|7V>R-`j#6Q=H|YeKjv?;iOM^j=jw3H;g^3-5OfI`>2AV3q<0rj
zbj-_%FS++)CzjunMXOiCcgUy>n*=t=ROSTw6G&(Idp>1pKpvWwKTO%gxx4+Yk)6~&
zjsGPy1?DI~-uWWh4RT4+8!QB6PlhNpFdt2oLyjOO>KKN-CtfEssz&PRm>CBIBb;iH
z-U2SJY@#vhyOTrrU8|A-Dcdq~Z=m9_!!sKly8`NX^yomC7D9Ftvy+ehJ@5cVUXPKw
zZx64<$?@<J5&*5TQH77mWcA^P@DEOH)uu&MuQ`m9_O`;17>cC6WC&C;ZRli^0fbbi
z`&3pNQ0y}QKYBSp1d1hCQw1)CvbkqqB(cPdC!5I0A}ADW(JPSW+mRdMJ1V%Cbi&n|
zGMtFQ8~%inaKRZ*ex5h?BC!m98B#d#s+Zh--Nora783%f%dYimO_N8?EvY<_y*RS4
zbBw+;=n|5SoZjs&Z`w(fcquIed|be7VZRs90K^@>v#O?>9_3PR0KQ8%VLq3&*11K?
zuQU=x6GDPtBU@dBLRQ7KU~zE~Jp=Kj#|Z(hcBZ_*l!SX4Lsibq+`C=>NzM9&5I>70
zeXcnBd*?Opk34LjS$G5elT&_P8eg8P-wg7IR=-<r8CnArHz}%6%=8Tpg@CcG6C0f$
z8hFmn)aQU5=)~+z`)eONHGAKhX86lbmnaB6Ug4C23y;!0Y^}^F={9*?h`ur&#P!5r
z#?~#>H+b}cfOAJgGmt)~)dkEId{S)PtO148i=w&Dko$kfLmzkUog_zeG5(Q?;AI8`
z?3@^CPgb}4>16n~gKWzOTW>LZ2Cgz55@w(uP>+7eKuycSj2=>8y-NNyLo1SfPlh)i
z6ecJ4-1-TUg1sdIe1n&Fyrq$HP7#nr@X(#L5&_&ixmFn(5wOk?o{R^44FbG=4m+FJ
zgi<Y|I6|8V8kL~}HQ<nh3lZS?g0M2xRTGGgP%FI9$1EABiVuZ8TT9s$8E|F7t_OU{
zLt;O$8?NE@Cl@6E88RB6&2aEvB&0}TlTBy#<BAf_U@}yGI<6H&#76GTKuxa0@HrQW
z_JAQGeam&|89o;rHTOV<!dlA&b(FmuXrzpa9b!F-#yX~);Mzd;6|NEd_MWhH{CBA?
zkVp%@M!52^aDpLQ>rFta^yTqoeJEW8n{~a>QvFpc?eZ$JH!zq1iyK-=c{q2%IHz(c
zi7(KzFTO+ALQ+Tn8K6tNoOG(SFZqv?<2C$q%-_ktI=)?4@N$gKT8r(IcR7%pV$x8~
z54QA9oL@LyUiCv`H>VWxb%XCi3U~%K3<AJxX&UutdYOW9<-6M?8zc$W&Jo;-JoOiM
z%@4-`fF)G1^)MYf2et%eiW`A1)#D4ex>9m_19z%Wfh8)oO__%ak8<5^_u7|{s<5)X
zFXBczqUzK%9Uq}`8hdmE1vperV90Ih)3;&i8A>eZ+Ou)IlK_HP!ZE4298+??93Ve6
zjuYM7L#u!dNl9uRgtC|W6Xam5HS3&%_x}!-sE<JQhE3X6V82z30sh>_JR*r4YQRIZ
zDlRjc0%D&LHGTMc9#{&1_y^zz`JEwDP4tKQ10XH#;^XP<w(Q7Q!b;^e#SG~KY@2mk
zcdWXeG-l7&PADLf;&kSMy0zv%+^k2tB~948PXwt&skcU}(@rlPiOE=s-^>d4O|d9&
zGM>BJ^4jqh$?cP@mhtibp{4{X^<M;E2y8!7kWoJ)K6;+PabeBT1XfuR@@+s;r>)ON
zB<I6pgBfwYD@0U!*Skf9DOP){okxr=USlqQkcpgFw67lbS^Y^4l58#I_D<FpceC~4
zgkZfQ0=nz~JVW3*aKPz2r<=fCkKaNQAW&vG?d3tLUfBF<;?%M&{Y2X^jb>U=<B&p%
zM`3t|X^!&yh~(@cbwv<nCmvni$&R@ftEk;137=rj!6=Q7Lv{?XtL~h3hr;T0n(zvM
zyWYv>Vgd#g9!7u!zWF!T#I2VfY+v67qN;(MJniD)YkH!kn7$3oiF$>_`d;}}*)-8j
zZCXZtH~U6vQ^o6xm<pse37na<s*LkfrCLpm&uWn5CI#sbdk`XgKD2+aXnQ4dsA_u?
zVImF&CPM17GUwF-G>!(f|EWp?rjDnQM3$so!~QzvS(23z>eMxY*3|5X<}6W$g^F%3
z_i(%L5{zbaW33}LVSjFGyruQ$zJ_<OPLwm+M#TwLgwO-G1J*J#*IA{4`AinE&!L0o
z8BaRQ&cE7@PCE=hPew5nvj4SKP#Y|+M_6FTdNT*<_E;#jFmr&eV!cZJ=MZYRGL5qm
ztRy<=t5cqJyB+<`nU<P`JZq15T3y6+X<`{-y4{IPGG$q-yHPIU`1G6~17O%Vu}qNu
z)+_>H4m_9fw<f&++(W*}7qwg4^DU3qh_(<4``Es5z%Du><Tfm-`f8tU(qLFm%iFNC
zWhdP(p3j^%x186cTuI!>Mma@&avsnFt~1--;;}u$2n=?W;ygcA2@kOw#$fG;72<|p
zedzk)e+`Hm=f%5blA%z%aJ57#y=8_4ojpVTMw0^L5B6WHf`#M{Nrvi%^hLD=<Pzj_
z7CNu{#VB5KtZXZ{E5w8nDNZ^J+w7(mcyGLE9rko1_qBYo%E04rIt1WC9c$Z9TNVz>
z`y-#^q-FJA`{0|^7R3*}8g@TqKL`~KWfv7(w3(TZ>r`^8v9o($Z!cIDSz7#2QfK3J
zp{U69EQyFIo8fmlradUo09T-Cy9Z?v=ugis)vtJ}#gpJVz`3;?u{RcBWO%*xRtvZp
zW0iFDb7TXi>9af;p|Lq$`H02&ox1YND&9CuWV23BmaT1h)+?@7V>_2*_MD`LosSvQ
zd`BaC<q^*>?`NxBs4LgLDckmUEYu;%{o70lecewgFECfH+>y^AN+^NYcQ4D_YQ<w)
zR%$1|Ho5RLFK!>`)qjGR(ocJc#wK{t*e9-*)^)t}1+W48<I>%6?vJqRVF_6lW~RB_
zY}uT4)bySDDfQvC2nX!t7PHCaD$DOf0a|)2^}K~;>^YEQl_qw{H)UW=Q4eeFW#V8f
z#m(!9ZZ=;nx4K<fcaKl)_4Jg$B5>9AK3syvf3czL)+VqRD@)7WUu&zyX6a+=%wp*~
z?LpZ3*5{+`Rn0Y(4I+syHYy~Ad*4#(sf(~ia}z)!z#v(UYjZS0T_3B)SBDtm_Yr%4
z!+=5v+UG6=jZFEwe&{gUZM7|22eBGgx-m88D!wvnF1}hFra8*nQXN*_x-c)XvH$Y%
z<J#LIi^C4nZ8TdF)rp{E6ewllXg}?^2~L)_S1I!S<+&~d$oHu7q3F#7zu)r3m<KMM
ze-J8v<d^IKu^O$a(x`B2cKR!as;f2?2onI^9&)u5-z?h<00-UprquNYqte)uRUkRY
zYYSPirl@M?bgnWBde$Qh6;$2~-|y@<qso)WMQGWrVG<hcDC*q~bvFwT34Cq(m+h)n
zp2*ZaT3^gn@3Q6?Vc4(TY)$GaHQL|<9j@wiS2saIMfSeG?mk?v9v46O(FrF~7j{i-
zh<6X`F_n2lmTiEkLHsF@IulNIU1#7S#b>wF{@$EhBE>2zC?Y8XfuPnswq$iB%eBCy
zue+=OG3rQdu~T9HAzs<r5s?OL=vW<hYBlnKwB_wb2-Bk>W}T4Pv1FI4Twh~AAaBK|
z7+Wn@D!p$#;X=$XTd_$!#kKsf59-0Gbuib<)xPY^NWH?LJrl6pLT}b5ww6Ajp2-<B
z1swPdE+G=vYv!BsBK&KYB#0A+W=$vD${fH=wFn>YE}NO1ft6iu@Q-`W{c^r$;ZjG!
z+fz%D-bTls`<W+;s}b)X(53X#a>8#8?aA)Es~ZD-YXE39#I5TPQP31<O2id*9}~P@
zA;l|UK3PCq61ws^rYG7>57at0%4=$7{V)tfX3PsJ7wnhI9CWUI1GW-}xp_l!B(htc
zh@r@_LSZp+m-pYc6wTK`oH3TOrOIJS$Eh{*iuJa8YAZI?v$uVZCq|D38#cpyp91tI
zdgVepF6VN}ZL+X-G0re=FfMOE++)zRTMBl}QK5RlBJI#{FnhiebTlU?&lm5EIEi(u
zZJ<rO81})ohXe=!2IQ~bh(EJOl=socp)c1Rd@h<_iNHoaRc=Z5S*w{wL=vYhSJ+J_
zfMt5=I;*NDCmOB>f7Jk->~G%qXx6(I1&Jn=W8&{Inai3eV3sj~;0Zw|q>l;A&Jbjq
zAu@TGXq-n-x~F$RH0~&6U;`Rf)(xE(RsEjq%*6v_MrL{ThZ;YnJNTY+^LWaeP2vmB
z-9HB%oH_hg_|_N>Wztu_wd7tf{q-=c(9Y;6AdYvDQFJU1#UC){V1l|fD3vh_f${3(
z?M*aCBq<J2LeLX}>N!3AyG(TMbQ#`X^IE_*%N5bQGCnA}VF9Q4cN+|812?uf?6w{q
z+suv}y$v&)9IlP?W(LH1^N#bQ=)OZEmRz}I=NZp30q<R78@lZTdLF4wvg5Bbo(quN
z!8-H=qDyS1yz_}SvI^eR<k3;7_8zt@XQ8?l&R+&z?7R>mt<G${a%oHwkoIj~OB4^Y
z05l`WTJa<FTonNJeFK7rPNI!~4u)aW%p9VZy>t8Ooq4d#XNrhsT1+!(5maOQuqM7{
zfsE7(osc0iCg{W|)t9vX-38L&{wxQP=iDCFj$R@oSsjH|ltCcb5=@__yg3$Ut47Q1
zjzul(q6dsY^)s-mEv16xV=|6bv&=cIsTi^EtxKUwBKs4G&BCoPQ<am{H=}UC`0^WL
zAR)nDcNiXWn1tr9I9QFQsq@tcZW+y8g_y9^tT|4XSE}wPD1!>^SOd5ackK5SHFZq8
zl?dy2-@rE}R@~FLoHx=U?Y^n4L40zI1)`qRmn$1}<y8hAC?|n!k8K2OeBCnIcYK2P
z-InHC9jfCo-qy}H=TLwm@cD=uHQjS#?8J5hgU`>DWBa2c2~aAu9a4$1Vo8i|Sg@_I
z8tiv|r?I1E?Z+XKQS&KuvTcYY4x(Z45Ez+)2o?gf!6v~q(D@PiPf*ZogjaoIiOo$z
z0?omF4E%fOBcJ3^rJa57xGRkQu$H%gUQgSyF@_Daz6ZTke)z3QOs#9gg4-1)xh@gi
zDZ<>KDQN<^qjpFr<w>KlN>AK8V@|uhVM=0(fLhra#otR4JPLQEwFvhW%CJ0^^*f^7
z+lp^8If6S#@_R0<oa+Lza);g<mK?vM_ng{+P;Yq89$RqLuZ6<lNe+U<;P0F8VD*D*
z&U3-U$&#8VS$a&+{;u!wm*;H=6dA8qj0mzaFY%(pvj2N-tPEXkh0UmU-o{SNO0Hop
z8tBD0wij3&D!SQ?mXbBD2jaXAcq|(lG1G|!?)F`4-ZjfOJ^bH)=;3i$kiclW;9dky
zv@T`m-ScTfY*exP6iz@<lcN+()S=61V+heF*yN4Eg6DMTU?Hr#Lq`PW1=2QU9@_41
zh~29UpKgPywwO<b5T`kanS<&jpvIGsO0^DcUm?2sGUH1e{SgHUS0*3IdU0QmID)*G
zbe<a&PKpt2qY0aOo?HhhYuK`ZyOv>%ocr$Yrp7FJPmir#6~S;hkIAAFxqUq&M1Foc
z*X`H_>8<SbyErr0@bgWt(dhAO`YAiEMi(<D%%-UG-j6`Fo$rqD_q_*f&Sl=yHYw8z
zl1}XpC`uF34#zFj=<6{@yBmUfMkoQsvN>V&81_Aj#0n8SJDRStR6Uj(ha;m@|6Tu`
zumZp$ubH176OKru+Dd%V0`J|yXusHlPd@iq9bNi*<cF{st&802b7M1goXljjcqj&s
z`75RziDiI;4qI)TL2Freogm8zl4<NwJKBX9^tf`0n0c3EbGPxO(N_CQEq%I;V)2xD
z(AKqyh!l;6Es#Uf`qT(-EHY0Lb#ohC4SKF7{9~_ueF!vSo`s6=*~#h}Z8}oj4s!rI
z-=-txn>|8z4>l(gwpa9E(%|Yx3*24tupRnn=-0e<`D+drX+AXVhs}Bfk2iP@cBMV}
zCMJQ?HVOz$y9~NetN{{>XF#%aVuAX#xF$@I*vR;jPd{!xcMh&H1qv>0I2-pe>^j2<
z8*}#po?cS-q(grKdVriXEXFhsEs|4GZ|l8V-8gLy$`fvz8=f7U>ot<{zFe!Da(z1w
zGNcP@gEN$O5)A<qcBT<WtY~=AXRw+~@^i^`Z#G$*xup7DL4J!xBorYIc3c`!vUQW0
zSl2TPcWW9L-SX%KzDlyy?TyU{-?t+3Z}S|+m{v+rc5OahQET<4*6yG1E<TinElF&M
zi2T^=SgEqTa5z0XZN>E1b2%eWE$Y>7ie+^YGu66*^Ux!OhppJyf~!NdN8@RYo#3tL
zGyy|svTbS2T&LEwM;EU`61Bqtdci2XTGe&#y+x{|Cley8@{*bVnVL0S{fO6etD528
zw~4>}dVnpzSw^Z<jJn}Y8(V*t)H+1!0Kz~RIEEFgR1b;o*2RvQk$)yh%tA5>q=s|i
zGw*JSu)8^puTZ+GJY!FzIT8)nxrSFruSaOz*sYAnc6pNwU7138cj-<HHT9-J$6<xW
zMW8h9G~voaEZn4;_;5mv)n_N=Sfti>w?{^oE>XB)!_Tm*SA3VnT*d&A^Od*F_MVvA
z!IX>MijE!x67>MQv@=cfF&g##^@d$4Ae4LPR^YS2wBk{<KPUxh%RT_E;iK*KLGxM6
zD`jq0`yfzSoBMAq#9I!QBkWV4CrbB-qh%y~JFThfV5&Fena^@yVJL911{@HOT!_{8
z2%NLEvIhMMi2x=2G*H0BtGM9Y&O9sxPo~oiY}LM~GyiROB_XS6p}O#JQlOckl}#X=
zR_j~SP#%BIYFGlXRZ-@+i*bQScI(5cB_~}eXh3l${WR9_aDPYFvZoLPMoLPF$`@Cn
z(h*<nogoV|V)jj?O}=|y=>;|*8yEAw+RlLA?w8X~TGL_2==lMCSCd}T@g8@wrQ2b*
zSZ<PpL;c!kgxYjSG@xq>WhVO4o1h@M^#i`-d-PaKO7pnd7t0GsslcDaG5xbrB4Hl6
z+Nx5C8Zmi|-@Iw69gf>qRurtjZDKQaxJkZ+#%(MEh-*LUeqUCV>ggL29(~;G3+T`=
zSLHT=ChvSFV|nT0PoTTM0hurp0uWd7OHTHkwxdt7GbF~#3XPO?Qjy4g`xnp0^VYH*
zLi7|PgJ0%jJl>7b$np3;h;#_oS=C-!_?bUf&26an<jsiU++giwIp#syG}n==n9X!K
z(kBBExmU5#m|V5leTN@Tf<XGu8G<&;5HdrxR&yGpA=O7q&nlX*2EM>Mpr%F*8=C^u
zqp7IlZwf~<Pr#elyn5{%RW+{FrL?0JEqKbtlz_;l$J9dee3@m{{x=0l$(3zbFeSv_
zbKTk3T$h`|X?1h0f;&Ond<hSMDdr0SpA-znNKs2F0NH$9d!_wOor7Z0{ku5qu#WL5
z;)Y(RtB`els%cYM(ZSWxY#l{<%ON(k437o(7vx4(WntkZW)Iu)RZ}z%;P&Pv^Gygz
zXh7(umTKl^vrFz+P$PH$8s?F2#y}p09esv(8``3`zjOm@Kqm$i_2UvCK0hVE33%Zj
zlKEboc5B!l1C=#OJu)5gpL=xCH=_2iqe>xlwE>1IvFSQb(eN3V*51LBosH`l@b(Jz
z8b_Y^9)`ig92%sjRY@alSb)3X7l7AauxK<?uVHR@a4@9TYlN?Bo^IE&B=<5dS@^HZ
z!zi37+cE2;eL@MBBj#d)tZW)e5Bz;tCts<zj_e1_X<%cFxVXje?ntc_5lRZR_R+|i
zG$TQ~-de}CGIjP~9c;k|#bEKxBB)b}DZS|(JFy-kv2Nz0wTLN)+m%T5wZ^+tdTI|>
z`CNK2ts|0}ap0wJiBd=jQA-IiR%yF7;)$J*YT4pO3G-FrX>7vd`mX(-^T(Z7R&Cy>
zU6Zw`{XDqO?xd~pW=MPwro9D_3hFUuZV~5h$b%O-dK0S-f6yPT>@-q09d!2c<t1-L
zK5jHtEOwvToDQ~xywNp_|LrV)-Oi=^pA)$Nc^L2;1Cko+Tf@5Bf`O4YTzu67*IZzt
z$3qToqh$cmLaX2+XJj-UpF32m&G|+(Hg8lLm1fF~ST6M|OS9&;8f-czS+AWkBLq!(
z3zFjwi5P<99YI8gg;FO}9xF3;%*!oG?56EC=dqOK)R<AdD%s`FHB}cL0<XkP!-MMS
zY}aehmL;jH`(?(B_r^&uLW5f2y&kPv7It2nBh|2J@VaxF5N`U;WueIdE(1~b8i+{D
zlRzG84n8hmp{uq({ISCL*y7523LH%b5|YSB7;rYl#gy+%i!kQP>W4pl_x?6e!7TAH
zU&nT{T>JdQKJWX#Q%_g{r>qZ_l|t4o*4QAnSzPC2No8Hnry)m$6mV3z%8UKA`uss}
zj#fSa`=o6W(AFlB&~^U9Xl*d7c2kE;lgi`3&=6Q5#O_@YZ#LYFuTL@usHT|atmfO>
zb?6a}Zm}86edoTgPQ(|n#lQ{Iqs318iJ$gzymbd#-0x?xjZ$08*B2#%%RIZ2n8hAP
z@RWHCJ9L3zt2$*#;_EJDwHpv2)hO59gkeOB>*(}T;Ifi>c(=omT}cO*0FD8&eO^Kd
zIP1@w4iMFG2NRwM>@LYlyL1k0^|wS_wR?TZN`BU1zt6#HSIFgI=l(&RbJWq6RUwC|
zV~r{L(CDV_usEc}NH42)vo4X-MXK&_ZDTI0qg}(SL{hU0a1X^Bb|lMec<t;*E=))r
zZ|8MgoY{1M0~bN>G}z@4wS0Ae`0uZ~U;UWWb<TJ1>h?aj?*xrw^AAL-ZkD{zEV!r^
zHDcJ!eK_FEvhh&d${<CV6}ADPPh=83?h)U~$V;GT_)alIZuauDLtl8w?IrNVsb4Q)
z9~j!UrKNrD#!WmMAt#=VSc~Mj6wI0iA)hRT=lVdLhXg@vQd>n0Q?TyrlxD`GC!eY3
zRzhb>$?h|yv1fi;pItCm4~<FhKy8G4W%g;&TisNG)vRH~-N8ckX|ozBQ`H#7nYu4p
zKyd{lHJrb@kI;eU!|(VkGR@T=eWz~}tfoJ3-F_LkF6DW7g}V%4z><+4Q4P4RE+zuT
zk*6tAhqpJ-9zQOn6@juA-J87Q_gnSs`GsaZlJ@7%*(XlG&>6(<#X~GiKWb8)U00Xo
zQ9-4|YW~dH)_>jREy=}`7cW41`1xlN7Wish--iCE+LFmJJgzP>ZjdPkji|faa*aw4
zeoZ+s08{yBW9zD8c-S4u1+7*8tWq;A<5dNdvO41d1<5%^i*v|C)I?h0e%(;ey)@>D
zpI<2;GvI|e?ulo;r4Ax+GUiG=R8zP<ZyR`{u^Y1Pyw^3;D15Ssh<_ima%Ioho7wgG
z1O|FAA8rY$Fl}>wqSwBGyP61e5y-IXz=)EDAUxl~slPb!t8_m&VXM?W-MCr$SNX-=
zLssH(tSyp$4?0Uail#B0AKz{#C(vom#O$^(f~Ovsu<Fu-Ms?c;yxvEEHNSia4;tWI
z(`;1-_Vcz<q&JJZk<8I*Pj5Jb-r59&pYCz}SgEF!UU@zw+htU1a%jeVBbzjYbaq!_
z|LOL+5Xc&R?9ikrzJM3KOAUM0I93#`0R{xrzFWdWWom98j$Vm;qaia(cixT#VRnnU
z^N!Yw%ohh|`Q6p%-m#}0AB9y7#_swc7tQxCh6_Z@xV`f1`zna7<9=YIC#F;M<zL`B
zV7>AotPyQdC9&2nO&JRG*}(W52igw%Qka!q=o<q&@3iQceKq~2+qSb`jR4<8y48*l
zynnJZ7HTx8ZWIO&FDl3GKy4;R+=s!n#Y4tz@mQv8;v7yr>cSFriq>_b@ds2BFWGq|
zS9cD((MYL+AM%|*+MG#ruY%NOM!TuzakoERFqNz1m)4UDHm}V*yl3XWs7R4&g;NYI
zzkGRUD#o<;Tz~)Ca_U;hc)e7<vz*#REq>sKIGWzCaejeaO<vs_mAuUgmcmSzPi@qe
z?_-ow#5G3_Eos;QVLj=r9F3djz&#7j7I{e>(j0b5o$u}8dbFKz^Xf?yx@x+&N^jG4
zQblGH%`svEMIaG_#!d~jd+w!`MS@QdG?f`@GQfPlYvSC((j=ks1T&{|4BJ8vi*<vJ
zH<!9BxNEItE10iucz!k*cybx$i*SVJr5-Cfe{V#mjRTCkfxP@oX2O4tHy83cW$2{{
zY`ZOw-CVQEk(J%UCI;L&)+{AO7(-300NXSr7&avmS{kLUC=~=*A9Nh5?p8y9%l%hP
zwqN+}77@P>RF_(1^a{+Re-qgJ7!{Mz!>65l@OL%4X~!W4z3Pp+DWj65P?gb{D%WHC
z>=Ag6_xhkE%n>cM(9~5liy3^3`KHQRSB|W9Sy@O4uTET&+#Sc=*;M0DW0``x8KV`a
zvaDaHDgiFH{3ms)>*OZ*>rCmd9{5JK20&XBbi8*1?QD~wlP=-BP38OkKK@0^B?u7a
zjT>XD$*`ceM5UTam}-+ArzvbWo{;|9I|jdEH#fHAdJ<1GrE96{q4UVcLf^Dcm2#%b
zM_XZJa~9Y^pQimUDbem`wgY7|nsm=9c5SLXC#RHe8XkR~M;|(T18ztEshzQgsW?u1
z(aBkzAWJMUB=Fv+X$5n9&%wJ}>b(Ql71^I#`i?t;RdCn$JEY6m7_IM|mNn8u+>DfR
z0B$SnnG6vKr2sA)I=dGkT^w)t8k<C>8)?ST4PrUOk-Yl;)qN|?jn?nm<RKh8-XNsi
z4ap`h`d~Rp*UIO%bnC(}eHqsRk%#jo2_Bcibn1+2o7FG^VM?ecwvZ`au;_MQ@PYS!
zlCRSidr(zaY=(C)Di%|Ed1xK5yQUe_d3^UXFyqhgnBrx5|FsEwYkRnJ)WIZfVeuL9
zxJ{2FHs>)>$NTwr#wYiw2H58VtdWyNfI-+w{U5qgV9vP}sU+=lumR+T3``t37R|AY
z6cj?b^ah~`pB#N@0$V@&zO9F3bMCxjdPrMNv6Y7s1eHV&gmv3@HeTHyIYLwNLaobk
zri9pLfh>TK$f)LApBVSyn#!ie9)o!HHwv-Mg?ocFMd(W{CM?U%%q$s{TjWK@F#2g*
zNmM~^VwPINbi*7uw~+;=s_Zb&v)g9o`MPr%Hff&NSKB3T{axFPGv?bcN7+Fgz&v~r
zKi==<1vcEuo;2|OVTF`=DP|5uy&5qA{xE*<@@sdFx}^1ld76gy{5+jgjOULc9~bmh
zxq^6H1o<PPVeX5-J+LP!nUZr>fVux{k^9SUj$lEkCiO|^6?>akG<F3|NIP26?O8d)
zyBRTL@g;n=-p?4+O^G1DwP)VVzWI&TjUQdGi^Z=e6`Cdy1+E4m>-~8@nkfqpk7s8f
zpA1z~iD`WQ^6NCl4>2I`yB&0H^^-wKRycb}MUDZYxN78)qjpQ9LOImBs#J6}f5JMY
zO2-g*&!!FT%RPF4maj?Q>m>vCbhj%xMLDbYsP-H#Zu>1TIm|Td!xtl$#BYk~rdb;Y
zS(nF;lx${RJ!0Xh<vVwDFFqe~S-(4MH)^Uj-|nHNq-Bz*!#EXp=S#f99N%M~t&!Kr
zuOT}8zJ1@+2CP2Gcpjpv%__PDIlX9B1>$G#Y&7jW{!zexpL(xHedHaj8}+ry{5A2g
z`L*@0A6PoZ#I9G}n}a<CE;aqk<ti2b>Ek*vuUODu_=H2+(&}&LnHtszCa4c$Z1V3+
z6w(sJPL}$5>i1<Y-j)7YMuvX8+a3kzyR8LgsbeMtqt_!!mgpxpq5Ct^eejhrP)~iA
z7!oP;%r84mDeCE<;F~*NU)wey%dH&hZH={k%kK$7QS<h_BSq-3G>yxAr6X?jSJSR1
zYOIMmcj_ngySst=A%&M|MIWHY=IcLt=o;>}UB|*a_S7A|X%ROVYLC`Y?>PEArDT6C
zxQ6JFI;sK|f_KQ|db1SYB>KHl|7r6budQi%V(vo5UiyJLO@RU>bJ7qc>ePy@W{8N`
zQ+^6O0ut#@VOIu1I8g^9J&lI1MDNN6g($t6C|D{M9TRcTcwf1+Z~Df%1_OCDr{McO
z{gA&mUs$WNSrzsMnKXMVpYzCdzH)Z8zeZfi)39uOewd{_v)4i~u*F6mMqfUFEPk=h
zzs3iKt--Kdauoipn?rUJ8{}SKl$Btd?NyuJ<oBVAud6MTlAv~G>13DCPpX{t`|A?Z
zbf*A7tF%t(Acf(>TL-yz6Te#LrS^fG&SZ}5NnyRh;sb`#8iF*EZ%v=3YPVB-W|qb0
zGD>z36o)lU2OD<CU1{~8ucX4yO8UuKgzuY;VTKdvZ@k?*=WlIWbi=hPRYHA-ml4An
zjV&E<H|TN^GDIHs&zH#7MjtqRe%)mB43I0QcjBv7`{DV%p>_6s<U8r&IptbaeExwb
zEfiFD7x@|SD0CrL%FhZL?=l&fidMB6g&Or|JwhdbP3v73GC>E^g|2N$U}w&o&Rynm
z+jldBedc2~LL0W-xxW=1r)X-!vpL+Nj3M<gkjil5mO|Mv@-n$tXkTC@erI;QtPF2~
zzS&|hmfhL3B#t6M5|Zoez861Gp!IrZZM<=j`oSfVeCekXl{eSV_0+i{=EM^lRn7~1
zP!_?Ha1VDD${HRVa5U3@OX6m1i?nYqPf^)f9hRk{61|Z)HyfXxkgp;9*}wG-RlI*o
z#1Rrxgl^@pyUPk2fUcg0Ij-}(ohVDMJ49}lfl3dduo8;uh7!H7a;kOjF8QsWc4ZyZ
z>J(CLl~gDj+8IiHDVAWoDv0Qq1EnbKj)Ub&)S?r1k^5V-9&puhyhBOLLZVh`Gy|hv
ze4|fYfs3tvOUlw==eovSiry-CO@x1Dbz5bo6}I=2n<r{mZ>G>L?It4$GZ1_DFkm0W
z1Z0+_hMT+Yw$Ss8DfzfoYbH%J5VgqhHQgo9;;d5lF)WkONP30O!9h{kWtgUhJ*Rr?
zgryng;+Q86u=PoH(@%;nHZ{v?y`gc3sH;+PGF})<3x!mLZQFN!FxB9yrGXk9AAIhi
z4ccg|z4xfJ6H`1Q7v5ycVl`R1S9RgYe=dX@7PD!3%utcX$)#Ppo$&1AbZSn77U%VE
z{RBiUG_=L+{INA<e+Oyz-^ORUZs?y9ru!HWAjjTEWQ!ixmP*lmo3Nj#^_R-XNA9#<
zzM(Lbc{qdgs;0OOB4~L&qI+Eab0lYQF<ez+`bgB)aAx1pgnz@!vBMj&tB&w#R!v{6
zgX{OSHPK>=5#G@d!C5h=$1q%{j?)m-HY17}n{q{|+;f0i*uQIxpyIN*y8eUt`lLG$
zp;|U6dsV{BEXrB3Q1-9CL`Rh#+}G|mRFF{3Bj0_eQJiNS^%S%wb<PmDo>Ga2iv>96
zWRgzAec+=@(6qRqKhyJgsBaC9n|M`uk%b&$O29)|Fw@&B(^<pT8M7m+VSmpx_+>XG
zyT2dKF}-5{rPK`-SF!TJZQSYD%?=jUdEWOvgw-}h*zdjlD7E6s(g%#od*41+*C6Mr
zQePwNngClh5?(nBrH~w{Kc5s8I4kws^k&s|deq#Jf94GjbpP(6gAh!YRqVxb;npA@
zY;ndjUk`T?8@_4_*;Ix;uTds>FqYKuk+3Xq?0hafmdY)_e{kd$A;=pYL+;prZ20QN
zbuC6?$j#>VH}4}*D1<1q|CI@cO;%Osr0z!fhec5}Qa<@RUm7O2M#=`CO^%q|N65vF
zx_-IfVD!xXKB6^D;`K-sB{%zx9rR(o(|wde_u%0bZ};4poP2jyYneyKZ>Q|-#vM1V
zgcNSiWGGJQJ<!?%j#0Y5ju|-ENgL7-=Fc_Be-X7f$=ofw2;A3(dU1Vl{Ne^t&2F=J
zN=c4k5%5qL4j$UHD|BLBGhKS{g()*U(EWXB8WZb|12?<k<x2*Qu6w(kadIez4bd5>
z!g<4)^e*T#(+w{JwZQZudDq9u^_NO!(jEI8x7Q=wX&sUp?RMh&tt$R5Y6Hz2K1o)O
zi7p*`?6u1QnbOygaa@1f;<VWxaib79RrNaA@$34VUe8oTgz|7UGt>3x5FOprnzDOB
zBi5e4hvoFWY<YkCydtaM><2oVpRH_e;Yo{!mCnH*{7qC^B9ACJSugPFIb8k`Hrvo9
z(`juRwJ@h;)Mi-igTDHxZM4a5!)r!x0_^KqB&K%R%yoIp5ccE=<#lc^#NSD1=<KZh
zhNxTDLEAvbvgGQ|F$s=YPls&|S}a)z74@oI7D_bx<S`wk(=u-YPLo4krk{y2o(#AL
zK`m$zbqhFWx^NROY&xE7)UHeRXcT;yCPu^7IIJ5qNo}kD;!){H{`|AfolUN8rI>*8
z!86bGwZ$5-i=0bWDjT-#3|=>;L%m;1b5Q4T&S1VwJV%`mbES5C)scP}GLcG);IXg^
z9loifu94KZ&DG${y>wM}BRO{PK9L-Re2&fZ$bhvNVf3Ocb#XRqxp*1Q`&v*tzr>7X
z;P1Q9>wroz+axxYWxc2?tjfR`c%eG!&l&WAfivwEDjucc_cI~NDBj3QQSrY6m@*G%
z#y-yle}L*=zV^HnwH(R+&T{f&9kRmRmiMiDs=Fv_=_Tu|lB`A7F~`{q!A@FlPTTvn
zUUz#g8;q{zKc~4KA7CfjDR0zejU|El450VPLTE*y(etgodD&DDDU(`v*40Z+hBml<
zn%!9$eiNPMhvX!Zi+=h<4ftpx3~{5cfk)I!IAjI+`oiUIrT^F7mxn{$whc#=B~(Nu
zTh^>8aoa_bWUDM$B3VPSj%_SSijbY`B+9<W7(|wkeP2e|2E&YP3^RuBqUU*T?)QDa
z@89qEj;}wa;qbeD*L9xfb?%Es)=-K04J*;~9Iv!*ijk%Tj4hmkcgl|OhSkXH-z#~p
zMMU6VzN|=d-C1uaAj8I$c3nIjX9(KMHjsY%>i@C45rz>L>D<bb4~u=>xvUm^0?f|y
zsW9r4+VWR?1m)_is`RN_k?f)eBPP8!A}f=>id0>&HCF<A5f3|_Z=C`*Gq*jkKI-{+
z{C?qwJB!bdXCJBY8NAaKPOv8A&e<5Bzr?_)+@w6n!$Nra(Mk0VrW#@6AnRC}k4ns}
zEXc75oyZWrMl<S`wVL>d%l@f8fk@fZIU(KJ!(_*%D}!6_3ntv*d+kaC7nEFVn~HtR
zI*5*6Uj`(m@!ycxWrCIldoHCi4P!}s5e?N@q53OU_U%Hg)Kn#T_aci=SW0abYK0mG
zmGf$N*g99a<{4=v|D@r9yPhb|G%{ODi>i#|NGzDkZ%(EK4zcw%v8sg(D=G^;!(7-U
zuLWAP72FIANny0+PigI72^L}*iW&21Y7o%lLfi7N=T^o8XfS44HR><`!dJfc<(Vjn
zbX==}qV~;_v(hJ;c(ffOaHgJ77LG%!1(@VPw}+8pG{LTL>}Xkq$P)1t;@a+3KP+6g
zV0_r&jJc6;)xzX!`Sic6-Q`D+R-D1XBhn16JdP%3)PEnRY$8=geJI*HWml-m_knEM
z2{6-k%HkSUvO9c(joRjHZItFW()9}_gv!U+>bH(3eO>M~&F^)ce$M&4ipxKlTeF>;
zJl{y4dFn)Ng->+MK$ibUT*2Tc@oKvC;xFcnKTo!XhfkKQjPSe!h58rwl4`aa9tvDG
z^szD?nJ!YNPF<IY7>xO%s(uX&R~}tS0{5qVge0?y99$c6Djp|6pb)g23<m=dmy9ll
zP&O%Y8oFQRE8u5h@<_B>Rf}}3K}kSH*-)<svX`kx^&GZ~pl@I_xdJ&U!0K4CgBPuS
z2=Hn}sL;gOYItaDEL_K1)lFAKm!H6%GUWajQOwoaKCCznE=a!m|3C`+Hzs{*cDMct
ze~(U$Wrh4BEyjVnm|8qb4a&x>a6B_Hk}T43asBzEH2hjo(yxx*k7j*EL%$Q-=A`q+
z#2ORUco|IGcaCqP$_p7{Bsa?PEmmGb+rdb{LbHMNC25J^XNAiL8lV?wQF~YeWKSPn
zYw=?gGXYyzr}>;dOTJ_TIu&jeue97jY+dorpW-%9QIq}LGPyeyo4;dDV9*ig_X#oR
zs*@dy^{pLD5172d_c_Pa1=Yh&hrquUG}#QCpLi=}tyuhEh_<91=runEZL8L6d`DP<
zXQ!$LF99uKV>4b?%zp2}uyB-?7-jP;poLg^GY?q`(EBGSvsEtofFI|$R7qT5bZIhQ
zxHytJJ&|2Bh{xR!q`bhm<$=n-&AyN?^32o1bXz|APGW``jOD6PZ8^a+&;#*ms*kQ#
zfKKveHst)03>ES!Z@Rt+AVg2e$kok&uOcb*StUR(5%p?$#mLPB#yX6@yHUbxiag83
zkQp8-Q4mbeq&%cMmIX8>hkN=qGUqE&d>!CpYa-MHJ5^=h`_8*bT{8<O5m6D+`QF$j
zX3fs`+AvzTsL&^_1MNFyQ*uBe_bQ!52NA8_zqk?1=?APncJPs}`%6Txmg4~;QN(bm
zCf2SBHR}31jgUz=Cr-AUE4ej9$&V7rOl=&*#fBQjL6XxJ7TAQMrA55*%_C~*9UFxP
z-|u#AebS(wn6;gdQvL+3N&C2@&1A=Wd~o?U!Zk<l%c9Qy_{UdQR!^a|?%mVvq52Lz
zS6qDd&+94J-`}Zc)*G34@s{8EcZ0U*2YGR70h9=GGOGw%Gut-sTL_gp_%tnCJoMtm
zCw@5W>JsfO3`_8L=(k8Nh5+M5A7Ous2{tpg)BD!lF%q&bf~GgkoL?sKOMD&~7?BIq
zVPH5Vk83se*zE~d)1~A$yx*E|rd*xoSmp4nt;O+gX`YR9L`>6#weT^X*Fv$g*|e^p
z4D;7=<Td%NCJFQJ;wAzKsmKd9#-Bg*gBeZti`&}haXNF&1_#gs9|~tlLL8Tjp{j#F
z6-RQ+Q{{K_#aP8M(}-9H&YXKU=E^^1i+wr}@3%GcZnE4=SkJM--nIaJJh<yOo?Q+7
z7al0swZcj1O%kqz?`_LIs@Mq8hYfYxoEt$|ahhjp+cxuK7<+NNQjn`saW&OXrG0T>
z<wnB;MXM71HIX*tv8sm3ilX`*_z=hy$5g!ln?JxffCxW*4rs9z(*gwwBg)IQl##Nj
zS~7Zx;@=4HQ+QRMu^qa$GbtY_k($h^eb;=QWiVnH$9h?o;1VVnHxvlOm*>ofq32Vz
zbKAw@e?PcO{v}eYgHp-Bk&Uvch}JY=?ww$zfX?#bLwnu+fuF!L38C=n3aLERNAdkk
zyfma%(0G(@d9*ynho~D`N@a1nHTf7(1`_n{gdY1<t4<jkJEJ-8)Jo^tF2X~t<GJkb
zq5hwI5H5%k_nVpb+%2l_C5Y)Ub{J2gZ&jwyl+4vXzGnXYS?_0CLWA^KxF&H$V=QH?
zswyRJY`YrPL9fQ}v+|tbizHps1edsL&OsSSeR8x?Xq=&Mac!3UWi++SwT?Vz|AjuQ
zDooz;aB=1anuRF$Q7YRM=Ok!K{AyS!+*@uNcL_tO?(mb;P5V9uA)EEIg*HFoCA6#9
z4?Dd})Z2~CuR81C%_MFQvqR2(hicgM4iC@EMDa>1S(3Idz3)Ctc8%NgZES`8*qyyy
z==A~divbGdWb?>@$d4(4)MD86icXv)(;M?`{ho?8^1M`Mj)zekVCp^fr_&1TFw)pJ
zHrDfeaL)*>4s=9@g*Iv5r=Kn&e=Q>5AubDwilS}779;11e8zM2G;OQ_*O=f`ZqFbS
zOmAfg*1o$M8=>_M8C`YXZT3su6Y<IBC+)=b?RN@BB!nkmZoH9b+pKol=hP_cAJeOu
zKUcx7R+IQ*9QM5rlA5~ua|p*fQ{J=1T<P(Ja>l|10+li*UTU^ytj``;=*arG-YVyS
zjrkt94aozFnLG7oMaR?QGXrivP$SD@!!7hZka7W%ob>xUqkSHVcyc*iUsmmzVww)c
z%5j(6zdG`Lc#%I7Mbu)&*1*pgN-WLk4Zc=9;^}6=4XR-eYA{fZ1$s7Eo$(%rpMo3E
z&Mk4n-6dN9D8Ff&wk)GNQv9$_&jP_NUdC4ol!q<d*}GttBX4xitQ+^2Q25V&52<LG
zK3uLM{+_8ZBUU)9F!-8c+O2w@_&zIj=PX}Jy6cO#>e1G}PtHq8rY+r0Qvf4*A$%uD
zMN5UrKHxq2q#20PmY8-5slqTRhl(}v%|Vnb8?LjfL5#9V@KWHGebKJPQ_Kg14`Z3k
zmpxC^lZ*a>RASr=($Ww<x1VP8HuR(x<br21F`E4_h6|@#GyS;D?(og6&i7c-7y0Cc
z%cy%I>}KulTo?6`3-6m`)b?+ZGVc946Dh^_bL{Eo^@5!|7>lGCRQcw%`Fb~<NWD|z
z4un!i!gZ4*NV6KujjMCn{nI&p1EOOKJ<q)tA+&6AGHCv2B~XRN)g7(1G{eOJE6<R%
z(nG1%Bgor0HBk#W6}8afTc<!jBWO!)%H}X$6nx#)<_68T8b9=(jwYcSeNp99F<$VU
z_Dg^V#VtyJuV~u#MP)x}2Gs1G5;kqH50!<YyvJl;db*Z(B<(I)XX$;B(j@ya1!P!=
zO4euf!7k7uS2ISys{JkDB{4n`N}|RmF6I*b8189a_pR)-|1lD@)+vQ2x@a=NGkq|K
z$ii?j6g{dLa7K#+)uK(QEbU9neDZEZ1Mr%dnQEHykW##&>_~<w#0nd?!Dbu$;!T)H
z*XM*@Akx3~S7mbVobhli&Rn{c%NT#%%HJ8uNUYI;ywojPcSAJbJo>(Y!IkFU7)UA1
z^?*|07EU7+B4SF6mu8qw)V}5gNNS-kJI-Z(UjnPVVQe~6<Z~%T>)iv*l(qxg>=cT8
zwxs>W<AMZ7=9;b<tW`(;J4kKkdO>DtzTBR-akHO7v>)+mi!?)Kw0<mv1f~?>lY-+^
zC1e6M;`RZgB8}M1AWkD5B^Pfc3wU)^mK%*@4MxTD>#r^NSj<Ia-v~q9P4XV2L#gcU
zT4LRiC>h_>a{SJ^J+zs^T&EtrwAlYSYGdgKr)H^VlC@qLpnXL8`Q`7ucGUWpU%xV|
z2&c~vVFXk~sXSF{o$I6qX1URuNs>}9P`=j^ocg7dOf^i`&BcJ)I96&{aEzD~)_sv|
zDIyyP=LKsp9IwQbiiE60;Hv$oAW|{(9AGZ6mN&66On~}e5DdR@wTo~aDfj8tv;!Kp
zT=~`gahP;4yh=$_{E_ud&?!!A<dLaf@`>`+*pXpF9_tjLa05$=^Z(H_fP}-4x&?mm
zm2d2fYn*E_o-`>KOa<c$4M3W420m7(w77CG&7(r*SwhT{cmENKehP5v(>NU@1sIAM
z##z3vv;1519wSZI*Oa!EZXG|fFZ&O~9r*CKq&z^H-QbhY%Wa5UHF)1B)t;4Y0>o#y
zEXLyS+P3FbQUr4S_nad_kZ{PZ({5KqO6O2|)}%$8uBi>-0wgH`!q;s7f~MVr@E&52
z_-oJZ=UOLSVnxZjL$9>XCq+10+*t7ycH%$_Om)+7F{s^%_dmQ-ivDcCq3|*VOXm*l
zpMu?#)TRkYX7#_?pYqZd3|&VJbi|5QI>sidkNx{N;9#w}3RUaNPVKIB<?m?G#&1({
z4JQ(WHuMbqEqW&aGdw0bReOxVlH+gSyiZZ6`dTpVWdF!^-vs<L#Ps>X-w@hgG8*jX
z9t-WofFs*{Xw06!f8&PJ`61+vp>a$WRmtK6`LX9)*XM4gmfNY-!J0<f&;PxYe=I}M
z`m8~yp_7U0*uEx$6SxlH8~m$4z;Vsqgpf|lShy`eh95M^l=DIrI~@UAD5=}7QLdvJ
z^MRYrZh^CX^AJ=4A}<0&>hsw99C-3IJpO~>{nxWk{{h`~pI;MmP}hMk7G2cpo5nv4
z;=9NWgIG+hz7uYLNjE{L(($3Bu_YuWFh<SGXCHKKQCg=?bE2sgo-iJIVu4cslGSvP
z(1;VWN=%;x_98G2PD!tIs2n0O60*g6fR7}zXt|yI8y4W(${*6(xM{VWX<6r5v{bQI
zD!gvh+72CbJ%0$J*2|=b^|`clyY<!BJ`Y5f>R>7UnXHCD(Y<Q6@$~b;a~PG~uIf*f
zlTF)~YH!3IBKvMnvKnO1P2>pAZ=vk<n$)N~E9`)0(m@bmP$hEt5D@@!5aN2jum3>q
z@bR3>lsR-t3ii{Qe^4#6$0%a)$ie>2MEdYlXa>pO*NdV*!4KE=08^l2TU*%~8-K7p
zF!(m0=?&qfaeT<%l%sHi{L8t?z<~!pD|GMc9q5C1!%m<MZ5!-GlHm?G-`#y<@BHjh
zM7#gNW#bMO8fZWIN@yTxZqTL5`13Oe?`w<N_Lx%a&TIoDn)~qH3jLCqP^^np6*%va
zPI$nkoE<!^<k1IkaT8qFt^;=AXrwnaGggZQYVRN$5d~xsYP)?K_51oEFy!B??+kG@
z0kH@9DWD)%K2<+TWO~3g;OzR<T5$gvg=7lBM<uu9_I9WjM$4%SuYHMn+V-4`x_c<V
zI6jfZKaiS9b*#cZOD(Q*#O5KDkg5O(oX0nO`=T6!5YT{Mz~<PGNf|Utvy{`eAEpIY
z(pp$^@aA5wg_|6UxUd1Q65D}y&CC**Ort9o;|f3ej!2C@#JGeecFr9<DMK3f*fZ;1
z%1%U*+bCtp=rf{>TKuiFTC#Z*)>Q^&?*KOGyge(T8S{Lix-F0?<qVPG?o@4kqo$(h
z`@jT>{<?Z56Yt+i9X^7XT4~i-Aw%KZLWGI<sCB%{e7Bqn64_<KPS0B_@*vxX-?l(f
zp>`Mht#iQDSgBn?T)Y~q`9bWFCw7qoRVc5I$6=TaQphPj3a#|0JTM0#&fZKF$Rki}
zLMovmPiOwbl9ud~dLl;@50vk^WrvH;$9a)}6ZryA!X~AiBvA(J=K@pw4cm$u;wD*-
zV^@l-IUIZ9hk<SWnbE~I!gf$EP!Etb7VkGudcH^@E(RsoDeBV-X>FvXR$URAa@%5M
zgEs>P9nrM&_e;DN`)xEXZPyCw7j?^~UXXD&)tT;(q`7w7-;!#6yD}xjciGmq8p(Ha
zfQTQ`YYSp~rnu^kGm_0YwSM}J8t7fi6M~2-|8@eUM%esaM<BPCUw}wLW$#3I*ig?q
zKrwKfzxU261Jdhj_9@ny=cajIy|<B0<Br+Ybyg-W5rFKbISx1|3*@33jsI1D+~l4@
z0SwLfsLa|?J3<cX+0GHP#1?Pe<A+|%-J9oPi#s>CFkTgP$+cf|B5JOoqmXAKWCv0Q
z8zep%i(Qx~d)mCjJ-IW-C>|n8dIPkl__f|u*RkA~_*}|}R(8WL(!Q??NSlb`ByhqE
zNSk-wn=hlmQ-n{rjj6hg+Q2ee_<Oq4-#Dg_rMOSoRPF7mB&h^Kg;}KC-ZwW}Jg-Bz
zjXAW*5xfh{_2!Y>T!9w=6Az^Mp7e{hDPD^~iM@q1Tz>cCxEDq#-Rnh%Xb_Ak*^^pk
zb2!Rr6552CZ_2v}S<JjlRO-ce?cu#|r`;z}@2uGzO<Wr^#Iz<l>~_I9cl?g~@EO{P
zUMp(*#0Av)eFb|Fm@?{dpt{QNF%(PF6O!xt7jFDPGWD((&!}!M#tH9v`7xEm1BK~E
zY4lcz%-+u4xq**$H+@ltndXzwI+G2rH{1+hEyjEu6Sm%c88@gW|HR=mpTJ5pvqL*}
zXmZggzsJ~Z#Ny-yDVH~d>G21evI3rwX{t?*(_tdjqz?<rU|Um@cVTYJGid=nGmp9P
zpbur6P~|vBZeB@nlIP-Z2K`w$VYOzxxxFKy)Vxa;5~ZCf?tC_+bY?CQ^8(Eu&nZt_
z&>V+xw2Q}*Qan^uh!XtpYcO|h!<>^Z{f>O$S@D5>qhFoUd;B2f!MUXQVs1OGwleqB
z`JxYAfg1sIa{M$jG)^-SL0=~)N3Y^Ne$vq~=e_ikWBDB{P~gf&w;8~;^xMc@s=EsB
z5-GE@B0s^)amoGH`4PL)Gl4=qUDOQz^Z;A_%pvirilU@J$`t)?>yOjz7_&k*C`a7J
z@{`u1Nw~dk*bSe?{otthRfT*U33e=uOPXtu-}PsHDu?aAj>f?e+pbe@RhWo;@f!q0
zMdNw@XB43Cc?f7=3}MnyRxExKKqn*!vTPu2)FL%T{lRxZV7;zgm5(uEokQ9GoPcht
zgRfU;&E9TM$$)O6G?R;W<F@R_y7gPNS2F6N=A-O4nq%`T5hdMJz`q$B%nN!f-D?S^
zn0cUJ61EdpC-Z9;^s@7+%t!_#VA2l!mE$l5m3PBmi<Z!^JsxnuO?VT&rh;U+M20!r
z7my)i+z-%KnFSXv<c=MXNd7o0gid}VkMBW<8_E$~s-1&|-H~T3j?WH14d?5yBK0pL
zPw`&6P(?r!%n8et5xgnx{8s^fGzolKz86V7f4}yD(48-;UT5{=Od>ODX|UXwq9yjF
zD+M*k;640bzP&`L-)rP+gm7wiU@$1qGca-bff*0U6mxujoH)+)vSr^BwJ!|^n2Io%
z;-|hG$4?d*yB%jTdXYdM-Wd%FV03!Ob0x-K<kqaUF>9RKzK(Mo($xkx#xPuzbl`M1
zTUMN~@uIFP22NM~7Bo&8c%K#ajM93q-{(u?)~a6BkC=ahTo*-gU-J?P{JP`z+N`pw
zk}+mB&LoQ2q2{LtR##q<xIs^)1BR1nJ(RhrSZpSxdn;|1jMa0V4b8dkHsPv*6ZNPL
z!YfNZAFo1(_W}tJ>!p&hyls^#6cME<HwOQ*k>sp##YhNGn4FxinVx5%q`HI+k|&a^
zbX?bTLM)em(P=vG-%qwtpigwn&`f>o^vhp9GDNt#hHD?7p{c<S7`t<~u+MfIX{8m!
zN6g)42Ir9xGJ+jN_2ol|C7FT+AxXm0gK><L!-JMk=`Qt;sXm|@)ec{!$LkK-rWR2<
zpHWD|lj4<9cGI#c8+7z&d<skLUDsy9XM5EO-*YGpI<q-ETX)S1kE)sbATI^UbXZL3
zLe<=@#kk6zAnQHd-Lg!?)pe<HQz{dUm7}Kd50l|IRw4X(xf5hxgkE$?##I+`eF095
z8JN)+ORUz%LOnvxeUoUv+7gsrozxgnmLO3lS+~;9BkS7@c$Mth!!G5G9+a%t=6m?&
zD11d4((hJMBlt&6)*gbKmh2j(VbKrMWZ4XFVI@?q*9He!bji9!vAB&7F7H0b)^Lsz
z1&xgTK??CVfjw1s{2sumRCvTrtH<|+uMO=jno<jbkBX<LdD&&MXWI29eK*2=ciL&l
zZm^%aB!exLQ8<=5JSi{0=88XwTRr)-g>@lCVSd_+7%n5Uh?JMk`M8_mgUE!zWXe*d
zTaZ4;RgH9F(zM=`VSoFo>Wp)*s#P@+SdyCbP^*k}wOjLeW#V{km+Th!Ptt!d{bQ=v
zBHfN0P7wESLhQ#{_w@AT-}LjD;Tr=2nJqgnG)G1~=w8#{`^lFx8-wU#8tLS_m-ikd
zXLHOebAd@B6)&fycS~{`x(tOS4M*qeIJ>m^yRcViY*O|-DiY_+_cd*CD`4U7z|<;D
zrpLbQ7uHGj=vlmD^GzY_jzwZ9HTSxCr>yTQn<NxB(h5A)mM-jJ+iMPEL2_|>Ni%)g
zZOi`@x&9$%IZm+eLds_GK0q4kF!ir8y0W=`ObMF&-q|K%9A4}t?{rIc_28iAPE6th
zx5dA$y{wC12wlChc14wsXN7V{I9$K(W1;lP+idEO`iHP*qHR+2RLZ)Rc?Ao-e2G2-
zlWIvUu0E-5Q#FNnLw!ZZZ!ZJUeGZHwyB(^3?HX-pqtca}37XbT1IF(U^zG`Fx1*<9
z%l#%ExsN-lNMTfsS|*rij$vn>^ymDv*dyYujMSkDU`^07+p4@Q(>^1M)7UWCO$Hp*
zL-^SN`5nUKUW6CAYLb!x0wHa(-w6TUIUVw)pts2o#Le5`h!ME7E+!IG#D&guEBP7?
z1E+?<w_9OkvBLJz^csInc)S{x>I}D2sn)xpnQ%>K<Cc5*06uQ~hq8QE&$q14%A_HT
zLT(sglN$Hpb=r6EWXRKZE^#Pn25dO)jQhs=<?4F6f60%8gjZ&_m><0Yy_V>^*U_5t
z$vh|7YU<X>5-HKs9M~&2m>p#94H`LzX0$D{81Ley3^<6|T2Iy0H5T6kp|SYnox(*@
ze@f-a*DoUl@j3l1Y`$&KuK8L5`nO>~IK@NFG;rh67P%&84TPYwB*MxM=!<ySyosoQ
z+4)R55j$SK^c*A;Uidcr$=rLTO^EMx!Myj=Zw~?&l4u4o7XN2d_Y&6LI&(Lldw%)F
z_}Ah<!*AF}{gnv3)rez@Lp!UNccpn$<)lxz%z6hkdLqPAVn16d$?Aa#yKNyn77RS;
z;hr_|UOiW}JtsY$tASm70NBOZ?l=*YF+V;tkf&y>+?MHs*2mOrSB4x3brRS$lSzBn
zY;58Wpc5?c(Ay^_+Lfg5tOR+TV@<pzFL9ySgCSnlD{N;@La)8XV?CKI{bcJ2<~)pC
z*zL*byzpn0{yxvo{+<RoPAftuL^#sVFEtpEEvo*?D;0J^jNE8}2EUYBdx#M<`T8Ql
zE3STX%(orh+P^u(I$ePI&H|K{pTh6D<OpU7lFk)IZX+IOLVA?XBon_GsXqf5n2s{)
z!0D;6zMjffvS!11r7V>HjF$f#$ZhbBLsjfhD&woaT<tgbg-1C=fXN%BGur(iq))K-
zs+@4+4;HeWn0w|Ra=^sO!x^v~@0CcP8-BY%g&6VvmJiR&x?g9fc<Q37QIC7rmH<EM
zhS6XP8UyGMnzzHoZvk~4Ewk%VlUIpg$AyvjZVx^7dazN@oS)pn{O&?&oUdr=nVeuh
zko>*YUKl|q4_KWgzWADH{j=xr(Ut%-fU;g-^4({GrWpFg$-2Ts^AR6{z4>qvM~U5t
z)AmqZN-UTYGX`at>Dmyj?NpG5T-XxGCdHq(fipW2u_xQTFmTM~x<>NFaTSoN97MC>
z97VyFeNO+igQz=bJY>o=cUeo56u)Z-S29&Wx?W6{a&>~(H@t$(hWpkSZ=jvw)TZCx
zgidTrjIZ!|%*G5QpI@WkjlHDb@Zxu1-M&%LUuC-DvVedaPOX24q)PBtI7c4I!+@Q5
z?3w-$gMcUThV5jmU^0W%Gx*7Z$v%Kc?wWA{GnR*80i_(|Pj};Nw;Zs5Bw!M-q!$g^
zfi204b3{tz5H_shascgU;ccP3pO%00*14_Xb7a?qUQ9yis`R!UN`AbMTq-&qJc@4<
zaA*aU=KN#!I3XPQZ)NIaI#7L*U9&aS?$jYF@9Z<fJaghTZ>o2N7q0#X?_RHoQ3`aE
zylT0^Yg|BXqm6D~9u}#fd@x?HHVtWez2RH>iV;4K+08DiknRyCZ&fXRL1h6mEJL*s
z2v&iz$07_pOz%<2$Fh^0lJbh17dyjUyi=L{;#*^YUMaI!6Sqm(qvCI0`jfBHM1J*q
zADXja-_Yb;A!z?TX@mFJR!Y=0edN>L#C?a4(QAd$e~@bhi0xK&i3x3&GmFNM2_zR~
zs-p<Qh;`-@?n~LpZnYh$b@d)xBZ25-Bh@;L8o&7oQXPgnky`yK<&#eBsT0}K`7rb-
zI+uEB;<1k1Sse~7uI?nzUtKwc?&jod_3+u?4OcqGC}0`!j#;xAm6(4f5Ep%!G9=P-
z3KgrTf>GltUwo0$ld%}zx{V*+H*DRhnD73}FweM4w#H?uXEx7b1Q=Gd%hGUq&NgVg
zpYA+6j<Hz(snV+HbM2FLf=>}nI0e|*ozgIT6=nqw>}uD-i?YCAQEQXm9Il(KC9t}&
zHX^-#yQ`{NvTG&O-$%lozy%8P!UgkakCxdO=2&m9SzLqS@8PJr_l-bc-Ttm+j{X-i
zu3L3RmUh+NYAE;!TWubj0ZN^K5mECG8z(1OMj}%Pw!BYjYv-S?)C3@~PEF^Aq)JEM
z)K*s64hbvD<EY3r0rj775b_qLY<j^QyYFmGg;sp`;Qp3?#%(GyB`=`SlY6^gWww50
zc2;fIUVGMW$xYV6BB22iCHEZ!zlGKCP?P%{*H7nUD1d=ssdDh7>=42Cz_N~cOFtRK
zwxb{tKl&v&Kb@cKE_pAQzz2;(#R{e0F%gla?NHVGRoW%zj41bxA;2a{PVQ(`44aw%
zI3biioUY`BJF~cNQ>`;U34n#k{l4d`k{9>KN``No-aPJ4MYiGSj^ENIPRXo(f0~5j
z-4y<8vAI;`G?&2CCWZ3;>A}#V&E1ZDv}iCFsn+R+>LImCa!mKa&Nnfo&YplGVxGg6
zpCg5vm?mI0uj$wpjlYYL>YVAOf0J9d`_$ApG&D2=lZ@)kD&0U04Z)*yqsBJY6@)D<
zjTA5)y=+4pFHgg%d*o$yx1yYY4qgRuwTCV1ZaB-I1_8O1)IAwq2x41r#Gwvqo|`W&
zVp9rtR!c2P(=_-+sCkuRejBJbKVG+WfN0l=H%%1%#eQ-r#I0#fXJO0b`JEN-Fj#h%
zEG}{$;@QnY>Wu7+>eH;NmxP!oX~=EsS9)_#;#}{w6_ghNuxE(70=2=h%TVk`ru_3%
zx6#$!Qmb%A1hu$H7pV;}G-}%i_6H!To9L0hHlUb?EX3$NMjDyoZ_Yp1ECcFnzYYAe
z<<hXpn_b<U@bp301!DM}0VfZZ16DIvuG0OS?8B9`@HRpt+w({rT^n8%Ht1J^(LxbF
z7JBZ(D@ja^Bk<nfKpey*LvHaQo5!;NxLyxI<CO6yOu00+-#J+XGmD0aNV)LUGWQNz
zy;CqCeG)D+u-wztcRw2;M!rG2Mklw`TD3B*m!aq9c@nypj|eQ7Ts7Iln?lYI#|6H-
zu##=8Lh$Ehh@xiwIlZr@h&cHv8CtB#6r421hZSnxM<tk9X;2;g%Iq_x^XVL3l2Bp;
z>+}{HhLH{zCI%1+ZUIG>mYPY}DUu&4%~ZAN7#YONpgsJ+btC2i3|;;7$)jEjp&|Gl
zyOxDN7p1a8@6I-IPxCn(+m#W%ExWUFJa?S`i&C0uTJI|};A@!3;09b<efM6LKpK-F
z%3&ODSSj~yq5ilpgD0&SrLwFFm?;uq0i@IO=_SPhU`{OaaP`?c+CS6WXC&v{c9Nn@
zvXOy81059Yt3T}b+*P994k2bL_F6gVTqljS%dXe%5@(|JVqlovM8bJm=%pa+UZO*7
zqlfR50GbJFnP_j2W46ef+D5K%_I}~gU+Ax#$vCh4DPb>g17<eIFpik3-O@CQCnq=6
z+KQ7fsZ@2o86Y%D^p0L1PLQRZDEOG_JFqAA!{-}$T+HVBo}5VK<Z9fCkH%isp6;;k
zgvJ6&-l2{dMwquD(hj!Vhgk`05cjo1)*#Pfud~q2k!%JuyYCuMNMV?&x&^{cZVIBm
ztEJU4F_dp(FVY8X^eO|Xl0J#7-8{!%LK)k)N58qJndD4G{HBk&3%VR|TAV{Br;A%(
zbzg0YYcI38p7ar~+B>v;#(-eehuhXgKmDG!^Rtss{n!ODmPKOO<DoW;tJ<sKUf#8(
zQTNyn)0d4Tfc}<HqtB@r_^U}T<-J&^abGdj-p}bbH^*%uHbb3~zFW`qt6e@D`E0Y9
zw?%83vmsSr4h}dWLoVP-3aP;_CW3<NldYc;X(7{{S)uJS`$pr1>y~Yj$fV0~Ppn|H
zrsv}a`aL{OODHtya#sgV%~Y`m;w|{YtT3c_QfH&!JUp{GrNVTgFP2G;!0U}AFqO*H
zjd}VG34<xs_%2{GbI{3d&{Z-o*AhB=b@wn%yI&_~lcj#7c%gN7jBCx`8tsnR>iNWB
z)@8RhD=URvzD3sV4n$_u)~>!z*v1)Je}yz4r;TScN@cFMXcExCE*5lvW5eve{yu?j
zx-jUg7M6Iu@oex!wX2ZKZR7R@2R>wy^IL*klVf&R45HIEQZZ<bb+0=(7xu@2?;1{D
z&hg&EABRZf(ld4q@QO>5V=`ci8JZ`v5zx)!dqxP8el8GjlFkJw<cd(mjZHr0Wb@s5
zOW0Y!4zUS3)XCo5#egX~?Z5^3q%%28AnLX5TOL~MNh_#GeXL=aONz3r+Ag**dje-}
z9Vt4$`lUknDNvo?+~w;%xb$V@8#CL$<K|SD$1H00v?q6cce~5CJ?~Me8hSA!H5)3H
zBTwUBw~Hn5n0gCVd9+m2SkXE06>hPwkY&X^cJ(zy?CXCb$dk6r^GKdEG9c7&3SqD{
zPD#)6l{$zH5_#ej2q5Z_;-^#O+?FOFyj7Z7`VR1$8HL^YMW*QlJT>&y=jLb?HsS*$
zZt)X!6;|<6yGUKLWZWb4?qB(nIKY522x2EN$-=L7e&FEw?dSdHC<GlNui(vjn0eGc
z2;vb)|Kmduwa{ehq@<e|&mL!I=7vbYQH)6Syi*Tb<&BY?!jDfB(yB<=ystjm7$6Y`
zUQWXdCYN~u(hbpYsd>pVxQNEIYN&u~H!hZrMe4hH-orIx4HL+P?Of1n`DFg4Av8-5
z#0M;9T4U;htb4nq=Q*=l-i@>;p96e5aI>0*xC;PBx2hY_zHngqR4vhpUG#4w2b_<?
zW2u<8UH3)zO+r=WTf>{sku@5Logb7`laQ!zymG!t5wO=%31<xjtez6@5794Kr9y9R
zR4qzbuiY=K?t=`8vUi^)1#xwII${lX7}c0vfuGOh?y>`&bhrfc(+YH~%GnAz@pe+A
zn!3G0NnV1o+2c%iNJ@$iUJ~>r@U7MhGk^hK>1EhbNq|>NeoCDCN#L}}beGDnFL4)`
zl?PrYhg+0nYl3N$6P_(!cN1>dYCO@~ShxZFxAr48AX8DZ<cgpR&!>9{{6M!)^!oOu
zbBL`L+ZorB30Y+nxW`MZ70GKQBmH?gUf$RL9W&&yIn8Jn4wM50UVEa-DD(nI=C<i@
zd2T}oxW#W3%mIJ*uUG4QAo<)WrSr@U{VA>Xd4W*jufHVF%B*;eu3D&`t6u&LEUA_?
ztFz=8)ja0s)0Qv+!>nCeX!8Z$>RG@-@ngZ8lRu*5-Fz{ul1@=2u2xkF2k|RGHrLVD
z3Ma@nf<FOk7|nH3PsC_|Vc;HIN+&=ODyEJ@Ph<p_obvCW=<d4i%x8Uvl!RvUlLoQh
zgW)Iten2L;V2kd{YaiB+4K;$))_xLJokmE|2Bk)El6etc$_+{+*6+sv<FHpqxeFMF
z%Z>MeaR~b4uh0FYhyVKQyH~3m{*!vF%VQl(krXSanmdLo2q<zAmtO$n+j0<sHO=zl
zX>*<L0VAj{)Yb`X3*&UYsgRw{N&j<%i>$ZA1OE=vfp29W753dR(97xZ58tgFil%|!
zmW=-a&8_ZSIr_}ZV|I|FaOKo~s(19!An4SeXy-em=9QyiGaTuDDEP_$F6!ue_h_WD
zifKPONVQ)K^gl!gr?($~6oo^9vsU>2Y3`o~?&v1I@FNHPPJib>G<T)mKYB_u`F?1v
z;l0*9{@ecuD5v~^n)`0F;*n6VlgNGyl>y{f+!t*-l+OS{nyX|7dR6m!Hb^Zw6zzQP
z<-cFrbalVdK%4c*rr+=$_>M$V<g*SP1q8dtsrQ53fgJyDpr*!;XOTxEN#7a?$dkHH
zDgxMrWfAAu?(j7$Wc;0cxQ{d~|384IqCJs&2!;T>k3cGTd{d@k;!#MAf{d2D<Qs<n
zK+_eFy%QG{diyAfboTRhZ`|cS`p1h8K=>ZYs<(NRqTUFAH3}GJkG@ZrACzFku)4c3
zAHn3-UB6eCjY*FDPJ#VCSeT2B`e=N+8YHUd<AUN@ZXKbl5BsG^9zJ=0XMuu4Q_L8H
zB@p54%SSKUeMSI`YgY%39`C|_3e4*9TkA)+Tt+VT<opd#`)9ZjPLrt;UZDyB8rA^@
z?Bs!k#F{CleB=odvMDN%HB)Y5XSZ|hIp;I&u=*0lHoN18Pq)Sg8lKT{Jrk#Fx@E+0
z`^Zt8L;&P)LGM`(XX22M{r4yAhk@i1Px*!=wj;Mj>oz#}1$M@xC(JiW8{lH#)kxwq
z@!8Hg#Vmer6r@wO(MiLOUipVArcJ@;K-keiHBsE&oQNGN)W|wFs5q|2aVhsGuQ@|%
z(+o}U3Ywbv5oNK^n^m$oYO4kqI;Xxq@PHn^$fxy8zr7^SSk+Zh3Sf3<G8PGS`VO_p
zxzBs}ugcu~#inaJxEYWg37o)`kJbR{V7>qGDz)NroX;lPVHkc8O4IwS@j*;AR`Ahf
z;ChKcz`Z13opTQr#)mG+4k{l3DzLM|2-o7Y9+JWgbfQ3I8aArH7#687xXj#f^T=Ob
zCKpRHDzlF0h!<^C=dt{azGZvWF;(^IQ6%v*<>-vZ{dhICHrs9i^2yMdngYD$M^he-
zk<fRWPx+A?{|I&`o~zvQ9V*n;)qI$q(`d<i-x01QXq7$93IF!!>x*<Taq+e|6Caz>
z3rBHB@%^-Q-=|RESkVTBn}bu~k~Fr$jwbP^)(@|D%zurvJeY7TSxc}Bs1X04%YXiO
zXaDMcsO~>{X^t`ufI@{%kTE*9bFA0<|FB^1K9l(*?D~(L|8v3j&Y(I&Qr=JO2s;0{
zIl#xoz<WP^R+~6XM*G+L9}CVHfn#*ivHv`J%m2%Q?CRjXHjDQq{>utS7nIy*1nbA|
t9EI#*()+XY{|DcH3&{WL!RNns@}XT$-FRNF>M`(7RaxU!fuiZl{{?P;rQHAk

literal 0
HcmV?d00001

diff --git a/tt-train/init_repo.sh b/tt-train/init_repo.sh
new file mode 100755
index 00000000000..80737f089f8
--- /dev/null
+++ b/tt-train/init_repo.sh
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Description: Initialize the repository with the necessary configurations
+git lfs install
+sudo apt install clang-tidy-17
+sudo apt install clang-format-17
+sudo ln -sf /usr/bin/clang-tidy-17 /usr/bin/clang-tidy
+sudo ln -sf /usr/bin/clang-format-17 /usr/bin/clang-format
+sudo apt install pre-commit
+pre-commit install
+chmod +x init_tt_metal.sh
+source ./init_tt_metal.sh
+
+sudo apt-get install python3-dev python3-numpy
+pip install wandb
+pip install numpy
diff --git a/tt-train/scripts/install_cmake_3_30.sh b/tt-train/scripts/install_cmake_3_30.sh
new file mode 100755
index 00000000000..db2c42218fe
--- /dev/null
+++ b/tt-train/scripts/install_cmake_3_30.sh
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+sudo apt-get update && sudo apt-get upgrade -y
+sudo apt-get install build-essential libssl-dev
+CURRENT_PATH=$(pwd)
+cd /tmp
+wget https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0.tar.gz
+tar -zxvf cmake-3.30.0.tar.gz
+cd cmake-3.30.0
+./bootstrap
+make -j$(nproc)
+sudo make install
+cd $CURRENT_PATH
+source ~/.bashrc
diff --git a/tt-train/sources/CMakeLists.txt b/tt-train/sources/CMakeLists.txt
new file mode 100644
index 00000000000..2b7c6318ed0
--- /dev/null
+++ b/tt-train/sources/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(examples)
+add_subdirectory(ttml)
diff --git a/tt-train/sources/examples/CMakeLists.txt b/tt-train/sources/examples/CMakeLists.txt
new file mode 100644
index 00000000000..308e1dbf157
--- /dev/null
+++ b/tt-train/sources/examples/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_subdirectory(linear_regression)
+add_subdirectory(nano_gpt)
+add_subdirectory(sample_app)
+add_subdirectory(mnist_mlp)
+add_subdirectory(graph_capture)
diff --git a/tt-train/sources/examples/graph_capture/CMakeLists.txt b/tt-train/sources/examples/graph_capture/CMakeLists.txt
new file mode 100644
index 00000000000..71dbb56dffc
--- /dev/null
+++ b/tt-train/sources/examples/graph_capture/CMakeLists.txt
@@ -0,0 +1,6 @@
+project(graph_capture)
+
+set(SOURCES main.cpp)
+
+add_executable(graph_capture ${SOURCES})
+target_link_libraries(graph_capture PRIVATE ttml)
diff --git a/tt-train/sources/examples/graph_capture/main.cpp b/tt-train/sources/examples/graph_capture/main.cpp
new file mode 100644
index 00000000000..4df8d2af5d4
--- /dev/null
+++ b/tt-train/sources/examples/graph_capture/main.cpp
@@ -0,0 +1,119 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "modules/multi_layer_perceptron.hpp"
+#include "ops/losses.hpp"
+#include "ttnn/graph/graph_consts.hpp"
+#include "ttnn/graph/graph_operation_queries.hpp"
+#include "ttnn/graph/graph_processor.hpp"
+#include "ttnn/graph/graph_trace_utils.hpp"
+
+using ttml::autograd::TensorPtr;
+
+namespace {
+
+using namespace ttnn::graph;
+
+long long extract_peak_DRAM_memory_usage(const nlohmann::json& trace) {
+    long long total_buffer = 0;
+    long long peak_memory_usage = 0;
+    std::vector<std::string> current_op;
+
+    for (size_t i = 0; i < trace.size(); ++i) {
+        const auto& v = trace[i];
+
+        if (v[kNodeType] == kNodeFunctionStart) {
+            if (current_op.empty()) {
+                while (++i < trace.size()) {
+                    const auto& inner_v = trace[i];
+                    if (inner_v[kNodeType] == "buffer" && inner_v[kParams][kType] == "DRAM") {
+                        total_buffer += std::stoll(inner_v[kParams][kSize].get<std::string>());
+                    } else if (inner_v[kNodeType] == kNodeTensor) {
+                        continue;
+                    } else {
+                        break;
+                    }
+                }
+                --i;  // adjust for loop increment
+            }
+            current_op.push_back(v[kParams][kName]);
+        } else if (v[kNodeType] == kNodeBufferAllocate && v[kParams][kType] == "DRAM") {
+            total_buffer += stoll(v[kParams][kSize].get<std::string>());
+        } else if (v[kNodeType] == kNodeBufferDeallocate) {
+            auto connection = v[kConnections][0].get<int>();
+            auto buffer = trace[connection];
+            if (buffer[kParams][kType] == "DRAM") {
+                total_buffer -= stoll(buffer[kParams][kSize].get<std::string>());
+            }
+        } else if (v[kNodeType] == kNodeFunctionEnd) {
+            current_op.pop_back();
+        }
+
+        peak_memory_usage = std::max(peak_memory_usage, total_buffer);
+    }
+
+    return peak_memory_usage;
+}
+
+}  // namespace
+
+int main() {
+    const size_t num_targets = 10;
+    const uint32_t batch_size = 128;
+    const size_t num_features = 784;
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    auto batch = ttml::autograd::create_tensor(
+        ttml::core::zeros(ttml::core::create_shape({batch_size, 1, 1, num_features}), device));
+    auto target = ttml::autograd::create_tensor(
+        ttml::core::zeros(ttml::core::create_shape({batch_size, 1, 1, num_targets}), device));
+
+    auto model_params = ttml::modules::MultiLayerPerceptronParameters{
+        .m_input_features = num_features, .m_hidden_features = {128}, .m_output_features = num_targets};
+    auto model = ttml::modules::MultiLayerPerceptron(model_params);
+
+    auto mode = tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH;
+    ttnn::graph::GraphProcessor graph_processor(mode);
+    graph_processor.begin_graph_capture(mode);
+    auto output = model(batch);
+    auto loss = ttml::ops::cross_entropy_loss(output, target);
+    auto forward_trace = graph_processor.end_graph_capture();
+    auto forward_peak_l1_memory_usage = ttnn::graph::extract_peak_L1_memory_usage(forward_trace);
+    auto forward_peak_DRAM_memory_usage = extract_peak_DRAM_memory_usage(forward_trace);
+
+    auto call = [&] {
+        loss->backward();
+        return 0;
+    };
+    auto backward_trace = ttnn::graph::query_trace(call);
+    auto backward_peak_l1_memory_usage = ttnn::graph::extract_peak_L1_memory_usage(backward_trace);
+    auto backward_peak_DRAM_memory_usage = extract_peak_DRAM_memory_usage(backward_trace);
+
+    auto pretty_forward_trace = forward_trace.dump(4);
+    auto pretty_backward_trace = backward_trace.dump(4);
+
+    const std::string path = "/home/ubuntu/graph_traces/";
+    std::ofstream forward_trace_file(fmt::format("{}/forward_trace.json", path));
+    forward_trace_file << pretty_forward_trace;
+    forward_trace_file.close();
+
+    std::ofstream backward_trace_file(fmt::format("{}/backward_trace.json", path));
+    backward_trace_file << pretty_backward_trace;
+    backward_trace_file.close();
+
+    fmt::print("Forward peak L1 memory usage (in MB): {}\n", forward_peak_l1_memory_usage / 1024.0 / 1024.0);
+    fmt::print("Forward peak DRAM memory usage (in MB): {}\n", forward_peak_DRAM_memory_usage / 1024.0 / 1024.0);
+    fmt::print("Backward peak L1 memory usage (in MB): {}\n", backward_peak_l1_memory_usage / 1024.0 / 1024.0);
+    fmt::print("Backward peak DRAM memory usage (in MB): {}\n", backward_peak_DRAM_memory_usage / 1024.0 / 1024.0);
+    fmt::print("Forward trace saved to: {}/forward_trace.json\n", path);
+    fmt::print("Backward trace saved to: {}/backward_trace.json\n", path);
+    fmt::print("Capture complete\n");
+
+    return 0;
+}
diff --git a/tt-train/sources/examples/graph_capture/visualize_graph.py b/tt-train/sources/examples/graph_capture/visualize_graph.py
new file mode 100644
index 00000000000..7d1f40739c5
--- /dev/null
+++ b/tt-train/sources/examples/graph_capture/visualize_graph.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import ttnn
+import json
+
+
+def visualize(trace_name, path):
+    with open(f"{path}/{trace_name}.json", "r") as f:
+        trace = json.load(f)
+        ttnn.graph.pretty_print(trace)
+        ttnn.graph.visualize(trace, file_name=f"{path}/{trace_name}.svg")
+
+
+if __name__ == "__main__":
+    path = "/home/ubuntu/graph_traces"
+    visualize("backward_trace", path)
+    visualize("forward_trace", path)
diff --git a/tt-train/sources/examples/linear_regression/CMakeLists.txt b/tt-train/sources/examples/linear_regression/CMakeLists.txt
new file mode 100644
index 00000000000..be7801ef7ec
--- /dev/null
+++ b/tt-train/sources/examples/linear_regression/CMakeLists.txt
@@ -0,0 +1,6 @@
+project(linear_regression)
+
+set(SOURCES main.cpp)
+
+add_executable(linear_regression ${SOURCES})
+target_link_libraries(linear_regression PRIVATE ttml)
diff --git a/tt-train/sources/examples/linear_regression/main.cpp b/tt-train/sources/examples/linear_regression/main.cpp
new file mode 100644
index 00000000000..f98af41a1cf
--- /dev/null
+++ b/tt-train/sources/examples/linear_regression/main.cpp
@@ -0,0 +1,89 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <fmt/format.h>
+
+#include <core/ttnn_all_includes.hpp>
+#include <ttnn/tensor/tensor.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "datasets/dataloader.hpp"
+#include "datasets/generators.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/losses.hpp"
+#include "optimizers/sgd.hpp"
+
+using ttml::autograd::TensorPtr;
+
+using DatasetSample = std::pair<std::vector<float>, std::vector<float>>;
+using BatchType = std::pair<TensorPtr, TensorPtr>;
+using DataLoader = ttml::datasets::DataLoader<
+    ttml::datasets::InMemoryFloatVecDataset,
+    std::function<BatchType(std::vector<DatasetSample>&& samples)>,
+    BatchType>;
+
+int main() {
+    const size_t training_samples_count = 100000;
+    const size_t num_features = 64;
+    const size_t num_targets = 32;
+    const float noise = 0.0F;
+    const bool bias = true;
+
+    auto training_params = ttml::datasets::MakeRegressionParams{
+        .n_samples = training_samples_count,
+        .n_features = num_features,
+        .n_targets = num_targets,
+        .noise = noise,
+        .bias = bias,
+    };
+
+    auto training_dataset = ttml::datasets::make_regression(training_params);
+
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    std::function<BatchType(std::vector<DatasetSample> && samples)> collate_fn =
+        [&num_features, &num_targets, device](std::vector<DatasetSample>&& samples) {
+            const uint32_t batch_size = samples.size();
+            std::vector<float> data;
+            std::vector<float> targets;
+            data.reserve(batch_size * num_features);
+            targets.reserve(batch_size * num_targets);
+            for (auto& [features, target] : samples) {
+                std::move(features.begin(), features.end(), std::back_inserter(data));
+                std::move(target.begin(), target.end(), std::back_inserter(targets));
+            }
+
+            auto data_tensor = ttml::autograd::create_tensor(
+                ttml::core::from_vector(data, ttml::core::create_shape({batch_size, 1, 1, num_features}), device));
+            auto targets_tensor = ttml::autograd::create_tensor(
+                ttml::core::from_vector(targets, ttml::core::create_shape({batch_size, 1, 1, num_targets}), device));
+            return std::make_pair(data_tensor, targets_tensor);
+        };
+
+    const uint32_t batch_size = 128;
+    auto train_dataloader = DataLoader(training_dataset, batch_size, /* shuffle */ true, collate_fn);
+
+    auto model = ttml::modules::LinearLayer(num_features, num_targets);
+
+    float learning_rate = 0.1F * num_targets * (batch_size / 128.F);
+    auto sgd_config = ttml::optimizers::SGDConfig{.lr = learning_rate, .momentum = 0.0F};
+    auto optimizer = ttml::optimizers::SGD(model.parameters(), sgd_config);
+
+    int training_step = 0;
+    const int num_epochs = 10;
+    for (int epoch = 0; epoch < num_epochs; ++epoch) {
+        for (const auto& [data, targets] : train_dataloader) {
+            optimizer.zero_grad();
+            auto output = model(data);
+            auto loss = ttml::ops::mse_loss(output, targets);
+            auto loss_float = ttml::core::to_vector(loss->get_value())[0];
+            fmt::print("Step: {} Loss: {}\n", training_step++, loss_float);
+            loss->backward();
+            optimizer.step();
+            ttml::autograd::ctx().reset_graph();
+        }
+    }
+}
diff --git a/tt-train/sources/examples/mnist_mlp/CMakeLists.txt b/tt-train/sources/examples/mnist_mlp/CMakeLists.txt
new file mode 100644
index 00000000000..b69fe6dcc74
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/CMakeLists.txt
@@ -0,0 +1,15 @@
+project(mnist_mlp)
+
+set(SOURCES
+    main.cpp
+    utils.cpp
+    models.cpp
+)
+
+CPMAddPackage(NAME mnist_dataset GITHUB_REPOSITORY wichtounet/mnist GIT_TAG master)
+include_directories(${mnist_dataset_SOURCE_DIR}/include)
+
+# Add executable and link libraries
+add_executable(mnist_mlp ${SOURCES})
+target_link_libraries(mnist_mlp PRIVATE ttml)
+target_compile_definitions(mnist_mlp PRIVATE MNIST_DATA_LOCATION="${mnist_dataset_SOURCE_DIR}/")
diff --git a/tt-train/sources/examples/mnist_mlp/main.cpp b/tt-train/sources/examples/mnist_mlp/main.cpp
new file mode 100644
index 00000000000..7a272bcd982
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/main.cpp
@@ -0,0 +1,183 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <CLI/CLI.hpp>
+#include <core/ttnn_all_includes.hpp>
+#include <mnist/mnist_reader.hpp>
+#include <ttnn/operations/eltwise/ternary/where.hpp>
+#include <ttnn/tensor/tensor_utils.hpp>
+#include <ttnn/tensor/types.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "datasets/dataloader.hpp"
+#include "datasets/in_memory_dataset.hpp"
+#include "models.hpp"
+#include "ops/losses.hpp"
+#include "optimizers/sgd.hpp"
+#include "utils.hpp"
+
+using ttml::autograd::TensorPtr;
+
+using DatasetSample = std::pair<std::vector<uint8_t>, uint8_t>;
+using BatchType = std::pair<TensorPtr, TensorPtr>;
+using DataLoader = ttml::datasets::DataLoader<
+    ttml::datasets::InMemoryDataset<std::vector<uint8_t>, uint8_t>,
+    std::function<BatchType(std::vector<DatasetSample> &&samples)>,
+    BatchType>;
+
+constexpr auto model_name = "mlp";
+constexpr auto optimizer_name = "optimizer";
+
+template <typename Model>
+float evaluate(DataLoader &test_dataloader, Model &model, size_t num_targets) {
+    model->eval();
+    float num_correct = 0;
+    float num_samples = 0;
+    for (const auto &[data, target] : test_dataloader) {
+        auto output = (*model)(data);
+        auto output_vec = ttml::core::to_vector(output->get_value());
+        auto target_vec = ttml::core::to_vector(target->get_value());
+        for (size_t i = 0; i < output_vec.size(); i += num_targets) {
+            auto predicted_class = std::distance(
+                output_vec.begin() + i,
+                std::max_element(output_vec.begin() + i, output_vec.begin() + (i + num_targets)));
+            auto target_class = std::distance(
+                target_vec.begin() + i,
+                std::max_element(target_vec.begin() + i, target_vec.begin() + (i + num_targets)));
+            num_correct += static_cast<float>(predicted_class == target_class);
+            num_samples++;
+        }
+    }
+    model->train();
+    return num_correct / num_samples;
+};
+
+int main(int argc, char **argv) {
+    CLI::App app{"Mnist Example"};
+    argv = app.ensure_utf8(argv);
+
+    uint32_t batch_size = 128;
+    int logging_interval = 50;
+    size_t num_epochs = 10;
+    bool is_eval = false;
+    int model_save_interval = 500;
+    std::string model_path = "/tmp/mnist_mlp.msgpack";
+
+    app.add_option("-b,--batch_size", batch_size, "Batch size")->default_val(batch_size);
+    app.add_option("-l,--logging_interval", logging_interval, "Logging interval")->default_val(logging_interval);
+    app.add_option("-m,--model_save_interval", model_save_interval, "model save interval")
+        ->default_val(model_save_interval);
+
+    app.add_option("-n,--num_epochs", num_epochs, "Number of epochs")->default_val(num_epochs);
+    app.add_option("-s,--model_path", model_path, "Model path")->default_val(model_path);
+    app.add_option("-e,--eval", is_eval, "eval only mode")->default_val(is_eval);
+
+    CLI11_PARSE(app, argc, argv);
+    // Load MNIST data
+    const size_t num_targets = 10;
+    const size_t num_features = 784;
+    mnist::MNIST_dataset<std::vector, std::vector<uint8_t>, uint8_t> dataset =
+        mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>(MNIST_DATA_LOCATION);
+    ttml::datasets::InMemoryDataset<std::vector<uint8_t>, uint8_t> training_dataset(
+        dataset.training_images, dataset.training_labels);
+    ttml::datasets::InMemoryDataset<std::vector<uint8_t>, uint8_t> test_dataset(
+        dataset.test_images, dataset.test_labels);
+
+    auto *device = &ttml::autograd::ctx().get_device();
+    std::function<BatchType(std::vector<DatasetSample> && samples)> collate_fn =
+        [num_features, num_targets, device](std::vector<DatasetSample> &&samples) {
+            const uint32_t batch_size = samples.size();
+            std::vector<float> data;
+            std::vector<float> targets;
+            data.reserve(batch_size * num_features);
+            targets.reserve(batch_size * num_targets);
+            for (auto &[features, target] : samples) {
+                std::copy(features.begin(), features.end(), std::back_inserter(data));
+
+                std::vector<float> one_hot_target(num_targets, 0.0F);
+                one_hot_target[target] = 1.0F;
+                std::copy(one_hot_target.begin(), one_hot_target.end(), std::back_inserter(targets));
+            }
+
+            std::transform(data.begin(), data.end(), data.begin(), [](float pixel) { return pixel / 255.0F - 0.5F; });
+
+            auto data_tensor = ttml::autograd::create_tensor(
+                ttml::core::from_vector(data, ttml::core::create_shape({batch_size, 1, 1, num_features}), device));
+            auto targets_tensor = ttml::autograd::create_tensor(
+                ttml::core::from_vector(targets, ttml::core::create_shape({batch_size, 1, 1, num_targets}), device));
+            return std::make_pair(data_tensor, targets_tensor);
+        };
+
+    auto train_dataloader = DataLoader(training_dataset, batch_size, /* shuffle */ true, collate_fn);
+    auto test_dataloader = DataLoader(test_dataset, batch_size, /* shuffle */ false, collate_fn);
+
+    auto model = create_base_mlp(784, 10);
+
+    const float learning_rate = 0.1F * (static_cast<float>(batch_size) / 128.F);
+    const float momentum = 0.9F;
+    const float weight_decay = 0.F;
+    auto sgd_config =
+        ttml::optimizers::SGDConfig{.lr = learning_rate, .momentum = momentum, .weight_decay = weight_decay};
+
+    fmt::print("SGD configuration:\n");
+    fmt::print("    Learning rate: {}\n", sgd_config.lr);
+    fmt::print("    Momentum: {}\n", sgd_config.momentum);
+    fmt::print("    Dampening {}\n", sgd_config.dampening);
+    fmt::print("    Weight decay: {}\n", sgd_config.weight_decay);
+    fmt::print("    Nesterov: {}\n", sgd_config.nesterov);
+    auto optimizer = ttml::optimizers::SGD(model->parameters(), sgd_config);
+    if (!model_path.empty() && std::filesystem::exists(model_path)) {
+        fmt::print("Loading model from {}\n", model_path);
+        load_model_and_optimizer(model_path, model, optimizer, model_name, optimizer_name);
+    }
+
+    // evaluate model before training (sanity check to get reasonable accuracy
+    // 1/num_targets)
+    float accuracy_before_training = evaluate(test_dataloader, model, num_targets);
+    fmt::print("Accuracy of the current model training: {}%\n", accuracy_before_training * 100.F);
+    if (is_eval) {
+        return 0;
+    }
+
+    LossAverageMeter loss_meter;
+    int training_step = 0;
+    for (size_t epoch = 0; epoch < num_epochs; ++epoch) {
+        for (const auto &[data, target] : train_dataloader) {
+            optimizer.zero_grad();
+            auto output = (*model)(data);
+            auto loss = ttml::ops::cross_entropy_loss(output, target);
+            auto loss_float = ttml::core::to_vector(loss->get_value())[0];
+            loss_meter.update(loss_float, batch_size);
+            if (training_step % logging_interval == 0) {
+                fmt::print("Step: {:5d} | Average Loss: {:.4f}\n", training_step, loss_meter.average());
+            }
+            if (!model_path.empty() && training_step % model_save_interval == 0) {
+                fmt::print("Saving model to {}\n", model_path);
+                save_model_and_optimizer(model_path, model, optimizer, model_name, optimizer_name);
+            }
+
+            loss->backward();
+            optimizer.step();
+            ttml::autograd::ctx().reset_graph();
+            training_step++;
+        }
+
+        const float test_accuracy = evaluate(test_dataloader, model, num_targets);
+        fmt::print(
+            "Epoch: {:3d} | Average Loss: {:.4f} | Accuracy: {:.4f}%\n",
+            epoch + 1,
+            loss_meter.average(),
+            test_accuracy * 100.F);
+        loss_meter.reset();
+    }
+
+    if (!model_path.empty()) {
+        fmt::print("Saving model to {}\n", model_path);
+        save_model_and_optimizer(model_path, model, optimizer, model_name, optimizer_name);
+    }
+
+    return 0;
+}
diff --git a/tt-train/sources/examples/mnist_mlp/models.cpp b/tt-train/sources/examples/mnist_mlp/models.cpp
new file mode 100644
index 00000000000..5d324fe389d
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/models.cpp
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "models.hpp"
+
+#include <memory>
+
+#include "modules/multi_layer_perceptron.hpp"
+#include "ops/unary_ops.hpp"
+
+MNISTModel::MNISTModel() {
+    m_fc1 = std::make_shared<ttml::modules::LinearLayer>(784, 128);
+    m_fc2 = std::make_shared<ttml::modules::LinearLayer>(128, 64);
+    m_fc3 = std::make_shared<ttml::modules::LinearLayer>(64, 10);
+    m_dropout = std::make_shared<ttml::modules::DropoutLayer>(0.2F);
+
+    m_layernorm1 = std::make_shared<ttml::modules::LayerNormLayer>(128);
+    m_layernorm2 = std::make_shared<ttml::modules::LayerNormLayer>(10);
+
+    create_name("MNISTModel");
+
+    register_module(m_fc1, "fc1");
+    register_module(m_fc2, "fc2");
+    register_module(m_fc3, "fc3");
+    register_module(m_dropout, "dropout");
+    register_module(m_layernorm1, "layernorm1");
+    register_module(m_layernorm2, "layernorm2");
+}
+
+ttml::autograd::TensorPtr MNISTModel::operator()(ttml::autograd::TensorPtr x) {
+    x = (*m_dropout)(x);
+    x = (*m_fc1)(x);
+    x = (*m_layernorm1)(x);
+    x = ttml::ops::relu(x);
+    x = (*m_fc2)(x);
+    x = (*m_layernorm2)(x);
+    x = ttml::ops::relu(x);
+    x = (*m_fc3)(x);
+    return x;
+}
+std::shared_ptr<ttml::modules::MultiLayerPerceptron> create_base_mlp(uint32_t num_features, uint32_t num_targets) {
+    auto model_params = ttml::modules::MultiLayerPerceptronParameters{
+        .m_input_features = num_features, .m_hidden_features = {128}, .m_output_features = num_targets};
+    return std::make_shared<ttml::modules::MultiLayerPerceptron>(model_params);
+}
diff --git a/tt-train/sources/examples/mnist_mlp/models.hpp b/tt-train/sources/examples/mnist_mlp/models.hpp
new file mode 100644
index 00000000000..6445648b69f
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/models.hpp
@@ -0,0 +1,27 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <memory>
+
+#include "autograd/module_base.hpp"
+#include "modules/dropout_module.hpp"
+#include "modules/layer_norm_module.hpp"
+#include "modules/linear_module.hpp"
+#include "modules/multi_layer_perceptron.hpp"
+
+class MNISTModel : public ttml::autograd::ModuleBase {
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc1;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc2;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc3;
+    std::shared_ptr<ttml::modules::DropoutLayer> m_dropout;
+    std::shared_ptr<ttml::modules::LayerNormLayer> m_layernorm1;
+    std::shared_ptr<ttml::modules::LayerNormLayer> m_layernorm2;
+
+public:
+    MNISTModel();
+
+    ttml::autograd::TensorPtr operator()(ttml::autograd::TensorPtr x);
+};
+
+std::shared_ptr<ttml::modules::MultiLayerPerceptron> create_base_mlp(uint32_t num_features, uint32_t num_targets);
diff --git a/tt-train/sources/examples/mnist_mlp/pytorch_mnist_bfloat16.py b/tt-train/sources/examples/mnist_mlp/pytorch_mnist_bfloat16.py
new file mode 100644
index 00000000000..6c4c52ce778
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/pytorch_mnist_bfloat16.py
@@ -0,0 +1,78 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+import torch.nn as nn
+
+from torch.optim import SGD
+from torch.utils.data import DataLoader
+
+from torchvision import datasets
+from torchvision.transforms import transforms
+
+
+def create_mnist_dataset(batch_size):
+    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
+
+    train_dataset = datasets.MNIST(root="/tmp/data", train=True, download=True, transform=transform)
+    test_dataset = datasets.MNIST(root="/tmp/data", train=False, download=True, transform=transform)
+
+    train_loader = DataLoader(
+        dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=4
+    )
+
+    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=4)
+
+    return train_loader, test_loader
+
+
+class MLP(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(784, 128)
+        self.fc2 = nn.Linear(128, 10)
+        self.act = nn.ReLU()
+
+    def forward(self, x):
+        x = x.view(-1, 784)
+        x = self.act(self.fc1(x))
+        x = self.fc2(x)
+        return x
+
+
+def evaluate(test_loader, model):
+    # Evaluate the model
+    model.eval()
+    total = 0
+    correct = 0
+    for i, (images, labels) in enumerate(test_loader):
+        images = images.bfloat16()
+        outputs = model(images)
+        _, predicted = torch.max(outputs, 1)
+        total += labels.size(0)
+        correct += (predicted == labels).sum().item()
+    acc = correct / total
+    model.train()
+    return acc
+
+
+if __name__ == "__main__":
+    model = MLP().bfloat16()
+    criterion = nn.CrossEntropyLoss().bfloat16()
+    optimizer = SGD(model.parameters(), lr=0.1)
+
+    num_epochs = 10
+    batch_size = 128
+    train_loader, test_loader = create_mnist_dataset(batch_size)
+    for epoch in range(num_epochs):
+        model.train()
+        for i, (images, labels) in enumerate(train_loader):
+            images = images.bfloat16()
+            optimizer.zero_grad()
+            outputs = model(images)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+        acc = evaluate(test_loader, model)
+        print(f"Epoch {epoch + 1} Accuracy: {acc}")
diff --git a/tt-train/sources/examples/mnist_mlp/utils.cpp b/tt-train/sources/examples/mnist_mlp/utils.cpp
new file mode 100644
index 00000000000..fc2a4294748
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/utils.cpp
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "utils.hpp"
+
+void LossAverageMeter::update(float loss, size_t count) {
+    m_sum += loss * static_cast<float>(count);
+    m_count += count;
+}
+
+float LossAverageMeter::average() const {
+    if (m_count == 0) {
+        return 0.F;
+    }
+    return m_sum / static_cast<float>(m_count);
+}
+
+void LossAverageMeter::reset() {
+    m_sum = 0.0F;
+    m_count = 0;
+}
+
+void Timers::start(const std::string_view& name) {
+    m_timers[std::string(name)] = std::chrono::high_resolution_clock::now();
+}
+
+long long Timers::stop(const std::string_view& name) {
+    auto start_time = m_timers.at(std::string(name));
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
+    return duration.count();
+}
diff --git a/tt-train/sources/examples/mnist_mlp/utils.hpp b/tt-train/sources/examples/mnist_mlp/utils.hpp
new file mode 100644
index 00000000000..00b28a6ffe7
--- /dev/null
+++ b/tt-train/sources/examples/mnist_mlp/utils.hpp
@@ -0,0 +1,64 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <fmt/core.h>
+
+#include <cassert>
+#include <chrono>
+#include <cstddef>
+#include <string>
+#include <unordered_map>
+
+#include "serialization/msgpack_file.hpp"
+#include "serialization/serialization.hpp"
+
+class LossAverageMeter {
+    float m_sum = 0.0F;
+    size_t m_count = 0;
+
+public:
+    void update(float loss, size_t count = 1);
+
+    [[nodiscard]] float average() const;
+
+    void reset();
+};
+
+class Timers {
+public:
+    void start(const std::string_view &name);
+
+    long long stop(const std::string_view &name);
+
+private:
+    std::unordered_map<std::string, std::chrono::high_resolution_clock::time_point> m_timers;
+};
+
+template <typename Model, typename Optimizer>
+void save_model_and_optimizer(
+    std::string &model_path,
+    const std::shared_ptr<Model> &model,
+    Optimizer &optimizer,
+    const std::string &model_name,
+    const std::string &optimizer_name) {
+    ttml::serialization::MsgPackFile serializer;
+    ttml::serialization::write_module(serializer, model_name, model.get());
+    ttml::serialization::write_optimizer(serializer, optimizer_name, &optimizer);
+    serializer.serialize(model_path);
+}
+
+template <typename Model, typename Optimizer>
+void load_model_and_optimizer(
+    std::string &model_path,
+    const std::shared_ptr<Model> &model,
+    Optimizer &optimizer,
+    const std::string &model_name,
+    const std::string &optimizer_name) {
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(model_path);
+    ttml::serialization::read_module(deserializer, model_name, model.get());
+    ttml::serialization::read_optimizer(deserializer, optimizer_name, &optimizer);
+}
diff --git a/tt-train/sources/examples/nano_gpt/CMakeLists.txt b/tt-train/sources/examples/nano_gpt/CMakeLists.txt
new file mode 100644
index 00000000000..49942019fa8
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/CMakeLists.txt
@@ -0,0 +1,12 @@
+project(nano_gpt)
+
+set(SOURCES
+    main.cpp
+    utils.cpp
+    models.cpp
+)
+
+add_executable(nano_gpt ${SOURCES})
+target_link_libraries(nano_gpt PRIVATE ttml)
+
+add_definitions(-DDATA_FOLDER="${CMAKE_CURRENT_SOURCE_DIR}/data")
diff --git a/tt-train/sources/examples/nano_gpt/chat_demo.py b/tt-train/sources/examples/nano_gpt/chat_demo.py
new file mode 100644
index 00000000000..d3549d4e21f
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/chat_demo.py
@@ -0,0 +1,115 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import streamlit as st
+import subprocess
+
+START_MESSAGE = "*******************\n"
+END_MESSAGE = "*******************\n"
+
+
+def read_before_start_message(stream):
+    output = ""
+    while True:
+        ch = stream.read(1)
+        if not ch:
+            break
+        output += ch
+        if output.endswith(START_MESSAGE):
+            break
+
+
+def read_until_end_message(stream):
+    index = 0
+    output = ""
+    while True:
+        ch = stream.read(1)
+        if not ch:
+            break
+        output += ch
+        if index + len(END_MESSAGE) < len(output):
+            yield output[index]
+            index += 1
+        if output.endswith(END_MESSAGE):
+            break
+
+
+def stream_executable(process, user_input):
+    try:
+        # Send input to the executable
+        process.stdin.write(user_input + "\n")
+        process.stdin.flush()  # Ensure it's sent immediately
+
+        # Read output until the start message
+        read_before_start_message(process.stdout)
+        # Read output character by character
+        yield from read_until_end_message(process.stdout)
+    except Exception as e:
+        yield f"An error occurred: {e}"
+
+
+def run_executable(executable_path):
+    try:
+        # Start the subprocess
+        process = subprocess.Popen(
+            executable_path,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,  # Use text mode for strings
+            bufsize=1,  # Line buffering
+            universal_newlines=True,  # Use universal newlines mode
+            shell=True,  # Run the command through the shell
+        )
+        return process
+    except Exception as e:
+        st.error(f"Failed to start the executable: {e}")
+        return None
+
+
+def main():
+    st.title("Shakespeare Chat")
+
+    # Specify the path to your executable
+    executable_path = "TT_METAL_LOGGER_LEVEL=FATAL"
+    executable_path += " /home/ubuntu/ML-Framework-CPP/build/sources/examples/nano_gpt/nano_gpt"
+    executable_path += " -p transformer.msgpack"
+    executable_path += " -s 5489 -e"
+
+    # Initialize session state
+    if "process" not in st.session_state:
+        st.session_state.process = run_executable(executable_path)
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    # Display chat messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # Chat input
+    if prompt := st.chat_input("Type your message here"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        # Get bot response
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            full_response = ""
+            for chunk in stream_executable(st.session_state.process, prompt):
+                if chunk == "\n":
+                    full_response += "  "
+                full_response += chunk
+                message_placeholder.markdown(full_response + "▌")
+
+            message_placeholder.markdown(full_response)
+
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tt-train/sources/examples/nano_gpt/data/shakespeare.txt b/tt-train/sources/examples/nano_gpt/data/shakespeare.txt
new file mode 100644
index 00000000000..51d57abba95
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/data/shakespeare.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:434c0554a8c4c53dc17e56a0abb0f30b88f83cbceb0289cb897db68c25e89eba
+size 1115390
diff --git a/tt-train/sources/examples/nano_gpt/eval.sh b/tt-train/sources/examples/nano_gpt/eval.sh
new file mode 100755
index 00000000000..e7ef19ed5c8
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/eval.sh
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+#!/bin/bash
+
+export TT_METAL_LOGGER_LEVEL=FATAL
+SCRIPT="/home/ubuntu/ML-Framework-CPP/build/sources/examples/nano_gpt/nano_gpt"
+RESET_BOARD="tt-smi -r 0"
+SEED=5489
+
+$RESET_BOARD
+$SCRIPT -p transformer.msgpack -s $SEED -e
diff --git a/tt-train/sources/examples/nano_gpt/main.cpp b/tt-train/sources/examples/nano_gpt/main.cpp
new file mode 100644
index 00000000000..e988c045d58
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/main.cpp
@@ -0,0 +1,366 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <CLI/CLI.hpp>
+#include <chrono>
+#include <core/ttnn_all_includes.hpp>
+#include <csignal>
+#include <cstdint>
+#include <ttnn/tensor/tensor.hpp>
+#include <wandbcpp.hpp>
+
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "datasets/dataloader.hpp"
+#include "datasets/in_memory_token_dataset.hpp"
+#include "datasets/utils.hpp"
+#include "models.hpp"
+#include "ops/binary_ops.hpp"
+#include "ops/losses.hpp"
+#include "optimizers/adamw.hpp"
+#include "optimizers/sgd.hpp"
+#include "tokenizers/char_tokenizer.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+#include "utils.hpp"
+/* WANDB BLocks this signal.
+ Control+C didn't work.
+*/
+void signal_handler(int signum) {
+    std::cout << "\nInterrupt signal (" << signum << ") received.\n";
+    wandbcpp::finish();
+    exit(signum);
+}
+
+using ttml::autograd::TensorPtr;
+
+using DatasetSample = std::pair<std::span<const uint32_t>, std::span<const uint32_t>>;
+// tokens, targets, mask, positions
+using BatchType = std::tuple<TensorPtr, TensorPtr, TensorPtr, TensorPtr>;
+using DataLoader = ttml::datasets::DataLoader<
+    ttml::datasets::InMemoryTokenDataset,
+    std::function<BatchType(std::vector<DatasetSample> &&samples)>,
+    BatchType>;
+
+struct DemoConfig {
+    // training
+    uint32_t batch_size = 64;
+    uint32_t sequence_length = 256;
+    uint32_t num_epochs = 1;
+    uint32_t max_steps = 5000;
+    float dropout_prob = 0.2F;
+    // model
+    uint32_t num_heads = 6;
+    uint32_t embedding_dim = 384;
+    uint32_t num_blocks = 6;
+    // optimizer
+    float learning_rate = 3e-4F;
+    float weight_decay = 1e-2F;
+};
+const DemoConfig config;
+
+uint32_t sample(std::span<const float> log_softmax) {
+    auto probabilities_vector = std::vector<float>(log_softmax.size());
+    std::transform(log_softmax.begin(), log_softmax.end(), probabilities_vector.begin(), [](float value) {
+        return std::exp(value);
+    });
+    auto distribution = std::discrete_distribution<uint32_t>(probabilities_vector.begin(), probabilities_vector.end());
+    return distribution(ttml::autograd::ctx().get_generator());
+}
+
+template <typename Model, typename Tokenizer>
+void generate(
+    const std::shared_ptr<Model> &model,
+    const Tokenizer &tokenizer,
+    uint32_t max_sequence_length,
+    uint32_t num_heads,
+    uint32_t tokens_to_generate = 1024U) {
+    model->eval();
+
+    std::string prompt;
+    fmt::print("Enter a prompt: ");
+    std::getline(std::cin, prompt);
+
+    if (prompt.empty()) {
+        prompt = "\n";
+    }
+
+    auto *device = &ttml::autograd::ctx().get_device();
+
+    auto prompt_tokens = tokenizer.encode(prompt);
+
+    auto pad_token_id = 0U;
+
+    auto vocab_size = tokenizer.get_vocab_size();
+
+    auto positions_vector = std::vector<uint32_t>(max_sequence_length);
+    std::iota(positions_vector.begin(), positions_vector.end(), 0);
+    auto positions_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
+        positions_vector, ttml::core::create_shape({1, 1, 1, max_sequence_length}), device, Layout::ROW_MAJOR));
+
+    std::vector<float> mask;
+    mask.reserve(static_cast<size_t>(max_sequence_length * max_sequence_length * num_heads));
+    for (int head = 0; head < num_heads; ++head) {
+        for (int i = 0; i < max_sequence_length; ++i) {
+            for (int j = 0; j < max_sequence_length; ++j) {
+                mask.push_back(i >= j ? 1.0F : 0.0F);
+            }
+        }
+    }
+    auto mask_tensor = ttml::autograd::create_tensor(ttml::core::from_vector(
+        mask, ttml::core::create_shape({1, num_heads, max_sequence_length, max_sequence_length}), device));
+
+    std::vector<uint32_t> prompt_tokens_padded(max_sequence_length, pad_token_id);
+    fmt::print("Generated text:\n");
+    fmt::print("*******************\n");
+    fmt::print("{}", prompt);
+    for (uint32_t token_idx = 0; token_idx < tokens_to_generate; ++token_idx) {
+        uint32_t start_idx = 0;
+        if (prompt_tokens.size() > max_sequence_length) {
+            start_idx = prompt_tokens.size() - max_sequence_length;
+        }
+        for (uint32_t i = start_idx; i < prompt_tokens.size(); ++i) {
+            prompt_tokens_padded[i - start_idx] = prompt_tokens[i];
+        }
+
+        auto prompt_tokens_padded_size = static_cast<uint32_t>(prompt_tokens_padded.size());
+        auto prompt_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
+            prompt_tokens_padded,
+            ttml::core::create_shape({1, 1, 1, prompt_tokens_padded_size}),
+            device,
+            Layout::ROW_MAJOR));
+
+        auto output = (*model)(prompt_tensor, positions_tensor, mask_tensor);
+        auto output_vector = ttml::core::to_vector(output->get_value());
+
+        uint32_t predicted_token_id = prompt_tokens.size() - 1U;
+        if (prompt_tokens.size() > max_sequence_length) {
+            predicted_token_id = prompt_tokens_padded_size - 1U;
+        }
+        auto logits_ptr = output_vector.data() + predicted_token_id * vocab_size;
+        auto token_id = sample(std::span<float>(logits_ptr, vocab_size));
+        prompt_tokens.push_back(token_id);
+        fmt::print("{}", tokenizer.decode({token_id}));
+        ttml::autograd::ctx().reset_graph();
+    }
+    fmt::print("\n*******************\n");
+
+    model->train();
+}
+
+int main(int argc, char **argv) {
+    auto result = signal(SIGINT, signal_handler);
+    if (result == SIG_ERR) {
+        std::cerr << "Failed to set signal handler\n";
+        return -1;
+    }
+    wandbcpp::init({.project = "tt_train_nano_gpt"});
+    wandbcpp::update_config({
+        {"model", "transformer"},
+        {"num_heads", static_cast<int>(config.num_heads)},
+        {"embedding_dim", static_cast<int>(config.embedding_dim)},
+        {"num_blocks", static_cast<int>(config.num_blocks)},
+        {"dropout_prob", config.dropout_prob},
+        {"learning_rate", config.learning_rate},
+        {"weight_decay", config.weight_decay},
+        {"batch_size", static_cast<int>(config.batch_size)},
+        {"sequence_length", static_cast<int>(config.sequence_length)},
+        {"max_steps", static_cast<int>(config.max_steps)},
+    });
+
+    auto start_timer = std::chrono::high_resolution_clock::now();
+    CLI::App app{"NanoGPT Example"};
+    argv = app.ensure_utf8(argv);
+
+    uint32_t seed = 5489U;
+    uint32_t model_save_interval = 500;
+    uint32_t max_steps = config.max_steps;
+    uint32_t batch_size = config.batch_size;
+    uint32_t sequence_length = config.sequence_length;
+    std::string model_path;
+    std::string data_path = std::string(DATA_FOLDER) + "/shakespeare.txt";
+    bool is_eval = false;
+
+    app.add_option("-b,--batch_size", batch_size, "Batch size")->default_val(batch_size);
+    app.add_option("-i,--model_save_interval", model_save_interval, "Model save interval")
+        ->default_val(model_save_interval);
+    app.add_option("-p,--model_path", model_path, "Model path")->default_val(model_path);
+    app.add_option("-d,--data_path", data_path, "Data path")->default_val(data_path);
+    app.add_option("-s,--seed", seed, "Seed")->default_val(seed);
+    app.add_option("-m,--max_steps", max_steps, "Max steps")->default_val(max_steps);
+    app.add_flag("-e,--eval", is_eval, "Evaluation mode")->default_val(is_eval);
+    CLI11_PARSE(app, argc, argv);
+
+    // set seed
+    ttml::autograd::ctx().set_seed(seed);
+
+    std::string text;
+    try {
+        text = read_file_to_str(data_path);
+    } catch (const std::exception &e) {
+        std::cerr << e.what() << std::endl;
+        return -1;
+    }
+
+    fmt::print("Max steps {}\n", max_steps);
+    fmt::print("Batch size {}\n", batch_size);
+    fmt::print("Seed {}\n", ttml::autograd::ctx().get_seed());
+
+    auto [dataset, tokenizer] =
+        ttml::datasets::create_in_memory_token_dataset<ttml::tokenizers::CharTokenizer>(text, sequence_length);
+    fmt::print("Dataset size: {}\n", dataset.get_size());
+    fmt::print("Vocab size: {}\n", tokenizer.get_vocab_size());
+
+    auto *device = &ttml::autograd::ctx().get_device();
+    device->enable_program_cache();
+
+    // disable for now, unexpected freezes and crashes
+    // device->enable_async(true);
+
+    struct CachedHostData {
+        std::vector<uint32_t> data;
+        std::vector<int32_t> targets;
+        ttml::autograd::TensorPtr masks_tensor;
+        ttml::autograd::TensorPtr positions_tensor;
+    };
+    CachedHostData cached_data;
+    std::vector<uint32_t> positions;
+    std::vector<float> mask;
+    positions.reserve((size_t)batch_size * sequence_length);
+    for (int sample_idx = 0; sample_idx < batch_size; ++sample_idx) {
+        for (int i = 0; i < sequence_length; ++i) {
+            positions.push_back(i);
+        }
+    }
+
+    mask.reserve((size_t)batch_size * sequence_length * sequence_length * config.num_heads);
+    for (int sample_idx = 0; sample_idx < batch_size; ++sample_idx) {
+        for (int head = 0; head < config.num_heads; ++head) {
+            for (int i = 0; i < sequence_length; ++i) {
+                for (int j = 0; j < sequence_length; ++j) {
+                    mask.push_back(i >= j ? 1.0F : 0.0F);
+                }
+            }
+        }
+    }
+    cached_data.masks_tensor = ttml::autograd::create_tensor(ttml::core::from_vector(
+        mask, ttml::core::create_shape({batch_size, config.num_heads, sequence_length, sequence_length}), device));
+    cached_data.positions_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
+        positions, ttml::core::create_shape({batch_size, 1, 1, sequence_length}), device, Layout::ROW_MAJOR));
+
+    std::function<BatchType(std::vector<DatasetSample> && samples)> collate_fn =
+        [sequence_length, num_heads = config.num_heads, vocab_size = tokenizer.get_vocab_size(), device, &cached_data](
+            std::vector<DatasetSample> &&samples) {
+            auto start_timer = std::chrono::high_resolution_clock::now();
+            const uint32_t batch_size = samples.size();
+            std::vector<uint32_t> &data = cached_data.data;
+            std::vector<int32_t> &targets = cached_data.targets;
+
+            data.clear();
+            targets.clear();
+
+            data.reserve((size_t)batch_size * sequence_length);
+            targets.reserve((size_t)batch_size * sequence_length);
+            for (auto &[features, target_span] : samples) {
+                std::copy(features.begin(), features.end(), std::back_inserter(data));
+                std::copy(target_span.begin(), target_span.end(), std::back_inserter(targets));
+            }
+            auto end_timer = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_timer - start_timer).count();
+            fmt::print("dataloader host only step time {} ms\n", (double)duration / 1000.);
+            auto data_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
+                data, ttml::core::create_shape({batch_size, 1, 1, sequence_length}), device, Layout::ROW_MAJOR));
+            auto targets_tensor = ttml::autograd::create_tensor(
+                ttml::core::from_vector<int32_t, DataType::INT32>(targets, {batch_size * sequence_length}, device));
+            end_timer = std::chrono::high_resolution_clock::now();
+            duration = std::chrono::duration_cast<std::chrono::microseconds>(end_timer - start_timer).count();
+            fmt::print("dataloader step time {} ms\n", (double)duration / 1000.);
+            return std::make_tuple(data_tensor, targets_tensor, cached_data.masks_tensor, cached_data.positions_tensor);
+        };
+
+    LossAverageMeter loss_meter;
+    auto train_dataloader = DataLoader(dataset, /* batch_size */ batch_size, /* shuffle */ true, collate_fn);
+
+    auto transformer_config = TransformerConfig();
+    transformer_config.num_heads = config.num_heads;
+    transformer_config.embedding_dim = config.embedding_dim;
+    transformer_config.dropout_prob = config.dropout_prob;
+    transformer_config.num_blocks = config.num_blocks;
+    transformer_config.vocab_size = round_up_to_tile(tokenizer.get_vocab_size());
+    transformer_config.max_sequence_length = sequence_length;
+    auto model = std::make_shared<Transformer>(transformer_config);
+
+    auto adamw_params = ttml::optimizers::AdamWConfig();
+    adamw_params.lr = config.learning_rate;
+    adamw_params.weight_decay = config.weight_decay;
+    fmt::print("AdamW configuration:\n");
+    fmt::print("    Learning rate: {}\n", adamw_params.lr);
+    fmt::print("    Weight decay: {}\n", adamw_params.weight_decay);
+    auto optimizer = ttml::optimizers::AdamW(model->parameters(), adamw_params);
+
+    if (!model_path.empty() && std::filesystem::exists(model_path)) {
+        fmt::print("Loading model from {}\n", model_path);
+        load_model_and_optimizer(model_path, model, optimizer, "transformer", "adamw");
+        fmt::print("Model loaded after {} steps\n", optimizer.get_steps());
+    }
+
+    if (is_eval) {
+        fmt::print("\nEvaluation started\n");
+        for (;;) {
+            generate(model, tokenizer, sequence_length, config.num_heads);
+        }
+        fmt::print("\nEvaluation finished\n");
+        return 0;
+    }
+
+    const uint32_t num_epochs = config.num_epochs;
+    for (uint32_t epoch = 0; epoch < num_epochs; ++epoch) {
+        for (auto [features, target, masks, positions] : train_dataloader) {
+            auto start_timer = std::chrono::high_resolution_clock::now();
+            optimizer.zero_grad();
+            auto output = (*model)(features, positions, masks);
+            auto loss = ttml::ops::nll_loss(output, target);
+            auto loss_float = ttml::core::to_vector(loss->get_value())[0];
+            loss_meter.update(loss_float, features->get_value().get_shape()[0]);
+            loss->backward();
+            optimizer.step();
+            ttml::autograd::ctx().reset_graph();
+            auto global_step = optimizer.get_steps();
+            fmt::print("Step: {}, Loss: {}\n", global_step, loss_float);
+
+            if (global_step % 10 == 0) {
+                wandbcpp::log({{"Step", (int)global_step}, {"Loss", loss_float}});
+            }
+            if (!model_path.empty() && global_step % model_save_interval == 0) {
+                save_model_and_optimizer(model_path, model, optimizer, "transformer", "adamw");
+            }
+
+            if (global_step >= max_steps) {
+                break;
+            }
+            auto end_timer = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_timer - start_timer).count();
+            fmt::print(
+                "Full step time {} ms, cache entries: {}\n",
+                (double)duration / 1000,
+                device->num_program_cache_entries());
+        }
+        if (optimizer.get_steps() >= max_steps) {
+            break;
+        }
+    }
+
+    if (!model_path.empty()) {
+        save_model_and_optimizer(model_path, model, optimizer, "transformer", "adamw");
+    }
+
+    auto end_timer = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_timer - start_timer).count();
+    fmt::print(
+        "{} Steps training time: {} s, cache entries: {}\n",
+        max_steps,
+        (double)duration / 1000000.,
+        device->num_program_cache_entries());
+    wandbcpp::finish();
+    return 0;
+}
diff --git a/tt-train/sources/examples/nano_gpt/models.cpp b/tt-train/sources/examples/nano_gpt/models.cpp
new file mode 100644
index 00000000000..4aa2886b04f
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/models.cpp
@@ -0,0 +1,94 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "models.hpp"
+
+#include "ops/binary_ops.hpp"
+#include "ops/unary_ops.hpp"
+
+Transformer::Transformer(const TransformerConfig& config) {
+    uint32_t vocab_size = config.vocab_size;
+    uint32_t max_sequence_length = config.max_sequence_length;
+    uint32_t embedding_dim = config.embedding_dim;
+    uint32_t num_heads = config.num_heads;
+    float dropout_prob = config.dropout_prob;
+    uint32_t num_blocks = config.num_blocks;
+
+    fmt::print("Transformer configuration:\n");
+    fmt::print("    Vocab size: {}\n", vocab_size);
+    fmt::print("    Max sequence length: {}\n", max_sequence_length);
+    fmt::print("    Embedding dim: {}\n", embedding_dim);
+    fmt::print("    Num heads: {}\n", num_heads);
+    fmt::print("    Dropout probability: {}\n", dropout_prob);
+    fmt::print("    Num blocks: {}\n", num_blocks);
+
+    uint32_t vocab_size_divisible_by_32 = (vocab_size + 31) / 32 * 32;
+    if (max_sequence_length % 32 != 0) {
+        throw std::logic_error(fmt::format(
+            "Max sequence length should be divisible by 32 due to current limitations in tensor. Provided "
+            "max_sequence_length={}",
+            max_sequence_length));
+    }
+    if (embedding_dim % 32 != 0) {
+        throw std::logic_error(fmt::format(
+            "Embedding size should be divisible by 32 due to current limitations in tensor. Provided "
+            "embedding_dim={}",
+            embedding_dim));
+    }
+    tok_emb = std::make_shared<ttml::modules::Embedding>(vocab_size_divisible_by_32, embedding_dim);
+    pos_emb = std::make_shared<ttml::modules::Embedding>(max_sequence_length, embedding_dim);
+    blocks.reserve(num_blocks);
+    for (uint32_t block_idx = 0; block_idx < num_blocks; ++block_idx) {
+        blocks.push_back(std::make_shared<ttml::modules::GPTBlock>(embedding_dim, num_heads, dropout_prob));
+    }
+    ln_fc = std::make_shared<ttml::modules::LayerNormLayer>(embedding_dim);
+    fc = std::make_shared<ttml::modules::LinearLayer>(embedding_dim, vocab_size);
+
+    create_name("transformer");
+    register_module(tok_emb, "tok_emb");
+    register_module(pos_emb, "pos_emb");
+    for (uint32_t block_idx = 0; block_idx < num_blocks; ++block_idx) {
+        register_module(blocks[block_idx], fmt::format("gpt_block_{}", block_idx));
+    }
+    register_module(ln_fc, "ln_fc");
+    register_module(fc, "fc");
+}
+ttml::autograd::TensorPtr Transformer::operator()(
+    const ttml::autograd::TensorPtr& x,
+    const ttml::autograd::TensorPtr& positions,
+    const ttml::autograd::TensorPtr& mask) {
+    auto tok_emb_out = (*tok_emb)(x);
+    auto pos_emb_out = (*pos_emb)(positions);
+    auto out = ttml::ops::add(tok_emb_out, pos_emb_out);
+    for (auto& block : blocks) {
+        out = (*block)(out, mask);
+    }
+    out = (*ln_fc)(out);
+    auto logits = (*fc)(out);
+    auto log_softmax = ttml::ops::log_softmax(logits, 3);
+    return log_softmax;
+}
+
+BigramFCModel::BigramFCModel(uint32_t vocab_size, uint32_t num_tokens, uint32_t hidden_dim) {
+    // make vocab_size divisible by 32
+    vocab_size = (vocab_size + 31) / 32 * 32;
+
+    // create layers
+    emb = std::make_shared<ttml::modules::Embedding>(vocab_size, hidden_dim);
+    fc1 = std::make_shared<ttml::modules::LinearLayer>(hidden_dim, num_tokens);
+
+    create_name("bigram_fc_model");
+
+    register_module(emb, "emb");
+    register_module(fc1, "fc1");
+}
+
+ttml::autograd::TensorPtr BigramFCModel::operator()(
+    ttml::autograd::TensorPtr x,
+    [[maybe_unused]] const ttml::autograd::TensorPtr& positions,
+    [[maybe_unused]] const ttml::autograd::TensorPtr& masks) const {
+    x = (*emb)(x);
+    x = (*fc1)(x);
+    return x;
+}
diff --git a/tt-train/sources/examples/nano_gpt/models.hpp b/tt-train/sources/examples/nano_gpt/models.hpp
new file mode 100644
index 00000000000..b41a9b57825
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/models.hpp
@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "autograd/module_base.hpp"
+#include "modules/embedding_module.hpp"
+#include "modules/gpt_block.hpp"
+#include "modules/layer_norm_module.hpp"
+#include "modules/linear_module.hpp"
+
+struct TransformerConfig {
+    uint32_t num_heads = 6;
+    uint32_t embedding_dim = 384;
+    float dropout_prob = 0.2F;
+    uint32_t num_blocks = 6;
+    uint32_t vocab_size = 256;
+    uint32_t max_sequence_length = 256;
+};
+
+class Transformer : public ttml::autograd::ModuleBase {
+    std::shared_ptr<ttml::modules::Embedding> tok_emb;
+    std::shared_ptr<ttml::modules::Embedding> pos_emb;
+    std::vector<std::shared_ptr<ttml::modules::GPTBlock>> blocks;
+    std::shared_ptr<ttml::modules::LayerNormLayer> ln_fc;
+    std::shared_ptr<ttml::modules::LinearLayer> fc;
+
+public:
+    explicit Transformer(const TransformerConfig& config);
+
+    ttml::autograd::TensorPtr operator()(
+        const ttml::autograd::TensorPtr& x,
+        const ttml::autograd::TensorPtr& positions,
+        const ttml::autograd::TensorPtr& mask);
+};
+
+class BigramFCModel : public ttml::autograd::ModuleBase {
+public:
+    std::shared_ptr<ttml::modules::LinearLayer> fc1;
+    std::shared_ptr<ttml::modules::Embedding> emb;
+
+    BigramFCModel(uint32_t vocab_size, uint32_t num_tokens, uint32_t hidden_dim);
+
+    ttml::autograd::TensorPtr operator()(
+        ttml::autograd::TensorPtr x,
+        [[maybe_unused]] const ttml::autograd::TensorPtr& positions,
+        [[maybe_unused]] const ttml::autograd::TensorPtr& masks) const;
+};
diff --git a/tt-train/sources/examples/nano_gpt/runner.sh b/tt-train/sources/examples/nano_gpt/runner.sh
new file mode 100755
index 00000000000..db64a47eab0
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/runner.sh
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+#!/bin/bash
+
+export TT_METAL_LOGGER_LEVEL=FATAL
+SCRIPT="/home/ubuntu/ML-Framework-CPP/build/sources/examples/nano_gpt/nano_gpt"
+RESET_BOARD="tt-smi -r 0"
+INTERVAL=100
+DEFAULT_SEED=5489
+MAX_STEPS=5000
+SLEEP_DURATION=30
+
+$RESET_BOARD
+echo "Running $SCRIPT..."
+for i in {1..5}; do
+    $SCRIPT -i $INTERVAL -p transformer.msgpack -s $((DEFAULT_SEED - i)) -m $MAX_STEPS
+    $RESET_BOARD
+    echo "Sleeping for $SLEEP_DURATION seconds and restarting training..."
+    sleep $SLEEP_DURATION
+done
+echo "Done running $SCRIPT"
diff --git a/tt-train/sources/examples/nano_gpt/utils.cpp b/tt-train/sources/examples/nano_gpt/utils.cpp
new file mode 100644
index 00000000000..691ae54a42d
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/utils.cpp
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "utils.hpp"
+
+void LossAverageMeter::update(float loss, size_t count) {
+    m_sum += loss * static_cast<float>(count);
+    m_count += count;
+}
+
+float LossAverageMeter::average() const {
+    if (m_count == 0) {
+        return 0.F;
+    }
+    return m_sum / static_cast<float>(m_count);
+}
+
+void LossAverageMeter::reset() {
+    m_sum = 0.0F;
+    m_count = 0;
+}
+
+std::string read_file_to_str(const std::string& file_path) {
+    std::ifstream file(file_path);
+    if (!file.is_open()) {
+        throw std::runtime_error("Failed to open file: " + file_path);
+    }
+
+    std::stringstream buffer;
+    buffer << file.rdbuf();
+    return buffer.str();
+}
+
+uint32_t round_up_to_tile(uint32_t value, uint32_t tile_size) {
+    return (value + tile_size - 1) / tile_size * tile_size;
+}
diff --git a/tt-train/sources/examples/nano_gpt/utils.hpp b/tt-train/sources/examples/nano_gpt/utils.hpp
new file mode 100644
index 00000000000..521280dd9db
--- /dev/null
+++ b/tt-train/sources/examples/nano_gpt/utils.hpp
@@ -0,0 +1,54 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "serialization/msgpack_file.hpp"
+#include "serialization/serialization.hpp"
+
+class LossAverageMeter {
+    float m_sum = 0.0F;
+    size_t m_count = 0;
+
+public:
+    void update(float loss, size_t count = 1);
+
+    [[nodiscard]] float average() const;
+
+    void reset();
+};
+
+std::string read_file_to_str(const std::string &file_path);
+
+template <typename Model, typename Optimizer>
+void save_model_and_optimizer(
+    std::string &model_path,
+    const std::shared_ptr<Model> &model,
+    Optimizer &optimizer,
+    const std::string &model_name,
+    const std::string &optimizer_name) {
+    ttml::serialization::MsgPackFile serializer;
+    ttml::serialization::write_module(serializer, model_name, model.get());
+    ttml::serialization::write_optimizer(serializer, optimizer_name, &optimizer);
+    serializer.serialize(model_path);
+}
+
+template <typename Model, typename Optimizer>
+void load_model_and_optimizer(
+    std::string &model_path,
+    const std::shared_ptr<Model> &model,
+    Optimizer &optimizer,
+    const std::string &model_name,
+    const std::string &optimizer_name) {
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(model_path);
+    ttml::serialization::read_module(deserializer, model_name, model.get());
+    ttml::serialization::read_optimizer(deserializer, optimizer_name, &optimizer);
+}
+
+uint32_t round_up_to_tile(uint32_t value, uint32_t tile_size = 32);
diff --git a/tt-train/sources/examples/sample_app/CMakeLists.txt b/tt-train/sources/examples/sample_app/CMakeLists.txt
new file mode 100644
index 00000000000..eabce8da413
--- /dev/null
+++ b/tt-train/sources/examples/sample_app/CMakeLists.txt
@@ -0,0 +1,6 @@
+project(sample_app)
+
+set(SOURCES main.cpp)
+
+add_executable(sample_app ${SOURCES})
+target_link_libraries(sample_app PRIVATE ttml)
diff --git a/tt-train/sources/examples/sample_app/main.cpp b/tt-train/sources/examples/sample_app/main.cpp
new file mode 100644
index 00000000000..36231a6fc60
--- /dev/null
+++ b/tt-train/sources/examples/sample_app/main.cpp
@@ -0,0 +1,104 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <core/ttnn_all_includes.hpp>
+#include <iostream>
+
+#include "ttml.hpp"
+
+ttnn::device::Device* device = nullptr;
+
+void print_tensor(const tt::tt_metal::Tensor& tensor) {
+    // IMPORTANT. This function prints the tensor data assuming the tensor is in ROW_MAJOR layout
+    // but we are using TILE layout. The printed format WILL NOT be correct. But good enough for a demo
+
+    // Get the shape of the tensor
+    auto shape = tensor.shape();
+    // compyte the size of the tensor
+    size_t size = 1;
+    for (size_t i = 0; i < shape.size(); i++) size *= shape[i];
+
+    // prepare a buffer to copy the tensor data to the host
+    std::vector<bfloat16> data(size);
+    tt::tt_metal::memcpy(device->command_queue(), data.data(), tensor);
+
+    // print the data
+    for (size_t i = 0; i < shape[0]; i++) {
+        for (size_t j = 0; j < shape[1]; j++) {
+            for (size_t k = 0; k < shape[2]; k++) {
+                for (size_t l = 0; l < shape[3]; l++) {
+                    std::cout << data[i * shape[1] * shape[2] * shape[3] + j * shape[2] * shape[3] + k * shape[3] + l]
+                                     .to_float()
+                              << " ";
+                }
+                std::cout << std::endl;
+            }
+            std::cout << std::endl;
+        }
+        std::cout << std::endl;
+    }
+}
+
+int main() {
+    const size_t tensor_width = 32;
+    const size_t tensor_height = 32;
+
+    // tell TTNN that we want to use the first device available
+    tt::ARCH arch_ = {};
+    size_t num_devices_ = 0;
+
+    std::srand(0);
+    arch_ = tt::get_arch_from_string(tt::test_utils::get_env_arch_name());
+    num_devices_ = tt::tt_metal::GetNumAvailableDevices();
+    std::cout << "Arch:" << tt::test_utils::get_env_arch_name() << std::endl;
+    std::cout << "num_devices:" << num_devices_ << std::endl;
+    device = tt::tt_metal::CreateDevice(0);
+    std::cout << "Device created" << std::endl;
+    // AutoFormat::SetDefaultDevice(device);  // set the default device to the one we just opened
+
+    std::cout << "Creating a tensor with bfloat16 data type" << std::endl;
+    // TTNN wants us to explicitly specify if the tensor owns the buffer or not. if not, we need to make dman sure that
+    // the buffer is not deallocated before the tensor
+    auto buffer = tt::tt_metal::owned_buffer::create(create_random_vector_of_bfloat16_native(
+        // In number of bytes. so 2 bytes per bfloat16 element
+        tensor_width * tensor_height * 2
+        //  max = 2, offset = -1, seed = 42. Effectively, the range is [-1, 1]. I know, weird API
+        ,
+        2,
+        42,
+        -1));
+    // Now we create a tensor with the buffer we just created
+    auto x = tt::tt_metal::Tensor(
+        // Let the tensor take ownership of the buffer
+        OwnedStorage{std::move(buffer)},
+        // IMPORTANT: SHAPE MUST BE 4D ELSE EVERYTHING WILL BREAK during the PAD operation
+        {1, 1, tensor_width, tensor_height},
+        // The data type of the tensor
+        tt::tt_metal::DataType::BFLOAT16,
+        // The layout of the tensor. We don't care about the layout in this demo. But the valid options are TILE and
+        // ROW_MAJOR Where TILE is the processor native layout and ROW_MAJOR mostly have to be converted to TILE before
+        // processing
+        tt::tt_metal::Layout::TILE);
+    // Once created, the tensor "on host" and we must move it to the device to perform operations on it
+    x = x.to(device);
+
+    // Print the tensor to see what it looks like
+    std::cout << "Tensot x:\n";
+    print_tensor(x);
+
+    // Perform the sin(x) operation on the tensor
+    std::cout << "Performing operation on the tensor" << std::endl;
+    auto y = ttnn::sin(x);
+    // You can try other operations like relu, sigmoid and what not. Or adding two tensors!
+    // auto y = ttnn::add(x, x);
+
+    // Print the result
+    std::cout << "Tensot y:\n";
+    print_tensor(y);
+
+    // Remember to close the device when you are done
+    std::cout << "Done. Shutting down" << std::endl;
+    tt::tt_metal::CloseDevice(device);
+    return 0;
+}
diff --git a/tt-train/sources/examples/simple_cnn/CMakeLists.txt b/tt-train/sources/examples/simple_cnn/CMakeLists.txt
new file mode 100644
index 00000000000..beb74b2031a
--- /dev/null
+++ b/tt-train/sources/examples/simple_cnn/CMakeLists.txt
@@ -0,0 +1,6 @@
+project(simple_cnn)
+
+set(SOURCES main.cpp)
+
+add_executable(simple_cnn ${SOURCES})
+target_link_libraries(simple_cnn PRIVATE ttml)
diff --git a/tt-train/sources/examples/simple_cnn/main.cpp b/tt-train/sources/examples/simple_cnn/main.cpp
new file mode 100644
index 00000000000..0911c0417f0
--- /dev/null
+++ b/tt-train/sources/examples/simple_cnn/main.cpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <core/ttnn_all_includes.hpp>
+#include <iostream>
+
+int main() {
+    const size_t tensor_width = 32;
+    const size_t tensor_height = 32;
+
+    std::srand(0);
+    auto arch_ = tt::get_arch_from_string(tt::test_utils::get_env_arch_name());
+    auto num_devices_ = tt::tt_metal::GetNumAvailableDevices();
+    std::cout << "Arch:" << tt::test_utils::get_env_arch_name() << std::endl;
+    std::cout << "num_devices:" << num_devices_ << std::endl;
+    auto device = tt::tt_metal::CreateDevice(0);
+    std::cout << "Device created" << std::endl;
+    tt::tt_metal::CloseDevice(device);
+    return 0;
+}
diff --git a/tt-train/sources/ttml/CMakeLists.txt b/tt-train/sources/ttml/CMakeLists.txt
new file mode 100644
index 00000000000..623309d5076
--- /dev/null
+++ b/tt-train/sources/ttml/CMakeLists.txt
@@ -0,0 +1,131 @@
+project(ttml)
+
+file(
+    GLOB_RECURSE SOURCES
+    LIST_DIRECTORIES true
+    *.hpp
+    *.cpp
+) # I am a bad person
+
+# Check if Metalium::Metal target exists
+# If it does not exist, assume that we are building with tt-train as top level project
+if(NOT TARGET Metalium::Metal)
+    if("$ENV{TT_METAL_HOME}" STREQUAL "")
+        message(FATAL_ERROR "TT_METAL_HOME is not set")
+    endif()
+
+    set(METALIUM_INCLUDE_DIRS
+        # Metalium
+        "$ENV{TT_METAL_HOME}"
+        "$ENV{TT_METAL_HOME}/tt_metal"
+        "$ENV{TT_METAL_HOME}/tt_metal/third_party/umd"
+        "$ENV{TT_METAL_HOME}/tt_metal/hw/inc/wormhole"
+        "$ENV{TT_METAL_HOME}/tt_metal/hw/inc/wormhole/wormhole_b0_defines"
+        "$ENV{TT_METAL_HOME}/tt_metal/hw/inc/"
+        "$ENV{TT_METAL_HOME}/tt_metal/third_party/umd/src/firmware/riscv/wormhole"
+        "$ENV{TT_METAL_HOME}/tt_metal/third_party/umd/device"
+        # TTNN
+        "$ENV{TT_METAL_HOME}/ttnn/cpp"
+        "$ENV{TT_METAL_HOME}/ttnn/cpp/ttnn/deprecated"
+        "${reflect_SOURCE_DIR}"
+    )
+
+    message(STATUS "Metalium not found, attempting to locate")
+
+    # Define the path to look for the library
+    set(METALIUM_LIB_PATH "$ENV{TT_METAL_HOME}/build/lib")
+
+    # Try to find the library
+    find_library(TT_METAL_LIBRARY NAMES "tt_metal" PATHS "${METALIUM_LIB_PATH}" NO_DEFAULT_PATH)
+    find_library(TTNN_LIBRARY NAMES "_ttnn.so" PATHS "${METALIUM_LIB_PATH}" NO_DEFAULT_PATH)
+
+    if(TT_METAL_LIBRARY)
+        add_library(Metalium::Metal SHARED IMPORTED)
+        set_target_properties(
+            Metalium::Metal
+            PROPERTIES
+                IMPORTED_LOCATION
+                    "${TT_METAL_LIBRARY}"
+                INTERFACE_INCLUDE_DIRECTORIES
+                    "${METALIUM_INCLUDE_DIRS}"
+        )
+        message(STATUS "Successfully found libtt_metal.so at ${TT_METAL_LIBRARY}")
+    else()
+        message(FATAL_ERROR "libtt_metal.so not found in ${METALIUM_LIB_PATH}")
+    endif()
+    if(TTNN_LIBRARY)
+        add_library(Metalium::TTNN SHARED IMPORTED)
+        set_target_properties(
+            Metalium::TTNN
+            PROPERTIES
+                IMPORTED_LOCATION
+                    "${TTNN_LIBRARY}"
+                INTERFACE_INCLUDE_DIRECTORIES
+                    "${METALIUM_INCLUDE_DIRS}"
+        )
+        message(STATUS "Successfully found _ttnn.so at ${TTNN_LIBRARY}")
+    else()
+        message(FATAL_ERROR "_ttnn.so not found in ${METALIUM_LIB_PATH}")
+    endif()
+else()
+    message(STATUS "Metalium targets already exists")
+endif()
+
+add_library(ttml STATIC ${SOURCES})
+
+target_include_directories(ttml PUBLIC ${PROJECT_SOURCE_DIR})
+
+find_package(Python REQUIRED Development)
+
+foreach(lib ${BoostPackages})
+    target_include_directories(ttml SYSTEM PUBLIC ${Boost${lib}_SOURCE_DIR}/include)
+endforeach()
+
+target_link_libraries(
+    ttml
+    PUBLIC
+        dl
+        z
+        pthread
+        atomic
+        Metalium::Metal
+        Metalium::TTNN
+        Python::Python
+        fmt::fmt-header-only
+        magic_enum
+        yaml-cpp::yaml-cpp
+        xtensor
+        xtl
+        tokenizers_cpp
+        wandbcpp
+        Boost::core
+        Boost::container
+)
+
+message(STATUS "xtensor_SOURCE_DIR: ${xtensor_SOURCE_DIR}")
+message(STATUS "xtl_SOURCE_DIR: ${xtl_SOURCE_DIR}")
+
+message(STATUS "msgpack_SOURCE_DIR: ${msgpack_SOURCE_DIR}")
+target_include_directories(ttml PUBLIC ${msgpack_SOURCE_DIR}/include)
+message(STATUS "cli11_SOURCE_DIR: ${CLI11_SOURCE_DIR}")
+target_include_directories(ttml PUBLIC ${CLI11_SOURCE_DIR}/include)
+
+target_include_directories(ttml PUBLIC ${TOKENZIER_CPP_PATH}/include)
+
+target_link_libraries(
+    tokenizers_cpp
+    PUBLIC
+        ${LIBC++}
+        ${LIBC++ABI}
+)
+target_compile_options(tokenizers_cpp PUBLIC -stdlib=libc++)
+
+target_link_libraries(
+    wandbcpp
+    PUBLIC
+        ${LIBC++}
+        ${LIBC++ABI}
+)
+target_compile_options(wandbcpp PUBLIC -stdlib=libc++)
+
+add_definitions(-DTOKENIZERS_DATA_PATH="${CMAKE_CURRENT_SOURCE_DIR}/data/tokenizers")
diff --git a/tt-train/sources/ttml/autograd/auto_context.cpp b/tt-train/sources/ttml/autograd/auto_context.cpp
new file mode 100644
index 00000000000..be009ae6caa
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/auto_context.cpp
@@ -0,0 +1,51 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "auto_context.hpp"
+
+#include <optional>
+
+namespace ttml::autograd {
+
+std::mt19937& AutoContext::get_generator() {
+    return m_generator;
+}
+
+void AutoContext::set_seed(uint32_t seed) {
+    m_seed = seed;
+    m_generator = std::mt19937(m_seed);
+}
+
+uint32_t AutoContext::get_seed() const {
+    return m_seed;
+}
+
+AutoContext& AutoContext::get_instance() {
+    static AutoContext instance;
+    return instance;
+}
+std::optional<NodeId> AutoContext::add_backward_node(GradFunction&& grad_function, std::span<NodeId> links) {
+    if (m_grads_mode == GradMode::DISABLED) {
+        return std::nullopt;
+    }
+    return m_graph.add_node(std::move(grad_function), links);
+}
+void AutoContext::set_gradient_mode(GradMode mode) {
+    m_grads_mode = mode;
+}
+GradMode AutoContext::get_gradient_mode() const {
+    return m_grads_mode;
+}
+
+void AutoContext::reset_graph() {
+    m_graph.reset();
+}
+
+tt::tt_metal::Device& AutoContext::get_device() {
+    return device.get_device();
+}
+
+AutoContext::AutoContext() : m_generator(m_seed) {
+}
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/auto_context.hpp b/tt-train/sources/ttml/autograd/auto_context.hpp
new file mode 100644
index 00000000000..bb43cd8d061
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/auto_context.hpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <random>
+
+#include "core/device.hpp"
+#include "graph.hpp"
+
+namespace ttml::autograd {
+
+enum class GradMode { ENABLED, DISABLED };
+
+class AutoContext {
+public:
+    // Delete copy constructor and assignment operator to prevent copying
+    AutoContext(const AutoContext&) = delete;
+    AutoContext& operator=(const AutoContext&) = delete;
+    AutoContext(AutoContext&&) = delete;
+    AutoContext& operator=(AutoContext&&) = delete;
+    // Static method to access the singleton instance
+    static AutoContext& get_instance();
+
+    std::mt19937& get_generator();
+
+    void set_seed(uint32_t seed);
+
+    [[nodiscard]] uint32_t get_seed() const;
+
+    std::optional<NodeId> add_backward_node(GradFunction&& grad_function, std::span<NodeId> links);
+
+    void reset_graph();
+
+    void set_gradient_mode(GradMode mode);
+
+    [[nodiscard]] GradMode get_gradient_mode() const;
+
+    ~AutoContext() = default;  // to make it work with unique_ptr.
+
+    tt::tt_metal::Device& get_device();
+
+private:
+    AutoContext();
+    uint32_t m_seed = 5489U;
+    std::mt19937 m_generator;
+
+    GradMode m_grads_mode = GradMode::ENABLED;
+
+    Graph m_graph;
+
+    core::Device device{0};
+};
+
+inline auto& ctx() {
+    return AutoContext::get_instance();
+}
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/autocast_tensor.cpp b/tt-train/sources/ttml/autograd/autocast_tensor.cpp
new file mode 100644
index 00000000000..263d718ad02
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/autocast_tensor.cpp
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autocast_tensor.hpp"
+
+#include "core/tt_tensor_utils.hpp"
+
+namespace {
+
+inline bool is_castable_tensor(const tt::tt_metal::Tensor &tensor) {
+    return tensor.get_dtype() == DataType::FLOAT32;
+}
+
+}  // namespace
+
+namespace ttml::autograd {
+
+void AutocastTensor::set_tensor(const tt::tt_metal::Tensor &tensor) {
+    if (tensor.get_dtype() == DataType::FLOAT32) {
+        m_full_precision_tensor = tensor;
+        m_half_precision_tensor = ttnn::typecast(tensor, DataType::BFLOAT16);
+        return;
+    }
+
+    m_full_precision_tensor = tensor;
+    m_half_precision_tensor = ttnn::Tensor();  // Reset the half precision tensor
+}
+
+const tt::tt_metal::Tensor &AutocastTensor::get_tensor(PreferredPrecision preferred_precision) const {
+    if (preferred_precision == PreferredPrecision::HALF && is_castable_tensor(m_full_precision_tensor)) {
+        return m_half_precision_tensor;
+    }
+
+    return m_full_precision_tensor;
+}
+
+AutocastTensor::AutocastTensor(const tt::tt_metal::Tensor &tensor) {
+    set_tensor(tensor);
+}
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/autocast_tensor.hpp b/tt-train/sources/ttml/autograd/autocast_tensor.hpp
new file mode 100644
index 00000000000..ed9a1ed6835
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/autocast_tensor.hpp
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <core/ttnn_all_includes.hpp>
+#include <cstdint>
+
+namespace ttml::autograd {
+
+enum class PreferredPrecision : uint8_t { HALF = 0, FULL = 1 };
+
+class AutocastTensor {
+    tt::tt_metal::Tensor m_half_precision_tensor{};
+    tt::tt_metal::Tensor m_full_precision_tensor{};
+
+public:
+    AutocastTensor() = default;
+    explicit AutocastTensor(const tt::tt_metal::Tensor &tensor);
+    AutocastTensor(const AutocastTensor &) = default;
+    AutocastTensor(AutocastTensor &&) noexcept = default;
+    AutocastTensor &operator=(const AutocastTensor &) = default;
+    AutocastTensor &operator=(AutocastTensor &&) noexcept = default;
+    ~AutocastTensor() = default;
+
+    void set_tensor(const tt::tt_metal::Tensor &tensor);
+    [[nodiscard]] const tt::tt_metal::Tensor &get_tensor(
+        PreferredPrecision preferred_precision = PreferredPrecision::HALF) const;
+};
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/clip_gradient_norm.cpp b/tt-train/sources/ttml/autograd/clip_gradient_norm.cpp
new file mode 100644
index 00000000000..8e0a7e5f458
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/clip_gradient_norm.cpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autograd/clip_gradient_norm.hpp"
+
+#include "autograd/auto_context.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::autograd {
+
+void clip_tensor_norm_(tt::tt_metal::Tensor& tensor, float max_norm) {
+    if (max_norm <= 0.F) {
+        throw std::logic_error(fmt::format("max_norm should be positive, current max norm {}", max_norm));
+    }
+
+    auto squared = ttnn::multiply(tensor, tensor);
+    auto shape = core::create_shape({1, 1, 1, 1});
+    auto out = ttml::core::from_vector({0.F}, shape, &ttml::autograd::ctx().get_device());
+    ttnn::moreh_sum(squared, std::nullopt, true, out, squared.memory_config(), std::nullopt);
+    auto grad_norm_tensor = ttnn::sqrt(out);
+
+    // this is workaround before ttnn::repeat is fixed
+    auto grad_norm_tensor_float = ttml::core::to_vector(grad_norm_tensor)[0];
+    if (grad_norm_tensor_float > max_norm) {
+        auto scale = max_norm / grad_norm_tensor_float;
+        tensor = ttnn::multiply(tensor, scale);
+    }
+}
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/clip_gradient_norm.hpp b/tt-train/sources/ttml/autograd/clip_gradient_norm.hpp
new file mode 100644
index 00000000000..8a466344222
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/clip_gradient_norm.hpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::autograd {
+
+void clip_tensor_norm_(tt::tt_metal::Tensor& tensor, float max_norm);
+
+template <typename Model>
+void clip_gradient_norm_(Model& model, float max_norm) {
+    for (auto& [name, param] : model.parameters()) {
+        auto& grad = param->get_grad();
+        if (core::is_tensor_initialized(grad)) {
+            clip_tensor_norm_(grad, max_norm);
+        }
+    }
+};
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/graph.cpp b/tt-train/sources/ttml/autograd/graph.cpp
new file mode 100644
index 00000000000..9fdd7b1b4ac
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/graph.cpp
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "graph.hpp"
+
+#include <fmt/core.h>
+
+#include "core/debug.hpp"
+#include "core/system_utils.hpp"
+
+namespace ttml::autograd {
+
+const std::vector<std::vector<size_t>>& Graph::get_edges() const {
+    return m_links;
+}
+
+const std::vector<GraphNode>& Graph::get_graph_nodes() const {
+    return m_graph_nodes;
+}
+
+NodeId Graph::add_node(GradFunction&& grad_function, std::span<NodeId> links) {
+    size_t curr_id = m_graph_nodes.size();
+    if (core::debug::Debug::enable_backward_performance_measurement()) {
+        //  we are using this wrapper to measure the time taken by each node.
+        GradFunction wrapper = [grad_function = std::move(grad_function), curr_id, this]() {
+            const std::type_info& typeInfo = grad_function.target_type();
+            auto demangled_name = core::demangle(typeInfo.name());
+            auto time = std::chrono::high_resolution_clock::now();
+            grad_function();
+            auto duration =
+                std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - time);
+            fmt::print(
+                "Node {} took {} ms Demangled name {}\n", curr_id, (double)duration.count() / 1000., demangled_name);
+        };
+        m_graph_nodes.emplace_back(std::move(wrapper));
+    } else {
+        m_graph_nodes.emplace_back(std::move(grad_function));
+    }
+
+    auto& node_links = m_links.emplace_back();
+    node_links.reserve(links.size());
+    for (const auto& link : links) {
+        node_links.push_back(link.get_id());
+    }
+
+    return {curr_id, this};
+}
+
+NodeId::NodeId(size_t node_id, Graph* graph) : m_node_id(node_id), m_graph(graph) {
+}
+
+size_t NodeId::get_id() const {
+    return m_node_id;
+}
+
+Graph& NodeId::get_graph() const {
+    return *m_graph;
+}
+
+void Graph::reset() {
+    m_graph_nodes.clear();
+    m_links.clear();
+}
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/graph.hpp b/tt-train/sources/ttml/autograd/graph.hpp
new file mode 100644
index 00000000000..83011514c3b
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/graph.hpp
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <functional>
+#include <span>
+
+#include "core/not_null.hpp"
+
+namespace ttml::autograd {
+class Graph;
+class GraphNode;
+
+using GradFunction = std::function<void()>;
+
+struct GraphNode {
+    GradFunction grad_function;
+};
+
+class NodeId {
+public:
+    NodeId(size_t node_id, Graph* graph);
+    [[nodiscard]] size_t get_id() const;
+    [[nodiscard]] Graph& get_graph() const;
+
+private:
+    size_t m_node_id = 0;
+    core::not_null<Graph*> m_graph;
+};
+
+class Graph {
+private:
+    std::vector<GraphNode> m_graph_nodes;
+    std::vector<std::vector<size_t>> m_links;
+
+public:
+    [[nodiscard]] const std::vector<std::vector<size_t>>& get_edges() const;
+    [[nodiscard]] const std::vector<GraphNode>& get_graph_nodes() const;
+    NodeId add_node(GradFunction&& grad_function, std::span<NodeId> links);
+
+    void reset();
+};
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/graph_utils.hpp b/tt-train/sources/ttml/autograd/graph_utils.hpp
new file mode 100644
index 00000000000..1ec6d71b0c2
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/graph_utils.hpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "core/template_utils.hpp"
+#include "graph.hpp"
+
+namespace ttml::autograd {
+
+template <typename... Tensors>
+std::vector<NodeId> get_links(Tensors&&... tensors) {
+    static_assert(core::are_same_type<Tensors...>(), "All nodes must have the same type!");
+
+    std::vector<NodeId> links;
+    links.reserve(sizeof...(Tensors));
+    auto process_node = [&links](auto&& tensor) {
+        const auto& node = tensor->get_node();
+        if (node) {
+            links.push_back(node.value());
+        }
+    };
+
+    (process_node(std::forward<Tensors>(tensors)), ...);
+
+    return links;
+}
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/module_base.cpp b/tt-train/sources/ttml/autograd/module_base.cpp
new file mode 100644
index 00000000000..4cc13b09826
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/module_base.cpp
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "module_base.hpp"
+
+#include "auto_context.hpp"
+
+namespace ttml::autograd {
+
+void ModuleBase::register_tensor(const TensorPtr& tensor_ptr, const std::string& name) {
+    auto [_, is_inserted] = m_named_tensors.emplace(name, tensor_ptr);
+    if (!is_inserted) {
+        throw std::logic_error("Names of two tensors coincide");
+    }
+}
+
+void ModuleBase::register_module(const ModuleBasePtr& module_ptr, const std::string& name) {
+    auto [_, is_inserted] = m_named_modules.emplace(name, module_ptr);
+    if (!is_inserted) {
+        throw std::logic_error(fmt::format("Names of two modules coincide: {}", name));
+    }
+}
+
+void ModuleBase::create_name(const std::string& name) {
+    m_name = name;
+}
+
+const std::string& ModuleBase::get_name() const {
+    return m_name;
+}
+
+NamedParameters ModuleBase::parameters() const {
+    NamedParameters params;
+
+    std::queue<std::pair<const ModuleBase*, std::string>> modules_to_process;
+    modules_to_process.emplace(this, get_name() + "/");
+
+    std::unordered_set<std::string> modules_in_queue;
+    modules_in_queue.insert(get_name());
+    while (!modules_to_process.empty()) {
+        auto [module_ptr, name_prefix] = modules_to_process.front();
+        modules_to_process.pop();
+
+        for (const auto& [tensor_name, tensor_ptr] : module_ptr->m_named_tensors) {
+            params.emplace(name_prefix + tensor_name, tensor_ptr);
+        }
+
+        for (const auto& [module_name, next_module_ptr] : module_ptr->m_named_modules) {
+            const auto module_name_with_prefix = name_prefix + module_name;
+            if (!modules_in_queue.contains(module_name_with_prefix)) {
+                modules_to_process.emplace(next_module_ptr.get(), name_prefix + module_name + "/");
+                modules_in_queue.insert(module_name_with_prefix);
+            }
+        }
+    }
+
+    return params;
+}
+
+void ModuleBase::set_run_mode(RunMode mode) {
+    m_run_mode = mode;
+    for (auto& [_, module] : this->m_named_modules) {
+        module->set_run_mode(mode);
+    }
+}
+
+[[nodiscard]] RunMode ModuleBase::get_run_mode() const {
+    return m_run_mode;
+}
+
+void ModuleBase::train() {
+    set_run_mode(RunMode::TRAIN);
+}
+
+void ModuleBase::eval() {
+    set_run_mode(RunMode::EVAL);
+}
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/module_base.hpp b/tt-train/sources/ttml/autograd/module_base.hpp
new file mode 100644
index 00000000000..442d0dc36f1
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/module_base.hpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "tensor.hpp"
+
+namespace ttml::autograd {
+
+enum class RunMode { TRAIN, EVAL };
+
+class ModuleBase;
+using ModuleBasePtr = std::shared_ptr<ModuleBase>;
+using NamedParameters = std::unordered_map<std::string, TensorPtr>;
+
+class ModuleBase {
+private:
+    std::string m_name;
+    RunMode m_run_mode = RunMode::TRAIN;
+
+    std::unordered_map<std::string, TensorPtr> m_named_tensors;
+    std::unordered_map<std::string, ModuleBasePtr> m_named_modules;
+
+protected:
+    void create_name(const std::string& name);
+    void register_tensor(const TensorPtr& tensor_ptr, const std::string& name);
+    void register_module(const ModuleBasePtr& module_ptr, const std::string& name);
+
+public:
+    ModuleBase() = default;
+    virtual ~ModuleBase() = default;
+    ModuleBase(const ModuleBase&) = default;
+    ModuleBase(ModuleBase&&) = default;
+    ModuleBase& operator=(const ModuleBase&) = default;
+    ModuleBase& operator=(ModuleBase&&) = default;
+
+    [[nodiscard]] const std::string& get_name() const;
+    [[nodiscard]] NamedParameters parameters() const;
+
+    void train();
+    void eval();
+    void set_run_mode(RunMode mode);
+    [[nodiscard]] RunMode get_run_mode() const;
+};
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/tensor.cpp b/tt-train/sources/ttml/autograd/tensor.cpp
new file mode 100644
index 00000000000..41affcf6e9d
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/tensor.cpp
@@ -0,0 +1,135 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tensor.hpp"
+
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace {
+
+// TODO: implement stack based topological sort
+void topological_sort(
+    size_t node_id,
+    const std::vector<std::vector<size_t>>& edges,
+    std::unordered_set<size_t>& visited,
+    std::vector<size_t>& sorted_nodes) {
+    if (visited.contains(node_id)) {
+        return;
+    }
+    visited.insert(node_id);
+    for (const auto& next_node : edges[node_id]) {
+        topological_sort(next_node, edges, visited, sorted_nodes);
+    }
+    sorted_nodes.push_back(node_id);
+}
+
+}  // namespace
+
+namespace ttml::autograd {
+
+Tensor::Tensor(const tt::tt_metal::Tensor& value, bool requires_grad) : m_value(value), m_requires_grad(requires_grad) {
+}
+
+void Tensor::add_grad(const tt::tt_metal::Tensor& grad) {
+    if (!is_grad_initialized()) {
+        auto value_shape = m_value.get_tensor().get_shape();
+        if (grad.get_shape() != value_shape) {
+            throw std::logic_error(
+                fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", value_shape, grad.get_shape()));
+        }
+
+        m_grad = grad;
+        return;
+    }
+
+    const auto& grad_shape = grad.get_shape();
+    const auto& m_grad_shape = m_grad.get_shape();
+    if (grad_shape != m_grad_shape) {
+        throw std::logic_error(
+            fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", m_grad_shape, grad_shape));
+    }
+
+    // It is important to not use inline addition here
+    // m_grad might share memory with other tensors
+    m_grad = ttnn::add(m_grad, grad);
+}
+
+void Tensor::backward() {
+    if (!m_node_id.has_value()) {
+        return;
+    }
+    std::vector<size_t> sorted_nodes;
+    std::unordered_set<std::size_t> visited_nodes;
+    const auto& graph = m_node_id->get_graph();
+    topological_sort(m_node_id->get_id(), graph.get_edges(), visited_nodes, sorted_nodes);
+
+    const auto& graph_nodes = graph.get_graph_nodes();
+    std::ranges::reverse(sorted_nodes);
+    try_init_grad(/* init_ones */ true);
+    for (const auto& node_id : sorted_nodes) {
+        graph_nodes[node_id].grad_function();
+    }
+}
+
+bool Tensor::is_grad_initialized() const {
+    return core::is_tensor_initialized(get_grad());
+}
+
+void Tensor::try_init_grad(bool init_ones) {
+    if (is_grad_initialized()) {
+        return;
+    }
+
+    const auto& value = get_value();
+    this->set_grad(init_ones ? ttml::core::ones_like(value) : ttml::core::zeros_like(value));
+}
+void Tensor::set_node(const std::optional<NodeId>& node) {
+    if (m_node_id.has_value()) {
+        throw std::runtime_error("Graph node is already set for this tensor!");
+    }
+    m_node_id = node;
+}
+
+void print_tensor_stats(const autograd::TensorPtr& tensor, const std::string& name) {
+    core::print_tensor_stats(tensor->get_value(), name);
+}
+
+void Tensor::set_value(const tt::tt_metal::Tensor& value) {
+    m_value.set_tensor(value);
+}
+
+void Tensor::set_grad(const tt::tt_metal::Tensor& grad) {
+    m_grad = grad;
+}
+
+void Tensor::clean_node() {
+    m_node_id = std::nullopt;
+}
+
+void Tensor::set_requires_grad(bool requires_grad) {
+    m_requires_grad = requires_grad;
+}
+
+const tt::tt_metal::Tensor& Tensor::get_value(PreferredPrecision preferred_precision) const {
+    return m_value.get_tensor(preferred_precision);
+}
+
+const tt::tt_metal::Tensor& Tensor::get_grad() const {
+    return m_grad;
+}
+
+tt::tt_metal::Tensor& Tensor::get_grad() {
+    return m_grad;
+}
+
+bool Tensor::get_requires_grad() const {
+    return m_requires_grad;
+}
+
+const std::optional<NodeId>& Tensor::get_node() const {
+    return m_node_id;
+}
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/autograd/tensor.hpp b/tt-train/sources/ttml/autograd/tensor.hpp
new file mode 100644
index 00000000000..d036e14762c
--- /dev/null
+++ b/tt-train/sources/ttml/autograd/tensor.hpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <core/ttnn_all_includes.hpp>
+#include <memory>
+#include <optional>
+
+#include "autocast_tensor.hpp"
+#include "graph.hpp"
+
+namespace ttml::autograd {
+
+class Tensor : public std::enable_shared_from_this<Tensor> {
+private:
+    AutocastTensor m_value;
+    tt::tt_metal::Tensor m_grad;
+    bool m_requires_grad = true;
+    std::optional<NodeId> m_node_id;
+
+public:
+    Tensor() = default;
+    Tensor(const Tensor &) = default;
+    Tensor(Tensor &&) noexcept = default;
+    Tensor &operator=(const Tensor &) = default;
+    Tensor &operator=(Tensor &&) noexcept = default;
+    explicit Tensor(const tt::tt_metal::Tensor &value, bool requires_grad = true);
+    ~Tensor() = default;
+
+    void set_value(const tt::tt_metal::Tensor &value);
+    void set_grad(const tt::tt_metal::Tensor &grad);
+    void set_node(const std::optional<NodeId> &node);
+    void clean_node();
+    void add_grad(const tt::tt_metal::Tensor &grad);
+    void set_requires_grad(bool requires_grad);
+
+    const tt::tt_metal::Tensor &get_value(PreferredPrecision preferred_precision = PreferredPrecision::HALF) const;
+    const tt::tt_metal::Tensor &get_grad() const;
+    tt::tt_metal::Tensor &get_grad();
+    bool get_requires_grad() const;
+    const std::optional<NodeId> &get_node() const;
+
+    void backward();
+
+    bool is_grad_initialized() const;
+
+private:
+    void try_init_grad(bool init_ones = false);
+};
+
+using TensorPtr = std::shared_ptr<Tensor>;
+
+// TODO: In future implement create tensor without variadic templates to help with code hints in IDE
+template <typename... Args>
+TensorPtr create_tensor(Args &&...args) {
+    return std::make_shared<Tensor>(std::forward<Args>(args)...);
+}
+
+void print_tensor_stats(const autograd::TensorPtr &tensor, const std::string &name);
+
+}  // namespace ttml::autograd
diff --git a/tt-train/sources/ttml/core/compute_kernel_config.cpp b/tt-train/sources/ttml/core/compute_kernel_config.cpp
new file mode 100644
index 00000000000..f8b7d03c596
--- /dev/null
+++ b/tt-train/sources/ttml/core/compute_kernel_config.cpp
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "compute_kernel_config.hpp"
+
+namespace ttml::core {
+
+ttnn::WormholeComputeKernelConfig ComputeKernelConfig::precise() {
+    ttnn::WormholeComputeKernelConfig config;
+    config.fp32_dest_acc_en = true;
+    config.math_approx_mode = false;
+    config.math_fidelity = MathFidelity::HiFi2;
+    config.packer_l1_acc = true;
+    return config;
+}
+
+ttnn::WormholeComputeKernelConfig ComputeKernelConfig::softmax() {
+    ttnn::WormholeComputeKernelConfig config;
+    config.fp32_dest_acc_en = false;
+    config.math_approx_mode = false;
+    config.math_fidelity = MathFidelity::HiFi2;
+    config.packer_l1_acc = true;
+    return config;
+}
+
+ttnn::WormholeComputeKernelConfig ComputeKernelConfig::matmul() {
+    ttnn::WormholeComputeKernelConfig config;
+    config.fp32_dest_acc_en = false;
+    config.math_approx_mode = false;
+    config.math_fidelity = MathFidelity::HiFi2;
+    config.packer_l1_acc = true;
+    return config;
+}
+
+ttnn::WormholeComputeKernelConfig ComputeKernelConfig::fast() {
+    ttnn::WormholeComputeKernelConfig config;
+    config.fp32_dest_acc_en = false;
+    config.math_approx_mode = true;
+    config.math_fidelity = MathFidelity::LoFi;
+    config.packer_l1_acc = false;
+    return config;
+}
+
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/compute_kernel_config.hpp b/tt-train/sources/ttml/core/compute_kernel_config.hpp
new file mode 100644
index 00000000000..0dd24ef7262
--- /dev/null
+++ b/tt-train/sources/ttml/core/compute_kernel_config.hpp
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "ttnn_all_includes.hpp"
+
+namespace ttml::core {
+
+class ComputeKernelConfig {
+public:
+    static ttnn::WormholeComputeKernelConfig precise();
+    static ttnn::WormholeComputeKernelConfig softmax();
+    static ttnn::WormholeComputeKernelConfig matmul();
+    static ttnn::WormholeComputeKernelConfig fast();
+};
+
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/debug.hpp b/tt-train/sources/ttml/core/debug.hpp
new file mode 100644
index 00000000000..65d04b3ef42
--- /dev/null
+++ b/tt-train/sources/ttml/core/debug.hpp
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+namespace ttml::core::debug {
+
+struct Debug {
+    static constexpr bool enable_backward_performance_measurement() {
+        return false;
+    }
+
+    static constexpr bool enable_print_tensor_stats() {
+        return false;
+    };
+};
+
+}  // namespace ttml::core::debug
diff --git a/tt-train/sources/ttml/core/device.cpp b/tt-train/sources/ttml/core/device.cpp
new file mode 100644
index 00000000000..e9e13e122f5
--- /dev/null
+++ b/tt-train/sources/ttml/core/device.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "device.hpp"
+
+#include "ttnn_all_includes.hpp"
+
+namespace {
+void device_deleter(tt::tt_metal::Device* device) {
+    assert(device != nullptr);
+    tt::tt_metal::CloseDevice(device);
+};
+}  // namespace
+
+namespace ttml::core {
+
+Device::Device(int device_index) :
+    m_device(std::unique_ptr<tt::tt_metal::Device, void (*)(tt::tt_metal::Device*)>(
+        tt::tt_metal::CreateDevice(device_index), &device_deleter)) {
+    tt::log_info("Device #{} successfully created", device_index);
+}
+
+[[nodiscard]] tt::tt_metal::Device& Device::get_device() {
+    assert(m_device);
+    return *m_device;
+}
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/device.hpp b/tt-train/sources/ttml/core/device.hpp
new file mode 100644
index 00000000000..8bea2815cc7
--- /dev/null
+++ b/tt-train/sources/ttml/core/device.hpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <memory>
+
+#include "ttnn_all_includes.hpp"
+
+namespace ttml::core {
+// should I implement pimpl or its fine
+class Device {
+public:
+    explicit Device(int device_index);
+    Device(Device&& device) = default;
+    Device(const Device&) = delete;
+
+    Device& operator=(const Device&) = delete;
+    Device& operator=(Device&&) = default;
+    ~Device() = default;
+
+    [[nodiscard]] tt::tt_metal::Device& get_device();
+
+private:
+    std::unique_ptr<tt::tt_metal::Device, void (*)(tt::tt_metal::Device*)> m_device;
+};
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/not_null.hpp b/tt-train/sources/ttml/core/not_null.hpp
new file mode 100644
index 00000000000..3babc1b8bcc
--- /dev/null
+++ b/tt-train/sources/ttml/core/not_null.hpp
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <stdexcept>
+#include <utility>
+
+namespace ttml::core {
+
+/*
+Simplified gsl::not_null to comply with clang-tidy checks.
+*/
+template <typename T>
+class not_null {
+private:
+    T m_ptr;
+
+public:
+    // Constructor
+    explicit not_null(T ptr) : m_ptr(std::move(ptr)) {
+        if (m_ptr == nullptr) {
+            throw std::invalid_argument("Pointer must not be null");
+        }
+    }
+
+    not_null() = delete;
+
+    template <typename U>
+    not_null(U) = delete;
+
+    explicit operator T() const noexcept {
+        return m_ptr;
+    }
+
+    // Dereference operators
+    auto operator*() const noexcept -> decltype(*m_ptr) {
+        return *m_ptr;
+    }
+
+    auto operator->() const noexcept -> T {
+        return m_ptr;
+    }
+
+    // Get the underlying pointer
+    T get() const noexcept {
+        return m_ptr;
+    }
+
+    // Assignment operator
+    not_null& operator=(T ptr) {
+        if (ptr == nullptr) {
+            throw std::invalid_argument("Pointer must not be null");
+        }
+        m_ptr = std::move(ptr);
+        return *this;
+    }
+};
+
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/system_utils.cpp b/tt-train/sources/ttml/core/system_utils.cpp
new file mode 100644
index 00000000000..8aab7eb3253
--- /dev/null
+++ b/tt-train/sources/ttml/core/system_utils.cpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "system_utils.hpp"
+
+#include <cxxabi.h>
+
+namespace ttml::core {
+std::string demangle(const char* name) {
+    int status = -4;
+
+    std::unique_ptr<char, decltype(&free)> res(abi::__cxa_demangle(name, nullptr, nullptr, &status), &free);
+
+    const char* const demangled_name = (status == 0) ? res.get() : name;
+
+    std::string ret_val(demangled_name);
+
+    return ret_val;
+}
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/system_utils.hpp b/tt-train/sources/ttml/core/system_utils.hpp
new file mode 100644
index 00000000000..c67c11c371b
--- /dev/null
+++ b/tt-train/sources/ttml/core/system_utils.hpp
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <string>
+
+namespace ttml::core {
+std::string demangle(const char* name);
+}
diff --git a/tt-train/sources/ttml/core/template_utils.hpp b/tt-train/sources/ttml/core/template_utils.hpp
new file mode 100644
index 00000000000..e10307f8ae1
--- /dev/null
+++ b/tt-train/sources/ttml/core/template_utils.hpp
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <type_traits>
+namespace ttml::core {
+template <typename T, typename... Rest>
+constexpr bool are_same_type() {
+    return (std::is_same_v<std::decay_t<T>, std::decay_t<Rest>> && ...);
+}
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/tt_tensor_utils.cpp b/tt-train/sources/ttml/core/tt_tensor_utils.cpp
new file mode 100644
index 00000000000..05dca336ca7
--- /dev/null
+++ b/tt-train/sources/ttml/core/tt_tensor_utils.cpp
@@ -0,0 +1,331 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tt_tensor_utils.hpp"
+
+#include <fmt/base.h>
+#include <fmt/color.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <optional>
+#include <stdexcept>
+
+#include "ttnn_all_includes.hpp"
+
+namespace {
+
+template <typename T>
+T get_median(std::vector<T>& vec) {
+    assert(!vec.empty());
+    std::nth_element(vec.begin(), vec.begin() + vec.size() / 2, vec.end());
+    if (vec.size() & 1U) {
+        return vec[vec.size() / 2];
+    }
+    auto neighbor = *std::max_element(vec.begin(), vec.begin() + vec.size() / 2);
+    return std::midpoint(neighbor, vec[vec.size() / 2]);
+};
+
+template <typename T>
+void print_tensor_stats_(const tt::tt_metal::Tensor& tensor, const std::string& name) {
+    auto tensor_shape = tensor.get_shape();
+    auto tensor_vec = ttml::core::to_vector<T>(tensor);
+
+    auto median = get_median(tensor_vec);
+    auto mean = std::accumulate(tensor_vec.begin(), tensor_vec.end(), 0.F) / static_cast<float>(tensor_vec.size());
+    auto mean_sq =
+        std::accumulate(
+            tensor_vec.begin(), tensor_vec.end(), 0.F, [](float acc, float val) { return acc + val * val; }) /
+        static_cast<float>(tensor_vec.size());
+    auto variance = mean_sq - mean * mean;
+
+    fmt::print(
+        "{}: shape: {} min: {} max: {} median: {} mean: {} variance: {}\n",
+        name,
+        tensor_shape,
+        *std::min_element(tensor_vec.begin(), tensor_vec.end()),
+        *std::max_element(tensor_vec.begin(), tensor_vec.end()),
+        median,
+        mean,
+        variance);
+}
+
+// copypaste from deprecated tensor pybinds ttnn
+tt::tt_metal::OwnedBuffer create_owned_buffer_from_vector_of_floats(
+    const std::vector<float>& data, DataType data_type) {
+    switch (data_type) {
+        case DataType::BFLOAT8_B: {
+            auto uint32_vector = pack_fp32_vec_as_bfp8_tiles(data, /*row_major_input=*/false, /*is_exp_a=*/false);
+            return tt::tt_metal::owned_buffer::create<uint32_t>(std::move(uint32_vector));
+        }
+        case DataType::BFLOAT4_B: {
+            auto uint32_vector = pack_fp32_vec_as_bfp4_tiles(data, /*row_major_input=*/false, /*is_exp_a=*/false);
+            return tt::tt_metal::owned_buffer::create<uint32_t>(std::move(uint32_vector));
+        }
+        case DataType::FLOAT32: {
+            auto data_copy = data;
+            return tt::tt_metal::owned_buffer::create<float>(std::move(data_copy));
+        }
+        case DataType::BFLOAT16: {
+            std::vector<bfloat16> bfloat16_data(data.size());
+            std::transform(std::begin(data), std::end(data), std::begin(bfloat16_data), [](float value) {
+                return bfloat16(value);
+            });
+            return tt::tt_metal::owned_buffer::create<bfloat16>(std::move(bfloat16_data));
+        }
+        default: {
+            throw std::runtime_error("Cannot create a host buffer!");
+        }
+    }
+}
+
+template <typename T>
+tt::tt_metal::Tensor ttml_create_owned_tensor(
+    std::vector<T>&& data, const ttnn::Shape& shape, tt::tt_metal::DataType data_type, tt::tt_metal::Layout layout) {
+    auto buffer = tt::tt_metal::owned_buffer::create(std::move(data));
+    auto storage = OwnedStorage{std::move(buffer)};
+    return {std::move(storage), shape, data_type, layout};
+}
+
+// TODO: optimize precomputing multipliers
+template <class T = float, class InternalT = bfloat16>
+std::vector<T> untile_tensor_to_vec(const tt::tt_metal::Tensor& cpu_tensor) {
+    auto tiled_buffer = tt::tt_metal::host_buffer::get_as<InternalT>(cpu_tensor);
+    auto untiled_shape = cpu_tensor.get_logical_shape();
+    auto tiled_shape = cpu_tensor.get_padded_shape();
+
+    // Calculate total size of the untiled tensor
+    size_t total_size = untiled_shape.volume();
+
+    std::vector<T> untiled_data(total_size);
+
+    auto compute_flat_index = [](const std::vector<uint32_t>& indices, ttnn::SimpleShape& shape) -> uint32_t {
+        uint32_t flat_index = 0;
+        uint32_t multiplier = 1;
+        for (int i = (int)indices.size() - 1; i >= 0; --i) {
+            flat_index += indices[i] * multiplier;
+            multiplier *= shape[i];
+        }
+        return flat_index;
+    };
+
+    std::vector<uint32_t> indices(tiled_shape.rank(), 0);
+
+    for (size_t idx = 0; idx < total_size; ++idx) {
+        uint32_t untiled_index = compute_flat_index(indices, untiled_shape);
+        uint32_t tiled_index = compute_flat_index(indices, tiled_shape);
+        if constexpr (std::is_same_v<InternalT, bfloat16>) {
+            untiled_data[untiled_index] = tiled_buffer[tiled_index].to_float();
+        } else {
+            untiled_data[untiled_index] = tiled_buffer[tiled_index];
+        }
+
+        for (int dim = (int)tiled_shape.rank() - 1; dim >= 0; --dim) {
+            if (++indices[dim] < untiled_shape[dim]) {
+                break;
+            }
+            indices[dim] = 0;
+        }
+    }
+
+    return untiled_data;
+}
+
+}  // namespace
+namespace ttml::core {
+
+tt::tt_metal::Tensor zeros_like(const tt::tt_metal::Tensor& tensor) {
+    return ttnn::moreh_full_like(tensor, 0.F, tensor.get_dtype(), tensor.get_layout(), tensor.memory_config());
+}
+
+tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor) {
+    return ttnn::moreh_full_like(tensor, 1.F, tensor.get_dtype(), tensor.get_layout(), tensor.memory_config());
+}
+
+tt::tt_metal::Tensor empty(const ttnn::Shape& shape, tt::tt_metal::Device* device, const MemoryConfig& memory_config) {
+    return ttnn::empty(shape, DataType::BFLOAT16, Layout::TILE, device, memory_config);
+}
+
+tt::tt_metal::Tensor full(const ttnn::Shape& shape, float value, tt::tt_metal::Device* device, DataType dtype) {
+    auto padded = shape.with_tile_padding();
+    // if the shape is not divisible by TILE_SIZE, we need to add padding
+    if (padded[2] % ttnn::types::TILE_SIZE != 0 || padded[3] % ttnn::types::TILE_SIZE != 0) {
+        int additional_padding_h =
+            (ttnn::types::TILE_SIZE - (int)padded[2] % ttnn::types::TILE_SIZE) % ttnn::types::TILE_SIZE;
+        int additional_padding_w =
+            (ttnn::types::TILE_SIZE - (int)padded[3] % ttnn::types::TILE_SIZE) % ttnn::types::TILE_SIZE;
+        auto padded_shape = ttnn::Shape(
+            {shape[0], shape[1], shape[2], shape[3]},
+            {
+                padded[0],
+                padded[1],
+                (padded[2] + additional_padding_h),
+                (padded[3] + additional_padding_w),
+            });
+        return ttnn::full(padded_shape, value, dtype, Layout::TILE, std::ref(*device));
+    }
+    // if not padding available, we can just create a tensor with the given shape
+    return ttnn::full(shape, value, dtype, Layout::TILE, std::ref(*device));
+}
+
+tt::tt_metal::Tensor zeros(const ttnn::Shape& shape, tt::tt_metal::Device* device, DataType dtype) {
+    return core::full(shape, 0.F, device, dtype);
+}
+
+tt::tt_metal::Tensor ones(const ttnn::Shape& shape, tt::tt_metal::Device* device, DataType dtype) {
+    return core::full(shape, 1.F, device, dtype);
+}
+
+template <>
+tt::tt_metal::Tensor from_vector<float, DataType::BFLOAT16>(
+    const std::vector<float>& buffer, const ttnn::Shape& shape, tt::tt_metal::Device* device, Layout layout) {
+    assert(device != nullptr);
+    const DataType data_type = DataType::BFLOAT16;
+    MemoryConfig output_mem_config{};
+    auto logical_shape = shape.logical_shape();
+    size_t volume = logical_shape.volume();
+    if (buffer.size() != volume) {
+        throw std::logic_error(
+            fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
+    }
+    auto owned_buffer = create_owned_buffer_from_vector_of_floats(buffer, data_type);
+    // remove possible paddings from the shape (it conflicts with ROW MAJOR)
+    auto output = tt::tt_metal::Tensor(OwnedStorage{owned_buffer}, logical_shape, data_type, Layout::ROW_MAJOR);
+
+    auto to_device_odd_slow = [&]() {
+        if (layout == Layout::TILE) {
+            output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
+        }
+
+        output = ttnn::to_device(output, device, output_mem_config);
+        return output;
+    };
+
+    auto to_device_even_fast = [&]() {
+        output = ttnn::to_device(output, device, output_mem_config);
+        if (layout == Layout::TILE) {
+            output = ttnn::tilize_with_zero_padding(output, output_mem_config, std::nullopt, /* multicore */ true);
+        }
+
+        return output;
+    };
+
+    if (shape[-1] % 2 == 1) {
+        output = to_device_odd_slow();
+    } else {
+        output = to_device_even_fast();
+    }
+
+    return output;
+}
+
+// Workaround implementation due to issue with tilize for float32
+// it is expected that tilize will be fixed in the after next tt-metal main update
+template <>
+tt::tt_metal::Tensor from_vector<float, DataType::FLOAT32>(
+    const std::vector<float>& buffer, const ttnn::Shape& shape, tt::tt_metal::Device* device, Layout layout) {
+    auto tensor = from_vector<float, DataType::BFLOAT16>(buffer, shape, device, layout);
+    return ttnn::typecast(tensor, DataType::FLOAT32);
+}
+
+template <>
+std::vector<float> to_vector<float>(const tt::tt_metal::Tensor& tensor) {
+    auto cpu_tensor = tensor.cpu();
+    cpu_tensor = cpu_tensor.to(Layout::ROW_MAJOR);
+    if (cpu_tensor.get_dtype() == DataType::BFLOAT16) {
+        return untile_tensor_to_vec<float, bfloat16>(cpu_tensor);
+    }
+    assert(cpu_tensor.get_dtype() == DataType::FLOAT32);
+    return untile_tensor_to_vec<float, float>(cpu_tensor);
+}
+
+/*
+From vector uint32 doesn't support tilize_with_zero_padding on device
+*/
+template <>
+tt::tt_metal::Tensor from_vector<uint32_t, DataType::UINT32>(
+    const std::vector<uint32_t>& buffer, const ttnn::Shape& shape, tt::tt_metal::Device* device, Layout layout) {
+    MemoryConfig output_mem_config{};
+    auto logical_shape = shape.logical_shape();
+    auto volume = logical_shape.volume();
+    if (buffer.size() != volume) {
+        throw std::logic_error(
+            fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
+    }
+
+    // remove possible paddings from the shape (it conflicts with ROW MAJOR)
+    std::vector<uint32_t> buffer_copy = buffer;
+    auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::UINT32, Layout::ROW_MAJOR);
+    if (device != nullptr) {
+        if (layout != Layout::ROW_MAJOR) {
+            output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
+        }
+        output = ttnn::to_device(output, device, output_mem_config);
+    }
+
+    return output;
+}
+
+/*
+From vector int32 doesn't support tilize_with_zero_padding on device
+*/
+template <>
+tt::tt_metal::Tensor from_vector<int32_t, DataType::INT32>(
+    const std::vector<int32_t>& buffer, const ttnn::Shape& shape, tt::tt_metal::Device* device, Layout layout) {
+    MemoryConfig output_mem_config{};
+    auto logical_shape = shape.logical_shape();
+    auto volume = logical_shape.volume();
+    if (buffer.size() != volume) {
+        throw std::logic_error(
+            fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
+    }
+
+    // remove possible paddings from the shape (it conflicts with ROW MAJOR)
+    std::vector<int32_t> buffer_copy = buffer;
+    auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::INT32, Layout::ROW_MAJOR);
+    if (device != nullptr) {
+        if (layout != Layout::ROW_MAJOR) {
+            output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
+        }
+        output = ttnn::to_device(output, device, output_mem_config);
+    }
+
+    return output;
+}
+
+template <>
+std::vector<uint32_t> to_vector<uint32_t>(const tt::tt_metal::Tensor& tensor) {
+    auto cpu_tensor = tensor.cpu();
+    cpu_tensor = cpu_tensor.to(Layout::ROW_MAJOR);
+
+    return untile_tensor_to_vec<uint32_t, uint32_t>(cpu_tensor);
+}
+
+template <>
+std::vector<int32_t> to_vector<int32_t>(const tt::tt_metal::Tensor& tensor) {
+    auto cpu_tensor = tensor.cpu();
+    cpu_tensor = cpu_tensor.to(Layout::ROW_MAJOR);
+
+    return untile_tensor_to_vec<int32_t, int32_t>(cpu_tensor);
+}
+
+bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor) {
+    return tensor.tensor_attributes != nullptr;
+}
+
+ttnn::Shape create_shape(const std::array<uint32_t, 4>& args) {
+    return ttnn::Shape{args};
+}
+
+void print_tensor_stats(const tt::tt_metal::Tensor& tensor, const std::string& name) {
+    if (tensor.get_dtype() == DataType::BFLOAT16 || tensor.get_dtype() == DataType::FLOAT32) {
+        print_tensor_stats_<float>(tensor, name);
+    } else {
+        print_tensor_stats_<uint32_t>(tensor, name);
+    }
+}
+
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/tt_tensor_utils.hpp b/tt-train/sources/ttml/core/tt_tensor_utils.hpp
new file mode 100644
index 00000000000..ba1be90da3b
--- /dev/null
+++ b/tt-train/sources/ttml/core/tt_tensor_utils.hpp
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <core/ttnn_all_includes.hpp>
+#include <initializer_list>
+#include <vector>
+
+#include "core/ttnn_fwd.hpp"
+
+namespace ttml::core {
+
+void print_tensor_stats(const tt::tt_metal::Tensor& tensor, const std::string& name);
+
+tt::tt_metal::Tensor zeros_like(const tt::tt_metal::Tensor& tensor);
+tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor);
+
+tt::tt_metal::Tensor empty(const ttnn::Shape& shape, tt::tt_metal::Device* device, const MemoryConfig& memory_config);
+tt::tt_metal::Tensor full(
+    const ttnn::Shape& shape, float value, tt::tt_metal::Device* device, DataType dtype = DataType::BFLOAT16);
+tt::tt_metal::Tensor zeros(const ttnn::Shape& shape, tt::tt_metal::Device* device, DataType dtype = DataType::BFLOAT16);
+tt::tt_metal::Tensor ones(const ttnn::Shape& shape, tt::tt_metal::Device* device, DataType dtype = DataType::BFLOAT16);
+
+template <class VectorType = float, DataType TensorType = DataType::BFLOAT16>
+[[nodiscard]] tt::tt_metal::Tensor from_vector(
+    const std::vector<VectorType>& buffer,
+    const ttnn::Shape& shape,
+    tt::tt_metal::Device* device,
+    Layout layout = Layout::TILE);
+
+template <class T = float>
+[[nodiscard]] std::vector<T> to_vector(const tt::tt_metal::Tensor& tensor);
+
+[[nodiscard]] bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor);
+
+[[nodiscard]] ttnn::Shape create_shape(const std::array<uint32_t, 4>& args);
+
+}  // namespace ttml::core
diff --git a/tt-train/sources/ttml/core/ttnn_all_includes.hpp b/tt-train/sources/ttml/core/ttnn_all_includes.hpp
new file mode 100644
index 00000000000..62b295bcfa0
--- /dev/null
+++ b/tt-train/sources/ttml/core/ttnn_all_includes.hpp
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wundefined-inline"
+#pragma GCC diagnostic ignored "-Wdeprecated-volatile"
+#pragma GCC diagnostic ignored "-Wdeprecated-this-capture"
+
+#include <common/bfloat16.hpp>                                                                     // NOLINT
+#include <tests/tt_metal/test_utils/env_vars.hpp>                                                  // NOLINT
+#include <tt_metal/common/base_types.hpp>                                                          // NOLINT
+#include <tt_metal/common/math.hpp>                                                                // NOLINT
+#include <tt_metal/host_api.hpp>                                                                   // NOLINT
+#include <tt_metal/hostdevcommon/common_values.hpp>                                                // NOLINT
+#include <tt_metal/impl/device/device.hpp>                                                         // NOLINT
+#include <ttnn/core.hpp>                                                                           // NOLINT
+#include <ttnn/cpp/ttnn/operations/copy.hpp>                                                       // NOLINT
+#include <ttnn/cpp/ttnn/operations/core/core.hpp>                                                  // NOLINT
+#include <ttnn/device.hpp>                                                                         // NOLINT
+#include <ttnn/operations/core/to_dtype/to_dtype_op.hpp>                                           // NOLINT
+#include <ttnn/operations/creation.hpp>                                                            // NOLINT
+#include <ttnn/operations/data_movement/concat/concat.hpp>                                         // NOLINT
+#include <ttnn/operations/data_movement/pad/pad.hpp>                                               // NOLINT
+#include <ttnn/operations/data_movement/permute/permute.hpp>                                       // NOLINT
+#include <ttnn/operations/data_movement/repeat/repeat.hpp>                                         // NOLINT
+#include <ttnn/operations/data_movement/slice/slice.hpp>                                           // NOLINT
+#include <ttnn/operations/data_movement/tilize_with_val_padding/tilize_with_val_padding.hpp>       // NOLINT
+#include <ttnn/operations/data_movement/transpose/transpose.hpp>                                   // NOLINT
+#include <ttnn/operations/data_movement/untilize/untilize.hpp>                                     // NOLINT
+#include <ttnn/operations/eltwise/binary/binary.hpp>                                               // NOLINT
+#include <ttnn/operations/eltwise/binary_backward/binary_backward.hpp>                             // NOLINT
+#include <ttnn/operations/eltwise/unary/unary.hpp>                                                 // NOLINT
+#include <ttnn/operations/eltwise/unary/unary_composite.hpp>                                       // NOLINT
+#include <ttnn/operations/eltwise/unary_backward/unary_backward.hpp>                               // NOLINT
+#include <ttnn/operations/embedding/embedding.hpp>                                                 // NOLINT
+#include <ttnn/operations/embedding_backward/embedding_backward.hpp>                               // NOLINT
+#include <ttnn/operations/experimental/transformer/nlp_concat_heads/nlp_concat_heads.hpp>          // NOLINT
+#include <ttnn/operations/experimental/transformer/nlp_create_qkv_heads/nlp_create_qkv_heads.hpp>  // NOLINT
+#include <ttnn/operations/full_like/full_like.hpp>                                                 // NOLINT
+#include <ttnn/operations/matmul/matmul.hpp>                                                       // NOLINT
+#include <ttnn/operations/moreh/moreh_adamw/moreh_adamw.hpp>                                       // NOLINT
+#include <ttnn/operations/moreh/moreh_layer_norm/moreh_layer_norm.hpp>                             // NOLINT
+#include <ttnn/operations/moreh/moreh_layer_norm_backward/moreh_layer_norm_backward.hpp>           // NOLINT
+#include <ttnn/operations/moreh/moreh_linear_backward/moreh_linear_backward.hpp>                   // NOLINT
+#include <ttnn/operations/moreh/moreh_matmul/moreh_matmul.hpp>                                     // NOLINT
+#include <ttnn/operations/moreh/moreh_mean/moreh_mean.hpp>                                         // NOLINT
+#include <ttnn/operations/moreh/moreh_mean_backward/moreh_mean_backward.hpp>                       // NOLINT
+#include <ttnn/operations/moreh/moreh_nll_loss/moreh_nll_loss.hpp>                                 // NOLINT
+#include <ttnn/operations/moreh/moreh_nll_loss_backward/moreh_nll_loss_backward.hpp>               // NOLINT
+#include <ttnn/operations/moreh/moreh_sum/moreh_sum.hpp>                                           // NOLINT
+#include <ttnn/operations/normalization/softmax/softmax.hpp>                                       // NOLINT
+#include <ttnn/operations/reduction/generic/generic_reductions.hpp>                                // NOLINT
+#include <ttnn/tensor/host_buffer/functions.hpp>                                                   // NOLINT
+#include <ttnn/tensor/tensor.hpp>                                                                  // NOLINT
+#include <ttnn/tensor/types.hpp>                                                                   // NOLINT
+#include <ttnn/types.hpp>                                                                          // NOLINT
+#pragma GCC diagnostic pop
diff --git a/tt-train/sources/ttml/core/ttnn_fwd.hpp b/tt-train/sources/ttml/core/ttnn_fwd.hpp
new file mode 100644
index 00000000000..3c6bddf0de5
--- /dev/null
+++ b/tt-train/sources/ttml/core/ttnn_fwd.hpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+namespace tt::tt_metal {
+struct Tensor;
+class CommandQueue;
+struct MemoryConfig;
+class DeviceMesh;
+class LegacyShape;
+inline namespace v0 {
+class Device;
+}  // namespace v0
+}  // namespace tt::tt_metal
+
+namespace ttnn {
+using Tensor = tt::tt_metal::Tensor;  // not sure if it works but we can use original tensor namespace
+
+}  // namespace ttnn
diff --git a/tt-train/sources/ttml/data/tokenizers/data/tokenizers/gpt2-tokenizer.json b/tt-train/sources/ttml/data/tokenizers/data/tokenizers/gpt2-tokenizer.json
new file mode 100644
index 00000000000..126e419a201
--- /dev/null
+++ b/tt-train/sources/ttml/data/tokenizers/data/tokenizers/gpt2-tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2dd4a144b04bdc21cbf27834f05628de4e6bc511a59b3c1bd9679c7cef7c665
+size 2113739
diff --git a/tt-train/sources/ttml/datasets/dataloader.hpp b/tt-train/sources/ttml/datasets/dataloader.hpp
new file mode 100644
index 00000000000..6384e1e8b6b
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/dataloader.hpp
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <algorithm>
+#include <random>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "core/not_null.hpp"
+namespace ttml::datasets {
+
+template <typename SampleType>
+std::vector<SampleType> default_collate_fn(std::vector<SampleType>&& samples) {
+    return std::forward<std::vector<SampleType>>(samples);
+}
+
+template <
+    typename DatasetType,
+    typename CollateFn =
+        std::function<std::vector<typename DatasetType::Sample>(std::vector<typename DatasetType::Sample>&&)>,
+    typename BatchType = std::vector<typename DatasetType::Sample>>
+class DataLoader {
+public:
+    using Sample = typename DatasetType::Sample;
+
+    DataLoader(
+        DatasetType& dataset,
+        size_t batch_size,
+        bool shuffle = false,
+        CollateFn collate_fn = default_collate_fn<Sample>) :
+        m_dataset(&dataset),
+        m_batch_size(batch_size),
+        m_shuffle(shuffle),
+        m_indices(dataset.get_size()),
+        m_collate_fn(collate_fn) {
+        std::iota(m_indices.begin(), m_indices.end(), 0);
+    }
+
+    void shuffle_indices() {
+        if (!m_shuffle) {
+            return;
+        }
+        std::mt19937& gen = autograd::AutoContext::get_instance().get_generator();
+        std::shuffle(m_indices.begin(), m_indices.end(), gen);
+    }
+
+    class Iterator {
+    public:
+        Iterator(DataLoader& data_loader, size_t start_index) :
+            m_data_loader(&data_loader), m_current_index(start_index) {
+        }
+
+        Iterator& operator++() {
+            m_current_index += m_data_loader->m_batch_size;
+            m_current_index = std::min(m_current_index, m_data_loader->m_indices.size());
+            return *this;
+        }
+
+        BatchType operator*() const {
+            return m_data_loader->fetch_batch(m_current_index);
+        }
+
+        bool operator!=(const Iterator& other) const {
+            return m_current_index != other.m_current_index;
+        }
+
+    private:
+        core::not_null<DataLoader*> m_data_loader;
+        size_t m_current_index = 0;
+    };
+
+    Iterator begin() {
+        shuffle_indices();
+        return Iterator(*this, 0);
+    }
+
+    Iterator end() {
+        return Iterator(*this, m_indices.size());
+    }
+
+private:
+    core::not_null<DatasetType*> m_dataset;
+    size_t m_batch_size = 0;
+    bool m_shuffle = false;
+    std::vector<size_t> m_indices;
+    CollateFn m_collate_fn;
+
+    BatchType fetch_batch(size_t start_index) const {
+        size_t end_index = std::min(start_index + m_batch_size, m_indices.size());
+        std::vector<Sample> batch;
+        batch.reserve(end_index - start_index);
+        for (size_t i = start_index; i < end_index; ++i) {
+            batch.push_back(m_dataset->get_item(m_indices[i]));
+        }
+
+        return m_collate_fn(std::move(batch));
+    }
+};
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/dataset_base.hpp b/tt-train/sources/ttml/datasets/dataset_base.hpp
new file mode 100644
index 00000000000..ffd0572fde2
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/dataset_base.hpp
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cassert>
+#include <span>
+#include <vector>
+
+namespace ttml::datasets {
+template <class Derived, class DataType, class TargetType>
+class DatasetBase {
+public:
+    using DataTypeT = DataType;
+    using TargetTypeT = TargetType;
+    using Sample = std::pair<DataType, TargetType>;
+    // using Samples = std::pair<std::vector<DataType>, std::vector<TargetType>>; TODO: consider return Samples instead
+    // of vector<Sample>
+    DatasetBase() = default;
+    DatasetBase(const DatasetBase&) = default;
+    DatasetBase(DatasetBase&&) = default;
+    DatasetBase& operator=(const DatasetBase&) = default;
+    DatasetBase& operator=(DatasetBase&&) = default;
+    ~DatasetBase() = default;
+
+    [[nodiscard]] size_t get_size() const {
+        return static_cast<const Derived*>(this)->get_size_impl();
+    }
+
+    [[nodiscard]] Sample get_item(size_t index) const {
+        return static_cast<const Derived*>(this)->get_item_impl(index);
+    }
+
+    [[nodiscard]] std::vector<Sample> get_batch(std::span<size_t> indices) const {
+        std::vector<Sample> batch;
+        auto size = get_size();
+        for (size_t index : indices) {
+            assert(index < size);
+            batch.push_back(get_item(index));
+        }
+        return batch;
+    }
+};
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/dataset_subset.hpp b/tt-train/sources/ttml/datasets/dataset_subset.hpp
new file mode 100644
index 00000000000..8f44d0cdfb1
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/dataset_subset.hpp
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "core/not_null.hpp"
+#include "dataset_base.hpp"
+
+namespace ttml::datasets {
+
+template <typename DatasetType>
+class DatasetSubset : public DatasetBase<
+                          DatasetSubset<DatasetType>,
+                          typename DatasetType::DataTypeT,
+                          typename DatasetType::TargetTypeT> {
+public:
+    DatasetSubset(const DatasetType& dataset, const std::vector<size_t>& indices) :
+        m_dataset(&dataset), m_indices(indices) {
+    }
+
+    [[nodiscard]] size_t get_size_impl() const {
+        return m_indices.size();
+    }
+
+    [[nodiscard]] DatasetType::Sample get_item_impl(size_t index) const {
+        if (index >= m_indices.size()) {
+            throw std::out_of_range("Index out of range.");
+        }
+        return m_dataset->get_item(m_indices[index]);
+    }
+
+private:
+    core::not_null<const DatasetType*> m_dataset;
+    std::vector<size_t> m_indices;
+};
+
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/generators.cpp b/tt-train/sources/ttml/datasets/generators.cpp
new file mode 100644
index 00000000000..85054620ce6
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/generators.cpp
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "generators.hpp"
+
+#include <numeric>
+
+#include "autograd/auto_context.hpp"
+namespace ttml::datasets {
+InMemoryFloatVecDataset make_regression(MakeRegressionParams params) {
+    std::normal_distribution<float> dist(0.0, 1.0);
+
+    std::vector<std::vector<float>> data(params.n_samples, std::vector<float>(params.n_features));
+    std::vector<std::vector<float>> targets(
+        params.n_samples, std::vector<float>(params.n_targets));  // Targets are vectors of size n_targets
+
+    // Generate random coefficients for each target
+    std::vector<std::vector<float>> coefficients(params.n_targets, std::vector<float>(params.n_features));
+    // Generate biases for each target
+    std::vector<float> biases(params.n_targets, 0.0F);
+
+    auto generate_sample = [&](auto& sample_data) {
+        std::ranges::generate(
+            sample_data, [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+    };
+
+    auto compute_target = [&](const auto& sample_data, const auto& coeff) {
+        return std::transform_reduce(
+            sample_data.begin(), sample_data.end(), coeff.begin(), 0.0F, std::plus<>(), std::multiplies<>());
+    };
+
+    auto add_bias_and_noise = [&](float target, float bias) {
+        if (params.bias) {
+            target += bias;  // Add bias
+        }
+        target += params.noise * dist(autograd::AutoContext::get_instance().get_generator());  // Add noise
+        return target;
+    };
+
+    generate_sample(biases);
+    std::ranges::for_each(coefficients, [&](auto& target_coeffs) { generate_sample(target_coeffs); });
+
+    for (size_t i = 0; i < params.n_samples; ++i) {
+        generate_sample(data[i]);
+
+        for (size_t j = 0; j < params.n_targets; ++j) {
+            float target = compute_target(data[i], coefficients[j]);
+            targets[i][j] = add_bias_and_noise(target, biases[j]);
+        }
+    }
+
+    return {data, targets};
+}
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/generators.hpp b/tt-train/sources/ttml/datasets/generators.hpp
new file mode 100644
index 00000000000..b63c7ae1a1c
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/generators.hpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <vector>
+
+#include "in_memory_dataset.hpp"
+
+namespace ttml::datasets {
+
+using InMemoryFloatVecDataset = InMemoryDataset<std::vector<float>, std::vector<float>>;
+
+struct MakeRegressionParams {
+    size_t n_samples = 1;
+    size_t n_features = 1;
+    size_t n_targets = 1;
+    float noise = 0.0F;
+    bool bias = true;
+};
+InMemoryFloatVecDataset make_regression(MakeRegressionParams params);
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/in_memory_dataset.hpp b/tt-train/sources/ttml/datasets/in_memory_dataset.hpp
new file mode 100644
index 00000000000..cbac8094762
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/in_memory_dataset.hpp
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "dataset_base.hpp"
+
+namespace ttml::datasets {
+template <class DataType, class TargetType>
+class InMemoryDataset : public DatasetBase<InMemoryDataset<DataType, TargetType>, DataType, TargetType> {
+public:
+    using Parent = DatasetBase<InMemoryDataset<DataType, TargetType>, DataType, TargetType>;
+    using Sample = typename Parent::Sample;
+    friend Parent;
+
+    InMemoryDataset(const std::vector<DataType>& data, const std::vector<TargetType>& targets) :
+        m_data(data), m_targets(targets) {
+    }
+
+    InMemoryDataset(const InMemoryDataset&) = default;
+    InMemoryDataset(InMemoryDataset&&) = default;
+    InMemoryDataset& operator=(const InMemoryDataset&) = default;
+    InMemoryDataset& operator=(InMemoryDataset&&) = default;
+    ~InMemoryDataset() = default;
+
+private:
+    [[nodiscard]] size_t get_size_impl() const {
+        return m_data.size();
+    }
+
+    [[nodiscard]] Sample get_item_impl(size_t index) const {
+        return {m_data[index], m_targets[index]};
+    }
+    std::vector<DataType> m_data;
+    std::vector<TargetType> m_targets;
+};
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/in_memory_token_dataset.cpp b/tt-train/sources/ttml/datasets/in_memory_token_dataset.cpp
new file mode 100644
index 00000000000..a9f63679a03
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/in_memory_token_dataset.cpp
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "in_memory_token_dataset.hpp"
+
+#include <cstddef>
+
+namespace ttml::datasets {
+
+InMemoryTokenDataset::InMemoryTokenDataset(const std::vector<uint32_t>& tokens, uint32_t seq_length) :
+    m_tokens(tokens), m_seq_length(seq_length) {
+}
+
+[[nodiscard]] size_t InMemoryTokenDataset::get_size_impl() const {
+    if (m_tokens.size() <= m_seq_length) {
+        return 0UL;
+    }
+    return m_tokens.size() - m_seq_length;
+}
+
+[[nodiscard]] InMemoryTokenDataset::Sample InMemoryTokenDataset::get_item_impl(size_t index) const {
+    size_t dataset_size = get_size_impl();
+    if (index >= dataset_size) {
+        throw std::out_of_range("Index out of range");
+    }
+
+    const auto* data_ptr = std::next(m_tokens.data(), static_cast<ptrdiff_t>(index));
+    std::span<const uint32_t> input_span(data_ptr, m_seq_length);
+    std::span<const uint32_t> target_span(std::next(data_ptr), m_seq_length);
+
+    return {input_span, target_span};
+}
+
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/in_memory_token_dataset.hpp b/tt-train/sources/ttml/datasets/in_memory_token_dataset.hpp
new file mode 100644
index 00000000000..4251b8575fe
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/in_memory_token_dataset.hpp
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <span>
+
+#include "dataset_base.hpp"
+
+namespace ttml::datasets {
+class InMemoryTokenDataset
+    : public DatasetBase<InMemoryTokenDataset, std::span<const uint32_t>, std::span<const uint32_t>> {
+public:
+    using Parent = DatasetBase<InMemoryTokenDataset, std::span<const uint32_t>, std::span<const uint32_t>>;
+    using Sample = typename Parent::Sample;
+    friend Parent;
+
+    InMemoryTokenDataset(const std::vector<uint32_t>& tokens, uint32_t seq_length);
+
+    InMemoryTokenDataset(const InMemoryTokenDataset&) = default;
+    InMemoryTokenDataset(InMemoryTokenDataset&&) = default;
+    InMemoryTokenDataset& operator=(const InMemoryTokenDataset&) = default;
+    InMemoryTokenDataset& operator=(InMemoryTokenDataset&&) = default;
+    ~InMemoryTokenDataset() = default;
+
+private:
+    [[nodiscard]] size_t get_size_impl() const;
+
+    [[nodiscard]] Sample get_item_impl(size_t index) const;
+
+    std::vector<uint32_t> m_tokens;
+    uint32_t m_seq_length = 0;
+};
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/utils.cpp b/tt-train/sources/ttml/datasets/utils.cpp
new file mode 100644
index 00000000000..ee42f0a55ec
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/utils.cpp
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "utils.hpp"
+
+#include "datasets/in_memory_token_dataset.hpp"
+#include "tokenizers/bpe_tokenizer.hpp"
+#include "tokenizers/char_tokenizer_trainer.hpp"
+
+namespace {
+constexpr auto gpt2_tokenizer_file_name = "/gpt2-tokenizer.json";
+}
+namespace ttml::datasets {
+
+template <>
+std::tuple<InMemoryTokenDataset, tokenizers::CharTokenizer> create_in_memory_token_dataset<tokenizers::CharTokenizer>(
+    const std::string &text, uint32_t seq_length) {
+    tokenizers::CharTokenizer tokenizer = tokenizers::CharTokenizerTrainer::train(text);
+
+    std::vector<uint32_t> tokenized_text = tokenizer.encode(text);
+
+    return {InMemoryTokenDataset(tokenized_text, seq_length), std::move(tokenizer)};
+}
+
+template <>
+std::tuple<InMemoryTokenDataset, tokenizers::BPETokenizer> create_in_memory_token_dataset<tokenizers::BPETokenizer>(
+    const std::string &text, uint32_t seq_length) {
+    auto json_file_path = std::string(TOKENIZERS_DATA_PATH) + gpt2_tokenizer_file_name;
+    auto tokenizer = tokenizers::BPETokenizer(json_file_path);
+
+    const std::vector<uint32_t> tokenized_text = tokenizer.encode(text);
+
+    return {InMemoryTokenDataset(tokenized_text, seq_length), std::move(tokenizer)};
+}
+
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/datasets/utils.hpp b/tt-train/sources/ttml/datasets/utils.hpp
new file mode 100644
index 00000000000..8e4bbdc6688
--- /dev/null
+++ b/tt-train/sources/ttml/datasets/utils.hpp
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <numeric>
+#include <random>
+#include <span>
+
+#include "autograd/auto_context.hpp"
+#include "dataset_subset.hpp"
+#include "in_memory_token_dataset.hpp"
+
+namespace ttml::datasets {
+
+template <typename Tokenizer>
+std::tuple<InMemoryTokenDataset, Tokenizer> create_in_memory_token_dataset(
+    const std::string& text, uint32_t seq_length);
+
+template <typename DatasetType>
+std::vector<DatasetSubset<DatasetType>> random_split(
+    const DatasetType& dataset, std::span<size_t> split_sizes, bool shuffle = true) {
+    size_t total_size = std::accumulate(split_sizes.begin(), split_sizes.end(), 0ULL);
+    if (total_size != dataset.get_size()) {
+        throw std::invalid_argument("Total of split sizes must equal the size of the dataset.");
+    }
+
+    // Create indices and shuffle them
+    std::vector<size_t> indices(dataset.get_size());
+    std::iota(indices.begin(), indices.end(), 0);
+
+    if (shuffle) {
+        std::mt19937& gen = autograd::AutoContext::get_instance().get_generator();
+        std::shuffle(indices.begin(), indices.end(), gen);
+    }
+
+    // Create the subsets
+    std::vector<DatasetSubset<DatasetType>> subsets;
+    auto current_iter = indices.begin();
+    for (size_t size : split_sizes) {
+        std::vector<size_t> subset_indices(current_iter, current_iter + (long)size);
+        subsets.emplace_back(dataset, std::move(subset_indices));
+        current_iter += (long)size;
+    }
+
+    return subsets;
+}
+
+}  // namespace ttml::datasets
diff --git a/tt-train/sources/ttml/init/cpu_initializers.cpp b/tt-train/sources/ttml/init/cpu_initializers.cpp
new file mode 100644
index 00000000000..b493095d951
--- /dev/null
+++ b/tt-train/sources/ttml/init/cpu_initializers.cpp
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cpu_initializers.hpp"
+
+#include <random>
+
+#include "autograd/auto_context.hpp"
+#include "fmt/core.h"
+
+namespace ttml::init {
+
+void uniform_init(std::vector<float>& vec, UniformRange range) {
+    auto& [a, b] = range;
+
+    std::uniform_real_distribution<float> dist(a, b);
+
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+void normal_init(std::vector<float>& vec, NormalParams params) {
+    auto& [mean, stddev] = params;
+
+    std::normal_distribution<float> dist(mean, stddev);
+
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+void constant_init(std::vector<float>& vec, float value) {
+    // Fill the vector with the specified constant value
+    std::fill(vec.begin(), vec.end(), value);
+}
+
+void xavier_uniform_init(std::vector<float>& vec, FanParams params) {
+    auto& [fan_in, fan_out] = params;
+    float limit = std::sqrt(6.0F / (float)(fan_in + fan_out));
+
+    std::uniform_real_distribution<float> dist(-limit, limit);
+
+    // Fill the vector with uniformly distributed random values in the range [-limit, limit]
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+void xavier_normal_init(std::vector<float>& vec, FanParams params) {
+    auto& [fan_in, fan_out] = params;
+    float stddev = std::sqrtf(2.0F / (float)(fan_in + fan_out));
+
+    // Random number generator with a seed
+    // Mersenne Twister generator
+    std::normal_distribution<float> dist(0.0F, stddev);
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+void kaiming_uniform_init(std::vector<float>& vec, int fan_in) {
+    float limit = std::sqrt(3.0F / (float)fan_in);
+
+    std::uniform_real_distribution<float> dist(-limit, limit);
+
+    // Fill the vector with uniformly distributed random values in the range [-limit, limit]
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+void kaiming_normal_init(std::vector<float>& vec, int fan_out) {
+    float stddev = std::sqrt(2.0F / (float)fan_out);
+
+    std::normal_distribution<float> dist(0.0F, stddev);
+
+    std::generate(
+        vec.begin(), vec.end(), [&]() { return dist(autograd::AutoContext::get_instance().get_generator()); });
+}
+
+}  // namespace ttml::init
diff --git a/tt-train/sources/ttml/init/cpu_initializers.hpp b/tt-train/sources/ttml/init/cpu_initializers.hpp
new file mode 100644
index 00000000000..4743ba8db79
--- /dev/null
+++ b/tt-train/sources/ttml/init/cpu_initializers.hpp
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <vector>
+
+namespace ttml::init {
+
+struct UniformRange {
+    float a = 0;
+    float b = 0;
+};
+
+struct NormalParams {
+    float mean = 0.F;
+    float stddev = 1.0F;
+};
+
+struct FanParams {
+    uint32_t fan_in = 1;
+    uint32_t fan_out = 1;
+};
+
+void uniform_init(std::vector<float>& vec, UniformRange range);
+
+void normal_init(std::vector<float>& vec, NormalParams params);
+
+void constant_init(std::vector<float>& vec, float value);
+
+void xavier_uniform_init(std::vector<float>& vec, FanParams params);
+
+void xavier_normal_init(std::vector<float>& vec, FanParams params);
+
+void kaiming_uniform_init(std::vector<float>& vec, int fan_in);
+
+void kaiming_normal_init(std::vector<float>& vec, int fan_out);
+
+}  // namespace ttml::init
diff --git a/tt-train/sources/ttml/init/tensor_initializers.cpp b/tt-train/sources/ttml/init/tensor_initializers.cpp
new file mode 100644
index 00000000000..74cbc911767
--- /dev/null
+++ b/tt-train/sources/ttml/init/tensor_initializers.cpp
@@ -0,0 +1,77 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tensor_initializers.hpp"
+
+#include <ttnn/operations/data_movement/copy/copy.hpp>
+#include <ttnn/tensor/tensor_utils.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "cpu_initializers.hpp"
+namespace ttml::init {
+void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    uniform_init(vec, range);
+
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+
+void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    normal_init(vec, params);
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+
+void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value) {
+    auto* device = &autograd::ctx().get_device();
+    t->set_value(core::full(shape, value, device));
+}
+
+void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    xavier_uniform_init(vec, params);
+
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+
+void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    xavier_normal_init(vec, params);
+
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+
+void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    kaiming_uniform_init(vec, fan_in);
+
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+
+void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out) {
+    auto* device = &autograd::ctx().get_device();
+    assert(device);
+    size_t volume = shape.logical_shape().volume();
+    std::vector<float> vec(volume);
+    kaiming_normal_init(vec, fan_out);
+
+    t->set_value(ttml::core::from_vector(vec, shape, device));
+}
+}  // namespace ttml::init
diff --git a/tt-train/sources/ttml/init/tensor_initializers.hpp b/tt-train/sources/ttml/init/tensor_initializers.hpp
new file mode 100644
index 00000000000..c83ef5b8686
--- /dev/null
+++ b/tt-train/sources/ttml/init/tensor_initializers.hpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "autograd/tensor.hpp"
+#include "core/ttnn_fwd.hpp"
+#include "init/cpu_initializers.hpp"
+namespace ttml::init {
+void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range);
+
+void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params);
+
+void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value);
+
+void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
+
+void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
+
+void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in);
+
+void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out);
+
+}  // namespace ttml::init
diff --git a/tt-train/sources/ttml/modules/dropout_module.cpp b/tt-train/sources/ttml/modules/dropout_module.cpp
new file mode 100644
index 00000000000..1f503282e53
--- /dev/null
+++ b/tt-train/sources/ttml/modules/dropout_module.cpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "dropout_module.hpp"
+
+#include "autograd/module_base.hpp"
+#include "ops/dropout_op.hpp"
+namespace ttml::modules {
+
+DropoutLayer::DropoutLayer(float probability) : m_prob(probability) {
+    create_name("dropout");
+}
+
+[[nodiscard]] autograd::TensorPtr DropoutLayer::operator()(const autograd::TensorPtr& tensor) {
+    if (this->get_run_mode() == autograd::RunMode::EVAL) {
+        return tensor;
+    }
+
+    return ttml::ops::dropout(tensor, m_prob);
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/dropout_module.hpp b/tt-train/sources/ttml/modules/dropout_module.hpp
new file mode 100644
index 00000000000..5c8d162f040
--- /dev/null
+++ b/tt-train/sources/ttml/modules/dropout_module.hpp
@@ -0,0 +1,22 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+
+namespace ttml::modules {
+
+class DropoutLayer : public autograd::ModuleBase {
+    std::string m_name;
+    float m_prob = 0.2F;
+
+public:
+    explicit DropoutLayer(float probability);
+
+    [[nodiscard]] autograd::TensorPtr operator()(const autograd::TensorPtr& tensor);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/embedding_module.cpp b/tt-train/sources/ttml/modules/embedding_module.cpp
new file mode 100644
index 00000000000..ace24ab6cea
--- /dev/null
+++ b/tt-train/sources/ttml/modules/embedding_module.cpp
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "embedding_module.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <stdexcept>
+
+#include "autograd/auto_context.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "init/tensor_initializers.hpp"
+#include "ops/embedding_op.hpp"
+
+namespace ttml::modules {
+
+void Embedding::initialize_tensors(uint32_t num_embeddings, uint32_t embedding_dim) {
+    auto* device = &autograd::ctx().get_device();
+    m_weight = autograd::create_tensor();
+    init::normal_init(
+        m_weight, core::create_shape({1, 1, num_embeddings, embedding_dim}), /* normal params */ {0.F, 1.F});
+}
+
+Embedding::Embedding(uint32_t num_embeddings, uint32_t embedding_dim) {
+    if (num_embeddings % TILE_HEIGHT != 0) {
+        throw std::logic_error(
+            fmt::format("num_embeddings must be a multiple of TILE_HEIGHT, current num_embeddings {}", num_embeddings));
+    }
+    if (embedding_dim % TILE_WIDTH != 0) {
+        throw std::logic_error(
+            fmt::format("embedding_dim must be a multiple of TILE_WIDTH, current embedding_dim {}", embedding_dim));
+    }
+    initialize_tensors(num_embeddings, embedding_dim);
+
+    create_name("embedding");
+    register_tensor(m_weight, "weight");
+}
+
+autograd::TensorPtr Embedding::operator()(const autograd::TensorPtr& tensor) {
+    auto sentence_size = tensor->get_value().get_shape()[-1];
+    if (sentence_size % TILE_HEIGHT != 0 || sentence_size % TILE_WIDTH != 0) {
+        throw std::logic_error(fmt::format(
+            "sentence_size must be a multiple of TILE_HEIGHT and TILE_WIDTH, current sentence_size {}", sentence_size));
+    }
+    return ops::embedding_op(tensor, m_weight);
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/embedding_module.hpp b/tt-train/sources/ttml/modules/embedding_module.hpp
new file mode 100644
index 00000000000..04f826cbe57
--- /dev/null
+++ b/tt-train/sources/ttml/modules/embedding_module.hpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+
+namespace ttml::modules {
+
+class Embedding : public autograd::ModuleBase {
+    autograd::TensorPtr m_weight;
+
+    void initialize_tensors(uint32_t num_embeddings, uint32_t embedding_dim);
+
+public:
+    Embedding(uint32_t num_embeddings, uint32_t embedding_dim);
+
+    [[nodiscard]] autograd::TensorPtr operator()(const autograd::TensorPtr& tensor);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/gpt_block.cpp b/tt-train/sources/ttml/modules/gpt_block.cpp
new file mode 100644
index 00000000000..b47ef7457b9
--- /dev/null
+++ b/tt-train/sources/ttml/modules/gpt_block.cpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "gpt_block.hpp"
+
+#include "core/tt_tensor_utils.hpp"
+#include "ops/binary_ops.hpp"
+#include "ops/unary_ops.hpp"
+
+namespace ttml::modules {
+
+GPTMLP::GPTMLP(uint32_t embedding_size, float dropout_prob) {
+    fc1 = std::make_shared<LinearLayer>(embedding_size, embedding_size * 4);
+    fc2 = std::make_shared<LinearLayer>(embedding_size * 4, embedding_size);
+    dropout = std::make_shared<DropoutLayer>(dropout_prob);
+
+    create_name("gpt_mlp");
+    register_module(fc1, "fc1");
+    register_module(fc2, "fc2");
+    register_module(dropout, "dropout");
+}
+
+autograd::TensorPtr GPTMLP::operator()(const autograd::TensorPtr& input) {
+    auto x = (*fc1)(input);
+    x = ops::gelu(x);
+    x = (*fc2)(x);
+    x = (*dropout)(x);
+    return x;
+}
+
+GPTBlock::GPTBlock(uint32_t embedding_size, uint32_t num_heads, float dropout_prob) {
+    mlp = std::make_shared<GPTMLP>(embedding_size, dropout_prob);
+    ln1 = std::make_shared<LayerNormLayer>(embedding_size);
+    ln2 = std::make_shared<LayerNormLayer>(embedding_size);
+    attention = std::make_shared<MultiHeadAttention>(embedding_size, num_heads, dropout_prob);
+
+    create_name("gpt_block");
+    register_module(mlp, "mlp");
+    register_module(ln1, "ln1");
+    register_module(ln2, "ln2");
+    register_module(attention, "attention");
+}
+
+autograd::TensorPtr GPTBlock::operator()(const autograd::TensorPtr& input, const autograd::TensorPtr& mask) {
+    auto residual = input;
+    auto x = (*ln1)(input);
+    x = (*attention)(x, mask);
+    x = ops::add(x, residual);
+
+    residual = x;
+    x = (*ln2)(x);
+    x = (*mlp)(x);
+    x = ops::add(x, residual);
+
+    return x;
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/gpt_block.hpp b/tt-train/sources/ttml/modules/gpt_block.hpp
new file mode 100644
index 00000000000..1e45aa2af10
--- /dev/null
+++ b/tt-train/sources/ttml/modules/gpt_block.hpp
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+#include "modules/dropout_module.hpp"
+#include "modules/layer_norm_module.hpp"
+#include "modules/linear_module.hpp"
+#include "modules/multi_head_attention.hpp"
+#include "modules/single_head_attention.hpp"
+
+namespace ttml::modules {
+
+class GPTMLP : public autograd::ModuleBase {
+    std::shared_ptr<LinearLayer> fc1;
+    std::shared_ptr<LinearLayer> fc2;
+    std::shared_ptr<DropoutLayer> dropout;
+
+public:
+    GPTMLP(uint32_t embedding_size, float dropout_prob);
+
+    autograd::TensorPtr operator()(const autograd::TensorPtr& input);
+};
+
+class GPTBlock : public autograd::ModuleBase {
+    std::shared_ptr<GPTMLP> mlp;
+    std::shared_ptr<LayerNormLayer> ln1;
+    std::shared_ptr<LayerNormLayer> ln2;
+    std::shared_ptr<MultiHeadAttention> attention;
+
+public:
+    explicit GPTBlock(uint32_t embedding_size, uint32_t num_heads, float dropout_prob);
+
+    autograd::TensorPtr operator()(const autograd::TensorPtr& input, const autograd::TensorPtr& mask);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/layer_norm_module.cpp b/tt-train/sources/ttml/modules/layer_norm_module.cpp
new file mode 100644
index 00000000000..c492d5ce8e9
--- /dev/null
+++ b/tt-train/sources/ttml/modules/layer_norm_module.cpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "layer_norm_module.hpp"
+
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::modules {
+
+void LayerNormLayer::initialize_tensors(uint32_t features) {
+    m_gamma =
+        autograd::create_tensor(core::ones(core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+    m_beta =
+        autograd::create_tensor(core::zeros(core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+}
+
+LayerNormLayer::LayerNormLayer(uint32_t features) {
+    initialize_tensors(features);
+
+    create_name("layernorm");
+    register_tensor(m_gamma, "gamma");
+    register_tensor(m_beta, "beta");
+}
+
+autograd::TensorPtr LayerNormLayer::operator()(const autograd::TensorPtr& tensor) {
+    return ops::layernorm(tensor, m_gamma, m_beta);
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/layer_norm_module.hpp b/tt-train/sources/ttml/modules/layer_norm_module.hpp
new file mode 100644
index 00000000000..a8dd8247f26
--- /dev/null
+++ b/tt-train/sources/ttml/modules/layer_norm_module.hpp
@@ -0,0 +1,27 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+#include "ops/layernorm_op.hpp"
+
+namespace ttml::modules {
+
+class LayerNormLayer : public autograd::ModuleBase {
+private:
+    autograd::TensorPtr m_gamma;
+    autograd::TensorPtr m_beta;
+
+public:
+    void initialize_tensors(uint32_t features);
+    explicit LayerNormLayer(uint32_t features);
+
+    [[nodiscard]] autograd::TensorPtr operator()(const autograd::TensorPtr& tensor);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/linear_module.cpp b/tt-train/sources/ttml/modules/linear_module.cpp
new file mode 100644
index 00000000000..8ce6b1b2d19
--- /dev/null
+++ b/tt-train/sources/ttml/modules/linear_module.cpp
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "linear_module.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "core/tt_tensor_utils.hpp"
+#include "init/cpu_initializers.hpp"
+#include "init/tensor_initializers.hpp"
+
+namespace ttml::modules {
+
+void LinearLayer::initialize_tensors(uint32_t in_features, uint32_t out_features) {
+    auto* device = &autograd::ctx().get_device();
+    auto weight_shape = core::create_shape({1, 1, out_features, in_features});
+    m_weight = ttml::autograd::create_tensor();
+    const float init_k = std::sqrtf(1.F / static_cast<float>(in_features));
+    init::uniform_init(m_weight, weight_shape, init::UniformRange{-init_k, init_k});
+    auto bias_shape = core::create_shape({1, 1, 1, out_features});
+    m_bias = ttml::autograd::create_tensor();
+    init::uniform_init(m_bias, bias_shape, init::UniformRange{-init_k, init_k});
+}
+
+LinearLayer::LinearLayer(uint32_t in_features, uint32_t out_features) {
+    initialize_tensors(in_features, out_features);
+
+    create_name("linear");
+    register_tensor(m_weight, "weight");
+    register_tensor(m_bias, "bias");
+}
+
+autograd::TensorPtr LinearLayer::operator()(const autograd::TensorPtr& tensor) {
+    return ops::linear_op(tensor, m_weight, m_bias);
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/linear_module.hpp b/tt-train/sources/ttml/modules/linear_module.hpp
new file mode 100644
index 00000000000..2ddea2a1015
--- /dev/null
+++ b/tt-train/sources/ttml/modules/linear_module.hpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <memory>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+#include "ops/linear_op.hpp"
+
+namespace ttml::modules {
+
+class LinearLayer : public autograd::ModuleBase {
+private:
+    autograd::TensorPtr m_weight;
+    autograd::TensorPtr m_bias;
+
+    void initialize_tensors(uint32_t in_features, uint32_t out_features);
+
+public:
+    LinearLayer(uint32_t in_features, uint32_t out_features);
+
+    [[nodiscard]] autograd::TensorPtr operator()(const autograd::TensorPtr& tensor);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/multi_head_attention.cpp b/tt-train/sources/ttml/modules/multi_head_attention.cpp
new file mode 100644
index 00000000000..48d3fc6f6a8
--- /dev/null
+++ b/tt-train/sources/ttml/modules/multi_head_attention.cpp
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "multi_head_attention.hpp"
+
+#include "ops/multi_head_utils.hpp"
+#include "ops/scaled_dot_product_attention.hpp"
+
+namespace ttml::modules {
+
+MultiHeadAttention::MultiHeadAttention(uint32_t embedding_dim_, uint32_t num_heads_, float dropout_prob_) :
+    m_embedding_dim(embedding_dim_), m_num_heads(num_heads_) {
+    // create layers
+    m_qkv_linear = std::make_shared<ttml::modules::LinearLayer>(m_embedding_dim, m_embedding_dim * 3);
+    m_dropout = std::make_shared<ttml::modules::DropoutLayer>(dropout_prob_);
+    m_out_linear = std::make_shared<ttml::modules::LinearLayer>(m_embedding_dim, m_embedding_dim);
+
+    // register modules
+    create_name("multi_head_attention");
+    register_module(m_qkv_linear, "qkv_linear");
+    register_module(m_dropout, "dropout");
+    register_module(m_out_linear, "out_linear");
+}
+
+ttml::autograd::TensorPtr MultiHeadAttention::operator()(
+    const ttml::autograd::TensorPtr& x, const ttml::autograd::TensorPtr& mask) {
+    auto qkv = (*m_qkv_linear)(x);
+
+    auto [query_with_heads, key_with_heads, value_with_heads] = ops::heads_creation(qkv, m_num_heads);
+
+    auto attention = ttml::ops::scaled_dot_product_attention(query_with_heads, key_with_heads, value_with_heads, mask);
+
+    attention = ops::heads_fusion(attention);
+
+    auto out = (*m_out_linear)(attention);
+    out = (*m_dropout)(out);
+
+    return out;
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/multi_head_attention.hpp b/tt-train/sources/ttml/modules/multi_head_attention.hpp
new file mode 100644
index 00000000000..8ad5dbe5eb4
--- /dev/null
+++ b/tt-train/sources/ttml/modules/multi_head_attention.hpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstdint>
+
+#include "autograd/tensor.hpp"
+#include "dropout_module.hpp"
+#include "linear_module.hpp"
+#include "ops/scaled_dot_product_attention.hpp"
+
+namespace ttml::modules {
+
+class MultiHeadAttention : public ttml::autograd::ModuleBase {
+private:
+    uint32_t m_embedding_dim{};
+    uint32_t m_num_heads{};
+    std::shared_ptr<LinearLayer> m_qkv_linear;
+    std::shared_ptr<LinearLayer> m_out_linear;
+    std::shared_ptr<DropoutLayer> m_dropout;
+
+public:
+    explicit MultiHeadAttention(uint32_t embedding_dim, uint32_t num_heads, float dropout_prob);
+
+    autograd::TensorPtr operator()(const autograd::TensorPtr& x, const autograd::TensorPtr& mask);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/multi_layer_perceptron.cpp b/tt-train/sources/ttml/modules/multi_layer_perceptron.cpp
new file mode 100644
index 00000000000..06c87b74ee5
--- /dev/null
+++ b/tt-train/sources/ttml/modules/multi_layer_perceptron.cpp
@@ -0,0 +1,41 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "multi_layer_perceptron.hpp"
+
+#include "modules/linear_module.hpp"
+
+namespace ttml::modules {
+
+template <typename Layers, typename... Args>
+void add_linear_layer(Layers& layers, Args&&... args) {
+    layers.push_back(std::make_shared<LinearLayer>(std::forward<Args>(args)...));
+}
+
+MultiLayerPerceptron::MultiLayerPerceptron(const MultiLayerPerceptronParameters& params) {
+    uint32_t current_input_features = params.m_input_features;
+    for (auto hidden_features : params.m_hidden_features) {
+        add_linear_layer(m_layers, current_input_features, hidden_features);
+        current_input_features = hidden_features;
+    }
+    add_linear_layer(m_layers, current_input_features, params.m_output_features);
+
+    create_name("mlp");
+
+    for (size_t idx = 0; idx < m_layers.size(); ++idx) {
+        register_module(m_layers[idx], "layer_" + std::to_string(idx));
+    }
+}
+autograd::TensorPtr MultiLayerPerceptron::operator()(autograd::TensorPtr tensor) {
+    for (size_t index = 0; index < m_layers.size(); ++index) {
+        tensor = (*m_layers[index])(tensor);
+        if (index + 1 != m_layers.size()) {
+            tensor = ops::relu(tensor);
+        }
+    }
+
+    return tensor;
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/multi_layer_perceptron.hpp b/tt-train/sources/ttml/modules/multi_layer_perceptron.hpp
new file mode 100644
index 00000000000..feb61113787
--- /dev/null
+++ b/tt-train/sources/ttml/modules/multi_layer_perceptron.hpp
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <vector>
+
+#include "autograd/module_base.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/unary_ops.hpp"
+
+namespace ttml::modules {
+
+struct MultiLayerPerceptronParameters {
+    uint32_t m_input_features{};
+    std::vector<uint32_t> m_hidden_features;
+    uint32_t m_output_features{};
+};
+
+class MultiLayerPerceptron : public autograd::ModuleBase {
+private:
+    std::vector<std::shared_ptr<LinearLayer>> m_layers;
+
+public:
+    explicit MultiLayerPerceptron(const MultiLayerPerceptronParameters& params);
+
+    [[nodiscard]] autograd::TensorPtr operator()(autograd::TensorPtr tensor);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/single_head_attention.cpp b/tt-train/sources/ttml/modules/single_head_attention.cpp
new file mode 100644
index 00000000000..d29f638f4c7
--- /dev/null
+++ b/tt-train/sources/ttml/modules/single_head_attention.cpp
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "single_head_attention.hpp"
+
+namespace ttml::modules {
+
+SingleHeadAttention::SingleHeadAttention(uint32_t embedding_dim, float dropout_prob) {
+    // create layers
+    q_linear = std::make_shared<ttml::modules::LinearLayer>(embedding_dim, embedding_dim);
+    k_linear = std::make_shared<ttml::modules::LinearLayer>(embedding_dim, embedding_dim);
+    v_linear = std::make_shared<ttml::modules::LinearLayer>(embedding_dim, embedding_dim);
+    dropout = std::make_shared<ttml::modules::DropoutLayer>(dropout_prob);
+    out_linear = std::make_shared<ttml::modules::LinearLayer>(embedding_dim, embedding_dim);
+
+    // register modules
+    create_name("single_head_attention");
+    register_module(q_linear, "q_linear");
+    register_module(k_linear, "k_linear");
+    register_module(v_linear, "v_linear");
+    register_module(dropout, "dropout");
+    register_module(out_linear, "out_linear");
+}
+
+ttml::autograd::TensorPtr SingleHeadAttention::operator()(
+    const ttml::autograd::TensorPtr& x, const ttml::autograd::TensorPtr& mask) {
+    auto query = (*q_linear)(x);
+    auto key = (*k_linear)(x);
+    auto value = (*v_linear)(x);
+
+    auto attention = ttml::ops::scaled_dot_product_attention(query, key, value, mask);
+    auto out = (*out_linear)(attention);
+    out = (*dropout)(out);
+
+    return out;
+}
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/modules/single_head_attention.hpp b/tt-train/sources/ttml/modules/single_head_attention.hpp
new file mode 100644
index 00000000000..cddb8df1863
--- /dev/null
+++ b/tt-train/sources/ttml/modules/single_head_attention.hpp
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autograd/tensor.hpp"
+#include "dropout_module.hpp"
+#include "linear_module.hpp"
+#include "ops/scaled_dot_product_attention.hpp"
+
+namespace ttml::modules {
+
+class SingleHeadAttention : public ttml::autograd::ModuleBase {
+    std::shared_ptr<LinearLayer> q_linear;
+    std::shared_ptr<LinearLayer> k_linear;
+    std::shared_ptr<LinearLayer> v_linear;
+    std::shared_ptr<LinearLayer> out_linear;
+    std::shared_ptr<DropoutLayer> dropout;
+
+public:
+    explicit SingleHeadAttention(uint32_t embedding_dim, float dropout_prob);
+
+    autograd::TensorPtr operator()(const autograd::TensorPtr& x, const autograd::TensorPtr& mask);
+};
+
+}  // namespace ttml::modules
diff --git a/tt-train/sources/ttml/ops/binary_ops.cpp b/tt-train/sources/ttml/ops/binary_ops.cpp
new file mode 100644
index 00000000000..6b474cc3388
--- /dev/null
+++ b/tt-train/sources/ttml/ops/binary_ops.cpp
@@ -0,0 +1,104 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "binary_ops.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <memory>
+#include <ttnn/operations/eltwise/binary/binary.hpp>
+#include <ttnn/operations/eltwise/binary_backward/binary_backward.hpp>
+#include <ttnn/tensor/types.hpp>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/graph_utils.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr operator+(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    auto out = autograd::create_tensor();
+
+    out->set_value(ttnn::add(a->get_value(), b->get_value()));
+    autograd::GradFunction grad = [a, b, out]() {
+        a->add_grad(out->get_grad());
+        b->add_grad(out->get_grad());
+    };
+    auto links = autograd::get_links(a, b);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr operator-(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    auto out = autograd::create_tensor();
+
+    out->set_value(ttnn::subtract(a->get_value(), b->get_value()));
+    autograd::GradFunction grad = [a, b, out]() {
+        tt::tt_metal::MemoryConfig mem_config;
+        // TODO: support broadcasting
+        a->add_grad(out->get_grad());
+        b->add_grad(ttnn::neg(out->get_grad()));
+    };
+    auto links = autograd::get_links(a, b);
+
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr operator*(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    auto out = autograd::create_tensor();
+
+    out->set_value(ttnn::multiply(a->get_value(), b->get_value()));
+    autograd::GradFunction grad = [a, b, out]() {
+        tt::tt_metal::MemoryConfig mem_config;
+        // TODO: support broadcasting (or not)
+        auto a_grad = ttnn::multiply(out->get_grad(), b->get_value());
+        auto b_grad = ttnn::multiply(out->get_grad(), a->get_value());
+
+        a->add_grad(a_grad);
+        b->add_grad(b_grad);
+    };
+    auto links = autograd::get_links(a, b);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr operator/(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    auto out = autograd::create_tensor();
+
+    out->set_value(ttnn::divide(a->get_value(), b->get_value()));
+    autograd::GradFunction grad = [a, b, out]() {
+        auto res = ttnn::div_bw(out->get_grad(), a->get_value(), b->get_value(), "None");
+        a->add_grad(res[0].value());
+        b->add_grad(res[1].value());
+    };
+    auto links = autograd::get_links(a, b);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr add(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    return a + b;
+}
+
+autograd::TensorPtr sub(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    return a - b;
+}
+
+autograd::TensorPtr mul(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    return a * b;
+}
+
+autograd::TensorPtr div(const autograd::TensorPtr& a, const autograd::TensorPtr& b) {
+    return a / b;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/binary_ops.hpp b/tt-train/sources/ttml/ops/binary_ops.hpp
new file mode 100644
index 00000000000..85bc821db08
--- /dev/null
+++ b/tt-train/sources/ttml/ops/binary_ops.hpp
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+namespace ttml::ops {
+
+autograd::TensorPtr operator+(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr operator*(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr operator-(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr operator/(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+
+autograd::TensorPtr add(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr sub(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr mul(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+autograd::TensorPtr div(const autograd::TensorPtr& a, const autograd::TensorPtr& b);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/dropout_op.cpp b/tt-train/sources/ttml/ops/dropout_op.cpp
new file mode 100644
index 00000000000..aba066916e9
--- /dev/null
+++ b/tt-train/sources/ttml/ops/dropout_op.cpp
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "dropout_op.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <ttnn/operations/eltwise/binary/binary.hpp>
+#include <ttnn/operations/eltwise/unary/unary.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr dropout(const autograd::TensorPtr& tensor, float probability) {
+    auto mask = core::ones_like(tensor->get_value());
+    // dropout seed is not properly used in ttnn::dropout
+    // auto dropout_seed = autograd::ctx().get_generator()();
+
+    // currently seed is not used in ttnn::dropout
+    // we use default seed for now to simplify job of program cache
+    // it will require to generate only one program and reuse it later
+    auto dropout_seed = 0U;
+    auto scaler = 1.0F / (1.0F - probability);
+    mask = ttnn::dropout(mask, dropout_seed, probability, scaler);
+    auto out = autograd::create_tensor();
+    auto masked_out = ttnn::multiply(tensor->get_value(), mask);
+    out->set_value(masked_out);
+    autograd::GradFunction grad = [tensor, out, mask]() {
+        auto res = ttnn::multiply(out->get_grad(), mask);
+        tensor->add_grad(res);
+    };
+
+    auto links = autograd::get_links(tensor);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/dropout_op.hpp b/tt-train/sources/ttml/ops/dropout_op.hpp
new file mode 100644
index 00000000000..d3b2690ad18
--- /dev/null
+++ b/tt-train/sources/ttml/ops/dropout_op.hpp
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr dropout(const autograd::TensorPtr& tensor, float probability);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/embedding_op.cpp b/tt-train/sources/ttml/ops/embedding_op.cpp
new file mode 100644
index 00000000000..a7a02f11813
--- /dev/null
+++ b/tt-train/sources/ttml/ops/embedding_op.cpp
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "embedding_op.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr embedding_op(const autograd::TensorPtr& tensor, const autograd::TensorPtr& weight) {
+    // prepare for embedding
+    auto weight_tensor = weight->get_value();
+    weight_tensor = ttnn::untilize(weight_tensor);
+
+    auto embeddings = ttnn::embedding(tensor->get_value(), weight_tensor, /* pad_token */ std::nullopt, Layout::TILE);
+    auto embeddings_shape = embeddings.get_shape();
+    auto batch_size = embeddings_shape[0];
+    auto sentence_size = embeddings_shape[1];
+    auto embedding_dim = embeddings_shape[2];
+    embeddings = ttnn::reshape(embeddings, core::create_shape({batch_size, 1, sentence_size, embedding_dim}));
+    auto out = autograd::create_tensor(embeddings);
+
+    autograd::GradFunction grad = [tensor, weight, out]() {
+        auto out_grad = out->get_grad();
+        auto tensor_shape = tensor->get_value().get_shape();
+        out_grad = ttnn::reshape(
+            out_grad, core::create_shape({1, 1, tensor_shape[0] * tensor_shape[-1], out_grad.get_shape()[-1]}));
+        auto weight_grad = ttnn::embedding_bw(tensor->get_value(), weight->get_value(), out_grad);
+        weight->add_grad(weight_grad);
+    };
+
+    auto links = autograd::get_links(weight);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/embedding_op.hpp b/tt-train/sources/ttml/ops/embedding_op.hpp
new file mode 100644
index 00000000000..5a85c05d40a
--- /dev/null
+++ b/tt-train/sources/ttml/ops/embedding_op.hpp
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr embedding_op(const autograd::TensorPtr& tensor, const autograd::TensorPtr& weight);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/layernorm_op.cpp b/tt-train/sources/ttml/ops/layernorm_op.cpp
new file mode 100644
index 00000000000..e0f8e97f6be
--- /dev/null
+++ b/tt-train/sources/ttml/ops/layernorm_op.cpp
@@ -0,0 +1,77 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "layernorm_op.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::ops {
+
+// simplified version of layernorm
+// it works only for 4D tensors and for the last dimension
+autograd::TensorPtr layernorm(
+    const autograd::TensorPtr& tensor, const autograd::TensorPtr& gamma, const autograd::TensorPtr& beta) {
+    auto tensor_shape = tensor->get_value().get_shape();
+    auto mean = core::empty(
+        core::create_shape({tensor_shape[0], tensor_shape[1], tensor_shape[2], 1}),
+        &autograd::ctx().get_device(),
+        tensor->get_value().memory_config());
+    auto rstd = ttnn::empty_like(mean);
+    auto output = ttnn::empty_like(tensor->get_value());
+
+    auto out_tensors = ttnn::moreh_layer_norm(
+        tensor->get_value(),
+        1,
+        1e-6F,
+        /* gamma */ gamma->get_value(),
+        /* beta */ beta->get_value(),
+        output,
+        mean,
+        rstd,
+        /* memory_config */ std::nullopt,
+        /* compute_kernel_config */ std::nullopt);
+
+    auto out = autograd::create_tensor();
+    out->set_value(out_tensors[0].value());
+    mean = out_tensors[1].value();
+    rstd = out_tensors[2].value();
+
+    autograd::GradFunction grad = [tensor, out, mean, rstd, gamma, beta]() {
+        auto input_grad = ttnn::empty_like(tensor->get_value());
+        auto gamma_grad = ttnn::empty_like(gamma->get_value());
+        auto beta_grad = ttnn::empty_like(beta->get_value());
+
+        auto res = ttnn::moreh_layer_norm_backward(
+            out->get_grad(),
+            tensor->get_value(),
+            mean,
+            rstd,
+            1,
+            gamma->get_value(),
+            input_grad,
+            gamma_grad,
+            beta_grad,
+            /* memory_config */ std::nullopt,
+            /* compute_kernel_config */ std::nullopt);
+
+        tensor->add_grad(res[0].value());
+        gamma->add_grad(res[1].value());
+        beta->add_grad(res[2].value());
+    };
+
+    auto links = autograd::get_links(tensor);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/layernorm_op.hpp b/tt-train/sources/ttml/ops/layernorm_op.hpp
new file mode 100644
index 00000000000..11c08692e07
--- /dev/null
+++ b/tt-train/sources/ttml/ops/layernorm_op.hpp
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr layernorm(
+    const autograd::TensorPtr& tensor, const autograd::TensorPtr& gamma, const autograd::TensorPtr& beta);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/linear_op.cpp b/tt-train/sources/ttml/ops/linear_op.cpp
new file mode 100644
index 00000000000..bbfa95d5df7
--- /dev/null
+++ b/tt-train/sources/ttml/ops/linear_op.cpp
@@ -0,0 +1,143 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "linear_op.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace {
+
+tt::tt_metal::Tensor matmul(
+    const tt::tt_metal::Tensor& a,
+    const tt::tt_metal::Tensor& b,
+    bool transpose_a,
+    bool transpose_b,
+    const ttnn::WormholeComputeKernelConfig& config) {
+    return ttnn::matmul(
+        a,
+        b,
+        transpose_a,
+        transpose_b,
+        /* memory_config */ std::nullopt,
+        /* dtype */ std::nullopt,
+        /* program_config */ std::nullopt,
+        /* activation */ std::nullopt,
+        /* compute_kernel_config */
+        config,
+        /* core_grid */ ttnn::CoreGrid{7, 8},
+        /* output_tile */ std::nullopt);
+}
+
+}  // namespace
+
+namespace ttml::ops {
+
+void ttnn_linear_backward(
+    const autograd::TensorPtr& tensor,
+    const autograd::TensorPtr& weight,
+    const autograd::TensorPtr& bias,
+    const autograd::TensorPtr& out,
+    const ttnn::WormholeComputeKernelConfig& config) {
+    const auto& tensor_value = tensor->get_value();
+    auto volume_without_features = tensor_value.get_logical_volume() / tensor_value.get_shape()[-1];
+    auto reshaped_tensor =
+        ttnn::reshape(tensor_value, ttnn::Shape({volume_without_features, tensor_value.get_shape()[-1]}));
+
+    auto reshaped_grad =
+        ttnn::reshape(out->get_grad(), ttnn::Shape({volume_without_features, out->get_grad().get_shape()[-1]}));
+    auto reshaped_bias_grad = ttnn_fixed::sum_over_dim(reshaped_grad, /* axis */ 0);
+    auto reshaped_weight_grad =
+        matmul(reshaped_grad, reshaped_tensor, /* transpose_a */ true, /* transpose_b */ false, config);
+    auto reshaped_tensor_grad =
+        matmul(reshaped_grad, weight->get_value(), /* transpose_a */ false, /* transpose_b */ false, config);
+
+    auto bias_grad = ttnn::reshape(reshaped_bias_grad, bias->get_value().get_shape());
+    auto weight_grad = ttnn::reshape(reshaped_weight_grad, weight->get_value().get_shape());
+    auto tensor_grad = ttnn::reshape(reshaped_tensor_grad, tensor_value.get_shape());
+
+    tensor->add_grad(tensor_grad);
+    weight->add_grad(weight_grad);
+    bias->add_grad(bias_grad);
+}
+
+void moreh_linear_backward(
+    const autograd::TensorPtr& tensor,
+    const autograd::TensorPtr& weight,
+    const autograd::TensorPtr& bias,
+    const autograd::TensorPtr& out,
+    const ttnn::WormholeComputeKernelConfig& config) {
+    auto bias_grad = ttnn::empty_like(bias->get_value());
+    auto tensor_grad = ttnn::empty_like(tensor->get_value());
+    auto weight_grad = ttnn::empty_like(weight->get_value());
+
+    auto res = ttnn::moreh_linear_backward(
+        out->get_grad(),
+        tensor->get_value(),
+        weight->get_value(),
+        /* are required outputs */ std::vector<bool>{true, true, true},
+        bias->get_value(),
+        tensor_grad,
+        weight_grad,
+        bias_grad,
+        /* input_grad_mem_config */ std::nullopt,
+        /* weight_grad_mem_config */ std::nullopt,
+        /* bias_grad_mem_config */ std::nullopt,
+        /* compute_kernel_config */ config);
+
+    if (!res[0].has_value()) {
+        throw std::runtime_error("Tensor gradient is not available");
+    }
+    tensor->add_grad(res[0].value());
+
+    if (!res[1].has_value()) {
+        throw std::runtime_error("Weight gradient is not available");
+    }
+    weight->add_grad(res[1].value());
+
+    if (!res[2].has_value()) {
+        throw std::runtime_error("Bias gradient is not available");
+    }
+    bias->add_grad(res[2].value());
+}
+
+autograd::TensorPtr linear_op(
+    const autograd::TensorPtr& tensor, const autograd::TensorPtr& weight, const autograd::TensorPtr& bias) {
+    auto out = autograd::create_tensor();
+
+    out->set_value(ttnn::linear(
+        tensor->get_value(),
+        weight->get_value(),
+        bias->get_value(),
+        /* transpose_a */ false,
+        /* tranpose_b */ true,
+        /* memory_config */ std::nullopt,
+        /* dtype */ std::nullopt,
+        /* program_config */ std::nullopt,
+        /* activation */ std::nullopt,
+        /* compute_kernel_config */ core::ComputeKernelConfig::matmul(),
+        /* core_grid */ ttnn::CoreGrid{7, 8}));
+
+    autograd::GradFunction grad = [weight, bias, tensor, out]() {
+        auto tensor_shape = tensor->get_value().get_shape();
+        auto grad_shape = out->get_grad().get_shape();
+        // for some reason, reshape produces wrong values when last dimensions not divisible by TILE
+        if (tensor_shape[-2] % TILE_HEIGHT != 0 ||
+            tensor_shape[-1] % TILE_WIDTH != 0 && grad_shape[-1] % TILE_WIDTH != 0) {
+            moreh_linear_backward(tensor, weight, bias, out);
+        } else {
+            ttnn_linear_backward(tensor, weight, bias, out);
+        }
+    };
+
+    auto links = autograd::get_links(weight, tensor, bias);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/linear_op.hpp b/tt-train/sources/ttml/ops/linear_op.hpp
new file mode 100644
index 00000000000..c95236e7304
--- /dev/null
+++ b/tt-train/sources/ttml/ops/linear_op.hpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+#include "core/compute_kernel_config.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr linear_op(
+    const autograd::TensorPtr& tensor, const autograd::TensorPtr& weight, const autograd::TensorPtr& bias);
+
+void ttnn_linear_backward(
+    const autograd::TensorPtr& tensor,
+    const autograd::TensorPtr& weight,
+    const autograd::TensorPtr& bias,
+    const autograd::TensorPtr& out,
+    const ttnn::WormholeComputeKernelConfig& config = ttml::core::ComputeKernelConfig::matmul());
+
+void moreh_linear_backward(
+    const autograd::TensorPtr& tensor,
+    const autograd::TensorPtr& weight,
+    const autograd::TensorPtr& bias,
+    const autograd::TensorPtr& out,
+    const ttnn::WormholeComputeKernelConfig& config = ttml::core::ComputeKernelConfig::matmul());
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/losses.cpp b/tt-train/sources/ttml/ops/losses.cpp
new file mode 100644
index 00000000000..825a2908ebc
--- /dev/null
+++ b/tt-train/sources/ttml/ops/losses.cpp
@@ -0,0 +1,115 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "losses.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <ttnn/types.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ops/binary_ops.hpp"
+#include "ops/unary_ops.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr mse_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce) {
+    auto difference = ops::sub(target, prediction);  // TODO: @rfurko-tt use "ttnn::squared_difference"
+    auto squared_difference =
+        ops::mul(difference, difference);  // TODO: need to add backward "ttnn::squared_difference_bw" might be faster
+    if (reduce == ReduceType::MEAN) {
+        return ops::mean(squared_difference);
+    } else {
+        throw std::logic_error("Unsupported MSE reduction type");
+    }
+}
+
+autograd::TensorPtr cross_entropy_loss_without_reduce_(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target) {
+    const float eps = 1e-6F;
+    auto prediction_tensor = ttnn_fixed::softmax(prediction->get_value(), 3);
+    auto prediction_tensor_clipped = ttnn::clip(prediction_tensor, eps, 1.0F);
+    auto loss = ttnn::multiply(target->get_value(), ttnn::log(prediction_tensor_clipped));
+    loss = ttnn::neg(loss);
+    loss = ttnn_fixed::sum_over_dim(loss, 3);
+    auto out = autograd::create_tensor(loss);
+
+    autograd::GradFunction grad = [target, prediction_tensor, prediction, out]() {
+        auto grad = ttnn::subtract(prediction_tensor, target->get_value());
+        grad = ttnn::multiply(grad, out->get_grad());
+        prediction->add_grad(grad);
+    };
+
+    auto links = autograd::get_links(prediction);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr cross_entropy_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce) {
+    auto loss = cross_entropy_loss_without_reduce_(prediction, target);
+    if (reduce == ReduceType::MEAN) {
+        return ops::mean(loss);
+    } else {
+        throw std::logic_error("Unsupported cross entropy reduction type");
+    }
+}
+
+autograd::TensorPtr nll_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce) {
+    if (reduce != ReduceType::MEAN) {
+        throw std::logic_error("Unsupported NLL reduction type, only MEAN is supported");
+    }
+
+    auto* device = &autograd::ctx().get_device();
+    auto divisor = core::empty(ttnn::Shape({1, 1}, {32, 32}), device, prediction->get_value().memory_config());
+
+    auto tensor_shape = prediction->get_value().shape();
+    uint32_t Ndim = tensor_shape[0] * tensor_shape[1] * tensor_shape[2];
+    uint32_t Cdim = tensor_shape[3];
+    auto reshaped_tensor = ttnn::reshape(prediction->get_value(), ttnn::Shape({Ndim, Cdim}));
+    auto loss_tensor = ttnn::moreh_nll_loss(
+        reshaped_tensor,
+        target->get_value(),
+        /* reduction */ "mean",
+        /* weight_tensor */ std::nullopt,
+        /* divisor_tensor */ divisor,
+        /* output_tensor */ std::nullopt,
+        /* ignore_index */ -100,
+        /* memory_config */ prediction->get_value().memory_config(),
+        /* compute_kernel_config */ core::ComputeKernelConfig::precise());
+    auto out = autograd::create_tensor(loss_tensor);
+
+    autograd::GradFunction grad = [prediction, target, out, Ndim, Cdim, device, divisor]() {
+        auto out_grad = ttnn::empty(
+            ttnn::Shape({Ndim, Cdim}),
+            DataType::BFLOAT16,
+            Layout::TILE,
+            device,
+            prediction->get_value().memory_config());
+        auto grad = ttnn::moreh_nll_loss_backward(
+            target->get_value(),
+            out->get_grad(),
+            /* reduction_mean */ true,
+            /* weight_tensor */ std::nullopt,
+            /* input_grad_tensor */ out_grad,
+            /* divisor_tensor */ divisor,
+            /* ignore_index */ -100,
+            /* memory_config */ std::nullopt,
+            /* compute_kernel_config */ std::nullopt);
+        grad = ttnn::reshape(grad, prediction->get_value().shape());
+        prediction->add_grad(grad);
+    };
+    auto links = autograd::get_links(prediction);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/losses.hpp b/tt-train/sources/ttml/ops/losses.hpp
new file mode 100644
index 00000000000..fe1a037d802
--- /dev/null
+++ b/tt-train/sources/ttml/ops/losses.hpp
@@ -0,0 +1,22 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+enum ReduceType : uint8_t { MEAN = 0, SUM = 1 };
+
+autograd::TensorPtr mse_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce = ReduceType::MEAN);
+
+autograd::TensorPtr cross_entropy_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce = ReduceType::MEAN);
+
+autograd::TensorPtr nll_loss(
+    const autograd::TensorPtr& prediction, const autograd::TensorPtr& target, ReduceType reduce = ReduceType::MEAN);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/multi_head_utils.cpp b/tt-train/sources/ttml/ops/multi_head_utils.cpp
new file mode 100644
index 00000000000..aedf340cd30
--- /dev/null
+++ b/tt-train/sources/ttml/ops/multi_head_utils.cpp
@@ -0,0 +1,85 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "multi_head_utils.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::ops {
+
+std::tuple<autograd::TensorPtr, autograd::TensorPtr, autograd::TensorPtr> heads_creation(
+    const autograd::TensorPtr& qkv, uint32_t num_heads) {
+    // qkv shape is (B, 1, S, E * 3)
+    // q, k, v shapes are (B, num_heads, S, E / num_heads)
+    auto [q, k, v] = ttnn::experimental::nlp_create_qkv_heads(
+        qkv->get_value(),
+        std::nullopt,
+        num_heads,
+        num_heads,
+        /* transpose_k */ false,
+        /* memory_config */ std::nullopt,
+        /* optional_output_tensors */ std::nullopt);
+
+    auto out_q = autograd::create_tensor(q);
+    auto out_k = autograd::create_tensor(k);
+    auto out_v = autograd::create_tensor(v);
+
+    autograd::GradFunction grad_q = [out_q, out_k, out_v, qkv]() {
+        auto grad_q = out_q->get_grad();
+        auto grad_k = out_k->get_grad();
+        auto grad_v = out_v->get_grad();
+        // (B, num_heads, S, E / num_heads) -> (B, 1, S, E)
+        grad_q = ttnn::experimental::nlp_concat_heads(grad_q);
+        grad_k = ttnn::experimental::nlp_concat_heads(grad_k);
+        grad_v = ttnn::experimental::nlp_concat_heads(grad_v);
+        auto result = ttnn::concat(std::vector<ttnn::Tensor>({grad_q, grad_k, grad_v}), /* dim */ 3);
+        qkv->add_grad(result);
+    };
+
+    auto links_q = autograd::get_links(qkv);
+    // grad_q function depends on gradients of q, k and v
+    out_q->set_node(autograd::ctx().add_backward_node(std::move(grad_q), links_q));
+    // this needs to be added to make sure that gradients for k and v are computed before we run backward for q
+    auto links_kv = autograd::get_links(qkv, out_q);
+    out_k->set_node(autograd::ctx().add_backward_node([]() {}, links_kv));
+    out_v->set_node(autograd::ctx().add_backward_node([]() {}, links_kv));
+    return {out_q, out_k, out_v};
+}
+
+autograd::TensorPtr heads_fusion(const autograd::TensorPtr& x) {
+    auto x_shape = x->get_value().get_shape();
+
+    uint32_t batch_size = x_shape[0];
+    uint32_t num_heads = x_shape[1];
+    uint32_t sequence_length = x_shape[2];
+    uint32_t embedding_dim = x_shape[3];
+
+    // (B, H, S, E/H) -> (B, 1, S, E)
+    auto fused_heads = ttnn::experimental::nlp_concat_heads(x->get_value());
+    auto out = autograd::create_tensor(fused_heads);
+
+    autograd::GradFunction grad = [out, x, num_heads, batch_size, sequence_length, embedding_dim]() {
+        auto grad_output = out->get_grad();
+        // (B, 1, S, E) -> (B, 1, E, S)
+        auto grad_result = ttnn::transpose(grad_output, -2, -1);
+        // (B, 1, E, S) -> (B, H, E/H, S)
+        grad_result =
+            ttnn::reshape(grad_result, core::create_shape({batch_size, num_heads, embedding_dim, sequence_length}));
+        // (B, H, E/H, S) -> (B, H, S, E/H)
+        grad_result = ttnn::transpose(grad_result, -2, -1);
+        x->add_grad(grad_result);
+    };
+
+    auto links = autograd::get_links(x);
+    out->set_node(ttml::autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/multi_head_utils.hpp b/tt-train/sources/ttml/ops/multi_head_utils.hpp
new file mode 100644
index 00000000000..5adbf38bd9e
--- /dev/null
+++ b/tt-train/sources/ttml/ops/multi_head_utils.hpp
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+std::tuple<autograd::TensorPtr, autograd::TensorPtr, autograd::TensorPtr> heads_creation(
+    const autograd::TensorPtr& qkv, uint32_t num_heads);
+
+autograd::TensorPtr heads_fusion(const autograd::TensorPtr& x);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/scaled_dot_product_attention.cpp b/tt-train/sources/ttml/ops/scaled_dot_product_attention.cpp
new file mode 100644
index 00000000000..26d3d448b19
--- /dev/null
+++ b/tt-train/sources/ttml/ops/scaled_dot_product_attention.cpp
@@ -0,0 +1,158 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "scaled_dot_product_attention.hpp"
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph_utils.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace ttml::ops {
+
+tt::tt_metal::Tensor matmul(
+    const tt::tt_metal::Tensor& a, const tt::tt_metal::Tensor& b, bool transpose_a, bool transpose_b) {
+    return ttnn::matmul(
+        a,
+        b,
+        transpose_a,
+        transpose_b,
+        /* memory_config */ std::nullopt,
+        /* dtype */ std::nullopt,
+        /* program_config */ std::nullopt,
+        /* activation */ std::nullopt,
+        /* compute_kernel_config */ core::ComputeKernelConfig::matmul(),
+        /* core_grid */ ttnn::CoreGrid{7, 8},
+        /* output_tile */ std::nullopt);
+}
+
+autograd::TensorPtr scaled_dot_product_attention(
+    const autograd::TensorPtr& query,
+    const autograd::TensorPtr& key,
+    const autograd::TensorPtr& value,
+    const std::optional<autograd::TensorPtr>& mask) {
+    const float scale = 1.0F / std::sqrtf(static_cast<float>(query->get_value().get_shape()[-1]));
+    // (B, H, S, E) x (B, H, E, S) -> (B, H, S, S)
+    auto qk_t = matmul(query->get_value(), key->get_value(), /* transpose_a */ false, /* transpose_b */ true);
+    // (B, H, S, S) * scale
+    auto qk_scaled = ttnn::multiply(qk_t, scale);
+    if (mask.has_value()) {
+        qk_scaled = ttnn::where(mask.value()->get_value(), qk_scaled, /* other */ -1e9F);
+    }
+    // (B, H, S, S)
+    auto attention_weights = ttnn_fixed::softmax(qk_scaled, /* axis */ 3);
+    // TODO: add dropout here
+
+    // (B, H, S, S) x (B, H, S, E) -> (B, H, S, E)
+    auto attention_qkv =
+        matmul(attention_weights, value->get_value(), /* transpose_a */ false, /* transpose_b */ false);
+    auto out = ttml::autograd::create_tensor(attention_qkv);
+
+    ttml::autograd::GradFunction grad = [scale, query, key, value, attention_weights, out, mask]() {
+        auto grad_output = out->get_grad();
+        // (B, H, S, S) x (B, H, S, E) -> (B, H, S, E)
+        auto grad_v = matmul(attention_weights, grad_output, /* transpose_a */ true, /* transpose_b */ false);
+        auto grad_attention_weights =
+            matmul(grad_output, value->get_value(), /* transpose_a */ false, /* transpose_b */ true);
+        auto grad_scaled_dot = ttnn::multiply(
+            attention_weights,
+            ttnn::subtract(
+                grad_attention_weights,
+                ttnn_fixed::sum_over_dim(ttnn::multiply(attention_weights, grad_attention_weights), 3)));
+        if (mask.has_value()) {
+            grad_scaled_dot = ttnn::multiply(grad_scaled_dot, mask.value()->get_value());
+        }
+
+        auto grad_q = matmul(
+            grad_scaled_dot,
+            key->get_value(),
+            /* transpose_a */ false,
+            /* transpose_b */ false);
+        grad_q = ttnn::multiply(grad_q, scale);
+
+        auto grad_k = matmul(
+            grad_scaled_dot,
+            query->get_value(),
+            /* transpose_a */ true,
+            /* transpose_b */ false);
+        grad_k = ttnn::multiply(grad_k, scale);
+
+        query->add_grad(grad_q);
+        key->add_grad(grad_k);
+        value->add_grad(grad_v);
+    };
+
+    auto links = autograd::get_links(query, key, value);
+    out->set_node(ttml::autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr scaled_sigmoid_dot_product_attention(
+    const autograd::TensorPtr& query,
+    const autograd::TensorPtr& key,
+    const autograd::TensorPtr& value,
+    const std::optional<autograd::TensorPtr>& mask) {
+    const float scale = 1.0F / std::sqrtf(static_cast<float>(query->get_value().get_shape()[-1]));
+    // (B, H, S, E) x (B, H, E, S) -> (B, H, S, S)
+    auto qk_t = matmul(query->get_value(), key->get_value(), /* transpose_a */ false, /* transpose_b */ true);
+    // (B, H, S, S) * scale
+    auto qk_scaled = ttnn::multiply(qk_t, scale);
+    if (mask.has_value()) {
+        qk_scaled = ttnn::where(mask.value()->get_value(), qk_scaled, /* other */ -1e9F);
+    }
+    // (B, H, S, S)
+    // auto attention_weights = ttnn_fixed::softmax(qk_scaled, /* axis */ 3);
+    auto attention_weights =
+        ttnn::sigmoid(ttnn::subtract(qk_scaled, std::logf(static_cast<float>(query->get_value().get_shape()[-2]))));
+
+    // (B, H, S, S) x (B, H, S, E) -> (B, H, S, E)
+    auto attention_qkv =
+        matmul(attention_weights, value->get_value(), /* transpose_a */ false, /* transpose_b */ false);
+    auto out = ttml::autograd::create_tensor(attention_qkv);
+
+    ttml::autograd::GradFunction grad =
+        [scale, query, key, value, qk_t, qk_scaled, attention_weights, attention_qkv, out, mask]() {
+            auto grad_output = out->get_grad();
+            // (B, H, S, S) x (B, H, S, E) -> (B, H, S, E)
+            auto grad_v = matmul(attention_weights, grad_output, /* transpose_a */ true, /* transpose_b */ false);
+            auto grad_attention_weights =
+                matmul(grad_output, value->get_value(), /* transpose_a */ false, /* transpose_b */ true);
+            auto grad_scaled_dot =
+                ttnn::sigmoid_bw(
+                    grad_attention_weights,
+                    ttnn::subtract(qk_scaled, std::logf(static_cast<float>(query->get_value().get_shape()[-2]))))
+                    .front();
+
+            if (mask.has_value()) {
+                grad_scaled_dot = ttnn::where(mask.value()->get_value(), grad_scaled_dot, /* other */ 0.0F);
+            }
+
+            auto grad_q = matmul(
+                grad_scaled_dot,
+                key->get_value(),
+                /* transpose_a */ false,
+                /* transpose_b */ false);
+            grad_q = ttnn::multiply(grad_q, scale);
+
+            auto grad_k = matmul(
+                grad_scaled_dot,
+                query->get_value(),
+                /* transpose_a */ true,
+                /* transpose_b */ false);
+            grad_k = ttnn::multiply(grad_k, scale);
+
+            query->add_grad(grad_q);
+            key->add_grad(grad_k);
+            value->add_grad(grad_v);
+        };
+
+    auto links = autograd::get_links(query, key, value);
+    out->set_node(ttml::autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/scaled_dot_product_attention.hpp b/tt-train/sources/ttml/ops/scaled_dot_product_attention.hpp
new file mode 100644
index 00000000000..9c92f1f395b
--- /dev/null
+++ b/tt-train/sources/ttml/ops/scaled_dot_product_attention.hpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr scaled_dot_product_attention(
+    const autograd::TensorPtr& query,
+    const autograd::TensorPtr& key,
+    const autograd::TensorPtr& value,
+    const std::optional<autograd::TensorPtr>& mask = std::nullopt);
+
+autograd::TensorPtr scaled_sigmoid_dot_product_attention(
+    const autograd::TensorPtr& query,
+    const autograd::TensorPtr& key,
+    const autograd::TensorPtr& value,
+    const std::optional<autograd::TensorPtr>& mask = std::nullopt);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/unary_ops.cpp b/tt-train/sources/ttml/ops/unary_ops.cpp
new file mode 100644
index 00000000000..5f13d840253
--- /dev/null
+++ b/tt-train/sources/ttml/ops/unary_ops.cpp
@@ -0,0 +1,109 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ops/unary_ops.hpp"
+
+#include <array>
+#include <core/ttnn_all_includes.hpp>
+#include <optional>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/graph.hpp"
+#include "autograd/graph_utils.hpp"
+#include "autograd/tensor.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr relu(const autograd::TensorPtr& tensor) {
+    auto out = autograd::create_tensor();
+    out->set_value(ttnn::relu(tensor->get_value()));
+    autograd::GradFunction grad = [tensor, out]() {
+        tt::tt_metal::MemoryConfig mem_config;
+        auto res = ttnn::relu_bw(out->get_grad(), tensor->get_value(), mem_config);
+        tensor->add_grad(res[0]);
+    };
+
+    auto links = autograd::get_links(tensor);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr gelu(const autograd::TensorPtr& tensor) {
+    auto out = autograd::create_tensor();
+    out->set_value(ttnn::gelu(tensor->get_value()));
+    autograd::GradFunction grad = [tensor, out]() {
+        tt::tt_metal::MemoryConfig mem_config;
+        static const std::string approx_mode = "none";
+        auto res = ttnn::gelu_bw(out->get_grad(), tensor->get_value(), approx_mode, mem_config);
+        assert(res.size() == 1U && "Gelu backward should return only one gradient");
+        tensor->add_grad(res.front().value());
+    };
+
+    std::vector<autograd::NodeId> links = autograd::get_links(tensor);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+
+    return out;
+}
+
+autograd::TensorPtr log_softmax(const autograd::TensorPtr& tensor, int dim) {
+    auto log_softmax = ttnn_fixed::log_softmax(tensor->get_value(), dim);
+    auto out = autograd::create_tensor(log_softmax);
+    autograd::GradFunction grad = [tensor, out, dim]() {
+        auto softmax = ttnn::exp(out->get_value());
+        auto sum_grad_over_dim = ttnn_fixed::sum_over_dim(out->get_grad(), dim);
+        auto grad = ttnn::subtract(out->get_grad(), ttnn::multiply(softmax, sum_grad_over_dim));
+        tensor->add_grad(grad);
+    };
+    auto links = autograd::get_links(tensor);
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+    return out;
+}
+
+autograd::TensorPtr mean(const autograd::TensorPtr& tensor) {
+    auto shape = core::create_shape({1, 1, 1, 1});
+    autograd::TensorPtr out = autograd::create_tensor(core::from_vector({0.F}, shape, &autograd::ctx().get_device()));
+    ttnn::moreh_mean(
+        tensor->get_value(),
+        std::nullopt,
+        true,
+        std::nullopt,
+        out->get_value(),
+        std::nullopt,
+        /* device_compute_kernel_config */ core::ComputeKernelConfig::precise());
+    autograd::GradFunction grad = [tensor, out]() {
+        auto resulting_shape = tensor->get_value().get_shape();
+        auto res = ttnn::moreh_mean_backward(
+            out->get_grad(), std::nullopt, false, resulting_shape, std::nullopt, std::nullopt, std::nullopt);
+        tensor->add_grad(res);
+    };
+    auto links = autograd::get_links(tensor);
+
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+    return out;
+}
+
+autograd::TensorPtr broadcast_batch(const autograd::TensorPtr& tensor, uint32_t new_batch_dim) {
+    if (new_batch_dim == 1 || tensor->get_value().shape()[0] == new_batch_dim) {
+        return tensor;
+    }
+    auto out = ttml::autograd::create_tensor();
+    auto repeats = core::create_shape({new_batch_dim, 1, 1, 1});
+    // currently assuming tensor came with shape: {1,X,Y,Z} and we want to get {B,X,Y,Z}
+    out->set_value(ttnn::repeat(tensor->get_value(), repeats));
+
+    autograd::GradFunction grad = [tensor, out]() {
+        auto res = ttnn_fixed::sum_over_batch(out->get_grad());
+        tensor->add_grad(res);
+    };
+    std::vector<autograd::NodeId> links = autograd::get_links(tensor);
+
+    out->set_node(autograd::ctx().add_backward_node(std::move(grad), links));
+    return out;
+}
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/ops/unary_ops.hpp b/tt-train/sources/ttml/ops/unary_ops.hpp
new file mode 100644
index 00000000000..839e86a8fd0
--- /dev/null
+++ b/tt-train/sources/ttml/ops/unary_ops.hpp
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/tensor.hpp"
+
+namespace ttml::ops {
+
+autograd::TensorPtr relu(const autograd::TensorPtr& tensor);
+autograd::TensorPtr gelu(const autograd::TensorPtr& tensor);
+autograd::TensorPtr mean(const autograd::TensorPtr& tensor);
+autograd::TensorPtr sum(const autograd::TensorPtr& tensor);
+autograd::TensorPtr broadcast_batch(const autograd::TensorPtr& tensor, uint32_t new_batch_dim);
+autograd::TensorPtr log_softmax(const autograd::TensorPtr& tensor, int dim);
+
+}  // namespace ttml::ops
diff --git a/tt-train/sources/ttml/optimizers/adamw.cpp b/tt-train/sources/ttml/optimizers/adamw.cpp
new file mode 100644
index 00000000000..c11724ac17d
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/adamw.cpp
@@ -0,0 +1,232 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "adamw.hpp"
+
+#include "autograd/autocast_tensor.hpp"
+#include "autograd/module_base.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/debug.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "optimizers/optimizer_base.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+namespace {
+
+const std::string kFirstMoment = "first_moment/";
+const std::string kSecondMoment = "second_moment/";
+
+}  // namespace
+
+namespace ttml::optimizers {
+
+MorehAdamW::MorehAdamW(autograd::NamedParameters parameters, const AdamWConfig& config) :
+    OptimizerBase(std::move(parameters)), m_config(config) {
+    for (const auto& [key, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad()) {
+            m_first_moment.emplace(
+                key,
+                autograd::create_tensor(
+                    core::zeros_like(tensor_ptr->get_value(autograd::PreferredPrecision::FULL)),
+                    /* requires_grad */ false));
+            m_second_moment.emplace(
+                key,
+                autograd::create_tensor(
+                    core::zeros_like(tensor_ptr->get_value(autograd::PreferredPrecision::FULL)),
+                    /* requires_grad */ false));
+        }
+    }
+}
+
+void MorehAdamW::zero_grad() {
+    for (auto& [key, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad() && tensor_ptr->is_grad_initialized()) {
+            // setting gradients to not initialized tensor
+            tensor_ptr->set_grad(ttnn::Tensor());
+        }
+    }
+}
+
+void MorehAdamW::step() {
+    if (core::debug::Debug::enable_print_tensor_stats()) {
+        print_stats();
+    }
+
+    m_steps++;
+    for (auto& [key, first_moment_ptr] : m_first_moment) {
+        const auto& tensor_ptr = m_parameters.at(key);
+        if (!tensor_ptr->is_grad_initialized()) {
+            continue;
+        }
+        auto& second_moment_ptr = m_second_moment.at(key);
+        const auto& first_moment = first_moment_ptr->get_value(autograd::PreferredPrecision::FULL);
+        const auto& second_moment = second_moment_ptr->get_value(autograd::PreferredPrecision::FULL);
+
+        const auto& gradients = tensor_ptr->get_grad();
+        auto output_tensor = tensor_ptr->get_value(autograd::PreferredPrecision::FULL);
+        ttnn::moreh_adamw(
+            tensor_ptr->get_value(autograd::PreferredPrecision::FULL),
+            gradients,
+            first_moment,
+            second_moment,
+            m_config.lr,
+            m_config.beta1,
+            m_config.beta2,
+            m_config.epsilon,
+            m_config.weight_decay,
+            m_steps,
+            /* amsgrad */ false,
+            /* max_exp_avg_sq_in */ std::nullopt,
+            /* param_out */ output_tensor,
+            /* exp_avg_out */ first_moment,
+            /* exp_avg_sq_out */ second_moment,
+            /* max_exp_avg_sq_out */ std::nullopt,
+            /* memory_config */ std::nullopt,
+            /* compute_kernel_config */ core::ComputeKernelConfig::precise());
+        tensor_ptr->set_value(output_tensor);
+        first_moment_ptr->set_value(first_moment);
+        second_moment_ptr->set_value(second_moment);
+    }
+}
+
+[[nodiscard]] autograd::NamedParameters MorehAdamW::get_state_dict() const {
+    autograd::NamedParameters state_dict;
+    for (const auto& [key, first_moment] : m_first_moment) {
+        state_dict.emplace(kFirstMoment + key, first_moment);
+    }
+
+    for (const auto& [key, second_moment] : m_second_moment) {
+        state_dict.emplace(kSecondMoment + key, second_moment);
+    }
+
+    return state_dict;
+}
+
+void MorehAdamW::set_state_dict(const autograd::NamedParameters& dict) {
+    for (const auto& [key, tensor] : dict) {
+        if (key.starts_with(kFirstMoment)) {
+            m_first_moment[key.substr(kFirstMoment.size())] = tensor;
+        } else if (key.starts_with(kSecondMoment)) {
+            m_second_moment[key.substr(kSecondMoment.size())] = tensor;
+        } else {
+            throw std::runtime_error(fmt::format("AdamW: Invalid key in state dict. Key = {}", key));
+        }
+    }
+}
+
+[[nodiscard]] size_t MorehAdamW::get_steps() const {
+    return m_steps;
+}
+
+void MorehAdamW::set_steps(size_t steps) {
+    m_steps = steps;
+}
+
+AdamW::AdamW(autograd::NamedParameters parameters, const AdamWConfig& config) :
+    OptimizerBase(std::move(parameters)), m_config(config) {
+    for (const auto& [key, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad()) {
+            m_first_moment.emplace(
+                key,
+                autograd::create_tensor(
+                    core::zeros_like(tensor_ptr->get_value(autograd::PreferredPrecision::FULL)),
+                    /* requires_grad */ false));
+            m_second_moment.emplace(
+                key,
+                autograd::create_tensor(
+                    core::zeros_like(tensor_ptr->get_value(autograd::PreferredPrecision::FULL)),
+                    /* requires_grad */ false));
+        }
+    }
+}
+
+void AdamW::zero_grad() {
+    for (auto& [key, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad() && tensor_ptr->is_grad_initialized()) {
+            // setting gradients to not initialized tensor
+            tensor_ptr->set_grad(ttnn::Tensor());
+        }
+    }
+}
+
+void AdamW::step() {
+    if (core::debug::Debug::enable_print_tensor_stats()) {
+        print_stats();
+    }
+
+    m_steps++;
+    for (auto& [key, first_moment_ptr] : m_first_moment) {
+        const auto& tensor_ptr = m_parameters.at(key);
+        if (!tensor_ptr->is_grad_initialized()) {
+            continue;
+        }
+        auto& second_moment_ptr = m_second_moment.at(key);
+        auto first_moment = first_moment_ptr->get_value(autograd::PreferredPrecision::FULL);
+        auto second_moment = second_moment_ptr->get_value(autograd::PreferredPrecision::FULL);
+
+        const auto& gradients = tensor_ptr->get_grad();
+        if (m_config.weight_decay != 0.0F) {
+            auto weight_decay_update = ttnn::multiply(
+                tensor_ptr->get_value(autograd::PreferredPrecision::FULL), m_config.weight_decay * m_config.lr);
+            // weights -= weight_decay * lr * weights
+            tensor_ptr->set_value(
+                ttnn::subtract(tensor_ptr->get_value(autograd::PreferredPrecision::FULL), weight_decay_update));
+        }
+
+        // first moment = beta1 * first moment + (1 - beta1) * gradients
+        first_moment =
+            ttnn::add(ttnn::multiply(first_moment, m_config.beta1), ttnn::multiply(gradients, 1.F - m_config.beta1));
+        // second moment = beta2 * second moment + (1 - beta2) * gradients^2
+        second_moment = ttnn::add(
+            ttnn::multiply(second_moment, m_config.beta2),
+            ttnn::multiply(ttnn::square(gradients), 1.F - m_config.beta2));
+        // first_moment_hat = first_moment / (1 - beta1^steps)
+        auto first_moment_hat = ttnn::multiply(first_moment, 1.F / (1.F - std::pow(m_config.beta1, m_steps)));
+        // second_moment_hat = second_moment / (1 - beta2^steps)
+        auto second_moment_hat = ttnn::multiply(second_moment, 1.F / (1.F - std::pow(m_config.beta2, m_steps)));
+        // weights -= lr * first_moment_hat / (sqrt(second_moment_hat) + epsilon)
+        first_moment_ptr->set_value(first_moment);
+        second_moment_ptr->set_value(second_moment);
+        tensor_ptr->set_value(ttnn::subtract(
+            tensor_ptr->get_value(autograd::PreferredPrecision::FULL),
+            ttnn_fixed::divide(
+                ttnn::multiply(first_moment_hat, m_config.lr),
+                ttnn::add(ttnn::sqrt(second_moment_hat), m_config.epsilon))));
+    }
+}
+
+[[nodiscard]] autograd::NamedParameters AdamW::get_state_dict() const {
+    autograd::NamedParameters state_dict;
+    for (const auto& [key, first_moment] : m_first_moment) {
+        state_dict.emplace(kFirstMoment + key, first_moment);
+    }
+
+    for (const auto& [key, second_moment] : m_second_moment) {
+        state_dict.emplace(kSecondMoment + key, second_moment);
+    }
+
+    return state_dict;
+}
+
+void AdamW::set_state_dict(const autograd::NamedParameters& dict) {
+    for (const auto& [key, tensor] : dict) {
+        if (key.starts_with(kFirstMoment)) {
+            m_first_moment[key.substr(kFirstMoment.size())] = tensor;
+        } else if (key.starts_with(kSecondMoment)) {
+            m_second_moment[key.substr(kSecondMoment.size())] = tensor;
+        } else {
+            throw std::runtime_error(fmt::format("AdamW: Invalid key in state dict. Key = {}", key));
+        }
+    }
+}
+
+[[nodiscard]] size_t AdamW::get_steps() const {
+    return m_steps;
+}
+
+void AdamW::set_steps(size_t steps) {
+    m_steps = steps;
+}
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/optimizers/adamw.hpp b/tt-train/sources/ttml/optimizers/adamw.hpp
new file mode 100644
index 00000000000..001b3e5c683
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/adamw.hpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/module_base.hpp"
+#include "optimizer_base.hpp"
+
+namespace ttml::optimizers {
+
+struct AdamWConfig {
+    float lr{1e-3F};
+    float beta1{0.9F};
+    float beta2{0.999F};
+    float epsilon{1e-8F};
+    float weight_decay{0.01F};
+    // TODO: add amsgrad
+};
+
+class MorehAdamW : public OptimizerBase {
+public:
+    MorehAdamW(autograd::NamedParameters parameters, const AdamWConfig& config);
+
+    void zero_grad() override;
+
+    void step() override;
+
+    [[nodiscard]] autograd::NamedParameters get_state_dict() const override;
+    void set_state_dict(const autograd::NamedParameters& dict) override;
+
+    [[nodiscard]] size_t get_steps() const override;
+    void set_steps(size_t steps) override;
+
+private:
+    size_t m_steps{0};
+    AdamWConfig m_config;
+    autograd::NamedParameters m_first_moment;
+    autograd::NamedParameters m_second_moment;
+};
+
+class AdamW : public OptimizerBase {
+public:
+    AdamW(autograd::NamedParameters parameters, const AdamWConfig& config);
+
+    void zero_grad() override;
+
+    void step() override;
+
+    [[nodiscard]] autograd::NamedParameters get_state_dict() const override;
+    void set_state_dict(const autograd::NamedParameters& dict) override;
+
+    [[nodiscard]] size_t get_steps() const override;
+    void set_steps(size_t steps) override;
+
+private:
+    size_t m_steps{0};
+    AdamWConfig m_config;
+    autograd::NamedParameters m_first_moment;
+    autograd::NamedParameters m_second_moment;
+};
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/optimizers/optimizer_base.cpp b/tt-train/sources/ttml/optimizers/optimizer_base.cpp
new file mode 100644
index 00000000000..446f23d6714
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/optimizer_base.cpp
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "optimizer_base.hpp"
+
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::optimizers {
+
+OptimizerBase::OptimizerBase(autograd::NamedParameters&& parameters) : m_parameters(std::move(parameters)) {
+}
+
+void OptimizerBase::print_stats() const {
+    fmt::print("\n\nOptimization parameters values and gradients:\n");
+    for (const auto& [name, tensor] : m_parameters) {
+        core::print_tensor_stats(tensor->get_value(), fmt::format("{}/value", name));
+        if (tensor->is_grad_initialized()) {
+            core::print_tensor_stats(tensor->get_grad(), fmt::format("{}/gradient", name));
+        }
+    }
+    fmt::print("=================================================\n");
+}
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/optimizers/optimizer_base.hpp b/tt-train/sources/ttml/optimizers/optimizer_base.hpp
new file mode 100644
index 00000000000..49f1f4a32aa
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/optimizer_base.hpp
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "autograd/module_base.hpp"
+
+namespace ttml::optimizers {
+
+class OptimizerBase {
+public:
+    explicit OptimizerBase(autograd::NamedParameters&& parameters);
+    OptimizerBase(const OptimizerBase&) = delete;
+    OptimizerBase& operator=(const OptimizerBase&) = delete;
+    OptimizerBase(OptimizerBase&&) = delete;
+    OptimizerBase& operator=(OptimizerBase&&) = delete;
+    virtual ~OptimizerBase() = default;
+
+    virtual void zero_grad() = 0;
+
+    virtual void step() = 0;
+
+    [[nodiscard]] virtual autograd::NamedParameters get_state_dict() const = 0;
+    virtual void set_state_dict(const autograd::NamedParameters& dict) = 0;
+
+    [[nodiscard]] virtual size_t get_steps() const = 0;
+    virtual void set_steps(size_t steps) = 0;
+
+    virtual void print_stats() const;
+
+protected:
+    autograd::NamedParameters m_parameters;
+};
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/optimizers/sgd.cpp b/tt-train/sources/ttml/optimizers/sgd.cpp
new file mode 100644
index 00000000000..0e25feb95fe
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/sgd.cpp
@@ -0,0 +1,98 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "sgd.hpp"
+
+#include <fmt/format.h>
+
+#include "autograd/autocast_tensor.hpp"
+#include "core/debug.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::optimizers {
+
+SGD::SGD(ttml::autograd::NamedParameters parameters, const SGDConfig& config) :
+    OptimizerBase(std::move(parameters)), m_config(config) {
+    for (const auto& [name, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad()) {
+            m_theta.emplace(
+                name,
+                autograd::create_tensor(
+                    core::zeros_like(tensor_ptr->get_value(autograd::PreferredPrecision::FULL)),
+                    /* requires_grad */ false));
+        }
+    }
+}
+
+void SGD::zero_grad() {
+    for (auto& [name, tensor_ptr] : m_parameters) {
+        if (tensor_ptr->get_requires_grad() && tensor_ptr->is_grad_initialized()) {
+            tensor_ptr->set_grad(core::zeros_like(tensor_ptr->get_value()));
+        }
+    }
+}
+
+void SGD::step() {
+    if (core::debug::Debug::enable_print_tensor_stats()) {
+        print_stats();
+    }
+
+    for (auto& [name, theta_ptr] : m_theta) {
+        auto theta = theta_ptr->get_value(autograd::PreferredPrecision::FULL);
+        const auto& tensor_ptr = m_parameters.at(name);
+        if (!tensor_ptr->is_grad_initialized()) {
+            continue;
+        }
+
+        auto gradients = tensor_ptr->get_grad();
+        if (m_config.weight_decay != 0.0F) {
+            gradients = ttnn::add(
+                ttnn::multiply(tensor_ptr->get_value(autograd::PreferredPrecision::FULL), m_config.weight_decay),
+                gradients);
+        }
+
+        if (m_config.momentum != 0.0F) {
+            if (steps != 0) {
+                // apply momentum
+                theta = ttnn::multiply(theta, m_config.momentum);
+                // dampening
+                if (m_config.dampening != 0.0F) {
+                    theta = ttnn::add(theta, ttnn::multiply(gradients, 1 - m_config.dampening));
+                } else {
+                    theta = ttnn::add(theta, gradients);
+                }
+            } else {
+                theta = ttnn::add(theta, gradients);
+            }
+
+            if (m_config.nesterov) {
+                gradients = ttnn::add(gradients, ttnn::multiply(theta, m_config.momentum));
+            } else {
+                gradients = theta;
+            }
+        }
+        theta_ptr->set_value(theta);
+        tensor_ptr->set_value(ttnn::subtract(
+            tensor_ptr->get_value(autograd::PreferredPrecision::FULL), ttnn::multiply(gradients, m_config.lr)));
+    }
+    steps++;
+}
+
+autograd::NamedParameters SGD::get_state_dict() const {
+    return m_theta;
+}
+
+void SGD::set_state_dict(const autograd::NamedParameters& dict) {
+    m_theta = dict;
+}
+
+size_t SGD::get_steps() const {
+    return steps;
+}
+
+void SGD::set_steps(size_t steps) {
+    this->steps = steps;
+}
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/optimizers/sgd.hpp b/tt-train/sources/ttml/optimizers/sgd.hpp
new file mode 100644
index 00000000000..756facdf26c
--- /dev/null
+++ b/tt-train/sources/ttml/optimizers/sgd.hpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <ttnn/tensor/tensor.hpp>
+
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "optimizers/optimizer_base.hpp"
+
+namespace ttml::optimizers {
+
+struct SGDConfig {
+    float lr{1e-3F};
+    float momentum{0.0F};
+    float dampening{0.0F};
+    float weight_decay{0.0F};
+    bool nesterov{false};
+};
+
+class SGD : public OptimizerBase {
+public:
+    explicit SGD(ttml::autograd::NamedParameters parameters, const SGDConfig& config);
+
+    void zero_grad() override;
+
+    void step() override;
+
+    [[nodiscard]] autograd::NamedParameters get_state_dict() const override;
+    void set_state_dict(const autograd::NamedParameters& dict) override;
+
+    [[nodiscard]] size_t get_steps() const override;
+    void set_steps(size_t steps) override;
+
+private:
+    size_t steps{0};
+    SGDConfig m_config;
+    ttml::autograd::NamedParameters m_theta;
+};
+
+}  // namespace ttml::optimizers
diff --git a/tt-train/sources/ttml/serialization/msgpack_file.cpp b/tt-train/sources/ttml/serialization/msgpack_file.cpp
new file mode 100644
index 00000000000..42fb0b53378
--- /dev/null
+++ b/tt-train/sources/ttml/serialization/msgpack_file.cpp
@@ -0,0 +1,398 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "msgpack_file.hpp"
+
+#include <fmt/format.h>
+
+#include <exception>
+#include <fstream>
+#define MSGPACK_NO_BOOST
+#include <fstream>
+#include <msgpack.hpp>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <variant>
+#include <vector>
+
+namespace msgpack {
+MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
+    namespace adaptor {
+
+    // Custom adaptor for std::variant
+    template <typename... Types>
+    struct pack<std::variant<Types...>> {
+        template <typename Stream>
+        packer<Stream>& operator()(msgpack::packer<Stream>& o, const std::variant<Types...>& v) const {
+            // Pack the index of the active type and the value
+            o.pack_array(2);
+            o.pack(v.index());
+            std::visit([&o](const auto& val) { o.pack(val); }, v);
+            return o;
+        }
+    };
+
+    template <typename... Types>
+    struct convert<std::variant<Types...>> {
+        msgpack::object const& operator()(msgpack::object const& o, std::variant<Types...>& v) const {
+            if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
+                throw std::runtime_error(
+                    "Invalid object type. Expected array of size 2. Where first value is the  type index and second is "
+                    "our object.");
+            }
+
+            std::size_t index = o.via.array.ptr[0].as<std::size_t>();
+
+            auto& obj = o.via.array.ptr[1];
+
+            // Helper lambda to set the variant based on index
+            bool success = set_variant_by_index(index, obj, v);
+            if (!success) {
+                throw std::runtime_error(fmt::format(
+                    "Cannot convert object to variant. Possible reason: type mismatch. Object index: {}", index));
+            }
+
+            return o;
+        }
+
+    private:
+        template <std::size_t N = 0>
+        bool set_variant_by_index(std::size_t index, msgpack::object const& obj, std::variant<Types...>& v) const {
+            if constexpr (N < sizeof...(Types)) {
+                if (index == N) {
+                    using T = std::variant_alternative_t<N, std::variant<Types...>>;
+                    T val;
+                    obj.convert(val);
+                    v = std::move(val);
+                    return true;
+                } else {
+                    return set_variant_by_index<N + 1>(index, obj, v);
+                }
+            } else {
+                throw std::runtime_error(fmt::format("Invalid index for variant type. Index: {}", index));
+            }
+        }
+    };
+
+    }  // namespace adaptor
+}  // namespace MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
+}  // namespace msgpack
+
+namespace ttml::serialization {
+class MsgPackFile::Impl {
+public:
+    // Methods to store different types
+    void put(std::string_view key, bool value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, char value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, int value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, float value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, double value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, uint32_t value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, size_t value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, const std::string& value) {
+        m_data[std::string(key)] = value;
+    }
+
+    void put(std::string_view key, std::string_view value) {
+        m_data[std::string(key)] = std::string(value);
+    }
+
+    // Overloads for std::span
+    void put(std::string_view key, std::span<const int> value) {
+        m_data[std::string(key)] = std::vector<int>(value.begin(), value.end());
+    }
+
+    void put(std::string_view key, std::span<const float> value) {
+        m_data[std::string(key)] = std::vector<float>(value.begin(), value.end());
+    }
+
+    void put(std::string_view key, std::span<const double> value) {
+        m_data[std::string(key)] = std::vector<double>(value.begin(), value.end());
+    }
+
+    void put(std::string_view key, std::span<const uint32_t> value) {
+        m_data[std::string(key)] = std::vector<uint32_t>(value.begin(), value.end());
+    }
+
+    void put(std::string_view key, std::span<const std::string> value) {
+        m_data[std::string(key)] = std::vector<std::string>(value.begin(), value.end());
+    }
+
+    // Serialization method
+    void serialize(const std::string& filename) {
+        // Create a buffer for packing
+        msgpack::sbuffer sbuf;
+
+        // Pack the data into the buffer
+        msgpack::pack(sbuf, m_data);
+
+        // Write the buffer to a file
+        std::ofstream ofs(filename, std::ios::binary);
+        if (ofs.is_open()) {
+            ofs.write(sbuf.data(), static_cast<std::streamsize>(sbuf.size()));
+            ofs.close();
+        } else {
+            throw std::runtime_error("Unable to open file for writing: " + filename);
+        }
+    }
+
+    // Deserialization method
+    void deserialize(const std::string& filename) {
+        // Read the file content into a string buffer
+        std::ifstream ifs(filename, std::ios::binary);
+        if (!ifs.is_open()) {
+            throw std::runtime_error("Unable to open file for reading: " + filename);
+        }
+        std::string buffer((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
+        ifs.close();
+
+        // Unpack the buffer into msgpack object
+        msgpack::object_handle handle = msgpack::unpack(buffer.data(), buffer.size());
+
+        // Convert the msgpack object to the desired type
+        msgpack::object obj = handle.get();
+
+        // Clear existing data
+        m_data.clear();
+
+        // Convert object to m_data
+        obj.convert(m_data);
+    }
+
+    // Methods to get values
+    bool get(std::string_view key, bool& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, char& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, int& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, float& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, double& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, uint32_t& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, size_t& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::string& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::vector<int>& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::vector<float>& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::vector<double>& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::vector<uint32_t>& value) const {
+        return get_value(key, value);
+    }
+
+    bool get(std::string_view key, std::vector<std::string>& value) const {
+        return get_value(key, value);
+    }
+
+private:
+    using ValueType = std::variant<
+        bool,
+        char,
+        int,
+        float,
+        double,
+        uint32_t,
+        size_t,
+        std::string,
+        std::vector<char>,
+        std::vector<int>,
+        std::vector<float>,
+        std::vector<double>,
+        std::vector<uint32_t>,
+        std::vector<std::string>>;
+
+    std::unordered_map<std::string, ValueType> m_data;
+
+    // Helper function to get value from m_data
+    template <typename T>
+    bool get_value(std::string_view key, T& value) const {
+        auto it = m_data.find(std::string(key));
+        if (it != m_data.end()) {
+            if (const auto* pval = std::get_if<T>(&(it->second))) {
+                value = *pval;
+                return true;
+            } else {
+                throw std::runtime_error(fmt::format("Type mismatch for key: {}", key));
+            }
+        } else {
+            // Key not found
+            throw std::runtime_error(fmt::format("Key not found: {}", key));
+        }
+    }
+};
+
+MsgPackFile::MsgPackFile() : m_impl(std::make_unique<Impl>()) {
+}
+
+MsgPackFile::~MsgPackFile() = default;
+
+MsgPackFile::MsgPackFile(MsgPackFile&&) noexcept = default;
+
+void MsgPackFile::put(std::string_view key, bool value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, char value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, int value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, float value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, double value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, uint32_t value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, size_t value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::string_view value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::span<const int> value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::span<const float> value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::span<const double> value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::span<const uint32_t> value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, std::span<const std::string> value) {
+    m_impl->put(key, value);
+}
+
+void MsgPackFile::serialize(const std::string& filename) {
+    m_impl->serialize(filename);
+}
+
+void MsgPackFile::deserialize(const std::string& filename) {
+    m_impl->deserialize(filename);
+}
+
+void MsgPackFile::get(std::string_view key, bool& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, char& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, int& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, float& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, double& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, uint32_t& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, size_t& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::string& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::vector<int>& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::vector<float>& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::vector<double>& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::vector<uint32_t>& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::get(std::string_view key, std::vector<std::string>& value) const {
+    m_impl->get(key, value);
+}
+
+void MsgPackFile::put(std::string_view key, const char* value) {
+    put(key, std::string_view(value));
+}
+}  // namespace ttml::serialization
diff --git a/tt-train/sources/ttml/serialization/msgpack_file.hpp b/tt-train/sources/ttml/serialization/msgpack_file.hpp
new file mode 100644
index 00000000000..19f36f6cca9
--- /dev/null
+++ b/tt-train/sources/ttml/serialization/msgpack_file.hpp
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <span>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace ttml::serialization {
+
+class MsgPackFile {
+public:
+    MsgPackFile();
+    ~MsgPackFile();
+
+    // Copy constructor
+    MsgPackFile(const MsgPackFile& other) = delete;
+
+    // Copy assignment operator
+    MsgPackFile& operator=(const MsgPackFile& other) = delete;
+
+    // Move constructor
+    MsgPackFile(MsgPackFile&& other) noexcept;
+
+    // Move assignment operator
+    MsgPackFile& operator=(MsgPackFile&& other) = delete;
+
+    // Methods to put different types
+    void put(std::string_view key, bool value);
+    void put(std::string_view key, char value);
+    void put(std::string_view key, int value);
+    void put(std::string_view key, float value);
+    void put(std::string_view key, double value);
+    void put(std::string_view key, uint32_t value);
+    void put(std::string_view key, size_t value);
+    void put(std::string_view key, std::string_view value);
+
+    // added it to prevent implicit casts from const char* to bool
+    void put(std::string_view key, const char* value);
+
+    // Overloads for std::span
+    void put(std::string_view key, std::span<const int> value);
+    void put(std::string_view key, std::span<const float> value);
+    void put(std::string_view key, std::span<const double> value);
+    void put(std::string_view key, std::span<const uint32_t> value);
+    void put(std::string_view key, std::span<const std::string> value);
+
+    // Serialization method
+    void serialize(const std::string& filename);
+
+    // Deserialization method
+    void deserialize(const std::string& filename);
+
+    // Methods to get values
+    void get(std::string_view key, bool& value) const;
+    void get(std::string_view key, char& value) const;
+    void get(std::string_view key, int& value) const;
+    void get(std::string_view key, float& value) const;
+    void get(std::string_view key, double& value) const;
+    void get(std::string_view key, uint32_t& value) const;
+    void get(std::string_view key, size_t& value) const;
+    void get(std::string_view key, std::string& value) const;
+
+    // Methods to get vectors (from spans)
+    void get(std::string_view key, std::vector<int>& value) const;
+    void get(std::string_view key, std::vector<float>& value) const;
+    void get(std::string_view key, std::vector<double>& value) const;
+    void get(std::string_view key, std::vector<uint32_t>& value) const;
+    void get(std::string_view key, std::vector<std::string>& value) const;
+
+private:
+    class Impl;
+    std::unique_ptr<Impl> m_impl;
+};
+}  // namespace ttml::serialization
diff --git a/tt-train/sources/ttml/serialization/serialization.cpp b/tt-train/sources/ttml/serialization/serialization.cpp
new file mode 100644
index 00000000000..d96e26f014f
--- /dev/null
+++ b/tt-train/sources/ttml/serialization/serialization.cpp
@@ -0,0 +1,174 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "serialization.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <cstdint>
+#include <ttnn/tensor/types.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/module_base.hpp"
+#include "core/system_utils.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "msgpack_file.hpp"
+#include "optimizers/optimizer_base.hpp"
+#include "optimizers/sgd.hpp"
+namespace ttml::serialization {
+
+// demangle type name
+
+// trivial type to the std::string
+template <typename T>
+std::string to_bytes(const T& value) {
+    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+    std::string bytes(sizeof(T), '\0');
+    std::memcpy(bytes.data(), &value, sizeof(T));
+    return bytes;
+}
+
+template <typename T>
+void from_bytes(const std::string& bytes, T& value) {
+    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+
+    if (bytes.size() != sizeof(T)) {
+        throw std::invalid_argument(fmt::format(
+            "Invalid byte size for conversion to type T. Expected: {} Actual: {}, type: {} ",
+            sizeof(T),
+            bytes.size(),
+            core::demangle(typeid(T).name())));
+    }
+    std::memcpy(&value, bytes.data(), sizeof(T));
+}
+
+template <typename T>
+void get_enum(MsgPackFile& file, std::string_view name, T& value) {
+    int int_value = 0;
+    file.get(std::string(name), int_value);
+    value = static_cast<T>(int_value);
+}
+
+void write_ttnn_tensor(MsgPackFile& file, std::string_view name, const tt::tt_metal::Tensor& tensor) {
+    auto shape = tensor.get_shape();
+    auto data_type = tensor.get_dtype();
+    auto layout = tensor.get_layout();
+    auto storage_type = tensor.storage_type();
+
+    file.put(std::string(name) + "/shape", to_bytes(shape));
+    file.put(std::string(name) + "/data_type", static_cast<int>(data_type));
+    file.put(std::string(name) + "/layout", static_cast<int>(layout));
+    file.put(std::string(name) + "/storage_type", static_cast<int>(storage_type));
+
+    if (data_type == tt::tt_metal::DataType::BFLOAT16) {
+        auto data = ttml::core::to_vector<float>(tensor);
+        file.put(std::string(name) + "/data", std::span<const float>(data.data(), data.size()));
+    } else if (data_type == tt::tt_metal::DataType::UINT32) {
+        auto data = ttml::core::to_vector<uint32_t>(tensor);
+        file.put(std::string(name) + "/data", std::span<const uint32_t>(data.data(), data.size()));
+    } else {
+        throw std::runtime_error(fmt::format("Unsupported data type: {}", magic_enum::enum_name(data_type)));
+    }
+}
+
+void read_ttnn_tensor(MsgPackFile& file, std::string_view name, tt::tt_metal::Tensor& tensor) {
+    tt::tt_metal::DataType data_type{};
+    tt::tt_metal::Layout layout{};
+    tt::tt_metal::StorageType storage_type{};
+
+    auto shape = core::create_shape({1, 1, 1, 1});
+    std::string bytes;
+    file.get(std::string(name) + "/shape", bytes);
+    from_bytes<ttnn::Shape>(bytes, shape);
+
+    get_enum(file, std::string(name) + "/data_type", data_type);
+    get_enum(file, std::string(name) + "/layout", layout);
+    get_enum(file, std::string(name) + "/storage_type", storage_type);
+
+    if (data_type == tt::tt_metal::DataType::BFLOAT16) {
+        std::vector<float> data;
+        file.get(std::string(name) + "/data", data);
+        tensor = core::from_vector(data, shape, &ttml::autograd::ctx().get_device(), layout);
+    } else if (data_type == tt::tt_metal::DataType::UINT32) {
+        std::vector<uint32_t> data;
+        file.get(std::string(name) + "/data", data);
+        tensor =
+            core::from_vector<uint32_t, DataType::UINT32>(data, shape, &ttml::autograd::ctx().get_device(), layout);
+    } else {
+        throw std::runtime_error(fmt::format("Unsupported data type: {}", magic_enum::enum_name(data_type)));
+    }
+}
+
+void write_autograd_tensor(
+    MsgPackFile& file, std::string_view name, const ttml::autograd::TensorPtr& tensor, bool save_grads) {
+    write_ttnn_tensor(file, std::string(name) + "/value", tensor->get_value());
+    auto& grad = tensor->get_grad();
+    bool has_grads = save_grads && core::is_tensor_initialized(grad);
+    file.put(std::string(name) + "/requires_grads", tensor->get_requires_grad());
+    file.put(std::string(name) + "/has_grads", has_grads);
+    if (has_grads) {
+        write_ttnn_tensor(file, std::string(name) + "/grad", tensor->get_grad());
+    }
+}
+
+void read_autograd_tensor(MsgPackFile& file, std::string_view name, ttml::autograd::TensorPtr& tensor) {
+    tt::tt_metal::Tensor value;
+    bool has_grads = false;
+    bool requires_grads = false;
+    read_ttnn_tensor(file, std::string(name) + "/value", value);
+    tensor->set_value(value);
+    file.get(std::string(name) + "/requires_grads", requires_grads);
+    file.get(std::string(name) + "/has_grads", has_grads);
+    tensor->set_requires_grad(requires_grads);
+    if (has_grads) {
+        tt::tt_metal::Tensor grad;
+        read_ttnn_tensor(file, std::string(name) + "/grad", grad);
+        tensor->set_grad(grad);
+    }
+}
+
+void write_named_parameters(MsgPackFile& file, std::string_view name, const ttml::autograd::NamedParameters& params) {
+    for (const auto& [key, value] : params) {
+        write_autograd_tensor(file, std::string(name) + "/" + key, value);
+    }
+}
+void read_named_parameters(MsgPackFile& file, std::string_view name, ttml::autograd::NamedParameters& params) {
+    for (auto& [key, value] : params) {
+        read_autograd_tensor(file, std::string(name) + "/" + key, value);
+    }
+}
+
+void write_optimizer(MsgPackFile& file, std::string_view name, const optimizers::OptimizerBase* optimizer) {
+    assert(optimizer);
+    auto state_dict = optimizer->get_state_dict();
+    for (const auto& [key, value] : state_dict) {
+        ttml::serialization::write_autograd_tensor(file, std::string(name) + "/" + key, value);
+    }
+    file.put(std::string(name) + "/steps", optimizer->get_steps());
+}
+
+void read_optimizer(MsgPackFile& file, std::string_view name, optimizers::OptimizerBase* optimizer) {
+    assert(optimizer);
+    size_t steps = 0;
+    auto state_dict = optimizer->get_state_dict();
+    for (auto& [key, value] : state_dict) {
+        ttml::serialization::read_autograd_tensor(file, std::string(name) + "/" + key, value);
+    }
+    optimizer->set_state_dict(state_dict);
+    file.get(std::string(name) + "/steps", steps);
+    optimizer->set_steps(steps);
+}
+
+void write_module(MsgPackFile& file, std::string_view name, const autograd::ModuleBase* module) {
+    assert(module);
+    auto named_parameters = module->parameters();
+    write_named_parameters(file, name, named_parameters);
+}
+
+void read_module(MsgPackFile& file, std::string_view name, autograd::ModuleBase* module) {
+    assert(module);
+    auto named_parameters = module->parameters();
+    read_named_parameters(file, name, named_parameters);
+}
+
+}  // namespace ttml::serialization
diff --git a/tt-train/sources/ttml/serialization/serialization.hpp b/tt-train/sources/ttml/serialization/serialization.hpp
new file mode 100644
index 00000000000..617d89e878a
--- /dev/null
+++ b/tt-train/sources/ttml/serialization/serialization.hpp
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <string_view>
+
+#include "autograd/module_base.hpp"
+#include "autograd/tensor.hpp"
+#include "core/ttnn_fwd.hpp"
+
+namespace ttml::optimizers {
+class OptimizerBase;
+}
+namespace ttml::serialization {
+class MsgPackFile;
+
+void write_ttnn_tensor(MsgPackFile& file, std::string_view name, const tt::tt_metal::Tensor& tensor);
+void read_ttnn_tensor(MsgPackFile& file, std::string_view name, tt::tt_metal::Tensor& tensor);
+
+void write_autograd_tensor(
+    MsgPackFile& file, std::string_view name, const ttml::autograd::TensorPtr& tensor, bool save_grads = false);
+void read_autograd_tensor(MsgPackFile& file, std::string_view name, ttml::autograd::TensorPtr& tensor);
+
+void write_named_parameters(MsgPackFile& file, std::string_view name, const ttml::autograd::NamedParameters& params);
+void read_named_parameters(MsgPackFile& file, std::string_view name, ttml::autograd::NamedParameters& params);
+
+void write_optimizer(MsgPackFile& file, std::string_view name, const optimizers::OptimizerBase* optimizer);
+void read_optimizer(MsgPackFile& file, std::string_view name, optimizers::OptimizerBase* optimizer);
+
+void write_module(MsgPackFile& file, std::string_view name, const autograd::ModuleBase* module);
+void read_module(MsgPackFile& file, std::string_view name, autograd::ModuleBase* module);
+
+}  // namespace ttml::serialization
diff --git a/tt-train/sources/ttml/tokenizers/bpe_tokenizer.cpp b/tt-train/sources/ttml/tokenizers/bpe_tokenizer.cpp
new file mode 100644
index 00000000000..aad9d6c14fa
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/bpe_tokenizer.cpp
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "bpe_tokenizer.hpp"
+
+#include <fmt/format.h>
+#include <tokenizers_cpp.h>
+
+#include <fstream>
+#include <string>
+
+namespace {
+
+std::string load_bytes_from_file(const std::string& path) {
+    std::ifstream file_stream(path, std::ios::in | std::ios::binary);
+    if (!file_stream.is_open()) {
+        throw std::runtime_error(fmt::format("Failed to open file. Path: {}\n", path));
+    }
+    std::string data;
+    file_stream.seekg(0, std::ios::end);
+    auto size = file_stream.tellg();
+    file_stream.seekg(0, std::ios::beg);
+    data.resize(size);
+    file_stream.read(data.data(), size);
+    return data;
+}
+
+using HuggingFaceTokenizer = tokenizers::Tokenizer;
+
+}  // namespace
+
+namespace ttml::tokenizers {
+
+class BPETokenizer::BPETokenizerImpl {
+public:
+    explicit BPETokenizerImpl(const std::string& json_file) {
+        auto blob = load_bytes_from_file(json_file);
+        m_tokenizer = HuggingFaceTokenizer::FromBlobJSON(blob);
+    }
+    ~BPETokenizerImpl() = default;
+    BPETokenizerImpl(const BPETokenizerImpl&) = delete;
+    BPETokenizerImpl& operator=(const BPETokenizerImpl&) = delete;
+    BPETokenizerImpl(BPETokenizerImpl&&) = default;
+    BPETokenizerImpl& operator=(BPETokenizerImpl&&) = default;
+
+    [[nodiscard]] std::vector<uint32_t> encode(const std::string& text) const {
+        std::vector<int32_t> results = m_tokenizer->Encode(text);
+        // we currently use uint32_t for tokens, might change in the future
+        return {results.begin(), results.end()};
+    }
+
+    [[nodiscard]] std::string decode(const std::vector<uint32_t>& tokens) const {
+        const std::vector<int32_t> tokens_i32(tokens.begin(), tokens.end());
+        return m_tokenizer->Decode(tokens_i32);
+    }
+
+    [[nodiscard]] uint32_t get_vocab_size() const {
+        return m_tokenizer->GetVocabSize();
+    }
+
+private:
+    std::unique_ptr<HuggingFaceTokenizer> m_tokenizer;
+};
+
+BPETokenizer::BPETokenizer(const std::string& json_file) {
+    m_pimpl = std::make_unique<BPETokenizerImpl>(json_file);
+}
+
+BPETokenizer::~BPETokenizer() = default;
+BPETokenizer::BPETokenizer(BPETokenizer&&) noexcept = default;
+BPETokenizer& BPETokenizer::operator=(BPETokenizer&&) noexcept = default;
+
+std::vector<uint32_t> BPETokenizer::encode(const std::string& text) const {
+    return m_pimpl->encode(text);
+}
+
+std::string BPETokenizer::decode(const std::vector<uint32_t>& tokens) const {
+    return m_pimpl->decode(tokens);
+}
+
+uint32_t BPETokenizer::get_vocab_size() const {
+    return m_pimpl->get_vocab_size();
+}
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/bpe_tokenizer.hpp b/tt-train/sources/ttml/tokenizers/bpe_tokenizer.hpp
new file mode 100644
index 00000000000..7e86ef52222
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/bpe_tokenizer.hpp
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+
+#include "tokenizer_base.hpp"
+
+namespace ttml::tokenizers {
+
+class BPETokenizer : public TokenizerBase {
+public:
+    explicit BPETokenizer(const std::string& json_file);
+    ~BPETokenizer() override;
+    BPETokenizer(const BPETokenizer&) = delete;
+    BPETokenizer& operator=(const BPETokenizer&) = delete;
+    BPETokenizer(BPETokenizer&&) noexcept;
+    BPETokenizer& operator=(BPETokenizer&&) noexcept;
+
+    [[nodiscard]] std::vector<uint32_t> encode(const std::string& text) const override;
+    [[nodiscard]] std::string decode(const std::vector<uint32_t>& tokens) const override;
+    [[nodiscard]] uint32_t get_vocab_size() const;
+
+private:
+    class BPETokenizerImpl;
+    std::unique_ptr<BPETokenizerImpl> m_pimpl;
+};
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/char_tokenizer.cpp b/tt-train/sources/ttml/tokenizers/char_tokenizer.cpp
new file mode 100644
index 00000000000..1294e93b97f
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/char_tokenizer.cpp
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "char_tokenizer.hpp"
+
+#include <sstream>
+#include <stdexcept>
+
+namespace ttml::tokenizers {
+CharTokenizer::CharTokenizer(Vocabulary vocabulary) : m_vocabulary(std::move(vocabulary)) {
+    auto vocab_size = static_cast<uint32_t>(m_vocabulary.size());
+    m_vocabulary[BEGIN_TOKEN] = vocab_size++;
+    m_vocabulary[END_TOKEN] = vocab_size++;
+    build_reverse_mapping();
+}
+
+std::vector<uint32_t> CharTokenizer::encode(const std::string& text) const {
+    std::vector<uint32_t> tokens;
+    for (char chr : text) {
+        auto chr_str = std::string(1, chr);
+        auto it = m_vocabulary.find(chr_str);
+        if (it != m_vocabulary.end()) {
+            tokens.push_back(it->second);
+        } else {
+            throw std::runtime_error("Character not in vocabulary: " + chr_str);
+        }
+    }
+    return tokens;
+}
+
+std::string CharTokenizer::decode(const std::vector<uint32_t>& tokens) const {
+    std::ostringstream oss;
+    for (uint32_t token : tokens) {
+        auto it = m_id_to_char.find(token);
+        if (it != m_id_to_char.end()) {
+            oss << it->second;
+        } else {
+            throw std::runtime_error("Token ID not in reverse vocabulary: " + std::to_string(token));
+        }
+    }
+    return oss.str();
+}
+const CharTokenizer::Vocabulary& CharTokenizer::get_vocabulary() const {
+    return m_vocabulary;
+}
+
+void CharTokenizer::build_reverse_mapping() {
+    for (const auto& [token, id] : m_vocabulary) {
+        m_id_to_char[id] = token;
+    }
+}
+
+uint32_t CharTokenizer::get_vocab_size() const {
+    return static_cast<uint32_t>(m_vocabulary.size());
+}
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/char_tokenizer.hpp b/tt-train/sources/ttml/tokenizers/char_tokenizer.hpp
new file mode 100644
index 00000000000..f5f84ca45c1
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/char_tokenizer.hpp
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <unordered_map>
+
+#include "tokenizer_base.hpp"
+
+namespace ttml::tokenizers {
+
+constexpr auto PAD_TOKEN = "<PAD>";
+constexpr auto END_TOKEN = "<END>";
+constexpr auto BEGIN_TOKEN = "<BEG>";
+
+class CharTokenizer : public TokenizerBase {
+public:
+    using Vocabulary = std::unordered_map<std::string, uint32_t>;
+    using IdtoChars = std::unordered_map<uint32_t, std::string>;
+    // Constructor that initializes the tokenizer with a vocabulary
+    explicit CharTokenizer(Vocabulary vocabulary);
+
+    CharTokenizer(const CharTokenizer&) = default;
+    CharTokenizer& operator=(const CharTokenizer&) = default;
+
+    CharTokenizer(CharTokenizer&&) = default;
+    CharTokenizer& operator=(CharTokenizer&&) = default;
+
+    [[nodiscard]] std::vector<uint32_t> encode(const std::string& text) const override;
+
+    [[nodiscard]] std::string decode(const std::vector<uint32_t>& tokens) const override;
+
+    [[nodiscard]] const CharTokenizer::Vocabulary& get_vocabulary() const;
+
+    [[nodiscard]] uint32_t get_vocab_size() const;
+
+    ~CharTokenizer() override = default;
+
+private:
+    Vocabulary m_vocabulary;
+    IdtoChars m_id_to_char;
+
+    void build_reverse_mapping();
+};
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.cpp b/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.cpp
new file mode 100644
index 00000000000..6fec9cbbe51
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.cpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "char_tokenizer_trainer.hpp"
+
+#include <algorithm>
+#include <set>
+#include <string>
+
+namespace ttml::tokenizers {
+
+CharTokenizer CharTokenizerTrainer::train(const std::string& text, bool add_padding_token) {
+    CharTokenizer::Vocabulary vocabulary;
+
+    // using set instead of unordered_set to stabilize order
+    std::set<char> unique_chars(text.begin(), text.end());
+
+    if (add_padding_token) {
+        vocabulary[PAD_TOKEN] = 0U;
+    }
+
+    for (char chr : unique_chars) {
+        vocabulary[std::string(1, chr)] = static_cast<uint32_t>(vocabulary.size());
+    }
+
+    return CharTokenizer(vocabulary);
+}
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.hpp b/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.hpp
new file mode 100644
index 00000000000..b0b5f782156
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/char_tokenizer_trainer.hpp
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "char_tokenizer.hpp"
+
+namespace ttml::tokenizers {
+
+// right now it is very simple
+class CharTokenizerTrainer {
+public:
+    [[nodiscard]] static CharTokenizer train(const std::string& text, bool add_padding_token = true);
+};
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/tokenizers/tokenizer_base.hpp b/tt-train/sources/ttml/tokenizers/tokenizer_base.hpp
new file mode 100644
index 00000000000..f62c77294a6
--- /dev/null
+++ b/tt-train/sources/ttml/tokenizers/tokenizer_base.hpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace ttml::tokenizers {
+
+class TokenizerBase {
+public:
+    TokenizerBase() = default;
+    TokenizerBase(const TokenizerBase&) = default;
+    TokenizerBase& operator=(const TokenizerBase&) = default;
+    TokenizerBase(TokenizerBase&&) = default;
+    TokenizerBase& operator=(TokenizerBase&&) = default;
+
+    // Virtual destructor for proper cleanup in derived classes
+    virtual ~TokenizerBase() = default;
+
+    // Pure virtual function to encode a string into a vector of token IDs
+    [[nodiscard]] virtual std::vector<uint32_t> encode(const std::string& text) const = 0;
+
+    // Pure virtual function to decode a vector of token IDs back into a string
+    [[nodiscard]] virtual std::string decode(const std::vector<uint32_t>& tokens) const = 0;
+};
+
+}  // namespace ttml::tokenizers
diff --git a/tt-train/sources/ttml/ttml.cpp b/tt-train/sources/ttml/ttml.cpp
new file mode 100644
index 00000000000..fde54fe95ff
--- /dev/null
+++ b/tt-train/sources/ttml/ttml.cpp
@@ -0,0 +1,10 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttml.hpp"
+namespace ttml {
+int sum(int a, int b) {
+    return a + b;
+}
+}  // namespace ttml
diff --git a/tt-train/sources/ttml/ttml.hpp b/tt-train/sources/ttml/ttml.hpp
new file mode 100644
index 00000000000..7563694e92b
--- /dev/null
+++ b/tt-train/sources/ttml/ttml.hpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+namespace ttml {
+int sum(int a, int b);
+}  // namespace ttml
diff --git a/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.cpp b/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.cpp
new file mode 100644
index 00000000000..652073c583d
--- /dev/null
+++ b/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.cpp
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "trivial_ttnn_ops.hpp"
+
+#include <core/ttnn_all_includes.hpp>
+#include <ttnn/operations/moreh/moreh_sum/moreh_sum.hpp>
+
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+namespace ttml::ttnn_fixed {
+
+tt::tt_metal::Tensor sum_over_dim(const tt::tt_metal::Tensor& t, uint32_t dim) {
+    return ttnn::moreh_sum(
+        t,
+        /* dim */ dim,
+        /* keep_dim */ true,
+        /* output */ std::nullopt,
+        /* output_mem_config */ std::nullopt,
+        /*compute_kernel_config */ core::ComputeKernelConfig::precise());
+}
+
+tt::tt_metal::Tensor sum_over_batch(const tt::tt_metal::Tensor& t) {
+    return sum_over_dim(t, /* dim */ 0);
+}
+
+// Stable log-softmax implementation
+tt::tt_metal::Tensor log_softmax(const tt::tt_metal::Tensor& t, int dim) {
+    auto t_max = ttnn::max(t, dim, /* keepdim */ true);
+    auto t_sub_max = ttnn::subtract(t, t_max);
+
+    auto t_sub_max_exp = ttnn::exp(t_sub_max);
+    auto t_sum_over_dim = sum_over_dim(t_sub_max_exp, dim);
+
+    auto log_t_sum_over_dim = ttnn::log(t_sum_over_dim);
+    return ttnn::subtract(t_sub_max, log_t_sum_over_dim);
+}
+
+// Stable softmax implementation
+// ttnn::softmax also exists, but it is not stable (even after max subtraction optimization)
+tt::tt_metal::Tensor softmax(const tt::tt_metal::Tensor& t, int dim) {
+    return ttnn::softmax(
+        t,
+        /* dim */ dim,
+        /*memory_config */ std::nullopt,
+        ttml::core::ComputeKernelConfig::softmax(),
+        /*stable*/ true);
+}
+
+tt::tt_metal::Tensor divide(const tt::tt_metal::Tensor& a, const tt::tt_metal::Tensor& b) {
+    auto inv_b = ttnn::reciprocal(/* queue_id */ 0, b);
+    return ttnn::multiply(a, inv_b);
+}
+
+}  // namespace ttml::ttnn_fixed
diff --git a/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.hpp b/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.hpp
new file mode 100644
index 00000000000..dee98552ef6
--- /dev/null
+++ b/tt-train/sources/ttml/ttnn_fixed/trivial_ttnn_ops.hpp
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include <core/ttnn_all_includes.hpp>
+#include <ttnn/tensor/tensor.hpp>
+
+namespace ttml::ttnn_fixed {
+
+tt::tt_metal::Tensor sum_over_dim(const tt::tt_metal::Tensor& t, uint32_t dim);
+tt::tt_metal::Tensor sum_over_batch(const tt::tt_metal::Tensor& t);
+tt::tt_metal::Tensor log_softmax(const tt::tt_metal::Tensor& t, int dim);
+tt::tt_metal::Tensor softmax(const tt::tt_metal::Tensor& t, int dim);
+tt::tt_metal::Tensor divide(const tt::tt_metal::Tensor& a, const tt::tt_metal::Tensor& b);
+
+}  // namespace ttml::ttnn_fixed
diff --git a/tt-train/tests/3rd_party/tokenizers_test.cpp b/tt-train/tests/3rd_party/tokenizers_test.cpp
new file mode 100644
index 00000000000..cd4b146fb60
--- /dev/null
+++ b/tt-train/tests/3rd_party/tokenizers_test.cpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+#include <tokenizers_cpp.h>
+
+#include <chrono>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using tokenizers::Tokenizer;
+
+namespace {
+
+std::string getTestDataDir() {
+    const char* envVar = std::getenv("TEST_DATA_DIR");
+    return (envVar) ? std::string(envVar) : std::string(TEST_DATA_DIR);
+}
+
+std::string load_bytes_from_file(const std::string& path) {
+    std::ifstream file_stream(path, std::ios::in | std::ios::binary);
+    EXPECT_TRUE(file_stream.is_open());
+    std::string data;
+    file_stream.seekg(0, std::ios::end);
+    auto size = file_stream.tellg();
+    file_stream.seekg(0, std::ios::beg);
+    data.resize(size);
+    file_stream.read(data.data(), size);
+    return data;
+}
+
+void test_tokenizer(std::unique_ptr<Tokenizer> tok, bool check_id_back = true) {
+    // Check #1. Encode and Decode
+    std::string prompt = "What is the  capital of Canada?";
+    std::vector<int> ids = tok->Encode(prompt);
+    std::string decoded_prompt = tok->Decode(ids);
+    EXPECT_EQ(decoded_prompt, prompt);
+
+    // Check #2. IdToToken and TokenToId
+    std::vector<int32_t> ids_to_test = {0, 1, 2, 3, 32, 33, 34, 130, 131, 1000};
+    for (auto id : ids_to_test) {
+        auto token = tok->IdToToken(id);
+        auto id_new = tok->TokenToId(token);
+        if (check_id_back) {
+            EXPECT_EQ(id, id_new);
+        }
+    }
+
+    // Check #3. GetVocabSize
+    auto vocab_size = tok->GetVocabSize();
+
+    EXPECT_EQ(vocab_size, 50277);
+}
+
+}  // namespace
+
+TEST(HuggingFaceTokenizer, ExampleUsage) {
+    auto blob = load_bytes_from_file(getTestDataDir() + "/tokenizer.json");
+    auto tok = Tokenizer::FromBlobJSON(blob);
+    test_tokenizer(std::move(tok), true);
+}
diff --git a/tt-train/tests/3rd_party/xtensor_test.cpp b/tt-train/tests/3rd_party/xtensor_test.cpp
new file mode 100644
index 00000000000..ddd5c3b63fd
--- /dev/null
+++ b/tt-train/tests/3rd_party/xtensor_test.cpp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <xtensor/xarray.hpp>
+#include <xtensor/xio.hpp>
+#include <xtensor/xmath.hpp>
+
+TEST(XTensorTest, BasicOperations) {
+    // Create an xtensor array
+    xt::xarray<double> arr = {1.0, 2.0, 3.0, 4.0};
+
+    // Compute the sum
+    double sum = xt::sum(arr)();
+
+    // Check if the sum is correct
+    EXPECT_DOUBLE_EQ(sum, 10.0);
+
+    // Perform element-wise addition
+    xt::xarray<double> arr2 = arr + 2.0;
+
+    // Expected result
+    xt::xarray<double> expected = {3.0, 4.0, 5.0, 6.0};
+
+    // Verify the result
+    EXPECT_TRUE(xt::allclose(arr2, expected));
+}
diff --git a/tt-train/tests/CMakeLists.txt b/tt-train/tests/CMakeLists.txt
new file mode 100644
index 00000000000..20fccfc0764
--- /dev/null
+++ b/tt-train/tests/CMakeLists.txt
@@ -0,0 +1,22 @@
+include(CTest)
+enable_testing()
+
+file(
+    GLOB_RECURSE SOURCES
+    LIST_DIRECTORIES true
+    *.hpp
+    *.cpp
+) # probably should not do that...
+
+#set(SOURCES)
+
+add_executable(ttml_tests ${SOURCES})
+target_link_libraries(
+    ttml_tests
+    GTest::gtest_main
+    ttml
+)
+add_definitions(-DTEST_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/test_data")
+
+include(GoogleTest)
+gtest_discover_tests(ttml_tests)
diff --git a/tt-train/tests/autograd/autograd_tensor.cpp b/tt-train/tests/autograd/autograd_tensor.cpp
new file mode 100644
index 00000000000..19777251505
--- /dev/null
+++ b/tt-train/tests/autograd/autograd_tensor.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/autocast_tensor.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+using namespace ttml;
+
+TEST(AutogradTensorTest, AutogradTensorFLOAT32) {
+    auto tensor = autograd::create_tensor(
+        core::ones(core::create_shape({1, 1, 1, 32}), &autograd::ctx().get_device(), DataType::FLOAT32));
+    const auto& half_precision_tensor = tensor->get_value();
+    const auto& full_precision_tensor = tensor->get_value(autograd::PreferredPrecision::FULL);
+
+    EXPECT_EQ(half_precision_tensor.dtype(), DataType::BFLOAT16);
+    EXPECT_EQ(full_precision_tensor.dtype(), DataType::FLOAT32);
+}
+
+TEST(AutogradTensorTest, AutogradTensorBFLOAT16) {
+    auto tensor = autograd::create_tensor(
+        core::ones(core::create_shape({1, 1, 1, 32}), &autograd::ctx().get_device(), DataType::BFLOAT16));
+    const auto& half_precision_tensor = tensor->get_value();
+    const auto& full_precision_tensor = tensor->get_value(autograd::PreferredPrecision::FULL);
+
+    EXPECT_EQ(half_precision_tensor.dtype(), DataType::BFLOAT16);
+    EXPECT_EQ(full_precision_tensor.dtype(), DataType::BFLOAT16);
+}
+
+TEST(AutogradTensorTest, AutocastTensor) {
+    auto tt_tensor = core::ones(core::create_shape({1, 1, 1, 32}), &autograd::ctx().get_device(), DataType::FLOAT32);
+    auto autocast_tensor = autograd::AutocastTensor(tt_tensor);
+    const auto& half_precision_tensor = autocast_tensor.get_tensor();
+    const auto& full_precision_tensor = autocast_tensor.get_tensor(autograd::PreferredPrecision::FULL);
+
+    EXPECT_EQ(half_precision_tensor.dtype(), DataType::BFLOAT16);
+    EXPECT_EQ(full_precision_tensor.dtype(), DataType::FLOAT32);
+}
diff --git a/tt-train/tests/autograd/autograd_test.cpp b/tt-train/tests/autograd/autograd_test.cpp
new file mode 100644
index 00000000000..70e980e72aa
--- /dev/null
+++ b/tt-train/tests/autograd/autograd_test.cpp
@@ -0,0 +1,105 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <array>
+#include <core/ttnn_all_includes.hpp>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/device.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ops/binary_ops.hpp"
+#include "ops/unary_ops.hpp"
+
+class AutogradTest : public ::testing::Test {
+protected:
+    void TearDown() override {
+        ttml::autograd::ctx().reset_graph();
+    }
+};
+
+TEST_F(AutogradTest, TestSum) {
+    using namespace ttml::ops;
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data1 = {1.F, 2.F, 3.F, 4.F};
+    std::vector<float> test_data2 = {4.F, 3.F, 2.F, 1.F};
+    auto shape = ttml::core::create_shape({1, 1, 1, 4});
+    auto tensor1 = ttml::core::from_vector(test_data1, shape, device);
+    auto tensor2 = ttml::core::from_vector(test_data2, shape, device);
+
+    auto t1 = ttml::autograd::create_tensor(tensor1);
+    auto t2 = ttml::autograd::create_tensor(tensor2);
+
+    auto res = t1 + t2;
+    res->backward();
+    auto res_back = ttml::core::to_vector(res->get_grad());
+    auto t1_back = ttml::core::to_vector(t1->get_grad());
+    auto t2_back = ttml::core::to_vector(t2->get_grad());
+
+    for (float it : res_back) {
+        EXPECT_EQ(it, 1.0F);
+    }
+    for (float it : t1_back) {
+        EXPECT_EQ(it, 1.0F);
+    }
+    for (float it : t2_back) {
+        EXPECT_EQ(it, 1.0F);
+    }
+}
+
+TEST_F(AutogradTest, TestMul) {
+    using namespace ttml::ops;
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data1 = {1.F, 2.F, 3.F, 4.F};
+    std::vector<float> test_data2 = {4.F, 3.F, 2.F, 1.F};
+    auto shape = ttml::core::create_shape({1, 1, 1, 4});
+    auto tensor1 = ttml::core::from_vector(test_data1, shape, device);
+    auto tensor2 = ttml::core::from_vector(test_data2, shape, device);
+
+    auto t1 = ttml::autograd::create_tensor(tensor1);
+    auto t2 = ttml::autograd::create_tensor(tensor2);
+
+    auto res = t1 * t2;
+    res->backward();
+    auto res_back = ttml::core::to_vector(res->get_grad());
+    auto t1_back = ttml::core::to_vector(t1->get_grad());
+    auto t2_back = ttml::core::to_vector(t2->get_grad());
+
+    for (float it : res_back) {
+        EXPECT_EQ(it, 1.0F);
+    }
+    EXPECT_EQ(t2_back, test_data1);
+    EXPECT_EQ(t1_back, test_data2);
+}
+
+TEST_F(AutogradTest, BroadCastBatchTest) {
+    using namespace ttml::ops;
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data1 = {1.F, 2.F, 3.F, 4.F};
+    auto shape = ttml::core::create_shape({1, 1, 1, 4});
+    auto tensor1 = ttml::core::from_vector(test_data1, shape, device);
+    auto t1 = ttml::autograd::create_tensor(tensor1);
+    uint32_t new_batch = 4;
+    auto res = ttml::ops::broadcast_batch(t1, new_batch);
+    res->backward();
+    auto t1_back = ttml::core::to_vector(t1->get_grad());
+    auto batch_shape = ttml::core::create_shape({4, 1, 1, 4});
+    auto new_shape = res->get_value().get_shape();
+    auto back_shape = t1->get_grad().get_shape();
+
+    for (size_t i = 0; i < 4; i++) {
+        EXPECT_EQ(new_shape[i], batch_shape[i]);
+    }
+    for (size_t i = 0; i < 4; i++) {
+        EXPECT_EQ(back_shape[i], shape[i]);
+    }
+    for (size_t i = 0; i < 4; i++) {
+        EXPECT_EQ(t1_back[i], new_batch);
+    }
+}
diff --git a/tt-train/tests/autograd/clip_gradient_norm_test.cpp b/tt-train/tests/autograd/clip_gradient_norm_test.cpp
new file mode 100644
index 00000000000..5d4cccf5656
--- /dev/null
+++ b/tt-train/tests/autograd/clip_gradient_norm_test.cpp
@@ -0,0 +1,73 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "autograd/clip_gradient_norm.hpp"
+
+#include <gtest/gtest.h>
+
+#include "autograd/auto_context.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+TEST(ClipGradientNormTest, GradNormTensor_0) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    std::vector<float> data(81, -1.F);
+    auto shape = ttml::core::create_shape({1, 1, 9, 9});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+
+    ttml::autograd::clip_tensor_norm_(tensor, 3.F);
+
+    auto clipped_vec = ttml::core::to_vector(tensor);
+    auto norm = 0.F;
+    for (auto& value : clipped_vec) {
+        norm += value * value;
+    }
+    norm = std::sqrt(norm);
+    EXPECT_NEAR(norm, 3.F, 1e-2);
+    for (const auto& value : clipped_vec) {
+        EXPECT_NEAR(value, -1.F / 3.F, 1e-2);
+    }
+}
+
+TEST(ClipGradientNormTest, GradNormTensor_1) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    std::vector<float> data(81, -1.F);
+    auto shape = ttml::core::create_shape({1, 1, 9, 9});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+
+    ttml::autograd::clip_tensor_norm_(tensor, 10.F);
+
+    auto clipped_vec = ttml::core::to_vector(tensor);
+    auto norm = 0.F;
+    for (auto& value : clipped_vec) {
+        norm += value * value;
+    }
+    norm = std::sqrt(norm);
+    EXPECT_NEAR(norm, 9.F, 1e-2);
+    for (const auto& value : clipped_vec) {
+        EXPECT_NEAR(value, -1.F, 1e-2);
+    }
+}
+
+TEST(ClipGradientNormTest, GradNormTensor_2) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    std::vector<float> data(81, -1.F);
+    auto shape = ttml::core::create_shape({1, 1, 9, 9});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+
+    ttml::autograd::clip_tensor_norm_(tensor, 1.F);
+
+    auto clipped_vec = ttml::core::to_vector(tensor);
+    auto norm = 0.F;
+    for (auto& value : clipped_vec) {
+        norm += value * value;
+    }
+    norm = std::sqrt(norm);
+    EXPECT_NEAR(norm, 1.F, 1e-2);
+    for (const auto& value : clipped_vec) {
+        EXPECT_NEAR(value, -1.F / 9.F, 1e-2);
+    }
+}
diff --git a/tt-train/tests/autograd/module_base_parameters_test.cpp b/tt-train/tests/autograd/module_base_parameters_test.cpp
new file mode 100644
index 00000000000..1edbf7d212e
--- /dev/null
+++ b/tt-train/tests/autograd/module_base_parameters_test.cpp
@@ -0,0 +1,111 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <autograd/auto_context.hpp>
+#include <memory>
+
+#include "autograd/module_base.hpp"
+#include "modules/dropout_module.hpp"
+#include "modules/layer_norm_module.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/unary_ops.hpp"
+#include "optimizers/adamw.hpp"
+#include "optimizers/sgd.hpp"
+
+class Model : public ttml::autograd::ModuleBase {
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc1;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc2;
+
+public:
+    Model() {
+        m_fc1 = std::make_shared<ttml::modules::LinearLayer>(784, 128);
+        m_fc2 = std::make_shared<ttml::modules::LinearLayer>(128, 64);
+
+        create_name("Model");
+
+        register_module(m_fc1, "fc1");
+        register_module(m_fc2, "fc2");
+    }
+
+    ttml::autograd::TensorPtr operator()(ttml::autograd::TensorPtr x) {
+        x = (*m_fc1)(x);
+        x = ttml::ops::relu(x);
+        x = (*m_fc2)(x);
+        return x;
+    }
+};
+
+class ModelUnusedLayer : public ttml::autograd::ModuleBase {
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc1;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc2;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc3;
+
+public:
+    ModelUnusedLayer() {
+        m_fc1 = std::make_shared<ttml::modules::LinearLayer>(784, 128);
+        m_fc2 = std::make_shared<ttml::modules::LinearLayer>(128, 64);
+        m_fc3 = std::make_shared<ttml::modules::LinearLayer>(64, 32);
+
+        create_name("ModelUnusedLayer");
+
+        register_module(m_fc1, "fc1");
+        register_module(m_fc2, "fc2");
+        register_module(m_fc3, "fc3");
+    }
+
+    ttml::autograd::TensorPtr operator()(ttml::autograd::TensorPtr x) {
+        x = (*m_fc1)(x);
+        x = ttml::ops::relu(x);
+        x = (*m_fc2)(x);
+        return x;
+    }
+};
+
+class ModuleBaseParametersTest : public ::testing::Test {
+protected:
+    void TearDown() override {
+        ttml::autograd::ctx().reset_graph();
+    }
+};
+
+TEST_F(ModuleBaseParametersTest, AllParametersIncluded) {
+    Model model;
+    auto model_params = model.parameters();
+    // 2 LinearLayer modules: 2 weight tensors and 2 bias tensors
+    EXPECT_EQ(model_params.size(), 4);
+};
+
+TEST_F(ModuleBaseParametersTest, UnusedParametersInModuleSGD) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    ModelUnusedLayer model;
+    auto model_params = model.parameters();
+    // 3 LinearLayer modules: 3 weight tensors and 3 bias tensors
+    EXPECT_EQ(model_params.size(), 6);
+    auto optimizer = ttml::optimizers::SGD(model_params, ttml::optimizers::SGDConfig{});
+
+    auto input_tensor =
+        ttml::autograd::create_tensor(ttml::core::zeros(ttml::core::create_shape({1, 1, 1, 784}), device));
+    auto output = model(input_tensor);
+    output->backward();
+    optimizer.step();
+}
+
+TEST_F(ModuleBaseParametersTest, UnusedParametersInModuleAdamW) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    ModelUnusedLayer model;
+    auto model_params = model.parameters();
+    // 3 LinearLayer modules: 3 weight tensors and 3 bias tensors
+    EXPECT_EQ(model_params.size(), 6);
+    auto optimizer = ttml::optimizers::AdamW(model_params, ttml::optimizers::AdamWConfig{});
+
+    auto input_tensor =
+        ttml::autograd::create_tensor(ttml::core::zeros(ttml::core::create_shape({1, 1, 1, 784}), device));
+    auto output = model(input_tensor);
+    output->backward();
+    optimizer.step();
+}
diff --git a/tt-train/tests/core/tensor_utils_test.cpp b/tt-train/tests/core/tensor_utils_test.cpp
new file mode 100644
index 00000000000..196cfb8fff2
--- /dev/null
+++ b/tt-train/tests/core/tensor_utils_test.cpp
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "core/device.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+TEST(TensorUtilsTest, TestFloatToFromTensorEven) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data = {1.F, 5.F, 10.F, 15.F};
+
+    auto shape = ttml::core::create_shape({1, 1, 1, 4});
+    auto tensor = ttml::core::from_vector(test_data, shape, device);
+
+    auto vec_back = ttml::core::to_vector(tensor);
+
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestFloatToFromTensorOdd) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data = {30.F, 20.F, 2.F};
+
+    auto shape = ttml::core::create_shape({1, 1, 1, 3});
+    auto tensor = ttml::core::from_vector(test_data, shape, device);
+
+    auto vec_back = ttml::core::to_vector(tensor);
+
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestUint32ToFromTensorEven) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<uint32_t> test_data = {1, 5, 10, 15};
+
+    auto shape = ttml::core::create_shape({1, 1, 1, 4});
+    auto tensor = ttml::core::from_vector<uint32_t, DataType::UINT32>(test_data, shape, device);
+
+    auto vec_back = ttml::core::to_vector<uint32_t>(tensor);
+
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestUint32ToFromTensorOdd) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<uint32_t> test_data = {30, 20, 2};
+
+    auto shape = ttml::core::create_shape({1, 1, 1, 3});
+    auto tensor = ttml::core::from_vector<uint32_t, DataType::UINT32>(test_data, shape, device);
+
+    auto vec_back = ttml::core::to_vector<uint32_t>(tensor);
+
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestUint32ToFromTensorLargeWithBatch) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<uint32_t> test_data;
+    uint32_t batch_size = 16;
+    uint32_t vec_size = 256 * batch_size;
+    for (size_t i = 0; i < vec_size; i++) {
+        test_data.push_back(i);
+    }
+
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, vec_size / batch_size});
+    auto tensor = ttml::core::from_vector<uint32_t, DataType::UINT32>(test_data, shape, device);
+    auto vec_back = ttml::core::to_vector<uint32_t>(tensor);
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestFloatToFromTensorLargeWithBatch) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data;
+    uint32_t batch_size = 16;
+    uint32_t vec_size = 256 * batch_size;
+    for (size_t i = 0; i < vec_size; i++) {
+        test_data.push_back((float)i / 100.0F);
+    }
+
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, vec_size / batch_size});
+    auto tensor = ttml::core::from_vector(test_data, shape, device);
+    auto vec_back = ttml::core::to_vector(tensor);
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_NEAR(vec_back[i], test_data[i], 0.5F);
+    }
+}
+
+TEST(TensorUtilsTest, TestToFromTensorLarge) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data;
+    uint32_t vec_size = 1337;
+    for (size_t i = 0; i < vec_size; i++) {
+        test_data.push_back((float)i / 100.0F);
+    }
+
+    auto shape = ttml::core::create_shape({1, 1, 1, vec_size});
+    auto tensor = ttml::core::from_vector(test_data, shape, device);
+    auto vec_back = ttml::core::to_vector(tensor);
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_NEAR(vec_back[i], test_data[i], 0.1F);
+    }
+}
+
+TEST(TensorUtilsTest, TestToFromTensorBatch) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data = {1.F, 5.F, 10.F, 15.F};
+
+    auto shape = ttml::core::create_shape({2, 1, 1, 2});
+    auto tensor = ttml::core::from_vector(test_data, shape, device);
+
+    auto vec_back = ttml::core::to_vector(tensor);
+
+    ASSERT_EQ(vec_back.size(), test_data.size());
+    for (size_t i = 0; i < test_data.size(); i++) {
+        EXPECT_EQ(vec_back[i], test_data[i]);
+    }
+}
+
+TEST(TensorUtilsTest, TestOnes_0) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 3, 4});
+    auto tensor = ttml::core::ones(shape, device);
+    auto tensor_vec = ttml::core::to_vector(tensor);
+    for (auto& val : tensor_vec) {
+        EXPECT_EQ(val, 1.F);
+    }
+
+    auto tensor1 = ttml::core::ones(shape, device);
+    auto tensor_vec1 = ttml::core::to_vector(tensor1);
+    for (auto& val : tensor_vec1) {
+        EXPECT_EQ(val, 1.F);
+    }
+}
+
+TEST(TensorUtilsTest, TestOnes_1) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 3, 4});
+    auto tensor_zeros = ttml::core::zeros(shape, device);
+    auto tensor_ones = ttml::core::ones(tensor_zeros.get_shape(), device);
+    auto tensor_vec = ttml::core::to_vector(tensor_ones);
+    for (auto& val : tensor_vec) {
+        EXPECT_EQ(val, 1.F);
+    }
+}
+
+TEST(TensorUtilsTest, TestZeros) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 3, 4});
+    auto tensor = ttml::core::ones(shape, device);
+
+    auto zeros_like_tensor = ttml::core::zeros_like(tensor);
+    auto zeros_like_tensor_vec = ttml::core::to_vector(zeros_like_tensor);
+    for (auto& val : zeros_like_tensor_vec) {
+        EXPECT_EQ(val, 0.F);
+    }
+}
+
+TEST(TensorUtilsTest, TestIsInitialized) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    tt::tt_metal::Tensor tensor;
+    EXPECT_FALSE(ttml::core::is_tensor_initialized(tensor));
+
+    auto shape = ttml::core::create_shape({1, 2, 3, 4});
+    tensor = ttml::core::zeros(shape, device);
+    EXPECT_TRUE(ttml::core::is_tensor_initialized(tensor));
+}
+
+TEST(TensorUtilsTest, TestOnesLike) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 32, 321});
+    auto tensor_zeros = ttml::core::zeros(shape, device);
+    auto tensor_ones = ttml::core::ones_like(tensor_zeros);
+    auto tensor_vec = ttml::core::to_vector(tensor_ones);
+    for (auto& val : tensor_vec) {
+        EXPECT_EQ(val, 1.F);
+    }
+}
+
+TEST(TensorUtilsTest, TestZerosLike) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 31, 322});
+    auto tensor_ones = ttml::core::ones(shape, device);
+    auto tensor_zeros = ttml::core::zeros_like(tensor_ones);
+    auto tensor_vec = ttml::core::to_vector(tensor_zeros);
+    for (auto& val : tensor_vec) {
+        EXPECT_EQ(val, 0.F);
+    }
+}
diff --git a/tt-train/tests/datasets/dataloader_test.cpp b/tt-train/tests/datasets/dataloader_test.cpp
new file mode 100644
index 00000000000..2ed7f3a2fff
--- /dev/null
+++ b/tt-train/tests/datasets/dataloader_test.cpp
@@ -0,0 +1,140 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "datasets/dataloader.hpp"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "datasets/in_memory_dataset.hpp"
+
+using InMemoryDatasetFloatVecInt = ttml::datasets::InMemoryDataset<std::vector<float>, int>;
+class DataLoaderTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        data = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}};
+
+        targets = {1, 2, 3, 4};
+
+        dataset = std::make_unique<InMemoryDatasetFloatVecInt>(data, targets);
+    }
+
+    void TearDown() override {
+        dataset = nullptr;
+    }
+
+    std::vector<std::vector<float>> data;
+    std::vector<int> targets;
+    std::unique_ptr<InMemoryDatasetFloatVecInt> dataset;
+};
+
+// Test that the DataLoader correctly loads batches of data
+TEST_F(DataLoaderTest, TestBatchLoading) {
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt> dataloader(*dataset, 2, false);
+
+    auto it = dataloader.begin();
+    auto batch = *it;
+
+    EXPECT_EQ(batch.size(), 2);
+    EXPECT_EQ(batch[0].first, data[0]);
+    EXPECT_EQ(batch[1].first, data[1]);
+    EXPECT_EQ(batch[0].second, targets[0]);
+    EXPECT_EQ(batch[1].second, targets[1]);
+
+    ++it;
+    batch = *it;
+
+    EXPECT_EQ(batch.size(), 2);
+    EXPECT_EQ(batch[0].first, data[2]);
+    EXPECT_EQ(batch[1].first, data[3]);
+    EXPECT_EQ(batch[0].second, targets[2]);
+    EXPECT_EQ(batch[1].second, targets[3]);
+}
+
+// Test that the DataLoader correctly handles dataset sizes not divisible by batch size
+TEST_F(DataLoaderTest, TestLastBatchHandling) {
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt> dataloader(*dataset, 3, false);
+
+    auto it = dataloader.begin();
+    ++it;  // Move to the last batch
+
+    auto batch = *it;
+
+    EXPECT_EQ(batch.size(), 1);
+    EXPECT_EQ(batch[0].first, data[3]);
+    EXPECT_EQ(batch[0].second, targets[3]);
+}
+
+// Test that shuffling works correctly
+TEST_F(DataLoaderTest, TestShuffling) {
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt> dataloader(*dataset, 2, true);
+
+    auto first_batch_before_shuffle = *dataloader.begin();
+    auto it = dataloader.begin();
+    auto batch_after_shuffle = *it;
+
+    // Since shuffling is random, there's no guarantee that the batches will be different
+    // so we can't do a direct comparison here. However, you can check if they differ:
+    bool different = !(first_batch_before_shuffle == batch_after_shuffle);
+    EXPECT_TRUE(different);  // This might not always hold, depending on the shuffle results
+}
+
+// Test that the DataLoader correctly iterates over the entire dataset
+TEST_F(DataLoaderTest, TestIterationOverDataset) {
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt> dataloader(*dataset, 2);
+
+    size_t count = 0;
+    for (const auto& batch : dataloader) {
+        count += batch.size();
+    }
+
+    EXPECT_EQ(count, data.size());
+}
+
+// Test that the DataLoader works with a single-element batch
+TEST_F(DataLoaderTest, TestSingleElementBatch) {
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt> dataloader(*dataset, 1);
+
+    auto it = dataloader.begin();
+    auto batch = *it;
+
+    EXPECT_EQ(batch.size(), 1);
+    EXPECT_EQ(batch[0].first, data[0]);
+    EXPECT_EQ(batch[0].second, targets[0]);
+}
+
+// Test that the DataLoader correctly applies the collate_fn
+TEST_F(DataLoaderTest, TestCollateFn) {
+    // Custom collate function that sums all elements in the vectors and returns the sum as a new batch
+    auto custom_collate_fn = [](const std::vector<std::pair<std::vector<float>, int>>& batch) {
+        std::vector<std::pair<std::vector<float>, int>> collated_batch;
+        for (const auto& sample : batch) {
+            std::vector<float> summed_data(sample.first.size(), 0.0F);
+            for (size_t i = 0; i < sample.first.size(); ++i) {
+                summed_data[i] += sample.first[i];
+            }
+            collated_batch.emplace_back(summed_data, sample.second);
+        }
+        return collated_batch;
+    };
+
+    ttml::datasets::DataLoader<InMemoryDatasetFloatVecInt, decltype(custom_collate_fn)> dataloader(
+        *dataset, 2, false, custom_collate_fn);
+
+    auto it = dataloader.begin();
+    auto batch = *it;
+
+    EXPECT_EQ(batch.size(), 2);
+    EXPECT_EQ(batch[0].first[0], data[0][0]);  // Ensure the collate function was applied
+    EXPECT_EQ(batch[0].first[1], data[0][1]);
+    EXPECT_EQ(batch[0].first[2], data[0][2]);
+    EXPECT_EQ(batch[0].second, targets[0]);
+
+    EXPECT_EQ(batch[1].first[0], data[1][0]);
+    EXPECT_EQ(batch[1].first[1], data[1][1]);
+    EXPECT_EQ(batch[1].first[2], data[1][2]);
+    EXPECT_EQ(batch[1].second, targets[1]);
+}
diff --git a/tt-train/tests/datasets/generators_test.cpp b/tt-train/tests/datasets/generators_test.cpp
new file mode 100644
index 00000000000..d9c2a5333f5
--- /dev/null
+++ b/tt-train/tests/datasets/generators_test.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "datasets/generators.hpp"
+
+#include <gtest/gtest.h>
+
+#include "autograd/auto_context.hpp"
+
+using namespace ttml::datasets;
+
+// Test case to check the dataset size
+TEST(MakeRegressionTest, DatasetSize) {
+    MakeRegressionParams params = {100, 10, 3};
+    auto dataset = make_regression(params);
+
+    EXPECT_EQ(dataset.get_size(), params.n_samples);
+}
+
+// Test case to check the feature and target vector sizes
+TEST(MakeRegressionTest, FeatureAndTargetVectorSizes) {
+    MakeRegressionParams params = {100, 10, 3};  // 3 targets per sample
+    auto dataset = make_regression(params);
+
+    auto sample = dataset.get_item(0);
+    EXPECT_EQ(sample.first.size(), params.n_features);
+    EXPECT_EQ(sample.second.size(), params.n_targets);  // Target vector should be of size n_targets
+}
+
+// Test case to check reproducibility with a seed
+TEST(MakeRegressionTest, ReproducibilityWithSeed) {
+    MakeRegressionParams params = {100, 10, 3, 0.1F, true};  // 3 targets per sample
+    ttml::autograd::AutoContext::get_instance().set_seed(322);
+    auto dataset1 = make_regression(params);
+    ttml::autograd::AutoContext::get_instance().set_seed(322);
+    auto dataset2 = make_regression(params);
+
+    for (size_t i = 0; i < params.n_samples; ++i) {
+        auto sample1 = dataset1.get_item(i);
+        auto sample2 = dataset2.get_item(i);
+        EXPECT_EQ(sample1.first, sample2.first);
+        EXPECT_EQ(sample1.second, sample2.second);
+    }
+}
+
+// Test case to check if noise affects the targets
+TEST(MakeRegressionTest, NoiseEffectOnTargets) {
+    MakeRegressionParams params = {100, 10, 3, 0.5F, true};  // 3 targets per sample
+    auto dataset = make_regression(params);
+
+    auto sample = dataset.get_item(0);
+
+    // Generate a dataset with no noise for comparison
+    params.noise = 0.0F;
+    auto dataset_no_noise = make_regression(params);
+    auto sample_no_noise = dataset_no_noise.get_item(0);
+
+    for (size_t t = 0; t < params.n_targets; ++t) {
+        EXPECT_NE(sample.second[t], sample_no_noise.second[t]);
+    }
+}
+
+// Test case to check if bias term affects the targets
+TEST(MakeRegressionTest, BiasEffectOnTargets) {
+    MakeRegressionParams params = {100, 10, 3, 0.0F, true};  // 3 targets per sample
+    // Generate a dataset with bias
+    auto dataset_with_bias = make_regression(params);
+    auto sample_with_bias = dataset_with_bias.get_item(0);
+
+    // Generate a dataset without bias
+    params.bias = false;
+    auto dataset_without_bias = make_regression(params);
+    auto sample_without_bias = dataset_without_bias.get_item(0);
+
+    for (size_t t = 0; t < params.n_targets; ++t) {
+        EXPECT_NE(sample_with_bias.second[t], sample_without_bias.second[t]);
+    }
+}
diff --git a/tt-train/tests/datasets/in_memory_token_dataset_test.cpp b/tt-train/tests/datasets/in_memory_token_dataset_test.cpp
new file mode 100644
index 00000000000..f343d2b4958
--- /dev/null
+++ b/tt-train/tests/datasets/in_memory_token_dataset_test.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "datasets/in_memory_token_dataset.hpp"
+
+#include <gtest/gtest.h>
+
+using namespace ttml::datasets;
+
+// Test fixture for InMemoryTokenDataset
+class InMemoryTokenDatasetTest : public ::testing::Test {
+protected:
+    // Example tokens for testing
+    std::vector<uint32_t> tokens = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+    // Sequence length
+    uint32_t seq_length = 3;
+
+    // Create an instance of InMemoryTokenDataset
+    InMemoryTokenDataset dataset = InMemoryTokenDataset(tokens, seq_length);
+};
+
+// Test get_size_impl function
+TEST_F(InMemoryTokenDatasetTest, GetSize) {
+    // Expected number of samples
+    size_t expected_size = tokens.size() - seq_length;
+
+    ASSERT_EQ(dataset.get_size(), expected_size);
+}
+
+// Test get_item_impl function for the first sample
+TEST_F(InMemoryTokenDatasetTest, GetItemFirstSample) {
+    size_t index = 0;
+
+    auto sample = dataset.get_item(index);
+
+    // Expected input and target spans
+    std::vector<uint32_t> expected_input = {1, 2, 3};
+    std::vector<uint32_t> expected_target = {2, 3, 4};
+
+    ASSERT_EQ(std::vector<uint32_t>(sample.first.begin(), sample.first.end()), expected_input);
+    ASSERT_EQ(std::vector<uint32_t>(sample.second.begin(), sample.second.end()), expected_target);
+}
+
+// Test get_item_impl function for the second sample
+TEST_F(InMemoryTokenDatasetTest, GetItemSecondSample) {
+    size_t index = 1;
+
+    auto sample = dataset.get_item(index);
+
+    // Expected input and target spans
+    std::vector<uint32_t> expected_input = {2, 3, 4};
+    std::vector<uint32_t> expected_target = {3, 4, 5};
+
+    ASSERT_EQ(std::vector<uint32_t>(sample.first.begin(), sample.first.end()), expected_input);
+    ASSERT_EQ(std::vector<uint32_t>(sample.second.begin(), sample.second.end()), expected_target);
+}
+
+// Test get_item_impl function for the last sample
+TEST_F(InMemoryTokenDatasetTest, GetItemLastSample) {
+    size_t index = dataset.get_size() - 1;
+
+    auto sample = dataset.get_item(index);
+
+    // Expected input and target spans
+    std::vector<uint32_t> expected_input = {7, 8, 9};
+    std::vector<uint32_t> expected_target = {8, 9, 10};
+
+    ASSERT_EQ(std::vector<uint32_t>(sample.first.begin(), sample.first.end()), expected_input);
+    ASSERT_EQ(std::vector<uint32_t>(sample.second.begin(), sample.second.end()), expected_target);
+}
+
+// Test out of range error for get_item_impl function
+TEST_F(InMemoryTokenDatasetTest, GetItemOutOfRange) {
+    size_t index = dataset.get_size();  // Index out of range
+    auto test_throw_lambda = [&]() { auto _ = dataset.get_item(index); };
+    EXPECT_THROW(test_throw_lambda(), std::out_of_range);
+}
diff --git a/tt-train/tests/datasets/random_split_test.cpp b/tt-train/tests/datasets/random_split_test.cpp
new file mode 100644
index 00000000000..c202700c986
--- /dev/null
+++ b/tt-train/tests/datasets/random_split_test.cpp
@@ -0,0 +1,84 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "datasets/dataset_subset.hpp"
+#include "datasets/in_memory_dataset.hpp"
+#include "datasets/utils.hpp"
+
+using namespace ttml::datasets;
+
+class RandomSplitTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        data = {{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}, {7.0, 8.0}};
+        targets = {0, 1, 0, 1};
+        dataset = std::make_unique<InMemoryDataset<std::vector<float>, int>>(data, targets);
+    }
+
+    void TearDown() override {
+        dataset = nullptr;
+    }
+
+    std::vector<std::vector<float>> data;
+    std::vector<int> targets;
+    std::unique_ptr<InMemoryDataset<std::vector<float>, int>> dataset;
+};
+
+TEST_F(RandomSplitTest, TestCorrectSplitting) {
+    std::array<size_t, 2> split_indices = {2, 2};
+    auto subsets = random_split(*dataset, split_indices);
+
+    ASSERT_EQ(subsets.size(), 2);
+    EXPECT_EQ(subsets[0].get_size(), 2);
+    EXPECT_EQ(subsets[1].get_size(), 2);
+
+    // Check that the subsets contain correct number of samples
+    for (const auto& subset : subsets) {
+        for (size_t i = 0; i < subset.get_size(); ++i) {
+            auto sample = subset.get_item(i);
+            ASSERT_TRUE(std::find(data.begin(), data.end(), sample.first) != data.end());
+            ASSERT_TRUE(std::find(targets.begin(), targets.end(), sample.second) != targets.end());
+        }
+    }
+}
+
+TEST_F(RandomSplitTest, TestShuffling) {
+    ttml::autograd::AutoContext::get_instance().set_seed(322);
+    std::array<size_t, 4> batch_indices = {0, 1, 2, 3};
+    auto original_data = dataset->get_batch(batch_indices);
+    std::array<size_t, 2> split_indices = {2, 2};
+    auto subsets = random_split(*dataset, split_indices, true);
+
+    // We expect that at least one of the first elements in the subsets is different from the original order
+    bool shuffled =
+        (subsets[0].get_item(0).first != original_data[0].first ||
+         subsets[1].get_item(0).first != original_data[2].first);
+    EXPECT_TRUE(shuffled);
+}
+
+TEST_F(RandomSplitTest, TestSingleSubset) {
+    std::array<size_t, 1> split_indices = {4};
+    auto subsets = random_split(*dataset, split_indices, false);
+
+    ASSERT_EQ(subsets.size(), 1);
+    EXPECT_EQ(subsets[0].get_size(), 4);
+
+    for (size_t i = 0; i < subsets[0].get_size(); ++i) {
+        auto sample = subsets[0].get_item(i);
+        EXPECT_EQ(sample.first, data[i]);
+        EXPECT_EQ(sample.second, targets[i]);
+    }
+}
+
+TEST_F(RandomSplitTest, TestInvalidSplitting) {
+    std::array<size_t, 2> invalid_split0 = {3, 2};
+    std::array<size_t, 2> invalid_split1 = {1, 2};
+    EXPECT_THROW(random_split(*dataset, invalid_split0), std::invalid_argument);
+    EXPECT_THROW(random_split(*dataset, invalid_split1), std::invalid_argument);
+}
diff --git a/tt-train/tests/datasets/utils_test.cpp b/tt-train/tests/datasets/utils_test.cpp
new file mode 100644
index 00000000000..4144901f8d0
--- /dev/null
+++ b/tt-train/tests/datasets/utils_test.cpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "datasets/utils.hpp"
+
+#include <gtest/gtest.h>
+
+namespace ttml::datasets {}
diff --git a/tt-train/tests/model/linear_regression_full_test.cpp b/tt-train/tests/model/linear_regression_full_test.cpp
new file mode 100644
index 00000000000..1af4f315405
--- /dev/null
+++ b/tt-train/tests/model/linear_regression_full_test.cpp
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <fmt/format.h>
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/losses.hpp"
+#include "optimizers/sgd.hpp"
+
+class LinearRegressionFullTest : public ::testing::Test {
+protected:
+    void TearDown() override {
+        ttml::autograd::ctx().reset_graph();
+    }
+};
+
+TEST_F(LinearRegressionFullTest, TestLinearRegressionFull) {
+    using namespace ttml::ops;
+    auto* device = &ttml::autograd::ctx().get_device();
+    const size_t batch_size = 128;
+    const size_t num_features = 64;
+    std::vector<float> features;
+    features.reserve(batch_size * num_features);
+    for (size_t i = 0; i < batch_size; ++i) {
+        for (size_t j = 0; j < num_features; ++j) {
+            features.push_back(static_cast<float>(i) * 0.1F);
+        }
+    }
+
+    std::vector<float> targets;
+    for (size_t i = 0; i < batch_size; ++i) {
+        targets.push_back(static_cast<float>(i) * 0.1F);
+    }
+
+    auto data_tensor = ttml::autograd::create_tensor(
+        ttml::core::from_vector(features, ttml::core::create_shape({batch_size, 1, 1, num_features}), device));
+
+    auto targets_tensor = ttml::autograd::create_tensor(
+        ttml::core::from_vector(targets, ttml::core::create_shape({batch_size, 1, 1, 1}), device));
+
+    auto model = ttml::modules::LinearLayer(num_features, 1);
+    auto optimizer = ttml::optimizers::SGD(model.parameters(), {0.01F, 0.0F});
+
+    const size_t steps = 10;
+    for (size_t step = 0; step < steps; ++step) {
+        optimizer.zero_grad();
+        auto prediction = model(data_tensor);
+        auto loss = ttml::ops::mse_loss(prediction, targets_tensor);
+        loss->backward();
+        optimizer.step();
+        ttml::autograd::ctx().reset_graph();
+    }
+}
diff --git a/tt-train/tests/model/model_names_test.cpp b/tt-train/tests/model/model_names_test.cpp
new file mode 100644
index 00000000000..307b063df6d
--- /dev/null
+++ b/tt-train/tests/model/model_names_test.cpp
@@ -0,0 +1,75 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <autograd/auto_context.hpp>
+#include <memory>
+
+#include "autograd/module_base.hpp"
+#include "modules/dropout_module.hpp"
+#include "modules/layer_norm_module.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/unary_ops.hpp"
+
+class MNISTModel : public ttml::autograd::ModuleBase {
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc1;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc2;
+    std::shared_ptr<ttml::modules::LinearLayer> m_fc3;
+    std::shared_ptr<ttml::modules::DropoutLayer> m_dropout;
+    std::shared_ptr<ttml::modules::LayerNormLayer> m_layernorm1;
+    std::shared_ptr<ttml::modules::LayerNormLayer> m_layernorm2;
+
+public:
+    MNISTModel() {
+        m_fc1 = std::make_shared<ttml::modules::LinearLayer>(784, 128);
+        m_fc2 = std::make_shared<ttml::modules::LinearLayer>(128, 64);
+        m_fc3 = std::make_shared<ttml::modules::LinearLayer>(64, 10);
+        m_dropout = std::make_shared<ttml::modules::DropoutLayer>(0.2F);
+
+        m_layernorm1 = std::make_shared<ttml::modules::LayerNormLayer>(128);
+        m_layernorm2 = std::make_shared<ttml::modules::LayerNormLayer>(10);
+
+        create_name("MNISTModel");
+
+        register_module(m_fc1, "fc1");
+        register_module(m_fc2, "fc2");
+        register_module(m_fc3, "fc3");
+        register_module(m_dropout, "dropout");
+        register_module(m_layernorm1, "layernorm1");
+        register_module(m_layernorm2, "layernorm2");
+    }
+
+    ttml::autograd::TensorPtr operator()(ttml::autograd::TensorPtr x) {
+        x = (*m_dropout)(x);
+        x = (*m_fc1)(x);
+        x = (*m_layernorm1)(x);
+        x = ttml::ops::relu(x);
+        x = (*m_fc2)(x);
+        x = (*m_layernorm2)(x);
+        x = ttml::ops::relu(x);
+        x = (*m_fc3)(x);
+        return x;
+    }
+};
+
+class ModelNamesFullTest : public ::testing::Test {
+protected:
+    void TearDown() override {
+        ttml::autograd::ctx().reset_graph();
+    }
+};
+
+TEST_F(ModelNamesFullTest, SameModel) {
+    MNISTModel model1;
+    MNISTModel model2;
+
+    auto model1_params = model1.parameters();
+    auto model2_params = model2.parameters();
+
+    EXPECT_EQ(model1_params.size(), model2_params.size());
+    for (const auto& [name, tensor] : model1_params) {
+        EXPECT_TRUE(model2_params.find(name) != model2_params.end());
+    }
+};
diff --git a/tt-train/tests/ops/embedding_op_test.cpp b/tt-train/tests/ops/embedding_op_test.cpp
new file mode 100644
index 00000000000..d3cf0a4bb78
--- /dev/null
+++ b/tt-train/tests/ops/embedding_op_test.cpp
@@ -0,0 +1,119 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ops/embedding_op.hpp"
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ops/losses.hpp"
+
+TEST(EmbeddingOpTest, EmbeddingForwardBackward) {
+    using namespace ttml;
+
+    auto* device = &autograd::ctx().get_device();
+    uint32_t num_embeddings = 32;
+    uint32_t embedding_dim = 32;
+    auto weight_tensor = core::zeros(core::create_shape({1, 1, num_embeddings, embedding_dim}), device);
+    autograd::TensorPtr weight = autograd::create_tensor(weight_tensor);
+
+    uint32_t batch_size = 1;
+    uint32_t sentence_size = 32;
+    std::vector<uint32_t> input_data((size_t)batch_size * sentence_size);
+    std::iota(input_data.begin(), input_data.end(), 0U);
+    auto input_tensor = core::from_vector<uint32_t, DataType::UINT32>(
+        input_data, core::create_shape({batch_size, 1, 1, sentence_size}), device, Layout::ROW_MAJOR);
+    autograd::TensorPtr input = autograd::create_tensor(input_tensor);
+
+    autograd::TensorPtr embeddings = ops::embedding_op(input, weight);
+
+    std::vector<float> target_vector((size_t)batch_size * sentence_size * embedding_dim);
+    for (uint32_t i = 0; i < batch_size * sentence_size; i++) {
+        for (uint32_t j = 0; j < embedding_dim; j++) {
+            target_vector[embedding_dim * i + j] = static_cast<float>(i);
+        }
+    }
+    auto target_tensor = autograd::create_tensor(
+        core::from_vector(target_vector, core::create_shape({batch_size, 1, sentence_size, embedding_dim}), device));
+    auto result = ttml::ops::mse_loss(embeddings, target_tensor);
+    result->backward();
+
+    auto weight_grad_tensor = weight->get_grad();
+    auto weight_grad_data = core::to_vector(weight_grad_tensor);
+    for (uint32_t i = 0; i < num_embeddings; i++) {
+        for (uint32_t j = 0; j < embedding_dim; j++) {
+            EXPECT_NEAR(
+                weight_grad_data[embedding_dim * i + j],
+                -static_cast<float>(i) / sentence_size / embedding_dim / batch_size * 2.F,
+                1e-2);
+        }
+    }
+}
+
+TEST(EmbeddingOpTest, EmbeddingNumEmbeddingsEmbeddingDimNotDivisibleBy32) {
+    using namespace ttml;
+
+    auto* device = &autograd::ctx().get_device();
+    uint32_t num_embeddings = 13;
+    uint32_t embedding_dim = 26;
+    auto weight_tensor = core::zeros(core::create_shape({1, 1, num_embeddings, embedding_dim}), device);
+    autograd::TensorPtr weight = autograd::create_tensor(weight_tensor);
+
+    uint32_t batch_size = 1;
+    uint32_t sentence_size = 32;
+    std::vector<uint32_t> input_data((size_t)batch_size * sentence_size);
+    std::iota(input_data.begin(), input_data.end(), 0U);
+    auto input_tensor = core::from_vector<uint32_t, DataType::UINT32>(
+        input_data, core::create_shape({batch_size, 1, 1, sentence_size}), device, Layout::ROW_MAJOR);
+    autograd::TensorPtr input = autograd::create_tensor(input_tensor);
+
+    EXPECT_NO_THROW(ops::embedding_op(input, weight));
+}
+
+TEST(EmbeddingOpTest, EmbeddingSentenceDimNotDivisibleBy32) {
+    using namespace ttml;
+
+    auto* device = &autograd::ctx().get_device();
+    uint32_t num_embeddings = 32;
+    uint32_t embedding_dim = 32;
+    auto weight_tensor = core::zeros(core::create_shape({1, 1, num_embeddings, embedding_dim}), device);
+    autograd::TensorPtr weight = autograd::create_tensor(weight_tensor);
+
+    uint32_t batch_size = 1;
+    uint32_t sentence_size = 13;
+    std::vector<uint32_t> input_data((size_t)batch_size * sentence_size);
+    std::iota(input_data.begin(), input_data.end(), 0U);
+    auto input_tensor = core::from_vector<uint32_t, DataType::UINT32>(
+        input_data, core::create_shape({batch_size, 1, 1, sentence_size}), device, Layout::ROW_MAJOR);
+    autograd::TensorPtr input = autograd::create_tensor(input_tensor);
+
+    EXPECT_NO_THROW(ops::embedding_op(input, weight));
+}
+
+// This test was previously throwing an exception, but now it just freezes
+// The main reason that we are passing input_tensor as tiled, but it should be row major
+// We will uncomment it once the issue is fixed at ttnn side
+// TEST(EmbeddingOpTest, EmbeddingBadLayout_BROKEN) {
+//     using namespace ttml;
+
+//     auto* device = &autograd::ctx().get_device();
+//     uint32_t num_embeddings = 32;
+//     uint32_t embedding_dim = 32;
+//     auto weight_tensor = core::zeros(core::create_shape({1, 1, num_embeddings, embedding_dim}), device);
+//     autograd::TensorPtr weight = autograd::create_tensor(weight_tensor);
+
+//     uint32_t batch_size = 1;
+//     uint32_t sentence_size = 32;
+//     std::vector<uint32_t> input_data((size_t)batch_size * sentence_size);
+//     std::iota(input_data.begin(), input_data.end(), 0U);
+//     auto input_tensor =
+//         core::from_vector<uint32_t>(input_data, core::create_shape({batch_size, 1, 1, sentence_size}), device);
+//     autograd::TensorPtr input = autograd::create_tensor(input_tensor);
+
+//     EXPECT_ANY_THROW(ops::embedding_op(input, weight));
+// }
diff --git a/tt-train/tests/ops/layer_norm_op_test.cpp b/tt-train/tests/ops/layer_norm_op_test.cpp
new file mode 100644
index 00000000000..1926fbb5ba2
--- /dev/null
+++ b/tt-train/tests/ops/layer_norm_op_test.cpp
@@ -0,0 +1,102 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+#include <random>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ops/layernorm_op.hpp"
+#include "ops/losses.hpp"
+
+TEST(LayerNormOpTest, LayerNormOp_0) {
+    using namespace ttml;
+
+    uint32_t batch_size = 6;
+    uint32_t seq_len = 13;
+    uint32_t heads = 16;
+    uint32_t features = 333;
+
+    uint32_t size = batch_size * seq_len * heads;
+
+    std::vector<float> test_data;
+    test_data.reserve((size_t)batch_size * seq_len * heads * features);
+    for (uint32_t i = 0; i < batch_size * seq_len * heads; i++) {
+        float mean = (float)i / (float)size;
+        float stddev = 1.F + (float)i / (float)(size * 2);
+        std::mt19937 gen(i);
+        std::normal_distribution<float> dist(mean, stddev);
+        for (uint32_t j = 0; j < features; j++) {
+            test_data.push_back(dist(gen));
+        }
+    }
+
+    auto tensor = autograd::create_tensor(core::from_vector(
+        test_data, core::create_shape({batch_size, seq_len, heads, features}), &autograd::ctx().get_device()));
+
+    auto gamma =
+        autograd::create_tensor(core::ones(core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+    auto beta =
+        autograd::create_tensor(core::zeros(core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+
+    auto result = ops::layernorm(tensor, gamma, beta);
+
+    auto result_tensor = result->get_value();
+    auto result_data = core::to_vector(result_tensor);
+    for (uint32_t i = 0; i < batch_size * seq_len * heads; i++) {
+        uint32_t idx = i * features;
+
+        float exp_mean = 0.F;
+        float exp_var = 0.F;
+        for (uint32_t j = 0; j < features; ++j) {
+            exp_mean += result_data[idx + j];
+            exp_var += result_data[idx + j] * result_data[idx + j];
+        }
+
+        exp_mean /= (float)features;
+        exp_var /= (float)features;
+        exp_var = exp_var - exp_mean * exp_mean;
+
+        EXPECT_NEAR(exp_mean, 0.F, 5e-2);
+        EXPECT_NEAR(exp_var, 1.F, 5e-2);
+    }
+}
+
+TEST(LayerNormOpTest, LayerNormOp_backward) {
+    using namespace ttml;
+
+    uint32_t batch_size = 1;
+    uint32_t seq_len = 1;
+    uint32_t heads = 1;
+    uint32_t features = 3;
+
+    std::vector<float> test_data{0.0, 1.0, 2.0};
+    auto tensor = autograd::create_tensor(core::from_vector(
+        test_data, core::create_shape({batch_size, seq_len, heads, features}), &autograd::ctx().get_device()));
+
+    auto gamma = autograd::create_tensor(
+        core::from_vector({1, 2, 3}, core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+    auto beta =
+        autograd::create_tensor(core::zeros(core::create_shape({1, 1, 1, features}), &autograd::ctx().get_device()));
+
+    auto result = ops::layernorm(tensor, gamma, beta);
+    auto target = autograd::create_tensor(core::zeros_like(tensor->get_value()));
+    result = ops::mse_loss(result, target);
+    result->backward();
+
+    auto tensor_grad = core::to_vector(tensor->get_grad());
+    auto gamma_grad = core::to_vector(gamma->get_grad());
+    auto beta_grad = core::to_vector(beta->get_grad());
+    std::vector<float> expected_tensor_grad{1.3333, -2.6667, 1.3333};
+    std::vector<float> expected_gamma_grad{1.0000, 0.0000, 3.0000};
+    std::vector<float> expected_beta_grad{-0.8165, 0.0000, 2.4495};
+    for (uint32_t i = 0; i < features; ++i) {
+        EXPECT_NEAR(beta_grad[i], expected_beta_grad[i], 5e-2);
+        EXPECT_NEAR(gamma_grad[i], expected_gamma_grad[i], 5e-2);
+        EXPECT_NEAR(tensor_grad[i], expected_tensor_grad[i], 6e-2);
+    }
+}
diff --git a/tt-train/tests/ops/linear_op_test.cpp b/tt-train/tests/ops/linear_op_test.cpp
new file mode 100644
index 00000000000..eb97fd7fad9
--- /dev/null
+++ b/tt-train/tests/ops/linear_op_test.cpp
@@ -0,0 +1,116 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ops/linear_op.hpp"
+
+#include <gtest/gtest.h>
+
+#include "autograd/auto_context.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "init/tensor_initializers.hpp"
+
+void compare_tensors(const ttnn::Tensor& t1, const ttnn::Tensor& t2, float eps) {
+    ASSERT_EQ(t1.get_shape(), t2.get_shape());
+    auto t1_vec = ttml::core::to_vector(t1);
+    auto t2_vec = ttml::core::to_vector(t2);
+    ASSERT_EQ(t1_vec.size(), t2_vec.size());
+    bool all_equals = true;
+    for (size_t i = 0; i < t1_vec.size() && all_equals; i++) {
+        if (std::abs(t1_vec[i] - t2_vec[i]) > eps) {
+            all_equals = false;
+            EXPECT_NEAR(t1_vec[i], t2_vec[i], eps);
+        }
+    }
+    EXPECT_TRUE(all_equals);
+}
+
+bool compare_tensors_for_broken(const ttnn::Tensor& t1, const ttnn::Tensor& t2, float eps) {
+    if (t1.get_shape() != t2.get_shape()) {
+        return false;
+    }
+
+    auto t1_vec = ttml::core::to_vector(t1);
+    auto t2_vec = ttml::core::to_vector(t2);
+    bool all_equals = true;
+    for (size_t i = 0; i < t1_vec.size() && all_equals; i++) {
+        if (std::abs(t1_vec[i] - t2_vec[i]) > eps) {
+            all_equals = false;
+        }
+    }
+    return all_equals;
+}
+
+TEST(LinearOpTest, TTNNBackwardGoodShape) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto tensor = ttml::autograd::create_tensor();
+    ttml::init::uniform_init(tensor, ttml::core::create_shape({64, 1, 256, 64}), ttml::init::UniformRange{-0.1F, 0.1F});
+
+    auto weight = ttml::autograd::create_tensor();
+    ttml::init::uniform_init(weight, ttml::core::create_shape({1, 1, 64, 64}), ttml::init::UniformRange{-0.1F, 0.1F});
+
+    auto bias = ttml::autograd::create_tensor();
+    ttml::init::uniform_init(bias, ttml::core::create_shape({1, 1, 1, 64}), ttml::init::UniformRange{-0.1F, 0.1F});
+
+    auto out = ttml::autograd::create_tensor();
+    ttml::init::uniform_init(out, ttml::core::create_shape({64, 1, 256, 64}), ttml::init::UniformRange{-0.1F, 0.1F});
+    out->set_grad(out->get_value());
+
+    ttml::ops::ttnn_linear_backward(tensor, weight, bias, out, ttml::core::ComputeKernelConfig::precise());
+    auto ttnn_tensor_grad = tensor->get_grad();
+    auto ttnn_weight_grad = weight->get_grad();
+    auto ttnn_bias_grad = bias->get_grad();
+    tensor->set_grad(ttnn::Tensor());
+    weight->set_grad(ttnn::Tensor());
+    bias->set_grad(ttnn::Tensor());
+
+    ttml::ops::moreh_linear_backward(tensor, weight, bias, out, ttml::core::ComputeKernelConfig::precise());
+    auto moreh_tensor_grad = tensor->get_grad();
+    auto moreh_weight_grad = weight->get_grad();
+    auto moreh_bias_grad = bias->get_grad();
+
+    const float eps = 2e-2F;
+    compare_tensors(ttnn_tensor_grad, moreh_tensor_grad, eps);
+    compare_tensors(ttnn_weight_grad, moreh_weight_grad, eps);
+    compare_tensors(ttnn_bias_grad, moreh_bias_grad, eps);
+}
+
+// Currently raises SEGFAULT
+
+// TEST(LinearOpTest, TTNNBackwardBadShape_BROKEN) {
+//     auto* device = &ttml::autograd::ctx().get_device();
+//     auto tensor = ttml::autograd::create_tensor();
+//     ttml::init::uniform_init(tensor, ttml::core::create_shape({128, 1, 1, 128}), ttml::init::UniformRange{-0.1F,
+//     0.1F});
+
+//     auto weight = ttml::autograd::create_tensor();
+//     ttml::init::uniform_init(weight, ttml::core::create_shape({1, 1, 256, 128}), ttml::init::UniformRange{-0.1F,
+//     0.1F});
+
+//     auto bias = ttml::autograd::create_tensor();
+//     ttml::init::uniform_init(bias, ttml::core::create_shape({1, 1, 1, 256}), ttml::init::UniformRange{-0.1F, 0.1F});
+
+//     auto out = ttml::autograd::create_tensor();
+//     ttml::init::uniform_init(out, ttml::core::create_shape({128, 1, 1, 256}), ttml::init::UniformRange{-0.1F, 0.1F});
+//     out->set_grad(out->get_value());
+
+//     ttml::ops::ttnn_linear_backward(tensor, weight, bias, out);
+//     auto ttnn_tensor_grad = tensor->get_grad();
+//     auto ttnn_weight_grad = weight->get_grad();
+//     auto ttnn_bias_grad = bias->get_grad();
+//     tensor->set_grad(ttnn::Tensor());
+//     weight->set_grad(ttnn::Tensor());
+//     bias->set_grad(ttnn::Tensor());
+
+//     ttml::ops::moreh_linear_backward(tensor, weight, bias, out);
+//     auto moreh_tensor_grad = tensor->get_grad();
+//     auto moreh_weight_grad = weight->get_grad();
+//     auto moreh_bias_grad = bias->get_grad();
+
+//     const float eps = 2e-2F;
+//     bool success = compare_tensors_for_broken(ttnn_tensor_grad, moreh_tensor_grad, eps) &&
+//                    compare_tensors_for_broken(ttnn_weight_grad, moreh_weight_grad, eps) &&
+//                    compare_tensors_for_broken(ttnn_bias_grad, moreh_bias_grad, eps);
+//     EXPECT_FALSE(success);
+// }
diff --git a/tt-train/tests/ops/unary_ops_test.cpp b/tt-train/tests/ops/unary_ops_test.cpp
new file mode 100644
index 00000000000..504c0f01cc2
--- /dev/null
+++ b/tt-train/tests/ops/unary_ops_test.cpp
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ops/unary_ops.hpp"
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "autograd/tensor.hpp"
+#include "core/tt_tensor_utils.hpp"
+
+TEST(UnaryOpsTest, GlobalMean) {
+    std::vector<float> test_data = {1.F, 2.F, 3.F, 4.F, 1.F, 2.F, 3.F, 4.F};
+
+    auto shape = ttml::core::create_shape({2, 1, 1, 4});
+    auto tensor = ttml::core::from_vector(test_data, shape, &ttml::autograd::ctx().get_device());
+
+    auto tensor_ptr = ttml::autograd::create_tensor(tensor);
+
+    auto result = ttml::ops::mean(tensor_ptr);
+    auto result_data = ttml::core::to_vector(result->get_value());
+
+    ASSERT_EQ(result_data.size(), 1);
+    EXPECT_FLOAT_EQ(result_data[0], 2.5F);
+
+    result->backward();
+    auto tensor_grad = ttml::core::to_vector(tensor_ptr->get_grad());
+    ASSERT_EQ(tensor_grad.size(), test_data.size());
+    for (float it : tensor_grad) {
+        EXPECT_FLOAT_EQ(it, 0.125F);
+    }
+}
+
+TEST(UnaryOpsTest, LogSoftmax) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    std::vector<float> test_data = {-0.1F, -0.2F, -0.3F, -0.4F, 0.F, -0.2F, -0.3F, -0.4F};
+    auto tensor = ttml::core::from_vector(test_data, ttml::core::create_shape({2, 1, 1, 4}), device);
+    auto tensor_ptr = ttml::autograd::create_tensor(tensor);
+    auto result = ttml::ops::log_softmax(tensor_ptr, 3);
+    auto result_data = ttml::core::to_vector(result->get_value());
+    std::vector<float> expected_data = {
+        -1.24253553F, -1.34253553F, -1.44253553F, -1.54253553F, -1.17244159F, -1.37244159F, -1.47244159F, -1.57244159F};
+    EXPECT_EQ(result_data.size(), expected_data.size());
+    for (uint32_t idx = 0; idx < result_data.size(); ++idx) {
+        EXPECT_NEAR(result_data[idx], expected_data[idx], 2e-2F);
+    }
+
+    result->backward();
+    auto tensor_grad = ttml::core::to_vector(tensor_ptr->get_grad());
+    std::vector<float> expected_grad = {-0.156F, -0.03906F, 0.05078F, 0.1406F, -0.25F, -0.0156F, 0.07421F, 0.16406F};
+    EXPECT_EQ(tensor_grad.size(), expected_grad.size());
+    for (uint32_t idx = 0; idx < tensor_grad.size(); ++idx) {
+        EXPECT_NEAR(tensor_grad[idx], expected_grad[idx], 2e-2F);
+    }
+}
diff --git a/tt-train/tests/optimizers/adamw_test.cpp b/tt-train/tests/optimizers/adamw_test.cpp
new file mode 100644
index 00000000000..356b364e6b8
--- /dev/null
+++ b/tt-train/tests/optimizers/adamw_test.cpp
@@ -0,0 +1,70 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "optimizers/adamw.hpp"
+
+#include <fmt/format.h>
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+
+#include "autograd/auto_context.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "modules/linear_module.hpp"
+#include "ops/losses.hpp"
+
+class AdamWFullTest : public ::testing::Test {
+protected:
+    void TearDown() override {
+        ttml::autograd::ctx().reset_graph();
+    }
+};
+
+TEST_F(AdamWFullTest, AdamWTest) {
+    using namespace ttml::ops;
+    auto* device = &ttml::autograd::ctx().get_device();
+    const size_t batch_size = 32;
+    const size_t num_features = 64;
+    std::vector<float> features;
+    features.reserve(batch_size * num_features);
+    for (size_t i = 0; i < batch_size; ++i) {
+        for (size_t j = 0; j < num_features; ++j) {
+            features.push_back(static_cast<float>(i) * 0.1F);
+        }
+    }
+
+    std::vector<float> targets;
+    for (size_t i = 0; i < batch_size; ++i) {
+        targets.push_back(static_cast<float>(i) * 0.1F);
+    }
+
+    auto data_tensor = ttml::autograd::create_tensor(
+        ttml::core::from_vector(features, ttml::core::create_shape({batch_size, 1, 1, num_features}), device));
+
+    auto targets_tensor = ttml::autograd::create_tensor(
+        ttml::core::from_vector(targets, ttml::core::create_shape({batch_size, 1, 1, 1}), device));
+
+    auto model = ttml::modules::LinearLayer(num_features, 1);
+    auto adamw_config = ttml::optimizers::AdamWConfig();
+    adamw_config.lr = 1e-4F;
+    adamw_config.weight_decay = 0.F;
+    auto optimizer = ttml::optimizers::AdamW(model.parameters(), adamw_config);
+
+    const size_t steps = 100;
+    std::vector<float> losses;
+    losses.reserve(steps);
+    for (size_t step = 0; step < steps; ++step) {
+        optimizer.zero_grad();
+        auto prediction = model(data_tensor);
+        auto loss = ttml::ops::mse_loss(prediction, targets_tensor);
+        auto loss_value = ttml::core::to_vector(loss->get_value())[0];
+        losses.emplace_back(loss_value);
+        loss->backward();
+        optimizer.step();
+        ttml::autograd::ctx().reset_graph();
+    }
+
+    EXPECT_LT(losses.back(), losses.front());
+    EXPECT_LT(losses.back(), 1e-3F);
+}
diff --git a/tt-train/tests/serialization/msgpack_serializer_test.cpp b/tt-train/tests/serialization/msgpack_serializer_test.cpp
new file mode 100644
index 00000000000..b725f45174a
--- /dev/null
+++ b/tt-train/tests/serialization/msgpack_serializer_test.cpp
@@ -0,0 +1,241 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "serialization/msgpack_file.hpp"
+
+class MsgPackFileTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        // Remove test file if it exists
+        if (std::filesystem::exists(test_filename)) {
+            std::filesystem::remove(test_filename);
+        }
+    }
+
+    void TearDown() override {
+        // Clean up test file after each test
+        if (std::filesystem::exists(test_filename)) {
+            std::filesystem::remove(test_filename);
+        }
+    }
+
+    const std::string test_filename = "/tmp/test_data.msgpack";
+};
+
+TEST_F(MsgPackFileTest, SerializeDeserializePrimitives) {
+    ttml::serialization::MsgPackFile serializer;
+
+    // Put primitive data
+    serializer.put("int_key", 42);
+    serializer.put("float_key", 3.14F);
+    serializer.put("double_key", 2.71828);
+    serializer.put("uint_key", static_cast<uint32_t>(123456789));
+    serializer.put("string_key", "Hello, World!");
+
+    // Serialize to file
+    ASSERT_NO_THROW(serializer.serialize(test_filename));
+
+    // Deserialize from file
+    ttml::serialization::MsgPackFile deserializer;
+    ASSERT_NO_THROW(deserializer.deserialize(test_filename));
+
+    // Get and check data
+    int int_value = 0;
+    EXPECT_NO_THROW(deserializer.get("int_key", int_value));
+    EXPECT_EQ(int_value, 42);
+
+    float float_value = 0;
+    EXPECT_NO_THROW(deserializer.get("float_key", float_value));
+    EXPECT_FLOAT_EQ(float_value, 3.14f);
+
+    double double_value = 0;
+    EXPECT_NO_THROW(deserializer.get("double_key", double_value));
+    EXPECT_DOUBLE_EQ(double_value, 2.71828);
+
+    uint32_t uint_value = 0;
+    EXPECT_NO_THROW(deserializer.get("uint_key", uint_value));
+    EXPECT_EQ(uint_value, 123456789U);
+
+    std::string string_value;
+    EXPECT_NO_THROW(deserializer.get("string_key", string_value));
+    EXPECT_EQ(string_value, "Hello, World!");
+}
+
+TEST_F(MsgPackFileTest, SerializeDeserializeVectors) {
+    ttml::serialization::MsgPackFile serializer;
+
+    // Prepare data
+    std::vector<int> int_vec = {1, 2, 3, 4, 5};
+    std::vector<float> float_vec = {1.1F, 2.2F, 3.3F};
+    std::vector<double> double_vec = {0.1, 0.01, 0.001};
+    std::vector<uint32_t> uint_vec = {100, 200, 300};
+    std::vector<std::string> string_vec = {"apple", "banana", "cherry"};
+
+    // Put vector data
+    serializer.put("int_vector_key", std::span<const int>(int_vec));
+    serializer.put("float_vector_key", std::span<const float>(float_vec));
+    serializer.put("double_vector_key", std::span<const double>(double_vec));
+    serializer.put("uint_vector_key", std::span<const uint32_t>(uint_vec));
+    serializer.put("string_vector_key", std::span<const std::string>(string_vec));
+
+    // Serialize to file
+    ASSERT_NO_THROW(serializer.serialize(test_filename));
+
+    // Deserialize from file
+    ttml::serialization::MsgPackFile deserializer;
+    ASSERT_NO_THROW(deserializer.deserialize(test_filename));
+
+    // Get and check data
+    std::vector<int> int_vec_result;
+    EXPECT_NO_THROW(deserializer.get("int_vector_key", int_vec_result));
+    EXPECT_EQ(int_vec_result, int_vec);
+
+    std::vector<float> float_vec_result;
+    EXPECT_NO_THROW(deserializer.get("float_vector_key", float_vec_result));
+    EXPECT_EQ(float_vec_result, float_vec);
+
+    std::vector<double> double_vec_result;
+    EXPECT_NO_THROW(deserializer.get("double_vector_key", double_vec_result));
+    EXPECT_EQ(double_vec_result, double_vec);
+
+    std::vector<uint32_t> uint_vec_result;
+    EXPECT_NO_THROW(deserializer.get("uint_vector_key", uint_vec_result));
+    EXPECT_EQ(uint_vec_result, uint_vec);
+
+    std::vector<std::string> string_vec_result;
+    EXPECT_NO_THROW(deserializer.get("string_vector_key", string_vec_result));
+    EXPECT_EQ(string_vec_result, string_vec);
+}
+
+TEST_F(MsgPackFileTest, MissingKeyThrows) {
+    ttml::serialization::MsgPackFile serializer;
+    serializer.put("int_key", 42);
+    ASSERT_NO_THROW(serializer.serialize(test_filename));
+    ttml::serialization::MsgPackFile deserializer;
+    ASSERT_NO_THROW(deserializer.deserialize(test_filename));
+
+    int int_value = 0;
+    EXPECT_ANY_THROW(deserializer.get("nonexistent_key", int_value));
+}
+
+TEST_F(MsgPackFileTest, TypeMismatchThrows) {
+    ttml::serialization::MsgPackFile serializer;
+    serializer.put("int_key", 42);
+    serializer.serialize(test_filename);
+
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(test_filename);
+
+    float float_value = 0.F;
+    EXPECT_ANY_THROW(deserializer.get("int_key", float_value));
+}
+
+TEST_F(MsgPackFileTest, OverwriteExistingKey) {
+    ttml::serialization::MsgPackFile serializer;
+    serializer.put("key", 42);
+    serializer.put("key", "Overwritten");
+
+    serializer.serialize(test_filename);
+
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(test_filename);
+
+    std::string string_value;
+    EXPECT_NO_THROW(deserializer.get("key", string_value));
+    EXPECT_EQ(string_value, "Overwritten");
+
+    int int_value = 0;
+    EXPECT_ANY_THROW(deserializer.get("key", int_value));
+}
+
+TEST_F(MsgPackFileTest, EmptySerializerSerialization) {
+    ttml::serialization::MsgPackFile serializer;
+    ASSERT_NO_THROW(serializer.serialize(test_filename));
+
+    ttml::serialization::MsgPackFile deserializer;
+    ASSERT_NO_THROW(deserializer.deserialize(test_filename));
+
+    int int_value = 0;
+    EXPECT_ANY_THROW(deserializer.get("any_key", int_value));
+}
+
+TEST_F(MsgPackFileTest, LargeDataSerialization) {
+    ttml::serialization::MsgPackFile serializer;
+
+    // Generate large data
+    std::vector<int> large_int_vec(10000, 42);
+    serializer.put("large_int_vector", std::span<const int>(large_int_vec));
+
+    // Serialize to file
+    ASSERT_NO_THROW(serializer.serialize(test_filename));
+
+    // Deserialize from file
+    ttml::serialization::MsgPackFile deserializer;
+    ASSERT_NO_THROW(deserializer.deserialize(test_filename));
+
+    // Get and check data
+    std::vector<int> int_vec_result;
+    EXPECT_NO_THROW(deserializer.get("large_int_vector", int_vec_result));
+    EXPECT_EQ(int_vec_result.size(), large_int_vec.size());
+    EXPECT_EQ(int_vec_result, large_int_vec);
+}
+
+TEST_F(MsgPackFileTest, NonExistentFileDeserialization) {
+    ttml::serialization::MsgPackFile deserializer;
+    EXPECT_THROW(deserializer.deserialize("nonexistent_file.msgpack"), std::runtime_error);
+}
+
+TEST_F(MsgPackFileTest, InvalidDataDeserialization) {
+    // Write invalid data to file
+    std::ofstream ofs(test_filename, std::ios::binary);
+    ofs << "Invalid Data";
+    ofs.close();
+
+    ttml::serialization::MsgPackFile deserializer;
+    EXPECT_ANY_THROW(deserializer.deserialize(test_filename));
+}
+
+TEST_F(MsgPackFileTest, MultipleDataTypesSerialization) {
+    ttml::serialization::MsgPackFile serializer;
+
+    serializer.put("int_key", 100);
+    serializer.put("float_key", 1.23F);
+    serializer.put("double_key", 4.56);
+    serializer.put("string_key", "test string");
+
+    std::vector<int> int_vec = {10, 20, 30};
+    serializer.put("int_vector_key", std::span<const int>(int_vec));
+
+    serializer.serialize(test_filename);
+
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(test_filename);
+
+    int int_value = 0;
+    EXPECT_NO_THROW(deserializer.get("int_key", int_value));
+    EXPECT_EQ(int_value, 100);
+
+    float float_value = 0.F;
+    EXPECT_NO_THROW(deserializer.get("float_key", float_value));
+    EXPECT_FLOAT_EQ(float_value, 1.23F);
+
+    double double_value = 0.0;
+    EXPECT_NO_THROW(deserializer.get("double_key", double_value));
+    EXPECT_DOUBLE_EQ(double_value, 4.56);
+
+    std::string string_value;
+    EXPECT_NO_THROW(deserializer.get("string_key", string_value));
+    EXPECT_EQ(string_value, "test string");
+
+    std::vector<int> int_vec_result;
+    EXPECT_NO_THROW(deserializer.get("int_vector_key", int_vec_result));
+    EXPECT_EQ(int_vec_result, int_vec);
+}
diff --git a/tt-train/tests/serialization/tensor_serializer_test.cpp b/tt-train/tests/serialization/tensor_serializer_test.cpp
new file mode 100644
index 00000000000..cd39ecd9e5f
--- /dev/null
+++ b/tt-train/tests/serialization/tensor_serializer_test.cpp
@@ -0,0 +1,89 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "core/device.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "modules/multi_layer_perceptron.hpp"
+#include "serialization/msgpack_file.hpp"
+#include "serialization/serialization.hpp"
+
+class TensorFileTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        // Remove test file if it exists
+        if (std::filesystem::exists(test_filename)) {
+            std::filesystem::remove(test_filename);
+        }
+    }
+
+    void TearDown() override {
+        // Clean up test file after each test
+        if (std::filesystem::exists(test_filename)) {
+            std::filesystem::remove(test_filename);
+        }
+    }
+
+    const std::string test_filename = "/tmp/test_tensor.msgpack";
+};
+
+TEST_F(TensorFileTest, SerializeDeserializeTensor) {
+    ttml::serialization::MsgPackFile serializer;
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto shape = ttml::core::create_shape({1, 2, 32, 321});
+    auto tensor_zeros = ttml::core::zeros(shape, device);
+    auto tensor_ones = ttml::core::ones(shape, device);
+
+    // Write tensor to file
+    ttml::serialization::write_ttnn_tensor(serializer, "tensor", tensor_ones);
+    serializer.serialize(test_filename);
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(test_filename);
+
+    // Read tensor from file
+    tt::tt_metal::Tensor tensor_read = tensor_zeros;
+    ttml::serialization::read_ttnn_tensor(deserializer, "tensor", tensor_read);
+
+    auto read_vec = ttml::core::to_vector(tensor_read);
+
+    for (auto& val : read_vec) {
+        EXPECT_EQ(val, 1.F);
+    }
+}
+
+bool compare_tensors(const tt::tt_metal::Tensor& tensor1, const tt::tt_metal::Tensor& tensor2) {
+    auto vec1 = ttml::core::to_vector(tensor1);
+    auto vec2 = ttml::core::to_vector(tensor2);
+    return vec1 == vec2;
+}
+
+TEST_F(TensorFileTest, SerializeDeserializeNamedParameters) {
+    ttml::serialization::MsgPackFile serializer;
+    auto* device = &ttml::autograd::ctx().get_device();
+    auto model_params = ttml::modules::MultiLayerPerceptronParameters{
+        .m_input_features = 128, .m_hidden_features = {256}, .m_output_features = 10};
+    ttml::modules::MultiLayerPerceptron mlp_to_write(model_params);
+    ttml::modules::MultiLayerPerceptron mlp_to_read(model_params);
+    // Write tensor to file
+    auto params_to_write = mlp_to_write.parameters();
+    ttml::serialization::write_named_parameters(serializer, "mlp", params_to_write);
+    serializer.serialize(test_filename);
+    ttml::serialization::MsgPackFile deserializer;
+    deserializer.deserialize(test_filename);
+    auto params_to_read = mlp_to_read.parameters();
+    ttml::serialization::read_named_parameters(deserializer, "mlp", params_to_read);
+
+    EXPECT_EQ(params_to_read.size(), params_to_write.size());
+    for (const auto& [key, value] : params_to_read) {
+        EXPECT_TRUE(compare_tensors(value->get_value(), params_to_write.at(key)->get_value()));
+    }
+}
diff --git a/tt-train/tests/test_data/tokenizer.json b/tt-train/tests/test_data/tokenizer.json
new file mode 100644
index 00000000000..126e419a201
--- /dev/null
+++ b/tt-train/tests/test_data/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2dd4a144b04bdc21cbf27834f05628de4e6bc511a59b3c1bd9679c7cef7c665
+size 2113739
diff --git a/tt-train/tests/tokenizers/bpe_tokenizer_test.cpp b/tt-train/tests/tokenizers/bpe_tokenizer_test.cpp
new file mode 100644
index 00000000000..1def71591d5
--- /dev/null
+++ b/tt-train/tests/tokenizers/bpe_tokenizer_test.cpp
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tokenizers/bpe_tokenizer.hpp"
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+using namespace ttml::tokenizers;
+
+namespace {
+std::string getTestDataDir() {
+    const char* envVar = std::getenv("TEST_DATA_DIR");
+    return (envVar) ? std::string(envVar) : std::string(TEST_DATA_DIR);
+}
+}
+
+class BPETokenizerTest : public ::testing::Test {
+protected:
+    BPETokenizer tokenizer = BPETokenizer(getTestDataDir() + "/tokenizer.json");
+};
+
+TEST_F(BPETokenizerTest, EncodeAndDecode) {
+    const std::string prompt = "What is the  capital of Canada?";
+    auto ids = tokenizer.encode(prompt);
+    auto decoded_prompt = tokenizer.decode(ids);
+    EXPECT_EQ(decoded_prompt, prompt);
+}
+
+TEST_F(BPETokenizerTest, IdToTokenAndTokenToId) {
+    auto vocab_size = tokenizer.get_vocab_size();
+    EXPECT_EQ(vocab_size, 50277);
+}
diff --git a/tt-train/tests/tokenizers/char_tokenizer_test.cpp b/tt-train/tests/tokenizers/char_tokenizer_test.cpp
new file mode 100644
index 00000000000..3acb7902c47
--- /dev/null
+++ b/tt-train/tests/tokenizers/char_tokenizer_test.cpp
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tokenizers/char_tokenizer.hpp"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+using namespace ttml::tokenizers;
+
+// Test fixture for CharTokenizer
+class CharTokenizerTest : public ::testing::Test {
+protected:
+    CharTokenizer::Vocabulary vocabulary = {
+        {"h", 1}, {"e", 2}, {"l", 3}, {"o", 4}, {" ", 5}, {"w", 6}, {"r", 7}, {"d", 8}};
+
+    CharTokenizer tokenizer = CharTokenizer(vocabulary);
+};
+
+// Test encoding functionality
+TEST_F(CharTokenizerTest, Encode) {
+    std::string text = "hello world";
+    std::vector<uint32_t> expected_tokens = {1, 2, 3, 3, 4, 5, 6, 4, 7, 3, 8};
+
+    std::vector<uint32_t> encoded = tokenizer.encode(text);
+
+    ASSERT_EQ(encoded, expected_tokens);
+}
+
+// Test encoding with a character not in vocabulary
+TEST_F(CharTokenizerTest, EncodeUnknownCharacter) {
+    std::string text = "hello world!";
+    EXPECT_THROW({ auto _ = tokenizer.encode(text); }, std::runtime_error);
+}
+
+// Test decoding functionality
+TEST_F(CharTokenizerTest, Decode) {
+    std::vector<uint32_t> tokens = {1, 2, 3, 3, 4, 5, 6, 4, 7, 3, 8};
+    std::string expected_text = "hello world";
+
+    std::string decoded = tokenizer.decode(tokens);
+
+    ASSERT_EQ(decoded, expected_text);
+}
+
+// Test decoding with a token ID not in vocabulary
+TEST_F(CharTokenizerTest, DecodeUnknownToken) {
+    std::vector<uint32_t> tokens = {1, 2, 3, 3, 4, 33};  // Token 33 is not in the vocabulary
+
+    EXPECT_THROW({ auto _ = tokenizer.decode(tokens); }, std::runtime_error);
+}
+
+// Test encoding and decoding consistency
+TEST_F(CharTokenizerTest, EncodeDecodeConsistency) {
+    std::string text = "hello world";
+    std::vector<uint32_t> encoded = tokenizer.encode(text);
+    std::string decoded = tokenizer.decode(encoded);
+
+    ASSERT_EQ(decoded, text);
+}
diff --git a/tt-train/tests/tokenizers/char_tokenizer_trainer_test.cpp b/tt-train/tests/tokenizers/char_tokenizer_trainer_test.cpp
new file mode 100644
index 00000000000..8e1490456b0
--- /dev/null
+++ b/tt-train/tests/tokenizers/char_tokenizer_trainer_test.cpp
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tokenizers/char_tokenizer_trainer.hpp"
+
+#include <gtest/gtest.h>
+
+using namespace ttml::tokenizers;
+
+// Test fixture for CharTokenizerTrainer
+class CharTokenizerTrainerTest : public ::testing::Test {
+protected:
+    // Example CharTokenizerTrainer instance
+    CharTokenizerTrainer trainer;
+};
+
+// Test that the trainer creates a tokenizer with the correct vocabulary
+TEST_F(CharTokenizerTrainerTest, TrainVocabulary) {
+    std::string text = "hello world";
+    CharTokenizer tokenizer = trainer.train(text);
+
+    CharTokenizer::Vocabulary expected_vocabulary = {
+        {" ", 1}, {"d", 2}, {"e", 3}, {"h", 4}, {"l", 5}, {"o", 6}, {"r", 7}, {"w", 8}};
+
+    // Verify that the generated vocabulary matches the expected one
+    const auto special_tokens_count = 3UL;
+    ASSERT_EQ(tokenizer.get_vocabulary().size(), expected_vocabulary.size() + special_tokens_count);
+
+    for (const auto& pair : expected_vocabulary) {
+        auto it = tokenizer.get_vocabulary().find(pair.first);
+        ASSERT_NE(it, tokenizer.get_vocabulary().end());
+        ASSERT_EQ(it->second, pair.second);
+    }
+}
+
+// Test that the trainer handles duplicate characters correctly
+TEST_F(CharTokenizerTrainerTest, TrainWithDuplicateCharacters) {
+    std::string text = "aaaabbbb";
+    CharTokenizer tokenizer = trainer.train(text);
+
+    CharTokenizer::Vocabulary expected_vocabulary = {{"a", 1}, {"b", 2}};
+
+    // Verify that the generated vocabulary has no duplicates
+    const auto special_tokens_count = 3UL;
+    ASSERT_EQ(tokenizer.get_vocabulary().size(), expected_vocabulary.size() + special_tokens_count);
+
+    for (const auto& pair : expected_vocabulary) {
+        auto it = tokenizer.get_vocabulary().find(pair.first);
+        ASSERT_NE(it, tokenizer.get_vocabulary().end());
+        ASSERT_EQ(it->second, pair.second);
+    }
+}
+
+// Test that the trainer starts indexing from the specified starting index
+TEST_F(CharTokenizerTrainerTest, TrainWithNoPaddingToken) {
+    std::string text = "abc";
+    CharTokenizer tokenizer = trainer.train(text, /* add_padding_token */ false);
+
+    CharTokenizer::Vocabulary expected_vocabulary = {{"a", 0}, {"b", 1}, {"c", 2}};
+
+    // Verify that the generated vocabulary starts at the correct index
+    const auto special_tokens_count = 2UL;
+    ASSERT_EQ(tokenizer.get_vocabulary().size(), expected_vocabulary.size() + special_tokens_count);
+
+    for (const auto& pair : expected_vocabulary) {
+        auto it = tokenizer.get_vocabulary().find(pair.first);
+        ASSERT_NE(it, tokenizer.get_vocabulary().end());
+        ASSERT_EQ(it->second, pair.second);
+    }
+}
+
+// Test that the trainer handles an empty string correctly
+TEST_F(CharTokenizerTrainerTest, TrainWithEmptyString) {
+    std::string text;
+    CharTokenizer tokenizer = trainer.train(text, /* add_padding_token */ false);
+
+    // Verify that the generated vocabulary is empty
+    const auto special_tokens_count = 2UL;
+    ASSERT_EQ(tokenizer.get_vocabulary().size(), special_tokens_count);
+}
diff --git a/tt-train/tests/ttnn_fixed/trivial_ttnn_ops_test.cpp b/tt-train/tests/ttnn_fixed/trivial_ttnn_ops_test.cpp
new file mode 100644
index 00000000000..c43370a75f4
--- /dev/null
+++ b/tt-train/tests/ttnn_fixed/trivial_ttnn_ops_test.cpp
@@ -0,0 +1,230 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+#include <gtest/gtest.h>
+
+#include <core/ttnn_all_includes.hpp>
+#include <memory>
+#include <ttnn/operations/core/compute_kernel/compute_kernel_config.hpp>
+#include <vector>
+
+#include "autograd/auto_context.hpp"
+#include "core/compute_kernel_config.hpp"
+#include "core/device.hpp"
+#include "core/tt_tensor_utils.hpp"
+#include "ttnn_fixed/trivial_ttnn_ops.hpp"
+
+TEST(TrivialTnnFixedTest, TestMaxNegativeOne_BROKEN) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    std::vector<float> data(24, -1.F);
+    auto shape = ttml::core::create_shape({1, 2, 3, 4});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto res = ttnn::max(tensor, /* dim */ 3, /* keepdim */ true);
+    auto res_vector = ttml::core::to_vector(res);
+    EXPECT_EQ(res_vector.size(), 6);
+    bool all_equal = true;
+    for (const auto& value : res_vector) {
+        if (std::fabs(value + 1.F) > 1e-2) {
+            all_equal = false;
+        }
+    }
+    EXPECT_FALSE(all_equal);
+}
+
+TEST(TrivialTnnFixedTest, TestMaxNegativeBatch_BROKEN) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    auto shape = ttml::core::create_shape({4, 1, 1, 4});
+    std::vector<float> data(16);
+    for (int i = 0; i < 4; ++i) {
+        for (int j = 0; j < 4; ++j) {
+            data[i * 4 + j] = -static_cast<float>(i + 1);
+        }
+    }
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto res = ttnn::max(tensor, /* dim */ 3, /* keepdim */ true);
+    auto res_vector = ttml::core::to_vector(res);
+    EXPECT_EQ(res_vector.size(), 4);
+    bool all_equal = true;
+    for (int i = 0; i < 4 && all_equal; ++i) {
+        if (std::fabs(res_vector[i] - (-static_cast<float>(i + 1))) > 1e-2) {
+            all_equal = false;
+        }
+    }
+    EXPECT_FALSE(all_equal);
+}
+
+TEST(TrivialTnnFixedTest, TestStableSoftmax_0) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    const size_t batch_size = 1U;
+    const size_t features = 2U;
+    std::vector<float> data(batch_size * features);
+    for (int i = 0; i < data.size(); ++i) {
+        data[i] = 100.F + static_cast<float>(i);
+    }
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto tensor_data = ttml::core::to_vector(tensor);
+    EXPECT_NEAR(tensor_data[0], 100.F, 1e-2);
+    EXPECT_NEAR(tensor_data[1], 101.F, 1e-2);
+
+    auto res = ttml::ttnn_fixed::softmax(tensor, /* dim */ 3);
+    auto res_vector = ttml::core::to_vector(res);
+    EXPECT_NEAR(res_vector[0], 0.2689F, 2e-2);
+    EXPECT_NEAR(res_vector[1], 0.7311F, 2e-2);
+}
+
+TEST(TrivialTnnFixedTest, TestOriginalStableSoftmax_AllNegative) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    const size_t batch_size = 1U;
+    const size_t features = 2U;
+    std::vector<float> data(batch_size * features);
+    for (int i = 0; i < data.size(); ++i) {
+        data[i] = -100.F + static_cast<float>(i);
+    }
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto tensor_data = ttml::core::to_vector(tensor);
+    EXPECT_NEAR(tensor_data[0], -100.F, 1e-2);
+    EXPECT_NEAR(tensor_data[1], -99.F, 1e-2);
+    auto compute_kernel_config = ttml::core::ComputeKernelConfig::precise();
+    auto res = ttnn::softmax(
+        tensor,
+        /* dim */ 3,
+        /*memory_config */ std::nullopt,
+        compute_kernel_config,
+        /*stable*/ true);
+    auto res_vector = ttml::core::to_vector(res);
+    EXPECT_NEAR(res_vector[0], 0.2689F, 2e-2);
+    EXPECT_NEAR(res_vector[1], 0.7311F, 2e-2);
+}
+
+TEST(TrivialTnnFixedTest, TestStableSoftmax_2) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    const size_t batch_size = 1U;
+    const size_t features = 10U;
+    std::vector<float> data(batch_size * features, 0.F);
+    data[0] = 1.0F;
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto tensor_data = ttml::core::to_vector(tensor);
+    EXPECT_NEAR(tensor_data[0], 1.F, 1e-2);
+    EXPECT_NEAR(tensor_data[1], 0.F, 1e-2);
+
+    auto res = ttml::ttnn_fixed::softmax(tensor, /* dim */ 3);
+    auto res_vector = ttml::core::to_vector(res);
+
+    auto exp_sum = 0.0F;
+    for (auto& elem : data) {
+        exp_sum += std::exp(elem);
+    }
+
+    for (int i = 0; i < res_vector.size(); ++i) {
+        EXPECT_NEAR(res_vector[i], std::exp(data[i]) / exp_sum, 1e-2);
+    }
+}
+
+TEST(TrivialTnnFixedTest, TestSumOverBatch_0) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    const size_t batch_size = 10U;
+    const size_t features = 4U;
+    std::vector<float> data(batch_size * features);
+    std::iota(data.begin(), data.end(), 0);
+
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto tensor_shape = tensor.get_shape();
+    EXPECT_EQ(tensor_shape[0], batch_size);
+    EXPECT_EQ(tensor_shape[1], 1U);
+    EXPECT_EQ(tensor_shape[2], 1U);
+    EXPECT_EQ(tensor_shape[3], features);
+
+    auto result = ttml::ttnn_fixed::sum_over_batch(tensor);
+    const auto& result_shape = result.get_shape();
+    ASSERT_EQ(result_shape.rank(), 4U);
+    EXPECT_EQ(result_shape[0], 1U);
+    EXPECT_EQ(result_shape[1], 1U);
+    EXPECT_EQ(result_shape[2], 1U);
+    EXPECT_EQ(result_shape[3], features);
+}
+
+TEST(TrivialTnnFixedTest, TestDivide) {
+    auto* device = &ttml::autograd::ctx().get_device();
+    const size_t batch_size = 2U;
+    const size_t features = 64U;
+    std::vector<float> lhs(batch_size * features);
+    std::vector<float> rhs(batch_size * features);
+
+    for (int i = 0; i < lhs.size(); ++i) {
+        lhs[i] = static_cast<float>(i);
+        rhs[i] = static_cast<float>(i + 1);
+    }
+
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto lhs_tensor = ttml::core::from_vector(lhs, shape, device);
+    auto rhs_tensor = ttml::core::from_vector(rhs, shape, device);
+
+    auto result = ttml::ttnn_fixed::divide(lhs_tensor, rhs_tensor);
+    const auto& result_shape = result.get_shape();
+    ASSERT_EQ(result_shape.rank(), 4U);
+    EXPECT_EQ(result_shape[0], batch_size);
+    EXPECT_EQ(result_shape[1], 1U);
+    EXPECT_EQ(result_shape[2], 1U);
+    EXPECT_EQ(result_shape[3], features);
+
+    std::vector<float> resulting_vector = ttml::core::to_vector(result);
+    EXPECT_EQ(resulting_vector.size(), batch_size * features);
+    for (int i = 0; i < resulting_vector.size(); ++i) {
+        EXPECT_NEAR(resulting_vector[i], static_cast<float>(i) / static_cast<float>(i + 1), 1e-2);
+    }
+}
+
+TEST(TrivialTnnFixedTest, TestSumOverBatch_1) {
+    auto* device = &ttml::autograd::ctx().get_device();
+
+    const size_t batch_size = 2U;
+    const size_t features = 64U;
+    std::vector<float> data(batch_size * features);
+    float step = 0.1F;
+    float value = 0.0F;
+    for (int i = 0; i < data.size(); ++i) {
+        data[i] = value;
+        value += step;
+    }
+
+    auto shape = ttml::core::create_shape({batch_size, 1, 1, features});
+    auto tensor = ttml::core::from_vector(data, shape, device);
+    auto tensor_shape = tensor.get_shape();
+    EXPECT_EQ(tensor_shape[0], batch_size);
+    EXPECT_EQ(tensor_shape[1], 1U);
+    EXPECT_EQ(tensor_shape[2], 1U);
+    EXPECT_EQ(tensor_shape[3], features);
+
+    auto result = ttml::ttnn_fixed::sum_over_batch(tensor);
+    const auto& result_shape = result.get_shape();
+    ASSERT_EQ(result_shape.rank(), 4U);
+    EXPECT_EQ(result_shape[0], 1U);
+    EXPECT_EQ(result_shape[1], 1U);
+    EXPECT_EQ(result_shape[2], 1U);
+    EXPECT_EQ(result_shape[3], features);
+
+    std::vector<float> resulting_vector = ttml::core::to_vector(result);
+    EXPECT_EQ(resulting_vector.size(), features);
+    const float eps = 1.0F;
+    for (int i = 0; i < resulting_vector.size(); ++i) {
+        float expected_value = 0.F;
+        for (int j = 0; j < batch_size; ++j) {
+            expected_value += static_cast<float>(i + j * features) * step;
+        }
+
+        EXPECT_NEAR(expected_value, resulting_vector[i], eps);
+    }
+}

From 3c70880798b265f91b62c29880fcfa5aa73e4500 Mon Sep 17 00:00:00 2001
From: Andrew Fuller <afuller@tenstorrent.com>
Date: Wed, 13 Nov 2024 17:42:05 -0500
Subject: [PATCH 55/69] #0: Disable Unity builds to detect bitrot (#15017)

### Ticket
None

### Problem description
A header file missed an include and went undetected in CI because Unity
builds accidentally provided the needed include in the combined TU.

### What's changed
Disabled Unity builds for CI
Added the missing include

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes

---------

Co-authored-by: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
---
 .github/workflows/build-artifact.yaml                          | 3 ++-
 .../data_movement/concat/device/concat_program_factory.hpp     | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build-artifact.yaml b/.github/workflows/build-artifact.yaml
index f0dad00701a..ea0a012949f 100644
--- a/.github/workflows/build-artifact.yaml
+++ b/.github/workflows/build-artifact.yaml
@@ -137,7 +137,8 @@ jobs:
             # NOTE: may be inaccurate if we have >1 build runner on the same machine, using the same local cache
             ccache -z
 
-            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-all --enable-ccache"
+            # Disable Unity builds to detect any bitrot from not building each TU independently
+            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-all --enable-ccache --disable-unity-builds"
             echo "${{ inputs.tracy }}"
             if [ "${{ inputs.tracy }}" = "true" ]; then
               build_command="$build_command --enable-profiler"
diff --git a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
index 710640be80a..c902e407103 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
@@ -7,6 +7,7 @@
 #include "tt_metal/common/work_split.hpp"
 #include "tt_metal/detail/util.hpp"
 #include "tt_metal/host_api.hpp"
+#include "ttnn/cpp/ttnn/operation.hpp"
 
 namespace ttnn::operations::data_movement::detail {
 

From 9146db83ab90a2f29beffb081ec33baaf26bc65b Mon Sep 17 00:00:00 2001
From: Abhinav Sarje <mywoodstock@users.noreply.github.com>
Date: Wed, 13 Nov 2024 15:11:51 -0800
Subject: [PATCH 56/69] Update Resnet50 perf on n150

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 133a9edaf3c..916150e52d5 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@
 | Model                                                                       | Batch | Hardware                                                 | fps     | Target fps | Release     |
 |-----------------------------------------------------------------------------|-------|----------------------------------------------------------|---------|------------|-------------|
 | [ResNet-50 (224x224)](./models/demos/grayskull/resnet50)                    | 20    | [e150](https://tenstorrent.com/hardware/grayskull)       | 5,100   | 10,000     |             |
-| [ResNet-50 (224x224)](./models/demos/wormhole/resnet50)                     | 16    | [n150](https://tenstorrent.com/hardware/wormhole)        | 4,100   | 7,000      |             |
+| [ResNet-50 (224x224)](./models/demos/wormhole/resnet50)                     | 16    | [n150](https://tenstorrent.com/hardware/wormhole)        | 4,670   | 7,000      |             |
 | [ResNet-50 (224x224) (DP=2)](./models/demos/wormhole/resnet50)              | 32    | [n300](https://tenstorrent.com/hardware/wormhole)        | 8,200   | 14,000     |             |
 | [ResNet-50 (224x224) (DP=8)](./models/demos/t3000/resnet50)                 | 128   | [QuietBox](https://tenstorrent.com/hardware/tt-quietbox) | 32,250  | 56,000     |             |
 | [ResNet-50 (224x224) (DP=32)](./models/demos/tg/resnet50)                   | 512   | [Galaxy](https://tenstorrent.com/hardware/galaxy)        | 95,900  | 224,000    |             |

From 3fcaaa3c6fedef1cb278cf404436b13ecf33f248 Mon Sep 17 00:00:00 2001
From: Felix LeClair <fleclair@tenstorrent.com>
Date: Wed, 13 Nov 2024 20:22:19 -0500
Subject: [PATCH 57/69] [skip ci] Add GEMM techreport to explain WH performance
  (#14585)

### Problem description
Customer clarity on how to benchmark their card, what to expect

### What's changed
New tech report


### Checklist
N/A

---------

Signed-off-by: Felix LeClair <fleclair@tenstorrent.com>
Co-authored-by: Yu Gao <145494740+yugaoTT@users.noreply.github.com>
Co-authored-by: Mohamed Bahnas <116673264+mbahnasTT@users.noreply.github.com>
---
 tech_reports/GEMM_FLOPS/GEMM_FLOPS.md         | 113 ++++++++++++++++++
 .../GEMM_FLOPS/images/TFLOPS_WH_SQUARE.png    | Bin 0 -> 100514 bytes
 .../images/effects_of_precision.png           | Bin 0 -> 358138 bytes
 .../GEMM_FLOPS/images/effects_of_shapes.png   | Bin 0 -> 175342 bytes
 4 files changed, 113 insertions(+)
 create mode 100644 tech_reports/GEMM_FLOPS/GEMM_FLOPS.md
 create mode 100644 tech_reports/GEMM_FLOPS/images/TFLOPS_WH_SQUARE.png
 create mode 100644 tech_reports/GEMM_FLOPS/images/effects_of_precision.png
 create mode 100644 tech_reports/GEMM_FLOPS/images/effects_of_shapes.png

diff --git a/tech_reports/GEMM_FLOPS/GEMM_FLOPS.md b/tech_reports/GEMM_FLOPS/GEMM_FLOPS.md
new file mode 100644
index 00000000000..98e12603b64
--- /dev/null
+++ b/tech_reports/GEMM_FLOPS/GEMM_FLOPS.md
@@ -0,0 +1,113 @@
+# Matrix Multiply FLOPS
+
+
+## Introduction
+
+Across many families of neural networks and applications, the common denominator is the use of the generalized matrix multiply operation. Depending on the size and the precision of the input and output matrices, different underlying effects, and more importantly performance metrics, can be observed. Classically, this comes down to the hardware's ability to execute an operation, and its ability to fetch the data for that operation intercept. 
+
+If the data is small and already in registers, the cost to operate on that data is negligible. If the data is in cache, performance is dictated by how quickly the data can be funnelled thought caches to the compute units. In there worst case scenarios, the data needed is in device memory, host memory, or stored on a disk.
+
+Thankfully, matrix multiplication requires more compute operations (2N^3) than memory operations (3n^2). As such, for a given device, there will always be points at which a device is limited by the underlying compute units, not the underlying memory system. We call this point the roofline. 
+However, said inversion point depends on the size and crossover point of each cache level/memory technology and the datatype in use. The amount of 8 bit elements that can be moved per unit time is nearly an order of magnitude more than 64 bit elements. 
+
+Therefore, the peak achieved flops changes based on the datatype, the size of the data, and the layout of the data. 
+
+
+# Test it yourself!
+
+Assuming you have access to a device (if not, they're available for purchase at Tenstorrent.com!), you can test and see the matrix multiply TFLOPS results for yourself by running: 
+
+`pytest tests/ttnn/unit_tests/benchmarks/test_benchmark.py::test_matmul_2d_host_perf` (available in the ttMetal repository) on a N150 card. 
+
+Alternatively, to test on an N300 card, use the following command: 
+
+`WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/benchmarks/test_benchmark.py::test_matmul_2d_host_perf` on a N300 card.
+
+To do so, make sure to have followed the setup instructions guide available at https://github.com/tenstorrent/tt-metal/blob/main/INSTALLING.md 
+
+NB: You'll need to comment out `#@pytest.mark.skip(reason="WH didt hang, need to skip CI and run locally only")` line.
+
+## Points of interest in the tests
+
+The parameters of interest are 3 fold:
+1. Dimensions: the sizes of the matrix on each edge, denoted as m, n and k 
+2. The fidelity of the computation, referred to as lofi, hifi2, hifi3, and hifi4. This affects how many bits of each input datatype is actually ingested during the computation.  
+3. Datatype of input/output space. It has been shown that a network layer need not always use all of the bits of a given datatype. But some layers do need the full resolution provided by a given data type, and its higher memory footprint. 
+
+For example, when changing the precision of the matrix, for a given size of matrix the output performance is expected to be different. 
+
+![A simple bar chart of the TFLOPS on WH when changing the precision of matrcies](images/effects_of_precision.png "Variance in performance of TFLOPS on WH from SRAM due to changing precision")
+
+
+
+## Operations
+
+### Matrix Multiplication
+
+The WH matrix engine performs 8x16 x 16x16 = 8x16 in a single cycle. \
+This is 2*8\*16\*16 = 4096 muladds in a single cycle. At 1GHz, this is 4 TFLOPs per matrix engine. \
+The 8x16 is the smallest matrix that can be fed into in0, and 16x16 is the 
+smallest matrix that can be fed into in1.
+
+If the input matrices fed into the engine are "shorter" than 8x16, for example 1x16, the engine will still perform 8x16 x 16x16 = 8x16, but the effective throughput will be 1/8. 
+Thus, for 1x16 x 16x16 matrices, the effective throughput is 0.5 TFLOP per matrix engine.
+
+MATH_FIDELITY is used for higher precision, and TFLOPs are calculated by dividing by the MATH_FIDELITY value.
+
+LoFi ->  ~4 TFLOPs \
+HiFi2 -> ~2 TFLOPs \
+HiFi3 -> ~1.33 TFLOPs \
+HiFi4 -> ~1 TFLOPs
+
+### Peak Machine FLOPS
+
+Each N300s card is made up of 2 Wormhole ASICs. Each ASIC provides a usable grid of 8 * 8 tensix Cores. 
+
+Depending on the fidelity, datatype, and matrix shape chosen, different peak teraflop values can be achieved. 
+
+
+For example, running 100 tests cases out of SRAM, where the shape of the input matrices are m,n,k, the inputs are in BF16, and the resolution is of Hifi2, the below results can be achieved: 
+
+```
+m	k	n	inference_time_avg (ns)	TFLOPs (avg)
+
+512	512	512	2.180337905883789e-05	12.311644689367128
+512	1024	1024	3.8516521453857425e-05	27.877435019315975
+512	1024	2048	6.270408630371094e-05	34.247905911562704
+1024	1024	1024	4.348278045654297e-05	49.386990101661326
+1024	1024	2048	7.58218765258789e-05	56.64548930721963
+1024	2048	2048	0.0001335597038269043	64.31531626584545
+2048	2048	2048	0.00023612260818481445	72.75825604362807
+3072	3072	3072	0.0010478639602661134	55.33357448544656
+4096	4096	4096	0.002201988697052002	62.41583058805059
+```
+
+
+#### Square matrices
+
+For most hardware, peak performance is achieved with square matrices that best align with the underlying hardware, for example WH performs best when using Square input matrices, but also when those matrices are of size n=m=k=2048 for data types of BF16 at Hifi2
+
+![A simple bar chart of the TFLOPS on WH when using various square matrcies](images/TFLOPS_WH_SQUARE.png "Square Matrix TFLOPS on WH from SRAM")
+
+#### Rectangular matrices 
+
+When deviating from Square matrices, the total balance of compute can be thrown off, lowering peak performance. For example, processing matrices with equal amounts of elements, but different shapes can reduce peak TFLOPS. 
+
+Given input matrix A of 512x1024 and B of 1024x2048 to produce output matrix 512x2048 requires the same amount of computation as if the input matrices were of dimensions 1024^2. However, the performance results are measurably different: 
+
+```
+m	k	n	inference_time_avg (ns)	TFLOPs (avg)
+
+512	1024	2048	6.270408630371094e-05	34.247905911562704
+1024	1024	1024	4.348278045654297e-05	49.386990101661326
+```
+
+![A simple bar chart of the TFLOPS on WH when using square vs rectangular matrcies](images/effects_of_shapes.png "Square vs rectangular Matrix TFLOPS on WH from SRAM")
+
+
+
+### Understanding device scaling: SRAM vs DRAM
+
+When a tensix core executes an operation, it does so by reading in data from SRAM, forwarding that to a register, executing the operation, and then writing the result back to SRAM. 
+
+Each Tensix core on a WH ASIC has ~1.5MB of SRAM. When feeding data from SRAM, each tensix can operate unencumbered. However some problems require more working memory than is available via SRAM. When this happens, Tensix will instead map data to device memory or DRAM. Accessing data from DRAM is slower both in terms of bandwidth and latency than SRAM. Simultaneously, because of the interconnected nature of the WH ASIC, a clever programmer may often find that the result of one tensix unit is what is needed for the input of another tensix core. Instead of writing that data back to device memory, the data can instead be forwarded directly over the NOC. 
diff --git a/tech_reports/GEMM_FLOPS/images/TFLOPS_WH_SQUARE.png b/tech_reports/GEMM_FLOPS/images/TFLOPS_WH_SQUARE.png
new file mode 100644
index 0000000000000000000000000000000000000000..6f627b6e979d6d44eeafa3b46335d46e97b0685d
GIT binary patch
literal 100514
zcmeFZcT`hf*FOk0R8$mHM0$-%34+ogC`fNA0)h|)5s_X)O;l7wdXOe17K%y<AiV{Z
z7En-nM<8?v5^5lUIRU=)nfIMp^WUuB!&;D=Th6&>pItwDZ-NZ;)DIs#eUOfh?y#nY
z>NPsLJ$iI>yLtBS17{SiA!q354k_EJs2FIfsPGuLI^DCix2B`h2#PjlFu9IDnQVrR
zP`*#UC*{e&o?~a}Q=a^|Jhxx!GBeN1Q@bzKgo&IhK53|0^(_0v858<ke$SIN1FVb(
z8%j-bgq^M_clwX|F8I5R<2Dn?&GC|MjdZU)AM-O?znh{X>|dxpe+jubuZefR#>BJx
zkinxPIit>QSH8V}FHYBJu)abf?o&>gz2W<z1Gc_t_w(1hTL~S{)uZVo*Am&I+;nu<
zu@}|ebVpOmyB8HAkAFRwUoEcuoK@SUl*3rt<s-*jWOsE<4v*t*WzDBwkEQQ^?ZZY|
z^mCMJGnftRx?0b&pWfh|%1ly}WHf`z!Rn45YI|}GF_CKldTWCht_NO}O<cSqf(gz%
zLuPcn(w!K;X(P9Rz~f^Mk#nBh5LDsd;7^kL)|-|2;mx(oT`m~$i+s$My3b+zy(D~}
zU)6dhAY<Vee%jgXrnrrBH2qZl$ZFtadpeib;_`etpVQM0?CL)HmGg1aQ9|wrPsdZQ
z^Dt>1qdT886)vg`F*@k5e2P1`!J#g)AiT7sj6Tt9IDu>W_36Y~+=EMMxA>}$KmWe4
z_rS5E8JA}w5>Bvx%waQ*`26Bj-Z5><l=%b|j<2ly$oQpP$_1C(Azvy5Ac*^wU1^8t
z<u5;48MUt7+qe)=;3Av1I&>?i(T`UWsS${gSF*n^rcjA`=tneRf}i5f_(A4rSO|^`
zuQ~g6=FC$?$A;!3Xj0ly`SG#3bG5?<yq#4VmgX;9wJ=On&@1POVqq2xXModxp<`U#
zEf-Ps>R{Sq9iE8^yQu&KDOtIS-ZJ9IYL&(3L!lZkm-Y``JbC;fjCVMoZ^#9iXQ5Od
zNa?&|ge+jN%^UFRelGI+_%O%OgLy+0s#01qGothgk3xQN56I@Y-YM_S(OTbBgjFML
z;vx=Q7f17kyBF|kThjf+oYFZY(DQ4pSeh6V_&EcbsVB1=!TqWZ;rr$KWysL6A-Olf
z&b}{R8&Iyl9D2<-#c<9wa?MK2S^NFu<Mkm61I}|Ja>YtHbxCiyg^0Tq%amhMpPRip
zv8LaHO8Cr_@g|2~9(|RxKM&fyHXix0@T1t*b1k<v<G8&uKYS(MJG?@f!7QmPd*>Xc
zb~BrW%+@toLUI?nj%6PBkvn1{*%8JWJv`LEq*_?^th}av82?!AF>00F^ro$l8|O5K
zFDI^kowO>MGIzh~)wx$^Po~%qe#ZX}u1f#(^j6?icZfQ~hAw50l{`JIn?F<bXd)jq
zesY8UVdwnk&!4{zmPCxu&3FWaZ@!sJyto@F_mW-)@m%=Wc%$qsAKt2>C#U^d79SoH
z-`(M?q*ryqYZv?|-DPEg=esp;ON1!xCc2%EJ8~~z@9R@9AD_5=`28OC&nI(s&+J=Q
zW|7&Y_gL~5U-6;rfHLP5_W;({+~vD5$M*2@8a{b8!CU)$uVtVN@1bh;1Ik|_P6-^)
zywq}xm+#U&e#XdaDTh9&O7R!eTM&-AGRua$^A$w~YCuV+$xkfbAN=_A^xK$;`!63!
z{4}_5B<V@oyVf7O^7orRGkSe-LM;E7VvvO@_MQ0zqdVR1IEl1?_}j~;c3a(vG?9pT
z7L;3Ja=<98{q~Xx<E`-w6V{GE|LP!_!|5Z-&I}AA^D?ZMXU#RS!_lip%8xmH#SSyg
zBPr~Uw8VM3jw$T7|KfF7?vC7>I0L;dH5m<=!<baZFY<hm?^W{oN<Z724~o-FymCwH
zmcBhtGLI)SHw%M?0GbOQM!51qz(vD5#j-`NMXbf8<-|4V$pCa7Iu{*v?e(=hw8001
z54UX7Th6BnYc;=AifYH7yvyV&5U-xBX`wfu<;V-?Lqx%@UpQlQ9IC}37?t<%+NBTa
ziD`ZB%C8z3N!{=?4l_D=rr9{#VBnpf9{CMP{e<obvlB55swaXPB%)`}K@1%;%HLf3
z5Mu~`AaOCgG{jU}@ru7P^|jV@4+HBAlHvFVqYqLaEw1{1P&4{uVD(YgNZ3HwxEJfG
znWdkU>G+}P+W7nC8`x{G6y$Y(<$$#FYW&yu)vZ3%>V+<xR(vI(tNoo$H~C)V*vaqL
zUxnMA`W{mI*2R|5*335Dw%=4$R9LjoD9z|Yhhx@}ETn_3gU#%oS+<@VJ?aHz?&6%<
zoP~OYO|D7)a?<Wfq2e5$IX-je-c_YO7v>+B4r^O#ADxbwkeaaQq^78MGX7|6G5gS&
zf`D^(7PW_WV6t(SHuI5#8d;Ia{3ZO_YD9MLI=YKF7oVSxv(Bk`-G964N4<Ex!QJAf
z-!JcABEurFSJQqzcxR<nQG3D4$?DPkgOQQi=E)v+A;koXOHS`r5>CbKNzl-dN^p<!
zlPZu1l@`6=AYS35cHSy3H9kwq?&7qxv=oyx#=*h{S@+ofb54DZiJeQwa7Ajg_1jUt
zMxjP7VRPZa8;@=zwJX|vEC^q^FnxO3PD+*inQqK?pM=K3_gTRX&m5v>1?AhC+VyT1
z4P?6=zw>ha%+UK+SEXlf%)~E~mpzu5INdqEa2(`(#bFU9#hs+!Gdl6<9Zt4X=AOKx
zN4dP3n@)Y)m-C`l7V_P4E=eB~vqMwE4s*ym*)Mb%H+eSsjUR3*x^-V{NG$l4sd;5-
zX~_%oX>;HHZ~fQ$KK46G<e$14k{aSh+=GCvcoWt8U_Qk@1n-%J)yCEV$ws$yn`W5|
z7mABno#L{@(y*7q@)Rk}H{HvLL?oS`A6ji&RUlnl7$rL{_}Av&xhH`*4_}jA3!)xb
ztFZCT8OzaIwc5C}A-KUw|B8N|KAN6sm+tP?-P60)18zOOw>N3O(4N43*h7_gsk?67
z1v48*iy~xs9p0RKllvz6P52l4uhn-%hwgi6tbY9Md+g+~FVE97j^e{^o$!V=oO@u}
zHgw<8-O&BUVr$&_^PjAmYFjMRtBE20tmCXj3Pi_pk8**^i<d<2SYCGcvaY&WomZVb
z|E{i?4Ru}<rWw(esH3H)F%Y95&U^dIs{{Qk=0Yd+%g^H<w1jAmL#FkclKtFY^c-(`
zNj>}IW%bLXvv8Tn#M4d6=j03I3NVes*`t-e$Rwi$AF>5SV&l^VY<^yTvO9M1#$*MX
z65A-7|E`$V*M6UqlyS2fThFY|oV^*)9n^hTR!jD*Y~YhuUqtTo8ov7=ndbRoMz!W`
zwk=m{ht4!>;ESSBzLF@{goXTH;eKV+rlerPok|4q#zx@ef!3AgA91{iKb2ZN$I$W3
z2X`?d6P`8?H$6ylknsIH6X+J_Ce?gpEUn2Q$wmr$-(rRGnk;|P;N)G=)rudvIhns_
zzf?aJz4GkJLNM1R^P;wS?t|Ro)iJ*2E9|Tz!Fz+{V?xaH9PvfW7Wk*v>RL!GQ0)?J
zC`E3p^97tCHdS=mgjnkL_2BTqp%?7J{zk+zt!ISK_?DmZyD{=$Z3^A))TwjRt+KDg
zZ^X306yKBmBIm`J$Jn${=K8hxtKJn2*pF{|Ui)Y+tuK`?8B47?z8}0_oa;W4uKQTG
zNQ%M1F*7&w?BL}-G;VaV<4Kkc%8FuCri(J0EX4Lc?Je`Hp1Gglk&)GJo8#uqX$tu;
z-G9-gbF_b>EMa;UNaMtXmqdDB0pH{~-ELni_(Wv~HH5mvAacZzJB3rkR`s-v)Uveo
z1&@o6iwLfRvTZ%X-LAiLz7EZ*>x3C{wkwrVPK}!U%>6vFw~j&Yx!z)u4vbvvp*7uq
zeMH~fF4%EehSO)QFTP4Ly-A{(3pVaStoIII2ow_+yI}5K(bYw4LH<U>4l;QbdtR_`
zEo<}Zrn+j67dKfM`(-DVPob8+U0f~*?&Qx#3_QoVOpjG3m=}2!`8e7PB0V6RP2*(?
zbH|9$ea1NWM%AxN-?^nw7u-ucIsGR#bEsEm-j#)ygu@l^{-o5hiL*<#WXTbm0!2MV
zb)Vh7P#ETS`TEm|n(@kE*u{-Zq%D<H1+PREtc1_aAq9~aSA8bgE2Mh|<!}z1&rf^(
z78~y$UlHV9RGL|>U-eqPKAdiYNk{See^`C6P%vsXQREvS8xX)37RSGSqU!dQ))qRi
z3QoEQ%ybvLl-}t?U^egWjl|rv-cNyu!1x<p2-7Y7-c&4Q+UW1wbGzBdi}HBw0So=}
zx4VvxANi4M6*&Flrmko9%aeQx4E8TJUcHJsxS<<~kwraUA3v#aBk6PVERJqH-0z;g
z?GI-VQoOJ>(Y&XtOLq<&@2A^Cf0}MDIHCu?O7!f19;?xxq1(N6eit2Gpe@~=oqIrN
zvh@?O^L_hsH~JYJ131GBe!X7r`s40Bdarl?alD%cT%)^mT}4w9d|tP5wYGM2yYJ*~
z-aHuwP8@L7xamemcT$k{ORsrN;5T@uPFq70cN1M5IV&dzu{(F2EUm@79B6Bxqf_vb
z1BVXQ?ss^+9PAz4<h&I5w(gJv$F$Q>KAx>x-0c+kOmq!+RGeI`d87b4iSsEP<l*5_
zaJ_p^?wYFF&g0;lBHw*?cV{^$6oEj9Atc3|Ty3D|Wo2cd;u25^2~lu|sGGN=`yDS)
zM>qcMLH-y=)!NO<)z;bF*2$5FHtrouCl7Z;K0ew+fBtO0r?r>uUo$zn?JNr{5K6lO
zJufB>{c~*ar~>V*oPn*Ewf#+1TL(ZhFo%+ixVXaB{r_?0uNnXLq{&}To<A=x@%Kmn
zcIn@b-f**aRdI3vbGj@2wO>0A|NY|5g9=dE(*H(^Z8~qA1%y^Qr~v(Q)07TwaBhXX
z9JZ?ZhG0Jdm(hOcY5W8tUfL(^_c_8h-)lb|oid%K>Lo)j`kDR%zZ|Z2ug))(6&qhZ
zBWV4+i22RSXD~q@UA#dW-!1Qp=`U5^uSADL6WY%>TEDkoK783r#p%9U+P<?dCGPHD
ziSgZB?;cmeEzEdySoq3}&$um<{hQY}2P>9Mn@#QJng?*L-QoVr@un}!IiP!vD$~*b
z^Uou$M`xKKib94w59xO8|L30%bU`e7d;WPp?QtIUht--#4z>U5cG}k<mZpDA@ZSgi
zN!0&R=^qUBUtapB6#c&tpb_4j@8x7gc8TaVk*N=}M#l>f{7LH@eX^d>5`Pn6HhN|V
z+YzHJk+0R|c`1|IYa*!0Jf%Kd{n0?}Pd)H^uSVi3UnW0I{6nIZuMVd))b<Dj$7m)R
z9uqkEhcL%^?-J0vH*nr2{FpM`BclVy1@$h_zim7x5AhevB;QDS%E(&U>sTf#)8E$i
z(k%ZB#hTC7cdZpeO#b}@QViQ9Hxnb(xm=_MVyvCbV@J<rWDdvNyvHVu+VDe+Cn3_@
z6lQJp-5SNp?49Ks3<rGYDv0SY>W8wiS9UEWQb@dZUo8}w>r0BlaLjJR;wUxL5$?_X
zwe^K!1#Q=eY|UQJ5-!>qiPsvvXun}>KI&L9+R^6Tk$H9{(?+pTaa^|9CBZ&Vv(#Lw
zQj5%lS`AolOIM2S;dHI%y-_N4dOcx~Qk&%3VAHpcJ#%h=GA1o8CmqGRtr3-P0d3b8
zXhdV%N5>e!%0Jp$oGH1s62_v*p|LBaX?k@e9KpR>#n2ZfPb7#H|Dv?0_$?$1x?_F&
zr~8^w8*VFaSu4)WBKlnFN&=XaiWg6v_WHb+wRojs&Yy_DttS~-$Jk*D3fBoi7<c)Z
z9F;T+V^47N-CpRA_Ft||EpC0EI+2C)s{ssU70W%srxrq;{Jv5<BR5AKDNCWBms!hy
z?Ipf{7v|dQP-LD{@!oYhtI(4b?wTt<^n|7F5(B5<aizTFKyIXr=E<y4F0Wt6l?=G~
zc=Ag5c&NQu+^L;i&I5a#$CbIY?vQxp`060c(fQ6<G1~;OX(2sg<vMXuY@fVmH|cyG
z#|VKKD1m2?A1kU{%F1tsBIg;4eyXvj>Z)`5=gTktP;oHHrvSo~@rTKL;Y7^jI=(3p
zzW4oNr_MT_^Yj{8Uj44Tpv%ypY+kFLe91yH6tfsvjW@0&o#2!+PxkDwb;MDZ6SAYq
zYzTTRB~hbAIBJ&f;!pKfsrYr@!S(E3pJ|6QtH$F^G9w`u?tH2}Z6|>E)Hgi#jctyB
zzA@=M1DDt9-Uk_3eOFk`4ezlAUD{^&NBj0@Bwl#<?f90|E$tO>d8>JZttMtVyl3{N
zv<n}9eNBa*wK^PqKSExnu-~&cU#rq}{xcKOAfYw2y1w(ak*RI!WHmmq+pW)|6CNAH
zn3Y-3Vd{t%jCGd+W5pTxy>}$`l@J@@8?HV1Uk(Zv>!a3MiG!Qe!SuQ0NGy{%DL}i=
z_lQ^c`ugBza_wxz%0R2XyN>SKw`9p)b5#G5ZNW@}-|W55kM|P8QIvOOzrQf{vsV6o
z>TbU4u784q9G)rHY}|XgxNdW)*vzHoM1Q<Za(p5F6v^lJXNFd67iC;s3BB<cUESw1
zokcvW@2%BivDgemP`_yGRkJIi-y?m!#TM@BG=QMsHuxOvGE36dwOtP(e+O_E8{^j6
zuas%Z@92>WJj%l<^mo&?+=lm1gR!uv9}}-`|FpKy>9<LIQX3`bZS+(jLB)c5?MGl=
zozik{KF6AR6q75Y+A3V}<iP_S?YfDS#dMepw0kvGlg~2J%xdXk--;L|*$li$fLot?
ztFB{$0R3WJuaiFJJYTbNx7SE$x>ciNnSkoRVw!X+^)9&`i@Yd`@tOMIxJg~nZyZlt
zfH?xC5zp0o@(66kc_sIq{Ij(ig1yPc8*807;=u1`(kacT$g%)Jx445B-ndw4V?MlW
z)}kzFy{QP-`b_s8JJ4c)>EpbBj$}bS@4f_t`*r`WS2FQh#EX%PNdN99&qeLZ%pa7A
zIher^iCr%FeeT1A34$FgFvf4<L*HlTZtK|81RoFdes5b6PgmEpX^?k!V@rf+Vb>RP
zpUKqLp|u-H5}KH3W8{mn@nqkq>c-&yg3b*APOCw}kqGYvJ^e;kL<8VZDaAt3dKG63
z`I}AJ4YXmz_}7=-4tbMl#7*QPe^KWE-uOvnBK1YYVywI=Av+94ozy38tj{6zeOjXW
zaMWI0Dm*F|K1p7}qGkzXeo~GZ)#oJ)7a|xW6q|^F`VFZ$F3)twNAyM!TEo!}k%0$r
zGTy(p7_^8&YD`BQ!J7-A??=f$AS`9TsSS8HbAlio+9Ghv^fPS7IEga@Lh;|RxwfSQ
z^8lgFvJVC^rfRr9u{Y~wfmq`{x>1)pA-Jx*OD}TBV^}TbGqJ)N?q?4~HA3gAEx=wo
zg6sMtjRP)xpPT)OZmrnv`UP#|=K5edbyQgA)gE$7EE?OQiqsrKZH%K@#*;Oehc>CW
z&DiB|s#~WyA{95me;yDr+m|{G#~j#f-lXUwEJ?q58Myt<T!#t+avUU<FH}xS4f-wQ
z>sF=-An^6Z&jsk2wSL!g@}b)zWBvBp&K7j)HKJFKLT>hSwrE3>luQuBK7TZpnJP<_
ziyH9fcd-*>3aXf8Dk=eH;wuriF^#h~Zuo-?yJ%!kw$a)m18J9c5KF>Fjx~y4l2Cm_
z+Qrp`bR+2<Mfa5j7VQ^De0hAz@5cDdr)E#znOt?b5Wi6CqJGa$Dq}ID#!jAt{ww+B
zwGmicy<NHhuQQUAA$J9l*oPSO$1hwr(oA+E=@*U0n3V=YD`jRklji*D9Dqi&^#+<o
zgPDmT9o`>F(86IqHOaNkme`C>Uy|zPAK~)<_LG>y#3`j%^9e(>FKUvTE;ft8T&^7L
z(0i;>+2Pi#xRL^66Gf!fdp1t9U{+Pvv{hS^SXnJL@?y&}uqAR8QH0Hnxy_ayJ8XrN
zcgJ_TKL3^egzQGBI9>r=h0Yc9ehY6#k5_e72PrV9aUBuX<SV)Ulyyg6gcv=n#^0yL
zbJ8rzYvn5a#N&dO?-om(mz#BFfy~<^&b*{>%#(vrVXIwMVY;{%DNO3^vr(y(kKg16
z=8vpYLzK>E6$VA+;Xd*s#Ojkyl1NbpY^|GM%X%j!HzNsmvPdls;evXEp*)hfDRn6+
z;D`^3^r?zlf4vdAUOpe{<nAgR<ge*^r<GtQ1=UCI+f4Q=xWCkHT=YG)!O4HxthG!=
zVD(KPmlnp2&)VBLi{jcmxUnp~f+pB$k=z>cvZ38Y{NV}`=F}l3)Px1O!8na+)7<U*
z#a8TM`C=;6!x-*g@GM=M{Oac`yX?r2**+WC`puQXq6*QhSQE|UdSgFA*lDWlz3zSF
zXugsJRXB23wRV*<4#OzzUj>ehF0SKUuE@6jQGV?W80ve1BtMOQ%6sWG6RVkda|<O}
z4_yS@gqD&0xNy7mN8tn~<c$=L_>A4rc{;1e!qj0);yyJit?O)Z<md{mF04+I{K5gd
zh-D2(mROC35bj_}dA-^AU7B11gZEuX!BW8c2#jKAn1U|UODZ^4lJBhm7wF<Fbdxd-
z-*l}Lf(83EoJ}vrQAVMyONBh^epU1gDF;q%cJoR##R$&xAs*C2e`}ys;mc&nwbrH{
zA#L&%0q>@u^0Ds;<JCGAf!r~DvM(t!pUMfcf@qDOotls&!od2#N?P;9NdE|G`S1(j
zzIzD{G0(d0MlDB<W|tACnq<cl^QRc$Iz$<AI5j;XT%K6GX!APtGV?|BlcA?O0qHvy
z9v2BY-+Nm=>lxl(gD%(JYNx6;zGi`tLKw+wf#nBLH*C_A`u2`vFphJj&8SKRvo_uD
zT5>|#-uQk9qab@E1yy3Ff=QZRz-;zaOk9C#Sv3ghE%ze^FR<|>5$?PvU9YybK(GE_
zh$U9&lRi~1R3>_U3W|E`k~|+`Q`)Kzb669GA}JK|LhJbZTWbN@{U?mghmi9N@Dw~N
zI|T7o<Fe&wv~gkk=RBqB)+2;VnVlO1Nc<^siM2OghSb`$wg$*Nbp%G)zfvO*#0w?k
z?|D}Sl@r(mC2-|>GDCY1KQ1w(T9eg@S|y|gVK7Vmf+4}9`j~nOBD#J)%0&hCMue#+
z8uKV77F8Ft!55K_H$T4?Ep&7I4?2BkjF7P%NMzgMt#kXUnIRkfLK~$zEFV9$GRe7E
z&OJDzlPJ)9Ad+#UtJ8d@@IgrvCxjH8{1q(^AuIuXA+o}1>3+n#M<A4vpEna#e!)e;
zuH&Wece_IE#7{UM0r*w#j!((aN~eq}(eDsh6UDiy3hN^uGqPPt3t!PyUW7>Xmft^&
ztuK#c=C^3sx@r)oB5d+ku_($2HxYO{mFqumz+9y%Yx@u*q0(~XC9d_nAeIlY@Gp^N
zFbgzAVBsR(*qI=x8E2!kz=rSiMpt(fH6i3@id9TE(<%lCO$sAAMN=O$mO~Qu32bUy
zzJe1XK=cU0N<Ohc6#E6_`fPf?3E#HcUbP>1dLeX4XSc}Fz&K&-ky1Cze!c-=!7&MM
z>!p8|AQJ@RD4f}CG`p<TEu`15!7*Plo;>2&ZQbeq9&EWHbt)L<c;CD-j}MViKb`2-
z<RL%TM}B%*H_=dctzL$7BFW6D;>L?fY+E(@9BC=JQ-JAaG(0{smU1p!;I1VW$fFYZ
zz))ZdLv^xCt+C{rJeX_Ua|L--xGOTVvUX|`d&GaecTg{bT8$r9T9I8@F2X4~v^uPQ
zvr!ruAU7dOAuuxDmiY?$3aQ7nF$;~6s}#^YofYM$sEJG~URkQz&7?E9Gy>6^;S_`T
z12^*_0#}Bhd$0rZygm=KOXyX}V_VvV^U+&*y}RE^51g2k|5lB^gYRck7ev?+DyD?E
z?T1rlj6DcsPSOcJZ<$F(`08Z{0uB6-{GV#JZ`CYE9Z&o3d+xa2qTTEguC!(n|0c~!
zVE#2+{@j9duT{8w2d7W{8{5rK&05Ir@z5mray-`$;d-ft4aYpZ&}5Qu4+Xcmjw804
zmCKRWTE8qhhN~Qr6`*$CA;*tE6ow8=CW)@mS&+5Reit?2q$MG;H3!)=QX%#VQtu|}
z<4rUKxX+teZzehU(vv(**9?p`Bc5209u6Fq?h|()u|Sq%{hD-=&%VPwJAOJ)j%J;K
z0XkJl7Zf$bF1dSa`P{pZ!zn8n(%r9SOy{e=>LiA=8hfZ-Mm{z{)r^6N!h+;h(yR7J
zto-*Ef}IRj(~5$vm?>T*i@1%|H%}>@=7hAJLD<&opJ-eiq!JM-ysaK^4dk7%tYB2u
zSh-X3xTjCQUaP~K$CQvc;I~_?gxDzX`)g#?a(MlEM7)k!GhCZFrJ1>k$gYPHGIMKB
z+ZF>kHvEVYh$3Vtp*14m3?%Mf=YpbDm}Uih2*T><s(Co+hY2~A-<up11p=&KqNtO%
zc8?HjFcTJ-X!M(D$LL-V15Ub&tB@*f(eL?Q!`p-UZ(I|+$(KuNneR!KkzI>>mp)fJ
zF>`YR=UP`^I-Q+>ym5~_8mo`tB~1|^C>5x;*CbF+J}bWhm`YP)ZZ%NXYT^2@MWf-R
zuqZird*lXS^n$3AhvkHEpFt2Fr(17wlTE{`Er77teo;Y-USTSXw)yBcYX{dJ;r3tr
zxyU9VL6qvX9}z2^;45l4myrU`nje>={QjDxP<O?G6wO7w!5JF0r`Zm^-s8UXi^;Ew
z6Qgj(h-egv?da;nB9}52v$;yjY|3T}wYg`?s+qNQ0{eh~6H}qS@nydA&;#Tk7R_RO
zk}9cmGA$vETSs1zZtu>{-!}vy`@24!XEY09aHDF#1s%(S&*&_%oVALS(X1qrOrB!P
zC=^zlGgOP{6A|kdY6U!_HuJ+`vF4uLi=FV4pF#|Tcn(*aCzR60TD*$>sPS#W=M>V7
zV<pb9m=iIv3Wm!d;MQtf)5irxVKx@ii6^jLS6qu4tQ_&FG48%|1w~QvmuV5Utfqv>
ztzvQNt5Cmabu=dlEq|Aw7ymX^fNx-}qbThTx-Ek6tbBk<+>OSzB__s<LLvm!#?TnO
zjnz6BQBR3XaJM+H-+!+bTBiAyL&4;ez-s@+cig(VsrK+!<&mXHh=J~kY4bT(yX=yC
zK8P27S2WO&H6Ro{#z^uwv20E+Jh(P*i#Y&*F$UnC3r<ZTTORF+J@&)Zp231Dq@L2H
z{<@m5E5mp7&@!6L`>o;2Y!K_U`sSgHStnG&k?69s1Ca_7i&rIW(2_2n?ukYsmsEFS
zau!+o&SGS(*M@RTp^c?Xg#}iL9vwFwu@%d&^o%qTD5^QgP<0mRYf#(+e#CnX=JyjG
z0HY`YA+zw|C~R9jr!Ywi<AUzH!>`HajbG3aaN$F=*K0GglY-?R2p%}pt+wv7xjrzi
z<UJf@8!hnat{BxPJhW^&yR1dtGrMH$w3(v$fNV*^cwQq^A4RRhjf`32`_HsEjd#w?
zG-+~8H8Rp%Q6L#=!pg&U^EbYQZSe$8M6AqwnMV{~1?-Z2F9jK(m)OOC;c}@yMy%%#
zUrbN57_^Qx?*sP0j6D9_sd#8#(Z<@ee)x66-z*SjC>#<Q*lDpIh0%c=bmu5(9TJ3g
z%aD5-ezr1+w@aB56YbZfTOF_~AuOEg#Ql+FT1|56A}d(bhIU4JEa_-B08Je5(4O)v
z!xmNfh5+MH`b^SMWACx&l{7qq0EuSu0H^_{>1cBCHwj`vuhSgC*i}!Hi?lWr?kC{T
z>ypmdU(~eT;huPWM#WsZ*Z$K5wx|Q*!i*<HoJk@qngh2`)We#MZx`PblcIUaS`n02
z5q?SPNKdi{*4Mn}I$DM#S%cP)U(&|wAH&6C_@q*fZ@Z+Yf`HPD%#l31r8IBS!(1$*
zcnU>E$kWzir`(5Az9o*F<cG6IVw>uxYoJJ%-$!2Daa#Dw1WO#Km@gZoc2KEvOvM0$
zYtO1&o5Jg_DHU}X-_TQBGA!EMSWkx`MAjjQzzOY2SN;Rp@^j^q{tq)L2b(i*e#)11
zD4VNXk4pKuPY-43RhoP65;HG}=O(qCYd!|6j-Ljw->CazE#x`V<dwWHFN90wqp(Ls
zqXay>c96QN`+&JaIxyW^OF6iQQXO;{8+9g^nyNEg%^Dx#p=p>IBVa+)tb8lL6w~jw
z@SVN)y!Y#AJ9wG7D++Ju!=OFU8N58;%e?|12SM-3#%sh%;IYpsMILU$$k2jSj1nR>
zH}oFgj-shQ!Q+C~R@t&JfUojz{(3T^@63SueH((~5MU~Im=c8JBIWOGpeYZ+{Mt1t
zxdfU;BRfHWfkyg|KOwMs69kz;qXSzSw2_x@BDHQNkMQARf%g+{tGKz7H~8nOjw;{Q
z*hO;gUg|Jw#y5+@UGajGScb#4ACvFkNYU67cu6F7yj~m4L(+*di?S(Is9YV&cUAwU
zA7%4RVd;aut`#k&DHX<+OX#L(T>dL^3k3T<FLT6x?~YYDbV(Um=><YgZ4AT}pp}R2
zJfrZA8b2gt$2N-L?AK!b1WdUqg<<@nOwZeGlIx*@4ke>8jsb_IH0Hu`n{VMUk52xf
zjxPgs%+m1f(T+neX{5~MC(Pty9??htspmmP$q6HqJGG`cBimti{#9^0d-&<zke&N?
zqHo~Wh3*F~?vr@U9S4VI@U~37^6OB5UG{jjb=~{p%%9iptAgS9zv(>wkI@{C^SHFs
zzN?km0>A&Brcvex!=1i!kNH1Gdk5$w#dWce?a#UYnCbK}Fx-C0sJ{=iHK#H=;LH5^
z_qpHyTFK7QkLO@GwYc~_|1nz68DP~z)k7Wrg6g-IO7}>OhGBmBCh(u5sR4)2LE(YD
z<jyAk^(q(mz;MUz3GLvZe@*vD>L8%$$W^O^Z3VD13GIg0$7ueexa5KV8127XL?hFG
zw}{4H|CJ)3gZ_UmMaeUoCq+#OX=!OfzL0MDe`>u)Y-fS%q{g+pfmmyjpL3^Wc-99S
zp$Htcf36R$EjpQu8}Om|zp3)T!S6pPoOv{n;DlOFkSEpi_qQ9vXB|uD)rzXo+NB<y
z=A9L!=p<yLXwj`4?&yE3un=G>@Mqf0l*UgO-x?|$@UdIzvI^h3La^2M|5!FL9NZ5;
zusB=Xa*tiVC)Pi!45FF*#3BQmTUtgK*6wplUPv^M^!WFv+Yu7UQ{M|D5XwNPU7|<i
zxcT5iY9BoV$1l&WI}C{R*`hLlj&_<mf8MKCS^5d&vPv)%0>rT+^XBA{W(;hw;@4iN
zv-F^!Td5L)c&i9xJ4^O5o-Br<$c+GZZqlBX?ssQF)SNITwPQRnLiURP(3;}sfkhD-
zGm!ekKL{fGzU5vgjNbBmII6E=lRDR|BNFs7N?%Dft86ljy<cux0V;}dsb!-%(prrz
z>T6Tl%%Z-t1x4};-y~Z;YC|j99CB+;_IzXhOObCKJ&HUKA;ZSzFAq`LSRLtSfUSOZ
zAKCmJ2rItWVZ#53pA`cC8Ht^*pH9G=3t)R~)AP&MiDhL@ek1IT@zyaeG?b8*RBH91
zju-Yix&ai})2FDxcVNL{<%53Gd)+^f%Rh|X(Npm>hw*2Tn<e>N6G(u@IhHGYx(o1~
zrkQ$LI<E+zlqnuKZLZU09j11v)|xLU8!I?Of>UKl*-T!egunZ!_jKlnW98_{A`r;Y
zJd<k~H+MjsKTWo{1TaPX@0`Y~!xc!+eH$8t%haPrGb{>j0zbz|o+_9~3rihAwCI+1
z3rc>P#O_$k+mENErBh=qdZ+;R)4>F}wrE23B9I`P;4z-)7G~y9(B8+rxmtJoo47D7
zYW?=<#;c8ZYm^eoKW<&jYJGt`{S9`Dey|tLO`PFCfzLWr@vh1uoKj!6?54jHigR=5
z;OfA}nzPL~Wu=1ZL0M;su%ols51fslC{39chv&Y6#HGVZnsfzTarIZgRyuA?0S%)J
zc_KhABE>APXDXw_33A{B^+r-4mruPYjzY3cpd6n+rp|G!Zx~UzkVKTkkagBPl4&s7
z=eRpY7Jff#nsh~z?_AB(X~2&dx1<_E#XQfYr<^CF@Rko;w0o35tsqCMeoty^+wdb+
zL96=WR;>HO8oS`eZy2wX+Ovo{ZG+$A6m7lT8yjAmL)4-L*vUy;D~hFcGed%kGizzu
z-1v>d`E5irI|idSXMZgiM`2(^z*QqAFbnQ66#>3P4TF<mf*2x6V^|e+pl&b>qzO8c
zoWC3(;`NcPJp#C@g*F1@#pns2*Y-UT6n>^qDggqUj4^T5T)`5EFVo;_3k}P-3tf3Q
z#hx^6oOy?p!y7N0ekQJPQsRO|rs(lzO1IU?g;pu^X9~1Sl=-6Z5YhGV;KIokr<LYZ
z1K;;eVK$A(OxpL@B@?qV19L3XSclR>p{#a{HIk_1xEe~nZY*yzxWt8`%*t9dZjj$P
zlrHvC-Ojdgef%`ioyVO)`);k+aq`8=di9!Jf|$xkP0!`|uZb9WJyP`trv$K%g`By^
zoUndR?|u3%<wm+~;+>V~=b#P}G_>?KjeRD~&s>ATZu$Y)PdN|;<jF}c$hGaNddb%#
z@;aA&EI=;$Hp!q%KNWUIWMuyGpQ5wvYTa%FnnBHfkEzrqZy?uHnEY~=d&b~$VNog~
zLSa5gU#D}fWSpyL5V<bctXRTh9t;0Pu3}L71=9HRMIgbP;_(wZRX)NA*o11orf}SQ
zVE=y<dZ~#54c?~y+Iqn$=kq?|+B8;wPNHOODx<YbWk#(2lV<WJWjq~`Vh@xG5#+@@
zKp?3R#p~Mpi-wFLhJ%JMr;%UTJrVHDHR+b|wE9lpKZ35mMDzg=(RWI_L|<(J(7`h!
zGf%6uCJ;HHLYxTq7b#v^pTptgm)frPC(3G<0apJZ3Q9K|LAizpYGVP@A7kb?DOM@t
zz2ELv0SL9zan;*8m^x$gX#dB5JN)-SqDk}9KvuFY*Fj-i7=JT&B=%RmvCCzw;&Rv5
zB>&}Zyqj+FFtH@vZTxwkzbiny(h*U8-s4GI$<S<)2bZ_S3(DR3WMASWE#}sd7yh@^
z_`?<{jDtk_xMMjmVW1qP6uv=<C2H#XT)XE)7|L}We@-#XrBJ#+ox#gO)G!OinSs;Y
zz*K20&Z>_$kOMu50Ev|1E!|F2-bn)R|Cw?Ln^xTN4V=i;W0OX>8EdE;4b*iUVa3T%
zyDySYBmrGAOB_pZ((7%Oo7P#WmG)%jagD{x0sYB^T)hL*XJR8D2mh|0ec<;#T5Sy^
z2o35l_$s1CC-vbq{QyG-l`%1P@rM$E|J=7NxLiDlW}fEXA92B)6uT8SpvlMN-`**q
zZ&E9IEB^<esg>#IK>1)of2<z0WwG>l<!a6KezSqRO?Oc6V`v%^SleI+EG5kly3s0F
zN}K*j9dD!YV73rbVA|h@D>IJAjz{=`Ek^)H!hNmExQx**Ye{6~XE;}MpDjr(J?L1k
z7`$Khmm&kmU6yIh*nK@L?T*n!zE~ZFqDr#T{v8%pY0$#{mpE$FCdh&7qI^uamP-L|
zmqi<yerVIff4%JlAVPvzlwe=!v{JJuFgvMh$DaI?01uU;00myP-4pw#^8>8v{b-;B
zj5x)l{;Y=jGe&wviy%{^+;#wnzlK!K05Mwrlyh$GzqyL%M+z8Ds386iVCpZ{HWdVV
z^+sTD=#_t|2)ai?G}!LL_gnut+BqO~S#r1RMgHM#(5QX^xNRw#4rBjTIJEjL5c0JT
zSz7&P$fpU0(|sWMM?&|n05V?-NL<DdR`!2!4wy-o7C~N1DEVt8J4df?0GgV56nB~Y
zCsD6~;i7uJ{gJ%-YhhW;fTkb6*3|yn2l($6ZL!7c|85ar_5XvV=p&=Oq|=6b`iGj-
zE3Fr6_D-?{Lmw1LilfxO`XEt64sF#xu&G)dpmO6lk4iCY2M3oJ?Gps_W}f=kQ%slz
zu%cLt@v{#)Wp375F7%M5Jo?^xhO)O)z#Ye4Z!6iVRa!WKv-)y!Tf>PP&{vG4Bq+ye
zF&_^;7<IOPb>RDl){fb)lnzX_<{bIT;cYE6A^`l2#ISw6+n`(3(LMV->NM?JOEn`0
z%GB)5><`&yFx%DTQSD2n;*fB8^-TNqZr6A+sDHR?tFp!Bw-;${5$aIau5I1%?A&Wa
zVl`wQy7Kj>R6~ql51c7H!q_akC^T<Ws#$+nU@q}B*Y+~!kAi{$uDVBAI}X<UT~~sl
z?04~%o$P&2ip;%Ad7@3e+Nd730+u>lw_Vu@5(L{m(nInN5T5r?K#w92fU~e>vGiZ)
zq4=Qug}^*14L>2<hTxyMl>HaZ8MxO4IqHbSAG2%Spo&?!RrjnV8L!ukJuk4`EY)U)
zc#KX2ZkY&WNt&&3xwiw5stMg@fl!6$>``W7Wo{m8UAYb@v%>Sau1QLs3cZ6d4mz0}
zQ!2v(LTc>W<bB5q7Pw|rb#hxhiyJ=rVeC=L@=R>BEbH3AD^I5KD)OkLkp(mNas$=(
zdLtgTR`xf0<r^s{Lm9!<3l_0bwJ`-1pAuaMG4k?eHK%uMM+nW*<;ro`AKE?<z?y?d
zY-2Nru6eMXwmPSY717#oIt5KKj(sEBqX6pSo`jv*3ARJR!K3rCC+&B@LX~6n+Mt-_
zn6-2Y=KR`ZMsH{HDe5980g&7<N^pluXt;txIHTFN5vZ!#bHU}>%QI&lVDm0X#FWn{
zp1Cr@T7Xa!S#e4jgScEH#xd{Q`JR^B*Gl%@-m3c=$}AAMBwd59yu#@gr-jZ>F5X$E
z+s%74%!q34TF3-k_I%P>xl!npm_v#vf?rM$_Z{B3s^@Q!q2>`Qb*+K5DXG;V<qwa*
z3Ibkh=Ewo|?GshE_ku(Mth6FxrT6mmmH8{P8*u?)+>2f}ii)NveJ;a{pSE&J539Ur
z!h#+7+_AGQ&W{X+1w382?G~i7wK8Yfr&;YjS!tB~d3fjnk{MEYZhG7Hg~ZY*D92^L
zBSX7ce^dwQ?k7YuD3!2@JL#D@ePOW3U!Z;eK>N-N*%!pOO-L(%72iF?bTDD(jd}K7
za2XD_g%YSzH-Tz6Z*57HeX(}DK)(TypCTFHu`N=y+CV*caWWO|+m@S0jQ59AboeZP
zMJ3S3!`6jWv{Z&w1$PQ6#FGKf)-Dw1+}M8ILpp%y60e1z?y}D{DY+C6`9Khf6}F46
zTal+dZ#lH4I%JqF*O5ABi$Q#6%kN_O^vpKSE{>#)G#8h1WoIPADj~h8L-tUTLrIef
zV1L=af4)%?(7SxDZAbqo(H<Q)$r0Rn^a>BuMIuDtA@N~YmUdQAqe((&#`TU|z^?ot
zro&!Kx0;q{Gw#($eB%CoN3ZzNc&9n{w)VD@a_U7@;lfzi6@0zV8?J=v+PjU{@uwuT
z#-kk=VP#*#3;grH!+YZFy<Nv4F2k6;+lBHx;uf?QnfA)Lxbq^nczo-Ebkz^}eXQAC
z3nc#V17Ay^0MsO5K~a}VB#&%w)K`!qkl-*@+jj77+waK>iu!CHqYLZGgZX5B56{YT
ztShiag_tC8n_2gV*)?vQ%9RQgba~6XgS(v42maZJ_pPpT+grF#$l#^SSvMCf`*op0
zoscj{<GFcj%f{-ER+<|B`9G@hM{lcxl0;s*?{Bxom?YF1-h5We*ghEtCi{>F0_cye
z=dB!0U(XkCxh9~xQw;nL<hmqGQTNoh)$_zIDVJgR#QJnVS6o*dkTE5eahx+T@zRWx
z)sEL#6#+<n=s?V!9gWILpCEI#OxK$EG%^Gy=Js@|!CoaE362M1$R#5w-`Fng=$(I5
ze1SB?BZqO(ddf>@`<8M-z~bb_DHr=E1BD+1{4G%p#nNxhBw4`bv?y-8+}Tof8j-vV
zyM%U#bOsbIvuK*sXK1;m=wX^fDmFXVGW0M5@$wfvYCpC@E`UvnIQTzmvge@Sa2WR>
zPk<f6$y;VrE>EqqQLoP~utFxgT=PqOzGG%y0+vww(A>Ap5f^xXQb(Ia7)z0VerQ}T
zI>UCg&9TmUBkbAIco>WCvz-=<+Y}l*4xYagyz}nwY3vxQ_p%zz<L%(RRLXK?_B5o?
z#NNWpq9V+!E!*YYD@=8e;;xrFM1|4TiqPrWNlEUbnMrq+{i9r+J)v{`iE<A8O5dEg
zW3@r?3+3&Opt}GUTS8Agz+pQZbbJ4<fYK3xLfifnjX87vLQ$z_`8R?Jth$kfw>)%k
z*d%_VLSU)C0hY4N%iobiA29Db*gkvuZO&FX@k3s3IBGR^eds>BbtCgEHc-7jL$tG3
zt=lyL><${CXAr(+{HnY_5|1r>q{?VVO<n-Zs3}DsX&&rgNUrZXgZz?8c|Xcu=L@vs
z{D_M1_5|-vf_I+cucGV-+)ZU>2on7nj?FueTj}k9tiUNnfqAZtPOxl8D9Q_9KVC`O
zsBJUB=@Vy4M^vSSV!qMLF)}t+*3yC6ltHDL>3mebU{F-4>*BVGcui}a`7R^LxIHsJ
zOHNP}l7+4iCmP#dBIiHT1&(^-OiJ{)<&$Tf8l{Acjx6!IFtfZnysbxnf<int>iOG@
zJ6rluc_Kru|A0y?so&`AQ@+a{-4@+Q@;q5o&f*N8VfuV&{`tbp*(7UZa4qfXX{<K8
zRb$V<O}&!2=A@)7%k5$5_tb(A3~$1DL~3UnKQng8>rI7Zvr+-*bv`=9_@v;bbVzJd
zM&l0-hh%_YT~>d&Qw&uLTK4d(9E^qAiN;57_7ep4EQ6lsvA59da{KJGO$c)Bh$|rZ
zNBlOZ{AeBow-U*9PqVfI$lJGRW#3xIj~5ts^RQ`KEOWhlJ0IL5&mXg1q`MF$a~7c!
zv~#B+O%eo~%XcI};B^p7Qj^db;DdNf7LljBi~mPk+!9lEED1e0hPCrcSHj+SSE|i^
zc9e0vX;SLiP8j<rmsSqy8TjPz&SW{Du&bK8-=coq;my+9)wiCgZ<p$re9Axent33J
zGV@Pw?r5xXnxu8k-?rP?mSW{IF3e@c;RR%NW5JH)(52vl?(-?P{yf}w5f;P(kN#hb
zWwp>AQp0Xc-=R3^^epM2-OwIy+V?i_9TLP6pn7bZAI{SV`HbPI?#>%W07A~!<+GBI
z37M<~y>Sw`8yr{sU-Uq%0r@xLKg(_N-3WjK?v{r!m<4RhO%<@}*MyFp|Be6t@Ee92
z&6~>mzZnKVbT)%1)>4jYfen+5(j=8$ciY~n7%I(FXaCDo=SRxqUj8373Q%Mo`wCgf
zg|}=n0sANZ{nLP(8Nz#FYKNrUG-c!_(zTN}f5r#OY}nM8l@ZEy9jP6?)|uaH+s{sS
z0d_qGFk0get-w-GWjFu}FZVxM_}jhS3;l2J-mNeQ*R{0D8~t$i5L@a(L*Y|Q|4?0z
zQqLIFL8nlm1aRPd^RA8^tq7&Ph=)Sg=^YCo7Ev)$oyNaUsePE9Ch_|U6ln_JT|A~5
z|GJNjVSA&7XqqWZ_}w=0ATG75yBdF6sxgh*zMEUIW30~TX3<Db9!%{Ymbw-#bM-*X
zHmirE(|kAuG5ZtSTezF$j8@{ylH$(Lvn*ypYIRZ8(W?@<)|R!lV5gVw|H}s0j)Ejy
ze!Z?PUcG$^I$7pCbXgjpYCTqlm}o9ALDjcoU^tT?gBD)Hbbm`=psm?XLk-idYI`oE
zH2~E7y<S~NfvwI@bhT#W(b=6>+AhgAQ(W(?-0)Z_X-0NXL6?l@N<RW)w#1fEdRV&a
zhyUtVZcnH;UR!H6K|q(5c0+(ZgJS>jCXe(aq|XQBo-M8ak~JrYW%KeN9144Yylbxm
z+~{WR0zd7oBPdLJ>zr-}G*KRc5;xuE3FX@anl?c0?<t{I=kiWjuUA)M?(-^{zdKM4
zEd$+8@?DO?w5FCaP%Ecs+YY+SruF1!FZAkcMX|T(fS+;AQ)Y7)tgShGzB5(-z1W2q
zxN#bWdjwl#O%w2NrGI(QSzV_SX1sm|TM#_^J&adC?fqa9YJ*G!wT7j>6K@#$^BTpb
z0T6nUR!claYz>tjm`K?K4NEb#GxE}!iHUl{0<0nX0PDO?1J?NDU8I(bl#T`I(waLe
zL`LeeQ?DUfDHmql$Nn=bJ_s=0Bo|6+huXCA5Eq4WR^_@(N~^DZds>rYgyf|LysQua
z=`QL1WTWpie>OdFcwi|TkvhFF_T6nAbcnboh>DoxR8`O#3}_Yj=^5if@OIfG?39kL
z0nNSGC;0!xg~{q-)_hiP{2)eyi?G%^NNvh08G98!n+!u$(yF@Mm=v6Gpd7Ei|1$%3
zqHSk1D9Wwo1Svq`4XjVJnQiKcercSyg`60?{VFew!>fMnu?%7<S;f8*!PnAUpc8W{
zoAcer^6X~gFgA(DxgmTziXat`;p$Y8v!i#!bPX$fMylWaIsz2Pck~1ZXBU5Qs3eaO
zgBnFfvdcDnRvrZ?{c4h3;b4XoT$rSVFvl4|dx^kFLbNW@eTuXps-<?kpw}fH_wEk0
zWTZNo_g|W-)+gBLmdllu72IY&r$^gWqW_#PW}Vi*kqz1<7h^pL#3)*eq+=Z?!o5*I
zGxEpuBL*&KcE{CSAn%KEhEs-F`%x5-+XKa)HA%k25}=T7Zz-hRN4q=+C@n$g9lS)k
z;=TFEUAExwc8wXGF2Ow)9Sy<$=c+&4>AW4G#g3FPe3v7rUcTFu*rYFnB)VbT;M6JO
z{&3{tyX0AOn<S6En>I<K7Mo8kA%FZYfYmRo%lUZdtOl;LeC#3-RMI*2h%v<~f;?s4
zdyETd;$3)JucRQ1F@IGM7ApcZgG^tW>(s2+Jh)WkPt98_8hN&+9Wo1<_Qo2ZdOM+w
zGT)l#I~UHnZ)hYaI$e;;KeTmU$Y-*2;(}8;)}uBW#fr;__PJ#zUAeNq({_6K>`HN$
zBjs*0YJkYt3L^Db{sHf&1F)2uN+*#chs7>a9pE^TfM4ObWSut)>OC{w?{eQ*zsJo^
z$6AMIDsIMD_)H!>=CMCt4<NhKN4h7ZRzW)0IU}pkw3Pl+?5o+66t_oa*3mcmK&mHR
z$dFdOiSQ@2vwLUdw?y?jA>HiqZk;ucMOA_}o?_6N(T_J5mhN-@x(FrS2iYr_y5Ia2
zR>92-!OeH~H|rV)bVNh(VLF1FB3ejZ%lUCea@PLI`<u~JbB?~VJt#qgx4Mso9P}W1
zrY%C$-{^f%;&;j_Bz@O7Hwtxe0Hs%M_i3@&W<EjFop(-YISF#w{<H(kEaoVx-+lV5
zQBXk=?b|6-|MEiP4NC`jZ*z=bExd@DLa9vObUIzVj`|`_#f2`5=<KnMUV#m4J}BSB
z51OoL>3<+#Cf#(VQ^nrsCJ?YUENq?A<D`VHG~rCg2aQ0^xhrz-u36yCwh%`;CpzGu
zHxA!vFO`_=7A#t*F?T#t^?5nS!Aua|U?GXlmrIdb`6E5Kv!a8v^cD4RCt_vL7e`Ab
z^>g{oM0Pa7$^;OJi3xqcKduhi5Fsr-6WqHF-;=LonC_7G($m`mMoG>1jJhBNRk~pO
zzUfG|l>`pCnk(0xXF9~K1Xme1ec$x)<Js{+uOyUDU67(tg?0Bm-`pSUycYq|MlE!n
zGs%LP%5-eG@|zv(KK&{vyEk$f)q~ud>j@j2t3mX>Xc4jL5;QBfT<2vE&-<KFD=2T9
z7q~X9x0fw=rwx*luGAe0wHZmt%)NG`CUERvyC%wiGH%1G&*kl9kzQH>`>cA-=h;nA
zY&@L|Qhw=@4aT016-!1%AdzEJ_-nq78v$CMlFL9(MPH9yRuYBO<kTu{gO}R;#Q<{#
zg`{q;>~oW9N5fdkbZP40ktTBvQ{drQV_IOrSYu~S)|fqJajy=a{nXd}L8E%D8eS-X
zQee`2>3g%Jo@K+o!kVn^#~@#cTFa0Ied9r8z)n2w6=MojoUdl-&m=&Cxk0)OVL`h7
z)wj~N`NA@2n0RdFJsK@Z4TEiXt$^P2E<C5moYJp;$Bs6UB9I5ZYiY6-Y`ur_029hL
z2**nJWI9<Sz~`{d1b}-Q%C1#!yB$Buf$kjpZYKY(?=hsP7q4PD8tVp{tey6*jJ{L%
zZ}VS$&Xn!|`UDYc?HrnEhTeD%ZP5b5XtC|*3Hy)tyS(kU!~+mvc@Dvrz~9j^SKb4Y
zQqR;~?=(ltd=OTLZF@zpXa$QQ3)SE9mW<5Gf}tf{ioiecpw+lIg9@8zU1Er2lRK!?
zN%RG+#r@^p3w@yTJBre&=l$@(+)D8rXeC%|dMSvOH|)@>JeW^l))r!luMn%S5|p8j
z-ZF^Mpu#+5aL+>!CRt0r2LV&zcEFU~S{G)s-kTQ+`W^f=60584{}c5!E!azvtOhki
zy{!(FzaBGg)ys5m^g7`bbw7T?z+lvZBwA-=E0<}vM=W9dc{3;V>m8L|HBR7oi67YY
z20P`xBJ6&4&>{L7h_Pa4qi18eF@tYM{t;k}72Ut}_XuqCCHOZA-QDLp{%~C~X(2=V
z(5cPPNbG=p&c;3>7PX#Xh9r;n;~|(C&t3;<Y2Rgg1ZXfU^IuL*P&m3QJsEZj6wqw7
z0}ScSmQIV|cUv`clCe|A9na7Mq9|+co#BLdcwX8<Rr%+#zZlrJ%h52^jsN?gEx5xT
z-;7+AT={5RagOQ}YQM;={ms>FOk|eJn-Dqazxs=T$+M2jw{OuW^9Lv+DIw;!>V-dT
z+3tQ&0SFnhw(85IwErg@wz12p9W=eFAbve!D5crN*4|PAT3Y~ih2n=;9SST1*3r7O
z@_i1Ns~9vN_EwCFI=Ns#<v{Qk+pQSu{h8OGXHNCfRo`e^YZ32mh}nryc-sJ5ZXeD8
zpca}XR$9lm9piU21mYa|K>P}5kB=%F36pQZ&SnB@J&7gi)Mw|i)4Yx9{r87e5%vEE
z297Ro_r4eiepa&JjY1V7)0&%oe~LHL)jE~|)lkTNsgQd+UfD%oVKV>qh1jVVJ=O}L
zKT~1q_7(5-udb(jYp&6n5qaZ)-ZSHS_2(c0oT5<2IR~%Kz~-)as|Wb(K{v|>qgjt`
z2`P}6l3hNz*Tw=?#ZSc1fZqq-YhGyOB)Zf4Lr>QrNgFK4&D^@jD|{K3*dAqqcgSS0
z+kR3f@nMC@&tL}gBbGK>k>&pi0*_eM9K4!*7r=Sn^-*v8k~gVDEG!1+Sl20K-cwj=
zjS@lihk=rud<Tz4TFlC6Op94lzWGK%zsT(}2Qi&F<Bp@Plk!kEv90^kI&W{xbjXeL
z@TAtieV&;5TBD`$v{du>`vr@;n`8DRXSwCmMfHa%<*XMy4CB-PRW5#RKfyfn&;lO_
zQ?2j2fpVkOa~^VqVk-WBJ1htdj*d9u5q0MFVDKV^yB0YG;@Izhi(}dU3TfFNj8vQv
zxWc)M6TEZcfgM^ED@XT^%mz-{d!H+*sa!Y7w>P7dEu7A1JXI8mkAWbNVn&uWdmN$!
zK*0Tm+OY8*F=(8It}|HJkff}@2ZAr#2zu;y2P(-~Xa_nQ+SMI=!q-+JlQSH=347ae
zHGrN&?}35#je@EcG4PLK_Tq&+S?IoY4|ENBWjYv`u>gZ0wMO=ao^CsTJ5iGcO$4b&
z`)@-NRr~L<K+@iXzoo7^HEu-YO2G;nN;x}YsOK#mkoM64H~{c|b<c&E;1kieWhZw9
z_(1be{}**{9tc(Y{*OQHn-Y>WDp9f}*|$olge+MziWrO~L>NpSO7`s8Ln8ZF#u7%>
zY$H1(WS#8U#xl(O?t}O9etLR7zwht!{qOge<IH{T`<(mS_jSFl*R>pN%nYd(zu9{k
z(Efn&e_gnae$YKzjF}0dK`a@Y#H}Vl-0DB>Tb_aEfy~25A3i$chKw5MuaL@;J=!Sf
z;P{=>hszlrtRwk=$y~>*?IQsGnm!~D%D=ENH6DHfA|akg>+GtFk^MIrc$_7fHomf_
zcaZ-<U;oWIHfyN83rynB^Y@owr#6sByFLPSXq?U~H~MnwS@S;`@UikV-_vLzWx$!;
zlN^~5fMiysKV2>Z5BZQ4%I%~WT5wAQxC3#h;cw!ey;gPCAI%(CzR^{hxVc45_7M=&
z0G9DZ+ax%b(ta0kzsu40cR8F}M|x}S^qpCfz3;uTE=Vq2KGOBFZgIMCdC7Ql1p~IN
zFKSsgLDM5|UOo|PGE@UBUQQWJLJ*yvKnG<0_KSfHoS5$+v0nmM)narzit6oC535ka
zyj<CFrv8C4YD#<RoXz7kS<c2y-97wTprng(HYn`lwVE)vsw)_*O*fA%V{ph^XYhCf
z_VQoE_q{jQ+iHDhEaIpM4jW_WZHnABHo-5vEcxcv`y*rJ3|;H`*&P>K^C2&dKWWp|
z>dO3KwV>;Z;^u0Ld-rb3q&@q&w9n2P$rE*CHnrE!eq~9>YRIOKA+kf%d3WG?i4M1n
z%B_2^UQl42lp%zp*Nh%och`oDNw{FWAjn}f0pdxbO6udKx0TA<b9;$K@-n?m8Q3Rx
z;8a^hjkSB1o_A)-GVr)?p@EEY7RxM~SvM?bm7f=opLKK{L`g6^lgk+T@?T!vs|z-u
zF7>WLe=D|!63J^M$@uAT17yHC`{IRFBGA2Mm@=0$MZmIC5C**x58sb}vL~kI_$++J
zMi}yv*9-j9OYRLeP@baa@BNqSOuD}iSS{|hb``i2hp}d#NVrRm`9#(><kxa(6|msa
zc((+r8NzOnS|!^DR+|Q2Lk#^oxZM>!4bK<g$^Rmg?xj_}n-lX{cqSXRO4iozKg}YC
zPp`qFEoJqyB|Z%G;AnXI0YeY%c0kK3hrHmjT_QG6`WyU~rXkx!3^g#1=*g&=SXrw7
z*<&S(g{aNKElXXaO^5zgfhE|OUvA)aZD6hS{7Ji`mg%_fKSQ_oP1>>NScUO5F_K+`
zo9)WMVUaT!B|VP&R+08uO8FNG;>3J%K%R*Yv8S42tSe&LD%sLmeS%PaX@2t^o5z^+
zE5okgYrB4tyaS5@S`wtV@;|<0*FI?k(0f6^sWC`T#;%M&FyeG~HH%~Q_=;2`&WiyL
z{SxG%ccP#J%NS}~>Gj|kutcNWHv5neX7yk?dltN)YV=GV7WVR7w`fjF&T0>Qar0p&
z1fVgxz$dx6rA;z9hio=>-1Z7&k1oO91NYev;J#`{+}w4p(s90H;E|UMUwy2%QH%Gj
z<LhSwa#721GpV?j6^%&(raf4xv`KF2`;F%1(O9%j|2s>_p~q!T26Qx2=iHSOd+8u!
zf8+$WB*a+&V5e0H%Unk8c|_{BbFAAe1XQWn|KuqEnmJu6O`>3kIA0$jhfXX7jm3Uz
z%s*9)RJ#|1i2?@FlMe}dW@NV|QO&X2BNr(9?A`sUy#DSl_$QqGpB)MwN9c3!e8cfi
zP)sh}_&Wmr?ULX6#JsGzqvb8UAHV=Q00(w}$dOY&%5^?sLO0s<A`=HS?pT<rEuyNO
zCd+_^Y?}a^{>T}%DK52lIAkz+VEXD<`vjKqmE0>aU}Jz6;iI;oI18w&MI2rR%YRZh
zao4@!L;K~Ej*r_(fn#5P^-=I^X@-u!Ki0dNWV=q9t}jb)`;nSsZ7*x=wXq0$pZBlt
zI+423QPB@?WvQlH7ZO>1D8+0uiHLhWPPXa*iziI-!>vNY>qCAwkykY+<SrvtVuE+|
zRNEt;>p5Wk!RRb@1?{Ch6r)=A>qRE0RG<M2MkQDNcaa7UGoM*qQpUU}V^}9(m9z2X
zMYihmIS9-DI6KUn241T)fDi`Z3XO!GbhUw$fP!)BTd8BfZNfbCu@%vkHrK9z&%5=e
zW>!M&W+ey&83DU^$3ftAWX*>TwYo{&>i!7}J7w4jb0|w+u!>3Wqo*9sm-)CiaW&}%
zrTm186h-G00Os32Z#B0fgL7JbgICwP{K7A@mu1xGebd3}#GH|91T4tV_M!28ps7bP
z@<5Lpuu~)!zruC?zkd9CJH{m-WI%YtKZVT6QXMoq&dT}iACB>5i}xTM^O(({j_smH
z(rVIUYs4TjFe_maTzsur=mmP09(Xgm<Hvb-{knkGW^U54S4oacs>aVITWRzKrZhTb
zxWQe~N-CLwa#AFi=JKimIEA7(*FQf+YDKCIwn2hv370f>VE>M=JA5zu1uQdc{jAKr
z0~4*}#y5KQXy9nSXaLrM0y?3L;x&YtL4Pm*VsT&LEXW?dF#UNqP=@Z}BLS`DksFbo
zy}A%)#U=g|v;No5qK+@t0lt-sx{vzDK#2uxFMbI-w6y{SMy!Frh<>f2nV|8Ue%nIj
zNKBS>AA6Hw(WpfXzg0|_v%sRfj^aBo@_WRMBRd<-=M?ZoN=TZ?KIylMLr*QE?eTy6
zd(*90=g7zVqCR^a2K{D+V);bJjR^(gcVZJ(lRgAqUiwGc-t_|>WnkuE`+oo=3koXp
zoia>O0SGwF=D&wI7+W-Ld!jY%ff;0QJf*r<^^87<{xSMjh%9GS#_;len%p~%>my!5
zdv;|5Lz*43Z0kxKpDJfj?{jv)2rEC-bC|IM(<nx;Z*dF<*((0;fJC$$xlmx(hh3i|
zHEJ%7vkjtNQOHe5wI7-9Pk00Zaf92fMal(&Ezm{+%C8DXAt#O}J|2?UZ2&zr0$&dN
z+W|~e(b>Kl>%EV}`dLUUK_D0du^o&d3&4}7#6f?d-Du}L)g5Hc=mGNJuPv(;uv4u1
z*suPNo*uiM_3`NbygYHfF~SY(i@(Kb{2eQ2W-h@_sVDyPzTf>YN@?_-hFn0r&VRXr
z__aW!hExNzJujnjpzsZy(cLhPX<)GQamf8<bzT2q#A$O;j(tPU0WaJALGVFD;=)Y_
z3DaLe?J2(hC^2A7-bAS;N~{3q_q}ivv$FefflP>wx0jo~IweY+5!&vejNw7sdB+bR
ziYdxmF?MS^Nad|9Kwq_#ce6RvdoPN70G`>_nswmdfuH=;!g$!f{Dn-Ko@f}B@>zC)
z({u_*sS7I0%};(8n^5G3G5OHceOcK$!$_IL(yu!127d*A=s-@K(98FU<(HO=W@6R=
zGOqES+O)knZ34k$7Z!m2FFg~v?|mKkv5&y_?xGd`T&qQ;)5KIM<4-ZG!-gB3TYOZM
z)LT)qJe9R~yFCjV=z+iNC%J<p!^5>`F|rl8niMEZIe!w6547Or?5<DROQkx&23bqP
zWhx0$-4e!KlR|+&l!QFjOiQ@tQMQ^SfocMP*m(yKCnV$?pEz@FisrV=MZUYR>;H*$
z89NG0$S-V7-jvJi`&~x!h5HJPI>@8>98Y@if&f+4*899}IQetPZ-att34CL~BQh;3
zBYD#?%WWyNhEK-yR`WvuRfSf2^9kFFQs@>y6JHJ?omV1xC;byCS|vJ<$br!-1S9#@
z3lX9GXQ{ozBc&;R#F8IdDQN?Ntt3@Nj@E?L^GM9^;_5pkMBeg{F~d11q(nkh?NeRD
z)zdLO5!yMUrwb9^(k2K-wQPGDLwO&C7AJy2=vfnm<6YBh{|r4Dy#h86<hJ~$I0Rs7
zfcoc7%n9dCK&VCo%D#s()l_>IgoU;|Ir?nSZ;ez0<gsEY$-R$7Ej6qE;t-HaFoea)
z`{2nhf8Gr{I6}|ZdFG;E{G9h9Ak`85M#i`G!*wJhX5mc_>X%7)aU&DJ#Wz6|$Gr&5
z9onxKtOsS*QIgKK?&Y8jEg^^GHtQ1JTjow?B$v(-A~S066_^A*C4a0nY29Sd&y}W|
z<_Cl{^zQFE<j)`Q<=2vsG?*JL1fZ6w+A!yp?rvZy7^P6?<qzV~F)YJ6s%6-%Kn=s&
zMs{)XD#LFhV-QnU#xNeo-pTQ%to~<)MC`#GnQ{+vom_htj?HKIqp$GoJt(k+d_Z+K
z?(|`1O%QcfD91?`fkyHfc#jbhCUaHf=)7!q@M_Xp$<66zDB2}INEW#rGjnC%{xJYM
z#3BrcJsDSOHYN(!SI8Up;<0?d25MuG+(EiBp+Tr{x5(Z%0~g}t<k=FpdSrGG@KzJA
z@lFJ5d2<;v-E>UPL=S0N!ciZ`F#wDBN1ni={d-+L>tAyi^fsSlK=3dZSt2b1b@cld
z{6-)d3ZSxu4S1=2OKE1PCA@JR8dJ8A-dzuPhX-*N1!N$axh}@b{t_k#jge|iUx6g_
z144rzL{K!b<XDt9+XAEc!6%xaN`ELC&iI6|q%VYDu4o*XIbefjuLQxb{0RNjpfml%
zA`=!mo}NkL`ur`czPg&mL>a5nAD|pY6WR@91@`1hj*wQIjMZB#AK`xZp$GcU`T9rU
zO?X&ACg7LNclhP$W%Ott8DnW|G@c}5B_3o%y3Mk;8sZ;$b?bHm@$T)vU>id55XZ`k
z%ATT0?{G4}GwOE}aaXbOqc>?1B?5X<0?;N_^Sn#?*ZBY5!0vL1{v$_>#l4<%uJ1E4
z0Qj#i{-epi_c7d4IR0l(yqxDZRAHVH;z^UphvxEUi4}>ZGsS(uOTWkNS}N(|l4oRc
zU;P$5{1u5-x6)WnF65UhPYvm8P8y@UT4;B6V2Vy|St=Sg;&d#!9<wD!2+itPQi;SU
zHzf~JFtKs~B6J!=T3AOW6BEML`|a5Jj8&ODJ^*^@8fS~t&LZ}|4QzQ%$8+kA8YHIV
zgo;wPE&n#vp&PiN2*2>J&YeMzW!(-<<`{SB67#7+7?D+%Al~W3BxbszQ<o^DAnWF!
zzkOYGG<ZY7bFxbNf6bV=0vOEid>ijzoiLLQh%rXq_pKAKnD^J28xm;&gV0L{HL`k$
ztWT2*);GbjN^2pzoS4kZBFIvEcX527^?(7<QO?!5%ZI7$B_GHszmERDh38f!It`?B
z6)edn6k(7uBXs`#VChQTcp?6aN*_So>`?U5-(a|%Xp+Wa|E5Dg<OZ0~Mc9+o>Ieta
zv;TdJlX(+uTg3i{&uuQ9`x*-jj)}~uUBDN77{c@%zhV0At<Ofke!5UwI{v{Zd}Px~
z(tAem%<=;BvXi5J%EN2!YRImI^tx3c6xioOUJm=CL0Ejd*H!!7xw;9azA)w_>FJqY
zqmu76Zh~p}=)$&M-<Vw=Xq(v*D9BxpO>k4sz{JUw4v;Ifr9Yr;ZyF;EG7W)GX*%z2
zCNbCq9Jd6jT?=GK^ej!7H90?71?-D5lxTD}+Ih!%Vhnx(`-!J+avpzdZ8R8*ap!{8
z5qQN@Y#hkgrACK({tf{U=xk^jTPg@F1G@oqzvGGHE|jL?@fP#xCVvglI$WrBwxnZ}
zyR$tW%(<coF_LlrK`3(nYm)y?aDC<5BVz_&nC3)fev3#M=H#ZYV$Kdb@l|BZ!%Qz6
zhqOXzvt)PTMv{#DY^dF)oQUJlBHrpAOKW1%sD7vQD)wIHyiR9onj0;_rpbAXU8$SK
z?3WrQm4^>3|5?=hvhs>(=z2c(>icdmWo=8hi#|<-jIzs+ADod}5E^GaI*aU+z+p$0
za`*^pea0%Rdp#k(^|o@|uHvBrA}<Ff^~3ID%rs;Lh=YXHU#tNAfxK;k926D0tFI4$
zqVp;I#jJeN2Zz#55d#Z1S*2~xJnMJ+!z2^4nI1^K+VO66h<sJU($cEK#K6A46&M)Q
zwRJ|8u2DNZe+AUL2zSwV`IjN<e|gMBey1kukgYdXO(vvO7C5jb-0S)Jhj8ZLe}>w^
zaU*$L^5Yp)FvP$6Z5&S@0Ka%LW6f9r-QMqRCE-2e7|Gr5h8e%=c8CTtMsD~Um{Nfs
zBPaERKY(fJ+B(nXaRUmx@;bj8IDDlJR8|dr?0&chgolmz6uunE=$r&5%1Xe054$kn
zRgT3eqTM>&{A<1N8(AGd;xP;SH|kpy90qk2F0e9>kx3$e&EIm=4f<~ff9vabVd1#c
z(OGJT!&Lf!^38q8wCi7?Ii^`6J?Q=x7m@l6NY&-=UCZU8u~&o$mwx;=AU4rPbQk-K
zs~6_UCd=_%7Ab(yvkm}s{wM*ZUy*Lynm7l0uGHr?7q6{xC)#s0VNN%K&>21xaZM|7
zAYxEmf6&jRD3*EuD*>>E*Xo*~#63VZ-#(DXz+)mKv-+vz*FaTlQ!i_|6(pzP6$O(A
z-DVW(@ug~8;gD|Lh$-xs1PVAy@@AJh5|?<OA=`oOj|Z#y4_%wZy0xsAN75ieX(YHw
z9`X|`_=nn6+-lK;VCTx0{GlD#sqk*|fQ>UPX`w%2i{=;B|8ESxxh0a{g)q$pub4R5
zU$4`@{7CpHME-mG{!0F5WcJ&pNO3=p4HN#kDl0&5JlYs&z=Z>W?e0Ur+D=;C{fn?u
zc*m)ej1c4Z_?VE)4#?*?4KTds`cnfazMb<Uts(f^EViv1#Df}pBYc=HQZD!FLucQu
zVwDMR2A*?kFI<Lv2i9`CCYA#kh>Gr@-{lct#cQm!e#oKHR7k+B@%JFQzZpqMCSd2?
zB($9+4sCdWIQjo6WWF$~h45z1kJka^J5?ZkD#yxU$0~FD)hg4}#f(d(=c(45zj6u8
zy=TZ1)*XL*on|HM=LUPh*2;~Uo7*vU2|i1z_#R%P`c6Oz!ABNaNzr%c{V)%IPI;y~
z42h9FQ6oyt&~HF}Pp((f{v%J+|2oL5XjGX;y*sf291ISVbm_+e#(Tmi3H58!R#A`+
zAv=#DNz+yftF7gE{j>8qSeRq%jpm_l`%4A$CIhZEsPY-HM61IA<Mlgs*tR@8_Ai10
z6IXP0Nh2Pu+(7y%;5J2$wB519z5)p^Ya24)w<#QmBBnlDN~!WuSdR(Qo|(5%$DJEQ
zxdhoqjSh*+RZ+CoNr)!rQ-c~7G>q9`5e#o!-3^81V7O%eS)Wkt94F(<2!-s!S%G8*
z&Js3dUu-O$12IJ<;c*<wO4_?KInX3dMj`>52_ZvrSt$&PS$!sh`e<A@j_@QC0UJdJ
z1%}@@J+U+E3YWec{=_Q>Q#Ye2Ay&pI-?m*A>*dHEQcx=})TZNFb0)V<wU&ge=LRES
z%bzExf$fgSLzI)IdzQCd8xyVFp9WE`kUwMI)m@pg5J!WL7GvrpXCNq}ledRW=GUOg
zlTK9n{oAdg<G3FZN5N}*x%x_P|0}3CU2%o~E|K@;faS<7nM?oic=S+yX00T0E<M-G
z1kf1hfG0tG_1Mj!j$9lCb75#~ICn9RfR&ECIi9qX0Zc)0UP>NGXbxaM;4;%_JbVqx
zE9N$z*p2qTwU;cQTO-2ZND~5CfJ>(e5wX0+=xx)`8kmLw7Lb|*vQ@r9eind3ul4>A
zu?D4zNx|1~JWwl>x=u!eI*{?*lp!GWq3*|35iO9N;1(q)tPpACEYLn{;`5JeYK61g
z7Gm!>8$&P1O8BvxmB4`CR{vv`KiHZU2?|Bj^+RkF*k={xo($0{8$^$I%>w)KLJ|Bm
z%Gyl7NM>joMAH2ZB0b6^58RAt-+Fq+q9rFEcsN|vrq`1-{R&5W#Vt#YLm;@8OC_`1
z=9>uWlK>ghkJ0HP=9>(0i70gGNOoU{mD`%Z3Kxy18*jK}*>2||_D_J7eoFFm!MQ8E
zV3Ssdm^|Fwq_@sGe(oQ*MHzK3s*SJ=+p#O@Bb!z*YM5^mkVZI92nL;rke=g|Q8rcZ
zOpu|dHjj$IB^vcT^|FytA(JuV^EY6ASkYlIkSpN{fnzlt?|)eazO)znsK56W_SQuP
z`3H<#e7Ng2`On%%Im+fC&_b!MPb|Ye$5O!hzr{8E*1)eJ95k=m>_&r*H($5=Rz3x=
zo~k}vZ8tocWe-JVPu~^N<C3XYbvx#yOSskRl@`R(^wx^nHY`()t(jTePJ0X^KbVN<
z?ZprvvlFyCyb|}`KJCl-0t)`(wgaXR6=kv;%oG`=ZM>T<X;5$7<ov$B>36AgUBJoj
z1CDfsZ_CI^el)@gvJH78-PImKmS$VA{4CIK+(<V1SsNG<Xs!rRSl0%mxzd-$Y9ULr
z!723ViCUsycLYJV5HdNlrlgz~AVujs`XDP(9KSey@4;$-)Bc0!z`i=^>%UWZu9ERs
z@3BF5*%$p_fjh1O!_XKUL53mlzBui8U&K38l^Mz%Nmk;RR!o*kLKztnH>uDXVohIs
zXEB{^g4_g&41N(HQB4q-s=wxGKI}{dQ*Oy_F84?WNSRlhO~t#Gln+QJU6j2-DEcws
z{l9bCa6UiQjMEC3)a%+GI{??m&B6YeDDs<=X1l>=DNq!6d$TvB94$xl0L#9*u#!mr
zPEpOsGoq4q?yl~BfegMN0j!MQ%AXH`ghyGKH#vt59O^!4rU0CRLb&GL?A<T>T?Dk!
zt$f`J05`m<84|4<wmEbKCXvH0XJ6n935%M!(`NaoAFg>0E`uNm(A>F`G!$W<2HZWR
z)9Nj=cAezVtB@IjoZ;s98U{rBhxbv3cIY=6fYvi>_-{2@TgVzOcW4*oKygZvU!aXd
zD-pCIIFmYmcFQ4CqRE|@2rSSn2?dtca|4b8k>E@n<oH2A*FDz(Z%C?r3;u7Sr%ax`
z(+{6vS?bRzfTX_L28gXVnx?J(56~b<1hpuRuUx2s1d#+3SXYI8hOOreFe{sc;ar#f
zy63+_1bmOzZYBZO-Ae5DLWWD2nfZcW(p3Jxd2a5O0G;b>*fsf_Vr7?=5$2}g)u~@J
z2ohpg`ndaV>k~QtC%2M1n9$pR)pzgBn+xIxt)y8Z|7Qo$UzX{7C!x?n^SL>PmF{_b
z$cyQ@<DC1WK?Y5Gtgz5-CKoLz@;PGTMH%k1S?%!~0TUIdEnHy*Nlp3I<nu#G#Ps@n
zi_LqnSS!LC=?|VVFi*Bt>x?izV5#Rwz`EAlAz3*V1DwhK)U59q${gv8*t3nT9I3dq
zeG1)-!H#$Izq1WJ9<*{$boy3$1p`6e4cGYEYQCyP7smc}QUB0tdi^yK6*J2$KaiAT
zjxx~l>5|Fw-%il+ty?gjbs1y@OwLTjeT8@2Ksqbm5!x+=%t8D07>x8-JV&ZoxjCkz
z8<5pIuXw=tNsv9PWUkj_;~%zUKQ8gcJc_BPOx(&X%66)*7v%8##}XZiSW0Uq(|T6E
z15q4QdSMH@hOCfB85qVxyr4mTwq4Qq858rxoa~**s~u+Kef{l7=XwcEL&%C$4@7;b
zo70DV3NRlP*L>#chX>y<@>x0K2c_f!!(BXR)3`xmuQD)#%x^}C?~aiOjOB=ulZ+Fl
zS5ZXmSDmF0*~d2NsBK`kSzV7!H@@WlAyzKYO+deXUE7rmOqk2aW*@mnZ-Cr6__0A}
zSMDUfDF5EI9iV0Ze-6+>08AwvbQ2WpavUm9i3I+sta<n(WSOG>>7N3Ysef05VON`>
zDf2S9-_X=I09c`!2*9y|1mMJ89sd9UjqUtjM##;@HI4y3&wk)%{M%!C$ZYePU(n++
zud+J3faw^1aFU}J{B@(!{g?qvN|QGtu3wu!edqIgwAvTZM4T6$qvSs~ITxvUXG3#E
zbNG9480j#-rl=WD#B#Ar$;hA55x#tZ$J9q|lHMs7y_V>V(a9Ejr^>JMXPR`9R#B5&
z;Zc5S{toK#t113RQ;J^*2VOFY^zHukWoB(Ya-rTw>Y~k{KBYv|x`W>Ke8uZ8Ygfse
zLP4uo21JsK#bBpXrjLt-D@(Q)Qxe-)VOmyB#pW0BDYtQ(CQo5C8^7tMAYrxoP0p5+
zY|niIrIX`n`wtyNBn>Lgg|X_;t%UVHyvJ=H)*m?<=OM%RGayt(XPFiELg+h{f3Qb3
zb-Al&t-9o`rqp43%h;Hd(}?wJu7Lse`JG0539ouAIlr0JnWhDig;l=PmFQZC{Y$$(
z?JBCah5CsvQvt|=E73NZEIlS4$Map8W$K|z&=IQD`WRYxS}U1T6pXOkfnPLVV`U+j
zeyr7#Ii6rV`i-|xr$xJw)qJbCg^5vgUxn>e>sRngl<qA3eOn1IKL!Eo+1Oma(`_dG
zhTz5@306|`_#G^A_DskVzcje~PUg@bFSmF<3#-cC(^%bLrht2%D3h*m#(AUY=9fe(
z)&(BF`f>Af&-r#^`<pWBM!ZUs^}<Hc8*6I`uQpOPKc2vyT*SF7#gbOMQKPC;8~Tlf
zuL-p&LRan_bRB&XVySA&hu<$g9;Q(R;!j!lE7nGyf{K0_1MZ6r7azAo3lG0!fE}L4
zQrvY)pWOlFf~b$yv)bR&DR*(`1+nv8VxkiJ@|&;s2YkRCA9TO#H6GfVjmUU4KCP!(
zrz^~x(A-hMKdg{1F1JO@5qi><>*|7W&3I6~IIi8!k;%N|#@T#wr7xDFd<idstv`of
zJg$~(u64XgRIlrC0cdtyXv|`~`Rl4@x0B@Aq9)C<TJ*S5%h0QQ3!Mr~*++LU63?ir
zs)p{<SfQ`b<hF|@3q{X^nu<rUq=5$yR?z{WkuaB!_=kOYCVq|WAi7_s*U6bb6F87p
zKD9BueQ?l-n#YOcSvuCRc*hdQyp+UmuOs8rqH7H6G&#|95iO3u=~wMjvV3K?5w0_r
zFq5V)HpyZVAgmTC&X-U#ktrxJb=7;^*z$<bt$JU3FSkUTG`hgmfAT!^LnN4vSm6nI
zgNa;M+L-*hviGV@k}B%rZ?@_&MkgbqB@cKzt3`rEWMtp{mcb#adhrf#mQu-S^ZGdv
z_@i%BYjq99Oz|F@ETYy_-e(+jz8??v(>^|En}=Bs3>Cm`US)k+x#uxRd3xLs!Z{2+
z8*!9JJDUoMq<0!wxLB2tuxY;nm!T|X7`~){uVsXiJK+b@R!+xx`0jDlIVaZ`aR2DR
z$(GNlk2J1ysAIiYl*Q$~NKc9rcn-w;`Pul(_7BHCx`StPZDUP&8uO>_?M(u!%~6p9
z9nUFXPZCvJP8f3co}yxtJ0SzE(M?-XisBe+b$~xohi?A;+xT{gAr!?;Y$`3e>eX?U
z35kcoy<)mNA}aCj8<S5rCR4TFLJ{Z>-B!Bnoz%@ZppTfOTh5m)sfhio+{fZ>D7L-4
zs_)Jz>#;=Y$+Xlt^RgS;y^m1Mw^eDXf6MxDyu&g!-;1RM6F@ij@_l!P=_uo0=Q|b!
zxfCtdudxuoD=`m+7TdcPHTEqIS*Rz$h#*06^`_K5vD9(ndx4#c<|4R;iV*l~<hh2e
zg1+9I;l3XE?W-}a{tdy<CE|Qw@Fzgxbc7nmc~n3Wr{ZmFn-r*jXiy9iDXMJi&oD&H
z=J94Cjp8<#E*62jzhY2XlP5yN3V#YzsKg#Lp6<z^PK|f5FiB>G$2*u-&mV(7ieUQ6
zriEY2wf^v4HM`#~buBBl{bCRIdjAzImC4X)%(r&KssJwBn;GqF6O=*s_DTMgC;s6Y
z8)JGCQ+XJ>yX;x1v#q>Og1#g!HwYVjn%ZuYEi8y@<aV<QaJE~f&6ew=M%@=)?bfEm
z2EB(aWnOyLwLOvn!YCFxQ5q&*<!<o&AXn`Ro7j84liZnd3A%Qh3-#V8ygTV<U5w2u
zP!pd^3RExi3<!PTN$qXW|6>gV?o+{IZ52#FJ<Xmf@KwUTpR%Ad<8YE6f4U{R>(3iH
z6|T;^&spgWYw#?a3`SHTM4}Y+Q)}+{KWx3ZMa$-r#ng^-n{o~%=x1vznbp4=4-xYT
zo>rfVT+BUBxV^~Nlk!u5aCxQJ1Zx);of@=}f_u}AylfL<Q=-z6e8w|EY2<=Jwo;~H
zBWr{GFarLF7FvN9bmFU+4c%5Ah=C92Wwc%@Ssg@HgZh=s?)EsFs?i|n?26Gb@LG<8
zI+w_`{-KKFAQa!yY3gHY)d|U&`+`g0`$8{KTPr!$6sj?h(|2mnorEnqV!X-3r^$-F
z%;Ydz+1nZ-XxBnK<%3Q`pl9F>j3uT>-x{6HLLFlM6aR<#((*>UQ+m84-Kz@Oeevf7
zPdo5MZ1FO2^jN9}Gb(N9Qc8;Po9}vi4}C8j4>vS18#We2!3+r$Rf-mhUedi>B3{l9
z(c+Cx)7S9{oz5p07cV^M*5k*luP315edt^QrU^l&uMM5%?!#s8f*})jhdhOxV2|<%
zyC>&wh>A8$k$PfU(Y-<t5RqHr={+q)NJFUY+^l2+hG`0fJiIAwHQd<Dl3Y!##5QFv
z-Db^C;9TPG0ojaaK0TD!ColA!<)VB-h1K()%X24PBEOiY#+{CLo`{;d4t;b8iygg1
zkuchb4L!zReAX3wn#@x<Y@q7M#n_WSS{-3v3NO1SxDh{!Xqw5H>X&s>#s%!DQ&(me
zzW0a@H1RV_ojNYHGHYbQI4Di+x`Jv=<zV}vf9O))@h2%EB`WIX_LS?<mj%4j|HzS@
z-W*z(w6zaC7w*D8%^h0t%^~ukfss*t7pHdQ+lpz5`#i%T?sadc$7HA>8jT^$7!=RY
z3TfZJWMJpuLt{dSfc{4D96s7|k`PUb6J$(zNhB%4{MO!1-dA*UXz1ePa*T!AFllee
zZZ7%Be?!$|%WrqWPd0Yem<8XXqotE%!>y3Y4H>T=%wWrry_n!?mzmig=aB?Sg(Q`p
zDzRQrj_+D5NPc__w2$X=@3J$X68`*DrPJsw(o>ziUq7jUZ=?K~Ydw(Asqh2gZv)m~
zF8B^ZI@y+G;mk!%rMb6+&ibstSg{(Ht<R1P=gAKOn+v8p>ad#v&uZ>2-AFMv&&IDa
zpBZZY`cN6=wh3z3&fbib6_<0;Qjj%lp=zPmYF}wv00-ePx~$~U7^Y~R%=gn?hKJ_G
z&UM}_CwH*6%;$hkmgCzbBr2j3ORDac%4{FBBa$4RxyE_q$Q@hQ@Z4%ds2OhA2ed8O
zav9)ShYGG~yPPNp474#d%gznSR+4H_8|q;PA78<h>o*x!imEh9!uywB%YuTrxg%fc
zhnpy*t5RSC4sR~CCP=akiX|R@Lz=g7KeGU4NOQ~QM0vFr41B5pDK_$&qbf+2%m@Ri
zUsDE`o}}jRx-Zl+RD+_Dca%`#`*4*n_G~=d<8062W4^=@pSx^fUD1i@#agZRZ@ztw
zpNjk`I=t3*s-sS}MQd^ifH#^}F~x!j?a57hcEP@G!x;7@+6b7NAxdd2lXEIYh86s?
znh<UCa8p%jOFdh@>*TsDuF{JMyuu@NFgtO)hs@;r(5j_YS$3QZX=bX`E|(oKItI#p
zQbS2y2S9pV&S6kVRKM5#XJXzmzRcNvg^7E4Pu_Bp&G$s==#@x-MLVg!SKpmEC~XQq
z(ZA`>VgA^-;%(d#W}QjfXp7eR+PJ9h*&+2J+=FI@eET^O+0om2y>A6f31wF9bygW0
zKx^63#UzWk_iy$`Q_Ry08|#jcW#k5Ri%dBu{Q5NPuD(hM=lSi(guCaw3@j2?c>#Ku
zJ9!39S>SV}W<QxY8MF+(fx^s>gQSURLpagguPUUhqHoMF=aulOe5LHN#l~Exg^^sl
z@y4TAqgvme-TF2U=C3+~j4U3*@img{GF%EO$hq=3|6;t8k<(lW;p=Rv(3K9V8tZFW
zF)43t%v4FmW#`<<+&@NG>h7sR&*=Q>Loj7MlxfISH*G8Oid>CQE6t=Fzj>Eyt*abq
zd2>@#GDo{wCT1|P97L!^Cz45uAH1cK>EYee^)mdJFma1`T|@DGp^w4h;;LTK)GQed
zq#V{Z-15$Zxr0oqpCBdL3nV*b_<ka9pr{euAOfct6hp28MRXY=_zjprdUQ<glLlIP
zf&%<eH)EwYea7=L-CJ=+V-mtaFGFQAJtT(5^y)?=4zc)!ijOgmzK{1z=>>OgJb&s<
zd51(13g=;)Mwkdg$6}R@Sf@BU0q}mUIIZUun+)4adFH<vQja5w_}Hdt#6Hw7vCDl6
zXEp_2Y@A_tmXj7u^{-`)2((R$)WU{|=iy$GVm)jHfjk{Gay+^u1CY+}hZyVVX@8Vp
zr=8l`a))aA@t_1gvhLctfIA!caqhyx1dELq_?8F%Oz~)77a{6v!uPKUBW3~{WnrKM
zz=aviwDsd9igou{Yx{$yA$!(yt7MV44o=}Eg04g<2PvGI%Gbks6@6)9vXIS}xEXIO
zOKEAfc;i~5Yw2>=Stn7vicKvtzisA<31>barEj`*Pe)#pn}usBezBh+U=yZQTO3<V
zx$RFSoLm*#Hwta>y878>6O7F082G}`2?(G<*`2+)kL7xSVqo6<>RrzUz6IRnUDMtZ
zk!6Uj7+!rTbft+(hq`>R4$Y(5BGnQeE_BA+B$(y;q;5y>Xol=@mW#@4F+6@H&)$cp
z%q%5rB)&PlFGVWz3nDmyGU1uw5Xfg3T0e1FIJ=W6i5z`KRQuzWezAyQzLC92?FWu5
z-eVaQ^@%U0o#nVnCrAh{{FDgQ%?b!*u_+VGgq4?$jDwu-6VC^o;bNewdbO(T)+rcB
zJbVhO410m>&!R<HaC#UNT#dPNqTC#Q-T1mp=E=I!$A0VY4YCXIf-&?gOLsm!thX?C
z<Pn(&bu8(gs<C9$uL}6!n)=7eOX->o|Guo=x2pJY2`#03eY9XgK~tgDMbmsEV;S>$
z2LPiV{wzEkWMFp+>ZVDm8}Azcb)DtX4zI0qZRhXFdsWzNlxqGebFd+GrBx8>K)62j
z_ReT3ADb9=jwZgPG}Ex>D8V0vY=fM>_(i=k+dPY|^jybTyh2gIjhpiq5<xND9!E;C
z!AgP!#<H*K!&6XzLNmesD*S0J)6;_?Y+4&h+>S!>>wH7JJp#iDq3llmxKL=4^#?Vv
z!1zj;u&D+%9QaoR1@2w?Jx)y@D~JUqnBRC)p+5M<I*xHL{qg#xp=-@=x{}BJ7NQVE
zz%u(LAy@{kuU#C+e^fX!|E_tqX#scL5Zvk=!+S3Al?iH4^<ZyE;InZo-#|Yx%r{kD
z14SE{$>U9t-k{KUqDyK+lKittA|yVq>|1<%z~<e3xNFP|t*5vkn?sF`OC)2n_d*&+
zUHhk+M>75w%YpF{drr!IhUY=OQ|G6e)Vj0TcWnrN{BfA)&aE4oClE=2j7sygp6gej
zt4mL7w95&w$xdmt{+Onv^P3OJP`Y;n*e;2&)aeR?y7f!m5~ThoU<{xCj-hx)Fk<ku
zMkA}PbZILSBLig0^P5WNG*l82P;3C-Yr1*jK&=fHkkiRPH?IPr`gh*2SyM$&4Em&}
z3mwDi6%{!%k-Gd_R8uoKCf};Xjwkjpw$ciw<zpjksYkEv@4s8z=HGIkeM?9&`bppy
zZx=e2|F>)W4$hy}!C&M1`e5!LE79L2zr5JnmicDBU_7VMeK?A5l&#MEa7&$2#OoVz
zTZR5e#?*7~v%IEHu6PP{!YR%4cgGHc$<XGiGe<`UEb!8JN{|ptzYf>|o>v(8GhFBj
z6T}x>l|s_PE5YiK5PCsf-l89C+9>a-*pcN)2^g()(6taYEp$I38ok7>RUpJzwBQW?
zcn-d@Da4SxY*<C5meBp_fWl;gxc{Aa$p~g^AY-n|8Dz`54(e{KC$wHSTmsm=1zyRA
zt6YP0)`I;Eee51E9HWm1hDCYWG%xGtC3Gw@j5WE2KWt;0y)k26V^8r`Ga|)eX-!{m
zbWNC@I-%$@8;Zhp$p^MqUqZWP#wOP=jTjSPrC-$S{Aru&CGG~Q8Pz`kf_Q{%Ah9p>
z38YdQFg<n$!C>$9?Za2!y~|w6oy<^iC}mb5tHlt0kMvw?D)MP^gHWd<(8A{M3Wb)&
z^&s1j4rpJvEwa|SC?HiRtc0bWGMJD6w;i+TmsZs3k^ububgSW!gg#E>N}<pdZfFM8
z?;+3Z%;41*)I4aGCOq2`PQd?mC#U+xSoA`n^n^+0{qwXd=aRaCDad5lZwym7&Us#q
z{k(-TXmFoXD)&28o1ig!S}3(4uo3;UW(5V3U(p7qQPUK1x!JX>fex=)T%z&WIH`t!
z8szLYcn=IIT9QVb8mYcz;G9y6IH%;I2s^{l`Mho#pVC`*6CMgxVCu{KneX08lDX9x
z4a~^V23R9iTj62S^*GXFU`7tq)c*QA0@>V@mU!#Jl-#YXJAdy$<u$E)!f{F47N)nn
z&_>a%D31nPS!`b))faEr-Lp21PufK&#S8hG5ba>~>gmA^8ACThwJ~QoA1X#x)R14M
z2;v=Bf3g;?^h~sl3o*$0ttiEulS*|<$f;NwMu!YWU|SMm1Q=xRZmCb)wj24c$wy?~
zp#UrYkj<=79~-=2%f^FSw3TXN<*k2HFv80YqNzC0#TW}6ndTcVV7=n0bfxuV`cNlM
z8)u0%d5hm}{cMy;4`08cXt<*E_S}^4^S}$*#TYS+>s!xdR^B>2+X!lV-uD|REpLD{
z)KA-N8nw{Z2VF#o9`Zw@i`6F%BW(wm-kyRUo%=(1;P#`RfsA~K4;E5Sz6a1v8Q7=R
zJdS%CACk($rA0SeQleDcHpWyXW9(v05nt0$E+C9G)f#~YF+zvfGjyTtBc&)V%^}h)
zi>DbX3Un}X84?yW%;dt;BqT?Q$n^?MiAxiV&?@&@TcSLJx^5v=DjxE7`%I=Zt69$n
zK0mne2rTrCM{V|E`@r;EQ8<Wa(^@<D+60$aP+PG6@wBlHP7i`=#$c7>@SE;rQWOR3
zDO{n5q7(-&e3p<vGA+s45U_oP7eg%3_4q$7$y!T@*Z}?GoE<ZTzCseQogp;79>|AS
z4?)3q(wWcry2dB65qx~s%+uQ&=vu%g!v1tVUnd#N>qr>kQk_kVTjJu7<2SeU`aVi#
z^VdrRqu9+<R&7WeC!eY~GfK7G0?3HvmNbI>_6~&~RA<u~J0A6L^c*^fbE6c{Y!8iL
zMD#5~ZG(vrM01l&#E<DDFq)50ja&Xqd%k~g$r$s(jImEec4;$b&)Y~h-nO0xk>KP~
zUJ&Q{-M8l?RAM>wmX0CM#XJ-`;dc7q0{3=9&tDpXk-{><Y`TVROs77fHch#BL`gc1
z<E*1k_)EHTf4NA}1%PkoZxxOQxjsWg%AY1V!(i>>Di2eAtv)(9_D<#|KJz|czO_PV
zDglj6qi*`rNV${y4p@umLxY;Mx$sVnQN`NF%AJZWG0cDhR}HYK4!<JT=~;HVaHT7f
zU0ks|XVc0#mut$()@>clHT4$zo~de7r40?PG$K_#Qtl*ui3{0>NQ$S8@#8iLv?JM-
z34J{Ty`n>Y@zp!8=<3&%#7LW59Sd)>zcb^lXj|(Dz0C2BpV3)>S?i@*X~WhX=@S!G
zIu8H69WM!*L2@svZn<sHX@#W#i_ucvLRY|y6MDfVeBF!YPEA@PR4aYhu~+u_VeWeR
ze=*~KG42`bccVD6*o(yXjhaH>vL%=B9D3W2ZXH~EY|{2!#Af<CYO^Yat+OvEWQ(`$
zqC?F!hn$=RK#Q5;`bT33z&jP&I<z){1&)O2{f2(GgIbQPkNKf_Q`mc6n@^37qL=}b
zlqw-6_eFBL#eS}bog5(haoPsFsCb5LD`^e&#~*)CD5yOz-u<V9&`devi?x+H7jfow
z0(1DCP^U`vKSi9(-ePF>(_7WPzLWf&JNX9HvynYF6azjlRGMWxDve_w74$5)K??G|
zH1)2XD>-TQ<5uXiQ}T=#eaWv!_S6cpRj_Z3XFb;)DxI7D_-U?MVx}lQb&#59;L!6x
zJF8S{7CwY<n|EK$k7TRhUk^X}e(%TYmw!H?U&|R<q^9d3&YVjnF3i<(-Hqag9TzYS
zQMPmuF6(vrNZDGM7m0snrywpzBH9#<bp13E>0is%uJ#Mi&J!wQ?m-_Gy5JUJ|Kxea
z3~aSHsl3-0Z|V###iS!Jpj8%CvQ{jU%R*uyK0!Izpyy|N9W1j>v~(VHl<Z*Pl=F(=
z)5D}@wXCIYss{o!zw1_riK`EesT@%E(0uf1Z=pB66!~2XeL17CqU)G^jCk2E2jc-x
zzh<_4y^ZznBAc*VQP-pSWV&y~i2t|wTO*F<HxGGmG{lz};`<B+jpk<GGBcq_2IHC6
zLEyl9=fH?!`GTRiogD}DHBLt47@e$V)!!cRzkM+p*9I->J~2SfrR-JYc_W|_s3o?p
z;h}k!zBla;_G`!!H>DZNGHG0?F^BAVRybIJd3qm-R-DrYCp>$u{5k)b_$Xd|3rv_x
z1M7-ezo+xvji84%;Me@;lwi}tNCxDs95zepgE^1)4|&dhmg?G2n>f6pBuOu(<l$;-
z7tyYJk^VNAnL7vcsCkmTftO#$wL5;%NwC&G^K?QX_!#CrL5_@C_$)Wv%hPna)`xgL
zqbs8@Rs65>sbB)WSS;uw8z*Z|b?AY)S$@>g#<rC#16}`b?LMNv2)!@=lD<_yyk<Qt
zERwGWhO|TeG9)5QoOzNP{jm~ubj&ZbJnUXPn3LNFPN-M9=$MBup3q=5#?}tdt%`})
zxl^N#joRAzZ=Dvc2Qy^*s@RZNf@K;S9b+U3t%%h_f18Czk+H0Flx}kLfz{W{cK&WP
zI6T|M`rd>rA7|*VVmysx72(bDQmOa6UKLg=Uz?H^Uf&={42E|T!60Ny<>n=5Fo<{V
zP&t58KWi8poARvt&i1lW-{bnj<fzPYu7DFF$e9;=F-UM`NN(#eb@?Eb?rOzq#ln^;
zHnP6-pr4<u|6>3IE>ZJHDQ1e2)RqY)Q{~okkz25M=_$pya~Z3!0UaA6(m4~6Tg+I`
zNaO&)i;AuS?WzZG_n|Qp?Yf~CruoQoFW{RGU0tuDXcWyXZ*9zY45#2`@WTSr{Tqs6
zW)*XM0ux)PswMWvuf|qov0{^?<>1Jo^>zDY!hBZqee$~f7NI!`MOKVT9&C-TN!g3M
z3>F|!Q>X;=ewJSB`kwj6Cet2A1)je`a2-M<NmmPpdJ4D24~N9+1FudWS-!fD!3FsP
zb!;VBD)T42t-1NTv?B5!jjLkQgx)hh5-9D_gWFle3Oy7OS1ot<l5MqMo^ZeXv|#W0
z$e7v7P}%KO7;sdFnrGZIopq=?t{uTJ6ez%Scb+({==5lqsp?bSmU}o=ig&mrQnKR7
zCFxIGbJq2d+b8sYs<CN33Ct$(JipsI$Mn2tROwZF^<7N)8N;h#@dk9@1jDPt33BbC
zsME7uSc}p))u@@3XzVv`>6m?gwQgCv1cXlLr;n9rXwez3C=<6Aoe0k(+03v4*ER0s
zl#4p+dWMC(6{`q8EWP5Keg^ySJ0EJEq7T?+t>tyUD{@IfxZt%7dIQvKG6u7Os14T6
zT`XKB*n~yeELJ}b`hnGMO<u-^Z>Y)mv3XxY-8rD51-(^XrwFC(x2g#oULVhVMUeC|
z=(S@r*)XRwW_fHt;k?(-_Fp&5yI$T$oO{5h)vrAVD&OU~^y+0|a>}xbN8+_1Ie_n+
zNK3KdA(yRSUv8+uV!tQA(>~z`Gdqn6x7bu_e@xkS{dUAWo_{+>v`6#%y#tH24N(^G
ztnKIKI+b?jT6wiPA7*ZKtGJFH<u)vY7A81Ofz>rdHIPK^3DrfdFKUv4z+>m*f_pO}
zM#2pXdrMK0SV|L)!Dcq!Kbu5aWk<Xs&a;QZ9O?{W`!Wqlwk>KCX=VA7DSHW*z>34Q
ziDzu<jw@Mz$YeTQaMHP@Z`V6NzVMW_n%V4Fpc_sewI8sxCy0VUvox7w3Sws66&f#)
z45$Lsc%@W<qPV#Bx<1cD;+aHuFDBYE<*nyCS!WM#Gzm{J7=D^7(rT8|U+i5iR>=Kg
ze@y6FIz2I{G*P;-p+u)TUyyp%Y$drl__@H<h@-42*MX|I+c&1X{U$CRSbKbc+Qdvr
zoLJAFD8Qm-HiXzZJaoQQ=t=8=p%xKeq23}Ui%)KSuW#z>^yNIrTP5Yuyd*|!Y_G*!
zctO!J_8YY+Vu%y>Xvj2emT1Cp)atPbR;yufzDeN}KG^ifYw}CFm0X0=b|)mQDa9y`
zc@<M$da=8Za1P6Bh=H_frOHt!5Fzk1hEI1fEWW-kXjx~ivcDA{B$Z9(D-+{(;FVS_
zaerS=OCqn5@84GS0K#QcH*6I(<6YKm`mPhbY$2W(^d$|T%rFDzfuOUBPWQWlrP!zC
zn|xDx5(wwE7`6PbD!MB&DIW+_1FMlW8Ikt^(&o7_dpC2>*EoG0zFEjPpWiAKKR@1W
zRMuLd9b^o%=Bp#k*dK4L|91G0GFfHM<Z<LrmqMeR^ZE`J$ep7e3tiIo(GSJ9=h|+_
z`u)V~Cg}OE_QBa&SM3mjgYcn@dO?Ar)1z+0l2-)%lA>#JuzA}2jh<MjRnKO8S5sE4
zQ^Hn!sqeyed&3g&#$YztA1kAveJa$2(*yVV=IFgkUzc*P-avflJfQ(HSGoFXJ^;9&
z9ClTsjJZ(9%ku;7P2V-kZkJQ98TR=qL%&&FKH31V$-z?^rd~`TbuI+6yI$TGRE-5h
z^n;elm|_0TrfvXEgt`=6k%I-nEVXQKOBPOo6HzPW%rHv~8{tmC;h`H1J#j#iML?2_
zHzi*M+{ydBp+|pKA7y*@buJ~L;f8gyB8y<Wbf~B6+aM7mDJ0jS^(%%mVAf*KmATJk
z^whZ)z!#DBC%uK+6#U_>V`teXk^>l9F?Q#rmq%8TXIK2uQL%bts-Al)UK?trW@CpQ
zB+prlZprD7W;t*j5*K8*s&OLTJF)m!2LF0tMoT43e9@&@=kn7q=$<Z_go<uU*SJ$}
zykXP1#<R9}YTg-)OwrWp9!97xl%79m6(E3+P7*uzc$OQcUGpw$!6ZxZ6#U#XfvOw%
zB67&(9A@1NiwlAUN|yi9DpR_4+Ly6FxFWNatwR6%OS@?wmt=wOU;{J}|IcGM2|#Ms
zQ>Xw=NvF?FqX7>f6Lktcu7~vPp%*kmxav;-dY?=_R~1dD|5bP_M<~XIEcR6=PbyJU
z&Mc%jI22lZ1AF#R%d|KB^oj1(<%(&GbMq_YjMMrXKxnn6t`6+g-_Sme$iT&ApPn0>
zR*XuFYCT^*Upldc)q0e(4peW?(J__FPs^a0bPCA7x&3xyPMJZH@kz$dh==2YRcOxD
zkDWo=xRLJ%HpQE-1c@^Q3(7r99G^5C@lAR~J3GojiCW)oMK7FVgle)7As{q;I*_l#
zB}-onGsgBn7R239N<Qp`Ln;`jDTzpdw%?V+7c05#ava`3$<N;~KhvqV@De@WgZv1T
z&PSonQf!vfP@LiYRXYkI+J717`S+@E$$G8DvhCQ3@y&2hL_(?6<(LqV1Z47(e>0iv
z_fzOoanG`bNB*fV3>V?taSq$6{;lR8>{=FwnoO{#%lZ9p7cc?bLyw*5N%GaxzLivd
z{<NR*43B$Xjy!!~_Ul_ndEV3Yy$qUOhfc?yD>0qfXXP`@P~WRPlV|96*}{Ovhv=)r
z8;H(X1KmC7EIm&<OOMuJH8xyma_37=bC7@E3k0ME>H!=<S+jfo;A5;tpW;BaZQ-T!
z3@pKBLLtA8rCm0o7<F!)w@T(~xBIhkMiOWCzxI3q&trrZtE(7^+|tujoA$k3aDZmG
zHNP;lK;6Th9y~E&Dz0n$i5azCu)Dl`K`nU?@5>&5P2He55#3ESOf}<%B~DZ$=s(pC
z`35Zpj3`b+LhCvqt;>){CY^AJ%r>nUbFM5EIh0er{!_2kdZB0}*)@5M4=52DL<kTA
zub_oO6`rrKy--+Aef17!uKjWDj&qrmz=!qHz(>N0S^}+p=M??C(La#FWsG*O&kVma
zZ+X*`%rHt;%zZmO?kqTV61m^61QyRA7g#<8?qJg4S)dNjvUET>XfFL{?L(-CdX02;
zt5;G1<-HnR2{{?#NNE?ri8G&#toZ~QSsx3Sr|yMwL#s($VtPltegv!O>YWFG)%kp6
zR(CF5DpMmL>0~Nc7+oye2yWt|ke6HcOdDD8Cv>Jt)VEeb7#U{2-!Ow|XK4k;)96pL
zuhL+P0vs%egTlc%T6C*4vPNJ^W|(G!;}y|5sir}8f!`6rwkR)~Q+^H>jsVBcN-un@
zWP?PtwC26Q9UQOuZY4MA&{+dT{*>?zUnRcUA1*c!z(Igz4bO5Kq_|DUR#7#SV<v9H
zZ}lKaTRCpi8)X|oVsd;fdbVwPf~7J7TQ(^Gjx(3E`Y0bDLU?>8<;mjqqdtAJx4XL0
z@?%u>fF&Q#{ORfcAeQdfE@Hd|g~s&uwJ!1W$$mU4eWNHFnX?wWHPl39Rl=Y+HDqr<
z0~B#GTRC0m(U~j&3HtQg#&*CN{D2FfqORA;z(VSt)%Iq#Je=Xk`Sr*jOr(#{d$(zc
z4k5x72GZ-Ex7my_g;9QPV_kngq6JvdE>28wWSW6^9y!Bv!kF#gg0zS|I*66IgWfnF
zIx5>f!aS8<*LGMoG{pllCo)RPI$%;FLwrJzfpogN`fNqLDL*k-&bZX*-pr*0dDnqS
z@ZPs!)qQcq_GIMV*yF78S?&a$#mrkB&4rh{zyhe8+2dhyA9d)g&iMejCH<4=`$9T-
zAO=ehbkJa9e@u{Y+gyvy@Ofa(RVK)1RS<oXC(Pz+hekd}n-wh*eT|h1f@RyOh8<RW
zdxh}QCCEp)TzKTu)u)e?3y^k-f@%|H1vxdTdy&ws5buNVrEQzyzDJ)oO#0EJg+UTS
z{!b2MV^&7^bS>}Zmw3Bri*XmhV>Yw$0N$CJ1(ErbpQx5N6h4JDR|U;*7iDW@|8+$M
z#1Qlt#&G0_&bC$SptF#A+S-1te!6SH(&OD-jVnf{2mAfR<dDQx{H3C7R)(R!|Kz^i
zuU(c7aByxklxu!BgHUhV(Tk{^ElUr!ES$Q&ERv1fAt1H<F3a|za5hg}ufjcZxSJ9M
zXp>-@Ac6Vy@o}G~bxCV7uy|cne!zo!3&n#OQQTVEfyqH{+htTJ`f2RrIaA-M66Y);
zSEXc8%c`6CGN@$N)01Um(MMxHLTFQCHglUfsr1^L<@rI`r(RXTNw5yL0Z_?b7zFPQ
zOz&;7glFvknk*4^J97nZA~4v|>$kF4^c3{pJu({r1dxtD)&-w&b41;v91@X8o@m!?
zG&PfXefIHzAXzheXMwt28g}P4(-XfO7&>!E<#w8yy*I40pyUN%yhnRavm=sPVqyMh
z4w=QI7FO6jG}vj`^n}BtoBGK1E<bKlQ+ZfHRFj{eV~rv>sNOk1+l))9xCnpWCP{jl
z20Z0p1>!w_YoxUamUed`0rtDW_`TZq{tBy?%M~YGjy1q^OaK%uX0}L`NHhxi_6zj^
zKKy8UdpSgWstt2!DQua&WDqnrohNgE@IZdzYnXH|8x^ejaK$0OqNGFed@9oMcUV`z
zrlmk_p4F7QmMJ~HVC2iB%O`_S6)%5lq@d;3ir2t__0jD|H!H7|p^Z?rFZ<7V6@d?L
zgS$rZiD4an+#d=RfKdM&oh-ef=*A1K|F?D;#=-F0$lwPg^I7w6RR~MdLTE_hFGF@;
zvcJ}@VWi<v4k>gbca+rjyJLM10f)=uXFiD4x%+*(I0w-qY~<5*5EsH{9SCM<gvu5>
zV~-N5fJhGblx-mNHwsY)7!@vZ)|T+GIT~Tm_;p;Nv`QdaR-bN33j7U{;&Ak8JvXU{
zx?f1dXj2+5X8iwyBuU@;ql{CJ5>DZ+mS}fx3t3^P<=q1M#&9))f!(yebYuEX+8OeJ
zzWPVD5tOhQVWWYPdT71Ooy3ai&PUsf6IvIRP&`7<g0Pf@=dDX~0I?;S{+o_n%{}kU
zyB_WK&LA(+5MFF*2LvO;dH|8j#NSX7J}(wEYj~}Bz4{Jw(SLZ8&d#Xe=!OzN&cScb
z*Eg5PHnh2v;hwgPJvCT?x+q}FX`~-+6{;Zq3w8sH7&p7-_?P^-FxaObj;;;&U5X54
ztM(mhJvkUYyP5&0lFTRl{R^COlYPRr&$qb~aN^G(Kp)vZ8!#-<aB*xc{sd7aY+8A<
zvuh!V%QMm71oq0UHP(=d|A)Qz4r?-N+kPKs9!DQ_WGsLv!YFn`Mi~@@5EUhMM1@cj
z0R<!=0wN`lj5><aqJW}Q5i2bq1f+zh^j?I}i8Ltz0)Zqz+P7916&>b%-{aWd-rwHG
zvHv0_w|Cl|wXXHM&huPTX?A3h;fG=Gvh(CdLGQ_dX8A8iI~IW@YfeNPSU4KOg=2!#
z>lc!RqsctJ<+G=~`TBAyh<9a;)A1L^s8};Wl7<Vz6cpqqyp6Neg%>iORD(Ksd6aWd
zK_j_;8mCC1fI=%M_xgP@6-kWC^$t;7ia#i(dMP{;C)EQ|I-zlJ>e#l+uVImO{#}~6
zWW<hJ1XN4Ie_&c0O#ucu&q<*eJ7PV-R_2EiL7`kK4xT?)nc+^hPi@I_$?WnAX~?Hr
zQG9wG3#ECvu)3;!z8j_DV>Z^8Bvt`W()Dk#wr+a^*X9{OFTBg!^w6Y)zujsC1CkeY
zj*5NX@Z*8C5A0LjXslxO<Voh@m^(?DnP-si_n)!tm*LQ&E2hiCUbGP5+ja{oz))6_
z1@VP+Ve6C-do|Ak7jxlCJA)=RR!b*}UrxlUjdWGm2wO<XLAy}BQ72TTHBb-@1XY=A
zl&tA4&L6cbRC<>W3_ke;Rg!P_D*8UN%R{a^rj?pZ6<kwDo(`mQ8~1>995?ERww%d0
zq4I)H68cfbFa`8uAqPxf{rx^Nv7hWvHP~bW+GL9xjbf$H5?q{DtbYAawAsU0gQOH<
z{m;V&C#woqM+bV^p%N4?1m)uIrJnt`1<suYWt*HN--OmyU<&ZqqH9)`ioE(CNJjV=
zCTdc~GkP6<$K<69gUEMzm8V*VA^vz@Mo0>#GQtA4q-TOp9bAilBI2me#Jz;y%=kN;
zf8y`fb<Bj+WwA1!a+FKJ?tW4e^t&!<LN)TGuSNdMNit7^-=U}bF6&os4Pb#87kZ*Y
zQea*^{TAQfd!yB%H2B4Yb`_u_?-z6s=!70;j#Fphkyq9Oo@E>*#DgXtjpuv7YtcbZ
z<Id_QxngT%4`ug{&w`$z%kTLd)Qeg-5;s)FB>Y?R{f+pbo+)EhQ={!vk+#%8af%gK
zWown$ZMz%*uf|mtA?lKEq<!T_U+Hy4H<rrA18U*(Y0P_uZBV`B$0hh82Iryk2|z5i
z`=Y~6O*a}7q-XAD#Gh%MxkWQ7Z;_x86sUpkES)y_v$1Nr9&i_(agV$(GygFiJ_Zrw
znI&N|>e`Gl>N7dESJ=Y_+9!Ba-X~t-413|#8wSiAU$*u_(1e%kiTPD$A#Qwim3~wL
zxei>5_PzffQ?+MTA1+UkRV&@J8<LGDp^v&6UKXyG|K06Y#dFIvdfdL;Ri_+w!N<-R
z9OeQB^7!E{aOM0?*Fv3Huf4%@H9oc3E3yxiClzW;P*psv&$$U+(}Y!;muoa2Ejz0d
zVSgr#qoHqqaB}I<ypcIZvOg4!M*ns5s;cn#QWt9p%6KH-lkc6rth-rFv&47GN-}SC
za;&k`-JjwRA&iI{8i{v46BP(SGVZS!h?ONPI+ASzDtqeLaPVATPuiucgnYT$4ZVh?
z*s|Iw37qWCl$}yhnyt%Ssd30$GF_fj+-tNhI(;n;PNb-<5xFx((9=J(Cro|U;z-BV
z|1`WDP1;g*!1$lY1Y{$zc(9VB$hE`~8%j(KmF~n^SbfJYz9lF=+|l+QR71`5>(}_q
z^F_p2Wc>Hf@eaRC$M>mXWGsS@PP%+3utqO{SNc>#xv}L;Zu0H@X5Hnc-@eZB!=akn
z@Q3FbJYqAoyFx>&6%<Zjy(U?=Rg)4D47<T^R>Ka1Of+^+$$P=GKri+r^8nYa1A|tu
z4wWU-R7bNnHgg-fLb=StAj_6if|Ay!QJp2G{D7|{hH7RT{px=DS~n~wi*b?ClG4BF
z={_bH88?~T%<S35gX}~98LVp^Z(Z6iRzWzFw!4bpY6DkUEAw+^&&Vs9OJ$;&S5t68
zW4!z_+rwlF{3Dah%MPUtxONC!UV=u}VScr`w8j}YAU<y~QI`Dt{xd((_rAhv&CpU=
zg49vwW4fPefDf(X7@TyXV(CK1LjH*0dUhBe`{DdkG3+!B$~Slw3Ga#2Rr|<-Tq6rF
zIvDbD#pApf+w)u=_RpB+LhAFFMUfIZd~6^wM8agB{xF$xTFZC`pVIGUdGJZ);s1ho
zeC865*F#NAsk*hI-LWM!04l7$fC_Yhm2-ekp(>y9@(Eq~o*nsiVb?0YAC`+VIFzKk
zzA%WLa+;s8*Nk5rmXmO_H?mA;@M>Vfp=DW3w+}|#n?+YXKUdcU@WqtcF5_Oy;KnHz
zH>o<$edb!sGXKKaxJTvoLSl4M)YJQmA`{>W1<gu)AbH0rKL~1+|LxsmS!X1$6CfG@
z@JhE?XnT&ur|eP0Bs8grjJ!jgRw7N|p!L&URFcSa%m9HJ;~67cmUJT8`Gnz0-^OiM
zLE}WRcTJ(wD=>)|hk#)~O#-7{Myk(TD3AZpNvz+ZCv~K(8=#QNMz?<_XukIH#4OP?
z)ebnsNa!rQs|(>N?pCe>brlc!7Y^}lQZlaKumMmRG6b*S2szHRz%-V1aR?s1Z2Kq4
z89=YpPOS&r7acG1woqX(YyEe!7RtEdJV9F&e`JofD7c!A#CNUxQR<{!wF|GuGAg`1
zU(NqjLxIp>1YT0>hSF?ET;z2!xMx|O!FKwq=D0|uNz=-iODk|9pvhGHhBQfa1hj>h
ze8(HG71HD~t(GO0)pn!_?XQsK4qwST&KMjOS5a%nx}!gz3;TUTu~sfKOyltnNxssE
ze+}$)*_sq-WNyDzuMrR1PbLwubOf$&T*Oj?U}J=$j@qk~-O<V-4x2UhH4qEr>@U*&
z{ONwR9M1uwGFaKvmiU}>*p?Co=B@Lp8$)I~z&BQ=C>YrstCSzH!I^kw8h@@>GCw6|
z@Md~Euc{1AFzBK)4K%lQd?9H_jIpsRr)r!QM>G9Sfr*&YANBC;IVV^Wbogz;&Xfeo
z0Ls5#u!}nu{QGm|A{)m~R*IhL-`<)*;o#Liiq`baYC`3Hi&Q>imixwF{9zVoF2~YR
zWSgzA&a1|MyUj|4Xr*R0S^>D_rg9lAK2E&rLx-@NR?*{(JKktgxs#ODl$r7kX0bv|
zZSP{9L`rMu!cpW_nTe8Q6#02k3BdO*H68o3LjBZh71lTZt0>&g>9PFL!f?CAN5Hr>
zd@U3GZ$P~|Kuw!?hilZ);hlKrsuN+cEMR2|MBD8-6tuiL3a=`Xk)j{Wxu1!a$2|!Y
zfSx=#r?@D(cBHiZN6<~}xRK)vv@h);T7H0Q$N6m3k#2%7<+ZP0={?EM?%JXXor%GD
z$I#5>)Q2|}o6lUbNHSNo=|piu&Ctq?{w5x{&zRon;(AC_ZjM{Y`~^9_{*S;z57%Xh
zTI4;c7fA|5eva)Qva?OSzkw?qm;$+$G$LIu%dPj#<Tnq^kQ!Vem?azZ=g=yEpR6`s
z;}dmaG=|U_yTCE9JKou1XVGq}qgqTe*o_YI#(R5KtE%doS23I=Eos*SNdJQ5$~y&<
z$5OLh<b4W4xz5v!1Hvst!v(er-OxF?eN$6$#I6|5)EdyA?Atg;e-co5$D`VWUGD)a
zp!4iKzSJNexL|EpF4vMtG+PH6IBCsA?yz>r4UXz;coPIZih;C9rJV5#yzFA>;X<SO
z-Q{3d$uP7?gGXIhMd_-j@s~;(VE$3N13w1!C1JFip!1U|<K)x^j<gaqe85OA5BobG
zv<LOCV1Gn8erHk8WQqT7S3q-S-6&hrdF|P5T_xczBYv1qIzqlUV-i5PPZL%FYsc=%
zI({0p@(ToDFi8W|B-_#wk_==PYRZ76n16*T61w8&qs(xwOPI&Q;-zoA!Hg)#gM+eW
zAJhdd?bpc&9w=$xM2u376i7-M2MGKBjPLuk{>HoV)$e%c@V|j7`u+Y8+l3Tad=G&G
z?(q0|hNWz6fUVuI3IkFG;^B*@;t~!QJc{QjUc8m!o{E|n1oD&7)q0}<om8Aj0^|89
zTKeCc3jp=*@;Z#OxCLb-q}g&2wbv5q2PlR!Fp+kuuT80@`0(z+m4DsuHCqyrqz(pk
zDZ2b{EppHr$W7)vZneL*Zi|0ni#|@jTLxpTCQ+_re*2VqkK*nDt=Xa>IH5NmEA5d;
zI6nRg3kRdEtlz+FXx4^sCWTlJ@kTR$gX$0`XIJg_`&ZcB=3j{6M~q2`$QhC=BqG&7
zj0zCQnA0%X<TB~yIv^z(N*so~r8)dzANcFjBbTb=z4Qoj`bIn@lRAyoF@gV|q@9)l
z(TOe2DjNO;yonnz2XFeQt9oWM*4O8q8NR}L3gH=GDOMOndGvE^O0s!WULb1^pQ=q)
zH#QHjUcxsRM1ggTrWLiB^aNRL$Z~U~4e{7R2FSrBCRE;Fkyjd{+}h$sVWCD*ief5&
z=Uw=FbM!3k`bulwEkzG6_^47lO%~|O(eH8nA0*PY*bIRAW-2=~ck(wadIMJf!0@PQ
ziOx^{X@VE$$;+5k2s*f4Tzn@BOT8Msm661t7_r%-SiC?;qT!?6#bAzApG$g_3!d+M
zY2(n3!@*R+-p?|V@KV5<t1lm^O7bo)br3(XtWrE#_hxOc<xQl&jm`7tRiGbQl|(1)
z)%8N!jPxE4qIbIi@rf@p9tlnn$MG;s29?Yh<;5Je8$JK%)Z=ZaXtx&=Er%p(ksRUl
zG6iFgTcgPiG%P?RmK$3;p!}pd`A&|6oq#)JyhDAZAH^|6Wl?o$_@Z_RDoF`;bb>Mz
z+r|QL$IW%Homm-k2+tvhU+O!v2~Yi`3PdTN-hMe+O{Yx0xxMGow#$id;b`m=f{@HP
zm5Z9BlsBp<63Xlb{$rF!N2~a=ge_f8h4=V$$TYk1%aR6E)1-Z1o!lSbx*?Bns1eJ5
znFI<g=N{=q?_nVrDSIOXmz$KPSq<Y`B>j=<C&lZ^u&1wuz(HX1;^^L){+FSo<2TWg
zN4GDO-kji@RA|pHYxPs!VFe3iQP}oVP8n3Zp??GSe$KZ#ls;Hp-Pujiv>#~oLplka
zVhN$Ln<x9^29q=PC<F9D^F6aO+spj#xAdmA*D)*SsMzfnoM5G#^-tpaAd{U7vDnGu
z4wYF^uqU@yH}j>KbDoaP({6yEy+6@)12_JeM=u(PQW#Y%@$2zIGyZn)m57AJ;juzK
z>KQu9S=M$pvyou#3|2iX2d`IeH6a>sy;8MJG5{V|26}dd|0u}RK9C1GD{cAWl%HCz
zTgSN8D!!w$AKvh(u*UyeISa1iSSvqv6e<p5HGh+W^utg@_XG6fCFrysM6uEqs-v{7
zi(CSy3mcR9Nz&sr545?6<Yn90ZSR)^+TRq$U9uy&xa^>hI#lRtnF_bK_O$y40G%J~
z$_1U??Kz|KJ2H;|x-)Vur-Ov4$NC`|;eiS|vt)cpv+Fzew7{+<T*q52mmvO-G2-Ix
z4+--e4#rYJ>kC|qN}vK!5vuZTG`G0lnNyE)^{L$6C&bQ!h@sUTl9S{C?a`-_oDjKV
z^Mhd1O#Ybzdq(UL@<WHNs$!eXu?O1E`kc8+2pOGGZW>pKlt5gpdh+PnW7TJp6$era
z^2Ai+%DOYZmgO%Z%Jd2q`Rm9Oxh-znsir|gO1lP^I4fR2V|Yp>%M{MKZKy8xs(o!W
z!0GcmjJzccgc}S4y))OWVaZ?IAD%d~%6<;??+bS-TzPW}YD&~deRv7mU^R3<oWU37
z`QtkQ#0sIE+2@t7kc&zB>wcrCmgt!yD5mroGy<vZm741HxmW2gpr+(e@i?^&DoJ>Q
z&J_|RiBEFpl+lO^Xu0MTB#P7}N2$|nITAWUdy#CFD>)uLvc>gjV+3y7<>&BsdYqDG
zB&lduESk_xJYEyNm69L&2le5R-KhhBY^-=zv68H(A_Lo~=Xd`cK~b3BRrrTh2z)HU
z@4#!oK;{*O!bWOphi3Tl&UK}r6H`g=)&}K?b+idw);#S{seo<Zh=zz+;**G^V$ah`
z@7+(a-R2s}1S!`SY)Qut0F<#tf-=6yN1{vj>r}{U!Fg*TS&=_T*{L}ENYEi)V0q#T
zQGpO7De|xs)kA29M_4;-dhq}bo-4p_T5k2!{stf&^AOk2J#bN_`%iwWV#&F`0zg0o
zwK;TCJ+X~@hm7&&%Xiv^z41EeEc5=K$b^KAwEIh>6AHovJ$n4=BTI&tn|;eKz9lHk
zG+&mCM{JwdlaBty8sG=;n)=^AM~DBY<_6)%CBw}$`<u%4f9|%*)!$TTl0}d1ayzu7
zWi^)m-8uAtg1A|C&cTD)yX?hNZ|<nwbX`6X<$ck@B55`J&F&wcwk*uP!9f81h-nF)
zDvmFF00*s!0iVpb4@8FfE%B}|5^m=3poZsD+F@2svZt6V-Gmc?B^l+1tdK?WWVqtS
zbSFXy(W|$c=9_|%@wet}xf{`0kbU@Uo9U5XSv>7DV0mmLdK|7cu(+TMyEhx%^@aCw
zHtlu!56r1Xy|*=Vy@p%LE3$7|A4|qa#3rZ)AZKYI&Y_z+he({LDTzIvH;O-Hx`1Nj
zZ8^l}U}kiFlo2Q84);FUdTN_Ee3Oz8$`Q`=y`-y+kS55Un#A5d%3xhu$NMF{!Wf!Z
zTPckuUN5NFF`_O!`*Ay53Yr|XjhfLDO275J-}24{2ua9`m6ki|f#T>NiN{}DV!m|e
zwfVM(ljZpP%&aCMsL1)Rbt!1UUow=fS3|+GVH=uTka=wuhTHniat$(Z<|=+Ag?bxW
z0zhx;3tjolKvek#HivcmQ>4-<iWWfYdnaMG;^oVk|0Zy8+<$O#++lb_(%FyZ@EvD)
z*-|9$_?GlXU(C9X`Ab7Gx=`J$R{(fDSJ&0}@q775EgZCduhF#UH2RSSHT`JC)3r7W
zSpywBoRdzjQ7UMhOm1OpAshpfNJ02hP|(D}3q`j#ab}J9)BFj`XL>um4xd&4%8-kB
z$x6nR_ZrH_tx9UUEVsjE@-2A?PfgW&IQ@lh#Gi3Ufx<hh<gmWNyVhj*3cS`vDbW}C
z0R0;{oOEtyBRs=c=pfK{fcv;-Iekp~DWHQ3@B}&)vvU7ksHS5ruil!#=`}61YVXYS
zx(!@S$4^U3$$4BpfPav)S&CUdq!4X#JZaMi$pyTK!6lY4>iBf=GH-a#XdrL`RVwt#
zziYIIP0=%bc28{!=+nhM1uRGFsBo(LiJpXp0-pj>6ZlB*dibY(Um?qhHpR|0z<U%9
z=(ZkS-+Nn%FJu~0(yYjCke!hYq;`t$tg&gH)`rWY78RgBi%#}l>P_!DeQalThZ$6d
zwJf2Gm?%f{h%r;JekCVq*2&d!ItXGMBU^_oQmQ(2EX~L~-6$Zr`k2@Zax1+N*RM@i
zPg`x;R0Pe_vwIE0;~s}0?qFGT^wB1(RTTyA$hqF4KBB01F>Q3QswzlPcvS-KRW$FI
zc)kZCB8$h@hA~7@jJ(`pyx>Kclz3Ds$GgbZyQsIECMZ{BaWAoKKK0scyIc*W(P>^4
z|GV%^$2>*^L-45_Del7(JjV)&zvT#Dh$kAtGHJp_pl*m}nG_HwP{axLA?(DVFq()-
z+re#^dMq{lxST8+Bx7_w{;0Dr(*<f}7krk*EPA!mV&`t0nk>I0;oDo*uz+21k&#_B
zQK4B;sMGZw0{ROJB@}?#)_gORP~QXrrhLErhrav0Nl<6w`yCWgF*g8N+rqF%+!00b
zg-s+g>+CA~AkT`zyqlcaSZIY-ne8&H`}C`~a_~A;JDV4~n!Q$u?A4I4gDB<^iJpbZ
z80V?yPSelJ6BIF_f)=v4<q#%7Kz9<-tuw)!$9i~<N6=WkRpNe<sGl9HI2}v!WQj&t
zgsEw=c)Gk8>>rL5u~o%v_FBf++AyrB7fUF}<5I{}L7l3wt{yA&%raj6>TTfGUjZH{
z+0Rcj0y-QMWD)4fy=}#dN2BME-aLVtdV>Tqwd<v33w|uq1N4!Ta;aSa7xa*R*Y#*e
z?B)hs)gMt<1@MzX8v>+^4M(>b9EXA{<*$OPT%{mYQI~3-H-qQH=(S*QEEL7zAd_p-
zm)0f(+vs{s!8cW|)iD-S&Zmj_SkWX&JlWeM>-|pFJWmYnsugF&@UzM{6WN<-@*y#9
z?29qni(#2yTVfKP-AKfM?agWO`byGo+~?aUdU5`Emhdd@i&W?TE#e-=Dlv-CgVM=c
zHI-Y<&n=e{eU!qu@l+UWm9Razoj;TK;O!AzJZU^U4aL{hnb6%q<FSY=HdZJ1x2Y*#
z+06nD7yLBXsfzqmQY&)wN91KsAro7D7Xa)W!+nZI(Fpl*jMt<-ZM5b_Mv-&Fo8(p2
zYj!@1;;2BK0V)rXm)jQ>ws}V%>kS-b*qX61hpZYu9kdcWGq%tS(y=<q*N`lxXz@4H
z{>)CW=ol9WYLR2r(}F;@$<y)RuZ5`7V>%wZP^{>alju|Dg510TwmNoP9lh445ba&q
zo0G@P*}>vqSZ-5kNPe2XfFeH?9OKT;B=R%Ez(wukBf#yAoP^(Y7&?!ApVZ3WI*JF`
zs-i-)+6FLfUy_Dk&2n_IbRkguNNQ6Xrn97tr{4;&|8`qDhRE?I5J*?ptYL5f7aKcz
z^%lB&uMDV0s$GJHs>0ft3g~oMGlSOcpuTN2a;pSA!_@nh^?qu)sygY<$v^7l3>D%*
z*AOF;sRFgHX)e_n9O*xX*2fzjHs6+#2?uhgL#&4^cS4`L0u6VNFv;vAjPzl^*GT9w
z2z1XIj|y`V+;Vy&0MR%3z3{E7Dvs||bbVH*FDTI0F4{2q0V(`IBD(c03SIN+ZPd0?
zc;k27QZ3s<zO!om<=G<V0s>&K_IsVGHHFxWYRz|c@CQ0Lo0DH<PtTj6@=Ow+mV_7>
z1f(i$y=qHEZ{aGfE+ufIQ*wjcl*wMZnY#M2n1bGUo7p?^Zk=jNBy-k5%7oDB^~@Gm
z(2_HS;5q&Mo+`02jjaq{?ON6(O%=Q)<drF3-!LV%?m6P%3&J3Kog#akMSFR*o@XTv
z<2u#O5VjYyPTf=7r;JqU9a5dTmdPGB>R~pkO~pMIA7gvDYCF*Ng%<`r9_VzV$~=cO
z7jn*BevgvSx}<t>X|+#Z10uhphJc}oJ#USM7bs+5XN#bYa>T<#{%F|S9|TiMuJv(_
zQlGXCAYG`C_H_u$mr(Me8~aF@uzGQYco=EG>vfZO%|7*ZNfmvTQQ<!=B)3OwEgW!<
zNr=I1`^6Bv+vS`p#66~HLSH;l9JawjlQ9*=@SF-(oeC~rteX2r>vJKR$U%n@xeP&C
zdA0a`HL{2{wuUywu4D)*H*xzb#oY{HjxSQ1^KopnxZ}oDUH*FFzjO$tu}0XJ|2m#e
zdi8^-IM?^gXm@^n`Kg_vodnN}A)Q}k#x05ep#=~gZDxw&-n%D?X(A%Bv$xjMv*T?|
z>h+`(&jFvjxNNpg%jUd9b0vtE@`w=qM3~|YpWzF|!%}MX{0kiW;jJ;>l0^y31kf^_
zE*`VIwH|zGF^2>cl(^*LKk~{C)$24fwJFsx#Y)vkOg~N3&6KOdoNpkQ;#X&mu8xqX
z%d)<x%Ool?>Au5n6luGoX`%%Esg)PX)TI|fnd*Oh_|f7*ae{9Qirq>ZZzYMZSCQdl
zPUMS)!`+8`%JO}m3hu9A_1+PzD8P=+7w7jP1-|0GoqcIK<7qpP0wv_oC;8@7@8Ub&
z4IR<rqL(ZSEtl9p&0OT5l5A_q&D~ud#Cp_@jC+e63&un*N#+hpvMsl}g$9aQO?o@h
zr5mR&%`5C2gsL+gcExx8It4SD*}rIKO0r4nI|`lzdUPmqBv!rDk?(NnBd9pLBMuQy
zw*TObrRcCo(`D94;MuL*6#Eu-JGhco_01(r+f8`!M+Z6D8=O1~zT!q(9q-0qk?Q(p
zMglRhQ-We8=l-f@bA1{a$BP)7TDv*^d~z)$WVNi1E3yew=qoznaio|Ht2PpvqlO$;
z3)2_Y>r=B2Om)7-@+nyS6joG8GZB$X@)aYn9HdV}Z)@Ig>kbxs2g~JVb>P{L9nx=O
zl;4{E9o|kh|L{+)KqUH)zS|rdq@PF|O>L|czGjrCp@c8h&T~2v)-Wr9!pH4r8QJbt
z`Gf*Mac;8?%_?i+Rue&`NsyFi#YE4(W3-85s*G_2<IELt0&QAZN);{2DOVJAD(Z|7
zn|ytmdf_CZ8Z-JIQRXF?4FgYqaDV;>2(h^amVx4W6)hiHnf|7?#G{Sn@oPmJHZ!rx
zvNQ1aA&=(j6;mt#E5S$J1ZNjv&byA%>X>dM(doji=wcQ!vIS2DMe=C$v#2j^^EEHW
zGcu{eY*kt(R!2v(REp>W>MqNxoI5d-{|N(+gsHMmA0&KCAP9%5MMJ%5I?S{kY_t`|
zqqBX7lSlp@Ox`qj7wlx#bn>E7D79HKA9CX!z>OCNZp2SP_B*#rF%lX1)LBbHt12sw
zpNNu6eiUhzr08~Qf>6;`{zyKlm<dqw(ndofI0cG17H_^xq}`3v2VKFxOtO1Up+xhe
z8bL`%iV}2zda8E?a%@FqvQC$hP8ZvZ#xWy_e;*?YTgV720;nm)?07VFa17f%^;Syo
zwm62!1~pPkMRg@u6>gCReb%xChMMd|t!HQ#lMabuJF2`clbuO9$mn#AyUU~LAs
znkeU>rHE;#_*7y?LOa}aSPqai7dIsPyaTBJo@Vw4xos<zN$|9l==zec4ohIPy9LQT
z<1f#nmc;Wo_&@YKQnzx!M2gJGW%2V^*7~5-i{VbPZ>$Mr9i4irKmD{^i!oJIjwbY?
zF?qsPnsC{A#zl4raw;T@=NZGx5)?TJih6G#CvRv10Pw5y%Pr75c}_<VT3fdw(L0H3
zD#PGYfV)D8mjo3A1-|#qK45K>rv;!XS1Z?d*jwnAOye4unF%RfCCbKAE&TC4W#yTT
zh+J>2Ct?hOR3`=s->L0h@=pXCr8P|8?D_g*ZMjcXpLbPnF=41!q18jk6!Xj1FeZ`M
z4zZ9SX1f!4?qQ&8SIqXVZ1k?|g%z=acskHKvlQi1kFprO=`R><0<}CDDC}|erX}uA
za~ttgv`QrZ)!X*1fEQDK^m4=&&c(_-jpa0(8dE5I<JLxg3n1uSLMIDXeQnUKQUs8^
zond7DhEonDhjaKZUn}+&{zv5b9MNscH2CeaZV{N>je$ync$5Rg;<Htz(|Ut~ff&(1
zuR7VYvKxs^R1w;Wdt%H*HL+S3KW70<BSpo<tX;KKxJ1eGZVvNlj^ooa^BM$=ZY#@~
zpt=XSb=y~Vfb07WdOl?DzZrdD?+M5iX0DfBKu}RtqcTWDJVHnr>*FLX2gu&{9-Fav
ziqj`l$pj9aX~C-x2MWi(T%K5#e_}$_8-KnDjoa-PS7G33`{CI9DrhQ<j2k~;I0b&f
z3FT|@#(oalBPKiL34{!xu%$K4Hms2V7EyT|@LOU#EApIGS+s!@PTAh=jXv$=gz`!F
zuKfjuj|EV+b_jT6$A14mE_&>k$2z7;y)qStoR0JNmMz1BHM}`ystm1@>|W!Q@o-UO
zJmA_NV}Y+^3$T!;|9`_m+DK3cU^vSKvQys29P$(w6H0KNjc77(ks8%gZUPtS1`A8Y
zAq#ivNT?@M^wJPoQX7CJ^-VXBSDq)rHivs??b(m3APL)<y$L6Q<~dLqdSGbnLwjKO
zB=%gNLGmO>O##$@tgf4c?`+S3nBJ+O;LQxuFJZiv999dP(02$joq)jqvkf)V#p&X$
z+>VVl9I_@niado#m^lx^O}cx=w(0`Z%G3^XAUT3_y={CHMbZs>c<OPV<%R3?<FD2}
z|MyVg@z-}wfVD`(mJ824)a_FxDxb8AO^KW>zW}Rf#BsQyIlD3#NClQ`d0qh)1c98Y
z+R%kkM}SZOnnJ<Pchz_Wq;8uymn@-5sVT8zmNv4<0Z|pQdRg9$V6xU6kQB&AwfDug
zA47)RX#46H{bKRVYXY4q4P6gm6Pg@2Wmnz9`}8YF4Fi5r3GjzbQ^($eUj2nkR4Y@?
zT1}H8fD_(Ap!=yQ0ij{-4Mlw4j3P585LsG%E3!<A1YaKK;T*EwU!|yY%i@;q;M@Ci
z3L2FB(ti0za1POZKSrYL<Mw0KUmG<Xwz3Or`VOyu3;1IY-kEO~_tIBz^ssM%E-GyJ
zzkiNV3^XDvu(hie87i9_D1n}a&#tXiWfbm8va(eTeeyN^+t;~k`Gue|g}N`pk*m>6
zc7rL%Zcrg<Z0t?*MSTFKAFZxq%t8BxH;u3X*(jw<Uz^AR^2UY(9ZVr;JI?qlSvm7F
zfGw786s2XM(nf6r5>qvx2xque@`*`rG2@{8NtP~&I{VQB{tU)e+uoD_;oVzn_S$**
zmKt@cWR+0?njbj>&745OWmZFk22oxl4O~z{Q`huE`M^dxNH$kvK2XT$8Ki7O70A`$
z8ZkC>6?teRh5Z0TJm}%qiRO^?&SP3W^VM5=fqj#4qi*j@O4nj*2AKzS9%pP{x)IIB
z^0a7=M|wq#zsn6kwOq_z6eZf3_>@>#KBbH#8-_)5WNfyu!~$WT+#{h-w$t{iGZ|XT
zzOxhp$A10N2f*3sWwGiX%$uKq{9o8y_2ojT?Hal+l}!s;mglPi*<{l{7)8HG89%Ee
zQ-NQiUKC$N7mf9f#GlrD@tTxv(QR*VaL4ZTwKf}o2lcz>B6~E0FNSUMr_bXgVSm|9
z?lE(T1@&)dPXAtixp@8i7+c)tiy)&wV##gLN|>?aCjJXKv1p#+)raZQ$kjxB*pEq)
zdoS1Yow|_(l@L#^RT(7EjJ(fQ8wh|jVz;h`J}(c8AMd32wS&GsYD#a+40`_Q5V|aZ
z7mD0A+rXI%Wy@nJWp%$=-C(wdC+7IL+uC>bL5UJasEm|#EzmjvlPKuzeF$GiMyVDF
zKhyekT%cy8Y6F0ehuWPwX1{!@Q9l4JvpuCR&H_aylx)9<M`F?CJRsAq(7ImOW{Lsn
zCYZW8(oL9_7lPwGO5WaB(UR7N@B}tb-e`rDJ+OI(f){bdb|F8pIZjBLW&GrB>06j*
zq&9PDDIt_H=*STzg3}t%44~rHV*&bCA`}F{G_V#W1nFxJpi8unu|)<P1)NPS_^vb8
z5E5t8%ZKuk?t=WweN4+kQv0%l%#SL+kPb5is>Tl3sG{4vbgMQU%N6r0piCL>Sb4AN
zb~2C4$MWeMPmZGVEz@vH*At4>6(7tr0|c<R{_Ya9Vr~^s^Jz8ToMuy_4i!2mQS0Th
z4+)br6NV8I34&d`>)8*P6!BS(lLZemro^X($+fts2G!I0vZ2%*h49+ENxI1liuf|0
zl!i$<c~!7yJbEp_CwD{OfM2Rf1W-9*zP%+U>C_(P)7_riR$1=gE+c<S&_7f4jR3Os
z?1hdQ)uBV&CbWE!_L45V1CUi}elM`>^kb6;`)L*F?&3*RHP5!yAKZ*wE39{LRT5`Y
z^@mL$jASzV!<-2-*L~&F_tY$XW_2Rn_wepN0JmPVMPeg8upEqA_>aTV^YD5(dx*+l
z4x--A+yfkd?GcRkP-1rt>2K;YERCwH?eM1+0<q(z-eKwDa62U;NC0Dwq^8;et0)g7
zfTi<3j&!~ax8wB$Sv>*bSL|Hk*EA`r*W0>=?1QtlkFyM)EuXu{X5kVyX=m==)skpy
z(B(>|?M7joTH<_Dt;~4o$;mP?vuSm1rmXWV?obTH;F5*rj=b3q{pC_MO$9MAoDlr0
zx3v<>iS$X)%ujY3_{ml)1bM0=1TU~2{XmHS#@-wqkQ(@~u4?*$2+Sr*3<Ye*czOAX
zuyOY`xPG3B1WJWeoa_3XqSJ}V@+iff&wL_9nLtd7ZFnblpuJ%S%ab7LkD0Xf5$OT>
zj3-$6{!Q=>n#2*yNihMVRsm6;gDE66NIGn8x!mFoGmJ_AV|B<FvjDZr8uEEt5A|gD
zQA}^|D;e=_rj9rt!lz18D%k+iIvO@QfTPUh^pZ5xCc1QS(|gaQmt%5Yq{=dTojL*Z
zO)P9(1I3oBh@5ovO&A;mldHH!zfa8wxJ|CL0P!rb<`a9i9`P?rC{QovvGqL8z53sh
zvBfBJkDRc7D{RCR{+b+?v%ojk&NX=!-A$YXmv$nqn)-K_F9=qS9fU}9^!1E@5g5!a
z*YwC87%u!92ZhR`jejGcsPJ_|7DLOZUMV1?>y{*>Yy2#TkWi{u9sYao+YXT!pipNT
zW(lLR=F&KV>mx~aGf*!Yk?GN&=1AZINn6KpKpk3l1L%q;NYgN>YtG|})>q#JIbGZj
zEg#<8Y;3BKq;m~_E2{(s?WZpap8tS{6FT?wE>FpKr*Rk-4u)k~$DKi*mb-o%G>jqy
zDgjWUXx1hVSL||KgyuexjW-lXR@ZmS!@p{c0W4>i;{r22PNq%CzGNqNnHCNtZdnsE
zHhuC0e-%CThF|~_N%Yu$*pV!{uWtnZpHlTV@DGg2e+r*}g+^b!7B%H7z56Y(qjZIF
zV}ibdbra1O==Hp)a<4~xSU%nS*=Q^jCh&L!ffTK{{%Y3-e0!TJ&`9Fzfl|<jMa@p_
z0GdeS#&ga)gLV#~5<#UliJ7aV-eY-a>3Oyxf1by&bdj?7%q_R4!Y3P<s(vP)<F#}u
zg4{TbA*Zwii(ix8b!``KZN?R3yON-%u5BH``E`dnqYQwtKCzKazXDO*&5G`82mY7?
zm2=IpEbx3LY<2#=ob#$QX#NBP;}`#Euq)8}?rNpkJskH9`TY`<c~(Dp^yt+qH6Yh<
zP2j{;-R&Sf!fQg^V-)a`c4iM&gHzZcF!z-~CF4CjN9_hY!l!j!mff>$-%7n&D-knZ
zq7gu>NA$W}<yICN7)gywGbHNBG+ft+gj0b#<}3DE9{SXuO>=oMF%<pPQ2NzjO0c{Q
z)HXijwzU{x!t>Ut&{7O}svKx0H7|d$p9X-L^6H+fh(+=emx&SMyuQScmi7ZT|LBo&
z-<VydUcd&*F)Z)G(=sN_oO|*puppmIaluWOI~<CEb@JYB`8#B6EvHe(9tR37dC&RE
zSUgdr=Q*_rDbKwv!gI8b!iIfsTEFvccG%gE=V6gmdfhflGGgmV1S#{V8Dxs7b7ASA
zB@S3b*%@7a{?PppBHp;TKEjcO0fAXQy_dd`Nsf3*l+h;~gG{G0x~k12UVlh(Ze=G7
zR~`lFT&&Ks0m%yJzuy7_(%eS8mi-^Jwl`QY2@Dd>e5_Gc=X(;v2~*XlK+Dozru-eM
z{Z`DBA>cNd<9ynX_ZVHL9DF52ajK1-SY$9@Xxmo!sB)U3h?WPd9<2!AoDYI3Hu<Qu
zqml*r2Vk1@LtXbhl)lkdh*6L#-kEqU<6~2w`2;G3qW$Td;&_w7yQ7z&(?M(g8}~63
zAHB3f7atnpdS7A>1sNFt*wym4AE>MidQ3BqBaBg)1YDtZ`bLru3HNV*ZIGE1XKE>a
zxuzv9GSXjNd(C)FkP)<N^48B`*Zc!`|4!Y?2(=~-EuV%tT2wk|RE4dr%QYWucZk!t
z;S&*?(F)z7H}4BgH{xlKrT`BSycVG><e0czcrJr$F}GZD?ZsO`{a6hHshnGxl9NcH
zL)atd=W_h<YRVU6HV1FiAMlJ69wyzC$`vLGOS%Uc@BKqg7uSu4GErXJQ$cfB>gL4C
z+p{={<4-^&Nq(Tjza6kww++>Nzfri*<<A9%YfHydG3klMUP4cWI+A7Ke}zZ_7OdTI
z7CJmDx~FDLoOq7LCkejLX&m*g0P&PPAn3m%WdRwjJad1Z;fstu$q|I42NX#{7fF(Y
zHj%cF9L0I{k2ci2`}ku(5wv4x0<ltZR3*z~2F$QtC9ImUt=`uvlON!Gnj^0QA*H+`
z-R9`<ZyV2jN%uUsRjn|=z#n9I3@&+eBHtznbq!cWo6f`J&*x9>4goRbzMU6<&gWvH
zd(iWP9*5b#bD=4;8k8pwZJ~u@=PUQc5|xq-=hG~0B<w%e-yU5XIumg6{VEh-wt0(&
zG1I^=3c|EL`$fN4JPU*1y!D=0oag0Ofx>U1?OEUqbQH5fvmBH+gP?T`vISGUDx&mN
zFC07%zEr$$OOJ3$eK2`<2Qpo~10)#UUGc5R@>1`?M@r=4xn#Yb{gkCzbcL{kOIkK2
zD;PxHO!+4=0olj_9&p>@)Y4Jn_<NLiz2&+&1>f<DZwbouP-o73%$CgPmwTpvSPDvT
z2l9XW9M^g4p%DQFqmWHI{Z_{Yu4088aW+b(yzsDC22=}SlyA@R;NBM3v)sFL4Ia8q
zo&{UC#vruA^Cm_a%djAUC7}rXW-r&|5{y$v&iUvAd7}&)!=l-zK7)gnj~7EF<r%Rd
zC1bE{z`k>Y3M8iCprqv|<2uB+`P^{v7S}6D`$nGrP%H^TZPZYniLW_wjSfb};~9&b
zN)1OK`-nL@Bid@eE`kQpoIcn)7-$TSQ(1kfwEYaRH{2w1B4s0*Y^b@u>MkaSyDRNd
zCp3~mE#!y+0AcsIpcnl&SuYS+A%1t+5~3<l2&L~&lE$v$6R2S`GV~;Vs~@j@YQCE)
zkM_LuFf`gt*K5Xe0{BWo9Ht68<(ZT{kqCx7PjMd$SzR*3qAf(G`QuM~Z6bY1S{=1~
zb~VMEi>qb6|0nb2XX5b}m)L*E<J>#BU|f+-&4X10XcPs$(>aZzpQVhS)4TkDU*Z`T
z?UunMsk)=@mucSjsUKdA_FwOky6nzwCC%AkS${E*a`43jSFZ=TUc)GKP%5^|USdK0
z{>*>N1vC2vw9U-4jwnlOpkY@gxx++7Vr;OSKN*W&IGK|FG-<MS5+4tM*Tu?Ts@{ID
zZcK-R*3VahOhM}>&MT;kl}bpLdymiN30oqh$c95QL}i__J$?-6`A^+6SI&`-@hgJ>
zh2YR)AY2S5GZz|xg&K5(rhBaN&jSkVb-oau(wBIGsH_p<UlChNQ$MF}H6?&o5R5_f
z-KHF10C_ez{IjQ2`x#JVmVo;ZhQg_A(>x&4w(bBjN;>-qFzy7;VcY>%Q^?su-JcX9
zYodEcO7r)s=lI&noJqLDk81-jVy|wKZKc965u|H<F@B;nVf5C_rL}rME|ns3Rvg2(
z0@}i|STO}?i{hBlRq&BS*D4Hn!gQ?}$g+Zmn$&~QTOhTnwqr%iq9LdbZPqR9kp#tl
z+&3R&`+EJW&aLOPsb<of`qKf)JR?xxfM~xYJl(HP$gqVn<u)8H=!)JIY9>S6l9^nc
zAik|uUzaE?n<O2_9G7(6fN_c+*>v`tDy*U~M>pw9Dv1SKf#9>^LAYjHCH*z~QtId4
zkyE9KBkL6xnkCLe#f9f3317q$nb|x)#bz<JqUHhK`KbueC=I0MAd*LG=c~6Ja5+tt
zK9MIW&aN5(D1TGx{80DCi^tIcl`TLu$hQCX)(i><uT~A=x)cOvd3=jhK4X?`lDw!p
z&@p4F`nou;>$gvqzc8$!OfmL5$g&t6N;E`?FFdF6i;0g)yXg;;)55J4TvMGa5wkH%
zK@i)dK$83Tec&4~Pigl&re3?cMH*!09(+A70JuE&X+j&ROfVFi{s=Z=0@RB-Wd`P;
zVIb+70)ILLGCQEZO<+<#h$Cy*2XQe16$RiZRdyn4&=0Pe0x3lpcTyk-=?ck#8;epG
zxJxYn#?sC1D}fGQkAL*5_%&q4!}_E2L!bVJA0H&sH-ca-<m(r0*``_FxH{>dF+HR1
zAas*1?wcLO_1FJc4TH*r6p$$SK>-mU-J1OZ#41jH16Qcj+c3W|G0=rs<~;V*bNZFk
z_#Jwrln~PfEGIDSRzB%6dnP8vV4jb$C--s%exMU6e6K6jvbxx?JKHM2u^{v+J#zxg
zCh}9D{Tz+OM8AoobxShg`u-%y+_8vQac3;d%B@#p`u5i0?g<!nD~bnuhz=%S^{gHr
zM~MrNOwc*Dy;pT?4#7L1@GYSpg7bd@R`5BPLQ;dg7zQlyfm*tgIO70l;Mk-EvLvbH
z8@2KioY#VnqRv(mb+xa2#Eqof;-B==@c|fC922r_K;}hL*>CoRk78vNO+Pfwi?iT%
zfxJFrRpMfpilNfc@dg}OIGON*x^2Ubzw^O*1*6f)8@t1JU!9;|ETQ1W{XJlBPsdw&
z(-#uCs(d@VMv&teU~v@#Uqxjr*8nhR>Kv4!noikl0VG2<Mj+li>)Uwq9%;T&UiVY<
zk0Uhhwx67-Z3qo;kn8_2kz6Nnp<(s@mkVx$sY{~AFTn_hP1V2Ht8z<(-<Uz;;m=+b
zr06-~f73rAzQ-H}R#EN<h>WvjS&{l1Ko<E&^?;A~6yPg01><=vVU8>%fO;=X+@)tP
zfx%h|a|p8nF4s6vHEJ>EDP9!r@5LZrY(}s(F*_v76bA-%g+xt|$x&Yn0=VWpPF=?H
zR=TVSC59edq>@`?Bgyi*^zBmy?m{UcHtv>gf~t_$VZo&{Nf7&?GCS#XhoztOQ05za
z7c`8rI-M+`JCxD&$pd^sRlo4cb$EMV2Dvryxk8@@_#2ZYK`32}7Hrx_1p{4R32icI
zg}fzAeP=cNTcT!#nv@SFOe`?OIRQVR_iHZ+NwIO1pci_9Hv9{C)6kwdc+*>7%=3@B
zQvB<g+=s<%JbP8X2vJWOtzlN)bz|XKYZ$Xb#^YcD&wJMbuQbI&#OQ<$<|jfktG79h
zP%rn|i`5XwnVnv41u#tkZ~YuSi(jRbU49KAw@AG;d&0p^#q3H{8kSWtAsSTFf%#_K
z&B=4j@@irNE&9)vPV8NuB{H^C`lT2pdK5kpTrWLK$NWP$$x}uIkvvkIxl5Nf$yX%u
zv{}F(v&{a%9CNG(c)pTga`S?g|C<FsUurkt5oYf<4AN$v9H971;>X?j9FT)Ix}?*h
znLs=GeeC#54!O!Ch-3y)vcmgX&un=CUks(Q9HxtPVLLMd>CYX+s<&6YIL`HglAq|x
zYhRAG@dk;7Ii>AqKN`acICXjxNNfk~g$U^sMWdsp|5r6Od!{;5P!o)VJ_AXAB<2uF
zH<b4Dht04WKPjVKKGXk-mvsF4DLzRJe#$3}q|Bg2G*cRFUH}VafvU;l4hK9h9;TMN
zD<^B1ARJ&+c?XCpuZGptaz}gD`npVMVu!@*uX=K{TCLu~(a8v;a}lIoSY6?~`TH%X
zf1cBbSHCp8!}67k8IMme@AjkRjXr7$ch!PDIk#Jv48+XRM4Go}JhUv;n)DqWGCVd`
zMmBY5r$sTu7yA4&xh8szLLr}|uFL0<j*|Cw@2GO1lZ9`(z^bRCc>;vS!roI~f284z
zOuz`3X0?}VZ-1hq2@s-(Mv&#t>GO+q`ko-KC-v@Jz<*fGU1;P2o*l>7`j+aCY@xZF
zQ-Jg%K>+QcL8qW|HmJjPnSIdJe+zq8bNtB`JeXXr;yHvjS2FI&x+%B1VG$qIUZc?4
zW1iUA{LuGmy2P$2JC|J(c>jENr9YGBTZh#^GQy$|{&c$6C-=JVI?HBFq1;gG5=uLC
zi>f3JHEwI>!$2-P@DRak5mX>%)7X&(X%yCS+Y~3Tl@3fV&q#TVO*qfufvED~!)jL!
zir?SyYEsXYoWuu4hum}DJeT7y*B7Jnrt-md&!g4dLx7HWe;l?QMk%1hGMLEFy$_;a
zm?P_~@A!!E#4S%>>;ukK(e^SvlB<issht`$_M*r)qt4gMb?KBP=Z>K6DrvtQJRD8p
zJh@O<$E+T|OIV_;_ehBr7x4UtVwfiyRI6?>W1c+$%(Kg^wEHi|Lg~odiQ?7(Vr?S6
z_+#Q0!h>r1SJHUWUJ7CcaNsD3K9XA#kn%r{Ipi^%sQ=G4)TA~u&~kOZV<N<zgh$~B
z)q^waMc<;(Lt<Mcw}EJ)IrH@UJ&hkdRzG9!yLBFZXNZH<2Y@{!`=bP!Zij8u5{K2h
zafn3)z$&^4KDJx>=6TSI`O$AHY@}LKU4P&BUN;zYVxTD$lqW^nX>eI{glhH>wt;si
z?3pD#f%&0N_5Nf%?vVtXe81lZ47WdJ$EyQ=(LIv5@h|cbNl>}!Rmr@CL2?^#Ad5?F
zimG`#AN1~Kg=U#)-c#@zo37=06*V0{sRGrLn?Zhu+VAWe@doacF;Q&N7=3TZjmU3R
zELW8`0|<ziE9SmLm+aP*%6+rJs%D2X=XTfjKmUnLNZ3d=5TpX>1Y#N;m}saTysBkm
z#<%?9JAz^>#ZVbNki|Jjt(CnDegGHn|NV31%67UQo_+^|Q5qJZ>lXrO6m#4!%_Zqe
z>iKu)2srpC?zQingNIkS(#a!JL!Gq~&wRx}lXW10MSTSPW^a7r<omfTVB&#(gmRbX
zB{uX|k@ze>@Go6c5hmQC81|h+FLq}^4Nryby$*MaaS>KIrBMJ@e7N~8o24}{aK)Wl
zWU@}O;y$q`9gd8z4Lyz}yEp;%!9P3GBkz})U;u+?8=_AYTu?N+ykqTUU?ZJYZmh_L
zx#eC-Wy_pzO|($L9=R<p21Zhe*n|`hnk4DsIdoGXdvYN5p6(;>PTu46rS?ga@0)l`
zDr}_H%R%X5E~`rQB|b~6enM|1aJ|YP$+k^#06Xp?DyzmdJhP?VlmjCHQS7N_LlhsH
zd}Psv1DfXVErovo1?G)j;OxZ6{*!t0Gx7L~ODx_75-_XonyY~Ttb$P(V1D)60Q0|T
zMLy@1pB@4OB!w2AMj?7<<;0(@dNmYxsqu~J%6tBS|B#K20r4Goy=5#F?QqC!5dE<n
z{Xa@V-_UaX_Gn*o%MsI8Z?oXy*OgOcFOfMcQGP2$J@D3d?Iy=UQW4_}3}wK9z2~^!
z$w%T~tI6)MYyBWuTCu@(C6UC5tNf+o1C`a-;q8%Rl{slPW{_tfcmUPn>PP0tIqJ_p
z8fZ0f_o+6}mVEHGK&6B$B|t}LTw%WP1eCaG765pfZc_rHvYf@Aamd#YatwS06MOff
z5~Zhs{tX=d*;CpL(qne(g8PPE4OTZcu&E%eYdq-O&ddk07lKm4<_L-bSJR`X+Z1+<
ze*BZW-CbV+WN9I^(;k`vn`bY05%plxIULtc4AQl}7(WqZnjm^B0A~4D)gG?Cox~yW
zZ!=npKyo=NyaWx{%5B9|@R4u>xQDAH=~^?8<#WC{pxc4m=*hF^q+m&<ux%4+hG}f8
z@iMOc@outGSY6r0!e<Yx02hG;4h3A+X-3*Cv5q9f2xI2@RP70qDik&5x%ps(sL0p{
zHc4;nl2%#7goz~Sl!0x1bGKP9WYkx5UdniI-`)Rn#X`hY+2dz=JmDagZo+XGX(%*n
zq@o2SBG!dl8EyVb99C8`)bbgSI_?~q&xOnBp(H!Dq&Q1S`vE|C?_aGIRgZu`a0fb!
z21kE;t4M;v>+;;91|GRS^!F*ieT!5+W0pIBsb+!Q=EgXS@%Ip1Y2TevHzpOJZe{6r
zkj3oH6G6h>X;ro1cBH8MxI$*pm8b^!ad7U%%-NV_(^cSklH@-AE$~e{@#DG$!#}6P
zMDpQv!9Sk^md2mTV$UYt$+&wEXnPbq@Q2=l%&ba=ADDxjdWS24jD+_o^D<y%ULUKU
z7-8{+2CnJ6E~hcjG&S?Y7lO5M2}mg#&Bi-`gn;Na#Bboth&Ol*;G#ri#XW@TBJ>>U
zqPIi3WQCL~lpVEmM1}*hk;(mx=^t#85EYHDkmJk$XpjVzkt^>tMsWKr6PxG=5UW`D
z4P0RjAA<RfwvGV11o^Y(;4Vhn!3M6vo;f0~`}51ve62gc9v1_$3SKpIjpWsh($)@2
z&(qWLo)N)pQj%F5#uKpF1-*i`dmyUT**uYUc&H`vZ~$#(f^u;!>i9*)7m~1BwA<xd
zr`##!_~;nq^k$Js3=Q-ruV8?!_isU=fWkj7=-#XZfPyXD73E6};s{w{WULQAQyK_J
zc1AdQH-g%wNV!=K6Uc-gg-*HVYc1?mGTyuv<8a6^4QNOl%o@~N!BIxH43w*uP$z4`
z^Omj}ek}|0`n;yhG{E_BW85Mz0BtYAB8CmuInTidxhdC+71OhFX9+%LZNc_`)XrBL
zBhh{7fq33SFTMW%ZQTAJ-o`Bs`O*~Mnn3IgX_z4?--uBFZ?dG#!JGc2GqG#FqF0k_
zE~Qxe0V4$LzQzK3O8H`(ZYAOwc*Z1Qc7B#5fhSst9wFQ^J#Nmy?BSIYRI3%B6r%x8
zz63UR`x?x(49bG?WasKx02j}XkGvZQ|LRztBrX$&TCTlFvn!qUw~{Ev$oO)9sWfW~
zA!kGVQt|a6>lBT0;!*DE)R8u8xZRigZER$sdnIsw_DxRF?#pEtGsKfQeV#l}v7Jhr
zYmUVm-_w2IU8z&Eqq&^?IhZ{8kvtf3mxT4S0YOy-S9SF#LFb0RR-mJqOtZ7Gu^+4e
z{m?dh;6!`Mv;Kssg2EE0O?i_%4NqZ8LihTO_Q_%KbJT92I}j=r@6u!9ew1u+MPhPl
zB`bDbyJ@?SRmSBTejhtNlSA$}%in~^v2AOIVO&zm&0L8L<?0D=2X+-~XEFpP>1c8M
zz*U_KuN!e9AWCsq`sL`|-C)VueqsbH9HERis9{g@%)&A84`IsbI(FxafCtg%dT~At
zuO~amAi-2V%P9#mO`XKHaM3QaT39FFse;>dpsxriq?Q&?XrVA5(YynUl$g9yDPP1t
zyOK<HNUCuIFbF)mx{o}JOZs-lw)L=G+Cwb;fn(E}gNE*(ERb40ONWA&R9ALqBH|w8
zB~6ag#WEn^&G|Yp8f+<nPdg3J3tmGyE}<CabbWgZ2j<$joeo!PCYBxpo3t@_-K)_8
z&durqdSvU&jzU-br?L7r!rt;T!;n-|?L}FUP5saWAW9Jp3q-0~*j!5WuIv#OgP17S
zLL;8_YLIhxwn-xl7fXYc2)pWe*rLcdm`sA|OAM9_faxG6x9i?V%VmzGmS6+KeCH*&
z$0{0JO<CbG2NjZLQ?YgiDkXRgSjX{C&Kt21)Qax^I4+?dy-rDhe*6h@Nv=%j_@8o3
zSL#=7e2MiQ&W6e5j-}R&?$z(sSF2RiyfkV;4&LH!+1zLKb9Iq+dK?V8>7bU(u>%$~
zSA@o@^foEx{0Yg351FIn?XO*Z7a{Atr*+HRJGB9kHuTzozFS9~*ai78$tP@XJqjv3
zR%u%HWPN^er)0agVM$m>uGXVDEEOLS6YO`68RtvScq!NdI@0;06B37HznRV)?RpB_
zHrl<@_Y{r|@fm#Gb%^;hX%HRaR7JLF#5JHoN$4%nSsw?$vcc6}H{^Bs&i#fcws)ee
zit+;tG1_;++IVM~{M6bDze{WA!L?yPa%s5a8@U8_CiDBHd%y4EhJQ>HS9ApsYf5YW
z(>+7)m<@h2N#jlpuZCxCVG}M4OA@-`Amv{m-2NVC>jRBFF`n9tnTRd2Lh($BR^RGq
zr{0dbC7#q{6@yq;cocRVZ#2VR@PpfmW{&R#`Sx??=~w5M{X6N=X|3b`Y4U$`enkYp
z9=i?7j*r>bTS?leHXl+<m<GQBFV5~q;IWKXTQ-1R%&JqzAAu`pH8U!1>oSF4xJwdn
z-F)4=?f78(d)Nj}HKA&L6`#QT(9P=Wy%%hh1crRS3VjRGJN`J36GqsyAnUt5*CavZ
zso+y{zBM|XO2&XJuIN%$<$ih4yPLK3TWi^!i5mY#Qbwa%k&TP}l$yC1iO2r1`<P*5
zJ8wPz$>UY~&o49h_p(NY#kJdJ0Xh4vR~~<|>EQkc2Ls;dzJ2qn^E<mIA8)OiZ*H}+
zt<cU}!5RDOC4+$dfs2#RT@fW;w4WXh-`mhPv5OJ;Xtlz~t1}!U!^T#Q8!vKTV4y2R
zS7h$BrFVZriB|a=@hKa8Yg6F@>nF&v+o?S}bXG|T4fFywW={3gp^O4Ax&BmS^;Y&|
zkrlmruVfqUUw)i;j?QVSy|Qbg)u~He?`4WEE<91}p|I@6uGWN|4wjL(YOVzIE!(*T
zWT@uD2kE_+W90^8*A3bRrQg$BzozVjL(_YWpJNABRV!J6XIH<a67b=d@Srj6^*>$6
zKzBc9U%HkQ9=|c_*^alCm0R|dFE(5CV%_g+yFCvsDRUqfrQCF0ec;2+XjH-D3a<SU
z_~6qWzTLkx+(7((URWU=RI&`blc3`PAH3E7;XgT;x(vkG{H){plT7{}s*&4o*;aSK
z?XLIP{xo{RX?*z$IhQk2^ed-Ddy4$W*)MyI8wHx?c2zW!TJAaHVty3r=d5IYOjiFH
z;!?yxH&ge;6R%=4mjt%CYumqZtByG)<)$e!M(C^MiM4;!<q&uH{9gSBF7n!#-001a
zF-`6g$!_lxR*#1de8_(2aICF3%$j$nw%FZ--6#313_sHJfA;bA9iqv5-5zY1ektV(
z{?!v>?Iy>suPF}8yl>spq?<czek?3iKR_ohH_69pVr;E4c%}jlzkkX0;CWaLlMSuP
zkxY<P;+T5z!*i?gPnAauPi_i%g$)k55p_-7dLKS*x$ZoR(AvddPq~~^xHR;#ZjnV8
ze5Wo_lwrnhd|Nd66!W!MH}YwHjINYHR2wQb+#u#OlJ6;2CpA_at9Y{r(Zi?lMPZ0K
z4olV7_G+oA&&ZPPe(>2>eo&6=D^n(K=QdfBto&VSdtRU|3>~N(x(%f-${jNnpN28?
z9y^BJPMC-jt6cK#M)=*3%5!sOmsW2dx$;8mQQoC1Khf{a^G;iyosu}Qj;W2ieb8k6
zvlCTv*a2N5y#}TA2Fc6rZ*X*}nmnrbdRM%{vO6w;FE?BaaXrvGdTT!Zc7_~t-y?^~
z;N6<G4$q`a<P1r-4`%6|4NfoB>{j+EDSPR4a6_5;li;VbPI;uNxAJIz!d@R{abbJ-
zGn<%T1<#1$n~Ap*&mVlU{a)Rsz5w#XI_0>*W%sr;-aZP?tv%p6unuINtli>@&^{Uw
zxySk2$G!jYx&JKb^fp3w1*@eP|K#0yKGmCiJ|je5Crg7<f93^6DCk3~W-ti+b5Yg!
zj&VO*B)>U6YQ)&HomsUbNSo~(-+sU9ZT7BT857H;*j6Z)4>pQxXeXy!-_+E5EySPe
z7ph;;ZZisQ?aFLE)BUXL%y?goZoxdBftimZ_pHvDG3G;Lrowxly%VC<dlfDzmpYEM
z4!s&XJ?6vit3w5^h|&7d{~Y-3%e-UzeVP>UXxQAI05w0^NWygD0bFMfj1cg%CLyl+
zQH8^o(1O}!sFe#8JkE_(x#y*MjA!-9AjQqqervBb#l8`+%l+H0wmQmum~VToUGC!^
z<#h_DRf9ExdTpDVVv1L8=hxUxnd}azl|flKU+PV9SaqXEe#h?=h1d4!4zEQkw@;fA
z9{g;5H}d)+Y<cx-%Yaxmcr#~l_KHW6FJE$YX#VY|(yhi56v6g2MaItsS)99tsTa(Q
zM7>ujl}W2rA_5|=A7b3a?(q4O@@DAk6&l)iP0IV+SUZc5Ir&~T?^I3NiBXO}R`8_h
zQqJ%x_50*)wD;pM9-)v-6f(QS?PFqT#hf1NLL;*VByIA4wfCN3O=WH0_c)ALK+!>(
zl!&N+Gy!P>iGqlQDvETJP^A-ki`Y<FRFDplBB&rp@1Q~mJrwCxI)ol-0?E5J&NXvg
zbI*I<?|U52^W}N;izGXH?Y7oge(PNS9dxHW*>fcvtAI=NlOA+)88>3QBTGkYcmQ4+
zBgE#JbL>@FZP+ry#AQFV_$>A&=2tpMFu>~-kA2`+d%u=GBCdH=<9SrjA@GCyO~(`_
zFUbY+Bwr}O+`NB9xQpx6QA9ph!*g;H%1i<5dlY+es!Lh98&-W=fPWS_>WaIY_+miH
zJ;W4zM9ri`dvlL5GByKy#@IZ4yr$nW&OSIJn_6ICGabfk!05F-1~awG!Li~X{Q|Sa
ziVcU&j)w<D>b5uW2*EJfQA`k|US`wdH_DhPNe8ClHj@MK$Fll-`{p8o#TBv)so;Sq
zD}E~^^vzY$0%vHzHmw(QsSfZdWc3dtx|_$YbYhWA1<qZT{8=2;V^@}|&TfJ8irAUc
zv$`t)b#Q?*-2yG3D<_V6R8a;qM?ETu#yDG`95thm@8mK}87L~A9t{pE8=QB*osKe+
zAh~_vaH8+yXdTJ~(w5Ie**v^c9r|uJVXL2<*=Rkxf^C1^XaLz@1r+S9Q&^A11gywe
z&8vyr{Wg$1hZWz#^k#8Eomt#hzLtYM7T6_@*fyz<4RSYD$G>3moy|m2_Uv44qv*uo
zDTJto7gZ3q%+F=&JI)6ags;8VWHBYTN2PgS2PO-PM0mE}?0auY(pSD-VQ%&2%f+TR
z7OzWZrr2NDD<pr+5JuL^)mm_3tchnE^<2W7du#dnO6!0zHDhufRB(T<Pp$)BdloJ~
zE6mCbIX%T5^&h*LHzoyI%jaOMTf{P-xVp5+=IgW9;OitJ<B^Pi-$yOSW$QLK)gmp4
zh&Q#&HOmXdXT_&rL8HgJzMg<ziOd=2LVmh<g>KKkv;aa?^Iyg{=3I3sA|wQ;oEu@_
z55DV#LzPq4vayQcm&?Y+V{Nlyb}lbxn5=ymJ>=-Ayu6G-`-Z*V*%+?!?1&q>HIU`&
zG<T(Zny3_BpZdmh<2zf6c36R}Y}fKLXYDic;KQ;-NXR2^n{YX&FbVuc?gqOl@vbw@
zva0&Q8NE_mM9<5Ji25*81k#s-Z|sdppZVcL#JEV@amUL_Yzh+VFHlX-S6;|v>P0hK
zG+JHdoV*<4Vzl}0TFM=BlOfDaO;mo;fMswF^HY;!8%b04S!cLuxLEB4cG2ELsN{IA
zcPVs}mrIbvaxxzSKf$ytt-d8Sx$_|He-A`OPUJj8g(#k!62K%xjUhE6dg1HO<g{zc
zzX6Hx4dXcVQRsv(K_{%;_b%|Jsh(uRd*{+oasCG5vA5)V8WB2UH6tY$r>dkTajig!
zey>ygh5ZhVI<SO*Ovs~WdJMbRDTu~fd&IXsGT#91Zl=~jAAV$AW<seU)WJ1nbg7yN
zEBelN7C}wf^xb-+?>+CbV;s~dR#sjNo&X|2Yy}%E>TUYk;bvmwX49=OSHYXqq+{VO
zm)`0|kzB5zl+-4EuglVr>O0$0;C$nRSj(+1H-uD0RSruOcu|9r*CuHpw94-MxSO=^
zS)<ydBD>>o6?~4@H(!ysr<!ffc05w1jcMPd2A#{$sR~!Vp-ZLc!<f6NicuNtoGTl&
zR~MNMX`_uDbw+y>q(62x<=Si><Hk>FlM4Q9sMmda0hZNt!+)D;?d5IGZtHvJ4NbYH
zCbWaCde8d?g&|4rwhW{XInJcvlj!eyg&`(X+ULkqr-$Xu3tuaDAsD<re$MfVaE~>V
zK7>sk>ruF08L3{KJ}oPaiOsf@uT;P;Q)3qj_hg|LQCN!gjPVqsIUblY^P<K`$43OY
zsrb=%DPge=(S=ScTLg5a?B>WLD^CBOdx0#DKK=bc2_8dUA5x~?#=Ev(!!ej{ex2El
zMurKtD3F>s{I$OViHeNc!7~w2x(zc=Jd)H{CJ^Oc9qzF=@=>f&Hhdv0am%bvNlSI`
zKBK<<NStN)lkW-&xftmv>92Y@9FF&7nJ{xH&2K}`>DtvAvbR+K*o_fCS+>{ueVFKz
zttV`X0(so3g`{WFf_!O$KK`LJhOo^9?~oVe4Spf0h+fPF?Fb$l7>QkJ87gyn__=3b
z$^vE*OrmmT(+YTwyU;pBBNU&aB1%N^w>^t4zSHc1sn)LiXbpo$I4Db6!FSM?M#7l0
z)EAKPDIuP(0%fUMTrB~Lm%X<dUZdd}4c^u*Qp@k|=><lol}A(Y@)~LVEwI&P`bfUT
z8^@K|AF%b?<sH&6cBjv^0B${Z(5*4=`&5K!r`&8GmZ_`qgK|}J9tbzoWju)=i$q1M
zwpYvuiW_#vE+TAwM*{Y-Mj5;7J9oq_82SRmJKH}TMbgy4p7@c|QI`#*pU+<?a?vZy
z+DgT~uFkEOohNZsG7rrq-6M8$<htldD;kt`HQeiMmz?3z4f8UDVrOZ})$H>2mTtWi
z5yQ|~etkJ%!>Y`JBCvbs(VK65y{BVy&)dfE7PuU81WrC*&)8M>aAVi$s7WM<5vU=|
zq3YdobWNb6oMVQ&TmAxG_8}7As(f$e`=`}XqVeJf`0!&pJPd8*T+1OQBJDV2q{T(k
zIF&OhW_4oQa<3R#D>7bR`nh@x(kQ}$vGGifASxs{H{@RLDQSxqXOw64@3f~fiF>lJ
z3R;mzh2v_{n#4sLB%WG?_e99ms_55#vI}VjcH1xq^GP(K<P47pC$cnzoy~%yG54BX
z1J_uxFjDVF^RG}|vcl_H*DHP(A~K2KPH5ClOX7<K8lxlC&2%QAU8-}h2{)tUSZ(?j
zE?C{QgZ!o4)9w@g{n@jvAAMcpcoa{qe_`@>a{m5=?L%g~OM@Uz61&y(ejW<49XzW!
zKgiy>Ku4^7P~I9+awO=>35?(-yB(wDZ78V_^x0p+FK2<q-9?@&KJ;Tl3=~4uu&rq`
z7Kuk&j3O+2ke|e(lZb@(fuG!8>1+mlslDsOd2>m;hrKCxlJFMu0ucqghdAUt%)FnF
z^8IcHUQgerZ0x&?!-+UJ2ajf$?tEyd#VV}l;NpfhYt7OV+=czg9T~Smgc1VW%-?_|
zQL`!roPh=k>Ud{iPUOuiB8h2Z(UGCzf+?(;9nMW4!sut!C}urgU?9a!(UY!C4S{|u
z4({t;zqLn#o+D-LcmDED9R}={gRwxWHN3-hxN>#mil4FgHfnPrtiCYb=b3HNOQKCj
zj&XI~$Hv|5{eNz^>17X>m~2-Sl#=V>V23gmj-Yy}u#WZH+{u%fc`awd;N)q+8;hQJ
z`JjD@)0Ym_0-;MsmQ_^E9aBFy=S3fwiD<6Tw@;KB4m~USd>J8oUp+P*kRI6QAm?<|
zY{E&=oA&HNY5DbDhITfD?@G)*SG$C!aw|nqyI)56w&Q~Y?OjBXZZU@9Z$l?D)}<Kq
z`CJ6-UBY^@a#?ix#>;IrR`bk2LV=7wCI=<BDB4Qa>1~5%tu?+o&>>JE9k&aiA9yrB
z_uoCH5qUq+t(QaZ7+s}Pu0YxyStj6?sy-;cMbjw5VuvqGznLi>(~xJ$_S|X=R#g~1
z+JWVxj`Jh*xxBu;#C22iyid%jBFJr{JR8OMgOZbP<>+h}N8(Io1jW3^<CkJJYTG8o
z{eS11BECQb%2enw*-+r_RrNa}Q#sE0_N`{u;%n->M7L00c_%4XS45^tmc%G$-ODU0
zTzELGo^6+`ztHs&c6{Y1hY3xwf5duY^wgCWR1@|FtlsZC%IfY>#?7zwC-qF0xi(QX
zebx6$wl&e@4jgYFmxSMv&`e>E87ik!*N|;h^7FQLUq_-YNJVB8*4jXjc%6Q_`JnNO
zHU`A|jPuU42JEYXawRtO_}JD|OyhSY64lC7XQ8aPVv2&UB;ctO5N^FAO?<82Hhq+}
zJf=xXH+PFXYu{Z}b$cc$Ypp*%j6nM^4*I-s<=`Y^2o6}75Zv5xoZ`j+Di}c>LnP|j
ziNqfn;e%~mbsliWvw)FkWyxqfb)h?kdTd!TaXDo4CB}DLZSkSx2;(hAJx5(~Ws-7O
zS<UR|BRTDb-!p=lK)`ZCct!uUyhdr4CReRU{fU0k%7rPyM01b=t6Q%X$)~d2^?4`G
z>NaVrPq`dXI@c%)kJk({Hgj8k3DgQt>$1UG_*KJRS8u5~ltF>rkD=K|I?uidaxfhu
zXB-PIOnyOcV9rt|jykJBH^OZ7nIl^KO@+QqrS{t!i3T0<&0l|eLfy1{JSiOQZaIx{
zykgyAY4cFY)c3HXo;edQhaWM@l%5D<DsnJM#0-BsZ$+Zs?0^l2w8q!lkx*Z!RxF>8
ztt%_j(&XZ@@7yPk#T|{6jIsA0+WkrMRCp|P^Q_WgJnsi%p5Uh+uwL^p4ByO{?>5+>
z1No*9QOS`cSFKhmyj-&S(j?S2A;1}mshqdgrhiZp%Zuo|c19sF|Jbg{kWZ^0q6|Ki
ztT`HPOUu`q6hN?IH1757F|4=H1{FNhXI683u_Pf6YDrjYkFIGP_t@`|Cdx20O5Zmn
zusTc|EsRC(<|T5h>Tz{6XL?r1c?d^4X-@3|ptE$FmZx0<c-m1}((+^0`JZ?VuM(X5
z=Rz9A(TCY$SE7VpI0_n4&YCmK$hpTQIG;D@c_ETIRc!BGYk!LhGNR%GYucPu_UoaO
zXqJ_!T&IUqlH&;$8F!QlU%=0tL3-w!J+b5UQ&X#5vc?_kRU$`6hkO^0LLPO@#<YAs
z30=BM?tDr79`Kx|KVO^%fJpCP?|r|4vE#Pe+fO}mI^h}Nh(HGQg{0f#=t{}zV6i_l
zpqECdYk8AG<{4`lq>bUs^4Lx{>5m2en`F{#(s!b2nr)(Xup)6w8PnMO78C2!W!3W8
zj2#>hw=}2!<A6=SRO;tE9KoGz80P~cS~x!c-r<M?{8R00Q-VD&upzut)BF?1gtsOE
zr7G3A*rh@GMwX)|C&eGvA-ilB9^KJu;oVKg4Dct%xhA+a)9uHj20#UW=yagOR(%PJ
z(B5EY-K4;qC11%eWg>Kt*G-%5)lUbyIDkZv&(^fKnS(tTGR49A7$WpMK3`<4lM(XN
zek$=coRqKrjCAE#12Gm6XgF@cG~>s+VJUyzyRL+XH#HR6a_n}Ca*V5AyiBszLO3Ey
zM(CR=h~@oev)2Pdz*`|w1LKKDmHK2IFDI#KPbfrO>&AQW9h`{wwhkc+o>@tn`jqcH
z^i0VMILvm{u0+>3$#bm0j!LlR@pJ<V6CV)3{%L5nfuSf%FwQwvQpi%FJ?NyE=U7TD
zAHK2bAOpWTPn=|MiDxRy=|%8146Uod0Y*cab;y^CwEV#Gr`~Vskto&`Ud5BBd0u_h
zyt5hPW9HYn%%yOIT7xdj+DYLdf>NJ!Q}hj3FH1=$2YVoNCj#$aW_RzzUFc36V;6HZ
zPky03J7H9tGa2bvcV^ASVIo|*QN}#nTP{j2Q;rOtjo{+3H+`SzGjy9Rx1QSF6q_<g
ze9^LUs5jN5MY-gB#RO;WHN_UxH1B8tMx}WE752YruVk|9x}tffhV)1O^#f1?y$H*E
zUuEg{QKabdkeuDWP+<u-V9V>|zqn&!<L>=B%ve*dKXXLa?SQe`(VpD*UoHrI<cj>L
ze6|zUa5NWS;d;eX)<X?ip}*zW9Z8O|1|eO=tA}|Y_`CE>!wfk2C%AQO{@<_j2O}H1
zq_C@bAco{fCk}uZQegP(>WkYc7Q}|4fB(94GTc9cFHl}!!ksekryEqn02?ez$IbD=
zQFrOrgA+YV#=hsI$tfZ1Hx@=8Ew{ofYV=&#U?G9jQy^4&uK2hg<A!UP#<;N2^k6k0
z0X&MV2Yc}B?$PDz;@uJ4kIB6)*;l&xcpI|lKZ*MPEkym#o^9=@YxcEOg`;ALC!&y{
zK-z~zd|}wFewR#X^;7Nh@bg;-3q&c1nrqj0<W`S#*84%!PjD>{yJ1T5Y%Z?}$!Jxs
zq?IkAd-;rMR2NUnsLal(me0wMNmdb`_xfRy>wrl<V63N0e&GfeKdBvg6@-1YcgQbZ
zYp*@K8ex0+^tHHn$;$S?%#aVNmy?b7&R**qkJpkbzxRbF*}nFI))~%92yYwy55)Y>
zCPAK(_dEH6#xU90xybT;Z+fcg&?ErbCxtj(3fxHjCv>h!Hu8QJrlPm6!H5Y1B4Bl+
ze@5&7h1N^(y2xTXUl)%v3P%E4r!PF5*t}~v%<0GDOb(t9b2o35ru1yOHGtci)}`D-
zr@WY*H1@sl)c|W%bK-F^7A)`uJSPtqWBO)o^U_yhf=uy%{T^B9NZRObU3N0O<`6t=
zsTmp-v<UnG{8juWFbcmxvGM*$#lT&opvVqbbkdRcY}dH(D|vQv1mwX;KkIymBU=aa
z<;a4BRyyR83-J90O!`s=()#FwtbQVh#;4*}HNGTg1AKk1R%;6w(+^Swm{WITi)0HS
zcKt74Ev!Qw@U9!}$rTZzxs>%y>V`(D*Q$R>cB~VNf`2tIa;Im7VoTv58^YLb0HO-z
z65PsCL*^q-m>Z0d_|1EXFSwY~h0a7Uuf5L!Y=4~R>jh<(`#XZ0S{mQdYi{7o5?{n0
zm)r|$i1{%|BPhiueG4%Q=y8ab(n_nGxCsEnKj<-Vhk{b+W)OD{jd{ofxbyY$Tf&nE
z>PuF&@6U&M&_9KM|I@o|NZ@=QNS-^n%YlD=hL2FUA=%{Vh}JF@17BU-%DAT054X*u
zP+@5_aPfWJ1~6v~*oLMV$zV8dqr()x)#}G36k9T4IB`EB>6`0$$b^6-sCy?xugbPO
z`1-)@<@?WU$+IplFqi5`L9{5_egl}>Ab(>Gy8AeJGgKRW*sn(M{FNO+N;QdC75h?|
zsm*^9e!UlHBkyD{qQ~E5l#42UdktG5QEeBijL7daVa3^!t*=36nrVoq$k82Td@L%l
zHQtQ8{7&~71YIBe2Xy_D)kZe%ReFcXkJZCVt#GOSowxplc27>Vs`e;@OEkV82?uPJ
zFY%w-tL5u;0e3|M3Pu@hc@yF#58JcSFN^Pj-$#GnM<>(B|8^ic#%7wl92Jw<V>gx~
z1R^bDeaX{;bI&j@ErjLp>~>>E?N3d)2*2u||0`2k=dx|QIDImYf!Fhua><QMQFt`-
zglJCb=3R}5vKlk2k?4_km`|1wC6PI*fUov=$-x0wokwB-Wo?L4hOH^Gf+<!^vpgOj
zFOcUqcNCSZaBB#JvG-r&lapp0gCE7G3{1*Ia6{j37s`U&RvX_3I+Y>KGdU*Mhk4I|
z<MEo|Yd*&d;5XCEz_jYOQm#ct#xI~#Gk!_NJJJ8jSmT>3F?sC;K!o;b79C517s8rT
zCT;(~h5~-5l8e*iv?$`Fc9$j!U`icVt~k}^?th1nCLI&4s!2S@ll?=P+aa78|6kg9
zx0NdbCQs)72R!LAm&8kq8gR1Xd7Ci#7lHh<xr|<y8NQD^04gCR0p~zU&MkMx?kTSu
zXFi5u6@O;a5Epc93sw~?O)1L=nRtn`<}-_3xj#Xe_%(oHt>YB~(#tI}A`l*=Uo(Nc
zL~^WzXL-^>H#VZc&_plgJR8>n(N-mUV@m!Qm$<r@0`(3rS`=t}@hA=HJ1n3O)Flo_
zZTzrNn96_{`~qFDIW+%PUpQqTPydx|zo*SPRuKTeq`$4}$qDz5u1vWE99iHC76Jj1
zB~C2|!e`eHGuVPaN>^<#QW+-YW8Fh`)6L|HRL4YS6#zK5s#Y5K8aWgRnTlq3yHIvM
zNvg}bQ8$Hr4I*n|w)6I2Z@0&{1hG7&X~LmVbT>0XaE4rlg*}`0vO<9tw!xgX5am;-
z^giDBa4bF`2})X$IMzH{emVD<EoR;3AlL6IfTA%XA&M5}tQ{%-Cpo)P7fE<B*_oR<
z!$nlme<<=#MD6!DiK$|_)uXWBo3pmnE<2Bv^~wLp=-A_cJt#DT-7^+IsVf*r*Sc~*
zbT0DW=p1MZUwkUNftx>vtB$OEL;P8ADEFi4Vu2f`{|@^iCrh}3^X)wnB+p4h5x;ab
z5Bej==inyklQ%US0RLii&^ZXF=X}&Vy<K8-q7EMPU79|xZ9Drqm5Y3J{fLXE%k`?7
z|5a$MkCb^Dvb##D*8Ilmc<P^cOs%mtIzl@*eMUCT0vRGb=@QTiCK_#bj`OE>0Oqw`
zWEXOcqrfvZ3<++=ALI-*of3h3shg2DMi%*Zi1q`xK`j=FgK={_9}Y&>xGHG82mhat
z?Edn3ZoIi+i(*YdYu#G@Oi$ML-mV{z%^reiyW8`jLqn5&v0Ow(M#!Zx;!^R>^vCG)
zgVi3^Gbh9gCB3@6%c#c9gC0S6%jiH<X0BPTNEE|kEtr0)Q|R$4;7iiAPJMAO$FGC3
z@q|EG2`ix;4E`SFTobvX`=JH98<8kI+1-sSGF_Iq2<h1DzHVXj(7HGfo|u(2>>egd
zu8jZzwlL?L<6L3Nx0n#`Vr@A;_=l;cKu6~qbAA0W8bm9`sAMekg#z%$-hhq&>5uaw
z8JEBepIOjpzFJMQ@pu=T(rEi<%i-*Drjt-FaDv%T)BxjZY^u^_n$0(8&FW0svcABf
z8i(IJdgccmD+m`Y^!F`%Fqtx=B&Q7<;}>_~G2Az4p&yKs<~e<(<p8+dSiBKNC0AMw
z^AYRySv&ZNyGSAUxjC0T5A^da`;^EV9X2_q4vPbNmic^2kCQkqAnu~!sm?!UO=3qp
ze@WsZfrPaWj`~~ol@VG=9~U-L;x}FSA|t5FXhYg=#^#>#cPY8}2A-VwCTfuu6!;B|
zpQCY+rvV@9*c7x4Mj)t7w1m)|U5iB+k!zmiwc#W!YEHIK5?`Qid)HwQfPak)tAVNX
z{<&PqPrDPl+G;+D1?DH#PUbb@yx6d<Ez<yS5pu_YUHmee62BGY*ZMY66fPk~D62V%
zy<IYvYP(Zo+PznCGO^aJ1xw6%*9fon3v-Fm19=vhcLRj$OT2)`^C$3<^&_YU1)s9)
z=SG=>dI9pnS`e_+Tf#L-#Dstx$n@lJtZ&*jKUjOp_B<)vv4Iog<*U;XfA5U(!Y<5O
zjej@4`^PyQot8Bv)Gn&B?aFNafvQB1I^DadN(yC@SGmbWuDigx_R(~RaTt+U7<GSe
z7gS~JDBVQ+N-CjP8w`jCu#IsnvngBT*x1R;EXh-Acp1L)R!Nx!mAfn}gY-D{59Bp^
zaTj@Q{eMSZwk;3JFW$W{eOV>jKlt=*M(Ul!ZN2Q!#45AXi!wsZibM*6x@a>^g?KVX
zMY&ruVS1-Kn+C-igdWxNQ{a*MQ?Oo)q4wLa6r)HSid>N<{yUOyd1tZFJFrH6T=Jd$
z*v6c9AHLGu=vf<~$TQNBP-w|GqGH+qoaueepS&xj?dwxJUPZipo}|OeE#RY{&7?^C
z_s?;DK}=rm?d$p%^=h-eCuMlp?-ZzBCBDwp<wF9{RoRT0mo>-%e<h9;_HwAEJUP^>
z<Q>*;NdV9_QVUkl*==c+`0sE{4lT;S?+I5P(l1=UUB}vvuQ6xD=w^b0V4l~r^L*3;
z7!Jl9F0L@?B0M?|fUn*uD2l&YsMy^n{>J#kH~!*cd}rsT7rC_OK8!#cN-2!xcRL_u
z0Bf`CqPX~{C>I(mQqWZAFQCedwp-z}MV-<#h{}MX*IBOxzB%-(iHwE3w6|SiY_#T&
znO1A-T^AxjYuN8ZLPp@zZ0rU@s1)IyO2rgY0u97DlZk6FlAqY2X&P|_pGYZ-6YD2+
z+L<rR+yD~yu!hIssF`5WF}pg=+83zRa2Jp@v@uzHt&^Jp!|yXZdH!(4rz;T$#HNzi
z-|K}ulGD-%74#DiJPme5P7#tYN*)KTeXlQ^4@px-MBpc}MSH<Cw=zT+J@dm<>f{>a
zmNfh8#7gEf9XUb6E8!?1-Gi)SnMzD=U7oD*ISQoLMpQ;uziMNe7}(`ml}`V`vjjLk
ziAuOV9G@xVKCL)zggMe)3t*T<(AlXZwNiKsr?x=)BpAJy){l0DfMkP2xp*Z`wT`P@
zxn*rXuRVm=BKuhze(!3uzVN#?z$U@vb4lwBuD{t>>}(o)<v-zjEDXUkeFsB<8+u|X
zGgmJs4e+Bi@Fzjs2w%IS)@U6(1`*P4%JV+DsxMf}ezUM)P~ER@ex5r}AmYwxP|B9N
z$lZN`ZcGBiVPh-Aw(j_VVi_TpYt76~D9bi>On&ltu`A<ED@~yDu<h4{O*A3aZ|8+g
zGCJQ3Qw48YF6T&>vppFnsg(8g>$SYm$xMA`6f4G#T(2*wfjW~Ti&0JPE*}U11U$u7
zT_Jb9Efv^d<<4R7jg4=PC{H~Pxd;3`!}D62$42F))xhSvXH5lx&p0xbTZAWL{aM=!
zrN+)%MOr}^P$&#@91{Xc|NaZRQv=k=bK;sti;i{P4{+wzh;7{aaE`A}v#XK;Vrt#?
zG^X1i*SV~Ciuf8eOj~Bu8+c-O6GH+|yXQOds6&~#$S&V=&Mbl8T}8a%R%-JPpl5N|
zHft;ECX;?axOxWGPYIAY)H;SkWEg^a3TMQS!n=RPBY}%3ulQR?xp`9Z?NV!fh?!nt
z1Q#nbH=7wPC=G(V8}(m$K<-eCd71^BQXANC?h33I{6`+YY$~K$2SQ6{rh6Y_xjwx7
z6saEEsbU3!Mq?KnIEW$BaM#-=Ky8+$9qjG5Fg*~OJ{2d|LBS<IKiNP|m=mY>WVfuV
z?%rQb23l0J`<XYqtmcmJyYFgh^@a(I@Ag2&4L5oN_Wj1@>xz3ofvQNHq@>4kj9k>O
z{|WCTDC6Eq_5GSs$#$I~g`#j?8k7SGqo9hVWxf3S+hdZtpEQ+LB)YI2Is3Pi<!Xw)
zc`ScnpT<a-36}&j)heU^Vtny8nBFS9J5IFZu)Pv9Z;u3E@Hth0ul7~h=3RwVRSxmU
zfywY2Fny`g|De?ojID7j3fGbbP6_-&5$r!!$c8IMG5pr6YG9uk&Id*rGh5*~4Q1;K
zwWMe45xpj%_ZY^`KTbm`V4huyneSwWHhvNfgdbdMJ+daL+E>T*^3cQ?>HqFv&N^Pg
z4U6e*)ob56dFdyPU%L6<Ro)_@jl6DhT!HU+jB2=Z{U;Bqj_<dq@uW$`)PorVO6$MR
z7<5GhY%x4uxV-c}R8^!sZROX$%4O?Bpb@#5=J9&7+gl)Ytkmn96K5w{ZdH32d6d*L
zCb3ND&()@&pKU{6%w|yJg(R(<Nttg$5p&!jg-CcCSjrU~?ZFfEk`_8dQ=S0RaQQq=
z{~6spE4lbd^f$A5B6A8x%+VCf?&nK^3|7)d4B4p%UnE8wuQ~|0Oo|rM$6slF_ov6?
z&1q1{V0M?PpX}V2{3#k&Mq^L<Q>c2Dix1B;YCmOA8UoePPT0?bMi=#<If<-cRja!%
zcc&$L*NtrPzxZ8H6)WGDI)Ay>Qj_<kSz9CNjzViyog=v5zPQ(=$)7+$?5O1&6`1hi
zz)!S-7Mna<IB&v+sV@y06o)pnt+6RX8)#f@4<0t@%K`;d%|xl8BIDXDG+Is}FM|QL
z0P6VuJ%QF^i}4J2kjr@bA4*;S1b2U9fl44dcmR;&ryD!ImtaBH>>y5<LW~E~w!%Hi
zE>%z0-`CgxTf;)<M?pj(wRcNir;tZCm@&CB6Xnkt{`+#-|CP+ozL_0>`OKAx`o^!X
zJ{}sjj9Lz?W#Sfd^Fcxh?whs03RWXPYTfR=e`BVVVms|w{P9!~&M_~&_680(D=_&-
zw~$gu?wfn>n>N}lj&@LdOmY2t5iA9o)IG%y_n>EmZn5w~3qnpI<C2QyvtNkgmc8cX
z{~~vPm72n#@MpS1^_5UO$b&(RY9JbzNW((VdtY`bv*IJ|dp9+EbqCy`!c@jrI-jP4
z{XxgJ_qh+dS8=nJtYhc*X}awG7K)g3dkp3ybj75L3{oZbu)pdx-{Dd84+08<)fAkQ
z8F}g10HEVh5d4jC;<DoBec;m%GIpAt$W#v@!6z=s@G}~B7y-BgNN}2OAxW$Go#scx
zUW}KL3m|%S0Ql)3(?-OZ8h)VXh?|mTIg>9!Hj^d*G-OI(9s^V{Wb>F@5zp~bDti``
zoB+m1PkKC_m4n;{AAu*seIv44-L-}y9P2BJiITrC))<+O9$+xCGK))i5c1u5l8ZNg
zE58@j0{oehA#52wDsEOpIAIUBUyt8FI8Z{4@YaCJao7{@cONUI<z3hxv8=X)8Q&z3
zpjpqCe#lyBthD^ZT}F(!7e_T5x@UP^sz!7eBg3Yi$qvhOE-*I=To_b>+8BYr^V}aR
zYOYk<#@h*PaATmuh|hwj8LbQ|@l>ENA=tbh^^u#dOJ(xP%cH21JgEM^sG{mms`!&C
z{-laOsp3zn_>(IBq>4YO;!mphlPdnCia)91PpbHnD*mL3KdB-#<^Pi^{#Zc&lPdnC
zia)91PpZiNvsUr{h+4(+`{KR7{Bmyf+T}~Pe%3AitXuq9xA?Pe@n_xQck`chi$CiY
zf7UJjtXuq9xA?Pe@&C%Y#SDq1Y8dsC=K>0A=nItWgezA%vsn%8hK#tJKJURPE_xA0
z$HO&6-3OdiJH_stuF(GyeypK6_J5ZB{G;Nr7t~I@@JsEiy&z`>#5|q<p{VgcD;|F%
zx<EyHhqMu>)X=7*y;=@tyE3BxSV0LiMrFWCwQc|7ibk5o+-6!?M^8QZ4>{4@qQ<{z
zjoyr^c78VdMP-1OXqDx3IZW$H$cAZn3|K!{!sIu}DEt&!jLJwzd}}_UgC*Zu-Un#}
zR>QV0P9U=@)`dkYH+OLUl=<{N)U6iN6Rz10HSHDdzm$H>_9lF#O57wqN>xM^tdazE
z-ZB*#HC0{8yxiT*DNp;~0fh<-nP<1O@i}@ZGPIATJPy^wYY-g*;<u*!*pHViLbZ~{
zXt1ntLa(NrR2;JHxJrech03AV9uVXITJoSnpH)eo9hELBQD5#{5oPw~cKfD{19~KN
zqcXIOxY77Lf}m*e$iw&41&=D9<u;6eeoy{$4a`=Rndp&Bh5$W!a?SKuV}CdQ71(ek
zbhg&S6gb!V@_Zv=D_aRHY!djUobefr1uHQR_tKy`vWzut*kQ`%;d%sqR$~2q5-7YB
z!q?>ldiIw@iXx#BOfN$UXjER8YD9iU-1w_ZzgxDOb9l3MGza517ZJc5hN$#x3pQ0i
zA|+yuVSu<rkj!g#Z@&>bn80EAxlb{QRsryY6t*F{cK2kvy!53@InxtcLOj!^<1(OP
z7N`grYwNA-m^w?kgOX0w0p%W=!Bl>)6!t|6MXVB94e{YAMIfJ`y-2AtL(Yvcp(=q;
zDIPnyc6_m46bVFLxDtCipD=AW|FzyVU&5&d=vO-W5e<Xq3;VN*M}veQRk4!qq>qk%
z89i25zqC>yc3J2-bHlXn)6|6=QHkYW_gLs>{L_-?{}+MM12K(ze_hvp_f>R_wAWAI
zOb%4h8IlF+l!rduWvbsf-k1J+db1zx4&&-P4XgnK;)JUou^5#BO@)S`2+|o*bBkMw
zCx4$fu6^x)w-Bj)q2I<}(Pphor8bxMG%s!+Vw3lEpy5IldbzVvRA|8;Q^p3POyh5<
zIb7ee#P#`mt>k_(x2-Pfwb)KtR>t2L9EyfkNm=`S12Qa%uL-xdFHC@<*!+JI9DNTh
zpdNidfyxAZ&SKwhx;0@9nZCohz0rrbnTlSI7jD|GfAf652}&_%`0Ahkn}}gEY8&)#
zp~7CvxKWv^Ep_K6aXhP4;`?EBC4z!ifB%r~!1x&%!OER^$L!y1eP42%t-Vm9FU=vg
zH@Gqq3#!gfrQdDq1}neWQ#7fd<ej|wa{S;FDC#_fT>U-_D&8YSKxKaF4?R(7s!kzG
z(IE8e-6h`1ra+{kKe10$FFPWKddv_eOwKX@3%;#8H}OM3FQW>;I1qS>#LLntL#s)Z
zC1;l>*^6gCWCRgSHcEANNLbqT4B;>#;5sgxHk%FPL|0ySWRtc>*{WJkq|5;6p5oCw
z?9QC^vf`)R)#|&FIJ--;KRGbAVwB^7t{G9cu)FLbRHA%v;hTlf%)SW%)9}($nRth;
zV3XD4(Z|dhS~4$J#k}fo1QCs_8dnw~Y;QH`H7GMF=5H<qA{2Bze~7I-`0Egv+;o9l
zE?1Y6m}X2nOywoYNB&ScH09x>rNDOH?dlc|>Jh`3ONozo4&FehsF^P6k2fwtvco?_
z5+Dg1pqjGB>Ml3?lDtOOJ+W-nP_Sr+hvKE2v0bm}FF|2Rm_lT4e`t}^)GeBtPQi$_
zTp(u7khFkWpXqycpl0^h^+Ez3e|-95QS5r`Wa(tAZP3S}A#Z~nV5q;1Y;eKW-l?)c
z^1{xKI0xRm$==x9l_L4`T>dzv&m29wb*(8IpJT^|K~E1}^jq}7eGi107`e8o-0$4S
z?V2$PgkWzvgDzB9yxM-_HqbEpugyTRFn^Su`h=o@)a|{FeaeG(yN!cs!0rZ%6b<Bl
zq4!G&<T-Hzp}!rCS4i_5;~egRWW#pv&7bG<$ETkCP`NFio@sTwnE!HcFtetDv%!{4
zdV(&bY<P$@Ol~rI)I!Xs_m*n#nS4*6WB2zKe%$gDXk}dJ5S{HSTG@p0nSESNjiVnj
z{B`ZBst@Ddr^nxHl~mvLAL<iI^^(}N%+Xz;=zfPX7D(kUXDm!U>wu(5+FxDb&)eVf
zSpw?JjpZJ?cKs$K&1}KM&By#LANp?DoefeCH<NgOD^A8y6!JZzmzca;8OI<AtiN{u
zvzvX|t*gJ1TlKVLc%yQIWwju_QLL(5d*{nOn8J?WXxYGo=%EVDT^)@7a<c+Lp{3E%
zuXdzb{5o&nVKJqyL<Nf!_9Sl~B<#Vh_d8bA+Ii0WWB+MvfC|5fiOcgYQ1|?Y8yIlN
z3ToV!ZIk{V&U^P_KIcLO$Bm^{zGeU12I%+I&_g&hyti`CKb{Bp@>3=>ZgUqnhDHDB
zJfM^7ae{h8wMT&C1IZUmbXCBR1*^wNvw(XXHZ)ytF0uC9-D#x#+(k;*25f$#hmBG*
z%F*44*{<IJBD=;eXcq?<{@xn`d_GU*{mv)t{@(Ojx0U%)bSc^GyZg(MQadc=7Gtab
zb-6pPgW1MAz6d#>yR@=<b!Cpejqw4O*mvcdkztC3zRLQQCELc@Vc`-Ea=a>ASHf$J
zJq8OrfgYseWZ6oCP_W{9@wFVU;AWgAdL`T}&c5)#Xw_D;BWpU=-FtcF2Ey1E_|LP2
zgAQe*k(5PKg>hacL8w!k0m;e3<kEXy>~H+ma-%9TJ-hM}Z!2t+5Ts=0<{i;K%YEII
zg`4eBRkw_m6NG{)Hu}nBmvr}OY-}vjsEZkuuJ^VyX{(yJvZ1xC(e@i+B3b1t=pet1
z3Vmmy;c%%ZxdUr5uwGbndnfhP=)eYiLqyb{hQp1)Xqz+cS3?MF_>0|DG}!jYJ{)A0
zN13skmzQ5>`&z7{);7wg6oVmQ?jC16cS_J_f7#i0DM8&83vMX?$6Hb8t*B8T83^pr
zO2Z<X&wPQEbL*vjEUGX+S7)HRhi<{M1S3blS<-^oDwm$%C_@{Squ?OE%Z;Yg+#a7r
zzaaUc<-@p+#P$49-J;UlPDo39^2z{-`vU8Ho`-V=-5RnDOP#^8(ntszYM#^P0=Gk=
z=+~Q0voF;<P!@tww0ZZG!AWnMK4Lo7#E3j|BUsFTCK%`5?|5ep9lT<+F|uOhk%=Wf
zl(%`r*nzC}YeeC26D7$C(&f5x(yeLtn2aH<I1``6+v}~Da5)oj#ko(_-#(@%Dw3Kc
zgQ4s1NZjcq`0EaAPs)fnc$D?4V{Bzg>eX){Zu(Ej4-1+Ar^UADzfs1};E_NyO7C&3
z+6f9&T)l^Tj`lOl0U;H=-yk${7|h=&JlnO(y3qkHmmce0Z=!h&@6>)|R2btRMB_Gr
zyem%)7U;Q#C%E_7JbhgPUg(LfhXkYFRB=W=3RnbMhT)LjYbi`qVfivD1xTU48=<D{
zm{zvMkZ2&#J@56Ij_1g#(4tceB$J8qj~#82q}^jpH<E2Tk!5&<4(RRB#t>BGP0^8-
z^%MmM5F+?!h?dRezB}7TPAW$^tOTMr1HTwWP_Aiu-@GNd?-1c)$ph(<YVL(20|-|;
z&%xeo9cGx<_{&R(c|F&$K@XYbo<Pc6g(lyW;kHrt2w&i}l279O!$~NW`%Kz{AoP_v
zvk&IGBrLstsJ<I?(KAz6bmjFnV07o}6MYsqRx3I$6~#s{1b)bd+|1n^#@3+HD#&?0
z;n*ZrQHe7LwjR(iMVC*jh~3{d^-q>bqM4FnyMN<&Bh#vhn3)L;_qP82SoD_$dF?d4
z<5=*-)Zwd1OzL9n4Vs}=;#bpT#Ha>v`Ub;jevl#r@>2%ws(PuptQswF${dFSMsYp5
zeD-#B>;fsOa`kQA6Sh((VT7E+G0GkE+8xO%pFyHYIEb&BEru{^K9+&p?(gg-I8V)V
z@4u;0xji99@D5v1pbD7EzT^_QKeV<;>#m}%nbUSQuxrX*QFW=?-`rh*Zm9+j`9)yM
zD)cb<zhS#F%Bw&2GBqe27pYt3j9WPzZnN0tJUgRF>#sNQS&+(0KvVbXo-GQk`3y$m
z<6iii9a@V=IqK#R_xY`G#8!AcP?VP`urHb~4A1tPw+$8fjBE>a^c|bYnFd+y(s)x;
zqFl*POLrWu26^3!zhfLk9V-Pi#9OWz8oy?u%lsyrCOJ4}7`FWd*0i$MXJ>v&Gj^)f
zr(k_f_#H!v#0ktHRl%tsCnhO^6`QeZo1NiIv@5}<vs(t^)r33)iG!YboP?SJ3A6Y-
zuYtlA6xERUlFGw9T!IY;%<cm<UG6gz+*;v~cMXrKnwtm*BRwo{NS?Ym#_11t2H`{K
zJ;>KQ^&reL3Wo$x1~w`?!WWnMQxiO4o1gb-IH2PC)f%Q^nly&CE&1}Dl&~Y`(#ly1
zwfT&VM!4y*l!2+^?pQlvIw*DF_o(O}JR?)u7HUa(vGKORDR;4wqOt6%tvBQ_AOHn4
zqvnkwMe%)$&KRF2J5w6Do8QnRy*<iwEXK%hty7Zo0I-L6EB}e^vg6Y6FJNl|j{KoX
znn&9R6VB|@<P9K)TYj&QNmKUw<J#Ib=SM@&mf+NpeTbe<W0#5h=9}B*9nfVq3I@{@
zpdTK>duH`%^w4LmN(4Ctalgk`0#4`3_#JgvB%+Q@<c*V9>q(JH2k(wJ4SPZS$<=Oi
zlaaSiqM=Aka(~5_iTNeeU`XU!mVD$R_bh99to_?~LJJV!)E85j5Y&tr`^9G+SS0sq
z>T1<TM8qqP)K+PoVQuVu#m3Mf1Qs~%&$W=!IcIg56<F+EdvU8`ZsvK^P<(26!_<o=
zQTokaU*cQ8UfW6-cu%NU^mCNztfEn>2xgH4ZY}Yw0d{)oLVi`;Jm(NW;VG-ev;$LZ
z1mTpnOuVB{n&zPYW;IMh+!36-rGJ!>%kx=}&F_Jl;=uSv-RD2;xA)l!%%zSuJWz4&
zo{oMQ`(b*g>Nkw8JCyXj2m3ItGJE`<+^C$E_9T1L-wX4b)p_eBeTQ;s>sE=c$>;N3
z$k#HH$L4^9C+V9;(GbNJSOfkwU$wT%*xPVbv%+^|HuN%EUMSI9ICihr<CU!&9rnGY
z<vxJlY@5?Llj0~PS(oZwX?{aKD(3v?o7JAV@o*|{#X)vfQvu8EGnb>o?RWK}#|vvQ
z^2md2_g;&rUhb>;0%{B=Q)wLtiexj$O68{19MF`0O<GiN`G-flHt7}=h}`m4ESCHP
zPRIkdbu!=gvq^hx>&KNv@9wfj+OX6P?SQ7|S7BXXiX2nm;4K!NMy~0590?3m>fW=p
zuQ{tfqp4HqmUXFl(h9E|Gqpj@`ulHKtg>ldo5`ll0A-j)6&&9y=J_R`sj%;UMz2ch
zDxd>AGB_<w8?gpetlDC6M_9O{KPfI8Qm@rf@N2F%<*Wvt8|}Stl$-6a;%e*2Q_}n#
zWMOVGZd4zZUpO}(7>F4C-VxoW)MxK#IQaIm$7h>tKSzT>m9=6#Ttl+V;WE9+<_6l4
zI!J?JiEkP^N~U`!y5*LHvs-hXRUWRJg27pHs<;q4Gn$%(eILk6Bbj+7g3Wo^jJS9r
zwKB8bloPBBH4biJafRC^3dNcd=e*%5W2|epx3os?zASfAmc7v39=wnlGLOG|R|e{u
zN>FV0q<y|VJ^&}N1igjA+bpe2bvlC`gu;uOz_1m~bc;_5{qEYPN??2D)y5C=ebo)R
zVa#bXr3fvbWH{ewiqle#)ck<Oy^)!s8aU9em#lYuf_gNAK;F*#LBWfiZQX9gj?^tI
z&CB4N-{3=X>h{~j6MI(Y4y>eT_Kycw@@se`zevCmsu}qEI4x!tN#8lZAlJl}u5_0)
zn9>e9urCg;L<vrw>tS+@`lW(BTaonCAu4?+Nk)gwL=S;~><~&T>(Gc>6XKTKDNtb4
z%HG!Bs#kz%P9NUm@21F_g9wY567?u)Jg>P$R74TR?yq`#vAtbiK_%2SjJkQ+S<Bx^
z-DtH<-#bX@E8}C59eHp!U^`Q8b`ZVOjZPTdyi|}s;@d3Mt=3?AF}`&<7@nFeFGcXI
zzi|c<+<hp%8&U*A(K;)k#L5*6^UoYs@#1B5<H723?elA^t@(v_`oCU_Lx{8vtV9;Y
zJ1{BA7H%J?lV&S&JV!S{@YLA2WBq-+cq}ZguMw-k=pd+Q6zwCr9)4$geDQt59Z4Qa
z4_*UF-7_G*v=m6;=KqrL>9?n?!!$^lSRdWW8P%Gx{d`1Fqm0g`i)m@8rT6ReVs7lH
zsaxmmZ~#rd<du7r5RLhP7BF9WmG75|>lNyz49nfM5*LG4vqfPh8o|azi97e})9O))
zq`BIs+#BC!^)8zx?XNJysBGT;3XDKPlG7!>cioQ;JFtiDflH4JS|lNryMelgT67ra
z>VT;4;lr$MMJpO|@WS$~0U1s8)l1AX(+(F(IcF~zI*3=Fi)8h{nv7cC!<j-k$1;?2
z)GpEu|7vyjJbl>1z=HPr7=QIq#l@}edLfhLp!$G)JhJlFIy4c5I}L|K;~^e#wa=35
zAUZhzm5q+z-Ie!ho4Z_`v6bN14rdUFUy9Q&-Fsn`49u}~|6&&>ay?yht7tIIn;s`B
zzbN4&da$41)9BUVxvEot$H;ka22q|dLZ(6T1q0{T2WuM`N6VQzRZ3#y+XrTGM7uVl
z)D$e@jN%#{waII``6wPs{d#3YPNREprZgPt{K$70&brTUqf9Qy`!8)h;@L04MnQ(D
z<ln6oSHhpXP}HXU=zwa~XaxC#_s&qB0^uy1?{)!nwkqjdoMG@%@>?0AZ`#@s_&_Y{
z-HC@p2u<oETGyA1ogwoC@9Z#xiM)6*|3dk2f|eKpgi#50RfFr2RZ|L6a-7P^z@5Yg
z&_e<RZ>srQKHnE#vO=$?cW6pZpHg$YyR1_57QnK#^sH<@d$)c;GcEX3JlV>sAba)%
zs|C4otJ%RluXpb+iZ$Ub6x%J15i8AjPcu@KhG!!NBtkR(Z?g>^o4972H$Ku%+f@7Y
zd3s`U;M}tEMUR1d_!+`+zA)D7?j=;w4!za)=PLV#1o*lpaN@1&8yG_7n<s;<!3zq>
zJbmR}9gGt5fh+fdQ=@i3&o!llFi!wg{P2{1N7E|7lIr=zb6&uQNhf7nIAynAEo!IO
zB01jdqXX*S7}3T3-ZgV!Dt$7mYdS^)$7l6cU#$?24$yUp$=YQ4g56@4=w1odR`aZD
zA-ZiYi(FK#YRPXDmCwLGZ7q4|Q&;-vraZPSP@@Qlrd#=;vV8_^m4Z*NqGnf7UA`J)
zmEM4=NLxI}YSb&jvBouGdHsDJad?l2`$@e#2N;C<*rSK^t^81DJqJ0Rd$)W}THW1Z
zf75?`s~eTLS;HU8`OG7}MmWAz>5!kxL$*V!MhsgvAu-=VIU6okIJqR3a&O!I8W-9>
zHgku5b98C7L-Wp<FW}4ZPNH8IiiVr{SDmi-wxRY-cgNOgI0j7!xrz%r!%Z3Ka?rHZ
z1Bx4})1trhjaF?p&SqR#l|p#mLnsI*!KRtVJ{At$k0ot1V23Klkx^=6%Lq0jr!R9~
zu-z3yZv0TH+cQ4)`#CMYxAz)ns44QNw{ZBA#Ec8#_~b`SSwp{caO%$HM6+#p=e##K
zk%mgd63wDH_dUWtjX<fWF}2+Z+46icCF#vt510A4&}uG^S^e@W;z)uk449#`Cy2D|
z+hVmNVwZV^vIDXAHFqp7hm7R{yut{*$BeN$Mb#O++~EOF<u(4rvdCdlk;$r{2zDH3
z>Q8^Y3+65ozi$kqX$KU0hfKXdm${u_1(L2jKQ(NPjo9{cd{ou;#G(CzUYFTI%8Rc!
zdQXqurkrITTG*bwQ)Q{-=tQK6#Va-~w^kocn4Pgw-vNUvb$dsLX^GmMXjRR^<vg#+
zIy^!z6a8UH?$~Op>g-<f<Be^G7Hoq@32MzrlZ<1?bw<=E<cE_ZFmBst0*k8gp;jZQ
z;fe4;#5b93Qa1mF0;~3IfIO|NC<&}^GZVoI9%h304##I?HF&$g?94ENo2~^x+pd4Y
z_H8PtI!p1+63R})zqUSaXL!Q*O1BC>)_3JGPk9ybkbA;h*<$6y!$ngKw(CP6>25J2
zWzQ~gi@Ii(Y>ozZeqH9!O2S^%<u;xHd1(q1E2gy0UP=fEwR!*-d>fANOSV<48qe}5
zBd$KyU~(uh@x(u0uZ{GZdA<yWo-qi@%F+{EJ|KER>g(+WD1F%Hir+8qgoUHG@#uJC
zMw6B0N&XF^vxAXMj!Ij<zP1B*SB!t7Y;ni3P(BjNg9ejs$fA;?j!9FV_9??6=Tyh0
zgGAA^GLX#Mzy>Hof%}RUb&EduTg(&|l?@*2cnh1}6P&Mw_hTkEoB$!Fc$XMruT0mW
z1P<AicYNst)|^BSQWBG&bs>5P_&PPz_nc?`lec}IgsFr~Oii~|MtPKr)8nsXM<G7?
z&%RIJEIE9j??eePhc_*ddfEwYE5uUte8D@VTabXT=)xzN5|YZSHt*o6E7MhWH^$rn
zjbe|1M5$x$GA%ON9is!xHQkbb0t&WfEn6Qph?W_lgE|~+RQQc#4-7i5tYxG9a@Pui
z&-g5OF_S{i)w+Ahag1aW;yoW$I<0(P*^=6(4;N)DV52hfq1&&>`k&+3>ZrErXO#nZ
zyj$G?#CaLCjz^f80RLmb3kJLcz%h-OGIY4tJuA&Z(_HWDpD#1^s0bjd?KjvSbZML=
zN<oQP8p~ZoD;2v|TaY`qkNjSX96NH2KjD0{iic3$6kJ>i5YFDrY}57JsGL6h-g8di
zEi&pQi18Q74|`kCl&!oSRGSK$y7Q`VLKgm{Mh(JQgCJ<*qwcfQ7swEfEtu|SZJe)`
zcz}P}>wM39;awPWGqPIb%)SVwhIHVUd6J)8DAjq@f?N%FuwL4cuyB5!AV0j-B1x)>
zLRd5_`Ye=8wX6!X%7u%IdABjNjYH$t?iWbUYEP=qb?7Y!t(@AGq%i<fuw*~`^3w^G
zk8CT<ST#29S*_AP<C&exF6S7L*H8j4c2IP$AR0S*-}9*E9CA`BVi_a$4^!9YypC>K
zK880>m>zvrahhlBelB9{GSVVv_ojL5sgjeO&+nL&CJSnMH8hFBewE)>se5#GAGz~P
z3yb%r5aL;+)9PfcX)y8unme#K9f#gZ#!;)85Wb~MV4{wCU4^8VZ@ZfFY-}w1?b0Y$
ze}2xwnlXR=v(ao{_7Q9rx_oo5^js1kgKk}hM!ird*W6+yQ{`1ry#8FXhv5WBnCj=E
zNey=@UsbDT*E`&TTRrzqsFe7qs51qKOM6q)=q@3WhGc<z^{9$vlIPCSsd72j%%xJ^
zsVaJ}fH^taSWM-8Wsm)KI`H=i8hxwJ^{2ysANrg}fLpQNC>#3?t-Gq(5SZ9TP%)s9
z=09)^<0p`J-SO8(GA`uf@US~OYQk%6I}0Ae2X=nn>$mrz12TPT@<k#QZ@9Q+1-nM8
z*Yq{(<6mX?bwEr2Un%HS!<V(g!s0Am?wgr4$-1^hNzbmGEAG(ffcHXKzL?)k472xs
zz6$NFG9Ar?A(hJ&xxkg}UHtuJCP*K5^Tp|;PscDkA%1-}Xc?gEwDXl|kQYZVN;2&|
z;Fq8}N!mKc*owNcxsW{QSWbDo5Sc2Ize)zS_kvfQGLM9OnnH2JvfK7DzS(X=3?ckX
z4#d~~Wc2-BR9A*(wxRQ2Av^snvRWH;V3}yJ8o<1E#1BWffYnBu(8!$$Wq?<%ommYy
z#@*o$c&+BnX7ygO1=oezkzjwz=rEO48XeElgL>%gaPMQBG=2Iy&g9Epl`ds&{D-J<
zn$^SmA_q3wX5hW=qg=0!$ruMP>yl`~m2ZE!zxp0+O>r>>SL4g8PhB-J^e+-i3@8k&
zZf9*f$>m3WN{R88V4ZUHk$_uGg?W@CtHbmJVXm#0w4ZddRbwz;<*mWwG<By#nag?D
z@hm#YTt|E3D0klGo@{YLhqdZ`@TGuWGsdZL85xE2{w<q51k+%v2mZ#-H|RnQ9SSDq
zGCVWWNyih0msId4x8?4BT}iN?Q^lX0j~blbQyo&W^p)Y<T)diNG6$z5xx^vt>!I6A
zIo|~)(i_%D`hFFcDTc(PjqUy_Deq8L)2-f9<9XoCh3Z2z>IR`x-6N)zTcbyH`}WDM
zwtejW!m%kWEG?g(?t%uTGJXIW@Sic@jg~VWjx#mQ{AR~(_B;_iL8VmD<nJS=txois
zmW@4j$g%Qi+RXB(5IURvNIGl}*<W_Pky#@jeeWU2uCRHN*-w|L>4_<tOK30?)*SSP
zL4`aXDsIm=_?3&~y_y134DDUd>l4r63UB#*1WjA?8K$HW&c0N1Uiz|+A8nEL5e7K3
znA)Pr3&+fv=9V$=I+uDVk#PNe0C|pny7zRx^;w?)*5YX$@5&oj&!i&Tm``F<rf*;u
zP@>+op=-yJyFYF^a7G0bA|JWI<+bApPB&~WMY7&5##pD8^)0wWm8oH|5A`Bu!dS@{
zMh{O6x=-IQMZYccm@qy&O@u6(aAbgbMAo+QS`_$@DNDDSZUXm670~>u;-w_IKIF5=
zlHJp2c%E(|`Ra1Bqko<N*8XM$_7%&-ozFol;YBKrPp81#!-~Ef?oQ>(1bBU^NYF}B
zmde(FIct{JrZ~Zd>~pLWac`5$DOm!G<^m1-7%`Cqx^!_Z(RQ{dQ~#}R?|peL=TH{P
zRQJ9i4bmKk!<>&i4LI>j>5AC)J{rwURxssQQlChg(i;JvSmg(-TU;d0^j}E&tJj)T
zGy{l~Idb#rqR;fUzEsQ0IEuT^qWF~W5P5A&PSkJ87&E0es%KkaGNtnCZ!W(Eg~wNz
z|N7hhBPT9C_+`(JKi(K!biP@6V(jR@eFtrn+k?B-Vj}U!um1IOEzvi!f~GH)|D%Zq
zzXZfT7(Hbr7W1!0|M<B8n^t^;Se@s89`V6Njf>8&@}kuK)#x856Tu*$(ZFV|@V65@
z!T(0KzW2tNzfO5~zlOcX_$L@7c>Z>RFPOFB$LCHn|Ltlfz_qXJ<-q*y1nUk6Xt=Du
zI`Ho|1YC^^xc04wA*z2n!SUaY@yq$t|NUx89*m~?UaIB$+X?>vyxjpXdqJ4puSE%U
Rzkq+&R5UJUDcybe{{R*Pk9`0D

literal 0
HcmV?d00001

diff --git a/tech_reports/GEMM_FLOPS/images/effects_of_precision.png b/tech_reports/GEMM_FLOPS/images/effects_of_precision.png
new file mode 100644
index 0000000000000000000000000000000000000000..31c33ceff238fda72c18c19bca652b3f01b0900e
GIT binary patch
literal 358138
zcmeFZbyQUE_XbKTf+DDZG=j+pNOvd_iiE&0<e(tk-5@HWq(w+e3Ihz?rBc!%44u*)
zL&tsRQ~Db};dkAC?p;gPLdL1{p8f84o@eiKo+~NHoFJwl#=*fkaZC25G7io$cO0A}
zaH1pNne8bz5geT35+;(8O1C5>nUrj;3{A`paByUwhro`it9+)6(R%el!Wfqz@yQSY
z`4!y6Cv7*Di1==tVtR8H|9VZ(Rn{U()tgmMGyN{B<7TrsQq~Mnkq|eOsAoZ}lqFCe
z6K-oBc2kvyQQNI=x$PQp;v65boH9t5#raCKR?T_cbA9F3XM5$7O!&u@yh*brtnH+l
z-oL+w)2Ot&xw?2nB5?ugmf9`8duY}^vSL?^!*rJ{W!1J=kc<%r2RZ3i?Sey=RMxvL
z^770l;=Jl>63?jQY)a^0ayDsnOD}t?YqFRu@g;8gd?HW5k8`D7U3a$>`l7Vp+jF=6
zG!d>+g5-SkYwnPvHpJE4ZPEnUs<AJ39xLn&^Qidp3P!EpzZ&bGetDb3R;oAZ?V*v-
zfz#*Dp~ttgo<J{1($TR*b2s&8q^G`9PRFx}y~fLYN>BcoIFU1#+q1j3pPm<hTL#lu
z+i6@gvJSzWtsmd=y<v`H6L(FR`Ob%w<YRcfWS{6CwUB+y9%t(Iapn}~XHvVLdrO2@
zdX&WC&gtB6;sZLFt7{N6TH+N&tLk)R%SbN8PPplHX>I1}GtZjW2#=AIA#TjSh@_xR
z%c6$8_~3Uohg?oCaV1ic?i1CK?a%1!Jsz8T0Us;QUve_8=t(|~D}2LybHbpSuyO5i
zzKvkc)~I$?qx(5-Pg&nsVKH-Kc99CNhwh8&Cw0#<BHFf@8rJ+@2G?APpTF!QYT3|A
z`f4?qOn7Rtj<t5|n2WV!1A2w$E?hNAM4^o7_32X=f{*Ire#9Z!!WVi`6+xW*=nm8L
zwAt)qr_~sVDqOu6q+3<+561&#-=K*`c`4646hAlicwp4VGY2kK@4JV(ujZM5)Ffxf
zz4zJG<uhY+WW+h6<v00m3(T|Oig*W%Fb)Z3+1@Yf&APpNC@Nm<X%znAn98+R=Ys9?
z&&lcGw8x&kbNqbY$W9Ue;&a~*2#$0G0emOMh&m^?k197VjgpTFz4Nzr^NUm3Q+YEQ
zM>2bq)%N9%KD)Kt`<X|(qi`j9)^VXCv8=l2cZ?So@kL7|LX)0pMNsT0_IX8qIEi?d
z#UlLb?kZ6ZNAJ$m%d~<t_D`%G+K1tcF6pVCwhd2g?#;)dB{y8MPVD!d(h69pYtg%u
zz1BmXeylBfT%Ef+h(2U&bP#>Bp!8{3P5s#CM?#OhwrDjqOfK5d&(XQjSJv;YZgD3r
z8CONHMqHsxH2T{9cG<rwCD%vW_pbdVnM+1EiNjReb93@}^QGR?d0tbL2e=PWD<3|5
z_%vMnVjO4Q;c@WcyQL^zd{3b_xB^bkAmmexg4(X<stTXbxOc2SB)^8=Z7rryMd6I6
zONMhp;`}rGTlcsE#PApGIKxQ|9}~u%ee;Oo-ih}Fv>zz5@aK>0N}LwJQ+UKZ!d!Gb
z^Kq&5ru}28IL0#kSaO1M=Tx6OojzClj8M;4;N0<Q+G7$QUz|OE?AG-T@^j4B4OvKD
zDkmOKy~)RtUl0FEW_wC7*q*uYrLQc<D$Vv2z4ydvJ~Z*6)5dSSx!RR@NTZ)5Cv>*q
z<q_#TRg2@DX3rxReGb2gOwgGovB$v==SqJ3_TI)>eEs_`)wx2SKF=;zKc*Jcbq}ph
zqCJIBr|S0gsD3VRB4vET`smT|6#=T)r>!+%V<B6lW#m?$kYguTJojk5Z(n2TAr~Ps
z|LA-}=)Taqa3zHvX#rV*6R}B_ABCA;zL(5nF8N@@`8@nql$7>uZAEjY7$(P4jHi#v
zo`1#gIq0jD-+3EZmqfh|p$_&An+^(P{+Y+Ga$aS>daWF%obyU4RVh{5B&CBh339vj
zjo9n1&y){N+Ma(a6LSl$Fm&7UoG!D|YjG8x%W7vhZqr?Oo%2xndTL5k@<2k_T{Sg6
zs3R;$jq-9UEK_MH!Chhd-Kq?QJcSlTXv0m4=M7vT3#^w^EfHn!lv6`hbxpZ=gG&N5
z<wT`CB=+NOt2ihbAXZhUQq@xV(%^SJQl-^$mGsl()gVd`SU=M7R)%79x@BsM^3?lQ
zC{kHG(No1k;&C#kG>bBejDBjZLf{&WXvBGWxn>;s7{iy7l+7v;!6s*$18U=KOh`<$
zOj1k+HE*&(*b3B=)l$1HGe|Q$E#xhX76=xo`=EU?`K9*P=;i1O6bf2wqdkQ9?ZpDG
z(S4x%z?l8u=KeDX%g|iV7j)OeT<A34G#s^`D1#ztYwXZUMI}1vGNKB*g1cigE3Y5s
zc@E2Fyo_NfW|5O#q;;vo;mzWG#u;vqRTDROud1#7TD{VPqL$@1_hVlMy+qzkZZ}QP
zmoBg6(YMm~UNIdXuWg;_v%e@B3BPWYuo-zaoFG#64qv2wxI14yS0FzdkHxieD``&s
z@T9jHd}h3J{QP_;`C~2MMxJ$#%s*t+XQ`XnbdQxMg&4$7FgIRoWPs>E3ZUN5=q^#S
zwESQ+&m7I18Q)FXr}ClAu91xe?=$=@o?3(~ToC@!(xq^(a46I6%>6gJmq*`6+~r?@
z&cEH*-f-ABNpDa0k&c)?f({<U#~3Z*Ix(G_P$^g<U?^<qP$n#Gcc(u5BPW|aT)0=r
zCOR!DGcYOW1f8&z`C1RG#j(YG>O@PSwlVuCyT7)kPDM#cv7gSIj@w|<pz=W4pe0w{
z*}DNr0d|W7PU4#`i!uY^u0^h2UFO%e8as!$8|_kzS_Kd`dp25iq8nW3F=va7+0|sX
z6lbf|#Z}Ig(XB6ABCEV>6Wf++9<_P*4Y`~+b$0}Jp6`?HlpDEZO=c-<=^tD_xNty^
z8-crm8-jZhPaeM$e-3Z=vGyZF!f2w41inX*$16VbJ+SM|pFbcgd?9$w;vMU|>~|sW
zf<Kyns=m)QYV0h#mA33gPD%dpS+XqI=OAqg7x4yGQ_U}<#(MUu_R#gta8Ax#{g&De
zy_D+301v7uszQ-P%QA<u^A)_;+3xGzu=u!p^RPOnI&&qVu9e!0^OpFn7hj_8+*Xhs
z3Kh9_?%v0UV}qx4E>bF%aeg-K2)H$MX-=^v#@*hp?@Y^^{VPx2RKJP7qATz+il#+^
zRXATLKell!bE0BodsS`CbsN6Nb&$)0%*)A(u}7{$XUeI?s3)jB@IvF1msz<5?DQvh
z)9ce0G#>Xp?>!-STkwjY?~{m+SMT?$CZuvFJNnJvtclMwVd(6>Ge_m?S2)33{Mt5h
zEpH^)y|h|$)&J}L3MWtKf$z+*&dt`g@N-e^Vx5kYuimy2<B@nq`m~O<m_}Q0xqX=T
zwF|f7Yn7TzZn20q;zJt4H}~SUg(;ONAFyqew`FIgFE4zo_F<EHDz)a%aCmB6PAA(m
zyJ%~YxmAjmYTTdDUpO>CC&%)0;VJlMALY7^)ea81VmXdIp-GgV?$NL$wmJ3168BHU
zW5lC=v=9%q#mk+SA(!3CSlyxHraQB*x~x!lH0&w{irvRG8#F)nW%_0LO*%}@sip5K
zzrE{H-hfPd*B9q{3%!djL&NyCEZ+|s7iHUzr^r8&FXTIFVVRzter5Q^z^lrMneHbU
zMqc`RYNhgCS~CU6exLqQ$Le`wgaaaD&?L*wg<kVg>f9i&4QgWWpfqxB0Yu|8&znVD
zxASf>OY*&L`nuB<-TMLi=%ZIjRT&fMubSMXG2+|cH{fyLy~OLpKrHyBZ>-mLIqTEt
zg3O&DRr)TmlD)GN>h0Md#tG|=Dm+tIkG><mUF2|kZct@hQOC^Ra!!EWb!Xsh6?aMt
zS1W_~l*3}ZOYoX6`!#kR9sBa0p2ZH&Wv8&=la57>JaF66FYdkjwsKQNE&4F`%&4+i
zFLV>{M!r9aCDUo>S*6X~WObxYp>v_DrO~jb!==NPsnWG2^2Lw=Sf%bk)yVZ`Mm{ed
z`(j6WkLkm#{k!uCrNPC)x+0%FR+CDnub@q~xyOz2MHNJ4T=CsF#ABDsc73L6rYgq7
zc@NS(P4-u-bSu2_H-nd!JTG|iZn@6Tmh<-y3sqXsKcjJ8W}h0I+PuKHE;heazvaB4
zGL~W#o8ooOBX!GkEq_95y3p;h;N!>4LE$XB6jk@6Iy-Qj%jt1UPvP)5izVE75qtQ6
z@MWxq0ny&2tKuvTeh?h=@}X$S$%DZGf_trM&U=q`Oi$xJi^n6IB5ljo_nm9gkax^{
zL&+?1)ZFhNBH}gif&9x@L9b`KQ<SpM=nt(6l{mY>?uLpcZPtKN_!+3*GL)CcVFizg
za0qZ|a0tO8T<|M~OZ(SjY23>=_`f}mhlAs5f<y4fdlbN5%+Cw(i}}p&fAL>E#W@Q8
zcMAME$Kn0??qlw8_<ue=0tc_*Tvw62bqoAe(YG})u(UI_vZul|wggWcvzFDc!@;4v
zfceF}rF?!Fe1DILs=B?p{2d{ED+~7f53KYI*qtpf=YWGF;w%InS{T^hXL7bMx3m*-
z7G?hJ9YWwS=4lRQrr+LTZzjsDF0aHSX=Q7`#0T8z8nYNN6BCn&?E^z0<(txfd>s5w
zl-bzc-dc!*!^z2s-HDss%GQX3Q&3Ql;~EzS7Z)3N2b-OXrTu+pHcLB}-@oL~@40DU
zr*CUwZEs>_$%Ogd`+8Om_M*(pm;?Rw^ZPmtoK3zR$<pqR(*h^Rfq8|4ll>aUU*8Qr
zDuQ`dNXf+6z+B^|i3PA3IEEM}ANMtp-#+leOWzLp-AC2GeUyubOW^yDe)kggN1=8G
zwvtvB;Gp(m-!|-z4}bsSA0HIqz?}Pcw)ox9zdZ{qEk-QD@z<`25ic@3T>=|PXL3_f
z75oKmhWWwg0RLS6{V#Zo>#=9>?8yucjs(uFo7YvHapwn*m4)gL9bNW9%%77xUuv$A
z^CYJwJp64v(}QF8mtPHC4__huRDE{$Y-10Ks@L#~+2^4b>E{B{HJDZ7^KO;vD7M(d
ztcj$wq^^lf4Y)70(oIzyY&wb#&k3c7#d_N5${V)!ew2QQgNsK%M8+iX9}fcIanGw@
z35+cC=l|=izki*Cg2YJU&Hwz~Ki(`&Kv2QOrBrh3KbG<5QoT9se)64o)Z}Da4fPT;
z7k}<V!9tpT@|{EvZc2<mSn!h~|6JC8n38OZ>*!C|8uM*0$Z@@q0mt81p87cx0%Ozt
z+&dpi{4zGy2LCcPc1-{BZ0wZyD~(_!xnF4nyLA3aBfruJwgrBr5o`<mN+Vbn_?1S!
zS-@NR$~C*mrnAD1=8JQkisFImyao+#)IGN+5-j=)LUc2<3N7F4TOE(45%+X2J3QE3
z8FE{yxfJVu>a%GeZ+E>%Mot53d%8w$ZJWHuk9hd~<A+r+`mCtiSmc2R?yY56d_in?
zKKIDN$SXN^Y8eKX2XMxxr1Uhc;)m|=j5mQp<!&;Od=|tEcU8n)H|D7knI)zlwogFA
zuid{f;I>rGX+2WqJfB_1>{BsWn6!)n3!|UOwV&y@5W!=@RViocP<Am_Q%8FtF_cZK
zmaFmV>s?KY{4TvuC-~C~J=HwEyuX_;Hs;R}WEA6ak!pQsXAX&AXz8~aDs#!R8Wy8E
z*s0<0BtJ@Tuf%>c%<0t;EDS60P$J<p*_NrjWV|GngGuNR!rd0~r+c#XbvO{4Ybw^W
zpH7N5m0A^A4(ipvIOmeD%%{A(@DQK$vhz*(ec92t@nG?akv&@~I57OyC&mM^lj3{U
z?z;<xBzs%y-DxaA0?Z0x+h2{l(z@`5FX^bi?!7U%zU!~Dg)l>9+H8&b%4{wUQVH#$
z*E<#KT&AL_HV2(_n_@)Wyi3A|Ho6jIf{T_vlGt6(A}o3C|05&1&Caw8wbeU`y@9vc
zmS+*j+KZ9q%CmJ%n+G*`x--qUu2idLE=)#R1>v6*$r`cYMUTh(v-^4O?@&bPW`6wo
z`Sp0P*Pi~~f|Zy5QpIlO5JPFVrdiEIm@dPN9mllV>)uC0$mF?hU9bJ3*))VLZdOD~
zYU)9U97l~!9X-{jrza3IZ-h`(y6!6iz&{eKhASK@x(Tst3|u&gifoIuJo^%M!n;68
zwzY#RAm@z)e#%9qu)75ow1tB%D+K;qmyd$e`m>i9M=xn-R>5Y~{VCaOt-6Iadi2c(
zTaPk0Gi~%5#cqMqGxd<yQJ5dD^onv@EOscaF8*-^24r9b_&GAzD@dXMEBL?<@kaVG
zxL)2Euxkw$a+p0C9oq_bUHhWv&qg_37aXjSYuG$B&q%tI*Qvm1+!iN`LK#4eMiBA3
zylkH}Y|6%V^@Bx2wQOxCo8D3+xp*pi7p`x=*3k)dzhkAl+ZlVn)9NNHixpX9OiFOT
zWLI6?Xs{es3D5g(g7m~kNr!Uun=2|~k57tSE=o7)Om2TC>HBDBj~J4Xr<};7(3O$g
z*5(JvJln`#YTl(XXCPkwGSzK!k#I9DmGMM&qCT;162FRktXO$mER-Z#*eM7^Y8|QY
zJQ*TA=l0d|^^W3CCNp)}`gOIXOBJ;EM@a+vElb%h-U(MicRbq9)G4jq-yCL?V=sR2
zAv=PY_idNsymHp;TB~@h)o_}aIAJc_eWgaWQKUOBskLOKmbNaOQ{PnRLbtYEv%q}T
zCq3ZX!Hx@kHJFeV=6{<aDDJvGEo((2CwD73_HjhPdhYUkZfh;2migVzE3<#IYrsEP
zxr^$^vD`gSW@IKMHs}mNT1CF)pisr$YUAt#p(*MN4XaaITK2mB*79if{ziXY;jpJx
z!W4ts{7bFdSq1M(sksfSqn(DR5BG-;>4Doy1#S+4ARK=w!|?o~e8d-Lg)V%w0iVn2
zSM7RM!VJrz(WgaonWdB4p&1}VYw4W^?&d>Wb}=bX+XLO@lJ8e*+sH;$IOJ-USb>_f
z3!>#`XRp{AEBg9Mjv1ZX)19i4;(V|@#o|+G{^RML;9y$rTGo#{iuIBN{xKqVsW;rt
zp`lg-=DE$x<u5aOLWe3s9?1YNHO~IDmXTCwLhrdXT0B8(CL}mm2_lb|z6F7TjX;)?
z?PxeE_iWdfhQ%4cTV{Q27tW;LmNr9{JAPO>({S7B)h>t`;-Dn{IMJQygzW9fNGiHW
z-E%sH;7q0E<!&bPqmq(1EwB~^!uVg;8^XVgE*;)yr@98=8@Q6rV?W(CpzKo<gh~p<
z(+PvSnzYM-ea?2C2!Z)MO-<8PN~?moD!6{?RnxKvA@PEGS%UqmE<M<pvy<bcES-FQ
zDW;yyOJiQ$!W5=$BSmM|#3`k?4^kB^4DMpkuJ8Bb`X)dyvmy&qesC9Ai4nx)y}KSK
zRWwgo&t((_fq<qMD1jo>R@x*_PK3H%6)3{-Sr34hyLJ^_9Sag(JR2;2u;sp_-2fK3
zRsWJj-=D4ES>AaAG#i`QcB|JxuB%{>*v@qP2SkBIe=W#f%GRDbBuDA2CH9sM53D{I
zG;&nBC-T4V{V|2pz`<%p>m*KMz2tr56j;9Qh}q%(W^lh%W%;zMu3a;4dx~Ogd>v(8
z_zhK1hz-hiW)V_WD<4TXnw(}nDIcv+t|Jxv`l1L#AIVNppDChdx;=qpcCgeb_`~C)
zR9`khu`OC_5hj^+U^`C3bJ=m&V>3Q15cp{*_`3MIGsmz~B0<9?P-Dc*6q&IQJIz}F
z5nS+eV?B3F5^+>CYqNGjLwV`M+?{z$Iudu@`B;i?e|`OOXcB5JcO8)@GFDb#OHjdH
zH1bGbrTR2UNp|%qx$4AQuZd@)g&h3IS)|W=X%(D`5}0_SLJU@yXvAJRpTh|-&LcSx
znl-Ymh1eTOK*W;{*6?Y^O$aMxwmrnd)15fx9&J-gy^<onpL#D*rGD<MQOxs}XkjWk
z1oB2}jHrLbHY*#;cDoFt6o4zH?y7=#x+lf9?#&nUTT1O-^VnH8?#(e6$`0fo5-2?L
z0B)o3TYbDcK0DQwTt4X7U!$TZ3bmds-yEV9bzQ&w9`b|jh~B`!?v@S3V9Uom%$QtH
zP=U#f&R~a0q{RsR0f^F6LbiUiYZZm*$Ia(7vis`?2`1Opg|I9`JL&F3M~JxCUIqEY
zWq+<aL+dy-*Lh?p4ZW~q00^in5&Twg>-0<?ry-Y13|@OH;i7JiLrKC*Wp?T18+|Uk
zCw|aey;n(y+(U*3PGfDlw=}boN>8%GT=xnn{@rpMjpQ_G@-nn(jQX_!&p`&KSQvxC
zbSPx#Y;Lyeqc!H+-bt3ByR`@l2dpah$AI}D?_p;e?mGg+R{hSGL35?PVTnyM?-|Iq
zss#|oo%Na1C><vqDek4+TbH8Gn3&WVHb+c!@nVGl*i=t#u#w`KE9)j$uXzg*fwdo;
z&~+NHkx9G<1(68d0sugY7!~3wyw=1$4iFWG^Bv;&Bm;W0z6SEGSqvfo#v#;Ex2d7f
zpYO>gL`PbdTyfo8)KNlSHqWl1v{}eUNo(%3WA7Fj)AINkfDCMc5Q<V*b^FT=u*2)#
z$nhXy_Jf1XN~!?qsg`I=$st55pNVmuO_gnwq3o9qW}ru86li8k-kk+W<T1)H(t^^e
zY(@IRTH%0gqio|{wc%uiXc`Ez%=S){K=#hN21m{Al^*@zK#$Eq{sIun#(jAv6P&_7
zCc0jLry35<?hs(TRE0+jY$Ueam2ZZUZRsim&U6tIRI?MW@-8XH(8+O>+c!n<B1R5C
zTCiFNnIbNRIEmDKADl)j+Xwae(MwU*H=?`_wrfah&?sF-uu;9aMpUBk{GMd_$R3Nr
z!LhK)Jp)kNN!vl<OY>bgja8Znh#n{b>HqQL<VLK;N$eBxJQy*HcIbZ25-b8{%4B^>
z>Ei)hSu2C3ev;!A6+tg#6b`WpxHt2nzTS`{JbGJI-D@8`-poIIwP^QG|GO?qaoc30
z3jgHS+IfIlAUpGiyOoEHK6!o&Q7UDt4UqBagy75>>p=86U#>+#@5O>4*SQ|Yev1T^
z6a@_bp=ou(+EEb2aVwzY@>X!RvtR{H+#S3V)LUt-!9}fNyVX_R_){*8%1^^4(Ie|m
zQ&-08sG?TuS%RxuJvVY6xuz1&u)#1H<-@|a%$BM|oC`|Ug^tMb#md8j_|`i$`Q4fj
z=|CFZVW#e+FgT6J?ncInSiWFZ6IWw3*soKiizR0EbKPu(+7B!;?MSh59pE#cFsqF!
zzl3f56IVg%?z=$WdCCuj-{q1wfHhTwyOpXICrcpV<-bL$^8hHVXQ!f_;J}oo#O@14
z<3ZWOp1Ty~$*wC1WQ89%0Vd$R(Ypt=v{p?{S8BnNLZsq*%d>^7eHBwNZhBP@aaRBf
zpb&+l^jybLn2;8A+Xw(I(v7yA!d?^s(VOcaWRBM}T*opzj{-=F9Y-pX6=NAN1@le>
zPwG9qfU}&;e1rC#7<<alJE-oBGag<p@`x4lU}odcFh^WjhRzS{r0uVURR&yte56az
z99?}{v6_%do6O`&J@8i!d#8?5;VIFMn(RFnxANPfEU_vX5!rhfII`=6JvY{NGs%JU
zlXSveK3a$u16Sfz#y}hdpWy3>o6c4~qmngOGM5P{LW3GYAExW+0!-LIzjd3zM4`ZP
zu*BHQR0YEY^_NFJybNuk6S6nH6C+a07uMz6;yRZRR;sIM*zseoX%hggHJUnsX+o@>
zZo|PipC%@I6;FgB!W0K9y$+`bOE*Q9peoKYe~`K6wLTxgqUm`5+B}m^V#}BFg`iYn
zj90ncY&8bzKU<N9tACe<Q8ZLZ(I=wGFQ)884TGp_HK=oE<=FDS+!&hiZL9=%*?F;e
zY7CrO;QdceYjRt~Xir~K*c*MKxtNp%EAjN9P<uZ?p}_KkF)t^9qI|_+F!GyP@!N?Q
zf59xD)uAm-$X$LkNqI9tr__<tuqk|Y@cHgyDMyLZ5?qM`df%7cvA%pIZD*eYlsbWf
zt#L${-NJ9{d{_g@ciokgs8eq9*)jlTJiuL2E^X{k4n71ZOb_JTl_jr3;myfRoAKJN
z*)fnRL^^U_%3aOAnbG2O3d@`zaAts|ZyU|EVCN<l3`1Rb6=br-4?GV8Qi%;eg6cw_
z660!my<F92oiG2`>5FFu4*sOmAan8(R&>1b$Izp8GgDSRv+k>fs^fs`oW4SUN|Fof
zu`FF^C`mU!SAc}>R6K$b-5hxBw%c3B=%u~?rpys&3}fC$87{kAV_SMhgS8@HN<!d@
zgy)H|UZ_%(`n~=PIt_~+IYFa(Eor0A42hvB#Is440~kbwy4Cdq;|o1kKVQ-Z&dA}K
zo?Hb>9rjbp<G#S&5+%spBC^(W_S!uOOev^MNh)+tDopx2pbGlbh|zh`x7gcy4->UV
zD-J*ap7D4M+uc|g$57P9;~1csVAL8z*lOAyFXfKzf@K4@q@<JciB%p!;N6>UdsoaK
z<FVc@Ly=Ur1h!pW8>laTk5n*~4AtL>jRK_4gABJ*g#t({M5`pLC#!O6%)cGbEIr@Z
z`~r+@uuwP*w{FGw&GP6cl35C$+A~`i95n!TfkFqdSMiY;QVq67tv53`8YK0rLiW=y
zNIpR;bzN2ZfLaABPnXBaQ-G>VgLCH6b323OQf);Thhp4{5c6=w5bBo|u;Cr_9F>Vn
zwX)^wi}HRHFl|my<~5hYPb&zVYY{)(d*mwhj+3wgh(xc|4ML!-C2HFqk5BMDL+N^z
z=jiAP8^wA!?v0-Gx?H4#P|NVsq>Ot1L&PyDae?)`YA@l$UQZ9<Z|k{j7YFn}&Ik1`
z9?iUXq;8^7Cq=1|IHK-B(Xsm2m(kO?y+2880RT>^;3PeLV)tg?7E?TC-RWfq$=3#}
zpPybcMYo^jGgk*HUD9+$Z7?GvMr8O~+VxNBPM|9A9}#b)*Z!PtI}o<wb4Q}=TAr&Y
z2qsNu`>-jz<+{+VWmUcf>NT-t{$j~oZKPENwR{B66#=!o#&^7tUsBGz*xg%eg_OXy
z_r(vl6&LN)C};0a0nZNv;!x>ybVq+}Ux9_zdN?zd9WbGev4qw9F|7EJV8O6N#gRDx
z9{e$dc#{t~t8)MOh0td}bg5~8Ahq|~ZWJDF)rr?)Qs%lC(6?;p0KS$AhMu9-8bJUY
z!KqjM%x@9uDUSZ4!UBL(Y(IeTR6t>2FEUu0Y}N<B4FfeE>0?JP2&D-yb-@~(n}W?9
z0s(Hqn2To}lvv9^tJOXLxAfZLuA^y~Z+j=vJNBHTQb-d+E7#8h5i%%`4)Cj5%I{Dv
z+_HFL4CJh4pIKs3_Ism1tbLBr`n1adXRO;@s?=42{eafUF2{o8y`3)l96QP%U>FsN
zJ7zzG9Ou>7*>p`b=^T6RV=!}f<MCTo$LIR;%>rooGpF}E`m;K;vL_393#w26ahwH}
zQ>xw4X}axPjLYO(b;Gv?ehf%C&hnQQfFU%MyJYM*09Yyq2u-4^26ov1Xe<C6+n{>m
zGFEktvjMm#gkw2G`XIZG!9VtJZ;I1xV?O@$C<Zo@aFkus%rm~b?rh=5A`=v_I9Q50
zd$r#bk)hX<bwA%VQeB_KJ*l;_MQE-Xuq2f?%;oJ@eX=zR%r!6{;Wlh~1_Xc_;9WZF
z>lo3SRAA)c-!o(ukc1eF6whG=Ce1qxL&n!NAx>068HDMmjhpw_cgnN)_;t`G&*n5o
z@It64g%|QV^#CK1;;`sSgJQrACJN%~5LC4KBXgi!2M+H|@^_VL_9}Rn;Nl;-NnFt7
z5Xoh5dvWNb=(<w798A+BN&AK6eLMpxtMtE76)%C*AY>zmowm0)F{%;<R_<&pXf1sz
z0R*NND4c56!&VjB7dJzZqn~^MRla;Rr=Hnn`NL6}rJ?c!xiEH$iTL453Q=b<`XG?@
zSvy|&@><A8@-?$jyVRfa{bGOndO7Ej3gt_im^uWUmv~1j|HYmn$Z$OlC>=zoKL+>i
z+Voo#)?grqRXJL%-A;K02-ZPBZ7-`GC&i8h02yU4(a;ie11lBt+`tr{6Ai~MMUhR#
zdU>`V1qe&f6OGWV0i@KQ0jY^8DVVw!3{}&(payrDPzPedI6yQ0ocguZAi;-eeY6Gv
z3gx)cW7VCmp$#~OP22UO6m*fWciBfOI;NI@UkAzwtL@)e=r0Tgr@)V?sOmuQ!vL6<
zWwk-{k-N98L?U5ZnPYGu8$375EKZJ}5&kg=;e#^9%TytQjfqb{fCmX34j*h3+5oW8
zz4b89eFW%~<iR4VvVl`y>bxhLBkKSiq5;4<eQklpYy{%*LQ#C5XEQmEf5<tZB^HrM
zwYP(+3<6{U(h-UJU-iWhQcq8B<oZ`Y(Z3GiZA^(D3S#QiQgpBTTJtzim7lQzp6b0g
z7>=(ygO3r6fN-33mdCga&!b{n=L-;Ww)U627(q|QF!93Q$%TQEU<+*m9>cMB_XHE>
z<k$NMDp+I0fFeBIUuacM7#0e5ox#8oIsgzzC{#`7y^+L~7@Qacv^d!ybUw(3jy6E_
zp9&31N4k}q{L$R|KqKf1PAJ7bA~hiMtb=h~7nY&z{upozh}`&M=~57o`HJGT5lb^C
zRDtB?0n}(QpsDufnW!M<CLNU3KpTa-3Dic62LmyfRu3Ti6@V~W<`O7GyGtDAWE=Sz
z+?PLOgX(xjROTgC5(TMj6qF$v&*SS@ehgL^=T|k~n>&sXsX#>*pxUDWy}d*vW|Iro
z^@;5=@;Yu~SoXjspdU=YPUMjEtQ#Svd-hJ1Kqhq_^`RQuo{9|y^@$UMCc*`5^xU^6
z!Z<-+Fsd^Rkx2vG7rCctmXWUqVlR-AO>@tF)ZG><ss5UJ`(bN-jjLc4wqNs{SUd#x
z*YNDGG=gmb?BxF|jr>X@*cSK!nf#i;|G%mA|6{|m-bbjmHu|mfzt+DD**6|6aY!$o
zj0jm!Ubq9~iy;7&8~QnlzrM-_q*Aqj%Ft-6`Z*_nYR3Hq7U@M}ek67f)K6Wf@E#!4
zw8X4VB|I>*;5HB7W=QZb=-H2DmM*55hnc20Ee+Xp%JV~qJnQJ40)U2CmZK9EeNOUe
zHPGJ9A}bGE7tNZ&IYYt<O*${~4Oo8~K!K(Q6~NC{nD*>y5&eIqtKVETAE+Z~8E@8Z
zUtG?O-M<PbNp{0-7|XH)Xc$O?9x5dEts~|6WnRy{EsRn-4!CP`DXNSH({=={P2TKU
z8ndQ3p1S|Lso0tEkEl2U;5x;N%WqUh0O@XKOAr8LzM(EuXc_(aQtZvTAbS0ePmYDO
z-Vt^<;V!ydg>M7sW;I}aF;204Jd~hGj8VPEp0NbIZ=PwvCgXp*f)`*AjEkJ+4ijOL
zCy=G;FtaQuUuD2vv&=z<GBA%oADzMOjm&#6euUPX3`{;bp!>g+3<71{d1od?9}Ev<
zqIng5=1QL}6VPnk&#SBaAhr*<MKlQ<^o`vJJuVxJ%vpKuYFQquedOD6<;#WTu5!1{
zvm(ycA1nYZJq@U<E@n24ug^F^*6JE95aALpoFDc&+<&yDVr*XhjJ!R^pz&{|B`m26
zWA!w!2q;xM``ar1VfWW0x004SF*7|t!(!V>S0=0&avl%t0d9<utfXSK`m0c2$mJ!8
z1GMt9&|Laylecw1BSY-G8(4jbk>Evu;6+%cAilpYFdo27Jo`8L);}*Ili&L{wmHUY
zIZ!04i<!Rx+{&(ElS66*f=;rVYFSxr<Unoh^&P2j>@!Jm9(#Uibukyn+jZQnBBy6g
z=<-^0rYN?$s8#}6RPZwNnR!RzE%~rCY<B%OOCTbv2lB^Tq6?>GtvWRA+ok5*50wcj
z6a**U2y716L3VQ{Bg|@VLo(iIS!TzpM^>RgdxsiBxUO3-fS!-P<Vn%IF*ROKMlkOH
zW^(ul>taElv<C<%KWzn|1wgy9bdh+07nXU!e2lEsL<Fyy+Tt3oMIX=R(@Y?7wqhjP
zhE{EGe86@o%U#iFWfvDPQ;tJmPRSUk>hSX9+7{s@I>20=hei*#V-KGL<8ULBv72ky
zmF732#FYSr%7J{r^|bMEZzQr}hn27Q!RxJ&$0u3EQA?hirC?fvBpyh|4y5kr#X(`q
zl9}WNx1{LT3PM*7fZ8zGHQ6O=g+B+jVd#pMMLtvn4F3I{fd2zG2hP|9sBoIUF)H#m
z-rEJ!jh@<1zVr4J7&@~U5=0j#j(<4HNC*7XZeky(;rq@rNn!C|NQdOi)@(<T+z{7`
znOe}#H<|~56w(?#+ye3=C76b261M0y2nz(=*r_g_zYTlrDy$0HJ!$OmI%?h+=_^!S
z?|GR~H*Cagq{=%ROhT#+mAb4=7H4&X8JV{TwtCtMpa@ZcS%}m!qrO{KC*>N2x*g@@
z<k*4Y&k2Ub%H%2lAy>Ekd|}P>ZyGs&<Bn;6_=2N1h(YDcX5)eUL7=~+>Y3cBAj}4e
zNqU2$4!$l%la!*;;ZeSa(37(wjudh*mCEfN7Iv6D!y+9}FYE=%Qg*vE?ZnTXHzvEM
z-LIU-OCoYlN;<C+J~!&in7Jnc<|0f)KpX%E^KZ}uN1l;~gSM~h1lq)?3knMaQ?A#L
zYLT@Zo-6)k2T5iO&LiF!anH=I*zRX6?imjhUGa8rtE-HqXS|W1XLcU6NnqRLS_S6R
z9A(Ql?7?_OPN?H7&6ew0Gr#^m8r51_hZ&jRC|_5GXYd7Lq*)LWp-!`)jdmIgdK$qO
z>Ux0YF&4a{zkLVW#@>t>5}^OWydQhDb2G2GSN^a$AJN_EJ@8IY#!H}ZtgIN7ct~k(
zw<tVy%&tLP<dRd$lGWt4c+q*`13|(yWM6Aitazo@-hq52?cVly>#$!XVr#c<pi(?3
z*3WO4)TFW@^kt<N7(Xnzow9%_@e)v=Os3InG_orVdn1AWiWY$E5YA_zIbkFSqGbGw
zieiR2Z2k-HG8%LlZn~<dcUgcQ*t1J9&he(HYP)~-y6c5E0pbY8Oo1Rb^vy-q+a$$n
zgD1Fun1=F<!YlUTm-kN3Yu-NkY2a`&Z|aQghXnh3Eeh;8T@P9|dy78~Y^e!N?KXC7
z59`Jp@Orkm*nOmiT5(PFF|yx=HG53Xs~&n*jvXmZNpfoyebc|PC;0B#!DMV~<p4*-
z0P+aN0@2*GryhxWO$sH(l6W2L?8;CAaH1xkgISM+4LIpB_B4Z2?^~--0o2^_e`Imk
zh9_pw4zy2MF*!~H(UWR6GKZ0?G79?A##RS5qaFLMVBkfWrUgwMo!uR6(B^hY83KAa
zMva~5@(eiPjbt2uzk|KERl%@(mdX&0NZzSWo{TGLtD>-|wb-S{n#ayljWtMRE1OwX
zL3B8$bffKx-}o`XZd*3HNbJ;2ujdU!`c3Ak7;J3fQnt2ce|=cN__A0`@YB+;InSo<
zn%z@7r>Q-spkeLayxsi}yGFqUVylBm-3V@qwR6oB;$EJITl>5FOHG2qta!D<u*n1W
zrfsXmsq>ptYCDRit%Cf8bDKzUuf1(8U9ZEf8jg~rSXNq0B0Pg>`8<kO74zOX?jG&F
z`gsLad61v8hM8_=-@~-N)Vk9afn)HRetEAB^zpPI=E6H)K>$X;5c1=d2XmEUpfa`!
zqa(~RpNo0ye8pHtPUzsQOqUV%n4V`79yCn#j+}tnFl%UV#BS?=-GPf$i(qlz=CN0P
z8v}Av42@kKfs+JsX+zwb>1T)=_j6r#WHwl*V8Vs*j4`pyM@%9~;P*E?a{WdJ>}+d4
z^YS9zuEx?1Upt&^N%bpMMY~1tLHU6_4klZUR(gTf?wC={(y(BzUQ4jFkFWS)r{b-U
z%GShWaWA_ymb-}-aIeFx#Bptw!wbM;IDyCLv6>p3evhm|y;c;X#z03*D>`Q>OxyO>
zIuI>$bUacry)pfWI=+7M*G`d`QRK?Q{UwaA{55==0>>!#01|%vpR*sDd<*o%kb1A>
z`JRJum$;lmlhhg+Mh~7aBIH%Ie&DKR7@P&#%4MbIVDw@I42Dm1nRbHN3#Y^jbO=k#
z#P9lS7sB+j*xyyUAF`1MIB5~mKwFY8G9z<wk4gPNe<>+v4l_HDXg~?m^*}dhW=EGC
zslc35k8wias%1WApc+hvy#g+v52iExg%^uhmcC%*eoVu^EaJ!O9R3Dod#8cK3+0^;
z)46vaU-t@6$VU}uVV>$3Nc-@7Rr^X{W<WTEyLFrf%iNMSz?BxOruin|VhsixFey3_
z9nl9)xfNmZw`c-eIDO9wgnj5I&t5`GzU6$(Fe(tX!%<5ZX+1_xP%TR2Ue&{a$ST%8
zTJx;yQJy<96wL#h^8AB&Twl>(@~{>(q$8cmK*`5cRb$YcfqQrct{Y~(WRaPS<}>ff
z>M<;WA3y%*y#OLDR{!UE>vO_ABkLpaN)46noAhBS9s9fMb13BM0T_f)#mpb&L_1<=
z9T-)Rgj>J^!F(eh1|NXQcIhD#%s6BS3@-SyXe$k%E$E3|1FCw>5rlqS&<fbN3Bf^7
zII~f3w+R5I0Q~01T<FpyoMg}#$_W^NX8GHzOE-QT-u%(eykp-ytU@W87SRs-HiH9z
zSvpSV<q?s3?iPWBdsphqPN(r6A*DNu;ec?5jQnmt%&6$kuKf?Bm_vNke8dbBC;(q^
zD8<$V(o$hSc&TFuR4vt<A3#^ja&$Ll*oD;$|H$!Y0KjAbBYGaJ#nyrLUKA`6HV#J2
z{4w$Cl0pZ7gAJ(g|2t9x8^?CXsMKMOQu68X)UujCrnH?6N_$!`6WYL#>;miH+MqhX
z+?14%@GbE8w}~S_f6F9iGIT)^R)otx{+lTI!L||(AYf;i`wS?+&Ot`g0j<T*dXW^-
zjUJW*FncuF<;VLcc~~ROH}B;FvrGQ!rm0D*vp)#||6pba2}*ojfC2Qhtv?VFDqN}W
z0z+P4NMuy^a0fPDJQag!ar-GR?jk{#6GIb!_7wg~wDo!;W<Zw!jV-|2I`L94P|Al2
zeio8hq&JbhP7OLv>zFGR^uXmSA?Uo$G$_Iu^pO%IeMwy$e#Q}T@oK;rj9=>e5P2ql
zZ{(BLL*1Awnc@{d$Geejx2M8m7jxZO85j=&J=lQwse=g)uje557f*9a{gf>62XzR8
zG9keCe*rV`MWAX@_L!9}6!-%%L_}Gl>Bqr3Y}k)!y77U0n19z+;WI%F-PY~P7`6ct
z00-ux7qbK~By+y7dK@#**c{1^L_7Qx(+BNL15%#%64_O#yE61(;>Skn^KKWoAnz>K
z1KA0?jXy^4A^rJ-+hCk!{K;vBXPC>4Fdjq0AqOTsOj4r9z(8dnn5jnAuKnEYDwJM>
z<RZ^COF$`cZ=lEqR12%G6=HzOse{R8Jy5j#!N@Fc$EhH&5ctE=k|#go_f^`s-W=TC
zKl4`bo?ln~V-@=UkBOH*q_uz8*I)Ovz=k)!Jo}etV_5(@c7ApDu*>VOH1aErU|Rqy
zh<+8wUugu}0>9D-wgs?*=ocFKl}4~EfE`5tzeOWkUK@*I-8uW2B!@yr%eZ|WJqt|S
zBD?wsFXOIi9<w+D&pI9RnMb$G65iTLDzTh9*rR6g+LGX@_6`41sl>9<a!)pu=Vq04
z(5tos)==MzJW9{VB;4Z;PQ4S!yJy5MG_~on<+<7C*1E8eRA^+_GLM=qcgsNQ45Rm3
zT$&@wsW)~#EAVttU-=t2USk{eAL0^EH|A%VmG)_%BblwzPK5pZ{X1KKav21f&w>2M
zyB)aN_%rhepj-2>D$EF5v;Fakkl&v{fjfB4MeIp$8~oJ#3X=r5&nEXj7!2EYz0H`J
zmL23h6ZW{Uu0;PQyD}|LQ2*0PvA(^E7j#=nE>_<4e2ewUKiQRRYxOvG%Ki4y4FZLw
z0WEfpunFT%hD%yTy=9%JE``a+w2yn*3;VgD5Ox9PpV?Y-trW;aWe!VJIr=TuhRS=>
zhJ_?ad-TOp*J5J_)xGJyb?*eMfluIj;p}aFJIMd-p;kS{ODSYo=8X(ymFIn>nalY}
zv#vb_AD+8L(>CiFW;6O8@16(zK2l(QU*Q2nb1J-e=^1R$;9z2`yE%UOyGte{_B@ym
zNM1J&!yf+g2@+kEJ6f>=d6V9s&)7UGpjKDPhhh$QaG@>-7h+~^@ZQjBXXDl?Ho!?Q
zq{}a~oLdu#ff){q3^#Eu3g&G?xBB*Kg}?31zwgvMNz_dr<bYvP2C-3~1ng;sAray+
ztgfA#S>tVkP!ME$xUY2?M*1quumqw|XBy;UV6M{(5uPL~4}CCv?_f-e{pySWBc<Cn
z(ePh`v8tkKX2FQXouZM~Fb|>(i$p6MT0^*XDB2?0kwJ#dmwVDXLvS<8ypBV?Gtm_>
z`8VL>x4Tx;j;+=2bsp|@2iC5BcSEdy51%X!C5j5>_s~Y}AR%4qg}x!7_aQG6Gn*aG
zT+d)#<o&4Sa{ow8_X|{jlfjFtK{7K)^Nf4ye9gC-Dpz2mW@2YHzIns!(v>wNjllnP
zivQ!i%1rpWPz#8P*4ya@i^<$EL<p%@&8Yy2hFMZ{pd5pt7Uh&s&|H{)ah*BR8g;RS
z;)Gq}itU!oTKarG1sx*hs7NKT?wUsSrN1Kf-}g;~$@-L>keVq(QZr<>fdFP$%=HG^
zixdiyh0U+=7MAs~IcRdYT<CM-&)hgGc-yCJEYHs)Fmlzjur6wn<FS8AERB%kw-Y3~
zc|nh&LgdJI&-`Ei^tSNse7_9UN<s>S=RY?IEI4l4oB=bMr_t#h!p&ODrrhwGv+J!Z
z%CCI)*)85H=4(XIzT(9S*TySjnwzu93|8ms4GRUz19WdXdYzgQ7cUj`R2Kf`G(fEu
z!T;xji+XqVI|)AsDNfUj$WBnr)q?Fbr0KG}3-Lyj36w%35N_xYQtye0d2r(>=%({$
zS(w^dLles2xZ9c03}P1pqcx7HLV}91TUp<t{xT3&PvZSkev`PTIC7rV-QuHq$k)CG
zTd85{;aRza+o-cF2#<+ojpqnfujcHj?H*X{bmVB}EZx2#8<O*8@as$2H5U2kd^GP;
z8teV>o!0_~HNAl=oxR_V3&iPvOP%w_qK1m8AXf~lSwE>#X9@**Ejh896|_!JBVg-@
zRc-3th6)$mip+jTC^BuDw>JGU>bQ58sA%Xm%u7xvv-Rb7H%$2VP>iWEQQeB*%_~tx
z*+j{ZL1-XL6#I}Wic2zeHumC@n$*jFq%2!q*E4MtYbsZhMOv9U7x^CFFMXHADdY2j
z)a`&oPH0i=TFYNiL8=KVRGioD{_;QVWR=~qudjkAdKzA!rj@*r%^I+!K7zbjAwP`}
zNPjn5aG};t*&^be`bH(A4i{TNOB>7Arb$H~J8)vbZ~i(l@5f+TNN7t2=l?|DU&{aw
zvN)`5cNaCAA(Gl}>->z>p60UVP*{YO+v?{zY?%eUoa*hkn;}xF>ttBhNaWyf-u#4T
zvxGYIq(7%wtdmR_>k&hcx@TyUW_`XL!h26x@}T+OZbP0$E>QfMqJqnhLd)ZIj;;rS
zL)94~1ocLeucpoFxCkcb2ud6&oZL1bs7=eJ+0m1#%2C|#bIb91`g!&$e0K6vjIIyT
zm9Fet<o&mgetiqBAzV#E*r02QkL$Y^pQ27bZs%|l(P(X;3dz-A*{3*{A%Z*$3p+YD
zH+WXE?)sBTF7i;{$SU<Hqp-3E2VI9BUj=Pn`|fIU(83Hm`p*rAtV`CeyF*vgDc+_B
z+?!AiqVDkfblezvc-L?4eTPPS$MvjvJA1mid<K19rzE6UIx5HkZQTwwySKar0jmr@
zG!_(hW7YWn>bNK1h;{!AgR(BQc_UvgHoS)!mNUQXx{f4YXqeTeUThdZ5qYEO6Y$i7
z-gZ@`nWD;0?dJ*}EO5HTp6Ctj-=NFgeAnt_<Ctqx_8nsSkLV+b8YqTc&+Hwt$sli_
zd7eRRSR?0CA>D7lj_=|^S7<N|DUcu5B+F@mFomMxB?*i9JXV9ojTe&`+}_sB_pAk3
zYU$Vd7>hjFZe^QMZ}q17mgBC(eGHrWx6{nh=*aG~<E$exw>Sl3v4dX8%9z!4V8BhE
z4cq)$@U(JAq@U}!V-(BeEepH(d_~`~+OEg3z-3nAdcLI>B9PXCasMr?ecdQGo*_Uv
z5<yA>wNZw#E6B8luIzN0)G_$mF`*z+9h*<u$87_hr3%xI@I89a&L)Jej8qVPy&Wb6
z+%x@ua?f>)dom%++@uW6(j&v$Gc`&fueG7uW&@EQ(wm%{lg<ifhLjst7BA7V*7FKk
zY&L&^1>Qe6cWF6)(^&{ir~Dp&KZVesqli=5xeeXNT@8#OVJ2m|_Y;XdniF9sayR@*
z8{6B}i#Rt{lJ4+9&mrA2VyI^uYTJc^`Q#n4^$i5=e49wE4*YXzZ|Q$`I}IjGS^Afn
z2+9(;sh@8;S5U}(1l-9+r3SGEbC^1A3sr(>G7pFJrV~3fUqdAesl#d8k@+<0u*0+t
zxLF^>6*>iWZUhob{o6RwZvpQO?ya|a{#)#SLvX)a0l;3%;lR*mE>ObEyZ0fl;TqZy
zJdMcZ?hTLj&C5GYZKyKUml^J<X7r;G9;Q(V40(`;fyWG)n)dGEE*O(<MJT=&{mU)9
zw{~UI1;q~)KKx7forxglBaEY^h<>CYtJ@}Tn31ATEv@O10d7{>e7=K`*Rj)7bFuJ>
z>@=Sm7f;F8g%0^h_vT#Z=Idh-+bW6sgcY;bSH5B2IE*og{;4wHc#C?!CUSS!OOw=~
z81Q?l`G|8(J^HI(sZerAIaeGni?*Qlka(d>M&IheF8zF3T+dkuvWv(?l$38OL6nx%
z;b1M7_H{?hU-`P~18C>d@3G+gW5o4P)mNbggq-uA7V|ZV7+!-2OPM0Z;VjkA`x(!i
z9g2~+t)NgO`Dnx_lGM7{X6mX<%#|z=`u;L5+LJcmQr&Hyo^Nx`^@n8DuYzLmzlA&2
znN|e><XdVp)Of4~b^N{sY^Z^j&!N1QD`4=HoS5OT=9tY;G;UT|gg)IA6;Gq>z-T5f
zPQlai*^+IU=Dfy<y?I01{_jfMe}{P<t9!Em3?6W;+jiheW>3aUMp;4m#HnEFxmh<O
z-b_DNsd_V&b%@nx9CcD!t)~^MnNj0hG^K-k^iQ=yL22*q9Q{`PB*1vA3+Z>q$MQ}$
zyi3qg2rWxR$h&k|{XiX~aziD=M-BR}h9HVfzk9HSxs5VTp(&=jV`fLfYwLLp8H`R{
zMYvi__Goi@bwrDMB!qvLSD0RSm1iX;{9CeQv38vrGTg4MxR#)9Ru~;BP~6nktvn}C
zTRzuCezf1u+F>|~iRB*kM%tqO^g@A))c`pRl{;SosG{QP-W02QV9KYUzntaYiNx|l
z3HF(A@3cN?hs*{o3mWcpibjs0hp727+RqJ2YU};P_7iC8FPd{Mfi5{#fp2#gjSk3&
z4*j+GFTMIM<Q`Kgjn0iQ;^JU=#$T(bs*pHZAv74`P--Y^woq7L9>g4OPu<AZ-4Os?
zFndzKTpsc^dqC)Gh>W1A757%xIplGGdwWrPGXBK6<M*?_WiB!>Q^D{rz8F(=OLeOu
zLsGrG^^4k(60QPXNp<=eq0EBYKJ;}V+OalID{Ws0Wc05J6x2UjxH=k6pOXVygixt%
zI!!n3rRH3v3bk*NsrxqI@!!GS+21PNHtm@w{m@J}u1hfe_Bn$=B=1gFd%DZeE-7SU
zU;jNZN5&>Pkz$nwmKLfmA1*T)doWdKIloUT_*WrdvMvIr=DX{*bm<#Y`m;*>;ge#|
zB%oV<acXLF4a;iMg*j=cjqd9i0p};WiWmU%YoVy?qM=$9DdhdiJFY{0-hce=4q|cI
zdv?+0^|miyVd$IA-!5HI!dQ~vpQAcP;z&hWpIU7(*RzoRCL<}L)K9wPYYmOudC=61
z=`ah8s5ya5-Jl5uyMWoS=#4^N<?h#k4REaw*(Ku-rvT49d~2idXNt0}$DG_=>5|bu
zo?MK|$oWwF^e<{p^H{q=&ZP(F8`vUpbR7oJm8*Z!zG%EgJ-CaR?gHneMgc4vX`=$q
zAW~1RZPGnLr6|s8KmT1E|EJWF@DXMaQqkuRDBo@Cz+G2|+RQH@3-ma13}*L{MPgwd
z0i3SIb!C@!rfz8?mSI7~+z+=~TP;aV{zrC)Xi$SrrX^8xPvl$b%pk(xO)u1r<gq63
z0(xgw$e<2q)ZrfM8lQU69&V)34B^%d<D9>HNBeG8+2_5+Z8mSDyXtqCQW=O}c36J#
z-C6gX7I;pq8#35{qjsTKC_Q9Qs3^a}P!2jnqpvD^b;;igv5KN|iwX81EY}M^?gX8p
zaifQnbR|yeh60SB(*D1d8G&*;jz@6nVsNylr9tDvyU~pi3wAEGvlRPjA~3Z{EJCOH
z|BBExWU2Y&u?9HMO_5h?`rDxMwV$pCxDK`gYnH`-BJy;2VK$($f~33F%3YsV+I+9a
z@^TZ9RsahqJsI%V-PgPy12iHuJ?tqi@)u^nda$gGSDdI%Sitp_S4efl%&xnuln*!K
z&LabYtN8b*clyi|JeJBH-+4>cc(6I%Edn>1Cbu<QOZixQfu3LS#dpc+zgX($g;FzK
z<Z%N721vXHbxRr%EKg^nA+S9Z)eTg>qy$YD$I8W~pfK{;Sy}a?AB5=m;0agi(pmLf
zoMKQTla<ap(y6ZBB3gp!u;qnU?srN6RxKkSGAB@2eBgsj@BK$2N6|wg`So`qCsZv*
z8k6v~T_8xu=ksjCbPly!5XC~f<K;EPa!BPc-?!Ys2Drzq9}e~x;seZI<g_<Z)&?49
zs*PZRJ;@(y3<=V#LAN}DUNwJGAiV=&w>Mj#S-s~`POBdv@N5}&@#CuX>9dV1`W1gt
zIXA`kDt{i)TN1O0Uxz)!f4mXOA4O+53J9A^f!AP$Ib4rI<OB_6A*9@KP^Um;ZJCf3
zzfjUg2C)g_0-5Nuh+>Eb`hK}%I>OigDY!q?q655qr)E#4al2RLv);GTBk|v$nbbYd
z9wuGRHAnG0ozykZ7^XJWU=5qBkS0oBnjV3nn@#UQ^UdJT4>)1bHP;8lX>!;KOE@+J
z8|FQx*L0%>@6dch&A52yfK@sF)vCWUx-w~Gya7QTYd8gm@VgA{Bde<dLJrzy3tA*M
zy2~lZ8ol+~h;1!u8#RIwio;yz4H$Cu9ik=azwS+i+Rq>T0Qt-Py}_-^SI_>nZ9hEN
zAm9VaoE!V(S?>(;riNsp^fNtbXyi-cIZqcwOS3IRYcAWTdv$_(F<<!V;JfzqX4-)j
zzM`P=j9>qsYWU{`xAu->nG0lUj<m&@&0^!a&=7>a2Cs5WIaiGXU02E2)8eyVX;%C!
zp(o4<isQBHXTA8NT`#V20K9-!clj0*|5@d#X1K|>{3O&+8-7jQ>V=#MI=POpi^XZH
z-9`M5HQug7$Z<MqL)@A@?S}bPUzpkVEk>pyX+G^qDvX#dt=Rvi*;<c)3U~AXXKUr3
z`V8Jj3DZ8pE)*GtWKgd+w5hRoI#+&_rPztMg`_cDprQ{=NC<dVL%F-9oE!w1rbtk2
zJ?`M(TT)t5+nAin?wEenD07?C8bjr18Na)aH>nQLrrKh0#Kr&GN2~|S536R>JF+F!
zpmva*^%DO`R(Nz}nj3dqzC*-z=4}g6nQDY^&@@W(L^{F?YJ%ilpeVOxS|f4q(jWN_
zD1(a{UC6#GHCUbrfO6rIw47>p8nJf5N41OPIYlsn9LF$6I)0hqglQF*jEoi594KxU
zI<q%LGsv|q6Nm3&xc9X+8N$2-sS&d|Z`;4_FlD`rG3Fr=4%RWg;mZv|z~I=5)!Q>&
zc;_tWWp?)i9yQnY8Bty(c?dN?W(W)SQ2fr}76aF`GdQgf1RCX&8ci=BSM#o=I31lP
zs8|)u{w6LM193s|-!)S_UHZydB{jmr_;;cAnnctt6^~^ezc2x+gW(f|*6fY)rX+fV
z)sR=^V;zb!wE7eIBI#u?I`bA8H$J-Ay(uI9hIW$gc4m9?QIEtAA@<F_>3Gyz5n4x)
ztbS1WNCSrP8y=rerDg=V1(Lyh@|nlFCMx1pU}+KLnbZffI4C-&mQz!2@ftg`n<2R?
zWK&dt6+iw4(ZSjMORsMP7o$QaWQ3iLLQ;Q>XuPJ@X14ymzFYli_R{kUjJZ~9D*{U`
zIO;M5>>R;smG!!z<D^a*xLIPuq>s9KbT$!|VqY-ONNm|H@teL@^^W-cvcvnzHBZYw
z^Y(YA|JzoMJSL7(^Fe0zcSUo#I{ZQA_Rsx4?0sicQ)%1o45A{Uh>D0vQ7khAL`8au
z6-H6fQ3yRC0!kH-P7={klxi88l%S|6EdtU@P^3tU1qeMr2$32{AP~~dvvEclzu|r7
zJ8PY_&X4(*g=@L@e#(7c*LAlxCu_;Uhpj~p<_kQoVS&fZWTlcfYfWVMFnl+=nStPp
zsZq#c4GI`b%_WbE5O6`~Z{l3SDO*Y#+cNW7&2W+}=~qm46s9}Z7UTvH!%^NZaGw)z
z9KC~+v`L@X$K1Rto2r9SHLzgE!-t!(6=&aCf7=_#PX32Dp3FJ`jNJ2r?2wI@+Jt-}
zdbO22JmVi_J#F&{2T?pmw#&OL0jvBWyP7wTRU<P3k-e7_W*r2_?o#0ZH|t0*3SSJk
zub7gEWX-OY`F8dfy~mdi-gehe%dO&|WNWe33swL9UrF4GS0%{S`A*@h#_g~@Gidl*
z)!IgsL7CG=hO$JVo7!{4qJ8<#VG306+92;i*r?CAL0$pO%{BYj-qHOru9;6IGdhVv
zuEzpkAZ$0T*xG(7RmpL96!nU!%Kz~A4bjhZ5hVZZn(#m1ZmuSs^{8hB``<^O&6Ipo
zPu{73YOPpHKCvfze-+YWXK9P8|JE+krW~V@d|04;?bgR_zKy4I;_-6})Sj`}V*OWY
zpH_&Cm~7=<iueKX1pN2L7SxvtH-GoWw|zz!<PG`Lv@_3lWXYrpSdF4|FN!AN<n>K9
zI4CoW_zVm6Ee+os=a0Z#{f9t3pud2(i6E2+8Y3?DTb%>Dny(p8!>*efcRRl=$z_X#
zW^Kbs((2{RwnlwKiqDQTn)X;R;xJ^~8zS=oHSSLy-S>X^`8loW-IWBRKZ(`%Oh8BO
zZZJ;dvi;2$r<$FI^0rsE_TzxC_7m)ZxqNqYNJmU{aU^-3Vmh&}tJd?b$E!k$pGUDZ
z<I;KNevIjkq1@*{o~+aO%1SkWUBunsjn7tn6NJ9IWQ3iI<8|vy9HRE$jqcCXn>Z%@
zHC&hKNULd;kvDj87m;YPK-<Qdai2K$SEi=V!Crg~fBY!05)7U3?i0Nxi~aMPzyDCb
zGkpgXf?7?6GuAq|UNBEpT4wDv+A3M9k0|i{Au}-ZX8FLpC3BzKI7Nn${2A$dNZ*eC
z;0~_WGqk9!yyQi#)T-PLrPIn}iF-ZRw~*0_#I}5sOMdvzh7yF%rkOo!&Fr(&V;shQ
zY8`8ft_T>eK4E6xr=Z;I7CqFNwK9eX!yB`+)*%iInVB#`b=o~8t7MM(WgNElf55Uk
zZV-&~Tfd0UodA69#`nAU%`N<e&lTiaekgSIYdT^5ALDf*E+QYugU4MwEL>5psHW2N
z6YWL39nBzMV8^+-$hLr%82&5%hcWqTX95M%d?${*hsm2rKC8dd>cI)bdzs~-_7F_V
zA0wlw^>rr9CN|x>$1^d~!Je(<aIDxtD05;srx8DI@zDbnxhN0eKbuRS|E9hnW<F|-
zE{TLa$u|zqxNaj^uca0F6Ok)&IYN#IaXVP2U5ZFR>!#fX;+wmYuB@o?Iby4M_-K`a
zh)+8MY5LJ)HrV5<YIEg1gdezFw2-Cc21B{nt7FGDs-MkT8`RWiV%a13=5bbxeWRbr
zipr7tt=`N|U78$X*V|k^H%=j{5O&R;FLEayF5av(94Go)#MkqA@Sor!AN%ZjOZ$|J
zRKx<!6U^TeNU0HPFke|`k?U)aPai{1-$6x+J<5*!&3j#Dl`UEIp=V>Z&?YWj$DMG-
zwAaE{W{4wUMEd3Y|B@iQ6GmFuWvsWBuQX<5oFa+!hSrEU+V61IZjSbKJOiWQ8vN0c
z!C6j;HwL4_ug&y$CvsIzZn7wtZSoq^G(pyBt88RGA>iI1c<4nU?JFQF?*8WKzplj}
zKR_gnep0Yv`!VC-bPuJ`EB2<a3d=jG?X@$)p2m{dYaQ(apW+4%-^{Pk?bt^!Abwu&
zD8{Ql+J4#wR!cy8yyIl>Z|;<?J$K3A>+P50r{B%L@i&uVz8+U8fMI6!<6tUglCSHZ
zZd7?99+J}#103RJSduX{kMP0cx7Q%mqskg}B+5+QRsZ?D+{47N<Td{J6_qan8DlR&
z?Ee2Ed>3x%J*}|JLb9(uuRFa4TzZTNyvn>sg|=C|GTm;g0p4X4=JmFn7*|ec=#VcQ
z*L0vA!SV;j9LRoOt?YlxXlj?xDE7i#x6CUio6G;Mdf!UCu~XEQvj48xOvYhkPS^DK
zX47QELB*AkhhTO2!w>Sd8h9zs#BqmwJ~Oc3F6s#{%J}$ij=t%Br1v%nr|`-)AqV?&
z(k8;N?CRsj{4FmsClF!U`UF)@^z3wucx9y5+m4r#f#WJ~>|3iiAfZ4;d=V@8Z%=v9
zHT(F*2FtE(nPcjnRtcyYe7XW+%<*;x2-R^Gh3cvgLJaTSUm4z%OZzvoB@&$4(0=Kg
z#r)1a2?}Kjq!Etxfryfo1#G_s%lpLd^!kIu%%14u*{TDox#stp7jZns9~@7<b@Py{
zrvP&F!SL;`$NL~J>BxUp-^88^BjfAR`EF?K&(VO3Kbu0|yr(d2%vGp}R(I(n%k96A
zI)u1YeIZkFto{sQrtu6yt%lX@RMc^ims_k+y3c>?8-3hb29oYx!v%i$_OXBc%wW_)
zn69SOv!M=gt4!g!c!*Un>@+O1we-{(SYX5h>TN&DI&#g~cNxtk^N2NNtF9UEP}G*?
za0HMuoBuaja3zqcKCBPUOs|agz-7J_Kc1yHAs(FB^HjRF35wB$i?lnut@kzl!KK`p
z`6A|z5FG%TMi`I`K_2{ikFOmx5uG^vT5+jQ0t~@i4i@B%${POxp-lCYjIP)(P*86F
z9j_CKFb>abW@{J>TU(t{wD!7tX+I|P6~2<l{4}%}E(unw<*u4!ED2sjg%ZKhP_dM(
ze#q8|Gt|uHYocmj<stxb_7#ntZF05FDDW#TBfZWR4)yMJRxzTKKUQKli)9!d_`>OS
zJ^e<q{z>)*Kxf6?lTP}$?N?P}{TusCd_r4HYRmO+!?ui;HK3|zvd@Evpb97YPPS!D
zPS<}9%{|8(Wb)StuUiD;zgyQ}d(Co``sM)5*Q|%zGC3wsmzid@9Kv0aYS_!v%^9>~
zX6=-r_J}b;3rc3`MK`Q#r6<gn**Ld5uXz2~o{^z32qB~Z#cyYF-DUxWApecuJ!TQF
z1E|5c-?^rkenB4>sa-8zs#+dREGW!>h*(kV00rmlpW;piRmY^-U(1NOl*uSlE(EkL
z(ja8*4STjN0b_f-HVDX+Umx(_@zD!mWXEQZb=Riw(bSG;h7-PMKLM*Uldl%9mFxof
z6#2L=Mh!&m@T!%@^2E3fr3W@B<f;5t0_V6dZ#v=v{aYya+CkpTcC~fi9{=Zi{DR-B
z|7S0Num74+Ih+)jK#?j{#h;dZo&CWzZRhDdDzEZev?4ud0p7jQO2rg^X45LC*R>kG
zyaET%dq^s&Hc9iaJ0OJg9Ry7F8zRTG@Eb<?Zz&ug#YiPZHN(sMS7&D9OA|%BhQJ@0
zn#QBfu=mCwZqdR{sxx`jyw8gyhTCF)er1<yB1+o^_;8HJsKePscmCkPoBu@<_>S^X
z?nmah*eNxcwpQq0f_=1z@2GdR`nb<vJgaNIOyB~!e$1Jd3KA)%Uj8L%!%!<g<j+<7
zx^iEC0M+PhKdjK~GKi_lzmLwzl6*7r3(;db{3zm9NhJA1HbJ8bsrifb!{KZV=?u-G
z=p66-?6J5bE%T8bKdUh$NS@%p1jsB3vHk)<`2UzDGR@2rs|cm6SLqagk<oaiRqcjF
z{BZiUws5rf0cZW!0VLa}^=>m~#6O$UMz;li4wANO{jH<n|1wLnY9UM0LbA5Aw?M@v
zf1{mXetHq{wzvJP<VK4l)+)WxhjKM!p5hfITH|v{N{9CGYe3@5_-?}2c?*?~Rs$X7
zv(Yu|(xm7hNYa%2STB4oA(04Tb)gwhtS<eG$*z3;BA95NQka)??My>Xw_O3E2v*xo
z@+>Q|#Q7gk2#kj-uJ<xKKi7$INc?Igx8?%JvyDH&?pSp3|KeAJsT(!!?39M(M{mnI
z;cYJj%QKLytG{9*i?GLOjBZ9)kH?IhSN?xV`{a@T(!Rf@Y5q(5{-u48BL-4u_rJ97
zU)uLC?faMZ{Y(44lJ~_w{-u5Y(!PI0`@Zs&f2rQTRPURJwb+(GN90RE-oI4u|4oGN
zU#j<gMc)5ERPVijN}L5Pvlm0YLHU5;`QeypP;I-Y5lBkvH%s__)ZbSTp=g&t+Sz{R
zw)Z0^tZKzSb}G~uA9^WbFD-5G`zZR>EB(99f}&b=;^X_ytSEO1dypy|K7P0nO6gQI
zxUD=1`%BQT(vyDj{Do;A+Z4sKOf*9iWlsSbq@xHz*C9JyWtLJ$`#Oa#QsS(SdTeMN
zsU(WHRkxm!Q|xpys|j}m1<2WrvfqUOGftmMoNXU+zZ0Ze_=FsE4Jh@Sp&Y}S(yUoh
z?J$EnF!nQHH^jhnc0i97bltM#qvwI{eAnh2DC0AWudTN^v}bfPT;F84WGq^$P{5qa
zus`^7pznD_!UQp?V`~3lE6o$uO$QMnRW;5BC~;O|it`lzUblrxz8`HthGM0{%RR$I
z1^)a~VFt4-uWgB?df}14E?ubxaZrSai{H{O@|7&+nvqve*CSogiC=<*P0AgPi5UV`
z<Xa_)KjIpM!tDcG@+i?FXedZ{2q*HO+C<m4rWx&}od_zkAL6MX#&j~+JV3p7M>+pl
zUFNzDkAZo`VbPeW_^B&;7{Q96PnSX3LU0BsaV!b2Q~7HsW08VPJlVHU`xk1{)>n^2
zFE8GnDUQs3EAcreYBZ@j-PGh}UT*ovl^%~QQjNY8|5**ly+N93DT=BG-`olxsyFPo
z#LGtSI(pYACE^EA-}^w@m)w*~IzNh1A?Y%jTG;NW$Q^I1c$}OkWafquC}4m9Q}Vp?
zEA+o<vERkyvmYHaj(msoU9ociOJhUL8og%Nxw}OAlQx^ot0qZ^J{V0iQn}M$-J{y)
zQX|YVoSl`frS*&O!>3gWbQ)jGTZ7tsUDb<FHq3hvkJ6EHPwvHx9{E{MQ6_{S+i!`{
z0#e)LB&Z~kNzRX7N-qW74BjtP4B|ehjEeu59se#J*OsS+xK%npO8+Phw;+8$mLW))
zD@=?_hoV`nL1WI1z0`>R8F#E)r%{X|u-(x<Z$tFaZjDCqr=HA-1@VM}SY*&XA~<BX
z+~mw;X4BE7e4gb7u&UBK;_%(P?Wo^!Yt%=@8$)A-Q4s|Vmbdn0psNuy@xIRKtmWHA
zEm%1NrfW<NWuL;9ku@eppUUKsy4Ru>%9`Fl`Q|&XY@vMu7Q~LSZ=O?Jv@>xu3c5xz
zF+hc^y07*>hwuI2ey_Mf`KWDRAo0@{;RyNPj7d0zuye*d@fUk<eA(_oRZc6lBgXb6
zxkztCL(J4oZf6?ZrbAN=q%XG`T?ffeCczK7e2F7ry%^XT&T}~CH!mXflNfk(HdODM
zeludpG#g9pU`vHedBo`cNJhx!;_IQ#-Unf+7H6Yj+bvbz8hhY4LtKqc@-5B!EmQRq
zA7Jw>64R(alr3@@@uKc0A0fpAP-wO7dVm}du3zrF`?@>m6R=-%LA*o@wD0IDZ;%NY
zoB=xMmzX0{zsj=T-x%VVK2U|Y{wSzIbUs$+lqnVS;kvL_E1?D9cpG-ZOtKfmHM3u5
zcDPSSJ-Kd~`pJhRvbSaP5NU38kX04@DEggXwy90&`cz=O$iRBVP<(N7IqFbaJk+^V
zN_bH?_qzibya~iPqvbyT)VAf1Oje8_ejsSqnvph1p)mFUHnk{D{$kpfOjcv#%k<t$
zqiel=y(3~zowFLNx7GBr)_luSZvIV3P4qTMm_K2&XSm&decn4Zjgwd>de2~rmHH+Y
zo(Qm&pC5P^Q9|d*uTcEEAD{`x=cjKR{8F6Us&QqpO`YebMmWaYqmu7CwVHwuPYaUe
z2U?|JA5|6njftFxdT#X#g^FvvzGk=vD+Qym6NM*fcyg+IkPL+yH9L~EvSk}|5KO^A
z2n}Ad^g)0JXKp>WFC)EDRl&vnBP=Y?p7Oh2ylrNakpwF0u@NO-Euurc;DmgpZ<AiD
zXUh$r<uCUgidfHh2v<mvRyr(bA^Q6fumC9v`%bT>;iIalQ50Y5vVUqR<zTf}pEzq*
zMvLLdm+-ReP!Cu1ABBvWx!#T|O9w7+H^`fqW)j#MA)l?K<-&+tcRXywZXZ{k%=d5E
zx=wDdpqhBafYK*6jXcGYpQL=bddbk$%TwC`mf*Xs|MHhBrv!67PdW(g4^^G@Uw7<c
z`nD?ba_^HcDV3Dga`uT{b*P?kfI>&FELHePt)|{t^{{o+jV9J5_#o@1(9na6M%OAg
z)}07v=?!ydr&t`Z<|LyuD3Y3O4HkSPqUk&9`|TwxRf5N#zWJlJQ4!QOVzPg6ogt1d
zE^hQb_EPiXX4q*j$=XOCSF47Nrpb}6*6|S`8X!U(v;{csIDy^16aIWuqaLQ&y(bQ@
zyO1F9kKCm`B~HZDd#m%BOHipJ*6AF=Ua_iO2?SH0yxQY@r`9_>o?KF<Gm%2{2$zb;
zn@2>8N+mh0Yvd>epnVwgx<}E~f<l%r4ptZu1E0*2<`j>QaCH1%Vc}FqBdcLr@oOmN
zVgv5JxzeHxs;|K5j~^%c4#ZrwBwKd;PI@Nm8w&FQ>8uKtGdlyVu@-x#;#7;Z>Qpm4
zjH$nbxrU<cXSZJH#y-N8!Kg*yHEvkjmoD%zmePh8!nFneIDR6$XM#I>7Q?d?qPoE!
zM}k+l@)Uv&_0Ct8H~|Oyw1lCQT26E=ks6;LdZ4Y{yUlq;zCvQ_*{m^Bj~hZhP3?zb
z=R=z{>z+MCgbhkC?Ba@&87@w!W9+gEyeHb2&i%IB7LWyFBMeH@rDzGgm<IAb7N;0A
z@DI`%hu(n#y~Al2fhFIe1nMbK*hrCL6O(_;6jR&`Z#-?7Xz6eX7W1iovdz9%XP!`+
zyR%S-P@#F~SVcO<Dm@menY=3Fo6Yqx9~eBI%;%i!bLJ-b9=&!@`0CCY4Zk-ee)cCe
zp5n{x0L_jntH2zm0KYbe?}s0JJ(AgX4z5_W|BP|h^XAs$Ssih<S*B<Y!7>M`YDz0N
zy^Uy+lJUmc<HfA@y{t<~c0(2T0o>U}yk&QZ*+wX(c<=J`^>#NMORL$m--o%WbKoxY
zaCNVMT(;m0FsZBLgq7XW9^luMWk_k(Lqp|>TdUe$whjFh+Y(BtcDh_6w9VzPg~#3a
ztZtter?F=r)Yy*g8IBK8j3AZTBFEB!hM~?4I{0<3htzvPtn`S@3a?RV#ws+wtE|=?
zO(n)OeXyf5kk4z1SXQ3kE(T|Sf}0X2Tk_JcavJI?LQy+n(G`3>`)%vC%(d3i286jz
zUA#X$($6Gox#k-vkUJuRNT!5F<y&4%YB5AG>mxOfT7mpsigQ70$J}AK1;`=9>9H8A
z#Rfmw$RD(^NKptn_Ov=c7J~OdJj?C?Cw5`oLTuC$jKC&}4{Uk(2ORhxCZi}Bz;kSb
zX)AZG+TfMbLUCrj#Kl492x}1NO`feFn@0DHJbH|Y)Sx*h2>VCC#N{)-_Tw34$+3(#
z$X&4)kZjDF1r`0i7Bsx{a`f+5_?uz`vq%bpjiMljQupBFB~#%vt_G)1ic4P4Y>P%F
zJI2Z3PmsPe>k(7EonitCh%3HSY>Mn`+<Q2g=y87R;Zyubq|Eb)W<AZ9tgchHQ<8<0
zLEYA>t9=b@_i{tMU08T;Mxo4^5MY>#E&!eZTcNgOCEFFQppQcB#dWa4ctYq#L<oMs
znJH}V9U|;Yu`RIo$R-z|HDjb2nC3)}9hC!DdpygwqPulqfgm|~rIfCyA6mSkEv*_L
zBBd*yRJY$Qu-ANKyT^q`-8EoAzQSk$ru_q7ar(mX+7qt!aVfuA#rM_IJ{c6*Z1v^7
z2uebfK0Q6U6Zh7tJPfU(7$ado+MFATGUkuzzCp6G%2^G41H);zBBg<6Fb8K85DNpe
zmhL<O%g9TRF@ns3-!Bk8Sk-yp?;RDmz%ybR<tQ8`=Z{*?NQVfwdm2UIEhg6W%(Zgi
zlP0^19_wZk><b@f#RiW3&~BgnHh-&JDQ&F{Kxjcffm{@q)h!=OKfKQ%2k^59kp^H$
zPk8`)mG{Yi3euQ8;f~(+*`UJ@{sb0&;%kc^9BA<qF;hK%0C!bLkw7&wD4$ObziO|F
zYYeigd1-LZW-osPgEnl9^ZONgi&f|M`j)*2;IE0}Ro)T3$NPQw6YI{WSZaqc;Jw`?
zIWyXWccHtG4QfP36}%%(EeYNYgRK=5(ZiCJ={sJx`3|@0;MEASIJ}l@Pa8#w+&lhs
zi&tj$(NAnmpIfC)!!=tjOJ`JMs$*El#8qO04;;X~6#p!Y91V1^epmP@z&(jpn8((U
z4W!|@9n5<?&Xl#TtOnFL%d#9?(N?L1CGSU(!<QuEpYH!CK2$IGdU2BR_N!+o#ku<t
z%%*jRE3z%x=0+|ZF2LEDXxZLcH<B#Vz{$YdziFe^LC16NjpCW+5w5zPsSFpuH>bbS
zDZSg3IQ~3N8Jaq#j)3=X(6jZghFRxm@_&)$(3SagI+E9bF|+2;W0kSf$FbA*wzO&d
z{dt-c6dwH5*6OES6$f#9+dOW*DhTyBmIBMB><|CaTS&f6_4-Qb!BFPrpKe&^SHC;E
zyvFB)!(JEljWP0Z@M2K6QpB#*04u2JKa>oR_ZUFljr3C9CD6&G&lcno>ndJm1f2<X
z=eD_9@y4*+yo6CCD;il`Mp1I_NW>Ja<nu^;Hf3Po0&7Z^KQ%wAOY<i<JdZ6Ovu|)j
zwK=xpv9tcz!m<f(+$f*!^oe{#HfYutGfN*Q@y2CK$c*W^q9}F|MXI7-Dkh2*7v-O9
zz2{pY)MB7fS+xm3>-|si?>N~{D+rj%7a1e;<zN=IS{KBB2t$vm+1ceiC1}@)+u7SF
zDuj5OyCgj_*vp@W|3x2Rmgdfq*4;6Ro?n1D5{^dxdJx9x+Xj8@j!U0{t^~tbyZM9k
zauT~dVXd;-<V^2ug)a++q2LrrbR|-*B;$y5&<6?B2l^?}^r;3KuLsXQ;@xA30fQ1_
z!AN)aW_b6FLOgfe@K*Dp+_A4N8vK}?gCv&}1ux?vg=JRKx*%EQSiwNpOoMgtz6u9|
z^JomzIw?n|R2FqXmTy^e-@-o;k+&^sBy2t?qH1d%6V%*N$5}hky%t<iFJ4%hyWm0+
zct&N(6LM#adpyf+n5({%P6}Z|0!)G^kRE`4I0mlbqrC}(H&$^P37kgyWi0dZj6`<_
zwOX*EnLvom%gdu+cm%#?NZ@OB6t_Lfs&}qNeGih`isW9OU>(7H7FB|Cs^FaK^LohL
z!8qzCk8i2xiydE-e&1ko`NN_&%i`Z{`SH3A?o)}@8;cKd{h9ejwZ-%Xric?|4az7H
zsc)09#rax=>7ne3(usFG$z5ShoZxjbpr(QhYG(i!G7liKX?8Qfz~|3g(}Ez7IA}Ox
zweMv=yy)PqT;4R6_o+df#A;4DE$h|?i*;G}G75?oNNV=O-+MqNlPPdsEx(A9183GI
z&ld0*;0x&=V=Jx}gSjybX5co7<rUVq@9X{T74s_+*bN14f3?h8foQh&{v!VzUGa03
zrF^@weUVn8v9W#n0iuxeF@mt)u}d#<k<opj<g(pwknutdRi|teYom(k_d(MzV7&!E
z#<&aUd~}C4T*lvY9OX8cRxAH^x({DY06udLo}ZYQsK{r*`P7vGK}EqhW$s61E8aMs
zTPPYnn;Tp1;txi7;)5or=m~l~qQdiC`URR_t#*t%yTIK7&2+>tgL#r$0*R%)`CxwK
zN~GO9oSVi!-=904fFCrhxX5~9H2-A$nZ#9&H*(gyAq_L1hEJ)`TI%|VJ+I%VX}lso
zeLG^$)6K|dwf?9yYkauIm!$S2M^M2m6c8bIIO?o=%edw*wIu;XWxtX>P}P}sqslb$
zTMh+-X1?ZQn4cPxe+aHxuYnrs@MBE#EM!ki*-mKyy@x8(hCKvV`cWXt1f0|l^Rg{t
zc&0PnBsf#V9JOFJf<=S+G%-BU-IFsz>iFWrR_tX%p7)9y$!X}3v<aj+dP)N0;Xm$-
zEDa*fqDae#yk-(hyMc(|65;Rlq<AxU9<gE6dbY&hoQ-7{V=d5ZcmBAx>*>X}^BZ>S
z3lP5A=lrCU?mXQXaO|41L+(h%OlEU*aAqT_`ljT^^b<BJAETkJScs_n$jw|Ja5=t&
z;V1>VwOeQ2Q}G(59U9iayzGUV3Nd&ISm~$_)Tqi>crpYQzK99)eGUQ44#$j8qB7@=
zvLcH{;ByGdd^Uzp6CIxH7p$n5T#dit-_-?D_Lwx5F5Z#Fqw#q=VL_;u<Gtm9GtS-k
zSuj_Ib>HwP6O%vF*MniW^O_b+s+xpnB!{(>5xOXp*gY)wnsd3}hPg_I@#{?Pdy^;l
z$lMXc+d_rP=%fL;)C;Zmk&RS|Y0WDIRBnP^N5IymvpKttTxk7S+lT)_Y2|1j<8Uv$
z=H1jD?0YCXchJZgQh@dQ4)msp_H&it!r2XMezj76chj;F2mv4zfwWm&8KT1h^R;4Q
z%LaGiZLgCq^^L}&hht+%%r11-GTU4XOPNohPk%DP)UhyqE73OoGZ{$kLA-ATBdI}_
zSB04iX1dJfDzkGbr4^%~Zzrz>JAbZWHi$6m(k%VGH1*|XSI%r|2X&foedX60H_STD
zBP)o7lNPP`nnHSyGVPt~8qAD;f&Db-BSKwM-jqYsiynT6NOhmiK3@7_t>`6yK5J5K
z44`itWcHl>*+cLmIpPD`ee$SqHr!B#M~%8NF*J1*q9-s&RQ4RGO6s!(gJ!3nsVlkr
zRZ01zV2HGNs|F+fw2?A?Se^SUi7DzsKZcw>)?h)JZ%b;xjJk8PD3CuX{iJ53M>(fw
z&%*I%@XennIBq_^fJ4S`qSUEN^att-SPj5{baDOj)(JIFD{<n8vt&<bx}|hOdh?8T
z;4aa~R9&~Y)WPzEtaOhEIRvS~xMpYI%*i3-rft`o&XK{3Z~LqEN5n#)R_6c|KhQ_{
zZP3lEB!ZzjF<>~Ld+(3DQ4ANS%>OLQO-(AGYQJ-`LCzQ>^FL9%^r&7jvYgkl-Ip64
zKxn~C4+1%9TqHLGlg}xJ&r*Fl6PMZ~aShb2>w;o+^?fEQviH=M*h(tJ<(~spd_YIc
zn6GZI49y{%cQ`W>*gh}hh@W5es4o*yy~PSitR&8G9}FT+_l*b^!1e|3BM1*``~FZ8
zC+?{42Uyex3}W@sO+*PUrz_*(^QrLpGI<`f!ZDDGm^+OneZC(FnT66avNWw-lH64>
z-W3C9IPw}#cQKMU?Z79}(-GxPX$`y41H1RofSd8{qz|$yXzUmQyNu9v88cf-Dlel)
z<GL?<Ez0Cx>>P;*KqHN>r+|)1?_ozQichw2h2@PuI|IS8HYxaJkupzHYfJFU_S)F}
z9UEt<YWh%JMr51+tIO$O(y)ej=Za;C{DG6V<*;i&{Z&S#<;NVNN23o7&+r;Ji{gQz
zNj>aByrmTqER<Y0RPnidBs-t-R2el4z7bqDUrr<`r;Msg>yVU>wP>`LMt=6;^SBgs
z{t&5*GlJudG%EoQ%8mipEUN1=yT+YULvgMca#nLyck6!6)&px-(%H-B%_>XEHSFAp
zWah>yVQ0*+Gg9;UE-QS#S<t_r{-~2FvBC-c!JwN?dpabJo*f3_=oZJj8Q0A}tY>8l
z2Z8E;=ZmHDk&BwRRpb$Z8<i)Gqo5{#1%mUal3pi+tp&<}dW6hv_22k66qLhQUDjSn
z%p>TP0t2ILw*RGSVS>8*9@)9dl|#d;dohr2q-Xn<JE!rzj3?#|F~iDAd=8e+IE62x
zje{xG^cf_7#?WL_!^;j{%%dC;bor1f&Wu+>nv<Gw?xm3v7jNh{<RByay#QM+G3;E8
z%R+ih>xSw@6Fc3S$->vN{;B**D0T{G+<67!K$X!D?9t3wm2A{ky`o~j<;>4m`p??u
zIFNsf<uggVuG3wsHCY47ya7sJ1vSv8BpHiU-o1ObwkzB47hW4_-Xa!)bg0)}6$9cp
z)7%rLxA04O20vjCjLfFxgO+pw7PTMHaAtg;dEu9ybc%W3Nx8M3t*x41i^^W+Gc~s#
zOj-U{Dfch*{F9duv_VFkw?>NCDM@UO3SE|S19OGdFU#vsDv<D`-%>BL_5YO6`|8t7
z7oOV{i<=9@wZF2eAys${*IE`#Rl8}DlC*teiZ>31NfWr=wqUU%pS-uBu6|$qwz{9%
zx~YdY-g|LN#CfJ|UzSU^wI1-&D6a%!x{~!Cn(xbu>lf@Y%S@`MGaBuQQOqOB$lEr0
z#}|T#nQ!B04|*}OSP&WOW?k(BqEp`pFeKT%?IHyE$qUPnaJoD^mlF)1T<PO5#{uq}
z;_4o_upJ%9^nEl|8=y}VZ(Lx{5EIMoz*>P)KZYv{X^rC4VmNddo(>}rv$DB2Fq5KA
zcWOmDrdEwiHsCo8M(>Tt2H(1e{BfytIs}aEFL7?OWRJMH`@4SHR4ab+jvgb~fBdsX
z`nH}Zr(Z0%>f2&2fdO=FsOYQ1W@@gel4jNB!rHKeCj7WY<bzOt2%l5VBbDt~<vi->
zgF2|*pjdNr{(;zwEM5ufx1z^6&9y4Y!&&GP)L-l}HJa?V=K1mya$af>HkztMD2Y1S
zC?>4+;HjfnD{txZIK7ymz|2`VjX%TN3pZetKkhv@KzR3K5U+x#J%H+DyJLFarX9tr
z57in3UC?<<z^a!QkTT%E5|En)7TN<<dV#a8TNu_BvIAO9S`bw`mcU9Rbg){{{SxT@
z1uCZPsysLdPyxP-^9eI=<XeJfsACv(D!8_!SEyRwpjtWw$)hAN#%};vplkt*RfPlV
zy7Jt0%8RsQ^Zo79z}%-cX!lM<QfPcG9$a)3t!u)%R^7cfQx6a!EVD9ycYZ&KTZ)-|
z!n=gt^S#bnxncJilDGqNM}=17wRhe0(!t2Wi-y)s#I?55hZ23$O5PLMk?9V<7gB`0
z&y^h|+b6g9BS(pDDsaBR6o>nOKZR-a>WrfJX^?Gu170!qMeO!JnnM>o>5~y7qFHlU
zX(~=vAAczfb0M6(HUKn;3*s3>%^$gk`Gt-ZcFmmivFp{Yse5sutDE~9fscT*_*vLO
z49{m(1oLDFixHAEi=ElFD)QHK>JB3(25*$HTjsa<?ugywEf^l}0iVWT_jvJGINzfn
zvO{N3`Dm7fy6msAcV@A~vZ1hAb_^tJ35|ih7!T9HJQskd?*lmNK?9AA!n;$69N_uV
z51-eLQ107NWloxlAQgRLBvcG1#Ng)Bah<QY<@njz7(BO-&m7Vb)f-9GbMhxkk&1%G
ziRkguZ00)*pT<YAAn!sceskLF-dL79%9(F}mHlk@^jxt>@_X6VsAGR|7~onLF$E+)
z)Mr(QB<F;laLKa0ej@)WZY-{RRFO8GDstgyqWy2C`?G!}>k}pGHMY#!^OJM%*Cu=z
z46Kp<7*$10HlJfsT9D;Dv$cGVO2cxZl%L<z3Xgs;h$CzG+#3kB;+hZP<dxkEroxHX
zV2xw~Mh{FYj6!-w#+-`JuT!E98=j3;+)}EGiTv=08KTWcat90Z{k3+S&pp|w6Hm}r
ztK~oItiewlCm)TPkk_Pg)Aog_jIB?@;$4i|{Oucx-1}*)496$Dy?T$~7<Y~=7LH!y
zrOpeddTr%*aac6?bV6ZxP&$Y=x|gDMz%e{*ES@Vr&tpGSMmKSq@}Vq%U|j&hbl0Vc
zA|UIJ{u2_HGs1WqFat)4{C+qmWWjJs@ZKOzdDZwi32iH^|4=+-Y|P~oQ-VK$wA?S~
z13&Se8e^zl!D@21K7IPM8Y-D4mvIe6^hFVuS><|~T5JCXN^V0YOUE3)pRJI*BRKux
z2Kbqq@=Wy0$YqVn#%!1I^+?$^_4J;P<go%uv^DzD)kbAPq5BqM9+p+Ocf$@fUe<f$
z2xbtAC-M^#D2X3c829n3HP1OnsT~T|i0gmrl38>}r&h)1%&Hs064gB(cGmG-K6`&=
z+Z*GWwkDv5j7HT_)g@2t=KXmVkCI=Vn!KcE_Mt@2VMb3%lyusnM|#$^*P@(1w6aMs
zvz&nEoh$IyFSsfb3&-Htky0R(?jnKLjCw~UB-pWf#^EQFnS5T}6LOoZ4kTH(gJ??U
zGLQKy%dK7dMy+T8M0y9X(AxOVaJm7nj(!9_ePqGzv}IWs?&&WoM>P}%H7xk1(ic4X
z1N?;L<9)FgffwVvO&TTm(!JobUJZ`uA;&#544>^=Vt;oGXl9s#PeV&gp_EC2s`h4{
zdasnC6ZfT>j^ZwVE8Cml8(P~PM-jpw490b+@2(Rvy;bg0AxCmj!E_~o;R^lIfU3(0
zo^~fWuLO6xiArE~h!_SQUC^`AAs1!dK1YBpxF{YOUa<g6<H@sp9tqE>i{jKlhyl_N
zkgX*J><S?1nN<LH<XJj=OqLHgmS`-FU4rY9q@Dw6L@bs!jO|?#)JC0_yCJBO5*>Xm
zR|^q4`w;(3#JQRtCf}#3jr%ccLLIgjqzqXTJObfS%-Y7BQl!;BzNd#F@UlCBmwg45
z^oqyL@VaE8ssNM9h9`mT`5J8}Y+LvSND>K}2Qo!-oj(t<ObdIHk3)^qlSr&2Z7T-H
z5I=3`+i5*^+(ee0ikXg@5Mg|f<$h=YP*A&GfoAEs1SXF<_%(Kgj<2|tuVzsE#6++P
z#IDkzK@c8x4h5c|^N%~|2-?#|6<;TXa6mF-LBxEaAE=EyB>7s8aA$MoIK^ioJQzV*
zK)S3nhA><H%PH)aTRRLmZtzZVulQ<6QL}<9<cL4D7ate>x|ciQOFMuEti#lrJA$CS
zDsO+liHCz-Ll<M<^#Sg#__;8A5hsR3w<s{*x~S3iBB))Nx?V%l@TUD;8~FQZC6(V=
z5mBWGADu{;S+nykQToc;pA^@r73Mr7>qMXss~b>j&yH$4q_sC<6&ki5Jri|f0bhfz
zVCT&<eeVRNPV(4waLgVz-Td#heC{^r!oYUdO}4N}d|py9AVcU2EXe*(ywWATThO-w
z*)YtgZ}Y*=0;18d8$;jiu8ukyxj+>bh*QmcJeCzt5y$yepHA5Dy|?`Wu>B4!M9osp
z@<K|*`a)Y;E$@45B&7$vKD+#t!*oWfnTl{5XlrWhlM#|W`7r>=cY*qv(#?(jY-_6X
zaOLYvKw=BN1kS`Fz!}3D0RLy8F`L6B4@l@=tK9Gt$S)G)J+n@e9hMiH3uw)-&R^hq
z%~9MKKDIn?RnEmOhCmstHT_o?u`S*y1qaq{6i>Q+`_;0iE8}lJe=el=TqkSU?#J)P
zm;LT0?z%BZRXFmMg6p9mk&WhFVYlT&9?MyaZ#jGPm8-byvW&IAcJb}9Zr26UG<7v|
zRD3BCiKV`8L&cr#(c;Ad+UV>VUtV*+IuDSIl(B}j70i5`G`e4zb5k_EEQm#)<Z;=!
zp!jM|5}smFv6C0IjLZKGoAj{X`NZ6=MW^8_L#ZUMZV^G=>#y%9p;P?z3+cx5J_&q8
z@6HkNci9iS0R=Tv=Oysd`WE7T1!vjC&E&ZGjO@?1Mn*<<C%ng)trBaI?!WOva0F<a
z>!}kV);h=DkqoaHX<FkH3m&5lK?adbma@d0f&dcFB=S0|M7i%pbw7WO;(dSz%mI2_
zvC<ytGg-)MBWW!hVwv--Vju*_pyxs`em}FNFl6_wfVEUOYBnd~S%8Q3Ygdr9Wq#+H
zuH-{Dv`U%#6hD0^A3KwWZtGv5>nV*(6xQk_SKf%X`?FIlRDG)Q{EP|W`4z;3gW2h3
zZ*i)TU2%>g?g~P`v;y70%w>5@C4y-ynKk-c$8Nl2MFlU}<0OdHS)J_OdnC`U=m1Y|
zf??3tK-viqQZvX#^VW0LMS&)g+b3s0r~s?Oxd$AE%qt#bhC+bZ;eEJ~tE@ezi>J<R
z<$XlX8;YiLAc`gmpSuAs`W?le#qj$@`Tg*+t<=*p{5$Ze-3y=UPO8uif}HUUCv|o8
z5I_icuW#&MBK2v~Pk;#eqCYLzfAY+W?2}jQISNP2qPgkZ<U$zS%Odtude!{rjfYPU
zMaTMZ-RxxS{ZBDf_tfW!^XgmTcny7X9Sx7jVPOBDfK?4?+HVU1%kCn&EkrUkz?5ZT
ze2O=tX?aCxB@xew<<lp_FjLCBTWfi(_<2MKa&k5LwfgTE)N9~R`0TqXE?hvMF*1qu
zW_UCMD3FF7Nd72>JE@JXrI|*0?PhZ!ur8aOP3MQSMU|uGCi^E1(eIQsIb9gGyX<-1
zGEAT7R^Bw0_er!KBm@2ZSzgs$#(tlKm>(Gh{gyJ<TapdNT0tw&Y(?4+hl$TuT}qd0
zCfL_Lj<BBChkN%_F7<F?SPw^e1G1cQC~?%>s%6~%X1du;YykhTAlIGt;D=!2l$hR#
zDkBu9^vM*-w7eI~1QhaRA^7~>H=X=np-qmK4q_K#*!|!$G5i!9a&{Em(@*2|;Mpl-
z3DTTL@ad3>Z7wr(N<&o??{kz4y55NObHmQ!!2tqG6Q0JG?J*qgpM(NRA#i3bul?Cz
zm@j`$mQN;Keen*%iQ-T7BlsgoZncEgk?-;Emt}AiFY=$QIARi?0ebfmuNd>>BtL3}
zKb3G+FVj-G@@FrA5o3L46RA;crfo>xeBP-24j2DBveERP9HA*KKks4&h^@M%g_RiQ
zU*Xqe?(-*saTKP@72*<ZxU9vRbuV+<I(RnfBkI|aqEx5EeY}Yoj%I~_G=Kd<Z&i&O
zU_~VFdQVjgAdUM*A&FrvhDqRcxPSi1v8lczui_Fr5JRI$qpRHK){xc>%&X#uh+EO0
zL(re4_OK&qbCFeWZW@33kbm>U3@{N$ZB7Budu4vda1S-FW0s<cG_65JWY@Sl_ZjKx
z9CRk!!^ZwT#iLSw_4Kc{Q%5fTka1ryLGh5dZ%T^MOa|j@mP5je(Sgo~{tmo748I^k
zfl9Zrp}Ky#<dM&9Gw_JW!oz|LHTtP*2UkyvAZtkB1-jAHsGLqDlsmBaVk=C}!1+{u
zCPfMyX&!;}z5z(H_?by6uN$k)3CB*U#CUbkK!jmd8Uo{elYAbDZ&B|>of##DK)0m2
zpUN%6&Zg*yda`PeQ!sROKbJjAx-y}SXK0IJh6tQ^^PzodgUJ?qNOL_TZchlF4}h7X
zc?D$EDU*;YsmE!=vQ%RCYcHX0%n7Lb%5MK0%`fZ~*)W(foiRtMZMJWs7wg!yh6bqW
zBJ>M#-K=A=qXkLEyPTWUU?<d$*m$2Re9CVn?YA98U5xr6cvIZ#ZsGbZ7;-RUjv#B7
zk3S6@aUzs3AJi5wzYE%HSVD7037QMui1Z}3s=t<7B<nlJbmudx3Q=8!(!2%?OF0DS
zHnMs_JQkJoY63fd4l^>jlHVuGc}C5h#BrkWzp8kn&fdgw9$@JGkQKnpt%CRO-n!u1
zctrj@5_ujhF8BvHr-C%gu9z&_u*64e2o#?;)z4LUVr-k4p%E!(veBg1Ik~}pGwyL)
zE$B5@2~!B~JG8R6Mpc|&A4b=>aZ<E!3|1KO=%q&{zv{#NNYstF9S3d1iqP!WbL_Xt
z@LG!o+8*`i(h&Cr!HIm8mqU2Vz6_Bddm}spc;YHZww`Iw=HAc4(-FM-DmZI5|B1i_
z<geqCV4R*+NTE9kZ5FMpI}0%}ouCEVKFjMRb@Aa96|V{&{QZYqaf8wxZkUO^-vSc0
zoi*!K60Vu-dc8k8`p1`ADtpYGbak?A5X-A_d9_6c0$y~O(Tlp8nbRcRY|Pa}^}Fr@
zg10K0N@rCM6LX6+>02=8Y`rjzX)TH)WL+%0wktMIg#<nLWgfV;&6N-Np20Y+$>?%!
zpD53!>ER5VSIeKHH>*1$%L9xJb$C2r+e8=s*c9(x!JZ=5R+akF)&i473Y;_I55rN6
z6Ua03&|c%5@g4W?A+OxI@b{QaBd^k1a8*o4+5V$<{WJZdllwCR>}BlJKy16-;a~=)
zPN(^PqyptbZOMLE^7fYKtd^5Db~rn`DakdC2{TCus)uSepX`V`(Yg}c%4bkWTX8rp
zb82=f8E(+uJ>&2Sbm9t5g0%b<ZJgNEL3x$L1!Fu3d=!#LUyTXR<ImuE#43<z;0q2W
zoD&iPNPsN&O=ocn88>f?ADM&*ai;xd{L2A}#5w5d$?)DF=T?Wv&IQYAERZ9KmXQJE
zbQt<WKa*MT=28dl??c^!$=Q`NJt6b`3T4r&aAJMwoL0JB)TGHq2My<}PafIfG8fM%
z?rGv*2qT{Z1BqS=56BZP3TEOav~sYvx$U`S$`cZEsl|#B<E&}-TR5J?>~T;*amJ8V
zlX8i3>H#a<uNy=Ri2;~SD6rNGB>lkE&ioI{d60n&0o=!V2G5_A<yH0X=03q*orA>2
zDgv(q&c17?!D|6`5q}>$1$`lU4!|l>3!m46XZwc4DkEjkokr}-Az0og%zS?=Hy7zM
zR{&zixF@m^OL$+N0MT83QvB!Z;TOD3VO5B!?YeKwr#zpgns`(+w;r&RZmx$Zpw{hH
z$W+aK7b@O1^5*$^ZK#`?>BOGkn=Wv4xlZ)Wd3O*D=hFOj*A*QI9xa{SXUa@5Cp$zY
zYsPKCjB4IrC%#vY%&5H|8OuZrQg<HgyAQT{4+}{keC^=>Jv12GenzGCVgu`OnZscH
z)2;F+*VRV<{M?$HvQQ9Voa}9FSslg{JNj8|<a&vY)UYW+J=Ren^?mWp^!EOn?X4IV
ziBzE|Aq#FyoPfGJJnHqQISbLJex^D%p}1a|HVXJ58w}83xDhkoKm6-jS;hf;tX~2{
zraHF41T0k^7~|T!e6mHL7C@&#pZz95HF+ydDbuFTR@W>geXdg;UZp*n>e<EIXMvjF
z%DN-fBO&l55<KcF8;)m<`5h+61}kHrV6g@k++$Ye%+fUww*g*VxoRk?bd6KfMa1&F
z3cXj+4SR<&kz%Cu9}p&?N!AZ@e6&*TuA$mm<6GXrvL6@41~kfbbbI=qxA38BtcG1x
zJDsfefk$&ko)U(P$4|ito71pWbH2yP{HX{)%AuD_Gz7lE^5gvf(53%ehCTW=V89Z3
z!1}t!ioD$CbmN&#Xcacm4@R1yKkROsM94at-Flrp5M=$n%Ox=Doc~LIwGsXCkyl|(
zvywC53V6Wgwl5gyS#%QW!(7*oeN$IHJHBH<sg4cfmqP$zErl4z>C8pl?IBzDZDaaA
z3VObH8EKi-pwUdS9TZl`ZdGnoDG|@}^j6GBH|~aQFh`t~k!UobciLxk6zSj1P?kNO
zYnB=~qUX@TdC^8I`+SZDLIB*^LIZ#mF7BPx499d~XlXo7aUo}diEYpWrpX87MR!Ic
z$bWtIHvm_;Q@Fb<Lhg`YZh#w32od33wclKoI#FL|tlZj;w_TC9pR5{t!4la>2w((e
z$)37sZHum+74mP5<ynkfSvNNlSOaiLMy1a#eKPHnA8#wk6zyAnWE`kIHb`(5QK<YS
zZ-iya!KO*$RoBHT(|<A%KdJ|deK)^(C)p1K8D|qgkZ}($aB92!g`8^Nvfp2DGn0SD
zrN=+Y%|7!K^=z3<Pf+rhX~##B${61dcJyVpoDXyuz!(CYZ^E$}5-YJTq{&VP(+Cpu
zq53*1kkPcT`N@CTf^W<4L-6W9iX!9@lJxWxlN|+4n{ds6t?n&3nPM&V=48u)yTrkQ
z#<eCP@^#V39B(6!?NZsfZYjuKYQ8@wh%GUf-t`tZ1F2w{w7|MCu{@(Kn5suIJ6U^=
z$1`*>;jPu<(0nCFFaT9+b>0%W9KdH7uCI%?Q*ba=xe_5+;8|Xi*bVD_q}-o1t}O12
zJepMR>{>tS9r#qNEyE#6!T&=9Q^Y3qhSSdZR!rkH=78MMm&K39*2|p{P~AX&wU?)N
zXQ4b|&2R#EHXAyT?wKq9-X(&N(HlX}p*YRaHqh|$BP`ZSr~0IYF1c;q5Hyq9-IpG3
zuZCded*t%MHl?OyN0567Y9(!!p|IHg`t97bU!V@*ZGlZPfUCW1oD@IuY-;wh4ya<|
zcteSLLBJ_c^V*iKkJu)l>Gh>P0`IkZ*S#0lzl6aQ8?TuVCsh1<x`&Dt!lFLa7VWtX
zbqjZ;6gzDr)HZwS)MLE>Phy+~h#6E!#BcUW1XMWbe`OLYeO|m=Yn^4AZ69AP;y|H^
zzsN%O=pD8cZwtMRFq$<U&Po<@DLBv>S_N9nrCKF;fxh8YIZY~glbTHBNmbekV2Mrw
zHz8U>+Rfw?V;D|6#)8iACjwYH-`HP3eV^CK6`5L*`72%(sJK(y^BXhv$JmbDH>L0U
z)jSOhgL1VSDmtB(u{OZ!Sm{{}M}GQnmHtyn!B3d5UQ$IxSwfu=&;haqG*1&ZU*<$O
z9{-if81%rxXA6~mx#~m>U<5!%M*(EUP2Ht7{@TG+-C$&ub0RE^?mBOrRy2^&c46#A
zA$`!HVW$k^=*r?D7fb)d(F1!Ppef>K&l?`CaRXDL(#OafZ(8Y9*_iRl*o7xedzS$g
zRv^$7zqzz~j@?ecre2v~Zpw#h7_A^AL}^`knZ-u%&l`B<jsQ`$3c(wknlF2Hceu{@
zR<6_f?1!gz&Z<D=5t)CMM;OhBN9WzMSHo?3C&5UK*T8ZWy->$kI;s38rToVIwkRS5
zoP{k(<?1}W94Y}|X$Er|<n;#kKyR?SZ3#IBlulfRn2P7&?0usqylKhGEw8hu8gzcT
zbK~5tf(x2iBJN4_fwLIv=$*yhmlX9<PlMVBG<KA6*6a5sXKWyUwh*+wl)eDi%Er&}
zo9$}n*cX%GF{GJ7s5p=T@-~wQ=0-~z;hRDi@7k<*H8f_K)bbF2@K*i>R<+pV!)PKn
z{29d+YQfpUfz>Z~%`yduMm0vMymf1Rob;G&e`eQ@c}C@Yasu%f(7t{YIG5sZA$)4)
zoGjndr_NB4f76TF^G~n7)@18FT=DdP+Qa-L59d3spnKiag^C!5@O5w2ch>vP$m=p~
zUewrrw9iAYC@x7eQ;&+(+f<u>v4i~^TVlH@uYVlMA#>PUsNZPh$FZoHqae{rX3XR|
z8s>l$nU1>%ImJN81Sw^rmO6=j1A^Ihx4;nMOU_b1)x8Ac(Yz-|6`X3EXM-4MaYrYm
zI)K#l>E|7b=szKxGb!6gQ}hA~vNPjOwY<yRXn}Sxla7#>H=}pYOKh(zeGP<DlN5)!
z(D+TxzXPjqhf0`gY8Z##TX=r^*@Jz~29~G(p1y;Zjs5uwV}fzpxZ3lMWLD>jS6U&)
ztm}!sU#cNwW@m+*DKFd<MjXQ!B?T|;5;e|w`5T86r}2j|{2^mI7L*9OuFl3SB7Q19
zHEXIcwK58-+nS;N!y^F+qTA{g1kqcoAVG9A#YF$Uev907bJb!*OV&q?d$q;(KiS?A
zqZcvlYO*zF5LO@7w6>b<mTJS4F4jZC@v0~E?g0@Y!0SdQXrf!$4D5mSGUsozi@?_|
zKAc55!7Bk+T~r348hA6e<nYZ>PxS^*&+H5h%$9}-d;hFajrBGjMQ$C|YVbYDDo`1O
zl$0vggYPhBPTLd+1x;pp*h1CJA1-s&JeU&lZ-g=n|H9vwtP<LDv`_2Hc&k-lyw$4g
zAgtb-<#u=~ww~Z$U^wzpkABj!6?xYERyBR_xzwq)i%4U*bJQfFBA<wn+zJoWycz^R
zNXl}Dy~Mc$8P?SR4wb+icP@lv6rsf903n`yU$QhpH0T@Z3E|m-NjKj6vd*@B$ULjG
zer4J(u9O!*#d{;<?tL9rPv%(q!0N%wtF_mX=yJ{W%gng8%b=5&0A|C2lLth*>(<i5
zISWt&f#z+_DD-^}oxCwnI-QThwLvkkKMmmGAHl0P0wqn4Y^JM%P-<$&eWc<(s-P&D
z3sa79I+__!3B+CkD1nHgo&_Zk55uogT&&|+TF3H~x4%x|xAJ)rW_AOo!L*K-V$W?R
zHlL5c|M*EX(eL*3xo4u(4QWP%)N&vysN>Z5hkc;8urY!{x8C_n7eG8Ys4Tt-9<bu2
z!ts~>vDVd$e1Bd5cO>Y%MXY<cbD^QCgN5#fQ<y)*EE7X#i<w+&J_S=IzP(Pn39N>~
zdzGLpJBQ?V{jX};G<%yzfb=DWVRtZ%;Q$;lZZRMu%>_o9sou8iucz_bDqXQ%kdrQL
ze0NRQ%l>+w0&g}4v{(-UE!Oiv^}{DGN~5Kj`Vk>kv72Ts!eB#gkZaz@RVu%oAzrnZ
zM{<=m6!aPZ>GV=Cs8G7sIwvI>Q;Pxn!u_WZ1;{!^8#gGwhlP6^Z_8@BXestN)T>y8
zY&?c)yxS(^83Cy}k9D@xryyKCCLasfiEeY)J7)WnqQ!fRr?k@W+~h5fqt`0?=f_u)
zX&VB0TiuoLqect(bUna@v9Hie;?pn)pMuJQqxt&-#6jg=ce;xF-RP%w(mBG{j+J{S
z<m8xrtOaEh1^f4IQh_Q9#vfU{=TGB7WkFeR&<0?d-FnMuXFKMkwojdZL48No!$cM;
z_h6xGe`Ocgew9O`x4tt2Ul+b;Yw)Ph`j$q^=;Hc=2!p8oiN^P`{BqDE(NZvh0#tQ!
z?8WiOP#~9t|3+cY`E4UT%F#(!F3Cxkm|;nlb~vD@t^SFzR6|Z#!3dndi^7%*3qgI^
zO@h6T*XApRG;URj!2f)YuT5i6e@aYHUnm9m2T-}Ff_(<=c5~(a2dV;s{+1!$KED+{
z-dknnoD}IrS;*jDjykm!yZxo{&DMk1OgRM7ig=L=Ix#%s9~2T*32GhhTL>8`l{<4#
zzXBqQ`)jz!|1VbY>v8}*OBc*0=|eg9UTd>CYlilRPkHX#|BK`hfDI3CWmK7|W}H0L
zr25AEY@UF%n0fjb7-Mt+j=KZR>GJel6<+Pp8g>qx$eTiYXzl>O*|;yL43U4R3^gR-
zcLnu;O~zd9W@i&2B4+}&sPzp%*Ng4+qBlwrbQ+55Zb&g{s*5)Kxdt|^k$v(|Yv>L0
z8#y1%jQ!jEJ;WEfEp$C!JwkVRVrSK3nGCuu;2+^aixaG7^8KgF*myiDehZ|eEB=kZ
z;2GEntYzf{a2=%^M*q%kExa4hOBRNDRp=8k5rRbyyE3e^sAHCA?Qi8f{F31ykZ><M
zaZRSf_((z9CUVI)dEtY>JiNEUZ7XyjHwzGHdYdtf4l&V)G&)CYj{Z-5rX|Vyt_`oK
zE2S+c&-!)d-1GNDzb<s>G%+l31~y<J-nWG!DV}j(Lk1RU{06jwH%uxUP#Br{HOkE0
zjZn^KcOf3UQ3u~~qHkBtWwN`@x#af@>YTm?5evDE=A9s{w&yZ;3A_EHfQ0-@D1F;w
z&+p>FSyR92hB+1fYMRhdsIQPQN>MG&>c{Cd=Z2%L?8E%ed=nP*XuPILRk!6xXyJ*p
zUR<6s5Sd<2w?o-{6y_@sZ}}(i6&NRj>^A$YRN7w8<sh~C8kl%_!6ZA<-Z(!rkdiQ)
z@l(`l2bl9)ScXB?^K;6d+4KNg{7_{{nyZka6eg0pTCByDvTz+%czL~FAW?*Jim$rw
zbE|*qw4@GQ7rJPF1-1;4mg8oszj~<KO0qmXzOaUdm>IC!Q(d$o&!L>vBLRBGpYLpI
z`r%0aBYriqZ&ZMPn^Py~v#GuL?Iljc?vES(5E7_e)(mhSOhK2AO~Olr7=Y&l-4!G&
z6b`*Vo2mLo6M3=_x_av5mX@a~>oyUqG0FoM3+_e_JUuz?I3wb1+yv%?-BcJ?@xE+w
zCrPIxCwEMfpi|3>0+APgchD){botQzNZnHxVx%xsukEsGQB$0KRo}^*OSTp+(g32l
z%dM4Kp3&QI-4+fD`NM9~4O<RnWvVvYul7IXW_T%ct-)Ow{=#VTz|qs=#`Zl2@nr;O
z#vBXKN#nvhqEJH#-P5yoJLDa(2Y9!R=$@e4-rr9R1imV)3y2JY&D+6A+~kn7<VlR(
zaaX1Gd}F#p-xd|9&BE^4P~Xpne$H&eV?7dQ)9B_2^nf1g%fHQ}o28wwzM87Lto?I}
z`5pAk|6%Vd!=h}vb`?PpQA9+f5mc0+5tIfM3{XTIhMXa!JBAJc6-C0LQ;?AEhCz{#
zp<(Ed7&@dI_I1zmJo|g!y~WRWAK$Ni_=g|<@V@7|*R|qY=Q<bjyNI%Dq?D%Smr969
z-%B-oy~gE>Q5fC;+$6^vo1l4Lu?sxF7X9@8BZ|Bp5yJY7#e$Wpq04pn(+YfT%1woN
zyAMN2uty7x^+Ov1UCkm=_hPFqC7FyAuI2cp3kDC@=@n+*-n-m-P{bo+1(^duTATtd
zRj-Y<UFir2BgD4(4TMpuk>MRJz<wL%`(EC8oXql<%)&7svyitXj|4Ic>2<>+%Y5mt
zK;xoHR^2gOt#&CZhaq5bybnxwm%f8SC5iRLY6f;cjKcT^TZHKReNf(?>_UNR!ChB;
z-xH^~n5oH;>V+@~F;Ug$^HSY#20Nrt1p$9=A8T`kJEa&9hjaPm5Az0F%@}r$X(UuE
zTAD7^bGm@b=h9H}8F(J0@0)X83u`aM-RA~N!}i3=3F(;uaQ=}Vg8PeVL{;3YXLxro
z#a}n=VA^cG`i45!QnIh!*^=`?F23(OJ_R{w^o9thw@oH{ArUR+ryIL5#+#XnMu$Hy
z55}$)fJx7bi&RuBnJH5KZGy6()u#HkeM8K#4@#M_ntKg`1Q0I;;Je1!0sCbM0Cl<w
z1cK<s>tbEP-Ef!F*OT+kI!~pE6?I#`tPyLGP8X&}FpZ1&VLzfAWlrNxy~zp(8Vej-
z*JQMVn|M`%B<FX5#=;t;v5*E3#cFO)#IHgbK}ng+EQD?&z&U6-{`E58oBT`o`zs!4
z#8#jiegsN7mP%w?ehhBlht#mADf&v^cE}CTk6<w6J{C_+0lQ`;R~fwN<Isi_nZ>GY
z!;6gul2{FtqD0e#<m8-Wj=ajCAQ-p|A?mcB2SzZnVUr7zC_ttFZ(tAJp#LTMU(<C$
z*!$;IG{Pzx`n@Awo75KvO1d7-l+85R=BKzj{QdohG?n|XO7+B2hxTZM-W_vBx>rp}
zN^dff2gf&E+8Okgdd3I#cG5r_#k-45iiO1KVs#hau0$vV;O&_GCWc0PKICAUuqc8(
zZ;a>lKe`HdpsTQAoSt!ypsSFK$?=}0&S}`5dx6N~*FWN{a&6kKVn%6xZ+#4{xDI|u
z#pxsfVYuwH*I*(T7avx;5A`|P|L)a?H1JKh%L@JU%?$d~bkqSNTN;uPPs{c+^J)H~
z=FaT^ag}7N5Zmt5XzBDhqmG7=_<$-a02D|J?LH$7a(4i3o*c^0DVM02_Ja7_HiA)d
zL1pZVTg(mBh1;-hC<*zy#q=v5JRJg5@@ac|4wLcEKr5}ci+#ctdc+4yQB|R8$E=@X
zvfqf|Xw${&aC9a`wIWFYQE4!kJ{6iDRuvSjs(fqYnOl%p0eGcFb3g#kqy`a2C!vb&
z0x*c-4I7sw>n(-Hgy-7<&50gLMBC5o5wg$<J%F%piRr+zG<0NB@(-ppYQxJ}#{1Rc
z5R46BH9Md?<(M4qv)Mgdnr_B1D%jYrfy8OgO}cLmVjOWQ(Bv;KF@K1<uD;8dp~?2_
zr60NyKzM1#@i$EZz-o_4t@G1e9HX>7T>{JdMUn#Fd?d&z+?%6Dr(C%|YGqD)H$?lX
zSo%&#mdX7cIKuV@8r|{j`O2XAbz<}_U=vzw%^o}>7JhFPMmKa*2l`^5vhkn3?Y<IV
z+zQ`6*YnYStuy<3v6@2z@<VF+gQ4`khU|!lX^zI|YHM9p<g|!Cu?j=FdVX+2z^n0k
zB=t^RwQK`{=G0MA1tH3ou4@w4v&5wUrv6y$I@1y)rjYTQxPGBOL3bapve9~;u))Lt
z%g<f&8;OfHQL}GFQUk=O5G#oGaP2HH;VId7g53yQQxDsApvCM8>TpdH@&qGD%Lo;t
z;BLY#iAbis4IB5Zo>>6m05l(}+Ng5Pf8vS=&dCEy2`c#y?p=3LuA1^D?}3WOqgP))
zUw)|W$bl7o!uv%%^1ILCHOb(9pKRn@)*-D#u@^SoQp)aKPaw-n_{;L*?c-t)HG;bx
zIU{eP0i|u0(HOG4EwHU`b~**UUq$ephy;{;O%BRU^>8_X)s*Fplx2C8@O0;GL%r`X
zg+^mt*DQ&80Y)P1h-15AM)^h1_zOqqm)~!hd3Bq2!hHqfuG<k(s00RZOo=%{L=VG9
zEGm`{u`{vmyq~Px_&V9PWS9XVYAFqv*HDUhvP%iXNCi!^eGxU9%gu7nsse!W%-vI_
z1M!Fj7qch~5+lNlgJcwXW*_=>D>vP28_by{x5u^m;aYo?%ThbtH<&HJY242N_mB*5
zVx~hv4Xm%FWug5GayG@72CE0%@?CL&8M0D31j6_4G(jOJO-d)jAM-^cl%>#T&vizl
zgEJZ{pqxg^*uJbGEP&_$A_{Kl>WvGymZyO6lWO~g{9L<Wf6y+%9s4wG?Tz?f0vjfe
z1$V2x@C)F(7`y*&io#9ZK~?qj*fN=}Cj7RE04;*JcvyqG(t+BUS>U6frv{rtvvm5U
zfam-ZaCL^#q>8Q^FKEe~&yr!!EJthHk`s|A)dosOmj6Z%*=&RG#v+g@H(p3%-q$8{
zh@C!Vk{fX;ruiae4ww)ZOx1&oeNyFCDv_3=MZdnOY0$D2z%bIteXHuVBhRQiPn=nn
z+?nT^yk@}y@*r}LaQk`ZM?$;LZ;QdF92X0lqDN-s)_xmQ6Y(S#6hP>u^EcyK;>Zo)
zk73BvXeWHN&l59L*7yb0k~kg5Wj;3T4G9Tkt-T><lfLv}%#@6)TeW$<>YyW<X&k?C
zN-`sDvndDSUl0MH4bbDXG+V8>u;&DB$&Fat9o=i(+7>1oA3JUc(GgUkBl(iL4|x}q
z@^6XL=Ox+$)Q3FkANw7UX6n?AWE@JnRBzd#mN=rZkuC0}<|vJ#s!1d@g=-5ov%_j}
zh)zLO$;!uBlaeRDRFb36q68NS2oZGd(d~eRY4ZCx>vRFwrhK|H!VBi^RYa~^s_~Ni
z;qxNd%#?(oDNE%B4kaZUzf9%!)F+6iK^$A~qv>5uCahrFQD&bek*<}-%J6$&k>@<I
zTl64->rw~lqpNS6cv44UXk6k#!S(DkYcTeEfpM*Tn$U=WgBzNc`Cn_|{?9u8l1G^E
z#WnsUxPUeOq0a)DxIawkLEi9xM^dnuNYR<oN4T0XiO)mL`Gn^X6XjJ!LvNGrk1M+!
zH!d@sQ5XW*bV*+nXw<S~mVi`p?kmJV|9LjaeGib`+4=+jOScg*4f2{tq1tI+q&>7k
z|0B%cP>O0vaavwgVS3NZJh>gds4w4j`3)%JGU#g{gM41G0=48^?7rR@`9x?%{%*|d
zv<FI!l5;{yf`h3DUYG>Ovfw~|H0#a^tHF-Xx-v<jtfOpI@_GLnAKkzo$<3V)*)s}#
zoXXmDJHmTyz~K8T?&A*#&pcK5(yC{jzUe*tGT<rxgZ<k;?I1Zd$$c}1DpF1$)2I+}
zOixD3r1Vi!Y){(h4*k`%H+54^pnL>-!vk_S<4}lh9j11nUgY#6CetXt;-(UJ0T(~@
z&ot1%uavA!&%CN0bmA3X1w3;5B5}~3`JMDt3OJ`XDQMc74N+eW>8d=Dn822_5NMe=
zJZ%`cF}A}|H{#h1sWeV0!lv4fZI+iJyE23`=Zxryu+`DJdmv*_-+(d(FsRLNg-}G<
zSZwq+FcNs!zZJA(U$YRY()QI=;EV_wjl%W_*$m^cw1q8r*?I^;qfynuZH<I_lrAtI
zEl}~mY{GqkX1%yVl2wu)^jz}=_&^Esa+w$cxq}A#H}X574(u6_yFdGK;@|T1f2|>6
zNQ|Fn+&yV~+T^&Co*^vQX#1Kc%1Z3rVe};V(<=1B;Hzt)T#w@&E-P)Rir>(L3*6Au
z{BU<mUNa1?!>$1KjXO6AG-d6Lq@9!=pa(FTGDJsEco%~IS2{x8zms?O&z<i7HHYl4
zp;rrA3sQX~;tt2C*JR5+fSLH&bYDll>pex6Qb{>lm(f+6c<qgnGhFp*Gtc}|kC<CI
zC?|1d!vriRpiTVo>Wgkr%?>>Xc<U1Kv3>)h#SgDa1N+PYYFv@eRN42n#0y79>-ng2
zl4%R8$<;wM|E+A8ar4wb&+bFTHJSELr5y>S$N~hNcA0U<r?)iyhD`^AM3R(aQ9Cu~
z4R}smCl05t;|6-A4+MZ%<H-Cwhh4x98F&p9wV4>;0d}jM_V)ntEC&9eIpz(PU^Tk6
zaFtShE>6Ha4f9C<vo%qAn5w>w9*UlQ8e_;uHq~=&ZpSpvq#?f0nl_!Y)!oPT3D`DY
z);U87#AC7KD89$QF;_$M_<v*3{#-=_I3yZYfSx>l&Jw4xBx<j7DwLx(Q`v&Ct?moC
zA$nHgU1x(?R7&>xHCNwh)-JeoR*P~}%9%x%8P1xxZQ(VZo4oVqL8z4Xh<M3Ya|h63
z9D!^|V@VqXEUp6Gg`PnbihWP~*-_JAvgz+Qx-qpN)p{-EUF4gdssF9kxMGV69GIg0
ztS=XrcCB8IL9{1NIQVj=7(=g^z%g+7h)_%B!1Gj`2FSS9wyO?c%U{hHlMDb<Opu<b
z0_Sg_)v#}R7R+u*1%7u$@`#Fwl!@q)@~$$rltY)cnj0)NXNyNCUtsDK()XXXEuM<$
zgy+&fvsujulQ4Z0Ce6E3FtIZ`jV*b({S`F0I9`v~`n{<hbO<_+mQY{<3d`<W)}8J)
ze$b;UI6(VL4b-^%xPH?2759MXQ*}}HrNIOg+6Pu{%vBxb1XD22*BItguzhXPSRGhX
zbbo|@5`m<Y$-DV@Mt=g9Q73U<u?sIi9)t&u5c?D_4yob6G9zDRVf`=xJ5WYoZlSmZ
zodXmE5lF(z{51LmU$WE`aKS4uWi*+Pg@PM+Ddpl&a`z)^$x2m5T%8Z~)fZZwzKqL{
zP>RCzs`^XDZ(R5!!(J|RH}Q8O-Zu$sDim)uaZ+{I`vEpu0%(Cc|JCh*k|+gW87I3m
z!5i4??4tq#-YNK^rOnzo_EEYVbG$+{X!p#}Vd}l!Ub_bT5<#f(G7r<rI9&Cux_9-k
zz|^DBl&Q5M31-1t-KDEVYwqr_A9(}kK&R`rN7PF`I{@H~0RT_aJsdpycQCZWNA;yl
z8X<MYjbo2hU_mH`gBhxh#|_bCVbdm56Umu~h^hG*ZRaXe0gUJ{Jk7WJGt-&IVn*Jr
z_ASYBN6pg`Ajb2O7+<VDi;`Sf;4$zneZPRZW<L=5n^*szNjLL2)n3|ype|+Z$+J9T
zCXh@c=Z7>Ggk-BOMUXMWMNOe;pv>SLhF)@*1lbS(3KR-KHiTy$0B#-<X*Bt|{j>(7
z=L^QJJKyTfzuexvmBYuqcbAZa{5?SN3m^PlM1Q@`?0J5^hbOsn!%5{MnDkivQoE+B
z^9ZYi4o`-m4Ab)p=;yEi>%3|;?(7p&aR$?MKHWDX>+fda4T5kj4j-Z~G^K%8Dy4_&
zC@8{&7-h)<uK=7O!(eIOE>$;Sc$N|3r+md-*)KX-h7w(!mk3aSa|VaBB%4X4*r){Z
z)H)V)Ou)hBbu0IPOy#Jlgjl!yYZ(M-MghTK1UkfdGkSN;0x1{-(IUw`$j3TDXrqOD
z=-frgwnGRq+P_KM1WY}k${3LbBd9W>;w=hJ80(!kx_O*63I5Jmrk}%)$}vLy`6>Og
z=K@S5^=HDA+;P<X4>Tju+1fh=;U}g0DS=INkpfl5b@jMWDn|fiU%mQQXW?%~JA`!p
z%q4hWsT_C#M=HxYd?WR?6?z?p%t2gAfjugO+2?Swj~DwaC1&-h1a%hsvmHF+JsmWw
zw=^;=oP&_TWM?TZ(XIi$=8Ab1geU`EP{0FIir3YHytDzYYY6h)(@@>C0Tl@bY3%#f
zP1gZ0<zFQk*CyFCcvqcwKW#@Clzcof&DQi4zC*pDufnO|U)=Ke-YR@0XO+|)ukO4P
z{COcK_(ly#%LtN;ewfKO3W=ca+&5Tm=L}6rR)j)NCr+c^_vzN4dFwjCoZpH-(X`R&
zF*eZQSgkC<BGjn5#eWnT#lTEaL2N2Pk?}Icu!hiU9J5diWO<kWvb@RuOA|ONKg`OE
z!zjcBarW*3mRC^@!TFoUkw5i>z+V(fK_#C+6i&bf*Xp~&;kwHq-iB!06g;IyyI%J$
zr$+^qO0gvGU9srEgl<pC_;wr7I2IV1p>$=7^%Obian28h2wt2!4f?(G9%N(w(xC6o
zyHoMf`?jRsrlo|{=CWANG=!)bFY!EvM)inSAw*3H(F-2_Q;VpOA%iAs;tU9rHTIKm
zF;w_?MrTIjtvvKbb)j9!Bi($QK+E!)sc5BUo~>?PZa$o6dIhyxqYh4^4V*?Qk$7dN
zi2hh8!+jIe$dTDpiGZ4dg@R}^)XGwginzr!f|LI55r4=@d&<5cPvS*fBuyW7XBG_>
z#Evie!eQ5sfZ3MD`arb6)7R0{K{hVvE9bDCLa<2Q8>BxOZ=qB5!z^eyZvS6{KEFy)
z<eshE<Xa#r`=WvX3nApIsXfxK1!H-URRdti1lj`6WcwOc7xw|$0)K>ugLn0$&f!X?
z=U=i(cGx=sGmgUI>@8wahV(%<KokXJxrGiOt9S)mLrZRuDcjtch8SBv{Ro9;p}~+F
zYI2WYaR^Gll}i&5AW~Ho>7YzGpwg~hKuaHf!L?61-4J$2m(NBev8scqm}Ey`&sk2S
zk-iq>K{o)qW{WA%4|DF6LZDNW*xb^%!&d9gZ-Z+7EYc12@X!=;TlDg!N>%D^ADU?}
zyphGK)r**Mywfq4q{8G{e_N$IAb@6(kq(=q`ucN*lVWsqR=8P*Ju<S!(Xsq%lS#_%
z(#+m^cghn;<3o#DCD>L2Yh7F4(z+ggY6G(bC=y?a91ryXJG6v~haBGNeP-mrH8~Fv
zKvzbT9ascZ*`!|Apj8ZRIl#ySBJw&WuOZ4!n_`ra&7EHtE)%K4RgN!zBePz?q3?Ax
z@v53`Qat^^TZ!mDL2!{kIu-CpS3ujl=l5~eO*61QzDjWXd)=TGcjZ|g1gqD3?R3hS
zWFtubFQ=}(U4t!^S`hY~`a^u38{8ANx)et0uE)x0A6%irfGE2L2Ch&qzAx@R2MOHj
znZj~s<`F}RWI^+>38U?_*QO~EdmF$3md!iOLI@i{Z5w3QKXur^Xzs@7`+qAVH^%vD
z6us9ey&f=9cluG<wIsrl!9asZ8pJ}ZD=}rUUvUg~ldMPwt^prfcWIe^GdZNB8;%OU
z)l>qS@xh-6yE<4cmsXd~iUH5{z{k5v$pK{3+y#k@z%*yLK`JSlwU62uC~)McCMr5_
z+vERB8xQBy*hmf6S21~>YkV=2cCxO!!nh;d+{BD8Jw<3s)JxNmyTFXu_2b^i>~!@4
zMAbWIxdU2qSECshnjJX)ABP<X+a3bIwuge*#M|y=wW=7_a`?e?#VExrj+Y}@!zvd_
zIhOMX6Awt9m5`OO)jo)gA$S?WGh>L&bs~gzs|D~!fx?HyKiIzpaNOf>JNDo9z(<6A
z>%T%ZeqEKai;f4ElVk^bmKW-3@-e#@3Cc&h-Kh~y>_jI#KjcVWLw=`7;E`~HxVfdd
zL2}fH@(-CO6|CTw?}%*0E)+1ygAAIoQPW%jv}b-NeSIzW3_WVBS#7m@f(`M&D(!@!
zo-IrtKc%7!Pbox{J7Rnm3$^mIWlOW2+I>V;Jj;19XjV;5q&oLJugG2)@4|qLL78wJ
z9`jy3a*t3%^?_9nHr@mR<L?Bm0?P9*q?8XA@LQtkrl0HOYBlx;wxonPnryqlc_hXj
z`kL^Xbc7GON|O}H#+&t%^FE^AL~WClbN>ja4UpF|@R*hN1tkotxCd~Jet3KP@>!?@
z>j4x>O+IAqGXx7x$Iv5eDCHC}W0jg0lD4N#k51D6JA$F<qqL^TA#E$o?KRf>`o~l+
zBkS7Tl$#zPa6mc@k<==o6s#ZE_3a%{Pm5Zrw;@b5yj}w^8bCL}u7|es`b7)&66}lG
zOJ|#y0hMko5g^c{GhE;T6lAXMn>)Rt`fJw%G}cfDw!#NCqHyya|2EU`w0g>txM&fU
zKX>{5Xgd<#aeNuORVow^Z2`(jbogK0pAlP%P;s5rJ0lj?+T+eMU%YHn4q{btp(_Z{
z-#LLL8fbSM-6vYGSl)ri{Z*;Phtv5gp=#bb!@;}!X+b{bQM)T8nj<6caz^KDpsY$p
z7&>`W=4y|?6TcJ5ffI;5zHi;HynyKX^K3alQg4^{ZTmu)>kk@?y!sI;YG-)B0bsJ2
z_DL1MTn81Hrx;6B7l;VFlc8~c;k1C+im&=KPoQdM#FG^Lpl*%s&gyTQJZiq4#X|nh
zlD!jZHkZ>ip2J=(N3$}<Pyr&Vpff~d<*!7LhgJ_JE<k*w&fnzl0`!C+?X}$)8FDj$
zj6W5pUXz>6dq_>Qq-hjw6F#2IDRG~9q?ptW{=hUha6I{$7_%sL;gWkJpySkwegmOW
ziN!#t-l}zd%QgxO7cI>Z*zCVSl<fU`=|hm-8G(>M4B}({CVGq13a$cUMT++$;tQN)
zGrL<2VJH4t`p~FH#z|2l)KvckT)v9VQhEXr0wCjO-J`pf#&;W1HkwADDH1@_0qGg7
z3PjBLssFFQgJ}ZL_RE~Q)<M_Y(^wja!SQF*sXWh;)A?3&+b9wUh~@c(j)+92i%g)Y
zXJv;Xc2W{3=mYgn%I|SGxo>SQnzKp*#rhJm(QBHz9~Z7hUAPn3^sdGgN}2ms1~mxN
zOqrnlm39WyxIcXf@S+zispLe`t<pK`3MLVe!#){mud)&c>fVKO1v}JJF^-1iSmCYG
z+Y0430}(eBxhQE%XgBLpvICS(+ij40>1_Z<$gk~40r)^&WTpl>Gu9*Xx{^>v(5v1K
zG4g(6v`V9s9JTSTW_l^?dIGqCb;)-MjfDdHnk<!X<z?$s9cS$l)IB1OLp*Af1+%DX
z=TY?m`ok`(IrS^l_$A-ncXPV}*rzL+BR~S+GH8Jg5Qc$41E{f=04CW%U{a*YKH3W)
zw~s&C*XLS$w5U2&eWT3)w0labd6r_%pSVxk<AcxTFGbhAy0zm74;t<GkhShBLcNi^
z>6+ejt3gpH0}T5O4wAW~*}njO=3$J1jxQsqSsej@*Ex6*{Q7?;(f7>(0EQ;*N!uKj
zV}pKZvP(qU;e)5SRbdk6na%l+7JxDmT!f6V%^u=njv4C2O;P7ABwug+yuSk+rfZk!
z{Vd1)NWH|$Zh!@n<{1UJ?eZRBFZrrFfe-?=`y7wnHcexsed2Grfr$#lt@5$fSG|TE
zcGYPY@&n~Ws$XrWh4Eb{l{175;sTbtyyEG+?*un&(vD6=>Gz8vH&qxQ(12<VjM;5?
z7u*1RelUto$0s?oU_m<)?7jh}ADme+do8BESj<~OLh|4H<3fMha_jI9X=X7jl}I3f
z^Eo3Wi8iBusbLEq@hOkx);f&-n^NRZU7SW*R?w?dug!_>;%pruMfRMBrW>Jp=qpGt
zXlOHydciV(7}QkiAlkt6)lwCKhd2-6-8=<*Hwp1T>lR2^8Lb5OCv_))>Fg#pWoqmN
z{aj=E<|zhc{4wi+G(jg^OR8hvAX{o+V)3V>Xs*WTo`mbj5%~r@oN+8_H*s$#9N~wD
z+V3}5yov<%s}20~s|WpwB*#Fj;cu$s-wGtZFpmcUE;7$q0A3P2PJ6KyG1;5urAB*)
zPSK0h7&nSkEVw&zlW9EY!&6$>+L4|B+LEz$6k6u$BZNVusmk8$#hVZ8p8}f@B4S+U
z-|e`xar6IHd-)Z8mkIEtq*MF!B>;z)ao4AFhPrNwjC!uFpA-A7D1CU_i1TU?J}-Ub
z_LHiiVm;^X?Lvpf8F)*EscE}Q^-lPZNtrqUT?QN>w$r_bfR~c5W*g;;mLx*$LaWIv
zCBW$~sZQ^|(C?(jv`TB%Lv`AVNska03m08CNojm{qTD$o$Gf>dRc_V_Si#+8%4pAE
z-^H0<QGMqM-$@n!g4*an?&(=>&)}OEfDj!dj-WAO;WXMd33||g$<+@5I={u}4!39C
z^iEsGbynrds@}?x&AZJI!I~GCm|c2^l&utZ6m+1MY`W7fP2-tF57^atT?;<`c*Xnp
z1h6~{GJ@rS-F0M6Xz+M(QKk)u7c%exvb@X;0PUBTtL=}gJ%GwTUxSr;jl3(mV8sLG
zZqssLj+@_8<-_HRqp3n2G}D!0ZRaw@qHZy+OquYv2Ie^GjZL3!D1SlhwTsx>x~lK@
zQnU(;70AYT7U@0^+&wHWX*qnZRUuR&LLovzJYdxcLf3k>k9C827a&as4HmTHg!au)
z-Vl{NQrVs$EO;2xbfRwR9NU<N)H=*G%lXp#xp5(8EAm%%#Csbp5nTN$Txn)FtkX{L
z-hwi?hgZO9q)0XW$ny_%yy)FVr#|i=n$k3~V75JL(2LmF=2LE>Z~RU4A2{q6F^cC8
zanuJM_53%rjArS3O@RV7di~_*M#qcs3@FBXwTpQTciHPtUe|0hn{;0(kneJ0?q(wl
zy%fkQ1OBoKpsVPnuKsqq@=#k`QN9$~)Bt~$EA$T9)X)evH6XIq_-?5Ev#DXxkg%!Y
z^D8o6@uwl5Gl6b}sr24AG+@b9(pQ7+65i7VU^W~g&XwkEr%OW1C4XZi8}z%duRBiR
z>L%dCnX;BCjlNRKLH_8k90$&<beL6F>)&Cve+}!Yt5B(D=rGF4^EQ{>Yug(D6Ml!G
z+0T=JX;XenZ^YuoSEcwU_5i>E7$~=O|6PDBo$dAmfq&n1P__99VV+9bwMy-be48iA
z5p;`L;8f83$jCDn@9IU<Gmn3yAlt_cX7vA3gg{2|EG+dT?}=?mx~+1s0GJ67i90SZ
zc4jm1+vHx^O}M!`UkVLHnh!jINcIuXssqk?k^N_o?tpIojh#Yjrds(9(vzcBj2+!z
z)#Mj)gEX{AV<1njKvE#hvY?fVPlYnI@*&r3PYqYJ=DQUE`7`0Wht7%B11_-_!A0gR
zr!TDC5eL(_Lcfi<7P`@UWZbrD5H4K28iM7m#s=21=!Jk3!k8}ogaIy<Ur?|UtQ2VY
zuayEjr56b+1^O@Wo~eQJ+y@p(!&+F1IVZ{IQAUzJKTvb6$II9+T-b@u9d^E#W88gL
zrPXS_1i3{xD&lhGV<6MsC!g~;l@zLL&Euse$OcLKXEcu#@{_<}Cyb9*-!*{aRmZE~
zrH0JXPtD`Fldw5&{LyF|#{_<|o#*8oW9D|a&asxIInbEbGY0t_OD1Vw>5kZ81>m{f
z4{I-+hBE8lAKMrCA=s+9K_{Mrr2HNhn(CAnm183lNZS#1E-mb+scf;eP-a+OO0Sx_
z?{sp*Zle|=3TxJTi+OU-VXN561sJ3jsRjnL=Nb%diCu}RgW8gRjTij)G1JnuQ`&Q5
z#N115slkaPXE^RUYQ${oax03*x+CL-1^kK0dxJ-*cZVHPuPGnRAamtHFZ!e;b;^nB
zmlcVx{U5Rk0CJRfybcIF^kAJPp$`xY9Cy*jm3?bl4d06FkGeZwiykNw=U2<Ea3=TP
z)i%=~r2CYb9t_6gpJO(!Vfi=Ffm~zKW;wHY_wk>{x|6m_x2{%p7E*(GyiD-RoiUB&
zhTCaJ+CVn_)F!yM4eH4MMk-5Cj#mq|iojIXQa{p2Iiv=(<5i19g5Wrgxj>77j94-5
zkhE0Fj8vw)*!yKM^VsQUJ7I^Wqw>#Xnr?$h?VAq@0twfFw!T5SjZjdn0d_#&2LhS#
zcY>A~@ln>-fuGaJH6p7Cs_!=TddtcB19TV^?o~k1=@Gj6u$3`;)C1{sMI&{f`i^_P
zOXKd5acrAyk&UWLRZEmA4wN&u6}|(&8!+l4Eu0%5I1Pvbs9l=b$MV7HsDUBYWMb)C
zSsgKm=lRy5)d6hl)HMP|4}u$|R5WPz%3i$dzOL*O%%2yH6KIDE3bf;>lXIKe<5e%%
zEk3TP%N`e7V%U-#Afp^D1CW{)b%>nE>XnzO=G&1?FI~FCs+Vd2t!~E%qIUnrh%TUz
z(K1bdA?;FYi%^;PCB#Yzn$v_hFT3MfFxRMH5!Z(-5eDh>ZJ#t}A1dKIL?HUm&Z0y{
z!^n6qrUKD_k$~$gU+cf29}wc&x=w#$<<d9rJ9QEBAXagkGXo!R?G>;@)e!VPQkY;-
z(py0E2Wuq5JWT2{L1q8@G70k5|Fbd)N`GV${>UW!kxBR?lki6-;g3whADM(dG6{cV
z68^{}{E<l@6!L#$5}=0J&mfIIG6{cV68^{}{E<ocBa`q)CgG1v!XKH0KQak_WD@?!
zB>a&{AP}nm$RvOU&kJZ0`HxJ(|CcfeyY8!VB(>Se<!kFxUX8vKtbF7WcR3#tajaj`
zt$Oi=I_YNdapL25Mu)4+>%wneKvRvt29h1l-HERg<5s)FffN*Jvp&ujvYsz$couas
z?lLu-r|-+R#AQUW>n{YnYzOz|_fUIfYoSNl{ASw5mp9B8B6d`q(`+Q1*9OKTcEa<7
zcl~i2-W3RgBYLL~5}%w<eNusFs(2J|fQXdk$<P11W9KcYBtiuZ*GUvEpI8*~wX>??
zzfER?(+ak)q#oj&hgX-3C2MoRCJ*u)<8oZZQfUaKqdB+ZZ-&T*SA^Vi7FNHxl_q)4
z^V=5vFg06qLB-54lc;~SG%g@q#L0Sl<+W*&@9uUb7vt8z^e1NUR)!>`E_r8s&+YR<
zgg52>E?#3OFU>XoMy}vtuH1ysY$y?r9+ew~tb&2#5T}4Z=CN797bnr{WaY&iFI5na
z5-&t4zJM(wAH*4Rbl}=;3oyBvGIZqNKa*nPEzpf-6cgUvyz}k;nGws~a}m8WQNGZD
zlpO<7|B)ewPf8sCFZR<vTCK#aKj3Ye#lGpO!6b$J@9@DLIMHD0Z<)%^b4%GXeQiHo
z<(<!iO&Ocyu9P-9C}+xHuh{m!xZHz6cAdE<#tI$-*RmT&`eQ`wyYA>}G)fYeuP@yM
z50qsGlzZEb+^0YPGK7PmV&yG4EA8o0)DPxtRM)hE#2qG5kY=d7<xXFxo$U3XBZX5g
zGF2}A_c}%W9lv02pzhJ8@nu?DrQ8a{YWMV_)Vjh>i?M;1;N~@`G4jXQ-KULPYFZQ<
z(%2QFlse#{c=Y5tUEr13{p<t0Ma5UrD+RX8h*Ww#Rjm}{qfSodmD9?JYiVJ1KGtft
zZqV=g7&Ry@o5auw4&CVJ4_XjY;m*r$>z5FB!D)K1gD+03Z$D?`j2c~^W>4^w*gM&1
zW~U$v&f8-E*uu(v`9p-?`tv`$MnuJMm7D23z1PN^lZuD}Ok||wTDYbIVLGbF`VnOD
zqD1CK{`HPl+0sXScU5UIQp_n^BwIp9&Tye`XeGPQ^|IgmUP4Vl%F;~vfERJ(o<C;9
zW><B1wIovfy(0MPF{pz?sGZG+qWfPGQu&yQz7xkzsj$d><|0W~o_4J~%~e}oa=L6W
zs$gns$$Is5fsUz{sw#K1E(wJ-?s|VV8LtCl?%+DrdKi#zK=KobAG3AV8C^Fq_{eu{
zkF&{Ut3S%#Ir$ba13K{d<0Nc9|Mh=e5-Cy#3RD!!i%ZJ%%MT6Z3XC*9edY;o{OFMp
z=E1Apv>TrFELr;5OssD}O5t@)<iii?m%iKRzRXBw6^eaE2~)zV@%zp4@SJ3AF8>Vv
z$26&dR=(mp?JoWp$EfCS`fQDo%Rg{)(S$qc3f&3ykWFA5``(FR;$dZNTBaAZf!C3A
zCQgbkw*xT+FZ6IRL3eR_$<ot?hSzl^RJ}Buvd^hylT7ts$7M*Y7Le1%PENh-Kky|-
z318#Ih%E#E$P3zI!n<K!(RXe6-_%buN;0HGh93c+Me95fak=al`ClCj?;WBk;c8vw
zfkTK{ZMi!<=sCU4=I|fNYxh*UX<;LdX^k$JzB!|(c4?d=Hu|xx69-d;zUQfw<XS{V
zG@QOp9LeYdEWq;M{7XKrYrFnc8{WP1V<)}|0841b9Pr54^)ouS-ywk0xhO9sUh_CZ
zowo5|t6YJSMqX5YY)#tgz01cEP860Jgf@m{rE@LsrOJj6P50RPrubh9EEsMzE4=b*
z^}?xxB1EX!^6Ilhq)(;VDuWsMf%C2b&YQ+P61sB}#H22MjbTAQ_ez9MqjjCQ-Nu)n
z*hdGSBb!P^8~q_unVhn%Rh}d_)|RuyNqCe$+JGVjmLEk@B6dl6x|Kge8f*43G<_+t
z$n`pL5&MGg0iw$>QD@N+qwAs)<IG=*itHD`zmYEUNPSOn*Hzo!1P*u{rJ5Mv;Pgb8
zg`^ZU#Cobap@MpRu(N_+@!DLk9AxZKQ3F0qpR*Ixg6~ZU`BTnLaVn9?zeCqv5ViqV
z@<*Wap(aw6*F-t5oWT$8-Y_t7Gcjk1B3PZ%8I~V70bU&Bw)VLe6{AP%Rh#(8RAd-Z
zzyQDVKx>KdCj3)@wy&@q_LiJc+9nrMuLyrlUa`oFn9ho+ywL8gV0&ieV|pijS8v!F
zY4?uK-YnGtF%m9SV<ahSy`5+CG6^lw-L=%z`XvZiARI_|dM+?RwPV%4_zDLO(uCxw
zi{x8zH?-oGr9T$Ve4DR2t7&1RZP3icl*i(!(M=zmfR3(CXyZxK&e=ZmMku*CxZ@#x
zijcsV!CCpG#1dC?ssz_3z|x9|`lHV3O4JMjC&@|GI6D{X3%x}d8Mu;x!F9j7lBY=Y
z%e;BYtV>+xG;42;-u=Ki{}k)2)Kd6Gl@j+*I)Gj!6Q+Bl1s6=MeBqu2@?7k(r93+I
zbi1(fa#J_%53Vg-nqSSn=wu909wdIKWoJcEGsq{p$7v&1(<kmI3+&B_27DAzzT>|N
z6d#Y)bIrlMHS#*S%A80tjWbiqLq7-$WhFNU_1~8pN`t3qV8_+M;?7;yT{YAbTg2ma
zRXGdZqLJyMv$H&0C)bVFPJ*+RB5I+E^RwJ$3}2!s7{pspfjf9M65PiY)wWanSp+Z@
z`4>+-iD{eb28vie1zJdHbCvAq9Cy$g5{jY5mu6(#kHJ<V^Djl=uG{;iDm76qEUFpL
zYgG#5m=f0uId5T?dLuj`&%;7e$}THfZoWDa?c^QtenfJpuTPY42Aj|sVEDLxbp}t5
z-uw<@MLAAanO{li9Z{z0ZRKK|{Zcv@)^eTno}pall!=q+gt8y*{Ft62Nl|P(XUb%P
zUv*Ywo3GjM>QTxo^ih<=CzHU^+!eO5ixRtnHS7A=0-LZbKF2-ma&?#K1w!O3haxAJ
z{LVi21G+f8Rzx`+mBG)+&CrKP$ei^^VV<Z%Fr(mpG~QK9ZX5Ypa6A5P?j?SWu-1OM
z9lJLYLliCQ!DPu~sHE#c6N*bN$zZZK!pJ20q{r&v!{Dlrfhg3`7R!7A+}NrO14lPG
zdn!JE=uypa;Oj71eWLr7w^Y(HAD4Iq@18wHoi=qim(?4;L9|kWlg(fa3v64^s8mxs
z4Oex<g-k#5^Ddn_HIK#8x1XI&+-u$Ls2B!j2@Zv9Y?H5z37v~yr&s*$Ck8SJeA9nm
z9cw@IXo*%j;$MxEQB6E_vVRd1#Ck{2SPLYYeb~B94fPnK<75{epOtQtFc-Je>qM2y
zJ{mPi@#RR^v1GgX-I@5WxK%+@gOa@K|82ut7yX~5+Q46Xs?hOA>s?MKb??@*qtKH%
zNMUdEXsKPevmz*?qu0-<e6Ta?Vufio<D=;0446R^l4<9mUf$^G0tE`x?7$Z&UO*}K
zv8w8WZ$~mh+m&i%qufbd7CK*o58-<s5DX_$jgJ26JKWUjFrS?G#P#f`7%rYTI6bk`
zQkpSfH*q;cPS;;y(<`V;ZQ(JVpEKcEUMhSt($Fnx$eGcVEgs!F+`9Bv5u-${Y$
zhV^!!cjJ9}_gz)BuIf4H`7FGwAe&+HOrYH-orVHe2!lsar1MCX)21;dL^+bCB~`8d
z)RB~+o>ta|fCJZ$TgZ4~$9fk_XM-AE^@__LFojd)%Tp(<Tax4|ypR$lIYdNiOS0Wj
zBE&Z5k6q|&Vx)K39a8D<zd;M7Q=8P_j%M)W{VI46lhx1m#W^T1*W(NHedQ)Mlw%Wl
zRyN^OYGf`(+GkSMXkOjndU-M=!G<FindQ%_In^K+ngLI-t3E}ZJGjZy&}QO0dl^(0
z&6Gd1%*B(ew*^D44Rzf)-^kAwY7n6kq2wCr9yDHHmwz&ja6w%m53Uqk{3{R6N&=Fq
z)mG|?#q-kRYQ;^B*fA$(tI5(cCKiK+({>iPq~ciiFo(us>bXVrt4{aKG~KeIR{Px@
zJ;43E0`8|FNvwCqYRIPxA`zQef&m4pVu{lNHG|g*`a8#&{c%N=P`v-m*dC(ZXCloe
zci16B?x?57j{5~`Rbfesripk?+wc80pA5JI;iS?5voR9otYI-5W|2|%YfpM+P@!Ag
zrkl*Z^z6Lg`^!W&-vo|iM5+jZ3NR}Sb05m<Y@xi);GW#CJl8oW=eIqrOVcdwEBNEJ
za-snDG23qb{^wR&x-c>$91G$&)6Lh|F?AU-rwuJC0j9I`&ug{1EHB9mHkTI$6~&MI
zI8k|;i1d9o<KO9pqet)jVj|&=+y!?e>7&t+U)>Q|7IW%^*UD75hs$+DDuRW0Ib#+%
zn`zgtVh8l`J?C_Fh1nC>YW2{UdG*D;Wz<s5&Kbw-ETe{l{EH4j#|#I)F6tVG&9)$=
zXv+G=W8Ef6>@oE&NrC~$o(F~B&E_j!(2dzA$9c!bYa&fFABq(~klD&C-yY%&FztS}
zf-AI%yH!yf+?D5Z_tqnMC793CNvBfH*8$BTLB`KLQEnfQ0Yb?lJE|roYAMsK<sB=F
zZQZ+zGU~@Yp-k40qz9xbOSZM`Ex>u4U*2npat}H>ZhYi1l<LW{g7-RKf5r1x?{%KH
zq_7i}?tSF_x8#FF_qIOrqz!R}f3j3ozc{Q>+?5^^r2GPH>VzXwmuQ2%c*Zp9ljq79
zhzO}a+;f$9DaM;;kkJ8De*RAeoq|W^RU(1!Lfbu%1qb611;sRS`IUVS1^5?7q8w4q
zAHaXkc*)JPSX3I)AUM4~@*<AgIbPMEY+OX@m71i}b!5L~`jOrI)&L{+S#LzG<_qfX
zQ={CYzTLUOIWph|Q?Z7C0S+V|k?XN~?zyZHLxx8?g`eWsYfu#5p4Uw@N%pm7&6h<%
zMUaOw08van1HZf9ry^zfPBdWSTYb8Hk*UZQCR6g%ye;bTkB34-Yg)^;K{Sp00*6YP
zkzS;V-WlIhQr^Bm%b?YUX`dBdpC=JaPSDZ3ch7!H?wI;Vs5T?LLS!Sl+Wq<6NJjMg
zna6x5ptQVU?i(o-WlKPm9e8O3Inn)6^NaUbh){z4@z{wvC)Fn~0aZ66%`zL+H{>RI
zx3SJd*xSKQhnA2fvFZ7;>P^$W1|%i9@;3_nLSlFD&&)h3*}!8SkbrU>zn)^_9eJf1
zzkTETD@ldlzqmQCD9_gh_Z~OK2cjwTyS=>2r*NxqIroO7PM%j5j!#k=5s&J}yKKuw
zLfA{imT~V}=nlbX3Y;DlF(j1j#6L^B^BJnYz7yHlMU1VRjKAj-0rfJ~KY>0F9D(AU
zwiCbj9P++WHgdPztBMtaNb;v-&<%@<3fgF=!Q@Y=mlDxea)&ZPI>cl!hm#elBp&yh
z(KdPEAHqcYXWM{Q3AMxZ10J#fuZhZElkvy6>GVu2Q4ICzuRSIPhd!YRY+?TEm3{6k
z!9w0$j8T6kGn!oHr(5nF2K!N{Oq_eVKO0%Z8icPBn~R6d9}2gCJ?1j|Dp!^EK5*gK
za#k@a)51op<>6K0gu28mP@0viFMvld9wf0rHxo*;=nLkU-tHT;GXFt<%VhB&$v)lp
z@Vjx|z*YGc&zTar7TCOFO1UJD0fi&1S|}tCzbLHyg9X9W&Xsb&t5bT*Ptqgf=r-H%
z1-9lKsRDLc02j&9c|mYd!QiZ8L822dgBVH>Umx)P%14kseX{lgf7J7ZPM$Q+vwYj|
z>Oi{f>`8gL^<-MooRt|2t$GsOqgw8|^+F$5JN^Roln;@_)u8oc4jneNvswIksTbP1
zb{C1x5zW#=L6&Tki!m<zX2bY;`1sLCQ=#GQ8z2|j{RZ91<^o_4<<AJn;=Xs{KJey2
zLmn?QE|11%CC~)cqBfjgsVE;e{`5#QfiBno`Nb<^?*wb(+3901sK=z2xgZ~%q?yl4
zPe?3qB|?>l|A4-R$F=)4=LGyivR;p52(N#-$xijncMgQLo+8KJcb+j(x?=ixaZ4Oi
z{`=*J<4C*XWk)U0gYN?)v-qPrr|Q?Rvr)2!opt;Y1{XQ}m@149Az$UmZFPL>mOpp1
zx%<OeU;#8gqRzH?0^b?oUbtgE_nH9j?Le^2V91$$9RrBGUjSYpzgiqP#Cf62R<sH}
zZ3J`bGah`Otu^%o>l`$hN;}<ieXq$lm$p3pUQOZDi-(Rd?*8X;Iff`!${02P5t#sp
zNQ(4|xJ&oBcO&x-5&vQ03E+}TH)1X+OPG8Wd9y%EW>R&MkE`L46zp!zskGo5+)IwD
znZ-KOs+m|Xojh)?CA!=`*|13w_b`Cry>>oD01T#HD%RW~V0h;u!kyW{zde!K11ZJk
z)dZ6L-Qvj*UXQu7FRTu@=S5+?oE#zXYkHs33j2^ZG?+TFUm$GP@RMm1l$<8X=<bi^
zBWGLE8~NiauBduzycGafk_c6F^#=&+mqG9YQ7Z&wcVh)(V?(r%Cyp@yIj{I3CGo#~
zl7g9U@__^5zISd)Kl)kBkdLZ}SYe2J0{@!T^F+B|oU1G)0Tt*(Umc=b;F9~kBYAK*
zD~-)YBPGR))aiICGCZ`*-EmDj@4?~EhLSgf&*a}5xc?O<d%y!pVjHold%gxG8vH_S
z{GFfp9w(RSBsyG47<v;a7yxndw8(!EEKXW_2$#g`(vP=tYTjeM>4S~hM*6m_1f8)v
z+&`>%jI}nf%*se@&B#<_r<G0!Ye8ugN9Jn8m6v5KpzHSh^A-2v-f<!Ao+D?$A;6kv
zU-FIFZ1+zrHSrJi&2iUVgRW-^xSmMIPYl0!i;Id{9nR~mumbG(z%%)q>8I-!?-!O{
zrS^kGP%Cm#FBf`{l<?b~)O1zVhPN({QL`5FqEfQ(#)*^D*s<|bJv9BTGJO|0SLv82
zLG7+jvj&iZ9Pknaw)Oq*44@BW4?a*CRogGp8dCYNioRhax)$!--<pCrkaD<WDx5Y<
zmnwzKta~>1^ne<!F3%-BUYL&cihh7{H5&>k5maRjQys?_w+-yJ5_x3UQD}p+K7Uf2
zuZOf|FiUvPa!Yep2+AN@AdEdykn0yMkBX<Hmu|m4m^Kp$d#ko__;g2hFHgr4#2NEL
zYKo0pn^VNaVl#TLhE=(2J8MN&7$|u%;#W@yi3wn3D@=0=Fk+a7%OI-)&`pK2PVUIG
zzU1yF``SCgyHt%0Wq+N4J2-<K`Zl7!jr6C686pMbwiW{mkTbG5#p33ISzP_CE!_26
zX|!BVo1f;}rj;kuJU7Rw+|xk@Azs7O>axSkdkh(Rg&Vpqnn$0M&KaVWKH$Oe`m=_l
zj`{Ei%Es3s-$gW$mj2xuM&81=L0N|oMb$wR9sJV%tF{ysJFkhji&1T}^3?}1nTY3^
ztm}fAZ|%A@5d%l^zs{4WR-%$`zGAJ6zuceJVnH)WwR^G*{V68-$1{${UQB{biBlx5
z4Wan}PcgL60-qoqL?k_Nu1&?a1iuMtGux|_BZPo04%KFq!O4Ww{O9gdi=Sl7ZZ1b9
zu%BD8CD~jGW~~y%^8ll6w#*T$D$Lj<m>Msm7#N7~D?)ZT)TVGRl$VxD`?a`%O3~st
zCooHJC^1^;%mTBDx77VHlq3VSAglPh1M_><0ic(la}O*uiyhK=J-FlTy-AHPzkL*U
z_)Pm^V_9lyQyxiJN6drk&nd6Wj&$fE>~`=bH)YE6>ymVOD6WEx;?`d=jMT+ydnIl(
zB*Oigvi*OQcHaQL`$hb@CDou|pC7@R`fCrAPyetCmK9Wpv^j3NVO-W9GPdQ)XhKwf
zu~&NK(`K>t>I}Yx2d*r4-)akLFK`iJ2b69<FlAr?nExH|XQre7DdMd^8{{9=^Dv}B
zN%OAdq?6Nen)ZbW_qzc_SH{*qN*caQqgGJnuGCRe!KwugbO#3`Ch2xt;m$qwUieq}
zj~JuC2lfE#OSx-MjuSJv3**u8&&+0`2!_CRisgqOt6`Y*{`s}XH_DLSqN~W%#EjKf
zMmR^xhU6+vt4!lhTwJZ&q%7?}l+PEMmUA`rns=8L_UjT?C<|(1A9?stAf&dQlIAM$
z0=JB)uqZPT7@jL+aNp_6`V5uk{o?PfAV2k+ov21}9|M^t4L6hO!uL@Y7o0X9W}pN=
z#8(8L(a&;n&kAXOOX^`HlE^wmJ$<P)exUUIhFYGn0q19_t+c&Xv9ju}s4s-{l47jG
zi=zhR^^(dz^5!rn)jP-PmsP^JP6*mFF*wHO13Ap*MBu?C(4Xjk<-s3<1WTVs^B||f
zDckh;t(j8O^a7gTMw6`7Rlj`mHD$kKm{aV_@l^F8ZVWoEv%LdPGi@}u6HG%&X{ewJ
zK8Z4m)4Fj$s9J30Cua2}hwv(!VhNjVKbu}xSAgwAiZc8MalWXk?97$y$4|VA-m{$T
z+C#7g2QDqG`XcMD9~lr2b{c!A`t0O#*NLeSSnScT!H&tvr{w3>MHFdf3wHZ1ruGM%
zy+JUMbY@lI-JX|vBTb+uwK4ts9a|{>zr|;Fu)k8}5QPz1WF%WEn5)g~>=B{-*w@O(
zt#RL_iHl6j;U8myDTMe&JhHT3kUu}}{pHc2*;gi^>LMrRdF4Tx722Qmcd9{CxWnxp
z25ht<3F->n4YT{Xn<Ch)mJZaMBQp|I<dVtYd;<4UFaLNr`%1yUt@=Y+%IS>!LP0h<
zBAyz`!9=<pHYy*hdd|ci?orsgU_KcldJ-J-1IX9CjN`9CQDx=$L@NlbZs(L{KHGtC
zRw=EUAk~s3z9&GqF+cyqD@>Gg=KPPSA~BnIDa6fy(~XO@L7FNkKhdjB#Hr3NHB*vo
zZ&qE&i@hC^UeimmLF#ApN&nJ1o@Q#rFfO?*IxjyVub6w-M3YA{a+U{`i(>)#1u&7A
zvUpul1#fvqF5%s~*Y}ubHw;28)9s`H^!2;lLeOp6!8<ufg{IQ6H0LU8w$cq`hmslO
z>9ID(znUd8HhQK~lYrpL#0!*ps!T|ytMqm%IzIbm8smC{m%?d%vEKtU2qs9^)9n9H
zE&fm~{!lIcP%ZvYE&j)-7TRPa<tu3xfjsUTSUhEb!%fHaN@j^?M5TJb!T&5L)5<3K
zDWDPeKL}`iV)sB`HgR|p)RvlhiA4M4u^iWP$UF<|Mg)m9^8P$VERlIN(>ri*BXaQa
zTFKx}_Qbj9jN+-}?9x(=bj<Fi^$txsPr-H#*KRx#)lpzEh`cW{`O$ju8+&luebF?r
zy^W@|PTSf#m3=|Q%pt&XN!ab{6N6wAMICIV+5#(ELVPHh;l`KEsXVt6MfKxGJAs>9
zdHRNe^w#Z5NTH0-d_Ck>eR&g*cDm3E0qkFdh=80haxI+=f2HdtkB7)14DMT^po!Wb
z{+duH|6L<qZz2kC-yXsGT^%rF#xGX(S$eH7U++#VxPR0&HU$+^W@}Kalsq#~R>I)E
zy--PQmr%aoXYqb;-EZ)cg>!FjN8c{3L6qvi_Sn_n5r5HiC91!=U_VP2M$R{v!Bw`O
zrZNW#>IXQ6%Y-jShlLMDFEO9%T=#H`>4<TUn3>kUm&YC6ZpIKd(8!ObN%$r*`Zhw?
zZ<cDkuFM0x`E8Hxs<OywcLw)3rDKtoIfwJ5L1Mp=R^;daQ5XI<5pobCMZcbmN*xK$
zaHr`&Oo&`XzPLAqk`!E`P*z^L(nT{BZoa7_JoJ=G#|cvtA0MBcnfbuHY(*K>nroXW
zVxy^^u*$q1b0oO4d^LL!i`m8&J%tYLXU2KJgYQ&GPL%F&w&PY(bVOj2u^UDD1+nb-
zSJQM^@ov~#_W(sjkkLlqAC5{;SN)9ZMYoE4>J`*+Pkb?-jT1!QyQl%>H!+mWRy_f4
z7~E^dwsUeb<GUpY1V++j2;@PbjF~+uMwku6U0fHc@5`;;Q(>N;Qd;V}{Gmmq6xF>$
zHfY;+@5phNerqfj{^L6Zj=}OJQS@=wh<a^|$P#*VcvI{wd7WUVhEmk-a^if!(>weA
zD8Dlh+hr<vlvH;TH(~$XLtGWWMyK0*=GpbAvp>u>$W7~8XdqR@T_>;V^5{_{$L^NS
z=(&EYNY1yGh+b`r?tQw^_!(;95*Lf}Rh#-_y2D3h0iJer+-~)+Hjo?C2AY<eWjlFo
zIyI*bvpbFH!6Z*TPhSock!aT&;E3KiRwzD!x`n6P+t`^8&Tw5xxq)hAUXD59SXRSe
zQ-5%NcqXTbd)|F-iz38*x#d>yQj;V`Y$>#flRercJHvf8uQLdiQJU?7$D;gKr-B__
zRo%Dmum?7|j0xyr&$P}R(c(Y!vwFwl9*gA`Eql+r|7u)}8zz^T1VMeu*e$eO;cd&-
zyMbhL%$134b6N`&MrUeGo-05Y=_P)XCFwmiJ*@+eqL6~w6^whe9;)YR>XiF3uH=x_
z@qnCx%3!kR81apu-a*MPs$nUyYcm;sNvDJXhc#bm*L>FO3f>T`G9X}FYk(Xg#X<})
zt}ZLkAu{p(26A9^k;QRBV^bb9HfxiN3?oR8KRjvP1M|eRuc7zITrpdNn9?=z`O7&u
zIY&xo3NSmfOjC;_XcF<sjsWJ&UNMO=z5rpZNYS~%xCrODB79hYd*4)(a%|UyV2y{|
zWy`IHI<yb}966FNuP`HX>5f5>2s*UaTs*C%SRuFf8Xa>K_CfyP38rI<F)FFy)1N$3
z<@9W*qYbE%+r~HF1<6i2q~gtV)OsbII`8SIxA7JfUbWV_H`yUzaJ=Euu0YlN$VBgI
z%x-?R1=C>pW>uH+vg<h$+@c-Qyrj!%_WfN2CD1Y>F7FY43OdcNrIx=MvVnSfTN3ok
z%rV%(i{NE>MF8X#my;0BN4-8Gxf!?UgDB5-L<_7|Iayg{X*di$Ybc$o1)fS#qI8-~
zNDdXRdu?}}s!npdGEYX_h0(aH#NI-9I<<T8O@NC-QY{<P?hor%E8Gcov_HS{S~f?M
zym|I6C`#_(qEDMNvR{{!$l2<RA(EQq+9@45CGK!3wI_)wJU%m|PK_DGbk;bz&Q7Gt
zL|4Kftaq?Hsv@`_C}%#$SR7$9)$cIFtrS%Cr(L$3U&1yEG$-QGRGZmDg4y@Q?ZbSY
zB2P!Qt_F`6ynILK>k<RXlH{i<PJhgCzW~#(8{U?4KZLZ%Am671(6;E}cJWmrgeDpu
z>fC5kSJ!WU74?4pQ%#zM+fukpfy4G^3OC;6fq})Kg7>euiwzA2K3|krc&YGSDaz{Z
zxO(y3hWGB9i3)*<5-XpMs6TW3nI{Ja{+Vc?5>ZAJ8Je~l<W~FvMIJiTl8cH-lZ!Qd
zKOOXv`3fTT7KM6@Ub*@4!y9P_(UclDByeO2MLXz~A9#-pskSq>N2GAN05KGB@6Q+5
z(RHpCDjc6>7QE^BtV|~kpC7vudi_OA{}Q1@e?Wm^aGy4ElW&v+#HiY6cIcb)gKy3j
zm+--gs;kV|-f^?dJ-N@S=4yDbU5`;>pk|3(Ae;ZS*w9YKqG2Cc9SMy_b0pkfe6?$l
zXK#@QziUTdn@4UC<*47iGDOj9ad4F~*R9umB?YDFwr1~c&y}Kd_-B3ra6xZNdGIu8
zs<LYgs0U;0Ud^vR>u#janhb@nbimmpcDJ;jx=l5`pgfpNV+-fLA*CqC6)&4gN8;ZO
z3-J!feF2CZ;^Hn1YOkd#hjzdwCbRKxULIzl=@^bI*4fnpk+g@@Mn}w=oJ)2R<3NY-
zZzd2xHLDgnd2QcjB@$@G7TRkNPI2<=8vxo+O1ITJ9CaBN9lV?<wlj&}<muDm;Vg$U
zquUkn#nb7q0Jr5hiVYpq`WKoYEDuI3sF!AucI)}>BBHc(dx&{==v-&FyNA;u=ayH<
zM+P+tBnr+wZFQ2nBa(Bd_eMVt5g~w0JA@s$aoDAJ=!5xVm|MK>)oIr|8)S5^E)<mq
zKe#~W@`j0jlR7VDzzKi#SWEevOImqR6`g&JuCk_ggf}`?%&x!VQ*q6Dc4FCLye6o5
zGmm4?v>%^be^pO2R<9+d+E8*Qc&y;LXaF!mDe4EjodKb4S{<|k{xu8r3yAR`#b|fZ
z@xMLd-*hM=U*bm5|3%q%Mm5!LTPvb~ihxoq6a|qk3R0v+1w=(adM64Z9YmVch>c!U
zdJBm3CS7U(rAY5BKoAI_g-%FF+24-mo^RZHkMDcW`NtRx7|(w8Ds#;>*V-HP+tn~)
z^;YPvDpF}HcfS}+3II10EeZFwufg#Y4?x0)gr&P<yz1vWKOc<B`B8S@;#z)lXOMHF
z6lK_M2KoXDe^)*Vj5mKu_~WcJ)v4Z=l!PUYEXi;-i27va{#sUE@4My2eYND}qk9fd
ziwA^E$4Uiw-1B@N8+tal30>H)vXp`rhs}X4gn>Fw!=mpHZzkws&P=UI^=4n?yw}{6
zoI(ElhFj4~+(ImcMYA7g5dH9i{K{@op>|K_F~=m_caU~prz-q!9aouEZ+{4O#$Hev
zv;Wx{dw2cJOMr-i_9_um4j9dN4yk*$NotDm0QmM(#0NZkxoWYd&NZ!_BQDd@I2Qot
z5@6c?z1ZwTVPRoRkdcceioEWHJ!4x@;yeQ84*9MRYTPxW_JTow?x>srlVqa6c;Bl%
zPZ*`#l8Bt6h(nIt-XdCICw^u^sv^AxSngl!qw@~U#XbGpDy@5%SBqmpEY+Q0YRs$H
zAFzrbuO#DaAPc*X`mXbiq-)I951z8o=t6PS)kN5(N&k9WL}}c!yx`M7bb*xw<Rf!}
zN@|%l!8wWusx&SVVdY7=v^ny>hCJHt&lcCM3v0b>In%kG>^!)?{R}_eu4JI8xHtUl
z;4vQ&hZSLZ*>oh7#`OH|p8skUT?~jIApSmzOj`acx?MRC@^Xg0(tb@J<335MB8K75
zc95VFSf7<?;Y<59W+LS4qi1hO)eo2$z0=xJ*#WR!`sn>bSt0kB^VXUIQoB(y$Yev+
zh6A+3=$`$1Zd*i5CZry8hQ@nNKN}BD+<1%TnlDw{`-d)1S?t_Dscrk6$?aU_J$o}r
z?7r<ksO@i8n5h169Q>|CWa_k}WnA!)4GIQf)VU?oE4dF+Wp+<Y=XR(9u#5fWU~ota
z%nPWey;`?cwp|wA&<)k|dvbLYIW~cA*zn(Y0$u8nL77F$ugi+$h)-+{Y>lgNt%51h
z6HPv@K%D5i%5yMFmn^<vQeRQ8_pC2zy5#bwzsLzcX=<}F>Abh;?vsUcXKEF0J)NiM
zTG75T7;%V?YlSsF43kn<^kQs%yP_a&7Wg@}D#oy=)={O$+H`1&f-`qu^6SA)T_{_{
zjoD8>n)wQILy8rQ$=es0;vOSW&_h^x)=RI)xx7_Cf{;qAf5H;9x@0K_1Qh>W80ZK^
z{?XI~c^3TQM9F~cQ0H0tDNm#m`o|efK~(Jya_eWG#n9ttk3h?i+t^!uF242tf{Unt
zB4x{>8x~cnrLN`Ose5WEZ}s_}Edk6HkBadHB5S$^rU$7Tem36gZg6Dp_!ld$8Sk71
zY%i0M)U6y??6=bhXT1Da6!#Y2=(V?f)y4#5eB1%krSqj8yU3tpJz-^|NjEV*jp4YS
zpN*T}szWAZE|DexJkf!vgA*o8S@|)r&Jti^*URUs8RDG+%Rh~QD#3ohY2k>qL(Ld1
z|MQp~uN0B(<dCpULh=TWA27)CzBj=SSRhto>4xy+onnk)59E3)8sj&7^A7ABK|N7p
zbv0pe;tg<v-D`Kpqw;Z^3*&g2RmH^s#<16EJmua6=Pv68l88HH6Q4&5oHU*g_kA$}
z{Ab|P-D<+qmtbnF(=h)r15@WtVfV9u%HF{XNqqopG$^~kiZ)8xs-VumIfn$CDIk(o
zH<no0fG5XO&lSp7b4t>CY8w$DYyYqZ)3<{nyjuLywl$_DImwu`1Kdr4%}}Nk_m?SN
z!`f=CzQ1L@w`RfhGElncYgXmQ>+!APS6zJOFJLpBPaZ_O{W<|PV&~u(+l}++Y88MG
zei9m{$8M5$hq{W*>qaXAs{30xpk_1p_cA{mLi!fap(4eC7&t7TdOVvGs6*6sqT+#y
zCmZHSWk>?vb3pu@k`v_V(wwWG?iMO*cm6I(QbmzqBK;XIU-3HXAUMAKI{7P`(|D#K
zxAp3IkHpv^ll;jvay%<M+yD0)E6;q*6*PD4%NrF=xGZJK_8gr#2*KV(J*R9*3Q^zM
zCP5u8yh>1pU-Kwj*s)1bCXh<}`5L`8$$Xr6iasE{c0>S$z*AcHOK_wbV;JDw=|E8<
z5<mGX<?@^+6=YinuV8{?4e@;zlykghq<K|yfq?nY5h?Y8wj4&t(7KE>y53pjNl?ln
zDO2B_zRWra5`l#sz$l5{g;+dsvP7w?hNQqfd!6g8G<QeoJvixw=~OwwC92k3@7G}j
z#HhhIUfM7&FVknGBE_j{^<~XVJGyDgr%8&7(i|U-<+J?`Sp+BD2-_qgfcr}u(DtJc
z%VGaaZUM&{j#7XANJa;F3WFyeZ>5%XokXl#E9lGLP>6cR3G<hnXp<i&z*yUXc%`}T
zt}|s~^H@Tp*Tz1=20|1aY5L80JZC;kwD<%)!&DYyOz(I)aJ86dMhJnwd|+&ByoHLX
z9CMkjDw=xvoe}+QKGJlkjx?O3q@RY<#g%N4!oDb7EQyosmCZEs-y3l-L6|NX!uAZi
zR-pjQF;3~(s&cQr_o^w%<ZAt9i*u>Q&43a8<9_$G;L<zONKv5B#?2`$Zn-Y#0i!=}
zWnvGVzW>CdbN-ei=P9i7*9%fNeu*cC8#tVs<OnWDo}o;NRq&Jo_X80KpZZz66*D0s
zWnLV^lPbDf-Ast@<7wm>f75#w{8K;5>HDiKm_FRj+Xw>XbKhA%%EFG<8l$V`);v8N
zK_K<)Gd02p%eU!{T$uQQf~468iUX+y-1N!w8C<8yw08c8Se?3^d{#M!!byd_$w3kY
zo#o+aeqBOj2@ZMRh2I((8wh$ivWnWZL(%TKs%XKzN8s;6>80kmafmT8Vo081Ck%e6
zkgZL+b7k+QF<ZgnZuNbUv^Q0bj%+<HtGnkN^#StGao@ZQEGjN_6bIX3;bBa?1asXX
z40+XMD5?{mtjD)qkrb=;ZUhLH8si<{B*Kc*s(8@8_}iWuvN&;Nd2iEsBWk|HX)yu<
z)GvJSENo<og4m?0&Z@(jse_kI&0w>N>)|`^WX)U6g-oyozP(QkGc)$yKh-UCEQph1
z#e{|#+iG_|Kd`S;y!%5ysWcZ;vUduBb3hnSNPDmuZ^bN9E0m<_4dTy=nfxoo_0G$=
zGS2Xu^XhR*y96c<N@W*HOLG+?0zBb^JKO~Ni%sfJL$mVuApOXb&@|{$lH6V(9Jgfy
z%x{!%b8e3_t9``1{IL~8eQy9kruVzI=AN4%)lWGWB$IKB{#U#FTOm#!k$H_9n}n!7
zM3vQZ%822I#kv!4$eoLjs4YTRAJmUBw`1tW+0n;JIcMqSN2y<}!A0E@d-UP<2?)(W
zg=QqSKVWcBLBk_{_44Nl7T%SsIKE&S@go<?rxKP}*>_mj3Ik6~#&<oeK}-+27GdB!
zsG1@jq;{v7O*bmaFjn8xejH`;9Ofh*F<ve`b-oI*qGQL1HNLb+xPgra=SZVm<Qri>
zC}=>i3&(-#a)gD!zF5nL;s$G1?VhY}Ojh4maCe*PQFP8dRnRIj;gUL4QeG&QO^(d-
z+vHiM?E8VDpA-Phl5iW+z$%jw42whkg20wa%(A4WI52QTnW<d<@=GCC$L&sinA@7I
z>GqC&?>^#E1IbS{>$!US@H|!dB>M;?90IR&Pduf8lh<szv=q=nlqXpMje=Anv{S5l
zN+<H>VuBO`Nrlnq7@L$qzZGrsoHS1p!jjrvN$98ysk4Oiz<Vfp=0?R}B$s?=OG`_?
z!j(S)>{}q+>kOG!Av)=3^O;W-zR4*$CT){dINsWG?r2rTaDaQaLkht(yKnfYsi{=l
z^5?3n>1}&@dyEEF_oSg>wiAyWDo|Uf#?|avmQ%ouI=2$v6}2Y>hYqK6x=Ef}w!fGu
zPI<p7D|*VSrLiZf4j0)XU#~<KJtS^}EZhpO0#z|1bgkNr3DjFDbFGjNEEqWYo!I~8
z7!g}E;{MC(OiMo<oSlpPW{>a~jfD}{Y?MsXEUVw`ijPy?JQRWuK0%C?2tT!o3%J?R
z;P8D@*(#CVnCb>B5O?D0xAfH3zt9^$tZ#B+$A?~w`fOY)Snx24MuB@j#9PC$<AG79
z8ke`w>>T<AR%FE9FJwi>xFFN>0FCx{CiZvl?Abx{S5NpEN9nA)WFH@}LmG1hnD%_f
zcUffd<!1@ik+Z=M85Y?Q3(Si&h%<Fb+;QT(;H2j7xmkL_gaW=oF5%?cL!2;~byE35
z+3E=nQ0fCtTTn;!s;}+2<Tg!vq0G4n9cpo=E)64yd#h*QN=srOp^t7kA#OJ2P)9jf
z?K+dT`Suyt$j%P|NS@jL#47mNGOn8dX{QT`qTs5&>Ej|rP}<Z|&6C^eFYx3R&2c{=
z!7r#Q^RFvf1J-833M8fS&>N4`_3kN$6NNqEqL6FB!QPI!qikrY_U&ujr#i!?&It^e
zRPYpjG&h`e&<4kn#WG_x6hTp+nAJ?fPRRvBefMhbNK;?##qunRkGSPuUHfd@R88A8
zo4}GiIOr)=@8xP1Frji3W$Lg^W$phMm6g$(_22M?5^CIfw@Nw)ceuYgnG$M`Hbp9I
zccfHiW2Wo8;@i`5*T<~-+ndku5XWE4N5~Z0k)1Q#`uv@}`+QMOsBwLD(uEQ)#J(ar
zrMz#M-RdtCb63o6Rl5(nywVUiPs$%l8c{YZ-HbnNtjtm9Gzq)8t68(QuPo*gkkEai
zOv>-6`7MHYxQF*nSf&@F{`WiJKynHA4P|iNe{m_WxR%E!S`#VY(y&Cq-o>_}EN+(*
zR;yClK+)82YP7)bxowX98v^=~h=YnJ$M?|$zyg$<W?7Z{N>Js$yH`Q(*~{|$_`N{r
zK^9cl(84>b;VnKPXKecfV)KN@%z?#(41&<J-5;dnZkttPS{(=07QX0UzR0slux}v(
zipxLIaDd=O5nr7PN=keI(y}qSnm5tU3(EfL>F>=N>}`7VkI8dh)?q>1MK0qD2>L^V
zkK?YFov2|sHT0=3SiLTw)Ys!moHcU?du(E0QHF;dg2hN{w#zqiAgOHl*fsnWGn-Gp
z>`#!nQyR7FpT~Uw@;~~WuaA5-Y_kwos;Y#?m9$DJXEt_^{d19k=zm)zAbOLsc+QHm
z7xLCEdHNFzSOor4Uf6V(VagkmicYYK6e6ydG5HcACM>#9q#jfZ_>^#jq4?@0$TlAu
zi6RsiB}3W@eVCw8m1&Kma#J2-=4g<Ddp=R!XJKgb4IO7n!hK3L4ySGp_*Xw@dV69i
zE|RKcd6&PT3Nd_^(Ei5E<1RMlySnuRTk^8Q3N-TIfwmT9A#v5lC^v5`;QQ_gM1Id$
zmj482!|IiN^DbqOIxS)wHCH=RmDuahS|2a%iUb-2zK*|GFX5i~3@`R8M6aoi^vp{p
zVNIavRhH{wAV;y?QkCCw20RfnO}VFfyK*Yv-aU3WZhphIkGj=h^}lOmZdeA&)Ne7O
z=QyAW!z)onk5>)qQZs!PYh}KFO544>nz#yg8aXHrQEZs+^orT(m3QeGnSKt!Jg`4e
z#+1Gl3l(vmsLfg<P|^-cevF#29sRRF?{W80g-v~!=B`DJ!n76Qn()OhW6n(63qK{4
z45$69qs*}v95W%(B|@mk0`8*5{OAOq)G`JazOK8L9b+K$sfLnO-HegH*DQ$oUxN+U
zo1T1Eb?O&S{8SOU2uu(Ne#7WkSL8(e%{cF`dy%ICK)R)A@k~Pb{0%S{<SadX&V&K9
z!ATkn1ID=#abt!BoW!FFUtOmq>Wg_yW`}jP#KtV#G)U<glWX65NhdH}y{YmW9@Cf<
zH%)I8aYkm>v@<K-!sKer!bhnw!WfJ^h9>mE8WCxJu2J6ITWEFYWW%DIVDO*J;SkNI
zOVqB!_zC@20xgE2Pg>KL1IeRC%En2?;^RKS?N`{Y*#|8xcWp2malCYm5K&w><9UDV
zZo@<Kv*5ti0Z;ilcjJo_Q{}Ly%PmkOZ`G&Kf&TX!mL@xs*b*Ve8YiTLaO@H4c*OdN
z-$YaQx}ME(&%%l$xxHY^ZO5$4^$)Y-<A(xWU_>NZ4P?%&t<lad=FsT9fyE;{FJXzv
z$#r8TOZzZjzl%yX$w2CJKf@`@@T)weS-LSZObKvM5)UB`L9l+>a3KKO!V%Mj@*yOx
zreFMnJAilc@O_h04rFv&Y1H&nj`#ZjlW>{GhPh<tLkBEFq26}{1Nu3A-lX={_ysf+
zV@D2DolOUP-<8=E6*qssV;_&Z0AN??{gw@$WQ-@tIZDdJ+dGdzEKLGx8nz5W#wAKC
zM%~A;Jf?EhcII8xMT@OEJ<}xY8QzYfK5$g!%PZ#wdB%2xAHt`1K0ql7NrZySC8m2?
z|GWO#FOybjH-jk_PDq5Gvk$m-Hpaz&)fC;q7sZ)`NMo4p-TLWis^%XYdK)@m*o7%U
zG^V%@-oLpM-FN)6;)-^ZFp9A5HFw5XEYAWFu*Ct}ie4@KW7iR6qIGAhB(=gVs@KqJ
zx74m*B<G)fL+4G(FIx0g>458--2N(2u7k<<Gn;Wzx7Wm-F%TL4WKh8Pl1-Z1;V!|>
z8Cde5ZCvy;if+1S6_#k;;9@ie{Y>=Du90pHT&Iic1D9J&1~)R=A+`iIvlK%?cBK|f
z$;;z;&3&cQJJ8+&wGRqOUR`b^#xKNeQn^AO)~gaCK}AoQ%%H&Xxb@`Onx+l%%tov_
zWsDzpQw&nY#`M(OrMZyDQ+EB^d?!dfP31ky1{+a*v0e?U=BeIvJpLmE7W$M~Cdy;m
zUvXlloki6%CfwW6K_`1`X#Rqbht5_FiEUbQr0=Pt^CM1*-iYBa@bI3@kr*DLq)arr
zLT}bm%IsIf(~dJPOGixbCukHfs-IU)bZkzOio#86P}<GNt)sYWR$znf&3l^*yT@T3
zKeMkpv7NIKcTVahNLzMQWHaI%IjsH6THT0!wnVkV@C)jCVH~9_5q8L`J$s^-DY5GW
zsQXl>>OMuHtKApy7i?l(Wr6<hhIKHB_n_tOih)Vg<;e<)IS@lybL$)&)bZmhhE|Bz
z+Gf<F#asv(&^{3hO6WF1oH&1Q)C^jldG2V91)<ZByNu$6AWow}{E!B_xi7wApaY>v
zA(z|4@7M1)1erx0{nyMv4IkHPXsY3^pWEH%;#C@==r{08VIEVyFScd-LKP>Y-V-vd
z{E-CT&+OfTPZyRy=XR=z4YOVU{7c^|5gc>$@gJI|_&6`0tp>kprN4ynmr1s$mqQB2
zy7_W-BN&|Q8ZxjvNTDjk$ZV_%nuR+P9G#%hb^q@7&dH{#4f!lppgCcT&073&iFk%~
z?o~n|W{0VLrT0{)Ab7a{&rMp<M*8n&cELzQ1d|lyU<}b{=C{?VytCf(*7<}tRXtG>
zuvgHtaxj|<d+O1A6pRp8>k1Xto-jGYfgl4V5^v#S=RLh@VHmLah6j%kCcnyzGD21<
zrh^}%gbPLauURE;|D%WgFH-oXMV4<pG|-zrjxH^Jba?9Hn9=UDGO~=v_G89Bev_5o
z<W5&O&bm6>>ZCF8TDbgWG+mS2&Wa#5{>AQy3>Upy@XeK*cTEL+W<MgFnD`rpYvRaF
zb=)wgvf7MQg9ubG_#JooGqHD>9#<YlO;*4upoLWbLmBLA*?lfZ7H}m=T?D53(<<E~
zbl&vUM1k{&0Wyvt#|h48UIJ${SvxDP>l}WUU*&`}SZ^e*XPf&+5<q%}_@XV*MtUb8
z;lonVYK?|Cbh<WDb}o2(9_-nm$E~8opuoXcSA64c){C)5e+w1FQM}h~ZHt4&PSUn1
zV$XDX<o5{mzm{Kt8kesPe~*WHM8X*J@zIna$mIw)<Ia^=uD3mBfll`&%#ZxfFQ$<<
z+YU^Y5Rgf8ZYzEB-aP|_#|v4u2uG;eth!>m%7P~Ha?mKm-Hq}UQI+zbZB#FDd`1b2
zRLFKCV&a@}ub&ZZ#3p)otF&fC_777#-A9wS8{Gp6Fpx;Z1z(WUkNsb54VDvAemgx>
zW+;qDie&mt30)LNMceaH5Q>tol=#7hxUh@2n{QZ$ZEJoBAH~0Zh(ck6<Di-%b;UFK
ztjwVH_)p;@utzAJyK(nZ#xe8P5j@P;foH`glkI%B3G<&7)tI0)3J)+Q1h>r2T%C9C
zCQlnb*m+N<UlNEj?82GX9kF7J0`)rOc7CgNZC_y>?_JyKbQG#IrDG|L!`udjjznZ+
zKRKwyWYJXTFV?XP4@g9n)`%{)>rqFdnzpJD)FH&FBmZLwMYz#yCJ))xd$c@lq+$h<
zGCrKPu_0ty?d-IO;^2#i26+EwTEK{UiYS=Kb0x^37q?er&R;d|04^qh;>zM<i70^b
z{axanKi`1{|8r#GUR<Qq)QCk)@nJD@f80V`0Ax4hn{5$m^p#a+#8s1h{+mOeo=A^%
z=4O+~+8mXNDltzo3g6(S_m|99!7=Qae4dqaXZySBduA1h$n(PK8S^D#qr=u=9v9ke
zZ(FaH9jStehP)V>Pzn^s-lGotu_c&S%)2*GvNLUbDdj?uh}%+g;7%5jzpq$TUBf@@
z^DJ0(O06{yUtu1vbTGU*<UD;A8Zqw*c5u+6rY>0YblyyS!x|c$UqHD!8GI#Z((wDr
zPxX>j5D{y2&U5d7v#>Z3`xn^yCBu}@0fTC_P58a2tG8D7Cf;0qgj5|L2^0%S+-h-%
zR#%mvk6AtNF&TN5d%mv1oQEt?9fTNu)IHWv<T#aPi`awsaDkm%a68Q%g$mg8Q-hYS
z8_VKF<#R}73iqvCDLY-FfhpGyToDp6`uIfU;mrPD)qr~?xUfuY=Gs4+n*FXx^DZei
zoGpFEpxJ}l6<R3&W4l7)GV?bw4%x-gxcTnIcSf0*5<j#EI*`flF`<-)So&hEv^|b0
zF5?6{y+ena>wfoy|JBktHcjPBf^A0bC>*!+4i->`2xqu5`B^Wh^}V@*pZ}HqE!5$i
zhcPnFJ5>GCbf{zPxq5yVt<|S4dKD1e$ceqyyUWk{+&tCQoYKc7gd3CqGa0fJR?*)s
zb9B;@h^eTGejn&$a>39IEIZwjuVI;fzux{LuOK?e&_toECn)gm<H=N@uDaox;s0%h
zKCB?0z;{*uFSj>5tZu^EjEiwKU<A}RhGz|FgIf5VN6yaIl80|K1TcQEdvN=@X~D5~
zy9>r8CeK^hPdUDi*SlcRs8n!uaz8zmIjYg8T4nS`H&}ZKpQqqh+oW?=d43g6$%3jI
zwmQ>qb>zxaR{C(U;;NNn+5RXX{_33c6NgDxp|?$}48Lc^kF-Nk3qd!5s1;Iqw-{H>
zl6Z7AI4N-fOymxk)rYWud2~mU!+V=q85WU|^idk)9>#Sz`(HdK@F#CP;dd`+<5XZ@
zxLss4#*ugW!p~YgX~QjV=?uB#I~Gr3dOgb0?3{olbD0n)jql)?8tRH2$0RmJ_=AmP
z&OzTemXmE}u&O7^xQ)DOhB7d?i<Ij`zLs+bXVZw4i~LA-$H6Olm}gEfyTB&Hbsj2)
zr&rMmmu~zS1^eVq!r<yi9#n70JOjFwoP)`q;{g~a6C|il@F+6pwcn3&MUCiOY@g5*
z)<gC#%NVYB_qtCy*GH^`E*=fls~Uc#-5Yf*^QWjL@mH0<VxV}PzR~p4_jXo|T%WmX
z?3M9VD+kCa>2&nF!kE~dLq-oByr<tbI=-YX=F#pQ`%ErVK!_dso$>Nt?f=M4cT+TS
zBXP}0QHOtJy<T3p_Kpgkgh245Wxn!T<oB=k4IbDYR3j8v1mYs=A6hLL`0ES2QDxcI
z9v3j^yk*oK)l#YwB{GotsqCIK+S>kFhYMqF*zEc3JZXs@Y)L_oIFsK|lNldSlH-s$
zb0BiI%D(yQ?!J>wb+e)iF#cLQ0Kk@LEcpkZvr{^$q)AUNSR3D<S@|W>2i8UH^B_?M
z4!u$SF}weN%_%>&Kgx>Vo{F7D_xY~AU<liIc`f(5<&b2-N>M>usk?mB`SH&F=%5Ju
z^m}LtTGq;89s3{o+OxSe_#)Mc4a(~BFNe|BwCD33-xi%(DfxEWQC^GtB=6=3zTmXT
zpffODF{y%0$9lVfu?AeDl0I$vTSK$S-6%^r|6Wr9+#mwdnFO(acNQu^>9@|K3gy~)
zwaxRp@^nXB@zCO||IL3YJ+%J~r3bj4F0H6>{0`kg^^TRgE!ia2rD}cKL|-ofSvEld
zk&*h2scI{g1GxdN{#ki>p6*%d_4${aeVj1!XFvXv%?OW8jylWl{P;HKc<9{rz)5r1
zp4!HQx^C);A<uUP9CrSjud%I;=AEPcU?C>wG9tJC)xhz|kim_Gu4HfBDXKW5`f<>?
z9qz_%t%j^hQ&E!J$dqQYW0o8k(!U;^_+)snYW!wJPs?RuFMiSI+Qahtx6k?mz&#Tk
zsjq(vRKfBx;pdhYxc-r|z-?oJN}n%5L1h-uR`ExU=QmQ%0+G7+>!SfHSc(E0>p=P&
z*QXadbd@H`W7J?V2YIX3N*%f?l>%qPs)$E@1n=))<21PibnH7^Zr>wvXGp|h4N>mb
z4&L-$0P*OT^KulH#`1*Qd;6`<&Q?h|y*0<G{4|iLY70l@BEKe_Ay)b!byAP5?hu%T
z`j3ObZfgg=I6TDLj$#CtHiFB!w5W^F0{_p|Q@YerhxL`5n@*g-L0D)&8Y^~SZRuxT
zWc$;%uzET{wos98!t!5R8YAN!tSs{l*`z!s^}N!_I+2u39~vkzd`QQ%tc0?C^4GY5
z)C-L1<*NJgIh7?y{xnAsZ<K|H>qEOtbate)9?a2gk_i9@`zpTtCS<RGlc$l0Nv<}~
z^(wwb`2TZc3E(+&2mrJ?_mKbmM*CW-wk5J^@EyNmh54&#t{c;LKMDA<xf;ejWgFFf
zQ9a;E7r_yYYd*lFG)KpXwd?WTqZCBR@ZV;pU1Lf(?1rH=a-K7Ke(Mko^Q%L&7Y`k#
z|HF^ZpVPom^(Lb8z&n&^ragezay4m-kRaXCaAxnq!+{MS#V*Lz62llaCR3r<zOh(L
zORwFCYHGfEP_nN1hjcRS3&OHid7@iw0HDQ=Oh#CQQ5+-i9`G0z@)@rD;+<>HGjfP1
z{r-T*NKt2(0DF}h>M8Eg81`sk6a=98M*!bW0v$Bf>XRgR#y~4(BVN4Rv1gNiedI%d
zZkIO+dm_N(xqxD~&{B89ZBd3Y)^z>;$vf%3(>E+TO;wExs!Y;&3U0Ai1c<Ji-HlyJ
zpl`{yuFg=~Lfx!_mB!Dms^g*_D}G1$v!V6e*Q&pyOeZWWKo(29Zk+$pT|j}}h4}a_
z@lQgh8dW#5ezhJxOdn1w+8z5TmZ;|`H}!2`T90B@{x~kV7lwykXKDK04Oy**<O|6D
zzUEnaT4q14!x9>QxbT0zh9>XRXAMA|H-cu!m61{FM<(Ce8aj=4+1BGky~*B?<(T$p
zsRNtK-qV39iI|=2WrO&cFhI|xKu)CJe*tbqBo=OF#aeZna$dNFqH~#!^Sj8?gV~@2
zhfbl2oI~oSgo=F&yRelKE6-4M-GoX+<877n>rKC(<Hfs@0h^Vdv!>slOja6f)w&qo
z)YC%XQEz%7_?7d|%`Smw+!c5=TL7^#`McL3$^U%+V9e|$A5E7IL#2A=rM!M&ibI#T
zKJ%o`XJY^B`zPaEhP}^O%do!@vZ)I{-{F-%bNqD_qm5#bcUN3*0ok7xI>tju+!W8A
z<00ikr_@GDenOAGjv(rjrm;t-6q6f1lS(zmnBR%%ZOR*ujRRH_ZccEgmiJFoXlTHV
ztq&4b_m~xB=3rWiz-+;FEvkXH^Szo~eF<(C`x~q84S!Ybev(bASb~?J#o6ex++Dhx
z!#~gK+PCXHwd%>|YVbuMszFR&@Ks#Fy?4&tTh0s}G0!Ky4N{t5;?0JIo3Xjdw;qQV
zd~@*^s&{qViNB{lJOX`C5t-Bs^~kF@HFnNM3S3m8)?`$kmDx@wKo5@oJ>R=(V7W^Q
z0{n$``VQV>z1mqG90$*M3ra)Zc(9%2`6uuEFJqClqVXz$8Xvk$&}8t@@vjI?A-4Lf
z*v6lBT(+}&``R$XKjq_N!R;5Ow~tn4y0gbuh>U(LJ1MlCtdP6nFoo2eWa1{$H@+zN
zy2g;|!P-@yOkegnC~XcBHyO~?a#d|<1jrh_VA`>Dhd3v2yxMWu=t!^#?ZtlZgTa6$
zQy?&{2*52V#wbz~{w+0-1oE-m+Vr^Gf8)p>L&`r6jZwR3h(BHv8iy(q6#SSm`XI#c
zn62*d(cTYP@9IT&v~Nyh%D8H5W8k^Pf@fNaO?E}BZ})O*DyN}x{I#dGHuti&n7*7I
z$h$D@R$4SKNouvo(#`);Hy)ttpCh-pi#RZMsRQ)cuq7}+`_WCR+nuCs6XOaJt(lPB
zMC2sFMuXkjoA3R;VjJ*^<&I?g{M{8pX-@my7i2B9A&2n2mA^!p6h-2~dxt)q)V2{)
zfAeK)>(^Lju;JT5;)i69W!DGMdZQ=&TK&81taSvxB>Q3Nktp@ey<hM^Sc^zi>$Wex
zy4T~xO5(~`oB_-(7=J$n?7B=ViTHWTK7W0P_6lur<=*<t4iCjk<t3g#*F1JFo|SsD
zGhpD@yYi6ycM-dawjwKYSnj*9d96d~v!Z@<EaDDJ72DTHU$&pB&378JFzh6iXUMAS
zls<B<zGZQJ_QiW;mnzw9#2I7vJ2U)CXTXJ-UYGF4BUd60!>nq0U~9uyNp>g_3(F7L
zdC=jj=*|4`ryI@Fs)s;(o%W?zskawh>SUxIuoyg`N2@ye?+(~YOBp>5l|SrwB1G@U
z`J$gKP$BM6I+pHH;U~%C?EMTIy$M?8UJ5AD9b<+MtQqFTbBqn@#3O<J2KepH>pr@o
zBZr$&mU4NBIq90Vi2d1aF-iSzoxwmh154Q{{lyi(;xug!xhdcixYD+7PKDUjitS-q
z{6Y2jTP)uK_FZJ}kz9K3iCa-_r<i7b0!{f)AccCdHV;qy-MO==7aNbUFYsh|qoTmS
zSQ=emU7=d@Fy9@r@ANLOP+2_ZVaJ_|BWI3Q9vFtad+(FUb|OkI`9w_pQauuZ-^~f`
zmebjk!WopMy*SzOfgnX$S1W2`N54q@DpwtlOL5?m*>xZ?jD$&<^fr+9dbG+9BJME1
zY9cUy3l-ynCjpy?>!&oY0T=OdJj8jKcZX;-wGK6pEfZhC|1O79GHFHe_LgbdsUx%|
z2dVs{AD41ELEgo>(Z02qrCX33QlCLeFi`aI=lcMzG7P=YAQ_|48oj}Fajaa@0ob1B
zIViH>GUU;@_voG17RQS%rzE)@zfjnNMp^~R57j-z1^A1v`cL~-0XKU9x2VQKwULY0
zn7^HZL;w*tRX+tT#tJL~R=r>f40a1T`~Zy2uWtQ&KIc^e3xQhDq_INpzmE-CH_Ra0
zeHOK@dR3&V;OcPu5j!sRNL5jIS*qTpydb|b{G6LyJkQK4pd{Fg?>q4iqZxaHpR3bm
z;U={UC5;^~D%&On8@nhS2l;?`gNdz2ly$b%E1dDKg1g5J>2Iaen>7G~6~lOF4<P>J
zKei1e0OzpOU|D&kz&q)xLuX5Y>y1m=th!%E0ioyeH?M@b-K=i)FfV#jzxC1ZU8n=Y
zd$V_wS>!+f&HY;3i7oD9!3R5!pG^>nkNYt&&os2f5m;>3M?%HOt|MThK9Yl(QsRBR
z;$KX);r`LO?c#aR_t=tkdm+`12-ENXu;O*9_o+QR@^@BTOsj<OmU2W13`80veca4Z
zfv|5I4P`0>+4@&1+_TI;`**Q5p9b`P7K)ECBr}gX6dg8XS@!#c6UQ<8nIleIy8Wt~
z`-G<x>T79EnHD$B<~onBV<$O}f?Cg919Ag#bQJo=E7F>$X_&7aW@wyrx4ADy@sa@O
zGO!zw^Hdz<hzRq(16r&s`(avWJFkZP-yPSJRtaE#T@w6F_+mrI4`CH${ze_aQ7`_L
zyxhH*kI>#Mgv9fLpyQMF$!#X5Mb}5}s?>^nGkszqcK9xl4}e1`*RzNt6^3|;{<Tu=
zrC1V2GgN3lNL+4N@U}%g2P|&j@cQJpyN}wIMJMd_0GmKR#a?3v{@rWV*2=yE0A`eO
zaE#@)yBr;8(U%^X%8d#q;ZZ|<kit!=bLp3<NXY#Ph=e2`=Lh{IhCURyLq$TdXo%1+
zmLS#*qk*BpjQtShIk<0^52@R`vrk5Xe?TiZ{LAbhi*?FLZNtvsvREG}xHUc*_h~}O
zRBqAW37Z1P?U4_>bx+t5ZzoFa7W(Q>mxsYVy{ue+q;;P*dvHR_S@~RVSEufRDm9m<
zJb!<P_AbqG&KBZkaz!5BX37gnvtiWyr~e?0C<V<HMCQpld_GeaE~sjD30pdvla@Vh
z%a%C5Vy<oKs}LNNf7m>8ZII@x-Lq^vSm`%B9%FS)Yrz(MwyW?MWrE4VOyt0hhq6*O
zI>3(6XR`4et{V<DUy<MQ&Q@>v`o%vArb2sOj1!D;F|Q0Q3&RB#AtaYW+g~JmO-5Pe
ziJLo2$c@og3&%kmkq!Zkg?w&^28`B!1uIg70*lxhIX$XAi&&Ppo`F@w!sox9Cerey
ziV;4@iG(R#%sD-T<=pXS8_d@=c_j?hmz5d&qT_!x3sW@c<^;T8YK}LEWRWjD-K=nz
za=&`Jnh@sJkhpQo4+tT3yzD@cPwOvp!GkCzCi$s5PCU(q5r=;}B#6{Mt~!G0dcR_a
z>2=BZ)c*h@r?WM(290Jg7wSXg)E2MAK-kY@wFw-(t^Fg5q@X@#TAN>H^y7yiLr~jk
z{RiI2pnP!0U2-cr2XVSByxbr%(LI8Qz7AYY4%aP=_y5SqN)(OtU#ORLTF(TmE*j-x
z8aaI>+uIkEgmc$t)`1;69+umYLcQRX-@7GpeGb+-kM4$mP-yG^Zz$vfLLr|s*#UnC
zg-pScutJP0`J2PZPU*ln-Vp=m`v>S`)@S#R#%Md1mtT9>6|a}A2hq+Jqm8vgOsJ@A
zjaTY(Rq!apg@tJm9hdQa*PMF>KNir+IhLQTX1B}}=iXupNtL_T{uVKF_J`M&5mNS<
zOP-@IE*IFjzX#l`ikekF5^{uwIpOrT1LlD90P*u899a#~tP8&J+aBC6gHFh2mJb^J
zUztbX&Xvu57lf#RNvqH`Bj%@MtrXVuQyG2F*t9OM=Vda~eBQiBo9`ut8z}gYAyc7b
zl*WJh!B+hFPCu*@+w!-5N(W#W#nxDCg+6=RFA3(EMAYY}+e>}1t{aXKYipBx&|W!F
zSOzl%qG+ypJa$i^+_-cWYa>22LyBp-sSJiYTiS&jEZ-I)i8L=uCb{|U9GO1@X2L_X
zkt$#o|8Q&Q<X@isFntQoVVC8zyu}*9FGV};?TcdUk20O`-7bEsnB4lnp63LtO~5ke
z^j5d<)#(*<99R6-b9HB}jbZyo6$mpDX*r5zNHTAAeDL}kOyp-Z9|&d7dCSX26<`Vr
z$n*DjQlMn*+i9jccK0U6VXD6|_p3&NAe9)<rJ3~cB5pq9FkY75LJ-KV-2|HmSufvv
zFX*T)$p^Y5*DoEZzdN@~VBJ`07N|XE7|;}ad71R!*P|CdBMwOxVF80n^upd+T@hgh
zN7Vh}x6Qt*2)gqy8M4RZ2{oV$n1)8ge@xtwoJ8R9@gQ<4u5XDCy#j02Lj~tmdKP8s
zq*xDbZLX5`<|=|ll$Bv>fbU4Pz17P*#PdKkqFlr1B;zTtu(~t5@;!bt84u<2zO>Vq
zB*rDc7jqdra9|@9@uAhRf0umPXe(OcBiSQ08MLPEHSq?HU3qIHGJOv(DG2k!V0><G
z9B8ZSndetqxLQo*YGMvP=NQZNSt(*BDXlq>cTS_O<X`aNpNv^*X|a{DLwIp3d9@JI
zaJ4+4+H7AxrgWhTU0~3_woz5TQn%KyrJ2>vzJ1q&Vu(aE+}@OmWY%GR^@@JsLHLIz
zz=r~3pun+Sr5gW15er5o!S-4afx`K7Z&hk0hxC5%9q#{@1?RY$cvT>)HmOtXHE*o>
zjX-U*&6|m*Y&qlw&zh)Nk*ebk=Iqz_&;hojqta=$CD|lCugPR%;Z6-&bDx>oYsIlT
z_qp_Qij8|GDiB5p@=rn8+n{|5(pZFCCl#36J)MJvJ+x?8T=;<D`B8)PZ{nifcN+D+
zzo<|D-F=VJ*0&gj-SWLul78KJIcFqcTky$;b*^+Ltq0681Y>E`071!cH=12&-7K5o
zO}$k5>sh&#!SgpUxP6mfU9U^tW~T|aG3aBi+uS<}b3z)ErUiu0hm`EYMx)XExK7Q2
zMsP(e4NKs$)6{&<4~E`Ngi{iXY8WGDbChGK!Q|g8VJ_p#)`dL@JND-9S+=?8o(Vji
z3#b{eH`kcU#2!k1T%e*Hcc}7Jac3;Opo3l3@I5`g<`1lQ*?E>(u*m>e=$XHZzNN;O
ze7Z1|#4pp)Sib$3O;HkE^5ApFI51HuEWy3f=Yj=xY<XCkWlipZ_j|ud#P1jE$onga
z-|AlxY{)pe_X58ik2B=Z>k_`e>~nt?vmrDVzeEC@?5Yb7hgMjY3GtriH|;F=?r@)Q
zG;8~Q!-j;9aS)HS)BJ(1Pv+$Z8gE%uSIIsvim@Afc#yes`Fb+TY*RF##ks!r*uh%H
zO8XZRo%;GJ%4lX`=d^TVPN<1tmB4ye24DyBM2+O@Ph6>yxS#%B0`WN|`M|I*8js5C
z1@p{*Lc%m*WJ(_NNCv-&(S9vb)KclF4q*&jz9)$New2LKS^Z8-UwNcKua->dH?_C5
znJ<sln<^LOYoGKi#zOrL9B6S5J4O7C3Koq@;SxF`)b@0@!*$M3R*sVMV(as@+(&-R
zgi}hg5HTXD@!U2H)<1YAWkghHuXBSOU7#3^5dLG|51=sy<Q^NORB8gLj}r@Cbn(kP
z&fih{&v~Xdv1vLPdS`RUV6rSBN65$XmeB2JeWl8>m@J46xnlnT&r>$-VsLk9f{GP8
zzYwI#T?cP!!kcOfTYwR^`nVRnY@CWX`M$_qA*rGy-n5~jdK7dzhvv)*YDm+*6G6q*
zMQd5d%oFXS=EIRT?@BmnQZ5||{LnNKB%TJu3}vg5HbO8ww9t9{<)hRPEdz$=1_oWo
zZ%g?z-IyX4I60_&NavMx$fJ8NU!>;5oLqjQ6ko5Vwx}ghv{GA5vKg7_d6D_>p-9S7
zPlhI20w?J9W+WG$<}pN|<Xb~a6#j(GX#>c!Ns+D}<~VblV`ug80FgwF_(D1R8@WTn
z5aXZ~eUn76E*nDtk8aga!T+_u(~H>~KY&Omt<<d-bKbH5W+<iMlp+8oOuYy<u($<Z
zLoc<SIrebx?_x$HwJLw7kT=V~%;Gm<Z_duFI;m}c-<S7c0s8)~D1cAo#>`oqOVF2+
z+dcS}5>jp=ZyevX^rdz<ysLUj(izwu@@pR3Bf@CAPqzA8HdUCGH@@52Js}~6_>v<R
z9?$|C_Y~Q#=hZ0iHUy&B`jvQTqK;{T`04Gd>X`^%QpsXg{B@9C6gcvayz=jz+5TOL
zNngR`JKkKX3a6jm6xSTJLPrKLXnN0_f0UoG)uI2nEPc;v%Xa>){kBHRHrJ%gbZe`{
z>k@IH*1>|lO)Y&x+!N*E-fO^G;6qH!p2x6KaK{v|b>pDq0Cuj~7c!<^f%xX%m4E&E
zX{tRl96sTOeu`F#g`bjfc8@QAs+4E^y#6~Ntwfgw8+C!{Y5k)Q(8NLyvI{g%lm3p^
z-MM_Na8n-<a5Q`V{Q0zCwXsf%5AGEv%}e);_A;MWc2<mgT#5Y@h?NyFOD@qED!SUh
z0eGX&AlL--&x#8IoJbUCxKqNSiRcGrAzFg1^$SOc+g`AW0Bu<Ae^JwG>K=x%cK6MD
z%gB3-yfj`)RF3pww1{7Xk}i`%d}p;ZBw5LXY1<eb@6rlzdxEtM)AvubRGaztYT$JX
z;QSn?kk9L;mv*wO?=4zkPB%-~!wr6EETBhW1k2R;4~WvU4{su^hi^6N?in!qIm)iu
z^{mp#mE-nV{LEo^0mFx9#u!9Gj_Rc7*BAKiSR5~os990;@;&UO*x5tM!LO#~%c~Dk
zo;6E9`3`<k;xO3%<L?%OhX9IX(wmYt1#0og83l%v`O<9uzl+_HG~lPM;x25_XvPs}
z524gMB)LCQ!h_bc`{rkAc|yMPDte1vimq}sE9PaZEj2q^$ZsZX@dcz7KzrH0gnyw$
zYh`5?;yF4}Ym<ynF%cT=R9VeEM;h4|F8c1#W~|X4Fwb&r)Ny%fs~P0T&N7%a5OaE9
zcvQRuQ0Ouq;KiBog&OR3`9X{=4z>TgLRaz%Sm^49z7?IfyI4puQSBwqDe+dQGgMv1
zegH+>0T0&ZA2Ph38}Z9d_l)VC(|$9$^#6(Ip2{3{rkktbNR--Utm!*zt);?Dqs3{J
zXg0-NR-kkkXUgt*^G*pxymDsz&Z(o591!hq*6Qu&Z-K>D;N95+GKVicX=9xLu!02o
zB3;{mlpSkY3_J8INO7vSKb)*Icu+5K%c0b80e@99*{GvSf+u#!|0?oB*j+-%N~px8
zH67!IgtCh^OgWeOHijAnu>$x5iQ+|mmektoE1OkGeVZJ<v6^`w*DZ-XcsQkd1-4f=
z;|UZ9PeK7oX8H?i96n1Q4sJ;_eo^i^3)7MU3Ry6bZ$A7k&i{`9^77v73K_xj1EIyz
zzIpBQfe_vE;7+@v2mFhhpJemyop>yifHt*OeQ@V>{}Ch3YjN6N)zyw4b(fqPqBOyq
zV(1uhG-)s6KR#M2B}-++A!Dtd1<Mr;4_-u8a1-|gdG8yU_)e6i1r(6sHbh^BMkNwo
z(}bjFTF?TNT*O`mxFuA$n{5J?lG~sYFErOM9S(ELsTzOC3S4LuBn^~e;9**@-jyW>
zr-lou3rDYJ0+ZoOX~S9#z7(g_RrXE9BCF`O3uE(|Jcg&Sz=qNMI2Aj}(t~-5uK6DS
zUqt!@t@PKrKFT7yFVXPedrn6S=Vor-74OvjRp9vjwv1vFeF|LV>D^!XIzf2F25fLd
zi)_HzC_Z%h^rINukpnT)zL#&GuPEO#mFwID3BUy&#|7=!%t<CJ#K{y9xMm*kD;)>$
z*n`cRu{vhIqb}a_hheMgq>U{Mj|y$s^GFNY?8eD}ibe!QnbL=d*L3ZsP!t9>U|X(f
zv#Vfz@hfu_fy%%@NC3#$o62QJ{DwczPlCmC<n2W1KSKqOP<q@PzoC0Cec|Gj;J0s~
z9#1!gPl)m@Wji&CF7}^rO;+90DI5j$IY>RzSmR<|bGEzJ4L^(!Qp(D9oZ<RbmV(dS
z`8KN!@RP>Gs_1*Fg##&~^;!E*S6&=1o7NadoRQx1TdAEV+{N*{dl3AE!y4H4G4xQ7
z>n*2t*4~s<ZLq46TzbTE5@5_2B?(K3Q}<@qs|~-i*W^npI$qvH2qeN54^~#afV(zZ
zWK+H>zg-~&z=GlKzpl+j4Yf94vhX>^97T;}f4~;cZE5&<b02?x$I0Io>Ok7k=4Jd@
zEgb#HTJ3O9midQetmm$C{f*%Z8(ClYEXM2~pMFzO+jc(FuYPtP8HO2E3e4klNg*<C
zN<ZmodTJ@>IRQU<qwJTcrM$s*W6`zPyZP}$l%N;Z2p^fjo^YqtQ>6UUlhv7Ll53D}
z8B7oCKWOITp^RBWJ&@I8_%O&aE#0{#;>Q?JS@{ZZf__L-rs)Ky-)b5Ugsa>ujC|Tj
z0)0h<s14M70I*{|%}LKeeI~VwV7rh$+K$Ru|A|}Br8%y7A2NF=mRsfRz@68_tVX$|
zMuog}25aX~+*x!VWMcZ<hupt4MyT+b?1|XB9&!^D`5Axy3wciKu{|gB%PFm<*Co$j
znYG(?#PY*IWCtms*fH56t9UZec*3hr$r=C)c(VXf=5r<AhJ)xkuZ6#bA)>qBb1+4-
zx?V_On-Sj!2PPWa2*t7mXnae+Y!5-pYYB3s8<c7UFl|FIMX098J8bIEgAdSjn3~-W
zspS9hQNZ6T(N@?$<o;+;l+;v!j(KjS^DB41BbL1)G|K9utLvb4!pmm^x6cF%PIIsD
z9!|8#e-%;gr$t;%<~dDgL+^eOjQ#Ps`ZT>25MxX3Ofr+SbFxOALE<HpxZgBo=-d}F
z62~(R!3u!1EIAQxlj0!NCVJpO!K2QWn2>WcBor5U4Dd<UZH2abs3f&@PBQ8t9;Mp{
z*)hZ)wCuZ_oSdP^)cX>BRg;W}KZ2R!+F`)06k?BaFbCBH*EB#LWv1ih;lFrSD7D9?
zD$E{2MyCf{={l^mc}~c1-p})tcx`dJ13Pu{!J=f*Oe~vVmh$PNhV`MO4xOeeJN`e1
z607@;LqC<?+MWfeXe!756lR?z-CCMzgB1_UP8Xbh^I}lsX{EL*r!H~K-$4yK81VY|
zm3UwJ#`^PCe5nmKg-VR1`C=b-0O>`!t#U0Aq7x*rNvLM_`<bu@_g@)5Mv;l8{j~k&
z4G<P?vD0S;O$CB2<p68U7WqP|WH&%12Z}PtYx`<A_dQ?ZAN@d&nI6%dQfEz^zSGJE
zKJ6_pFw)fWW&hC)!JBbB^Ua5+fJIjJBjMDSilv`tRqSrFLn|aS-rGw%q0YZE;Ou_E
zM$l9B$W>~6DM26as*b5N$*-~_yj<WwwpNOK_xQ1lm{NKz9&sLwg%$SUYqs{Lby!%1
zy|LLO{3Kxp7Y~7UMWvL%vZ}_GcM3AeGSFK@PuH-Tg0|rOJ$WZ^c^3|rbitDTT(5h$
z;CvPVMhjgLrT(uhD}NC*z8y$=L77n7<VR(FC+9djoYRHJ?X1slpaF5p1~9Y_9AKj+
zE`g;EwWg5=bHy~v(4X9q>ZDZ+!^fe=9||d0-waeadg_t2v(islzq?;09HnC4iZ*Ci
zh1}*1*8gyIESdM{7<0+h`6mV?7aP;8ABW@O!%VB(cY}Wo2fZ?g*jl-!O}=uvfD0!R
z9P0RmDc}^sOhiBm&de@KEMb$1S+&gAt+;SjR6$tJ;%UD9B=l+k1>b(8NekukPW*^Q
zR0zO@iPfc&t8nfD1wsJWKvt9kTX?Q1&nSr^_-)ANS>l+Re<W%^%V7@Ex(^ZYQ{2A%
zlp08v?jCA3l&R1=`R5lJI89q@^-lb!RAIMmOSr2PjpBeLgVb(grjhZCS8d-c@@uFe
zyY?>wQ(vidwgSDyQz0DKFpX$q1o=M0DSG!y&pCKs=aI8K+GZW*wL&UjKSS0%;`<x{
z0$;>^MaB;~jPhx%Zz{H8c=Iaqi?-|5fGH$ggqOK%O1SVfW<9T?6$t$yv<Oz2=VLh=
z==}o7>fw&0jnT`ezd9)0XL?yruvxbcU(N|ibi#Xk^}ycP?hlX(_6d~UEgr>n&At)_
zz@NV@5Z;plY>qV4v+hyr!Iuv~=e4@B@P|C|`=yHnuI>X>O@n9exiQ<<gnIH-%(1<a
zNag44o?~lbbq<M+;l*ONnCp`$KeRp1S+`v@S9163+DhIk5}r?ybb>bxyKhD--3VOH
zzpm?Q@Lop^`H&^vam?Z(cZZxLZNvFBh?~eoh{^UsPCyGq|22*yEG!D9`Pg4EFV>24
zksCh`)4B;(c?cy7?*o6@Hmp~d{5%Yt{23uh8m6qqo-J5~CjfW~<1IB3#fCV2J{m-&
z|EEl*i9K+YskFt=_U1NpFzD5HJGKu^%9xYV_7~W#tvg(WXJ61?-A3Bq3J}p6e7(#w
zYdU`2Thbvih{88OgXI9I3^@JAw@ptN9E&#ji4QXao&t6lJK#adF(M4V6OMCv6WLyD
zASE)e2KBfhQZ4XYXcB6+ehJ@+^=iEUB)i=^nRiyBGBwvfw`}q{7fjf2;gl5EUKSoS
z^JV@P#@2Y_*%kt9lP8Mwa)E)l8>UFoimVwTAOI~aAAaRmMlO}WZgK{LkEh!uj=u=I
z)AY7dG^<zVDnM>gn_iw%(1pcAv0P8NkMH_awV(A5-eUpj7;i!PEWS6f1Oug?q=#Cn
z7kvV8psD~hF?d~#4#N%7u!a+lC*YH%X(A`bEqU2&rNxk*I=FKoKlv~sR!B;LO2OlH
z@r=`Qwy^SVq+P?71lOxj^QRvE-Oxlliqp!U3gpf^fYur=;ui~R14h?$#@Z<*X)*DB
zQ}W9faB$e-@y3eNR#gLIz$}pIa%2~`A9O(6A_d9rmvTi@l`QETR4%1MM;wGcfAY(;
zi|JxqlDi``Pfw-meSP>u<fp`(h48iMs?0RK#jib93ePVJciMatc`bhWgKG9hmgvC1
z4}L;`!}o#Y4(%s021O}aMA~cxIav?hi!6arA>^#h{D~Bet3x%GgSt3zhe&jtV@~at
z2ou@tW1f?+S{B8$N#JHRY|9QV#syrLOYM|ohml;uc_V5LWe(K&l2j9q^g`$xX_GQJ
zKoTBtcLWdZV~T2oL_q*Eic4BaBiKsu!H~R+0IRD`lvaQ)q67~!=*6P5a;fjK$qIb0
z+>)2*{^GrwGnlo;Mdoc8W%b*~wnSV?7=Ftstaqm{=FNgJL1<COR)Xn79j`I+)`OT?
zF&_O<)B0s4H<8HdiT+0X45c?N(&?=0@~znoRKON4T$lKXU+xyyyvxuU%+^Po80FJa
zHbl|&<5TY2lYWY~117mY;)Ve8wbGU`k3!B*E)h18j0yLZe8mWAGwA_3nvOL{yy+{X
zeG4G%iMj@|NMFxeMauRng2vZ&da$zz4p1J5zXC?aR)2w@54q8)<s0xFu-}fF-`(j0
z^&yH6W8{9Pm&f2Y^`P?U(Ny{jj7$F;DuFz9@eKIfp=a6#EF~kaFZw@_Vh___y3MOd
z>yl#YQkWE~TpjwrKs7((R1j(|`yFp4FWJG5WAuS?h0CMQoyVbfoAb_q&7ASIK7)zj
zq&E|HE$xt^KXZzo%u7%@`d*;^A7x)173JHmtB491l%zD$AR#R}C@F}DG)PFNh#*Lg
zfJlQ#OG$UP3Imc7g5XF>cZUoy@t$Y+?S1w>=ji#qy_SFIaxIq+Gw=Oe_f@YIWT$SN
ztHYjwU6@@`hxcx-td02AdD}E)`!+)c3JkQ7csHBNmFJssEOk@uc#hJ#bXwJL>nU^E
z3R~EgNyOrNs9g>+n5UhB>;VBU_wFYCD2&7`hpR*hg(s){e@|8(_DY_RXQM0cooj50
zTr_P7ZwA^iBWU-A%S-CBDKw)zmxr!8D?g@ivc6Sl<l$U9aBcE3Iq^oA853b{DB&;@
z;pS(%UK=>iXNJyn{_4$Y?5{#l2UWFfJBE1iq^XLI6a{A%M^qa1{6IW~mL_rSoe0J1
z{%pLgL&5=<8d{HV1V}K^`Ay5w{?5UzJ<=lK+c!)-Fh+zp-#tFu!P!1o!i~DA{lW&N
zZP+1nv^RAuz_;X^aAVdIe>yKR+3lM1{Gs@}<p2K6z$ZCPMrbESKwNQcdmB-o5Y93<
zXn}k}r`)4CDx>oB;eq+74yhfz7uU^iRunt&o4nAXtWGcf=``wT#h{jbS*>~ewL*>|
zibjIMzFSA)$#1@2@*zQ}bR6qc&95<=&X2#V9&G4K_;%vtNvn+GvV1i3_^(@q5^151
z<$_(#JgHC}n@jBuf4z4O8h64qG~%t6o!^6GkL-}8TY?y;!;g#5{L(s69p=d~!ZW)v
zC<VUw6p_p)+@bz`YzckvdkS(u@D)wt&-IJ}Je85HNjiTXLcn+Pc3Sn_>9Yg71_ivC
z`weEQorflIdofzUKU7Pe#r2(v;&a^Bas2qPG~`3^Y7VxYfV{;u8{CYi!4+~Lo!%vJ
zmgU+z$Hi8=hEdlMuOx5z?^(h2U+->Eg|yG}QL9Z$X(2<qn_e7>NBlZin!rtHn-lWt
znu}J>Z}|qJGx)X<Qo4-RIDj<@UYw&LVn~xE0U}H73XKrdJ2kkYdT6ayD8*?vSws#1
zefSDH94uQ#SyBIv32TI{yfG(BNA7FZYvzKUhXcat7ERY!jrWQcbmBsvX*a}&%BI#j
z?!`xv&wo9x)U#DA{8qgGdEBJR?*1jtQbr;@`L~?XHnnc!CgN$R;{hC(8$)}gbm~@P
zX6X7y8NXVZSlQzBshUAwGa^y1l5Me3U)@^|!;O;E&A+|<0=Xr_$;iXmqkhZ~cj?Z-
z#S%V;gXga7`jgT;DU6c@bhFnvU?oCIu5f4RljkeE0rlU2<+Qb~;mbc<FGvKAKCemR
zJilXXR`8>XWxn<UUr!#<os~e{<B*~lp2N3a+GYi@M4qw8qw|H@zTR^~9<}%d@-%*d
zWFr_g;`bLFGa1QqbQv_B5R@<|Su<QW*%uYeP|Tz!ZaRm~18g7=oh~(vHTpwHp+eyt
zX9G6LoG3hTjXVvm#fe6oSYZd${T}OF7}X}}sxvu+g7Lg)1`8z|UNCDNRdi7Cg=PxT
z!s7Y0^br4rjKi&$=bW(g0eHKoP^=c=UnZspVF!}vik?=N4C(n%1HC?x>wJ~5Bssy|
z3N3e7#69kDbPRlA3O~?B1ibK!)m9)z>MhK2M!GlxrveJLber&ba(Q{36~Z=yPKJ4-
zc3lwu1{4|e6dxGfja%N%s<uLyJH2yDCDoM8&D_T|)FGm#*N(9WKU%2hyTP22`PbI_
z!?d1rGVkUdCW)1j0)?Rot5Nb?_QM6g7F{pXo+nGoI_ByT(C@0_$Z;kpDGE}%%#QF`
zOa}n)+v?H;r%y7poM1p&@T3X8@hL{@XhpvGb4kVeX~EDbacIXV_3??kF9}LjO*%i^
zzxt=~1BOx}8Et?jNg4NBri}R7-%)3T1=!+GC%zQktTBk{u9e`du?$wEI}%k<_|OrC
z);cA^6#LNOqQCabAQbU^$4gvKFW;ZuOV$mDj>qOO)>Y}E-tAinx9eMe)I_VF`F8#t
z^+n6^o8O*!R_Up&Pu<Q!=804zLIfC>O6b{*f%JDc12^dawDZsW`DF!Dy$Es$&9<1{
zFgt<C{h9LhU`zD~*QtB3`{>67{IC=TM~O;0K|OyMr$8YsHA5zbq?ix><X%xZ(m^Ne
zUv9kjVjz^4)|Nb|`{oSVGNdE>iiR7ePK&CDD4?cfw(@1ukX`cQZ&f}7nlY^SU3zm8
zBQk`usN^V)j<{V7S=CXZhp+l7)ovnf*jQgI@%E^wzBk;iiL**c?RUdwE)C!odbQ2l
ztgkGcyYXa5cqR(!tH>@`qTM6q^IjmEp9pIHwYH5Eca5;el%=FC65d3FP&72b);vZL
zqm0jR4Jka|8jrUZxN)6Xmm%01X!g@R*wOfb_ZFGrk=>{Kw|G;-pUxXe_*}J<!oDDb
zbNAW6geO5=T868o;<N_2m=f~J>t3PcQo2ySq&KVIzss{+ek{%Ypv1!1Ak6Mgr$-&!
zt5y7o)Md#7mWSi73pTHq#<gDpYolS3=ZN)Kd9mKgJ*z7CEfWQtUg<h)O%2Vwrt#^;
z0M5*L2jMaPIwYyR0k9+Yg)f^lW2$i2Ao}jdtRRvB+}>)KLT_%Idtxjwgg*9uzf`1}
znC!wfg_xtclN8)VnDDu+eiJ%~8c$9ybOs(J@SqMP0j=TNs{*D!UXZ5<NCEL5g(R{Y
z7i<5{nA3f#S5d;?9xKE>o2WEVG_8}EyU_5ADr`47cy&2?x6atzW1-V!Ou{|G3P~G5
ze6q@C1wN)kS4G@>bv;14M|~W@WS_EW4S8m0Gu|5`rH+6m9KW=w&pu}M+8Z`JoRaH@
z1rt4&1TbcL&k2!T71yst*>`IW-fot@r9RYCpvK>>q3=fIOff*WH`U&Yq;1@Ps+GR6
zjZOZoeiO_sbJ-IjU6KNPcuE9nH~#Y6aT7Gq-|1*tHJQCBn?;+*9=;>E6BpV3Ii9?w
z@`}8lh*Mu;MoOWoJ>8F=au0aRbAz;d?JD`jY7?WhBT*6;2sG)1z<dOh0-bNko=pVL
zeyG=|H-?PFAORcY#^BLEf|y&{!Q~RH90$I~^63lY{i>_YY<IDTZx#xhxiQWHq12FW
zF==P1f<8cX&Yq!8YV5gmw`&@66s&P?0^^R3*EWBhkVWc;NS365uGS{rG~iqo8Uw^Z
z!VO*XY9ZYRoW2i^5{z-3F7^~VNn1{LoX#wqhV~70zNKEIdWS#tM+(_(B$Z$s-evSB
z>?7<~L<(KxV^W@&k*y=JYmo6X;igmcl8OB;*RJh4U$s-raP~uNmU;e7<7ddE4U)ow
zjQgKmOE;==%r9Lmy+w49&s%$eiZ;UD#>V1hM$fy`9qCq|shjj(Ogp3(|9Z5eqfVVV
zh7tX>8u}#%veULc>zsvgae${&HL0ZVEq^*xS8zR?I-1)uTuK)k--@T18S{Hp>-xhF
ziXfgn3(g5HTjUgE<8Xh;1cPYeUEN|B$Lt_ajDf%%B?pKBXUQRwzpP#72wDq;qvx7u
z&8P}?OunisFgNUH-n~zf%U%D@lH+H&ncNX|cx)v5R?dTw`r^r<@wp7o*yJPg)KXTF
z{t_TB7IlPY1^mb$23H`GTK7%8W0d-6dhze3QMV?ICChmyhH%|)Zggm3bU>kmz^$_v
zHDoIL8&9$+tl#>yjoK;_LlJtlOf8VX#RO6Rm`<zoqaY+L=|*b4lgfv-hTV6EQyUFA
z9-mlTKA+IDLz^BVMqeJoN8NtIyy5*2`h3&z*O~4!_}OzF#A?>2l?%xJ_(7f`FyZ<z
z9jm3Sdf{AckIJJD=7zDzP4?N^iDEXg0{7&}58aAJJ=&jNl)d`o%J|5QJA)o;)|6F!
zd5o0st3OrKbLD57a?OU)W0MvM>f<J(zPKxWPlK56p%^5dK!pnMH-)o`nc3A)SLAaF
zKFOKN=sV~DXk2xhs!3Yoyt#QBo}|;S%9)!cR|dGw5%Fok^w&k)0npnXoA`kIO5Xzd
zK*0yvBjvfgm7Y%y&d{t06#QEXZ@m9FSb=$#&9p@~hFovNGVqPggdN+ZvTxda&sKuU
z2eW=N?&MmqclVf+UE|hODX%v;H7m{(Kh~6o?emW#vb+Cm<Bsh3>$tvdX`ikKa-TVp
z5v1sZ!-uuBc|U9vGpaq~B4Y^+qq`l?a;(NiSkskU8nIATAA@b=^<RM+*!;OaDx0L<
z%BYGoOLG6H3#$f?>0tiO<G1Hj;-Y_`Qh_2u4Fg~OiAG|Rk_<W}10U5HxVi`!wZ}s~
zI}^5gVl>cNG@7Cb8kT^`3_Hr4sG@#n@KA{5t=3<XFMbVpFhju-?za_tsZFY~`fXGh
zIrjnbyAtuoYeH`&uhk+$&T(8RP=I%wvcz5cDCpt8ib6Sk>_}IW7t=Jmi!PR*_WM#m
z>Gya>RWV__cI?g4hzUlfR`V!86w`upxiC^Tj__sFZ{(LT$@U>11u>bk(ArL(pX)Jj
z456dw`%YAlCeK4$D3Idzu?0s(YV2v+q6uwS-6j}T=E2kraKsd@L;5;bts)PbP0h9A
z!MLd<v_$Rlm7qhH@GWTnQj-DuTj0ff)hvvc6V6%wEsNLux*3Uvvp=Nv7YLDjBlko&
z$ebbbxq+YC#`GeW^YeL!$0i+hzGH2Mp$iUi8v2ZM`NMtB)X`_)1e#3OQl!btgJpAZ
zUAQq+_*30@eH_b>Z;quD<7lhOBmLAVt@d38a<8c&Tz?g|k<f(M4E;-T;?iooX=-&(
zg4JZ*j#Fw8uc+ii*r@+dbRZ;=;s^Lu4$^NvMDy{Hl!gwRg{r0bFO|_Sbtt(>Z3@v3
z;eg5zTH>A+bBK#G`ayeZ;{Y~=^zpt@gI_5}6u=a^&O476EB`QJKd=LOk}l;Dr2%UT
zAN2arp3ju(;IfFPMjGd`4J<~1*^`M}85Y8=Ye)Y3%@<peqqb31?fJ$4S&Br3JFsD%
z%VQ=T@sSR<VzT^PND0OUcRp>X<60D%w51BM1Gu5u{k`!fQb!2NKpjwcH(qGb-LCzh
z`fRQl>hN~SJUS07y?4OL?)%hWZG7O^AWS-3Fm*N&i*?VOf}==Gn48+JBqL(s2)TYZ
z>NrB*TRy!TYUszMSNF$9N*PPyFOGkWU(2U^4tTCTd|tR<<II=PHLXppr#PTCKB(85
zLo=%*<vZ>ryBP0Cov<9zW>iG<Ks)ebKaGJ`wIJ*HqK=nj&a3j%PL7ndRhy2i(V==-
zQu>GTTcZ-MFZH;A>GNRZ$6mo&<WjQbZ9Z0;QK)eeWFwky`<J!oj=P7l;SOT=NM|RW
z(8esw{-aTj7T*5<X_Riy9%>Qep)4_2m(xzkiT~yH@FeFZzjKySV4ho8=dr+caEEGk
z^qRZn3ja+K$=~<y3OPw+yJw9&bdpiMcTX!9*_@f8lcy0PV{N!J3G_*0_Eb*=Kn{$`
z=DFRh7!uMeo8%aeyCmUIY0ij`1c~l;*$|XwJocK}D!fb;0$D6v#wbcp<@i)2eNsy-
z&nbKGJ~I?TK71dUH3#A`f0QX(|F=x3)8}J{i|HT6bRKLuBcP+wr<Y$}!0WK=fctns
z*d+=)CV#}QJg3j*Wp~rIF_J&2e*1w(>``P*XtR0A$c?63Dx_>q-03U1Jki%#{SC>p
zhYC%c=T0p?6t`rlhV60*iA4$mwF(UBDnu@A<p$6r#is)u+|#gVuIuQh!+GoJKs4!z
z0xZ>h5h>5Le41@UYxRw5qVuBZ4Cc5eVjc7$fJ-hfL(x#;4miMFpN@Nv>>HlRi<*IN
zLjmK=6zWF+%d|CAof8biQyL7f07^Y0n@#e^qX3alPPV9Ho>llKuh~Mpg_J6v>&nkN
zu^C#bZD*jXSRKWN2ht=taXMFAqbLM;<6f*VGG-m}ZN({}XAw<DcZ4Ka1_HsNV3KJ}
zEUD?uquOg;jc7SD@<9pByo>!Zya7wSGlp=SdJ%zAM}LI?{ibtX#;XKLTZ910jbV*2
zFQ;R$!mK2I>9}>%f@hl%FI3j-V*{boFETjH4R~Ogpf~j9hVQ*8C>R(c<mkb?_*WTD
z8Mbl(w%|_Vh@!KDK$cYc&UNR75(~Sg{!rG+$GeK}%9hu)TONzc#l?GZ-$jImvKQHX
z%`2(B;RFy?ZdT~;3}SEG#;_IKw#6{Fq$8~cfeCq2qB~Bi=O(xp$*c9f4OjKH>O2|*
zCmSZ@wil4J$w6wKU$vL~;MrmPI0-JHh&GEW7H@XA9^B(i{}_CJOQbtLk-El(JSXVx
zJp=t<A_4*>34(O^g_eYKV?Pgj7Pk>k-aQGuo5fxt()Vc?b7NR{36`1o4Zb3Ch>Mt%
z{s8r*_3sTp&dKIx$&kSK)&uA06F=y`Fi=MQvw_UQ>E7A3yraHF7r>qLoJHV4L#;o-
z(`e>srN#}@_@e$iPq`rTIOlGbiEBc`b_HX(3v<yX4m881;Vw4JEeS!FfWzg#X_Tvv
z!&FdQa!qv$i%|(R;T+mqtuF|&=6~P}-=iWdh24>~J59cMK+rKi2@10ust7g%TZ`ig
z?dITZ?MVX2YwXAx{JV=1jGGAYr4`)H=YvD91`@R%E2!u$2CBng&9Nw@nWd9^;a@+D
zeg-qDemrO67#=)&_?5h<w~vdixSL4lO{QW-&|RT1hSD|`XOrmbjVU$9g{D76n5_0m
z%nB81Kp~OS=-_vDKTT!=1F?>xR{0{cYYqR|1+bs2T<kV7MW$67efv-f^SmVGhxc8*
z@^yqx?4#1KK-u1>rsdoD4?Us_3T@#HhevbFq^Dp$YgA1aMQ?H9B-%-toVZSGAh&cp
zcCo-9<moyMZ=F!T0rgUOA2Dl;sIO(4|JHnec!;}&!%LM5%JX=2a+($ZJjMVK$3G1o
z@b|!fFAZ$yk6XJh_6S}3(oH~@J6P5AEDcDNw?ng2*aMu5F-n$|!F(^D*=Gc;j<W7#
z`Z!+@0b%(hMlPb?EdL=@KI!Q|8W=c?A^a;3Z09JGOCKFoeLeHKOOx$2s`=})bE6&c
zH%DwC<YGfDOjn-ebAz^?r{yoe(rrGF0D{&aH())c_ts3d5$|b@yD&H0#X5cuwe~LR
zYnxwNQd}5LsPL^>?7tQGU}=iqnz{eL{fony7Ptwy`c(fObhv749O{Xu&1FE=YfYH)
z`Cqu#fw<;<G@NgKUMQ9xsFVd%pV<CUDK!#^s^m}E%^|&ZSKGDObi@eDa$(M~NF8|+
zOEuEYI)<*RQ-vZR;^$Ye!p-VqLolRmo;zD?d;eCSh>OV2$LcpEfkFxAyZk8uyLOO?
zZJ(W;ORxH*$JZ&@duzKBvSX4)0&*wqmMhQUCA>HZQeQ(UZg}BtUnLwl<qb6jZ^3J)
zN=|=0u?xV#rFeWJ_v~M0tOI#Hj*QP+znJPy`(cbN)x9Haoc*z+YAU%t-ScFUdM&m(
zmmat?Os7omGqq_&x%sZu&y)@Q2?cbEkOyGboC!g$`aEZO&6kAr8xfwga7!RddcODX
zMVdmS@DJZ{8?v(%McRETqG(+o&&*;K>pM5&yeUY*I@WWG(4+xd`f?8Layg`Bp4{_2
zNIxeWy8LE)MuYdin|#CTtv5mZE(v=6Q?);}iI3@6z4uMD#uQu(D)@YPAJN~634@^c
zBM{~aq6EHCVB%o~9~*a~E#V)r+bII}(<O=@-oAT~j3(k#F#UQrf?A{hnWoH7m-D$-
ze4p=Jyr<Blb9xuO!FWDj%i7}d?B%H?F99hT&8yYvz|jo0<OtHzATE$5aB$Mt{~<oq
zWGJM6_Ptt9ZEE;u50r^u9f=!;q&od!8}J&N+$7rg#*5b`wXJ92F##RcOgu4UDJ!(Y
zRrFpe!#9@NZKsY%$^E#W|7H^^-KLt;nY+<Z*ReXhfl$#Z=!B)JEa&f(iQ-JW5x1c}
zDsNkk4vUFlXyliI+#&qZbTw&&WyE?AhJ1;W4urhG`g{ifS7Gj<bAJIqoOlby8=S$b
zfz#O})VvOv<)#y=XX7=lAdJ*`T}BpxAD)bA;l>12OF}TGRk?`WBMS|s3#AL_l1pMA
zV7%)cTBy%sTtYc5E#L71(rr$ZHM$m=iyhT##eH7wia~L${fBTFRsKx&(t0)AdTk{h
zo9=?9Nk@$i?xb7LEbWH3GecLil+CX<GuY!m^yQ)w1>*@NZRfL!J`Z`V<7wzU?|o#@
z_R!5G#IkA=eCQG96mu)>aB(h-u?f<bstX9nRKb7P>HrMw6Vg`xMdlg^Yu0vHvDs<1
zit~%tCXsmbdS*E+KzB&?es{%gv3I-mV<j}J<}#Is-0xcfF-`l_P9KMFlDw*M2LX5|
zxV37hv3aT|O&9`>w#?X|%R^aT(0V0((F4eqrpVWi_ts&JuQsHOpE=y5@5T$4(UBK6
zqA-Fey_q#>oydD-QfYlxAij%CL-ci0xY;oepawk=Q`}ix2Czo>jb@}@D)T*#*E~}t
z(M^t!?b$;UpGGEMT+%0ODW)2oUj=zgKvI{Z79bp~2JW>d$IPCibMg?s#AnFZ|8bA}
zX+IJ4D{==htl8Jj+{z2InUo*PN;GuHUBX@YK44^AjTG(_NU}-g*Nx51%x)V_)I7A%
z%0}u<B;s;l8_BUYm0uDxo~_CQ>OdS_r7d1#rE)g&yE4CHvh9Lx#gmEurd#IdWPymy
zf9aOGjISYV?T+rc9cIPYZow%PzKyI3x_%bnC;Jq^XWxnUto`5WrQDH;k7_R#-AV7f
ziF!MK8IPcpz{HaNl4JLmiM0+S^A>KFhsr8<3f$zcO%&nUHfjgtj*>>N7wp>!=1Sgo
z$`6;5DM`B$F8SqY#=XyEY!qWo(*!hrJW=!yQi&!!kcn45j)*7M7rjp&L)4}l1g1u2
zY38^lptf{<tZzI>Cp$QJvd1_6BG$VDr+Fw<`|K%5nPB*W`)=YD%x+G&)WSasCK=Vm
zfT}4>t6lhBhd@qKMDvuBFA!-z94$RxM13+AJ0QZHhhT7E%(GmWaN4{3880-O;wLi7
zy5n!+!Qpj+8hXF8Mh2_$d}+#xWVW`ItBH#R4on;p`Do!&b15(Rzjr;K@nOhns_;f?
z084kJ0m5=E$yqd7TJ*FNC3W1>Q4<hh<1vhjD0O8QeAtoX?)!9@?7-I|i*YS<z1RKd
zvqtK-eU7z=i(@peCI6dxnelK@cy}U+GHM5b1)<BU@`PfW^-O7a@v?YJ5ItxGReIm_
zcPlew{1#Q@BR??>CzXJDfB8!2n)QBbhERJ5sj<${@($?jPzM5sYe9X=Y{f4s(~@}P
z5OkyDZ^(g#rA7BbNKMh*H*#Iq1}R<v(Y-11bbBrtoqweMlQ=a$T?OX6nu3(I_e9+`
z@QX&QY+cc90JxQGTWZMC8Huv1=@5rlB;VYC$(!<!gmoG;wq=&1Zq({tqmC&H7Hcxd
zDdCln{HcOwPr%#MTuVFhwHT0eF}|@h_e^fhlx6xpk(q%944qy~-ycs`%+F?dW(s0K
zcH$8b<KxMK#q=XXX<xb}1TE#^lw+%8JEqlr-;Md%NivcPaMHI{9!dOV=(>@OOb%*5
z^sp!j-|RHmJDuL|jzE23Us=2%Rg_sYE0v#$<M?6;D|=VkzQU1mtX1}EuR;GOtXNUU
z57Sw|o+=R9cK46`KRRZ3jk18r_yt~B6V#T0Elim3eIxMgI3S9cXuXT=hirT49vx0S
z-6Li5e=;4tEGyeY5Al_uM1T;~^zn5=8o>3xFk3>Mmue*zFBWv9Z>^rDAjUs&zwp+9
z$i#Dh!9C?_rXX6HsW~JtamA_!>G__aNA*<BBldu|YRlgVc=c~95UB`$9#nsvdcP&;
z3MVyuMQI?b_#5nDiOBrb^b<{Jj)>*OO9E>0u1z!Y{O`Cgw;hvXQ2!+Arp%ziJP%?q
z3Z(M37_g6$Vh;3Y?6;AdRG6|i8WB7ZLliAMKH(s!RdRV6z`V`<G7T}D?$7loO`mHW
z8CAP!wa~o+V1-G5(5h50FQ9TzH36sAx$mlO?RgN+y6CrckiglK<ue>Cp8*-!Y=0Ru
zz3f7ziXWOY?=E^scK!Tf7w@beoukFApmm+(GC{>xRbJsYll!Kh%Z?d~hq8)!Rq{J{
zX(XIe?9stQm5|f{>)Y|yfsJBlwF`aIM>chq@S__J<8TA3#x06M`5@*TF*KuBO!b;~
zB7q21$-#e+%v;c<wnLMMpY$+#RVJ&C9RpXFq8Wm^i=BOzkCspyQFd`>2#km>HjSMz
zynIusj^4`-lb4QkpFkb<dt;oQ(;jW$DLssf4s1z$WgMe~_~EI1X%A~Zm73kH&SkdX
z7-32JQUg;9tk0|S-E?5s{u{g=ZOWsU3Vy$dYN~VK48gYK6&IV^!ko*Ho?5ky!UY0y
zqTOA28oUHz#uIr2Z+AYtHZvzH{&KS#NY?g}&Mv0-PBHrM>UF`5`qakCgkp&>o^QNE
zsbg^ZXSd&0n{tm&I=>l8ZnraoP;@1?<1(;n4l9yQU{Tv8F=-)%&?1Y>-ZhFGN`eGn
z%pOkB2!kzUX;GqP@qQEBr27EY^f4hJ(-{9DMuBgxXYXr<zkHys9M7nVI_}Ri#MCO@
z)9tPsLlnC&s}%<5XtgMZNoq|JPYJ4ua^?hA&LMIN^%Q7}9jOx9_fmDn=bxu-P$s^=
zc>3LWu4e~}2}vlf7=b4d4!<+75}#5>5R=<CD_@i;QW6*3)YaE97@Y7gD-oSr_pV)|
zxfO&Gv@WG*znvv1kw}~zNeoO|ksNBbIuqe~GkifTK=Ix*4l1k7uzx%UU-&J`HFBgD
z+qI9?@9)9&ZNA#;U%2nQ3G#kQJ20jfhY)z(*7T>3QBj#EK;RRgWXBW&W&U>FYi|vu
z<MmW8iA4tmEdHdd-@ll{cY`1*G+(MPm&M}EsB7QD$6rOYq9Qp7oTmrrob5RF1`V_>
zNPqi;9lu&j+~eHbaNci>^ZtvxQ$_qwiE{FOb^mC+J@m%1T}53a8M}7GyW9EP64G}X
zgY=@tN&@gcN~mv~^L{V?w|1z*G}QT!0L;@Z*pG*cBz^&u^<G%JS=|M>9YkxktN+o`
z?2wF!(U*x7$|D>jf~Z?wp0j?QqvHjHfd3D2f%u4wPqE^|@lu}1O>XUTrs5yJtB@Qi
zk$1|>8lSEEA}Y`ySfm#_C9kCv6^q#a*uw`)8)|B}0LH7${b#?~fR9gL{{2Y0<HTuK
zx?%(0G2Fi1oGT1He4%7jdC<4q2D9{(lGB%B>3Ad=G5G~z@&vx_x2@4G%zz|8bEKAd
z=f1buq=WdV(cbX)#t$6Lcp|$i<$_~geM^L{Ta|V!1Dg5RA!utV8$Jf+7UP+BCwzJ|
zo=>N+d5;g9{=s4>OWuVf5<zK06jP_^+nX7lgu_G1T=^Gk!inN@SV&?$>^m+lXc-yw
ze5hr5{>V@4R<W5*jAcPZyLa*PYJ@&n;^3>9WrYtqhuz!usY>?u3v?%X7UzN<4t!jP
zSI>2Mb>J9bQ3n!K%EN*`)=j?JNo_RMPzj_NqFr<&$g9@KmS*AO-9r80uqY($V|tUh
z`o8b+LmIhzn3tH9)TKqlcwI3)`)39pBFwKgAX}HWPka}-6ZAdJ42bUy^jU!o&qa1z
zx)Hy3VQGdj!TsLw3!YcGNe|1WDxYs!<iEvthyAOFEntXz4z2ZeWM?m*|48+)XMq@V
z_p8v@Tt+w(Z)qlbdW7`?_o*BPM!8=*{zcy$bRVi+vYgEUrEmg__NG8FGS!<Y;w@{o
zc`PT#-g?0({-O|0bgpK*z3_}R+F~O<w9d0~`Nv}3ijMA;>sVb-rWbs5;h!=+?b*k7
zrrP1WnXS+_X84peR6Oci_nBz;GCqEK8RK-HOwbh9Fkv59hQleDn@dBp(7v_?d}(Ac
zs_duD3?7sJYwH%RPDBX9d1{&><p@Z{m-o1<goeuOk&r@596w*h8>*|_b*9;_3?70n
zzI0gbr%f`EBC{rXnb%w|MSDEmD09*ivtnQbb~XGJSX(dB=IG_!jkCx(=&Rj}kQQWz
z&zXLHUfVOGKEkZkXuHB=8ax~Avw0HnzU=(8bd@c#T?^A*xle=}HmlDjPmvKv_kx7U
zOPeiuA58*|Z*d82(9?c_zVWk3bHI!Tm|VC1m=%h}<DS|qXa?<tn}Tg4)i3KZ;Srsr
zB!EEYl~e-%B$t4{Ka|Fk(uD>FLghv}?+6&jGmG7r$?F!ZsQi>0$A6#G7GE_eoh|V?
zJCU>Ywo;B5xlFUinDb)5mqnQx6zs|QgNJQ$@H>k0llK^|8?~2b<8~9HT)9+FjMf!6
zzM(}WLBWxORdS1s(Q6@20H~H3GX?UH+sF`U+N<n%Jpo?^aXw0RcAL-+{Y&8dniZ&p
zOT-7Bz@}t(eF`DCicW_sJHLeFBk*TPV*?}tkI4LAMD`Dqr!O?U4{!c*olmrSR)^B|
z<%?1SKULW4IHsJfU<r<^8pcckEO#0n`L`np$m({=f1qw{I8hhNKQ#9T0iH3=Vh5d{
zD{^l1vjU?SpAke`P-VMhj^YiH9AuGe_O+2vt*3n48hH*fJvNTboN}InI9r%c?qNR_
zx%_v9<JJFY?i*@6&R77nf?~vyzv#ol@a)<xOn_wFq4UO_=}ommyGwnNgR6!lkk>o>
zs7>DtTYAu1*l=hPHIcB51qAs_!_zsbli|Y<jOXF~vg!YFlk{=`FcWv`$wlux2Nry3
zpSO2}Y(rV(ZIuBIKfgBCijoJS+!@N?qd3RC$MxOkE2UOl9ehX(=fGB&L_s-b%|SqV
z1#DjPBqkysCu))6GM%{DrHa&U=Ij|#RJvia?=Rf`TGy)O%n{p0SeGw|K7;=z>{2!;
zUs{`j|FBtnB}+rFi)=m6t$qcSY^nE|IMVMFXEt2hMC@W$mYk2%z!B$UT88U$vJCp<
zT+$7O6G}0+rw|nid6I4E=H5`5(>9Xc53~>cJ1j}*By`oUlFZ>=n=k`Om=wbtS=<mw
zwf%VsSV#=J_1v*Y)BX_W<RgCTqVVq3F4@MXZ@k8s4=EFOS!RBO0K6`;uY?+|wRZhf
z*}Ho_3B@PSv(m++^R9aplgk^w6g+}XL&4KFwm1%t$;P)jW}ZH*BV5ZiY7*URih!>@
z*3M{|G47>KiR8y1W-em%XZSpTl+RM<nlfT`L&NB~Lc81;<Z(Rp+wmCZgF8q^e9?6P
z`OCJ0vh~c4Wm~*hv^kfRk0<|{uY*<7O@o#6FJu@a@nb~)bnN>95@O>ct~L|H(?#{}
zZ@-7_ir!H|C-EN->;=mn-;-~66!t;l$^*?Vu5ZSLY+H<k@0$5ZjvF3058y;<T`ks<
z7%ruFkqyL0PC3GNNyT^byEQg|Wwy2?eG27cgN8_J5TIq%h^WcG?yQmj)$=%s#bnAp
zyd&r$a0HpI7Huf4q1RyZ5XNieO6U3(f)b0X0x_^{j@%JU4hGHWiK`lR>YZgj*24As
zprCA@TG!s4FaI8XeRbBPNuAa2ou+_Nm|=tbxw6V;5&x%S@f~K@8_DXrcg82rh}1b$
z)ysowAOnSGg}gs}ng+(Qe2As=#5w|kHMt1j_Q~a`FPu&q4Ew%*;qvTnVs+4~{X~p*
z|B{>bzv~O{^x(GkuWyhNqR8>)%O=e$JBYj#U@o}ankavsz#Y*@U)!a#zw(6L54!ag
zFQk|t&=S&NTT}EE3t8$%*eJ<=pV*QOA>vK6VO0vLn6>j$5;bOvc#PmvwI0=I&akHN
z_xQxxc(whF=bXNZN~TE3IqHRK=?Im1sd4E8;4s{$U-`jDFv;0_PP%41B=t>;=l=E4
zQiu8{F^p^VPJH)sqLmaWpIurQL;LE!$XCX2@wrb+3?5zI2<ro`uGa!SF9xq@gMDn!
zm4VS6-vyfebO!%`8h}l$sN^ih2_5>If%;*jB-Fc))UB}F)Z9z=Zd!yj!K2XFe)ktH
zERRnXFh4ML*kyB1w$4A?9QTR2B{{S^ST=)+vH}U!3ZkDEYZEfrx2NrFXn2x&$Q|96
zPz!M-CZ7mpN!7uIUOg`qRLOHD0~-zVD{%IXmX^T}X{=cLTgL+)blqt|i%)y&Q!wZM
zp0qS~<1+D4X1&w+3S!JnWzAsCJDLY3!y@6I%JX9ZuY76{iQ5R!57>bDJzV^>=`zgc
zA`}2IPvG9Z?1svw*uJHTz2w1LVW&^w364{2+J7TB@H!4BX-I_*IbYqi9({uh)w;`>
z7jq2Qm=33w>qNR=3~F$fRA+whinY-wC|2ECrfoOzD3ckZf%f}I{aa4^T?dN-M6s{Z
zoHVO?lDNFY)O_yN81#0Frs*lPa&Zvnh%U9ShL5S^VDRHA>FH7Xk`6tfRYr^!Mra7z
z%JEZlCV{16ZIo5jZ;NI7sIRC5_|dnnEbc6&AMttn%%CMwc2tjXN+~yM7zgp6$1&b&
zmLOR3ms@a}+|QBxjtt7~vA|oVuw%*4+r))83Fwl#-!rKVQ;EFuR<JH3S*)Zhte%h~
zKi1Y_(AQSI?O2oQ0qTmy7`3xW)$m?GakT1%NfAdeR55lzUDTm*%(uDHE6H6eBa{=B
z9++|qkN{!b_};{_ju9D$LDf=eQVhG11rR;_Y_D-<vD^rUIjcN6`k^56GC@PJy*fWN
zjQiFVmBH@2@?>O^AIT3fhD1%G1GOC{;z7`*n2VZM=oxL}Y$qG>M2Iv*-tLN1U1~XS
z`u+1X)(PdE7KDo$XP_L%IF0(chlc$-B|$ImpKKsxP|sVf3-Kq3%dfF#Y@NGnsIEK<
z!vxE$I-ak3>~kA=RhpqFT=U!vr(c7PDpFj4ISOO;sou>pMGmz1$vA*4&<AaE3nk6*
zF-Is%d&xkn5=OV%C?Em<Tx`i-;K^J{Nz0Lkni{Nj;{UI(r6Cg~<h2Tf6J5{Kz!qYo
z+W9O2Rj^A9nhU?dz`rYsSa}B3NU0x0Ej4yd^=R7|h@(xYW+BAF7Bf)ZNMPsxfdK#V
zVf-g(nLq#Roxe(?9zJMksttmcSHwteUGe|E_>ALR2s+etWuX|Ud$uZmN2o}bOjDrH
zlCc`z6CcS$oN`PyP$+E7$XPfgZ&uTad-I_(;&)5`OoIJF_9EpMe@mz)GxoU=KcSi1
znSem0!0xd7+G?QAwVxS!dFpV*ux#7%!;0_@g2#9J<;@lVbPiT3x?{GuSXzd&fE!@Z
z_dT9l?ty~8_!@9AW~C3d0q4XH0s_~k5}P$1*mUCK-L$c``1R{gOBW@<aK)E|d3qfy
zy*I4nI@Dc4acK%~_88i`cQvjpFwlAL&BAzN6dtz(7+$!g)qfJSG{grjzphK(`EcIv
z6B+sa4Z%x>;-x<a&4zGPv5o3i(??w6y3^H|Sme_vSgX%Jcypsmymq4H0lG}QzXsdD
zMljP?@@0^3jR`7pcK?W%zZ6*k(~)ih0qcI-_FeELEfUU6XR;FSFQjLelHf%&h3^-M
z!8dtar6DUR^vfm$rPWm;96I#nv2PmmKeBaCL58ecK^Z?110JfDj8J7zm7R)R3I8ac
z$U*S%YT0Cru3#xeMIq};F>FXyaV^^;ZhKG3rUlu_L!CHK!bN;vO^J9#jxw>7{sxt`
zn)#}2d{RusqUZaT6`=eXI*&B2n;>X51gUA5p_E_k22veQHvM2Y-MxVVK<MnRXQ;L~
zu?Rgy0LCC`u^PAvol8M0ByJJ73caB!?9)riCr;*LN-%@cg$4dLSHqh3gY;&4%u0xO
zJJRnLDR-CcnhbU7uZDMZza7_V&Y@^XHN=IXD?Hn0Yfgm)GR8<$EjskiULPZU^K}j|
zYV5L)$RvgERgxCzwm(`A`OxY&hlj3Ml;TsBXChEe=P9C$O6+tpM-i(-l$P4k7sd+a
zV_v=liopMot>lav80rN9ytZ_F08ek`0(iI8iRb=FVA)qpLY1-S=5M2&0l1lXc=$1{
z=tzCxMLy)iv}gi61-)?a_@|uos!5XI4bCn1+5h|%1%)jjUl|zdIyRvs#6zG;ZC4%a
z`6hX`BJHM7&UQ?gwKDH+4awUnr!7g@9BFnJGWWyVQ}$nHlEyw@=MlJ6uo-(lyCs9C
z&Aq}3^J<e&zx==Q4mcRpZdlCsRY9H&+U0Y)AF!|4aA4&dyJO?7bF|7<R3uFaB{PWL
z-cwF`J$=8kJ`Fjd`7sR?8KJ+ygNfv$!I7N<%V_K9(}kPFbU@@wJ5%KrZbj>^QUp9_
z2bx9iA31^48lN!dTobNy^E&+($o^EGj}5=Q1#CV_QsV0~ykq2FS7@AME{6$TUuMxR
zfB1wl@a&v1Z+Up;;fCj<q*n2$oo2s$?(~^bq9%D!;ea@e7`8BJgJ%L$PLBDfu^@Gs
zTe*b1?Z<QJGBB`TW7`(4fQ{Fch;Q_F(4&^LmC`_yfjj?{<xM1q&+EW5cBXmQn}+Ij
z>rl!#;t=nhgRXs&Ot9w1R~RVJyPRIs!`FX4k$krdTxx)ZUela{9Zs3|1L`?>si^Gs
z>-0$}NWo+mn;b84grn`Kw-UpyY-9Vo8yFd`XrIKO{Q&^tYJPhO0Ej<W4gHKk^g>lB
z?7Vo<#p>cm_0en>l}s2uzdvX8C^F7^f$#R5^cZQ<-LkH$nOk%zzf0;~4mD0{oGJ$W
z*^6-WO+#rE{o0RDdKm1u;04Ei$r_m92ZA3@x{|G+Gew;9@2#Pgvkt>M{^sM~AC}g!
zwnn)=#ut-Sbds11S3JM(*|+16duZ}xJMm|I-ww_2H;|mXR3scKp#-2zgmB99i|ZI=
zUe^0ZPa}SVhz?&fw7Ryxz1Kgn;l7afODPrGKZomge#s9VKn2+GWaeX9o)dHBt7dkv
zlQ6Ne{%s8V0Ao;?03#JwxZEAyPO{ViAIFHw&_wfW!_x~(8Ed>GD(&Q7?=&KrNNGUk
zvJ{`WEa{l-_$`RFoywMvp=t{E2bg_wuwpd5q_RLvlb0+4pSrxyryh1`E`CSJHy$mH
z*7e%ut)Zvd5TpTM$=prdq!aaNl?Ag^7lAO-ol`$}#0OvdFO$V{w2}VJ6HA-RS(E>4
z$@GToM47vxRYSAQDHtf2uM4{7j(=*U@u-RXy7pgC6Ud*05GkJayZeYlrG}kt#(1CW
zX<Z;wcYm+jy%Jq7;r1t<a+VCUw)@$pZRBnj)Mf+8q6d7>(9{#%_xJ&H+39aZ9oYlz
zwWDoD$cZ1g8tf0=bytQ|-keX>aK!4(Was18^$5~rlhM+w`5UP8FZ~sPF80vuoXH)B
zN#_NsCN1%!9v*8ahrLedO};5@3KsNmn5RfQ<=J=q|A#M=X=VlR-%~W0$ThGVlYhKJ
z!OH1Qo{uErUmkqKWw0YJ;;*b4qunX0Zn?+ZaOYRlY1*YW^AN@O3tMxv`w<I8k#_#5
z7$m;5Cp;?z`0`&Sm*>0o;ARyG9*h<`xaM5-@Dus=dC%q3oB9Lxs~dW{7S>HYOA)KC
zrGolS76VNzZO+1)@*gUPneV1tB?=UYieCDb@-iISkbov8S=jQOJNLH<yLF4KeBh%A
z;^>5haq?=fU#ri2A8)YFo`byVL45)0Qv-#$+c{iaI@I_5_O~};xS$kq-2MS+?GzOw
zs1*gs0q-f1OtC%t7vyjTbYB}N7aLKDFx=&gjz7f@iX~oL<Q5O?B6IB}DRLnB%zG?Z
zqPFmOSpK0R+I*P=czCbj!CIhT7(w@PCY^1iit@Gb30sJjKvhFe=I1I0bzn_;MI&29
zqPIfv4wBbVyP>5*XN<X_*GNn;+eJ(<XEy&^;_s}_%Ezn3aH{<abHvcql*>X;@f1$<
zF2`BFGnC4}$7ze%n%;dNc9O>Ik%Zb4o1v#yJ3yU7Q&AoI)k#wL7GCbVm+<i~7K-wK
zW>kFS@$jKX>^A#8A&K;2Os_6kkG*frMB&RTVHXRCwCu*koi5ScK@|&jD`kqZYg_;H
zoZKkW7t1|I2^dp;Hf2rU4qW9<^>7dhRx!sNk6iKIqOvSjusa$vet7!05<aYg4q+Ww
zulX`;fi~K2o;Ej1e(+y0%!_BD!1lZg0_`3(pnMOklpMYLqCwEcB0NayX?Y{){}jV{
z>yo}z#t+fu2d7$e-P=U}K&j&~pPwT^E69wdzJEHCD`Z&ub7)@An1DNup6<27-6-mZ
zUw3)#KJ=W4^POF_Ai13_RB@;Z^JlVrf%CJAW1Tv5#p^)yN~a21DS=<zjV4Ss0!4$e
z9Z#vIIp~SdSv#gi>)z^9&5zDk1ZHNOEqodQ>SO}S+{&imV&Qi8ZBycj9;}%6!Vc|T
zoB}WpG~P!P?jS0y;3=!@!k1Iy4yv*0D);~!X3C`SaC8nR8{6#%PC~)Pm3+6c2Qb4N
z*i8o>{Mn{W@eAig4ywPH)J>WEKv&3FYATWb);qP-I#uC4q5L^_c`~1CJR=KMGkz2z
z{JZr0oDXu#G?mFxTjQ>nPD6dj+~aX2`M22q22#HXPX6_hQ6~eNbNx!61kgR{PsVXW
zB5OVg4>_bU+$E!1BgxrDORZPl7T7ex{~?w^cPL8-WLN$3@Vnlw(2&@hNy(^rNiD00
zH4y)*1I2?y_(X!~Hj-)N`H0j<03zmK*y5(S+D`c596YAA%PAdC<E2*}OV!<*T7(_v
z{FJjx+%iFLUeorKCg*dq=l5z3o2~1Ed+T!P{+o`cj_KGhd?eXQ`8fjEIobPR!Iq~u
zjl;c4er8o}MBH<v%hXW39r1`fe`oeK)vNIH99OOem{Lt<fas!OYe@1L%0tW|(lpx=
zZri!B9@l8msDIyCCu#EsZs^sXU-ntKQB>rOlu3RaN_1Vn&3?hf4JQ!7_5c=x1n|Za
zU(l<EPu+^~N_mpg>VQy9Q!v<4Bqb09V%ED@^rI$d=YLR&QUo}Ewxz^sF_n*xnvgil
zMmeWPjMbSWiUw^NyAKY1^)rQe6k-`60qu6)FQ$$~_55jD)nl9qdk&xx#e(RqjHKTu
z2}Ex-EXje`-Xe>112MYMhTDHL6<y*b_Fh@}S3`H@;JZa?Z^s-u>!m6q#IjU1T*?JV
zv2^VXE%!%U?}KI(7k=ASsSZpliHQ^;P`RHSNPE{@GMWSCg1kJbmTyj(@}*n-p2jFA
zxAQ6R>(-$zWw*VhLZO}fR;d3Neg1!vvX~gGXfv?O50POItxIbzvF0{PB7dnR-=`sV
zmi0z&uKk-?mD~(ZS5fldGYQ{<HTjYfer6gBI8Sa&O_r<aMV<kL5I@A96y7|mXXz}*
zeQQG}mkE_5&d;ag`EGRGq+d507SD(7_Z-4?dj5FI_)pL2r1%pvE9dJtiF2Catsuu<
z42Lq_i0BeAqyQ(mE+vAOITT{#%%SXs(WEaPtx_y5rgR-_9V$HJWD{xO5EL#&L)bag
zVxC<JKIQ%|-x$y=N1(f9F6KYxrsnLbR3LJy>U)IFqb^ve<RbY2Z#`A6t;hAp?U@+P
zOli+-Ov$j3@zG8Ss?R82Urf>5KZ^!9mu*;w+i(tZmp}BFB6o*SWu5eS=uw2RjkeM|
zRDi-9%sp9XCq{C(a}u*pjXM~?8$uW&LDkwJW?XQ^Vc`Z+-lhN0q+(<VJ@osz9i^(X
zRy^a~6hhYyjJq(nXq+0aBCT(FL!4N^CmgQQzWs+o{}(mpp|YilZ!5}<6AmvAaGzTw
zGRLTT=a>tZS<095pVj!#Yk0$oJ8yN+;9SlA+ZdAKl_ykH@A*|D1QyNXvOwmmIriK|
zFdu8a=)N&olQA$?X`D3{XNJA@|7i(jF)B^oKy4j*d`1+=e)P?-f>d;RY!6CGY<4Eb
z(m&aSCmIETaOJqp8xJdtM{&pW;30jdV`Obgq)6&g=bZYMj0E(n$r4FBv#=~Ty~PQn
z>Nf(5k@3XR;V6)9TL9rx0q}CKkaSI4P6D(u@m}705p6gyF6+?@$HvFGaLEOpbc&aP
z0sQ}Y`wgBIYK66io~Lk^94L8Za9w?xM&-?SCQ{4a51pwPj~rz9lumOmEYq$=(mZ})
z^~vAPj+;_2)gDyl>zxSzDiL_Xv^~x$g4$JjI^*R^i&r~&bY+2pI*gLzT=sQZpOG;N
zdEzj&x{sO9RuBeA{W}x?S+`2DL~?u`r%BF;bH^Q`e;!%)p*}4r)Vtw3YhFyl&HZ1V
z2In;9^WHyz(O(@pqJhfFLXU5Dio=1?;sTxz@|(k_Oc=gZ6Z~a0@oO%(yFV{kUazNM
zhtb{D{I2x0F`q2PpNFK>LO(>Tc;sQzS@s;DGZ#>bUoSfNje42Sv|xqP%d-#e1#wv)
z5aXO%W_e+QzXEI8<5-`L&uOB#_!^yRQ}M3}JGy#)F0RU`1JpBpSXEP1KP`79YTd;N
z4qAv`1iQFs(>U|z&I-ntC~YICTR9@8=PO37@c|S$FRs;O3U24p&)0D7UQcZ&uXizk
z5)Q!LR(qx0PY@b5sb$2HPEGAWIKMLA=k>ezwX3-WKp<(!N4zI5wEBM!+&p|jL?Z|N
z{WdD!U0EKXwyZ%1*9%A#5y`Lq%9Y5k)EItF<@Aeh`npk2ETc-kA|i2%8`vSv+^IAL
zt<4npUc>mvT8YtxA(U=yI!vKzd`9WrtxizeX!kumqp&`jn`Z-7nU&H;@}S<qVgNS{
zp(wlq%O3n_8KFJV5I|w8<V!+|Ag+hwTTi9*Bm$sY?I!|4W0<zf5za?r3c|>RwxYT0
z@MqY*0!Re)XjbOGz|~YDy=|lgA+?)+Mg~>gd*#+57bMB!W7gu|+&)U5c}9bGcWDWU
zyW=SMOQ~PgM+D{6ZrQ@COga4NDM7wxK>UeiRpkTb${{kRuZvr)IF8$~0v$YJ?*V~r
zoWsu1qLU3?J{BTvIj65^e@2{cFb2eHR&KPzOM_WLuMg^4|I_uFx``wepn;M_?PO@O
z{T^9fs{y@DZ4K>F;H&TM^hMX+rXyNs3Ru`;@S_&zY9ixKuhZ8KdCoVz2EoKW(*74$
z`%ggYv@9M95nV(6X^93Or|ES7W{L@k%%^W!o^g)k7nq;3C+zll@ms_HI`PG8mqQ$1
zWZofG03U_0Y!WPFQ#I|w$3|>&>5es}Mqg~=z->PAmmk391>Rxy?)yS*N$rdjiUt<7
zp@n0)1TTjs2vBRNG7}{o!3+H~wkGl@N^hSQ$P;7BlIn}o2OY4AH_j7&Bzs%N)P%?c
z;)oC24rezMY)#e2;S9xXsl%d%4!{wamx!CdJIj!EOz+V@J~p{7fDW|?0qJ&D9K#$M
z?gQE!i01Jt*S}^-p6%nNh++dxRoNnE9_sbZ&<5qF=zK7`S$}jdg+c1-aKcVWy7*4K
znUtZWmX6Z7)ffLqT`WJ_+X7f6qV`r6sEdt<byb_#>OW9ImA}){S(4DMkWp%zJgA2D
z9Ps_AK_k8rm)!$Ca_Dhw?B*mLIARQ!3P~<qJ}|4DcD0y+E?G|l{QETk$oJsY+HKUy
zA*=8Bc$o%q{X<Z$)6l`z9s<HpHJPpbTE5l<x1<qMR6h}jn3)wy@T;^NNZ8hdk$C)t
z3|m&7KK416_v5vW@^&{2yrzvrue~uK+&DImSbD7A;(Jh<65*gaTX&oRVPnR#_jhkQ
zqaIOtY_Ys0w9**pe-RphN~p+TH|(FcnVtv$A$81CbPT*aT1h+74pt3SJ7O`lTXN$N
zJRcuA-MYI?^;uw<VgkHTPIxNC2JZqge+y2gY2pYom@<J_p`|I+JV`4!l)8M7-uPuW
zNKm+aQE*{uYhW+E(bt@s`Fq|je6tGT!e{LxY5q<8%73`)LqZ&zVkc!V(aj<9GVW|V
zx$97^D7DK<V7+$a6pHC>gJp6%hbNo!Ft5L+cE;vj;7_Iv)vlI8VJ3NytUWXwf4x!c
zWvHCZ_S5D8_3iIos7p^;|BupGtE7GvA_Ek>%-t5wQXVx8440_X%wXu&0^r%*X*;e-
zlhva)kmU-sh3ceNi7{LE@Z!nw@h`-8eQ)SzumH19_ZGbEe?V-h$Ukh6zaXU&PNjQ6
zCUb|vcK+B%U%Drf;PR~)vflAaE5EqKe_ZW<^oSzPR;-*m4{58#)<62Qp-$bja9BCd
zm>d+viVqb0day2f@n@0yS2xSnOUdhcoj!(7cq3#)-oLWv9-ch-R4At~3)62ngNXj-
zZfoMO2%KCFD@8H6`Ll4b*xb40dVG5{*9&mxeIy$Io0^HEz%+bZH+!l4)$=xd<4;ql
zwoCeZXns!&+QE8FY{8p~lF!WGZ^eK0UQbgM*%qDxk#o*U^0WfiY*Zp*%}dy}_4rWX
zPEmsaU01uUKiaBfs|HW&5Uy0O>P|?$l8RNMBLV>e;`<SEVW4>k4C(bUo;MDk)JGAQ
zUb2p-T0^*yo~H4A9#HYKFbm$*R(11eRgQ#s^(-**Zd@e3B-O`-Hx@=zD6)Z$uJs~p
z`)kOD7~J)F2&)%+`%s1Z^-<@EE_zO>a<8AOF7jK=!{g=4A=M;b`KoG2oXrNu%e~IW
zf3w~1-8k(5Dal(=r{8O+?6Sa?r;wf^*76}Ix<+^Bl$$3FiVFo7LbtX%qM+Z7{w~%$
zPP~D#M6a8viLOoCUmzQjCwdM3_@`eouTSYlIGYLmB==q*`TB@jcX_rXg<G!VqpTlJ
z5wR+yo3MM=IfqL6mqym-M^D50g(6x^nyihFxu79;6LQ5zMErNBhl-rOUq2(s=>0W%
zSu_&SLSQ2sH3#n+KI0w*zj`JL9^q3X776`!!cC<>lLYk#6Gg(cpUvRHhH;Va?iJ8p
zce?>u1eSSei&Npw`9*O-bvrmJ^v%Eq^pQ1=#00*GrRg~2X@s~W-KE{0GZL_p5%f?f
zHxE~O4|)FiP~fDG*~{tVUzwG3Xh*v8ud)jsw-sC`y(W?*rbaa!C`1AKlBIK2)94c~
z)#e>iEIGZRPTlOFont>5%ty_V?)_R~Zw&4eJ)EiKs$6pFuba4&ROi|mRQ=MnBBg_k
z2+ru<XzQUml>Hp@>Xjf+cBy~<?v3m8gOw&bte0^K*=5rC^?rcQTRgh1O~tdtx>`<8
zQ=>^`Od5Cpy*@G9sn^-<MBW&t(<WNgm#{AG6$sjQw4w5?^BOBNjXwaC&_PFW=kBFP
zC{4myO9aMDR3|&{;Oc80IP5}iH{A2;|M@wD;eQU9FNa?K{pY~7eQaH<_zGHo$7_Ah
zLtT+8v}|6vwZ)!qVCIeYOQxgh6n(qN9B==7O)3oQuahMg3*(+T8Zaw(mOrpdr9%(E
z37WFSrTFDv6?HsgQ0x5JHwR}x8i$3>MV&di%RbE9UlJ}hx`}*2py48oTa(6K=xG79
zWL@Y!^$FiP0ju>bz`zq9R#J62(-BMzk}p0IkNe4nI&fY}4<ht-_BDTpvHLcM-S^l!
zMnBhm|FZ@&YPt?3U@*?dDEnD;5<fB`KeCb2euUl|r=EWVzyHfG9zWxmut*;1xcLW}
zn)jIamOi$9<Mk?E52ZhIYuSHh@fqVqe>BQ<=Sf}Rja2<0Zr43mvq~jYZ!8XGp{iti
z_i_)}bfK^T<GDVQ+1cecI}(y{V#;OSmv2q*amD>F%H9Gh%0AlnmktF)8YxAj8>EB*
zQ9zKAlnxQ;mS#v%B$P&CKxw1|X&8`{lt#Ld?iePX`;6~7|9kE|zU!X<TC-d()U}-F
z`Th3(?!7;IZ}nE<^^1L)ywnnV;_C?v^h%P9&sju%d0s;9U##!c@&>!W$4Sj`?cXsV
zw-<>vXW088B!kVS20BOQBtnbZMp5WaS7-*p*10tW^X9>;k3{*y8g9#d$o;(qpz3>y
zxN!HMZ%KgtmZ9#K|MC^Vze5rtVYu{ei7~2Qi=Hs7!lccaC;675?~zr#T(M+E`=A6r
zrd_)16Jgmb&(E;k+3U+mm>*=%H5M?6JMP3G9K-<+uD5xjd@Vept0dwK<Dk+ug7>>P
zts5K=Rl~S!JqZfLdf969c;}Ut<jzdM+(ye8MB+`bPqSWXsmQ)Hw8%S70=TpI&M38-
zIwY>VB?i6Laq#d@O>|m=LNxt_-qxE9rS<c4VYtyR5A~c;#N_Gom1B%kY98^WZ^V5s
z$PlJmU(hCR#2I^FO*r2Nmv29br@~(8zkc4`@Hsv<-*u0_8I}2i#H)v<anWd)RKBHV
zlE9f(GF#xAU)+$92|-J7o3A*!qp+{^&F|iicM-L#JvU)n(#TmTla3Dm5AZfWF^EYc
zv{et>Vo4sXEU#DF&5&d40$#J8DXhR2c+sp(P4|(3HE-(Oqi1LFfGP7q#W-=GLw`tr
zP8WrXyK>f83D2@)@y4Xx%qU5sH=g$e0&naCAcaaJvVnbfhFS8sEy=bSEDzZ5|NgNE
zYfV2lTvBT@a^+cul|SoTGQc?aF)?I@@HNin<CuI|+MPwwom(HDbb8>>61;kn<=MBV
z*=D-QdTn5(eWqMFxSlkGpK1zx|0|<mem~(X0LuoSm;Cy$5T-mR+YQwhoNYMp)y8b>
z!q%V9Qs2DsQ*@M?R-Fr)6~eaIH?tq_y;Ht%ohw1?#BCjhAMgP>@)TbVtcFf46o8G!
zWdAW`HtW=}-l<Wm5SmK%0UWX~3slNct(C6`0bLj<ST_w8EYcyzzN!i;T#}1vl6B7;
ze}8k}-(kloQ<6`)L6Jh|dbh1IS3c&baAq#tD!EVi(Ro7g3`ayj8E)>zHiJ~*)LoIt
zMe;dheFIbPl=7tnFv<!_vTfzT8O~O45bpfX6HiCKfE3X*8v7?6j@vhUkGr4{6SCO@
znh|);d*BcOcVYAN(4X15+b0^qfOH@q)8veNe0@;2``JW8_qKoz3pqt5aO0tT)MGVD
zdV`D5`KOSag|YF*Zs_E1qe%#}gU~n3vGf@%U_T33Ck$}oP!ctp!1T=Ke4c{xRCE)0
zrF{%cf`)pi-~AuoR2=8cmy_mF#B1{QwzuZ)dR|@5(zZDJ^m|iav)?jZnYxEE;_{Vu
z8v@tmQ)bf3w^@(!-OCV&tuW>TR^AfPiMevjO6n)~JN%D=74TIP4hs%TwZUOBZBd5>
zddkksP^LM<7PXZ`&KY6?@`|vV-SDr^8iX<D^+3u71=!gMeGzGG+_lb%yvqM+5U0Nk
z`^L$(Qr{5}2l$;b8#V8uInTT&kB1J+j~!39&nreH2BGh5=Ap7r@9wjP#d_$kzzODo
zD)&8HLLT+kSSi7ef8)UGYU=aIzo`hUv}k>bZ)UBgOh946@@v?AwUvy>vIlv4V|<Kq
z#g&gWO%sToE~MC$7hENq_m<)GD&e2^8M6I-rNnj=egVA@HkKC1+qOGjT#Va?%PyWp
zj~*6Kst}129@)eIA$kPv12=&hT*gTs`4%V7)zf>D2W~Msm#heQKr5-aOND!M>ADZ~
z=#;tDdkr0zQmCZheIURmWt{$pj2#26NerbDL)30cE_t}!jJo;)6Tm`H2@mj}1BXU<
zBTc96e!R3qdYED++D(7K#ZQ5yIfK4yBh`$5j<#zIc(u!JJy!Zx9~*NnWAUG}0Ql4v
zPc^4TJ_J3uBVDwrI7YDV?~urT!Q^I2le*t=Ovxh>g_ojuqEAD?S-;?FP^Kc*F>W}<
z0E|G;@2KrwL<r{eSl$AM&{TH8lx!Y3Nb6q7Twd0)9{oJaP8*gU#)>FAw^@Oqhl@|_
zbvJ>7#q{Ha7iyPeH4AycR#5m8;G^j_>pnv%D(_g+@zil#y;G2Fao1aF+8PS`juMqv
zd<TxIy*uF9(~A`R=4*VGvVX$^ijh=z-OE>97r>e>+B=|8CRobf{JW<$jBn-q@-W*F
zcGPahCb>w=&@?W1zV!R&mhk=Ku-}3U+S2%#u`s^x(7^6p&DgrFYcPfz`%_uV6p1B5
zxJ;?ti5NkO7Aa({<*di^>ure#iIBDD^&ceXc=<_=ZS(UipNT<<A`eOK^co%&R~=oj
zW%1pL-TeyF#vrFaV?OeciT1X<F*8zTnw2Q4#&o6{(wGW08>}AHjhr-EdS_4x|LDNt
zi}_(6J=U3<M~reiGM$IQej<Ux{$1~H_GO{@ISl{U?gAI^bU%H?v-{_C>w(s+LG{l0
zU%k++iW|73w+n}+%RC~Gi?202A_z7+o+gcHbM(6JP=p39E=uvZ2v~<o3tv-lx>at(
zBryBQ;f%eveE{^IGg5jblyB?^mbq6+Ck+syTH(IzE+Z`aTp=+T%TS-}KqiivvIWgb
zIi1VevaFVY3DJo+Oet*HPT%dDDg$F6*IK5`<KU)45J6i)N-cetd3H%UT!$Io&grkJ
zJA(Ar0%)_Ka!qR2LlOld9}!oVzE}ajAL<+c$1MACC=ok>2fK4xjYtZ%--{_N!p^ci
z?YV`0(XSq`;h4*`WoZ11ze3`#riP}347z{$5kbzhDCz^-jF7ii4tMq)Mfgpune7%i
z9C6qFGA7-k*eK|GTOY-u0k8ur$G*G)Zpbv+h@qPTpFhA?h2C6i($;j>qEW>udM=&>
zaW5G0<R44NPt1dQa75x$YAMDi-_9PV|K&p=ZMNKi_tpyPQVFf_b~<uAw?Dew=eMc`
z3XG8Yovsq$Bhn&B|HIfycw;6M6gBN%dxY_6*%pJ#M>ZpNC?KzPfF^_sCOH;LtIJEE
zej?@;JAmB3AmWgXKm_!mkP=W#kN>GIp2z<rhTu~Icm5qtp3)g<tF^qG-K4u%OP!bq
zKH(ESEecpC7jadf`4Ks<x5ic!Db1$fRD~N4TM|ZlJQBSPYjp2lPmZ0{8n#Oi&5RYW
zdv=H^c6jk7K%PpGLGD_uI;gg2-ZImnGM>!c<*46B!@wzyypLkvUH`pkaxduo!sDE!
z+@KrYZAZ4iSoMVgIdfLK%Ziu8z&3HFsoU36`LZl?-9yJ%oy3#XUMEu6w(bFEC7d0)
zEo!l}?2MLBn1mMtSw%M<3fY^L)o2FST)sfRLo;_(M!GYnWJ1@cx5)d~Y;!-nMVoeE
z$UT^(cw?m6^$v&py+nJZuqPuKtIO6H3L^i-1cbNN-3ZD2xtD7qVnRUrE@*g3RuW<u
z{;1V(%!}gm9ipY1;XE?Mg#XL;kdr3X#ltPZleAF{*V?Dw&Z>cW-fq_j!F;o|wjG~4
zLxO9Xn-|YWwtC263K-|0+rY^J>OcIRyaK*hOeLHOiHAH<0eLhT^+0+@@iNduTS2{r
zL2;(Tn~;3_U~}C2rSv|$`h{8Ok=p_g2hD7>7XaI=kma7|MNNQhi6{A|T<jN_BE!n-
z*B*W(e-9!`sV;nupw6RN3?|;clfugArJvJz5#Ur3e(iGVxWIQf@hFSo<B0gMiDOoj
zu!JaHjM`*nB3?SMaAs?(W5oFp58&H;`}TwN!|g9%0)F?0VzO4>@KdMySIEn7K6jmQ
zwM$MIKBXbxZt^-f%)=Tw6ZGqW>^vCiY3SUXN1^B(ZFcr`(!jfn_DfUvsIPbhpU`+f
zBDV=x9zO%Wn0XdjPED{`y<wppA(wS-4%|OT-3NTjv+^2>sIiX9_!juqN-dtS|BDUt
z2g5HnPnT%)=(rocauQzS&V|-PN?vv^*Xx<LUp1{1CN=vQ<fyHEC|AC?PTDmu{m`y|
z__Oqm2jGE6qO_5i*P7()w|P^zIRs+haFgT3GjT7u_SJ4JhuhG#Px8w;%(aTL7V(NN
zhmdjBs>ScyAK+7G7>9|SdE0k8LSM-OP4{87_SeQ0cmvu%{y<~x%m?eD9w_k)M735h
zvhR4o4c5?fFIhIxfaU&J;xo{0_adQ=UeWeg$GZ~?df`{y(z4jR#()2`>XS&m2^J{2
zRCAWfW6;nVmYo{W-1F26xtUvIUwzzbBN3P_%ogHhZ$nkz;yOe_;WGejcmn8Px^0hf
zNzAOr=@pvpUU@tul9*j~R8Djk#T-C{czXW^sZovL#V|Sfua038#J(-y_QX`D<GJI9
z)`A2V(53!FdE5u<vBj<Q8EV!>s+35XexNZ6`t|!HEt$^82BpPO-YI)BPx7;pee?d6
z#cqh{<s-HI{<;mM!8{x%R508460zd*!gX)^987bZzikFKnlC^4{-ZVclt2UhE=Rrn
zUtTC5Hso#A?Ma!{MP2pY=n?eVRdf~R)PF7uDKM7p4B$W0)m+m5_WFr5^>u}gyRlkn
zw)rRfLiP0}ls;<8#}W!^E3YuF_u~T(UDof{R?mgz!MQ2Vg~pn0LsRy=0oHk&^ooi2
zj9Sg?S`zENN_e=u!->!FP@nmR6P1!Lw`7WTRVDmIHnG)^&}!8-V&kN{e98%%5VAOK
z{&+dyZT3=%Ms^==Q8{xCs?uXeFQWCQPiizyHA?(hdY>Y~8=IgqhLet8Ha-`N(tQ+E
zy)5}4m<q@>A&Jr-|Igxn4&ol+yoH_=qR-MFTu37^HZ4qSyH!Un2ijEiCZ%fRd?qFF
zu=-W=jjDb?x64X!&#P1Y)0wVxKxg0{)VXzX8&=7P6MWCUv`3G;F#Y1Hb0rIh!(mLH
z?=yUM=)7jwQG?v^u<9l2MY-j8>X0qxfs8duo1&LyvY;i!S(jOV1)%ehH-3FY4&-_;
z9!!uWn-&jW#dl*_nqFC8&kXc>AhQVt7YijEzxLQjD-rfH`NTNDK_OOH317TI+Zpbm
zzinPr(ref>0H!uG^K<DlN;=veqPvM!I1*lyb@#p3F(+F1;bjHon7;ko?Dl!*v1TZ7
zkDwhlE-M_E=pw{3w5BI*azcKlwFaEz_bUOgwYjUX)H{bd%l41nqcUzzOj2#co*&mt
ztcn$Vtns(oBTxJ#RKA?i`n;+(OB;1Xt{GSIH^SnhWG-(<J2@Zi5_J6=Rr(Mw1rg%&
zET6G<TTTsOHE&y5K-T#t><&u}W1AP|;bS|Y_-q6(TVx`4s{e%=bwI{<JK-c$U|;-1
z=*fcE#A50ts*m@_7>vOx@jbVOYK@B_!I2&Z)4g|H+3d81H(<)L<N<f9IUHJ*KX?tR
zFTQRw^NSugq^ni+p9S8r4qp8}S1_zkHNCqccs6POC@I&-R6q>q|2CSqbrXNXJ}CFM
z6%_5PAe8jd#Sb&dAxW7OgFk4D)kXuw!X4?=*FqpXlHQMz?T2mJD<1RkeeBI!w~r?6
z=Py(P)ns>EL#+d{HXoz8@M3LNTgROSYgGRqiAFvpi&?*}y&F<eJ0EM>X=Hg--o$#K
z%Ijh=V!QNh404XtA}aWFdU4~_K6lDfzIxGHJuJ@L4%C2LWzn8Njsmku)@E=3wl>`4
zMjJ#zBGGByCR3-cF>aaWprzEr64$?JtGiE*nHy(g9~)QIqCt&1_t-zkoWBEX`~(=m
z3Q53OZ~R`{6C5=vzT>OJk$y|JwhsfskvBMb)htiyc<jQr8ESQC@=vu?8(X(b{ZcU@
z?>;XtF1GOoY;rU8P^n+H*%9<L9O}vYi5qt<Uh*;8weAO|;`__v{K;`zSjj8?Z((8e
zuRbmx>06?rCC=kC%O2FqmIIh9bbZ4v*p5Siz!dxnO0iF?zK+vkAj)qCE!-EoSqYpE
zLAxo8mUz#+Gp@S8fLGXLN?+pYpEQpQ)=_0zXmI|ET?@pWz1u&`M0b!fYH4P6N4j+;
zB$j{r_p`55S6K3kN5s)Zzl&cq8)*<kylNOtYW`INvv#2S&FH=5Q5AbRGm42Df{Kp#
z$ATs8!i|db^ovGE9%CZ9y0T8w8<hLB<!eK~-}EV9937rGb{l=bJYGJT8#NctH@L9p
z_UALkB!4OQ7c-vkSNu-4FZJBF^RV==&v>>r!j<ENW%TDVRiFH?!qZAa$6Ay65AKDo
z2Ao-!jQUDeK_-444;27(<%0;0eRY^wAZKz!q2{Rx)jM<&xq@<X7L?`2!6{)|y=-k4
zCbr?y@OX$(-@0j%E%$o@$IqGl5!yAh_wn4kD9RZWF++f?nt`;?<UZd;t}hUQxm53W
zzbU-8g@)AlJ<lrnlX3cp%{bl7I8*#<#>wIqXe5+d>7WU5x#o+aDl5bWZ*IC)D47b4
zh0Z_o8K(Iye7^%}-#SpSe&^8HDAs@8Okc7)R}$SmfS>G93(N87JxV<izf5k+*LnSW
zn5dXF6rN#JIJG>pN>TrbW&7e2zPKU=g^!#9c)&WWVJQdWhrqb1Z^eSH$;*atrMzK0
zs<(}^^^OXR;WYsRPm`XJY`=gbp<q*@XcsFYwrFissxF1g3Yr%{bwB;Ayq^UxmB#W-
zR9=-VAj`FQwPxD16T`RyPUbB5HBw3TVDReo-@D!pq3I{rkw|(_4jQ8`Ilp`?fkBTj
zNZom0@*t8r0Vk{P&!Z6sm0qb)0OH`UCk**ykxKt!>ydZey(jgSODh8BVog<^;oeW*
z{}fZ6-)CPnVKQk|Y+V!_SJuLrv0+q{b?po$SgnuNpcxZjH3ZXBDHMh=+00R-`tKuO
zqj*(tEFSxZ!QJoBnt0hQ=;$Z;ze6>`i<AzEPN2ROuak)7V3~>0#_^@hiP@(^D?isJ
zt|wQp=$Iq<oQmY>*T%kWUvg39IJ^FY$c^(CUGYZFfG(qoyUe(}8NJ6vsTYloxGf8g
zUIR0+?oVCIx~{KnP$Q9`CuR)wP*Zm|x)l;szOKB_Fqi=AhR#DD6Y{+}!2m}n?^ix9
zV2OSJDQLKq`30JFv**;`-1Z{q$2qok<7F%LPb!D+2`DqJ;%OZJ^<mwr_><|?8@)ER
z^{7^fXnbn>eL?o^*WqkEL}@7pCVF*o$2@EuZLCL~GfSO*JtwrWJ|BmQ!=0Ao&=jZ*
z!W$q#%z*llS>4H6vDv|mnbtpM!6XKLYyEh^YJ76ge{a19s(ESNu8I#EGJA}<!QNSX
zXvE1wuzwgwx*SYxQX;ZP*qkGvjVa?Ta2nTd<?42)VP&cutTuhs_MoP(V{ogw01cOv
zk_hvUH4<0ClQ>Huy+iG>Lox0U^2e(hngb}?oqY>Z7AQsveYowfWE`cq*hlhH-@&ir
zLDWYyZ3QIG378lI`l@_!%vX|QKkuN6M_d$}!2G<x0-(9jsUJ6;`p=z9#@@NL^of7D
zbHcc0;`!<2i?=Sset)qN>JrN+%-o3{))LO#n?fEQRn!pai1Jmw>-e_H=-RKG)OsGl
zIQOcyGjbGSH((mGcsc~70oI*f$0xirqTTg*USzN8GqVcA{CGtc%UyP`=Jv|hHKetK
z-N#9aC6AslHIYL^t4j})%hRLHxV5BlyW8@p|C?2e+n1Jb>1%htsk<SWfXxLIAmtGu
zNI+TbbLaikLU#s~h5F!>WoG}ZVmLbred;Laz<#f38CBr(lT67e@@0Y+jSbHWr)H>7
z6_wf)>GqiT_Aw2?V{tcd-f2I3w+*q6TK8Ll<S13a51Ho+PSfC>#-t)O(65d6AE;0u
z5e4DAdLMgG<Vfr7u~)@juHI)Z>ge*N<_3;b53Kh1f)yeY0&M}9zebnM90!!f(mZTo
z8czv)T74B@1o0U<_eYSoIB7hV3Ws552EGXC>Sc<I@{=BC$tSMmud2Dom>BuW&vx|`
zr0^b$0IdM%*70{U;=d3Fl0#it4pR6e5IP;S#j7<=2*?Ts^2K7Cq3j^<kZ7qUv53;7
zDzwDIcb6;HJ<iy~2UT^CW%cLAL}}ArXQN?6`j#6FhhBRdZ{Xpxny^DzMgqBFKQQc*
z-?(Kg{Zyn9J_ae$O6Tdn)jzX~I?fI*hh0C2@E!Xc;=Zv_%V!NOIEJ+DSYRGs578Bh
zj37Rs;GWj^$ogzIaxsJ~-+;Os=#D$j1GeP{r<kG4Db_&r6tuZ-Rsyh5Q?^%o`X}yr
zeZ}!>(31sqmrV|AZ2p&zg#*4^5JfjNp5$V{;)lVBK1G>9B;BEV8GAz*?@{64w|NQX
zUwrz}Q>zsFb73<EO~i_7KDU%p)y7|4LVOV0E|bIyi8;Z7G^r3l2DSD(7+j-s$)f0x
z8}SnQFFuQ<y-g@tR#Fpw%;ooKo9O|DblTFxK8$WW--yvj3nO<1f30UN2e?~V@=Crt
zf)OW#+Efs9141Ju<XvBDJQIH+88W=$!aB_d5DemLj4PF*jSF%mT|sBSa^&%cA!Y-p
zYkI}aKJZzE=sv7|huFA<V;)vI@iT|o8tU(y^VGOp3S;?_5V;~RG#~-$xryr;{wrFV
z>pa8(-~GSoCxxksYQw$WA_Vv0&#{q4ea+>7CKTjoJ1|2q;6?^7RlMS#2N@2YyH}x7
znSY5MvDs^|fv(mwkv)ps?$Wf6M2#~^|2B7uBhi~F1qV)jnW{@)eZ8BfZjE~cA~YiT
zY!uWZh)$i!_L!)%Cg9f~w=)Fq)j$Q}xIZ++;V=)`<j`1#s4^_`dOqI?8~BQAK>ta@
zP_*GuhH;SpG{DHDW3uF+;x1>*n_<Atj7VWD>0#C-e2`?)r)`X2w$C+A^#8!|o9HB*
zH{_5~_Ac46*QFqCCjjHzz>9Abs8&xU{Ovh=&phJ?eDwy>VVpyAi*>XHyIC!~(uF7>
z$yJD!ucU}6jSk6sN_h8L%-5Uu9bSE9(zd*LJha&e)#7`6tAaQr=6q|*a^@^@AeNTn
z3@8IT!b*c;yzK`vug<`xK(3-c9hf*UYkoud_%qfs91w#AfK%e1qc(q!ARNPjLy3P=
z7HqsJD%*T-k%bGfFT&N^Y}hqzLti%uChgsmwomK}kWvXhl|FPYyk`=fhMWav`I$E}
zZg<RFjA{=KocW1IATM;TIhI?g?j=G@#Yt&Y0X#rlGN1n7(Xa7K(!`6TJN)u2o&~t@
z{2B45kX-$y>`$&fqSA_|Idc8$yA>+*VTKi!&*|hzMuEEk9p}cCSJ3H-@nsT>Mc~Xu
z%9A_UQ|3JDfoGaGxb*ItQQ-XE=09FI{MD31{|gMk$?s^fI{R63RSLfPp|~R`G}{il
zx4PdCaR3PpFOH2<%J-$~*|BJXux=PM7Aw)-&bMlGV+35FQX;(_6m%d6U*JNOA*1H2
z!;QVDHS(ZmH4aX_`<F`#-sDAL;wfQ0xW}Z+Qr~euyrnBIRN^Gvm(X5FI9R1jzy2#K
z&9}VbdVO7@0es+Glw8);l!f_Z*zI<Ik$~$5jIB%o{Xw4CQF-(&!UBXSY}u<J9d7-Y
zM;@2VP~$V|*Nqv*84X|z>+8iAIsGvd1{cfM!`?IW7den5guVzNC{ZJ2@zOO}Pzj;s
z($(ImWO($FEsR2>GE<uS-MP=p-=b&=K8%;Ufc`nNT=ONZo)xs{!<a}Hqx50W`SFm%
z2K=0;?)<*vX5q(5_*eF?BuWKpq%>zQ=#Dk@FB%MNbe3GMU-*QUj^qr9ll7X#49v-5
zPOq=4st-A46GqNNp@ADn2spAE;%x<GnZf3!@i$_V*$uFVtwIb`aQy0$yMM1XwLZZ2
z+h#x{x@IT5Ww~2Q8c~(`<j9oUVr7vSS~~QSV!SwDP8ctikHBlO!Qeu>-L%bC#&fsZ
zDi+mrzW{m&+s>H2cKb@JKO)_;FL5W05L4&;BlaleK{mN67uyfmsmW-b{fm91IS7L!
z`#vtsl_MN>En()iO1Vb*0I~Aoy7Vr-R~v&FS;djA1#{Z{Y=JZxG2tJUS+v(?sxVk<
zj`kME^q(?z`**`FQWVqOJ>m;prL5x6th)<6FRP(<CA_yyYrv$pL$#Q9?M~&-ub#gW
z;mTk>zsJ!H^cD&Z=7Vb2FD_(pTO?EcEi|QnAO&BG^nY}k>%iQyE`mYUr|nyGjuT?e
zAZIDX$kgMZNJqF#B}@G^Y--bn1cZK`wb;r2%~^m29`;r}>yy5K6QQyMkxggU@7Yad
zkymhJH?L60-6D!5P213CRth~}lf6HuG6=#QqtIj6&4W`cMZ&g`xF&wYsn%z`I&@7f
z;#6SG?Rb<QD(CgBIa$g-wMI(LHvuKFeU#Ai;k+MduGX6J{MkzNJyi8<4wVt&DveQb
zGF+@=b%>~-A@4f>+NRd7QOXyh!ANqZ*p-EnAZGh!p6=J5x@C8<rj8vhQ&wkxt^33E
z&t`Ei=0#&xpw-1I7s{LA&{~lRPk%GczFn`+jE!}6#_ur$er1_=yCooQ$hl;C!!Ar{
zx8ysO$?W05!_0N=VkhnkujWQGI8k@CKwjRxyC|vyn=UB<#bDkIRjAsnV+^wHct{bq
zrFYga=;|-f`e|j^fx<7Sxt~qF<R4i)36fOXjPcL^#d}B-2Rv+@6x#`PhL+3APnLvT
z`7N|YBa7i#VHSv8tak3^+5TOYS-dNrX7#h3c|M1B-1(BV-C1P~)DdQDP!)`oW|~ye
ziE4C^)wPrC(F_F{=4fjKZ-Zi-jEcg@_s`7N@7NBI9W88N<%?SP5ZH>*E!xfZ+$Y@4
zjwtsuuVc@?p^{L$ykT!mn?PIMJkPFzjPVyEaLTncs-ZI3`^smmRw*Cq4op&PP#x5o
zyN}bSY3aU22ci%g=~WBPbQ4K8z#W9&y0cr~{7n7X1O5rPVUMh>-awbDmiP8g<2;vu
z)h57d*)Ichu{h?J?4-Z!n#bh>e1X%OEavM%9*3qMR0qx*n41CHh;lOC`1C)KSR&|j
zX*<)q{zWiL;hKmmJ%;yp)8L9y*G2S^_gP2<>Y>!(eM+`(>^GC0gUN9%Z_+i*ICiKn
z_H;bOXDG6q%?GRkd=8ER%-Gs)emUT<?H9)+Cm_^bF3;7g!_Vi!dmIR;`$E=|^M{Iu
z<i>PUv_jXesR5xLmuIXGPjg2%%r(lcrduE2x+g`u{mxboqUvQjzjk7E=T_c6-eAwU
zV*i(1Qs>X7aUoWIFGhmA)+0d|UvDh!hn(X+Zn!&%%lF~s^Dc~lEC@~9&h~85GyJ7C
zyIm#qFkWX*GEpqZ=wm`oeq}pS8&e54m-~auq*YVn0JjK;?)eo8543ct83i{1U@#8_
zH#OIcCXecRUJ4U@e}|q!qyGP7?1FJy4rEYmzW32%Z&()f#Xcw4^Ht+L4=CR5dHi&q
zl92Tplv%)lWa3DEonx9QgWF5EM(j*M@InrRdfqP%fWgN2@CUBg$oUsT(2xb=J?#gJ
zatF0c)pc}HEZ<?btFCkxC?P)2$?ihQVvB$*XxyQJ^sS^|F`5q%R9*ZW(z!8*lk7D^
z`nY9UoqDVnskrT~Eva8H<JOthX*G*)W=Qj)t?WV>t$Uq6eg&h--htbn?^Q~~KziFm
zaNUKyl?N79g5F!1M)Rx7wY^4hBI=$1L_~r^K?~vT-OL8DIm6LtC-gC*avdf<wIwsg
z{YW#TwGQoUJUwL&I$w+|FG4mxc{t!+M?)vrH7fH>-)e`#O$|sagq&X#NhOJL+iEUA
zQ!X*!D#_xc|DguN(h(;7^DiPw`_e*04VW$OF(W-YE5e@z?nXs%xsq%z)ofolM!qp1
zTCOrkFp=-k1k|%;Uz95yu<!=FmCE`KUth?5oHD5>PhNhwz!muco+s05%LA<$*1UXn
z&bgTn73SEYaDlVn$PJea*%~f6e8Ty>oLF|?p_o7Wb-3MqjCM^ls%K#;&gwUdcENSU
z{6#@I$Q}EhM^1Qe&vLW(YzrT_CIlq!8G}`smV5dajPglhjGqtOs&9N&dRaPn_e-wK
z<CTjPA?;5u+_GYU8ie!ck@q8>Ud$O33o4c|i5d;lw)H^HYQ={f^DmV-*ts1<`?L*!
z_s}rRxBO!Uvjs7LW3ey8!{l`<r(==^`tv#nV6Obs>7<#uBQ`e}P!Do!u6q^V>C~0c
zsH$+Z@S8(qH-_pWHUA8PW6qvp@`pSf0oCF+v88I(zvDqyDJ`z!pg(zLOwWD4ih(1p
z`Xq~Z2{1nkEXWoTV@u)V)upsn^nA9f8I?HH#S?S~O_liDjOG4waa@r#>y)YZxL7AC
z)iM3r*P?wNx*Ot^Gs}-{trOXG8Ycrcm}q|gJTl-W-vo{pPr3`>c`MLCGlj3z^c_Co
zc$puG_pB=P`(|%!xW#X%$!-^4tC3gx@>(IFs8#TOTYgd;(Ivx{jT6HrpS~(E0CZAp
z1iL|(NzFS3##Z7cSr)+Ja~4+psiUfHfQNL)UgdXx^Z+7Gy%S^94VG8Iva6bt&gNAT
zaM|^@z^-SX{qLKz0;Zo!J5C2>?^yw?(!c=O#j<L2V+ADfzzZQNdIs=-(&wpQJXR)J
zNMIk_&B1?*_G&c1#aqMMCT%y}A09Bq5@8D1Za?p(5%)<4lU61&W*5|?dOs#fd5Z2)
z6O0TM--RiLgT{emC>KiPe5D)AH-i(|Orh*h=arXbH(syd)x<DH&{C;NdrZB!HlE7+
z?Dvl%)GfP5=M@2o(93hlTYaGVmIIDRVJFu=mp|{TdLHWnK3uYBEv~RfgN$DZ%icNU
z*W75(etTS=<W-YC?6C=51<DgV*qb&AcfFt4ieNz5Xo6zx<v<^u3V@Y-Ug4+DM@zr^
zi6QV--XO}uI1!s*E%fUcJq~rV9ajCc($VmPB@Pp*NsqgU=hI-N4C;->KvCOGyWZw-
zz#QB`N;`_2cR!ol=g%P7hXE56?qw}1f2g#BYgjkUKrC$j2XXknb+-kC<fK@b9JyGD
zE3=c-#VlBN?K+Chq?1U)pm9uF6JBnM!-P-#pl&rvVNk4}balCk`OVl+v4ULGwR;Je
z^zR%so*A|7{SKh!EvSHHUC;t;X-8}r4v`<MA`G9PI~imA<BL(KQHa+R3WBPO<!9!d
z?ZfRJ-jgMN5UX*A)JvtVNTHX?+i$0`nCIygn`J58)(iL;L+#YM_PNi4lvDXjgSr{{
zi_RYi*TBh6)&=NIq<<$y<r%9TCe5LMdsM<u>NPBh{YxB^?Fy>fNI)lZ%AE1zZTFm3
zoffF00|s%hw^`l=_U%?{;EqCa7bS;~`(sB4u%&YI(Sw=~`*dPZCSc|gVU1zcwn+E%
z%se;O&|vb9129<T3B~sKAF(6D7JQzBacU*^Q=|LmC;$CdWA+0|1^n2qYy;APtrXPg
z`dSco_ti4W{<d+IMC19bCE<HH!R6#q%M3XzG{nn(b2vj>>3akcV$}=iq4|+RPDd)G
z7b<J%l=cfVdP-f_T62hZnh+uwv|}IaJX&7`ShdGEfUqN&XL&^`y;*6bs4K$UqP|Hz
z_GoYj6VKSk6}C9@mcJ2)EceB&lhU$TMyH4xfsVA|Uaf)?_zytASUMECvp<>&ZIJ?E
zK3nPgh4C{NowINRbbPkiLro5KVT=G9Q%o1CiqKeAt@7#S;~NL8PhG1w(C6Guz!2HR
z$Fh!>og<pkb=AAE=($*QU=Pas<Kir*-O5o8c(SrM!5BqWvwq$9uO;anoDj`&Ox(MJ
zYatKlh`A&my#6jiZ*oPM2v25uH;0&&!0%KL^K7)}JH2On@OyUI$-Mgp6+{|432*=W
z_tUFDjz;)RaQvp&nx@n<=T4u(?trlJQ9=-p`sTWq3}Jl&(l*%3DZ1!D@0whd*B@f|
zh$-nQIg@JK>s@BIa+|kCoB10`Z9F$uSp7su233G7V`0}(W*Hgk-1C0wAVg(+*<JrQ
zL4}ujH)DezcXkY}#FIzsCmZEheXp3kj8z0^&af-^g>)!A=q#C9&<|9zP5?lXQ2a}m
ziqm5w&Jc)33z#FN@&jOKWoi>~?^LEEjQ`{iG(*z+5CgqD+bqv$)Zk6h{Da#}f>Ft~
zbi(St2xuN+t~c3oc8!CZ_}&MRvrB6<F6UodN}`;9x8{XqDljXr4Ro%(U!~Q`RuS)v
ze>`eh$@=RY8v|F{Kgszb&lGWAg;qUjjm_BCs^m6^SEYXY5O%XSab_R!tFjtqMNYN<
zEG0yKv_-ejeQY)iEb_^rOKma!uRgii=Oi6~^Lt)?Xs5qfq~Mq=5*IHz6d2~PaoHiP
ziAdooJlRrEkSeRoah(v`iW`+Xg?OteJ5dqdA5e*JBYW0~d2pO<(3k-=%L0P*nq8Wq
z$1H92jI5>+BMy(HUDykrGa?)NQO8TBp~Oa19^mG(nsLW8k53D6_m`yx#ZTOXzg_7q
zzugkO$zWD-BSv|A&sLXJ;!g$51KhZ-8oj!E&l;Li3CC^9K^wT=MOC2%PK5vD7hXNY
z5?r{M&i}Wa`zK9kF@q(zMpbwZT#;#{2RZHSv3JjX#((6x>kn6%eq%inMbXGjAVOD2
zZPB=_2-LRkyl1iU2>jUxkf?2<X{keBTN@cePoD8};|gG;8?!2Eg)S{^UYW}HjBy<m
zlvaM6xL>%)KkrQ?cg|@^qEq|h_;?>mrhpHFN);V)N}RBnKi1$imj6B-QGBNhcVYyR
z`0b5bzT$N0n0{>3ONB=n;{Aw1KMR`)+ev9I^M+h$o>vUwjzgDVxS2Oj&#B^NH~XkQ
zLPyIV)rk=h0p)goo>&v3^$(7-+GrdfEt@32vZ(<SR@cwD4c@Bo!X>}p$&5vtc3_^q
z1gXYU(DTE_3-xeEx0Oa@)aIdR7GjarQ5WXtD;Y`?_t5*z05&fT2mdWL<?sqb0F4^)
zj}d>3RnC%w!Td@2t{_uu$ilDl@?D0)PA)PhGL<1x9he~rn2Fn<34>%57iQp~Y~X6s
zw^|$Rm?b`9XyoyPJPcxgfdLzL?UMjOqH#i~S`^CI?DLqBh8`ipW2QP8V3RP>5>Itb
zh8ugMOOBu|XjqxWIK9i}%U_6>X~x-~2cI~&R`6)3G3taG5Zw2FdxdPxn48aowt+u-
zYox03IX!Cbb+OA<bkyZRKN5P_{tP-QGsK+Hr|KOCk+5Di=r-^3`C^Ue#MGd_-F>=I
z>f;1FjMn;KC^o54g&EV=!JQ-O%jn8jBF*f;geK#vaQa<km@NhF`cz?bW+*6bJpAdG
za(n~-Wr@oK{4L98>vSp>YbdKdl)E;40rJeMhL7{GzNP`sgCk{Xfm0axveZ1ym}31W
z+Yh@kUw>L_x((b#5dL>3^5?<Z;&&wx?}VRC)|RMhvluxMk&nF!nv%%-RMzLTa4$$E
z9H}TUI=V+stKVw)%~9Fl+Ydj_-k<8*SVv+i^%^vjr#wV2y4ih$xS^_TL#k}b@1s`@
z+KB!GCz}MxgJ{t7v%deR#8l2T&mQpB&NZ1SFZSqD#|Og%5^E}oiXM`WuiXNMJCpo7
zCiI3*9WFjscikIgI@|;nE%r#wNpG{sbxf%vx-ld7l9msvKVvIP>&lEEj)u?{4{*)^
z3>lHUst+prvCf3-k%|#FX#88+InPmdS9GZsC6PH7U57)0Cf{DJT9V$IhZ|BOfknjr
z#bkZSx)q4vn+pXWn*PK*jd7U(wvQJWYg9KT|6yP@usOsbg($=yee<V&{$C%X|Kr7#
z9{|Gt_<UzA>ecV0OpR;I#S5rO26%xoE%T1)auuFr15H-iSTf7+gV;kvadtw325|eH
z#Cr>qnBdq)N3iUSuS^&I;`zxidrg+F;YP8^_L0k8NL?y9%>s6rG1*wt!FzeCdSAJL
zjX8iZl(_mkT|~!+AHo6QVfadxBJJ+A7ly6=v<iZ{9n$@+fY%EQfiuq&f=7IwRx}eb
zP4e;#cy2tfv`tp(R-m)wIHcE<j(o6FG?*T#M)tATa)y6HyjvY<-Ru}tJW5)0o27xd
z);Y!|$l<&PT9)|&;u3_p`gz%R22Pe}!*&k=$=nB)b|<O9;t0qYRg5zciAXY$h=_!t
z^}BC|ty`XT(-!Yq%#_ak;kb`>&Y!uT-P=sYdwkhkGbFxn@NNTb5M}$6o%KOYvZkl+
zPe8B=aPR>kI+K4OI$k!AO`sv>W)=Q>^33^mD?hXHd6!`r{QE{Rgq-g{@Pvp$rubK&
zP0En#ST9dgx}?(2I_4{W#1hsnp4wzy+YC0p{h&zLO%);Of*yLYHh7BMb(^a%!XO6k
zezUz^hlrdfu_~6nMXk*#gmgsO^Fy=JMu4R@ut)%d`f|{Gn5_$ali~J0Cw$P~j(qj)
zp)&W(6_u`bg`s*D4#-`Tl50|Wk<$4|?U!jes^*DLcijL%jX|YwZxe3GT34N*70I)U
z&pYHl7~YO^8JUf-b66)8W_JUrVLPhvz;jedxqY182*6Q8k`B^&zIG9U2k+3CTcut}
z0jWlsr7wQgy~RBC)vhQ7vq3e}y6z_IA>hs=F0bux8&kv6T^K)9+(V_uF8&zbDH(B_
znlETe3Vr}HQOem^0d?4d)HI0S`JXh%SWx1U->&F;q!HkAlU|IC2+~{v0(^rw{}te?
z5M~SZ+7fIWE@rMhWpbv0u?Y&yvA$$|gq#P!9KL+$q@nohuk~4XWc##28+zecsRy;k
zcB?~-9JKGE)h)z7`5r9{VL)?W)k?=6B00e|rG`&p`k>|-`Ep^$0~2~^odpApyqDX%
zZACu!j--}c4@tZ6a(iW8o}!w$rfIh{mR8xS5g-wmrD>7N47j@fZ6vWn>#NXs4ExME
z*YnJh6?j%yv=uaD<LU-%C9R}OOUT+)V2TPd8V=nl_xid4U9!9R;5k^s0#`bTVe5eb
zegwv)AsWrG=)B==DNVtfB0-Jm&{$i-jg}ywAPAFfw2G#?Uf6PV%KRu(1}I%>Hd^5P
zL*@DqVyiLN!c)utn@RX%Q(^di5rRSfjnAbGyXW;WEieNE9e(_9kqK54sWLSpoXlUR
z?`Kk<_m=c|LF=@WVK(C+85r}QWZ)L*QEK)*gWk}z|HK1zXxX7P?PBY{^&^WnskZW<
zPpvM&Si^<t46&2Tly;pBvzZ_|{Z3-+>K(frK|Kxal2jYLq-Ra~3@-0YbSd;nGV{r`
zACw4@j&R9=m9B4%!(fsZjIEcw7ji(lP<EO9fCcL1u+<~jWFH?bi_v~Z<A6FAIXmy$
zfTh3(XYNKn&~kMKpW;4AKcoN>^r-SqMC)z*dkZC?BLWYfuiTKnw}rrll14=k`=qhz
zCW+en{ZS#HL1=#=`PgZn)%@(ZJJK8AI+OZGt@Ejac2$i^1NmRA>Hq$H#dRF1d}>N9
zmgp^I96U)+p?6Gd%qok-xML5?KFKu%Qy|mKn2E^_V8z8mvNM*XeDd0Bdg^PtSXj2j
z42T{lZiOj`-v|r$l!1nE97G;$=!|B+Nv1m<Huwi8q$dopGnOJ#CA&`{vl+Xer!Yo2
zD-sEqFDm^9Psjw+4O8Mxlt4|{-uxz>?8$T1Qr2>K;+fpwRNL>zWy9`O>4F6xSPy62
zI>+gq=(Ha<^~8Q}1$Y+QKS>;LK3%p6l>5F#o#jw@r8dbxJgIi*Eal320081#yTXzN
zn{6nB?oRob=^Z2U9|T6Ii2oBVq+Q8WlshvZW^uy!N`Yf%oSLt?#en1Mz$|4AIupRO
zL2^}E>Ang5BSaOzhuN5&A@?sr6bHB2J{qsF>GTPaA6IZ#7R6mc*C{W(y*Sbniw~+@
zY`Aje_nD~U7GG#zed{~TsO2oQv6PDW(VqPeTS!fn*QOG?K4ifx^^6M(7xo8ajQj^&
zII{*XN0+w=ukkY*+p;N<q%G$@M|Q0Y-&US~F--4yM`2R*?ENRQJA{^tkQbG|-Z|q+
zTs!+3RQD^VPj(N;0cWoCup?;IDbHm^^pwV$V$u9IctRre-I#$)V13$-{H+A>AlT-3
zK@{zOQTw#2xXgF{2o&>9p~P>*!OkM6E_f=jL1ru#y=<NRsGCv^A&+JCa=APn64+Vz
zcnJwao)`==bEN%4kVs(BB;z2&$^SlU|K$ezKf&^ZP!E32NYhbLMm9z%I|j*MfM#;I
zX<9f`G@y>_;gkY_=WhmZ`>l#u`O(~0W^a*8_i?k!g3-vA>t#&GmuP@5F8*o=DLg#<
zW-jUR)5hK+X+96KiKSDY=NeGjagX7cqy4fw`}W4X3fj-!pA`XKj++-)ija?6TR#~=
zK>fy><txk3bo}tLuG78jZoS9sO`b)(+LrGdSiZg6)vp}mM&o^zlJ$uY96h}v&{YW!
z#wD0p`vbC@%=1LUDBHhVl?#8i{7(7l0UfB^Wr2(`3#93m`iL~iLGwrMeR##optOqS
z#!k$KG0F9UNuSyL!>;q8pzb;sj7$*N_kWBWLJHm?N+TDEuXD3_u?g+dldA0jYt(`@
zMqD0$+G8`XgPqSBLInwbrAV$%$>BItt(y;;w+bpoN<sNGDXwO)?8<l3IyIyie}oTv
zCzZXGu`M*?=L~z$EqY*pNOwX?HQ)&R6yF=$c##>n2V9^e*qb*b!L2Hni41F=Nsz}i
zeCa5|$>p+uAB~;cw)B~<Qav!In78rY9Qp%y{DA`ADZl?p5gIG<xvbw>dVS9HwdreB
zRj;dy>2K99p~A6N+pL4S_mmocQ?hJLtM({;$QTN8^pd97a5fr(z%b@b(i<sF5`liF
znTE8V<i<VJceXC#(5ChnU+^n~tn1mV>f7qlXf#0-BhY>p-TxihWNwdn25w^#ukZkA
zu=${r<6lqD3SRvDBsxt!ki*rFlo(FMc1haWX%nprI6<udbX9K#_%!{JxAOmBCXNwc
zp{6EOIObo%^F6R19&A=8&8~0=*Vt@lmwbF(V$mX|Av@tCJTsE+aR^JhVpBQc*2?Iq
z*xh;cKDVKnKGKa;>QV6o3<bryGei+A9SW~G9P#4`6S&?=xzkE75xFQ>K+98eaSt)N
z6f4r(%(Q=F_8Q`tYR4KVF<E*S8XCZD8`INLFvYF%13vha$wnhliK5PoMsHVP+vKzO
z&xTv+mWp4`(GYOPHp5C>Q}@`>Cey4MbtQbHgCt8G$@wk(Y(egS(}Z#nhz&7g=^BL|
zU(g`w`3<~EV6lR-T7#*z5_2G2&*Su1a~|7t05D?STZy!KFko}nC3itZsP|5gRqk(!
z&;mvh@SW7W`8WCG>mnhxj^IRp#`$N`-(|Q}UVi+X9rK?sp*;DxZH1QXz1Mz=Q8-s$
z_S<+CXTL(Uj=-@Ej3BL~rCSktUu@M}f%J;n^i5QfwOwpp;E>__-sVS`xdmPJ3*Afj
zeZxPLoQ7%_|BnfB)t{FeZol2PDbx((ipm3%Ya~P4IlOio<a!?#-efw7)AUl~X`RD>
zn9ZtV)b@mf3tUF^A&oUOM<ov1?t42sgF2%`_#r~76LYpRcHfa8vGD*^m9;qQ2mAOk
z#>s7GKbv3XBiJouXYT<wIBu|&G*u|~(mb@J?MDi(2Uf5paA2EK0FA}-e9Q6Kh+PVZ
z$_{WJ3BBAa@n1_O{l~t|U|HW?*C)JxHFvivJaAdf154x>3di3$$B-8vu3Yma<@w}5
z7_0K!?7OkflH8!jRAij%YK6LNpI_6YV$VudSUS=9CUg<3VS&rv2NpH=a``dWsSG8c
zP9=|j%(cOeN@xwK0BfXr7I(YU(Lw@dvf@VDMwLN4yhEkZK6m1+YMVdmbLiBqw+?z;
zbJ-GO9K8!(Yt?1br1W_LdWUV4+fQa}==h#DKH2%A0n$3}5<EofHVXBu#azJ-c=-py
z-59NAT5ws-v|ZFGi0pwqRQZeG%>GeUc#CbdY|tUpqQvR06T}*7?80J;&D$19RgZsU
zImT?)MiT7oEAIS%q~XfNIsH!Ut$DETg<X5!wb2KF1fHKbofh(gz22Z<^P&Q8e8XCb
z=3lEQ72Wtv_D}E%7jIz%%-!f;xl5jedF{AbN578pjcaBVq;T*T5^ulJsA8g4hSMFO
zOA-Jn9Cq?H4v<7AOw5iGn*k_;Lznp%TvBiIZH8l&lZ*4iYt=EEFh@`$gB1@J+o><)
zAqL6^AKmY(3X2JGt3A(-E<rukd;eK%tUdE#Z2M}OLnq^kVIYYY^+i$FtXww~gkzzD
zCQE7x`1~v+ch1g*H(*2r0nOkvUs*>nax19pOyBeyvT{j)o5eFsd*;^k9o<K1QO}I`
zS^IZ7Zq$62_8Pbhc%<-DeYPBIO$Tl-zFV+R!AN6{E^kYAU>+3e*l8Z9;MGU~_r(@V
zAaCU|S8dP<Jtx16G>yNT7X!Y)JqPz41rakU^sM)m#H=43#r>TP*bRZkFPJos6tvrB
z%_`7@G;jC+_7L``BzPYWO#Ao|KIk|2>(qrQO<Y!Rbt|sO?>l<IL>WYDHr1oJ%8#!7
zdd8s~fvIlEFj#Z=83l1|89~%63AHs(4}TJKeOcMmoO)E8oq=-;JgCt79^#>|G_*Zz
zkBK&LAf`_fhX|o!N;qHD&5r8S{|_Qjxia~`5Q%-JYe%rH;3u)>UtHSo%KQ%nlHGM=
z%g$PribTye->G&SQpnzn!DZEx{T4S8xDVaax4bsUF=v_daGqfqbx7g+29h;icNh8`
zOzO1bZNR}7fesHAtafqY3?L5yQP7VdO!qK*(d1XJNGg3nB^(^?0QOTb&48_iR09Uq
zF~HP<avJo-A3D+UY0|7kp19`a`0IF-|8e;jxsM7_XxJZAaa%1Gd%Y{J;$Eb=UI@F<
zq4VPY?v{x|_$fiG1q8(&CHBEM>u!-V*Nul<;gXho=t*R-YB{iNH~L|~y^~m-mc^BV
z%i?C8_RF_TDPJ-$1t@dO<xR@@bn1gVqxB)RhSzBw(A!@=K-`YWd8@QPuBpA!n|;Zh
zFT7K*t%YS2SH_4STIf7zY_#|n#$yEC7Z4#koUEs|f^i$DCQv?P0@_u|rRn>opFSBk
z!f~CDC3mFy@rO~|Ih;zHbAjnuL$J9tM9)TS38YSaDrSuZ%0V{%1|1<Fa38o2GBy?C
z*PrePe!Bo?nS8_F=&cx0>cYrfqK=ox@5`v;p-A?})n-^3{FtqPf{^Nm?p~O5hmr-<
z2_4JcIcw9gdsp7TAhNv6yl6&a0hqj=!7Ctz^}em5*c^l{TSq{|Umqg+j?mnQ+Z!M_
z%=%5=6<RchUnHY$uo4kVcCo5wzDusisw;P{={as1DTB^Uu7{j_E9_4|3iyRg0hrLO
zUjn$H1lG5#7gg(2Jk7nFs_p>-+vAgWWW=PaOE^meo)_pnD=w|<e2dy@3)}`P{~&lQ
zekT?}+_?7eZ6Ek!wHUoICj42_C5xYfE(TS_IZ<>7#m1jhW%x>Zi-2Qox+?G}`vN##
z4Ul)AB&hZL@<v+NX6}XbLHV_qCHrVpniaHG2Ha75H+qNpKwOm<1ei=bNH2Q!u0)xU
z1(Q6oksvI2p1Cjr=5_a2srwQ%zK)0WioKsQhjTsfl>gd?*yyCNtva<?X>Q1hLeG_<
zIVLQ97N_qe(OoFt{7$L@F15^CacI)n{>9?#XEtG10?tNaJxa3!I|R(sW!3)J)&FNw
z4eUKVdGn;1y)Iud)Rz=hm`AFcw_W2M5lcD2F3XfS=atxJqra2;yji1(shEt;dR^`u
zTNRKlxHn1Lwf3L005XgXE<^(&)xu8f?avPj=%=zW_Baw|Ry7TpANXkP-$U#h0}ifF
zz$(wzl2b3PL*B<m!S{cYGr1=Mo{dUdkz-MGy40ngbklUu?e7gtnFJ^a^7Ue<J1q-b
zE;)$`K)lh*>)ftgzZ<M%T{*mmw0>mjjFFN{uf4&ZcX`>2M`HAyo+lnM6nHDJ_0}1k
z3NfB*GI-~|GFpQnOB!^&tfS`^zj5hXcPXT`;-L=~)MkYN=~nd;%^}!4`aDMNIYtJI
zU&1{0^}gu-F_1}E-K~nB6!%9Q{dcc!Kmsz2a<*t%79uHQO4{3zyjed3iMWJ<RdVE_
zPF?+*3=H3xnDp{S@@qbE^m{^nX@OFv)1~{(iynHVA-t$&w`$H1Jx)2ZK7`UH^gj0H
zd%LFjtFcrY<M`Go#;E|h1#C8!zd<}M0KhI{a$(}MHvEQUA?}y7i!0|g1TTx~T4=&L
z1++CEWp=-y&I!3z)7J57E0=G6B`0U-1X6J^`J>)&&imqI%NV4e^1kIH3nzZJtA^wS
zPC#Z88H<A^U*EnBYj~64C!f+3)QUPDK<ry5oSTptOBc`2p}_=jG@lIc#VSJ33GQI(
zA`4apn?N4ILLl;@o2sm<?hD%25!*zzQPy>FxdA~d^MW*O<2q~#P)EXFFV6bs1Jqci
zvDF?snM?drFA>?_yOGs#n@^*e<|ThFs7oa2N5mV?o+hbNASjdfJ`~Ij@~wF%_*`7K
z;n|}J8gn%`_(dM0ZVPs%=pX$D+c@Ko&@u89ce=aL3wdUvgE0P1dd|2CsR4&vxeWaS
zHl7v|u;eCkWY%4);ZkG^OeJ(=X3Cjh^V}lAvW+nXN4h^h-UHdjxpaZ;3NY19cIV;(
z4GyZ(d<m{ZKNUza0k<-}+|(XsZ?)z+tCgDibU|QM!C(E*I0Nfe7RTxB;&Qk}K7ejp
zZW<dLdYC}5Aw*)@YFCEv8hTP%UF?a@={j<M{bD2VZ2&zGbi_C<GY{FovW+fh<rLvV
zZ{}ZvaSg03@A6Gg)BXl4R0r~GXv}+ie_6z+*=BcP3K<R&fq_hgeg%3j(Y6d*xv?0)
z0u(Pc*Q)-?j_!!#fa_mv3Av)TbDz9##k1gyMl(0n%zQsRP1({j9mJ&>v0T#k4<scF
zWoFWlaCg{Ho_qoffZd`g`s~5)Uf)Sm!la8>y?y=$FYmjbN#0vEhUj>!s{)xdoRxZn
zntrbey_;L)uJNdE@2LR}Bp30y9NCJcbmDciSOikdz2+xB)K34$7^0Mj=62WJ%wp5+
z_)~d;m%m*_Fw7nAG4*DX5Y55#`Al$2EfuoS&e-vPQFazkRjup7mPS%aknZm8B?c&<
zbeDj1Dk%bsMx~`&x=T72Azexf(k0!!;-3r8J?GwYykp#d48|TC_8x9F%=vw9Jn!>%
zgul?&zn-qf8Lb1cr~fvo7pw(4fUmwqU+P-a@VJJZpt<~<O3ehwsN4DRDdI!2n?ci@
z7})QVQDBa!gZr09N4(2b5m1<Lcrk1I@pJgN_2d|&><j=N^QGrDG!`8D1!)%3PT(@@
zlhOI5`~fndzROUL>+_wP5QBj~-LN`jtI+^8+46s>@Jyr2GA`Lsy32X=ROVsJDocg0
z?fYFkwixQ|CdwAQCclB^we4RgUhiQf`7+4-T4u3TlgR~#_$1*LNxxR=ErAmDo9yl}
zv-nEU-gK%c8wIR^{oq^SRtVDAM4PS$7wJ*Baq)q;R#5e5J+I<*$S4e~+mR4yC}KH*
z-A61aovmCX9QVFJt+KDmxKeStRhpB{*aqwDDP`%A$%`!6y&%^zd#tmfxm5|62`zLd
z^d#RGn8@w9Q-BRz*>K=B|LNp0V5eUQ*oMMO5tJk%<$z=WgeV1S=zG9iJ}`y`Of%x5
zm`r~T_gDZIjr!)$Gqf2wwjppx7Rfaw-<e!efxtufm)krvCvc@}Wq_yv=4H4)Qr|w3
z<j6-7cVLU@Zz&`|<kAB~ZrqYiR!(pvLFiFU>W^;TlL~}UC-iMkpO|W-ea2F;>77_Z
z(eqh;jpyA>W*h<G6a0dz(eVD|dj^stxXgehE3+N5efrYi<pyCTOY8`XZ}a%B_mjjk
z^2-FY!K*$^SFwIDw*E5&qdO9Vy!bB+va@yR&Nl@<i!a8tx{X>)Xsr~_*|o+{++q$w
zSxxjT3d6omfiOA4an`?X4-xLaL#hB;fLHCEQT>-^EtdfM4!X@HKff=jbkESZ;i#E0
z`RoahqckWcm6H^sz-eI)zuGJj8r2bsgg?d^aVz9b-3|S31Y~hz-FWPD{AafXa9OV}
z+5|2Zj~JYbCh_yd4c1k}0hqJc|I4-YHUrQUvv(_c{_-}^^g_03Td_|t{`xeY#{*>K
zQZS|OI}k?47kp211-40DYplLY&FL6$NedI6>>=N!o-KJhks#lXjU}v-u&TT&LX3!A
zQ{?|1pkmNbs}20L_U$N|D__<9^}gNooeO`+`uA?aWtA8`zg3MK=#Tx3a|mYd1b7tE
zd&!ml<-52xl8P+n_`%>1j-ajbvE-TPsWv=|h~nT!*$nI7Hj$Z<NU*_;Y$!9n#*#v1
z>g)<^Gzfl^TKVz`!IHIEO(nNWodx<@?AS(>%Wj1PetT|z_ow7|(Nl<!4=W<k3gTqB
z@TNE3d`4;4X(w|Okdu{mXoEw_Xcr;U`F25x`Q7mSx3`Q?8WMpv090QHT0{nX_Fj~b
zUXiHE#mr*p5ZGn0sFv6q9At1^@Wd{=)fC=+F*kl;V*V3;VbXQpTqbxs64*YA1jzS@
zhc*fSb~nu!paSEqSOvj4lnm$+LeBEae)s9$aECII42Z4T>1aMd;Z=Kppb+cq-JM<e
z=}28c@a?ALIq#QKa4=$b5_`csm``AFZ82Ah^fh*0%oFz5mrUUP%IY^61DShO`FZ_{
z`zjY(Ca*uN0n#%z^vsR*O`%dsp&x#4mb})E`^+4FTGvw7KD`QOJQh38;ipk(u)<4B
zJMi{>HW3{*i%GoqTyK6<HF$~jihI5u4h$gL1KaKVIBkUGm#O4sRnND@J0;DgC&l3`
z%4<b>bse~UqUV{v<kJuA0wjGOwv+2bJ}4_(iWN*ba<{C4l)bYQngl7mYCaj|L~RS1
zq);-kPBaIj<jCV{ZoCZz*6YMmaFnUcaHi)-onP1tfm{MzLdn`#PZo*KdzmE9!6Qg#
zn%zGWB*2-5v3^SQzd(`q04OqjI=r#6Ig{Mod2zz1*#fIqEOYF{)wsQ*8|O^iC$_2d
zobBm=6=WT7J)u9Pr$33F#J)tQYyOttjkimANmy3v-E?;3oerYPp-$c{b#<`NY^yMU
zZ<!Q+L;MSdjMSv?-e;nwm<U|r#k8v(GJu!AmihgrSfIT4`yF<w-Zt%n&#N>ll{V`+
zpHpj$rxIdD6aiTVpRYL9w;K|j%dhqy>XufjW=yt!?E}586YwHmO>sxOIeT*g<>=7w
zH|tl4$yJc4Ba6*U5p*K&kh1*G&ZYGk-{5b{858D5K`wj33P%l{<g%QkDVfP$J{Ok<
zlty7Z4y;qJKEdc73@86L3)|s%FWinItjh>E0s>q(Ms8&;o&?DL7j<sZ!GEis|0^1;
zOhm7FLDBk=tc5`Vy<%NGActrnu0s!?BzFxrowoNpIXA<gT5XoN?n;dg^Y=~bBe7#f
zlo3?W&2s84U^8*g>wfTg$bAFFO;)&X@fG1*`0_hNm8LIj1;+|Xr=>+mIFhf&4|xP^
zniwy~(lU;eAmK<H2kWa6%&Cn4LEW{|j)N^VBYpMSTmg0u8qw}G0b>iD;#|ZFI6_iF
ztmb{>tZ$+le-F>c8Izx4*7>I@oWJc(imE<dDR1j}m!*QiK$u0$)ST`}HkDHZ(%j`+
zb3M4h+6E9wy~pj-VC#}1ZVQ9=6TNV+M@Qo4ZbFPa`4L~av~<r|P8PegFRuwQ&Gc@%
zocd`?2`yH)VNr_$uqY^?$Ek+_qfQ94x+&UlalD5JXtmLed83VU4&kr88RQg>NNHMF
zN5ZXAW;6h(3TFgUoUr`;pp5~f5s$4HX`Z+<bSy6$tb0D7j|0YYB_FD4oUK)~`;Xvw
zKILsRyH_5n>2GM?gcftHSJw`9&tDo!Jtf3R!~%R3)F+e%lAkEg$4M4fK;ka6-xk%Y
zp$wFqcY^a&Tp4`6K-ce-7=Dt3X@m|Vx}tVw#;ShJ-*0%mB~aGUX9xh56KL#@(i_Ij
zqqMY4jW`1^MF|?F>dd{-=cKo3gPTgR#vq;ePXa?5{n5eR2#4JRlG~Ezz$LzR<TCvj
zf020<2R?)LqffB1pLWYvn_bNX5K8k3WC+!du5tHel*DaGq!$TOTK$13n=EW^rWGO)
zdkCliB8t88h2-90=eYuok>sA!%QDk_M`YIrak$Ub)&B<~c=ZO=(`0!^`?k>jm->49
zEHFHB&p$_C$vw%7HiVDyA}ozReET;<UdIt5057WQE{3;TAJHiG*>AFz@A*nI;^6ck
z!KX1tqA)SUPXkGKkV7YS=B4L7u<>CAv6-a$9fGHT6mI~M>2Osd)CYpamRteVG(N|z
zXFoYkb9-&^)%sm8Me{ViQ%Qhslx(?k@B*CJ>d1yug1^_x^%jB+8y^`ctPH&mk<0_0
zR+oAl4QJlVSesp^ExI^GFL~IS+fI#RQS9I_2oU=A7+d9u0m3^v{&O7?BHcr2Fx<&~
zE(NS@4zy-70q_l|KTqjeu^<?_zn&|c95BrShCPtjl3f#VPH<WB{lvQYxv&42z}~_M
za4F-GxHYm}BQGJC;E>qg>v9^vB?QYfFB>3y@9!7DghATg-LZr!F8O=LraqL>oAh^Z
z%iD6XCLHtcJZO(1PYn}~hhEOmmjBM1POnJ*pB6e4mT{Yb%(q{EJxg0_&bShamEi2k
zx^i)2#d=X$x#k-N<@i=*RM&l@_<jaCx~VS`5~zY&a9T#y6mY01%#J0f7}<3m*)Xln
z#j+8hft#tN>O_tBj{Ex_ot7f<NWmoj4y5P_@E@XTQSxEcreM*tiqmp#S}l_(MgyLu
zFUx*6p+g`(t67_KxIW+<<2<i_K{(v41#bqD^(x9$t~<xlv^QL&vWKvf{E45c#luIC
z^s7x1P}<_ePQ*KKnzvXoqUn=}ufh3d)gJKh{X35N<exa^9Fz1P8bb=%hBOMeg8u8w
z{BH;8dnKR%a<<0?7rgqhB|IMd(p|fX(<vF?65U_@EOOUe_*JCx(h;RD7;A!*t{gW*
zG7lV^w3jwkED)^jy_Lc6dLX;#tM?GqyXsEQ-dQS%hAe0!EMtyJ>4sy2|H!zh^Gn8a
z6HZMD+bOGUtBTY7Vsk=!XCGK*o6ohuDD-0YNr%q;lpv<_1mX3~Z>Hf9(hQ=C7I6u$
za|3tqC4;bvT;jk*W;!$C)pQQ`8S>=R+!{t<qw}1~b3qY(!p8l8Af_(Bs}?8~tzR8b
zHTg^rr(R%>OkV!hrwV+Q417BuVYOPiX1BC777=taF&4nqgb~o4wHB;{*3G)f(mshF
z2MQVc;v*sl%h~y$jhPG`BBxV4K4G0>j_G}?q^{B##LXT8*sa$}%+zon(SneL18t+a
z#dTHX|6_$Il0!VA29>a0xA+d-yUgEupLL3iUbsmOPgnC^n7ps9Btd)D!-LUEk2^5<
zu|R*lR}o?P6yCdeqJ6n?F2sYAiEFtUdic#`Q!Z|g!9GiCuiOr6RIAY=R{!J`v1$T*
zIG!5F!Ve$Fq0@rjK2`#=8`wl*{qD$f@8IgUg|6x&=waFdX*AI*Y87SFWIqBviC1$#
zAFss3>t!14eDU5QE}<{_35@<A^Imi2(nrNgh92}WEFGOMs~}1vAQv<j4*ZH9-M4Ro
zdP1lrQ0;-2okqY`Tf@OJ(!S^ZM&@`ug56YlK32zkZ4(Ux)>WUF^(0}psME?q${Z{e
zP4VSEa1fV&d-hRc36R7*zfl^YF?=cYW*RC6R_X<nsMVF5ZiY!Z4#M&Ho>7k~Rw01l
zS^(5?<k+A_22XZKMP02De<tW}Db*{dyFhokNr{)bT~2{<f9)YhZ%rL$u2pc6%6egC
zY&V8h?q+i_HS?LO)awaRF8ZqnwBry|>vp&O0fAHl$+4@Qn*QWS=ER!PT~^2Qv+wk0
zYH&Qfl{y(7a2;dSYPR`#(~G(~K->plnn?m5zN{Oo$3EogQp?xEdZd$<z-)+S^3?O3
z7=?47UcFLe+S!0&GLPK+xmICMa#T$(405?Vgtsr@?!51ZOP*7jzdDMad^vfvC`h}b
z>stkJ02e{b6w=dXKt!p9JBCujoB4Gq#J^=H@(IjGoP*)!U^yieX_D4pyw|+#dzLV<
zj?}ZvuQG-T&ig;PO0e2q?pCbJe~h|Iys!*C`A*j6h=ev*!zv@AsvyATcV>^1ORJAX
zU(>#AB29qthGyW*Z_w(H3tKzGKXrUxWO@a`JHh|^tsnPC|JU<MC%z?Ie-G<rox5)(
zwg9c`Lw6&`oBVjT5N>h324&S|b6s5&^U1F?Mw6^NKRAS%j?bI_rT=><CK<h%cw9gV
zEk%Ee-v7)ut|~o#h;BU-o{BtWGl5MIy;QmRM7>;}F0f6^DAQOJigNH&13!u+^NSv}
zvxBWEdybxxHxmo@ag~4nICw2BF*)-R7#DBk^8TS629!8M;4)LURKfw4L4+Lv8Gb0$
zCMcl?{^_$Sw-BPVQ`=5NzA#XoN_TF2W^v<d^DWfLc5}KdMJ*9_9Ay5&Z*?~mO4SRZ
zE7T_yRPN}xArAv;P4@qE41R)Pp#A2Kzqs3is4y*YjaIyU^{tRF%;=-=E8@SNtvAnZ
ziA}pXp<p|#fSU1-S&z?B5t2FRddH?(k++sXMwZ|=|7IBk(tNlh>jic1BsOUV)AJ)D
z4R?c7=rTvHx00`IMyL5BH>JXL>mqmdjM1~ZdTaUHBn9AOy{Gr&fB?-#V?#k)Rm&M9
zna@Q?V6(nj_zF~neq2h~m3-~$KZK(p=VBWjQOrpg3d|`ye(w$N9b!n45!FDt71m||
z3q8=`+-6I+m3r!zb6!Po7T<0^ue$*Clwr@MU0H5|xrCL?ubu!Qee*j&)_uub1z9n;
zR^ruwCwzB<o42vZUBTvo$ZD=)5~d;REqg;t7iDsPk?OkSz{h8a3v}*r1zcuAI%c^o
zm<Fw1U|E(kW=FqPd)`6ul$DUd0^-<yL8`X?IbD&cPG;D}{awP)s?VYIl(c(>*Y)!!
zqh!Fjz-zN{8?lxk&xOe_ev>~4{J;J6mC0RarQRQ{{OU^;vyIzPurPRcvv<^bq4j1E
zaQMD{sK6tWB|6BY88vZm*t_sM+l<m>40Lx0pP3?EV`CKQKSaCrABKH7DbAqq=jyHJ
zeF~tPEYhSkImPRvLb23l?-y<#-#_T4xb}mmOLCDy2Qri*@Gahb!IF@uSg$x}l5?5h
znkG%;n=<ODQ9P|{MkL7vbhqB*E&b|zaTT$eq+Ype2JS%lm_$v#YG?8F8KyEhy=ala
zXkkV<F!E%aF=`+Z;$R!h=SqGfHED1{mwYoYrsodV3oL{4X`6?`x}Pm*l7yS$GqZ<@
z<ee!n`E&qg4K8nBW9-|M9m2vWuE8McR)M0|LtCG?TNeRYh5@|mC^8d7d;St2z7Xp-
zbP9Sl><{g(gHq0KLI)L7$-NMOmxETpu^f3SyqZUrv>dP2$Nyes6+povx+K58W|8$R
z8vTMI5qml3W51PUvRQ)jOSM)j$H#%U`L2l+`fglF{0q!pIhjz=t;}lj4ldG}#QH^`
z4KWN9wIa04*;99HwsDx3N}xL*?}D*F58Dc5(IcL16b?#@`cHn@=m&#tOaje6p_qgR
zEo0}r;#<yb4zhnh9SGJ9#<eOU9a!5dsiZKZGmBv=7iLQD>NoJN`}~{XCw}exPup5Y
zOefRp%;U`F&S}8sXcY;1uKkt0NO5X@Q<!*QrN&!#VHI?4g5$dh^+0MK+jZKf`!Xe-
zwJ$);!45k5h@b)o>`!$vh*D&)8hVUlB7L5ig3@(?th~~VTs^nUQ>l1WB64vwZsLP5
zqs2mq!)*$HVUB+?o>mKB5e}Qat231CCWo66IpH6O<_(BQi_WMZ&i@ri{?6rHRciS1
ztwzV0y2P=P(R)of250ojjLd2$Kb0z!gq_*?Vv#?j=aP!TdR2y@@%u#t(Eo4A%ef+X
z|4#|zCW(8Eu67QT$0t$0vvlW)O;)~2w9&Ow2;D-NX)1p?<zu21Kk#=<YR|RS{b;PB
zYxVWg@|EXWmPZ2dN0|L(6M<TtZ7mOI^`B;r3CPC5F`{ZVFo{or{e3(k_KfG#?j>iV
zpLDmMYUUWbTmA$wLZ$xUlI$4;deC0OKRwPyp>;I(twzCdw!pX0G?`_4E!?-bu0?Ki
zs^i8rL1LMY*Z^Oz>M)J5d7V7~oN2Pp$epA6Lh(M3Ommiu9x&1-8xr3%w;~U6R@m@`
zts>gnExAUPn=V#!#O5%V&&n}&p=87B)YCF%mtB)3;?c*q1CT-PO|^+3x*ouUN1C8G
zSk+Gd>?;G!c2z~$sI*`*0)R%p8~p>mA4KvP$Km6*<iLMsa@>y|MsmDkhH_o(KWXEj
z&WA*A9&DsX>0d`k6C6vrqh#`8;mJgBpMWNH2pg=v_0I@8a;e1sL>~jXhhj@RXQR3i
zcfzSeMm>(KIM$E&;l9saT&&s#UPYVzmx6MJRe5eeEkrwHd4ilWl{%-e%NB}26`K`|
z^SES7k0p8)jq{cNRqORMMu2ssbA;hGrW^XY`6H>xIs;3YIK>WcfEfh><?s@L9{3|I
zOrNSqJ+(JHG@sA$u64GH1@F5kW*;{t)E+AVG8y-jR(>QV3OnJ55-@S_1!w?1KnVi`
zm6<q~*#kgZ2dJA@(2a<rTjfP9g6+)FmeqV~Ud+{jY-2EAkhr<2CX{V1vLF)@U}@z)
z2vQJO)f2EL8L@63yTa$Yk>e6P;{V6C_xF+>@yal;8~&!eL=kDb+$a_an)vAs+#Nh~
zml4ZPQJzgQQ^$dNE$VHxeYY&51AZSHkm&uPNad<9j~G74K~d-etl>C){!r$T_m^`e
ztL{o`4OLJc5tExFcxn$2cK~~;EimI(L#^I2hgZA{LvX*<{PMk-cF>upfDTvFhDpQW
zIn@<=v7swTgK1?Vtp;7J`$YpXstK>7#N)^`oAHnV0<^O735{a1dhU3F)hE`W7c-#a
zE>NK;-!mOCasEUFl1#gTt092oQ<U{S&4AB0LLR&I6Xs;V?t1U#>>LTzLa*=+%n(kQ
zfDF~=x}?V&zW(i6AxmQ>9?<2@evy01$InFidfNISQ7@wIR*p?NPDhs3oC{kPbDB(w
zM9hE*6N_M=j7G{FR2ST0;jn3CfYNNQf7J2)%;CDgw6PnoweF3ZNa*MSYe4|Y!0L9&
z<l%>_e_&hx7P;I!ztvotNk|+HdSQ5z5?{3(r2-S>%>+1eNGx^cxcr?3ilH`(M8^8r
zTQ>=`;-L{VDJxIvn>1q`phoo%&*kDhZ=pVH`&wd>;NS_<JH&)SeMgu;*J~;J3i{J&
z=V^T_fbyCv_3F5G%Q=0)mU+g}KnW&SC0KaZ_lV26g6Bc<&mxO*MapF_`GGviDMt5P
zN7Vs$-E<C^(=l?sqV&)ryvHaUD9>Wx)D7O+-x^x9C_qf7kPrnzjtHzM9Gdss+9HG+
zvY`kIr=Rp1i_?vB3pzJ9(FfqBlsyliU@>sps<b}-!>OA2CNCYfOu`*zTdoc=g_CN+
zE{1Q#mUg}XYech|7do@3j*-DCM7MD;@Bn;YT3i=!SM`qEP9J9=x7Ns@F>G-C=TQNC
zWtR;Flc>);U^1^o25VjrS}TPnJm9|{O$e3EghUsh^=dWc_a8lI+OU-%BbCeu8nv(6
z+nkxjXzHPn5!KlSyv)6ukOx~by{3~C6D~8uFQ7)E@n!Rz<y@z-aV-Wd37{O3f%RWz
zbh7ca{%@5Lxvtla_$56pU;PYHh%;EMI~*0q3B>;gp$ckvuJuvt3l+Qe-O&l5KZI)2
zX;fL{td(77{x*u1&bSx+>J%_iH@2<&gaRW@B!l=r@CjpvUH})RN71dk%jsU)4v&h2
zqP(8onaur=y(;?WQ4iL7;Q<(DDZgH$I=fy22S4Dxg!0w7=H^R#+-zQez~cq>2$Qrl
zy(3F%De=TbD~_9L30nD7#t;Brt?wnc0=R_tArn3Azh+Ek3|d=azMbzmK}v>_5mT>9
ztT=T3Y`eli_S7*R4Jh1daQ+jXN&T~N>6DtG-~ZEMJ00yZ&Km=QsY2W)+N-&Do-w5V
z<;0AO{p#o{#-kwB(pp1es-Fg5Hbfb0oEvqXq!=_GnEs`3QDmqiAIT^nfA;uL=lREY
z6^G-3u22H^fiQA=HNpiha9wQ_tm17j-$$*_^#&COZ$fH#o*U6WUNuQ>+sJDfC~e2;
z&9$5e5XROqr-_f}`C<N@Z|d_?RfhdH3yE;LnmizVX+00+6lUd^@Kg$p<%GkoHsz2#
zH8}Hk*HgM1q_Ad7zs}X(XI<jFo-G*Qw2afBl2uF|Cg<nP8RRmtS0DF&($yARy_$s&
z3mskBTvQ{~XlE+AB03<%8Vu=1uUIDJ=sAl2zk%tu&dep8@?kAU-EWrw=K4}T#N5^F
zRgXNu4uj{15;vni&dg9kMS(}2(fjQ6eq54;o)vhKT0KjV!16CXx=Kp_{*-qW3G#Zb
z(Fu7X%Y0XQDLhJxZc0dFX0Lv#irNM+%0sEmc<Jq)ebm3)izt%t>3$yxj(WTqnUj8&
z1l7#U<B5Fr=DAUGd6Xd0HYOHqRbxH?$JvUjItn=q*fVz!NlY|9W5PbRitNQ;v$J7r
zPU+_K2*u}}MeNJqFfY%T+*@vzYgIAKnRwvPO)H8hWfQN5APPkUvn@i+!2<g3w^rea
zAwa6>HtO8e*)SzDd0%ko=D=~`zwxd!CYAalaP@mGn-D;-HP2Q52amZoLjb(=4x#4y
z|BGclo#gUz+u-bZfgWFzh-6_;l+fDNXxjr~R0U>1KB7j8#I@g^@i~4X(cPkZOWEcG
z7hc%!ytP^E!`yB?OwX1aIxP>`JjU*W+w)0UFYOG5-(PC@PNLeafbD<u3(X}lA!N3|
z_*_KZJ))*mT-^`(;ykm}58KN3qRop=$h_r}S0V0StIN}-M%)0Irr>tJmDjiP{JpQZ
z_rDskx@#98=W_ZRAN+EeC>Ee>sDfzHXC;|~3yn+Jw1QTaORe&P0I3at36k#YWFo?0
zWMG`tKk#e5ZieeMp?&Qzb5vy$T0OuQ-ZHYBWDd2vFBZUgT}nU?at7#So}ji-B<=hV
zO~H)n@TFP-7Ib)A0psHPTXWlL27k26J4oo-ZICbX@6dJBUC+rR%sUE<a1Ck9wCcBl
zk_DZDyM^l!pY%fhfUn&D1Yc|W%_KBbF<O@(oZ~LixnSbtX37rWTjzui&*44SNn>F#
z)G#I``uy*%&5-yO{_jysB2T%8Ucb_Jp^lTINzTnOo-7bOGW9Z6JJGK?uDmARW<S)3
zXyp9^zGkGopkMSYq7S$qRs6=j7cM(>>x8_R@g@JbGXC(#jJ4FTaz0-=Vxl-zb#=a6
z;q@-4&lotvDm$maM29S~dY^fI=!2QxQUmaT=q=)k@l@?7J>~iIJ=+Cv{S}ayjnV;4
z+(qXdBkYf&vBz9thO777wNPtbb37e|uo%j!2Tn|yfrqg*%V!p29IAz-MhtbXfGKDF
z?9u<*o#Y=iGp2^}0BAT4%3&$I6Z2eu|1SKh<UQ-ekIbnXU+33Hs2y0RAMBx>v=_Dt
zVPjw166(n{jL9Tl3{-uFci;SRZ)R)OtaMYuqk$IL>&E^}#=2Y<F-51;V{i|W+n5pp
z2;tQzv%Hn1XAw&&WUc9DNY+(v_Dw&Q#A%}&SH*v^t}FhruDS+Y+XD5YlQzK4_*8gs
zdZDX;M+raX=4W6WEKV6ZpgCL*2ACtH)szIa?UjNIC9|iYK|&Q|{3?@kfvKzR^+#ZK
zQ4SqvBp!cKE=1`3;s}VD+*5Mi+m;j!dy9-;E@s2jnrS(s%#aq?U9z?XL?g+~W15fu
zz+3-i;`C+x5w+F4#nbxN3my2PeMEcj0nw9(eA%7Co&i)V-~A2tKFI&*V91tgU<70W
z<)@IXKFch#AXc#F0qrl@X7djpa};zXU@BJ-=$RN}ix|g}QteP?qAR}8N5m6dCh*Dz
zjW%Ys@L=L@al;RF<>Qyxku*o8kFU6_LnBTW2!Fq^tg)<=$oJU=19v+RD57`z`;n$S
zy;2E!ZX-AounMNw(!w&=Gx(&|&?Us;kTJvhISd+}^7h<wcgQhkw74o0S&sBxbLpL=
z`^CE3b3C(XLr+GuXLdVn@~R|j1qjzydegyEfJ(DVk?O(C4KlmcIoTE78)a+JvD0Gu
z5c!d^LaFVsix)5%fX^`OH|iD2oNM6=Px^zq%Bnzs3#T3|?>TgV5JBI3<PwW4AuQ=3
zQNrqb=52r7PT4L?oaDI0u0YzNPE$JmlJg&&E9PL*?{9$ZHlTY*m8#Tl!i_JwsGYvn
zA5{|KX~vR!>f~FUuJOw^<kul&q=Eg2{og@X$5_bP2ZDyqdu{;^5j-!~bm*)XuaOl5
zpf>V+#L6!C#5Q+ui&Mvh<Gx~y)zd&B&4EDa*KUO|r+yclA-)e)8i*;rLMd$P${YCV
zx3n;%tUx!2&Hi_24JSi9H1GXlxio}mE_LL0D}F|+RnMLS3#L-}eKE1TBgQ_#@;E^Y
z|AT9N7*^C*`^kzB3gE3_c;`+)k>+1?>uep&v^vlWU|WO9FGEoi1b8eEGw1r(nZUTp
zYC5p<B-$>OYKw4jen1=433MM8N+j;zI)s)akUv?MXtTDzCaUem#H#oFsXv~b?WBw_
zdAaZ|!^BJTaMm7f1G3Ie*plHR3=0pM#=wT5#K0%I6(oG2wq4yxjmQ?uSj<Qf{^kN1
zoS?Tj;`+|1ESwZ=ZVsEW`6)awmF&zZ7SVPFG{s4Qi?><dB$OEPITxcZP*7`W`W9#n
zapS@X6>AMFCNyf#a}-$9A`ewU=djjddPlS;sf<`#I$c?7Dc_fMn6dTW@Pqh3^;Cgw
z!!QU~(xGjZt7!u9pqEpk_#6v;w~xDp)=StuK$Yf{3I<va)Lx13MF5Ji5Ro4MoQN*i
zSl!~<u3arOK&wK>Y%MyWZ0gB_o8~xJZB`KL_m{+yT0!U4_607(z?|1&=^H06G%1=J
zyGRuaxWxZI@5*wKOigo~(PxU9C`iK*7+Z!$O7U=Q((AY4o0ZmIpHGRz<Hv*9@);oe
zYnA1aLt`vlRT}fVP6*&h#k*9eywd4eI<<ul(OE2gN&O*E*S}cByrH7ku)q$kV^(vs
zYO04ymCP*^0MZhlNfShW2BM3sP5c}EYfO^g`klF7^!%b05=j}i3me*EdU&>r3QuOj
z88xFjM3ZJ-rpI-^mli%vhGX3ar~~qn8v=mc3F8Fb>1Vb-fz6yi1J^gRFXY%%Nn!mP
zd_e?pU<TLg#xa@o1w`8Bt@$KrG|)2&0CQ<KBshv<GmLlx{RJg3Z+Z{i8C=2LfEiTa
zg?vcF-f2%92A}9$UXRO}cW0)nPhT33!!#;2(ocOBzyg9;WMb872t9rd45c>Q$mC!O
zLO&tLnrm}8<HyeXhuto>JPa4IbQ_P$Pxtqh4*2frl+bms6oISY(e&v|^b4yEA&yi?
z7U)r%uf~DNMik_m0t@^r?H?w#GKGQw9^95`WeGDj8O=igTMR~uhUroLS`s18>y-`Y
z(ZHfvLq$*?)5d1ZtLhB9rW*a(6Zmv>r%uGC_MQH>c-dLiWqAHdm>6<!B1g<D4V6ON
zkwe=j@dc<meQW+><*6>0*nytsWe5N*0M>Evo?qM@2D#KP<c-;h^icB1a3+ne2?%G%
zWP8B@oIZvEu<al&ZgCh-5&7B2?04hkk9#Au9wqa>mEQ%dF`vWRPoGiwW#y`F7ZV0>
zm3jbw+Mcih3@HJfDNWsvbOnY5+`KqxOgl}BkecI^g9*~v%M+Cd9vs1Q54&>@NqS+;
zm>-)ZP(TrRGrx7w?{;h8NCp^=DehM9^Z!hUfUmT&0_-8_5E>5N)eKb1?KQzmiW{ll
z(OV`%)opO~X6!MW)kl)bzlyt_HRz*Ir8ur_X!oRSL9PHlk{3)!o2ebl+%J5*7)PYE
zd&l8pR+{l^Puc?L)x_jEbKQ13adlb`8xXhxDo}E_ZaMT&eLh=ko@LJa-|r6oTpTz~
zv~*Y#>NPjueJi?^8)~#|zWLakbrl2)#MO0a{S3}XBhPJCF%)4vPxP`fn4TI+*_I}c
z)E<_^nKyWKOS?_pDnH@hsKJ*LzyJ#n&9eag#Kf)*0L20{2>YVnW7W&kq())}XEN(p
zqb!g&{4Mt?1LxM=s8>FL&$;X#A0P^xAQ|DMhDh?@AvX7O<ZSrZi@AR*gZ$&uN_zh&
z?8}r8pYQ1eAYtk;SDQmeSC>P+Us^rhm|X2P^Lehpfb4S?kbU~PBUx9(=tA>D8~1p(
zg#u0wyys$D1d;6I3{Ozhco}j0H8JuGFicMkR>KuN(9zZ89S7+HP{Xhb?K%(d(g{|M
z;a~gHuUP2L7aEc27MiqwVZvrci5u*Tc6{d1upThEh(LhPT}IyRcySsXtWVr5Zlw*j
zWn5hB!ghT|d~KaJUMMgzrN{(9_6u5|!}ePZX$ZV4T<$z_-n9>QMo&Jal|P4*U1|{b
zo0L3Ym&nt9`*2Jc`*T$%=W*u7G2z)OC53Xz*zpA0>gZ+eSqq9U&z9a;b86FMo^h^b
zq^&cH=E(28@=?PD))f=qW0uKk(sspKZHA6zd@t|w*|Z2lZ_HXP7MOyl1+r!#8l9J~
z@huXFYC03Y(Vi_{*Kg^}AflQOTd{8(x0QAlkIq4<0A-75!j;wwQC(PlfbbBx9HUcv
zrgHLd`ZouGku=<56eF|=FUQYISb>H`{jn$F5XS`F(F&FI6>1K{tH6pM&h8w*-~Z*K
zavi<FHav}0$Qh$6{4zXqJe+8#k<)kH3A~bs72z)|)@kD>jA}}&)Iv4H{PhDK84eSZ
z1vQFV$lzk_%y9H8KJClTjl(!_hJq!&tmmRzN+|556hGzGOk29V*?ZpF&JTPKGIh90
z<j9DwBF=|a=YFm_4*&J(-S;|f#9>=C+5$RV6kW0jhrMVE3-)tHoMg3^E%8GF9~B<+
z=@To)Nz&uNpI(MPcxf7=Fa@IIjNA5i35xGz9F9v|oQ$TgTXYcOo9192C_Su))A%CB
zCqMYbOV{C;-^5fZ?%=G3PNqCj)LVIS{hR7%xL^=Z;adG8I1%4m=I27g#7yXo=^=&@
z{u`GP`5hC@V}T3M<|xk-@Z>TD_?f?z<=l|Gv~&N|*3m&>55h>t98ZRZ(JpM{T8#5R
z{5n(i_cj4%hnjLby7Cutbo;|eT!AZw7r<x#`SFSc<$<^9qv8x{M}>|jO!eb7!#a~$
z0w`m-1L7&n9Mw$1vLEhhhxEKY;g(LF>{>Fi)JjR)X1phwzV0EyJoA3({84SP25E2d
zj3pl&6;$qm!==5n^4h#lF0ACCbSi+~1_KlJ5zv@}xP-;fk3}S5iP^AtNx7`5vH?b1
zHbaf<S}JRBURKDG66rU2{c^94*MiFJ?}}=#b?V&eEVcvm9Cv;ncSN;2qc)e{;<v5<
ze%qJ*qY@AIAet<?{!uJw<QchuAj-JK%b~mml&b=(G$UWFJ^cpLdjpalh7uuhW#DRf
z_d7$F3Z3iOo8Q}((0Crv(Ng;<NX{;}Y1>9I|EI6mI@$H)>Y_d32h2i2co$x<lcW!5
z5B*&2PQh{3(jF%hy8~NmLV2r;rZBiHu#s{3=Jt`D^Yg=RtMk7HBl*kEwjDh`w!Nw`
zOkk3ZD<awtc#b!UnNA4|2=rLv8j~9$KYV3xXJK5W+mlb?&ad-~yc?sN9&j<3uKdst
zE(lN8VG{p9*7nUi8vOaCM)WMb0&$k|m<Y5Bof#H`hM~7M3Z)Rr?H5<e*f(nHyVsxP
zV<mEj?R5JRRXf6F(l38meM!xnVu*(-M_;t|DZGa9lL_^6H3iEzDpb5L+6bYJQ_*=}
z@tKb`EF#{fSO>!K((8_!DT(n)548e0$DW0E>ePFG8JhGDs_iH*$%iQ=9ct7K1(E}{
zBcO^bW_s8iZ%)a>d6|p1P?cVF<}}svYP=l0_C(#_C*rhosl+&<YJz;*V#lPNy)0PL
zDF~)AjpT040q)jq>KafWWR~|dp5d63!JR0s+4O79z=`ltOcON&Z($S?et)%ll^xUU
z?9F&2{J^LG@j-!#LP_+>Sh0>mU|uCKl7CybtRXF7{fTx8Rv&vf7tA>zj7ag`Caas?
zu@gpr=6b@FQlg3~I;vo*UW6Y<(k^q|T8^pCFk}S315W`S^WMd{KQmb^(p93B)?+#6
z4vjh-r@Lc4vy{L@_R2tVp@ko-U;Wj+Rc->|ZT~mq5w+?Y3~y)+eBTt+5Rr-8iH~qo
zD)F#cq2WlXg~rg2R~jeqFm;8+82TPBzWK@c%6=CBXWxul+^o9QIPEN|U4WX&otWV-
zBe{B2-SeLRR*vd`N9pMy<Z0*}5#v})0ju99YhUt`DYzL<15fEb@+qxbu?()57>!$#
z-vgQFCcJtX$U7TyhOoSWlw+D0eiWb})Iq3rP9Ywu7B}_86>oFhKx8R43F_1rll|@{
zJp|vKSt@~iJ&8>PKKqXkWeF4qlAenFZqLUgL@f!=QDoc3b<r|XJ{4P~b~gC62^^`n
z&2p(53WX*mbjp-H6y)({^3eT|TQEqs1WaO6IBw|*u`0p2LZmtp#`Ge<xr*>JQS)ai
zTSdI=76$OX=TkCxPHS`$!9xYDP+4VV8jv8Cz;OFWNne6hKXom`dIg1UDy|uRj1Z=s
zdX25|_*1FKokFE|t=!zLOjLO;SNMqZldzS>A98aCg6bu(A|EAMKd!c*7yY?I&Q35Y
zdz_`h`ho?7tI@}p?z(y$rT(?YdWh`%E<-5%>SQfF@xlbs=1~7;Re7|tOpeP;T}Gd2
zcm*M*iI|2?_16i9WW(Q2?oLa<v*{jpg7)=Iw~Vs+)6bl8z}=+b+t)gMyax!;`p)U@
z&!OA-XHJ?b+R6>Uf3F;ztWo2m?fgvbOsYI5k97VHyt!hl4z`~WSfBO4Vvy4<$Ssh?
zAq)O-Q+xPvYZuMnH=fLeb;9F=u7O6<)O6%8!X67%^m3L!{nY6{A9CO;Ns6eAws;W|
z;`8@3*afcIe}*3RuHmsUIj@|D3*>d)y~k*NXT@CmoLL7CXY`dGULyqyo0(AAm;i@#
zQpBJN4qJ&4;AQ>^cr@d2Zni!;Et{yYI(z~XJz!>dFj=h<ccHE<Uq#6s3mLScA%KVS
z9`q(eS~P*sBh8g7s)*E&ok?x?oHZypRDNv<%oS%BqV|z&-9y<knTuS0i-`nwIb8LK
z8@!VZCW3&o<Cp8!(<#W{X%y#967YU|r35U5#hYK2+MB+6EoFBN>iNXqT?Nm}=6F)q
zhLETMAM2P8k+psOtc!@OM{E@r=-JJIpLMwJA2}JEbf0~Jk3W3W6tjw345~wvc_Dto
z6-Z}T)Af6=JqoWAW#+#?f7<@8J%+7*hYK-%SGYYT{If{fJ%hVmNd4u^K>mRJ+la(*
zm;x$$J`f(J>``12rTB7?@HOy)BX}1D4U9G{SE$<X+Q!yCSS5LpK92s7m>J_KpH$lC
z-0$ipzlW97b}Z$z<tb8kmq|!mjioanagE7Z8&iBabDSeNTMCS$@b3PVl5R9)klgp^
zpbN+3*I3Ng*dNd9CdZzzXu#46pGdBJ=^rf>v>miBH(e_dF1G%$VS1hK)%p8$V}NY7
z>7LcX_{2vBUOH+V1aRH;i`UnHb{5qUszsWkswJKRv2ObHU3*|&!<LLZ_qXo>;);%6
z|88t0jV{7{4-sn=31Z&W_zm{TDJcW9<s*IyK-xp*`n&1W@pKS<AMC@gD^!4c`_i`J
z8%03mMO4`oXbLfZ1L>0CAKA+#rS3<D2wo4{F->IdeWv^Iw}7D?U7N|-ejjdX*L`r{
zti;cr)SWBgB+~flx$Vo{wVF_F@7KREAcBqMLE#m5_Zo8K`bx(v*hdFM_8_ER`n_IE
zC`c!Qzcx89_xM$H#K@yZpmflK2RPQ`!Jqk16Qq|>Mx!xDc;JTV9O0}K1${pFS3;(-
z6@v#|-{Q0*iZDh}h}<6Q6yC!r=E9Ozlqs&S?`+jhQyZ`d&Hu!Z>3*XiBS&MVE|FMK
z*M0x-^klSO)9iKq1jOvJXExPw0ybJ6>6J2CuK+N+pS<+gb{5<D6W|4OB?2Qwt<`k(
zaQsP~9gt|=I<FRr*0+1tndyl<ynJtlZ&Ek-&M!RzdJyS|4*?|`Z@nLF9`RildPluF
zP-L|?w6@;;)^0<(is-sw1+N=sR|z4XwLRnya!&3O9RDxD?JHD`=MUx4*AweE28E+W
zo(mcM{-)}4%%>3C$JGW4RqN$;x)<nzx+To><zqmfoLpI{kTI(oM?><|v6RY7o5`fk
z8s%@%B5W23Cn}I=`j4^hF7=%Shnwf`W9mL;7X5zQ`@SD1_@&EL=*0{8ln~qf4GV9w
zM_irbz}&!7QFHrT%paj~8rPDW_+yi9*zpff{L&b?r*Vwkmuj*^!&LKZ*fJN6n;psW
z9Xcsr+jhj|oH_lbn<`N|2Q}Y!($#LBZkvC9ucy&ktn_>tlQO0S)7$xRx-Cp94Zisu
z-Y>@3aMjmxc7T2clG-G%K63PYX*B$Snxy-ZeHJv=NQ%Ns>En0)5-fhTY^mQ1>AHj&
z)h|ZGQ!oT|E?#?xUyIqkZ$9Tcu>oG9^Jd!yS>%<~E=mui3l8*85a@q<yZyJAzkKBI
z_L#<zo}CkrJ+wRjxVSpVt?8RG?J#Fz;{yMb^&nQ-=FWsYM<FP!Lg&I%-0=CzaUj{S
zy*r<~!E)H#XfFFo3nsWf>pshqAkWW_mrc+a^vKXE*5B%oJ}r<_F<0F<-r+Pn>-exF
z9*)fno=DS$@GzI8kl@!y7V*%Ay?)THpAUW(qou6%Y{Q_Kb9O-OKF@~_F4}x7ukpIA
zehje-zPYkL0Tu}f1XXsZl?VtDkPtB#Qcef!I7i&0Edz)CM7W9!<uRLy$QyS6_m{0Y
zq2JS@UN<8eHH3@j;!omkW;)=X$(mMv?ipc?bfG|34}0sPdfwl_iNkO-6-`FxF$D>t
z&rjV0Hz7gY;Eu$z1O78U36DKQ6L4<X^~W`?3cfs3bL&}Mq`<l_`y-{W<An1~{52s0
zMGMjoT#2jO&~8~|jPP&~88I9XpPu|ZV&L%yOwXCCiDsxVB!La%Sl7p<83s7rwrDJV
zZ1MqNQP7s`BZUTHP1)lUV0>*Y$nTi<4O5UpV#Qs<_MN7;LrV#f&m}B~FJ5ObaBwXY
zgojZ$+j?(vVJFYI=R@rpm(6^x6bOjGUIU<Cfm-I2OaXDydFX8ZqHe6xgG4KMXaP-_
zad&=Y2Kg7xij~JGkT4xTDoHT;X4>k<RnTCH)4;(C?zO<}cok?)xjEI~9OIDUjoVM@
z)MxhYM(KfqX-tU1&f)|lt5tV1XQcp6QiYl<XLKeU{WU=Dzgz(0wQeFT(H86~G?baw
zIKjGz0PP3hCq9_mhR=+a-kx34p)&!J6FQzeLV5JD$x^CIi780YGW(os^da_5=5YC8
ztrx5sj?c^R*tkTFuo1Z6O2+whMlE1cL_jLiGpUy9a~tZyN!pWA!LqMzQ{umz6yL>A
zhZWHU?$V5!<i8dlES&T`++t$`QJ_;1Vp<qF-iRK$VP6Otva%PLKj;H>hpxuZeRkD3
z&Q*CCAoP;PPI2&{=`EhlB{5Gs^%7r(M*=91#D=BtXOc6pR}+;eF9!T!YF$T*jx|5h
zTuI?>WsSIE>etk#H}+IkKcX461GCz<C}Mdz4Vb{C%l_^yaUDcO#&6_9ve~}~Bc4^X
zWOp}1_qn{)bi#lWLJ9GOHlhf5mH%Y6HHxsCx~jbah3F#yeiyF*u@ziCWgA{d@HR-!
z3N}b!NBt)!$TN2#U)3EV`q|+HRpK6dU)pcC1JN$!p++3E<?j?X>MjpnljG5|;Mk(}
zAbf!Qp>}Enc{A0ap-_1{U~ycF{bxW6d_|eXKj-8kYH>K~#)7j+5B$WEt{bBKXf@JB
zC#9=cgWrO}xm~I8P`RKb&QFIk_oWHrZ&EqDQEzb;;oIESTAK;`Wy%v05k`lq(yLxu
zWey;INv{7uORV1yIpS;l`lYWpZsk}H@!s@FCBH&Zmt1inqfoVYWpO3r1j%?eVTiDv
zT+P_M^|r6eB{F;rKDHn1H(7Kn1gg;k1=o7HI~$>E5OYU?tkU-ctA<O;Y7Fj*eb-`V
z+C?M5%x*e&aCzhp)(Y75818=ku1PsLadv}@s3Z<#oTc8@!}A1n!@$qj*V!%5EZ@*=
zGBRMMn;~4R7xhR7pER%HCazp%2k0Z@TOU{<s;gHO!%H=5%W1RWAMX>3Fwfl`OyFF_
z_{D!_k~y$g>-_l6&n1evT(MT$xAV5Jt~L|il=>i1Mc8z~@eP0Z!WVssncKMFF>sx=
z62;R^{5>wfM3TCeUO9JHHN|vbcW)vEkJ5#tKP1GGq!nR1QQLf1OE6Z=VDTMsc=9b@
z_~rVTh-?#&@3?`l&f@^fx171avhufExB^DOkh<Ptmiy3H$?@+`1^d3TfUg+o**>Ym
zrN?a^*$*H?fr%p~mtw}xRCBiaCeMDi=nVTy|A4EK)KsB$&A!}Xq-9wYH{`;TWg(br
z&?XoZc8hCyxRLdGTe#G2ToWF76^ywosUtYT(-0UPq&;ibhpd}$2~^oud)<Fss;xF{
zYgX1kMAqwUbPX{Io7avojRb03-Fk?GD)ijXFtD8QAjdb+U+@CLrfYO<^BVZmoe0a6
z$=6>pt!o{Fs;jDFjy`V|nj1_a?17xkOQeTkn+^~nZCv*)4leSTK%v-$b6QVwMTbbs
z8d&hh$J~B^%RaAiur@%4BZ^ip-a`kE=<Jk=gbI0ADG(QOItnF6x1rdz3h)bnC1!#B
z|B}KbwZ{=-GX-3J;bTJep4iNdVRgB5qciZbR)4O>kFQy08!nN9ZGfMCtj^rS!Ie5e
zg2h5}JOPcfBoc>(L5YO-)(?z-TRyAA0wk!STt@FX9Yx?QPs6~<ZDP;|IUk;$1PYNd
zXa{byBDT08gd`fmmn+KrbCekGiDPTn7z|z#l;j_lat8Bj-|3DR;16VJX&4LNrhcjK
z#yaO(6dKoZ9J#9%vLfW6#qQMz6-AVDdDEmqu(1avZHXVpU_|qJ41x3+R&^0y-Pv5U
z+^2NlB%Ot&W+6A_u+jZU<o6Wuh8LH8%r<_sU=_^*(MvG}@q5Vqq^=t{OM&HOjZ>3q
z4@$v5S+mnCUxqg!egb(n`j26n)WGtQ0XrD3wLt|q?ATa~e((k|PZQyCLEWNtotdnU
zP_%#i1469di@f`6VYGC}I?gL9Wc5MxUN*S%8hyWEHFA5V%6#&L*t}-C&-u^E)&rk|
z?b2#i@Yd!TIR5cp3Tt1A0g&kH2aaUnss88A%HE}@it-F;uH!Y@GV!!;mPZejLk+r%
zWkgDb-N;`(4pL9`92UkuiD0umwgL6H^FnrP(W~;zlV;FU&DN?|!9yvrKFg!Rrq=hu
zsjjeK9h5PrVTLQ{9F5S|p4NypFqdRGz+HP{-$TrZQlU}I7d^E#PM5pP%--;~nz1E|
zYS5<WT4rQF?t7Ldj8DQ+;}BOQnIG=l4(c9JR>~SfWl8NLNme*@&_lk8zgU$^yQ6h+
z##6wCx(G{r`^mu|^;0^c3`IosPKx7PkeadJNZuszAn!V`HACTp&<$wE<QQ90t;?<C
zF#}ma6I{XbBkng2+v`w*EUPZW;kuRl=ex4BfKcpc*Yg?F7`8Tx2Ev*?RU(&@9((%X
z>|p8)1YEsagpnm2_<bx~>-V?lENx(mWr(@jnHM?*rj9kV5!#R3NsH6;v!49i$TVfx
zl%F`Qn6pqq1YvM>Ww$06hDiheHk5D=Bz1UgeOpWBcLfq_N=Y+vegahCL$CQAW1ZGb
znS&xY`~X3#4HKAHsva5{(Y|Qtl_|dQ-Sp62M0`-t9DToVQtSkqa+*Ii7d-HQ+|0-D
z%7Z=ix(0nXN_>puL(wew`x`x4?cCPmb+kPi2aPa0z0Lnpyf(tNHd8#v6<Ck95f%%6
z{QgnELH=@-uGfnmx$4LV?GqNLAqRDMVc<^{#LlCjbf>pH4o4cVEm6dVk@?xI%bgOU
z-%^jhM2_CIJ0YwTcl-p&C0fZhOftl}&f!i}Nh{v=0o2KuCbQa+f_OGl1ud%hW;Alt
z@~a4?h93!8EnOg(<s=SeQ9CZasJ4k-0MRgaIhtaP>ytv7V~%$UBjKSBE!BDR%b>`j
zs)+o~q8uzIq~OyhoLtG8I=DH1Gqd)>hi=KlD-L04=iM=c>*V-QTu<ZMRNl;@?1*kH
zS-yiqu2S%0a<X)0)-|nDLM@yah57cz!-GJgj^Xa5uqb<&6dZ1~X@Oh%JpI=XCB8>S
zVFQV0P9<kioFao1vG5&@t7b{Sz*^?Puq=<z_%#LD)_=-aY%o6^`312RSZ>Ml?^zTY
zy7h;%i2~W$>lJbcHke>=urg%T)$lmSk2eYDVl&O<<=K#xY_-=iu8FRZ=W_7r-Wo3i
zdc584kM_Y}z?y9G_4i}$ZeBhF>;MdQ>t{(6e(r7Ym){-D-L+x?)0tp0e(}bR3l+nD
zzd*iD*R+T66=uIBeO7)cXA8ALoxgi)z8*9r<<BWY(R;LNDEkh*aJWofRX5Pn;Tt{N
zk5G;o{9x^&FYzWO^zmhk6On}DqXCB6hRB>j6-u9^q)$9F8kU7`y*(wh<E}#-(&R}_
zdrkS-+aih>m%cn>p{0_gVMEcQN+vZ{O2%-s-tC4uYu35NXc+oV1BF*3n$ypw3l=x^
zaGo>VS3vR8!bVdxZL8iN&IE!!p4`CL0!X{-V~TIh+cx<>aaRrdL!XPWgK(l2RX?qD
z6|Q*(G(?(z<Kp4XW+I-;JVVUF7PEX}A+|$DOebmxc2}HcaqWK9T2j}$*3vOO*B%rw
z6`d}_Ph0tBX;R9t$Ng55*7e{Wgf*`O{0n?>9(aNtY}_xj)oaPGhGU>d;$S?UwwnaW
z&kn#h58L{Y)Ux+&`NiQjC4Tq;ephaZYVlJ3?ANAj{WWY60YhQ7NMnO&RfKI~kQ=HR
zdCa>_w(h%C_@CO%OYfQzmdHYv2^E~BRxR$%%9R++E!jQbUrhOrLZ&P0cY7I<4b+si
z_D!z`-f@P)n-Sz=X^2R1Y;L}y(_U(KW#RjQ1bP06vQWDzu53HOFR$CzT)76*_K%y(
zsjD}a9BPkV@G!McC@Kpual<*3j(V~(m<XUNneupC7XHxRz#MkwG!Yxr)dZ?hH?Hps
z0u~J6zANC@!lHk8`89((>+K<L3b!rBek}`=%y<=n>c*$s`{lyL%%3!DpY=RuKQVQc
z3>j(gesnLYJOmvmfGa-za0%hBgR?{xTCJ&nM<u-T)FM*yQ|06-om_qfbqnu10i%Ag
zTui2bNZ{->tkR;$d~<euw1yMjGB^6w{pw2Uq*3s|W)hNm1q*#&4J$u8$UQ|KsB==i
zldfG8pwa_PDtp8f;#E~7e6h8NtgRHB+U5*JwnIebn*OZ{dC%Vjo@Y{Qd~uDynx^2f
z`)(*dvS$~xE#<$^uH+e6`35*Y!JcRSgx+5yPlkl1^R5pN@Po<5%?yKicPe;i7?Vru
zj8H$Pg`;X4e%sX3y@&sitNXiQfB*1MNM6^7vo3VNLzB`v?(Ql7RXY_|LEcGvqi-Nj
z<H^%x4HpXuvezWB`^rK|9&z+1#H0~Yt>lC~%kPhC5+Lu7uHBfY&?ifJ+PQ`twO8p(
z9e#$qk^2^*Lj08Rv&VqQ<+x<Fvcx8N*fmPFsA?o{W*ou6L07SW3&T=eeh@h^UI&4q
zMka(*=TT3^leh96S`;^1@)w64;15~PPCI1+`sJ;(MfK;{?-H&{ax(01b!O^WXG6}r
zFr>F2^8<?l^@S#lKbB{Hbzj2hzsu{eF(yyWLe35<uQP`!y!V3NIQk-L;SSr|4L~=p
zw8s<GK=QMuTGE_0qWhB1*M_sclp*%?#$$-@8Kn_{P1@k15I0Pt$^TgV`OvD<BtyFm
z1iWk9QWZ91+x3g0nw}&ItOhn`7S&zMw(L!cDz+)RCO0iOhX24anlE}Q@fjHpo+4XO
zUi8eMl^%52>mO0tix<%cC2&|OHAU|Zy2dbY+1*?mRM4IkDrgzTgoPMpLf6<zJX&{*
zCJh#@?{niWd>dBxJt7DHKf=y4s;O@8_9}=d(os4HNRuW_x&f(DA3%DENbg;WNI;}W
zm9Bzxq=X_Oy@VpYNePJb-b893fh7BGe9n9CInQ~=xMT3aA%1XVXRq~NbI#vfRe`u2
zXm%Yu3xYpLFe3MFBOvMsKJE$r+IarE$=cDg`Q}skzV`29dO`b!-_X(#19Z<E_uuUB
zRn+G-qL*W?9UjL0FY~Dno5!&pUC@Czb-ccQX3FFXUdsd&gF1nriSZ&Xy}J6Ha$>IP
z$EZ`zu-;E7LoM=FiMmd(k~q05Gym4F!!X#?u6@0eW^evb`e23-<Fgx!kH}ziD|SR!
zltpe-sv{BR2)qa;x7lBp5UGG4jH%lCRD!jv*ypT@NSR<iVzHm3f2>El|JQlCz4;MX
z3Le|`I!^yVzk)cikrq?<EsH#xVrZ;*`I@_vMi1GpyZGaqYASi3@dh8`qVLIuomCdM
z_?{uO=!3v%T(@{@(H$o<kKgcs*813#TfJDDKCAnDhfKjru_KO#fek4b)QNaGMBIp=
z#*M^doVT$OjlH{m8jj_gJQfMIIKBsLOb63t_&BAxc}U~-ww#eQ(tLQps&!-qsvd>9
zb?bTT3h)rirVI}~ANzCi(z-Z#-OgrxcwP2)B&^;Py@Qy{f#7~WuWK@Jhn>&BSyg^G
z{kCR3H%7hYmd!moE`rEW$P#c=jRdcYBI{u1b>!$e_xXc(zb6n}HeR1s$Ws0xfEh8m
z7=M0_2D$z(yPzVc$4yWsHG8<AiQ)dsmf`jS+L}v+=xKSKt@fqw{$aUZvDa4Euzh+p
zuiso|fboiL5L1A+RM9gUi5a)0u9Lr`jgvEPeRmhI$S{rz(HB`&UvDsFuuV)BV=Is1
z#M@}=wiWWP2zuXZb?!yH5r5YY+HGfG@9#l4rkF1Y6xV7sr}6nL5$+S&#nx;G&GFSv
z8t-M(b+ilYQEevo9>rztW&`i;X&79x?sI3j5IHnm5Ho5@QROh*#O`rWNn_8+w8<Rk
zPcR+k0L`3<wVcGWr(!rASgg1ASL6f(udd_3_ksqw3Y1Xq&8k2CL$<g1(9W+VpS1(`
z{in8tf=r`mm{HHJ-;RN|9Nu8a0*{|^aO_XeHoYbiKA&bqheA%a!kFd}ctQ08riXuT
zM=n<{WQnWF8^VTHW~mOU5?;FO(A$-cYiE?#Z%_HXobs!|8d(N!DzcWc;5Ic%XWS9Y
zp$z5Q8QXif`JGaGxg8TeYMN|X1iA~EYGi2=x%aaME**I6@N$pjpKIwRbtv_H_C+z~
zqCK1YuOFue&fDJJQ*U09p@wGoydfCzIuN{Nwd-#yqT$&AWzF_1N&0f>?o@#Sx-rP>
zQU-In08Q&ocKomf&Fe%i+PQmrbZW-uTlo*Q9_?FYe+j8E6qdMwps-=$_2Re5`5N2v
zf-`TBM>XQU&}<X-0}MPX{NO4hfO&Dd9$a(ztc}mi3_Wec@)jl$STDoMRE9<IzDYnz
z%0gr+alCcL=f8$ic$o{K+GRA&M&Y0FaV&8juLi&clldry-$t>>z<R=?9L7tXOJ@y?
z8|dhNenB?tgwsIgeG<ciQ^cCR>yP<v<|VsLr)7p(ye_5kh8a+uM1|?cSHiQnbhe%1
z>SXb8^WK9ifCY&}@dtOG94j~at4Q5^(shRrKNcX~3k&lHR(G}=zo0w*yjI%aYb8gU
z!-^=i#BCcH31Mq!o#eB_8jIvmB}l-3JVSb^0ed(AC%8ubP2Q5%?&&X7406Zvv{8H<
za_~l{48H-zDmQPkPp$~QFh?1|P)5$(e*ZO-Xo`USg`Mg5hLSh$eJbuChr2IYBW}Xz
za)Y!teEO;E06VEFJ??K;?LFTo$Lg``d`g0^ENMfRJfAKn*(ra{a+DrekU|*el*6J8
ztVOhiS*{r3LYvSB<H7xGTuz<?Fpql3^|0LcU*zJjdN-QiI6r=Jc%$mEO~3v)5ocvo
z<=CfYYz3V1mz2kgCFo);Iws(-sF{advMXhuXS{rQ@35~HJkOV0I7UT)6s#5QDHuzf
zXGQzb5hpW@x7R0vUaP_hW}<}dXvzcQU{#iO+oDGC6Ym>!WB7dbrS27|SZT;3Ljr&K
z1jeiG$nzdM(pFhK=^HgJ*@B=&<~<rSar`VAm%sBaP^fOctJ>{1t_UlM>zH&m+_7cd
zJpj2D*s+zl!03NQDvEI2*k2=f7ZiYBVkK)<^33Qy2dBl4-<wZrD$BEu_z+axW(;2q
zk;$+_h~eJZieLGAyya=DA4<yQyw~c^9Es&VcH0j;+O2f|Z4UlhieQuea)nZxR%-oE
za(i<JH7dTBOOa1SR9H_lDmQCFje@umJn%z4ZX3#gn968Oa%-!oHG275IYGe3n8<^<
ze$c~Rm_|ljGLv?CbQ9YU$I%cYrEIiHA$r?$q%AO2FhZ0nvvw@>fcEM{l=SjFRsnTI
z%z+d~+@Wp5{@Z4YR>@z#>lqZZwOykj{!{1}{%aVHI`)a&UKnFKtoWMQ2w0R_10k09
z04_~t-y}id&n$dYa_Y>_>nt1GgDgnHgpS=uf93(!E!I#c+{|x4bsRWukCN|h_*Eu{
z=OiqC7|+@xf!_ml*v6U04><nFZYQu@F5VWq5D;D&Ru_nsv08z-Z>hyS`&SSv#4qVg
zbk0HDhnMSf7a<FF@tP4UrdJh}8&mm?*3#b!)g=^M<;{DIoTcn!55aVBl<a6A2hInT
zWULRHomjZFGmD@9`XNkll2+GTbJmc3K7ARo@K0S<9!ksv5h=pZJrqJveJ_rr6~UU|
zImSfU?Osulyt!2|ow!UJ(4I$P;?+naIKKYk9ak57;{Z9{g1ac*@&?D1eiHsLfls;i
z9Ugq#d2a%UCVkBlwLGu*?np`|4qY17-L7opWt?AuS%{F&e*ea9w?{{p+t0W(Zi%mO
zuHgPc`tfEf4IldG%OV;63d1zvQdG{#b$pT}^_}~9%PJZNw&yW#{O!~+)wsTc)`lma
zOhn%?e_VwwZ?aqq=M^BRg<`-n_c}TeL-Bn3yR5!i3We?93AD~<Rx}>9_Z?iw(gmXN
zDv=pr^K@xq8LR~Mmxi-MG~Qm62wGFot!N6wLxIySNu$6YXMXJ9Klh$H7EApu91EAj
zUyaI3nO^pl`g8y@vS>Noo-tctO@JLSTV;Zi0Nuuyp_z*mN89rY2W<CF7cZXFZ`M;4
zC9n~!;S26h6Ewm5*I>nGGYDM9S#W*~D0BoM4U8-b9i&2Dc8g}|wk(ml8;fsUd$=FE
zounbyUPO!&NM8<G<zOvgInKFO&aja6Pv_zO7U5T?n=E<}vR_u(8Ogay>ZxcKl9w&s
zu#T)W;^ofJtoS9`y~pMP+js3;-dAYfRV19JRm@E?(e~ZBeVzTsAD`KMiwASrhGFOR
zEKjw_0b9WRN4>uCvh{t(G?|A_2_^d|H#qxJ9zA2(&%p+C06y=7e-7-)N_Fkp_mToT
znGP|kL#ZB1A8tn(=GVJQjgcp*<kg@>%~ZaZiWS<VFYJR-FpaP8ZW&B>qh|rggdmJ#
zY+7RSK~{zI>zrE#*j}1^L&3G)=F{^S=bZ&Q0T%;c+&ANXG~3Nq23JSiK<R8nPyQ~9
zY!A}(HNH>ovC))4L>$A&_JNS(C8Z@QfyP*uO}THU3E-3Uso*DA*-q*|^K(WCRZR{N
zOzogx7QQH!*b?yZO+ArVz0t4iMNEf7BS0Z>Qu$8;;RhL*zN=v_kX%D7iB|?_p&CYB
zk`cV4>+tBRe0hH2o9A`DTW7Rz`f|bOC6<~sLD#PNW#6S=QX>Jwr2)-&RApuGR(deK
z6<yl1OTLru?G2#dJ_R4wfmj?M4pS?iu;MmV;X1*G$;b+erqXM&i<kc#nhlP=6#a7q
z*SvDsRj$*Isde3B>?+`WnY)Gkz=EXue$_TDg1x6`<xA_xy@Zi(K7-_rl!z7!>ww)y
zk|d9q%4DgpHeTqpaLV4Okls+83UmE<wky4GeuEqydI@_Y4=5w;oks#N<@L}jH#p?F
zHF^dHOB)<yiyA;_N!rUR`q$+0NF~#1HoV03TlQhET@59GjaWWp#DgM89_3|Q&<YB6
z5KnJ)km1;dJs%XRIJv`zCGx3339S;)m*)d{`iz!9^^4pY^;nwKtS~FJw~q?#?6g*-
zgq|FE@H}{VBW3CQ9<CDQ9@Gid!GjOwrAgfA%0eF)MFrD9TBePSu?Sf_Gk}JjF2~Uf
zM7RkZe?<v37K}-0J-TpJR~a|ljH{U1|4amrE+zuX6jpCZ!uq7C3x%Q?nntTd<DK3#
zTX->6Tg>5p`$22I+dn*dk7R@arz?dl!YpUsBMtXpz&Sj<^|%lzvw#`i7R|<K2CP&r
zVBk&js0t(jD_Y+5RzTM+qbHzH1wP%G@qfA~-3p7}5~T*?oEt^#_%IY}h6`~!(^}0k
zw{~dCxl*QXs1AyR4t#AaSfg9EIqbb1aOW?DwVr#7>8xVJWEDHBPc$`CPjy1O6&`hF
z2nzN|{T`QpMWn<=tz8(o(lU5>mk-06pw@rxUm7(e)1sO#d5kO^xE7Ucu+hiM5Y#7*
zjZy6_kNR>l5JQTN6nNa5{YsZvz9la<I=pa|{`A-zQwBtyW<RE*-gR5Q0WG7Y@G6f%
z{^r@#vpO6*GuqYlkA6~ES6dHEOCf%g3^Qdk5g&j{pes+Z=P86%aQ^#ax7a7{Tg5ax
zr?5%H6J;>O5?IxdUb_(2nDuh^{xNw^fK^EX$!!}PV*6Yb)S4a~<R6*G340u7MKRU|
zpQqmh{iq)?B93tgE@dFj3~>8&VLQ&5e(3N(CH$;fWvI!EqPs=ptGGL|7HL^p3a*+D
zTxX0fR<-Z$pcK4ahs7TICADopfB<@j7R->l3K`uSPZ~XyJ)wX;jnZ@kcFlPmTdXQq
z3NEAPGt&4A9qQwY-wjB6$kF@3yaHF1V9#4U=kuqn7c-zVJ*?a$ir}BI&!?{my)r(d
zouATC<@^eydORX5IXBw8yQ&yLZhgS<ko%mSpvBYikds@X)Z|-fHJ|M*^7MqhE1x;C
zeyb3kk_RNdi2E4ujmkCU=Et|99sJmUh?T0_;j-$On^CK9rB}2a<F&8*-bEgED4>Y^
z#z8T#uS9|4%A?sR*juihS~#XtwBsjMv7=XNRS+X;`7QQ)(hL|!28)f;YRvk+2<KJd
zmFLKmX>7KMt^jPHOhkG;-ZHT2Pf)`GBq5$pIK>tgkqnvmaB2IZI(%Q701t402Pk6D
zQGNrxTUOtz^zryNruI0U!JQTus0zDGs|tm;(=v(z3HLPJr4nHI+L0B`TM+-7&e#P5
zBX3QuTv9V2r8Gx>uGv7O#XL9z$YlWov<hec2+&S)dyH#I7);?|Q-)C7Pa`;^dvn#<
zkDiByxUFoQqwh>PCjILW8V8(}WS7pLoYWZ=eSH42^!;JwcfU+nurEoi>rL(q{F*g@
zoWDkN)OiPuz-|C~Eo%bqCe>x_gsm5uoPYlbU7}PA6H3jgoDv?O?9mg9eQjs=A@18V
zDrc3Q)B;D!h4)_%Zzk<0OIXmc2^VG35d*y$Qy=nl>&XOJ_w7oVk4Q+9eJj9(dTani
zq<(Ofwh`ZDdD@xCpLaB)G_d~gICPTU(nvD{JzE;#bT6dvu~bt~BK7sdJ*WMejZ20c
z#b)Y38S-3>4g*mvNo-^`0u7%p=Vg7x=yMGm#hlO{x^7Gt45n=#o<G}Hb~FXPeAH9{
zyz$s)VxLLD7VQY|SSJ(vdJ?gCM>5VZzncViO}hL*RnO(wn7ua+Y9yz%^{a(p;cYog
z!5<9Uw$cR!U<d4yS2${IYVEV~Tfo!Mg0JS##!7#~kZ9iU3oJ=)oI4glA68jxRTy72
z!^1L~tkiOZmUuDjlGv^>L_PSkjE+6R?+YN)yvP55_wR&VDCLDyMtr9!>VV4s=XjQ(
zBWERDN#)u7YP8yM%t)Xi>cdp?Cu!XAxKW(q2n>@BD``EFL3o>>pLUCiRwDy8=4u+(
zz6sFXS-VgDm?)n&!Kh{1;8vY~CYRsxx9d1_Z9-D%n|Gv&2d}ed9b`YW=aA<&x~uyt
zi7e{<<=Ot`jHJo<mBXP0)W|M*VB}f%S<l%NF5w(XK^%7L=0jP$1Cd^H^kv1YZ*S&V
zU;>pq1ymJ6D<j*D+zM|+tBR8!Zg=zDoVipNI1$CPRCZ<bzJL1y^Goi9k|*TP)P~jQ
znWmyt$y<qRW|OOXbv7kW>^~2rFPR_0Hk(MAyiD_PbNGVDFxI;31Dp-TogvAG?33)=
zmWfpzuNh13ynV|)@HWLW585Ar7byDXIA6p>+51BfF$Bc=!PLG#Z~{O9r`l*gn+;Y&
zQWM>)n&SCZUhx@Q`P=&#@5i+ckJ(`T(&oLQs|oyfI(xH3EO`M(LBLh#EO3e7(W@sE
zhct2bbfQ)FbWdT($n3#Lt3K&!>_o|7M5Q@ldPdYpV5(<1Out7NE|rS6U$+|Y@zt~c
z7&;*B)sxIZiHZy+uEtxGz*MGzObqu_DYO2=Vp0d)iI0V>=93`Wtus=EPi45?CGs1;
zk&qR%BR?}X^rce;SGlSk;P)0LHoIw7qB=F)grXQjHrL^8VdB5>MWsCx4!ab6frmvE
zmP;wUE6~Y};?cWGb5hs?fF2etEIw1RTKoL`d<!CH<|&IWe7!jV--^}P67Y4sBvpC<
z%w*9tnkM9TcJC&PNH0b&21o-~W&(v);Xkjf12Gs#vRZ<>sxR`MXX&+|1YjXh+Wc_4
zzxqS9{h^<ojZobZtyO0StiZu`tmv?D^0#c<bhd#`FcsIMxA8*veBLimXQQc+)lbD9
zNccJWuD`c8>`7p)e0Z3c@C0@w0n={*hpIBShPAX%bh8w=PnW{ZieRO`K_8IUg@Fu)
znMd#^MUnGjht9rHb7=2RScC}3KcnJ-aED0B-2mQRExhFZQ9Y2*WB;B`SDb=de+>PE
z(B`0~Cwgr8kR<vM#AAg=<gN|g1ZuqXmv6sP^bRXLb|(8L`Mn4fx!Px4;wY35B&-48
zMP5$rzzI1kj=po^V1fB##pzc@SS42sN;3*yEx&Gz_VkMfj`5w+eBrmP*0{!W_k?tT
zm9!NOhH@n*-b-#sHAR9}wbb9a1p}HZhN{X@vY#5T&WHfzAxKVWJjHUG#z$>K@&IJD
z?}+3f0q{ub8)YtM*(PiF{hWQFfT~NaZU>qKpBABcqm0_^jn8;uW0w_Z77wpGd^+1F
zfs$zF8Tpsb11;+$3A(aM0yfDI&j-6gB31?aw!eDixL7rk^NUv7_SvV^`q7;H+YvXE
zj+3(8IFx%K8jmi?-97FN(NV5r9)SIZ`9tA&p^Zvbxsy|bYUauI+s9@<Xuv}+ALsTx
zvf^sxz|yA9->?eYdS$K}*zcv_TF48n3u1UJcbtld<vZI~*y^uwTlsmGZmu#8U^__w
z_s2Q6uW6*U>4~aXJS%{=dhkAqX`P_fbC^UV8MM>vk{d6o50SOjFKpvC0v1jlrm}nH
zE(-1nWs&fvM9CR{?G`+=H|?GoJcY!;O_=Vb_8?x-ZYnwyR<$E`qg{Sbh<CRIz~><q
zEi2({2V{q^UU+axN<!qo26O%DcmP%g{1Qjz!kfM#;P~CDQY{*KXTxYW(DDRh;|L!2
zWBkD^;1Smip@|i%c<r@;+FM}-I0pOw#F;do-nv3Z?)>Ac`r8)Gr!Oe{`}LphqbBPf
zK!`XVwzl=;RLWuDrF-c3z2bRr2r*p1%`MD*bGK+&zExRld6<;mR`j@Mm$kBS8(}aK
zd;)XrVrs^Mf{BqFZcE2#Uh}6Q0BVt%HTCtz<iSMxZiuE+{Mw6y^8-@>;#E3rqho?Q
zFS$DndZUB_>L)ide|;e5zf2XTB*+&;#b|%0MT}u(`Oy}YKTUrAfCOv`a!;YA(ScgF
z5l8t7MP911scm>;2U_I;boMXer&bd3D4)<zMEYZEaNXGi)cTv`X{K=ZHFaGkq0fHr
z1XkQnZeb%sC5M}zqM{P&8<#FSis92#PecL?3GXU1t+gM&qw~fr0}7hjz5zhMCLDyJ
z?CAOyAzxe@F5?AY=A|(!O(u_TQQ16XQ?-j;E&rYZQb0x;Rpw$+HU00$4k1|QcbWIx
zT24pccvrVu_{Esi&M?UPjkKZ^iUn9QEK}596l<XI=(`}opi^8}`Ot|eM=lfxaExFD
zz9RIvpK#t$+kw{9dZHjx^9n1od3+^eW)*XI*0WM*g<=^j^w}V8Boc~#;Uvu_6U`O)
z9r+V5xIbKj{|M$^0~VfZ70bmj$ISk?G35wLxzD`DeDK{lui<$j>}CY|yeR>IEyfzL
z)4#C06%|JFTq-iV2l4i!jhgTM*#ih^>nrK_Qf}vg?8XneU+fN|r>ggl(SFA(u(%S;
ze3im^(>6&$YOLb-x5jgL)A{Bn`R*!1qbggZm_RJ4yiJcZY<rJ&B(%Z~n*I9;YA*rX
zn>E;P|7Y|GwI$4H8ZZjAz1%7uy8CDmSsCp6x0MGpAC*?VTwz2OJ;d~Fk7u7fLDf{k
zULvzW8@&mR!0&J%PlHBI%ZPz+C^~#w%RNw(=oGDCAN$vE(@p?#2G~p}m3z@h`XCEU
z4bjDQWdoS#Ab=@Y$G?Nx5bds-u+pOw_0ck|KPqrdot4gpz@xNCy1B5T7oxJs$7?Iy
zWt^5^xt^LP<d|4H%&KUDHxppS5gi%uvl!O5`-dSq*Ys<Z?C=$^QJ-1-#GN1PWw0_B
zl4lkmJ;~S>*0Wmf#zEe^o?kSW3Bc;65a;l_tFpjsXQrQGedl0h4vKA!V#Wh#%!|ix
zY6sge28T{_)R#4A#EV{XBhyRyw`kKvT2*<YBMB@k_aD=*)ngU#s1s3MMM`IhmlobT
z@9kt19xLL{TFGiBJ~BAdtOqb($&r6Ylt`yDMVVb#_27PRnRagRK)-$L$#X%$>{)Ht
z>9rWa8+i5`wLZHCd$8^k(e?|DD}FM{nppQsPMNx4Z}}3`G^KwrR{)lp;KQaBp%Tyt
zRmP8-;%9=F8RLR@-Ys)EVm%L~-L`(nX4p95%ot3+(k;_F0!QB=x?0l?*n=T%eIxzn
zqxie3v!(#qHbmrALVV{h*f2VoyyBijAXz*dzFoBNGTz1>Cqs`LaWa#3o51r-JqO^Q
zjInz2zG%Zu01;V{W<{wHk#9Nna;sdoPY%z#gl4tgHf^O+iLwo7$=zcH6PKnA@6NdI
zy}n?ygz6rXzY*_osXNYPYS5DY*n<f9YaTEjF;4Fh$HUH(5xJ*K{e{lwj45!v!#jT9
zRU#XOBY$g41K<YkOi!_(eJ#{I_$ULJF^~PUU5LPpZO1^d%Wa<{2?ZNB^R~e~T1i|+
zL*qoDN1<n`^+eot)PK^%-hkHJqy6b?-`^L;l{4ydWiSd(P?dEMYyZUEzmt&&``<a7
ze0Gl@&k9Z1u1GK3RT=J2UTzGmV>)R9ogFcv2L93lJ>d!U_kTx|e?55oKDj2dyZ7dC
zgVfQoy}d;zEp*LRGxW=|<$*QtyjSMWEo<*YGcv~y%|VN;gGJuc{(#>loBm5gi>W{4
za6CocF4WuND$RTv&v<95q-+!R3Zcz&q501@FsYWHjiKVh2Kg$aB|E0*Vb)BtaI^nN
z9X}ZBRS;84*U+1f?(yx1dU`%R&pJ6Ihq5GxD>#TGNC*uo#M97ajhsnviTBI`zv^A8
z^lhbXSTMULFXvxc89t+x?fwp2n8eAmbiBU4>T&kb2`>vi<jZb?-xVj6JTv$EvU3IA
zwm$O6p%X~tNp_bD*H7liiuFo6vys3&;k01sAn<8#p%L&byFzJcatl(a$4|<#>z`U6
zUrLR0tbr<631@DB5<;Pe&L?9RcrM_>tiMoN)cc%vgvQoxdiQWGveOPl4qH;ks1EDK
z_bl+Pbjp24reyvjQ&2xn*m|K>T?kch4ieR86Y-hHhvB&!;Go881i>Uj0*a0-`Pht@
zUMy8w(ObolaZJA4d$F*cY#cE7_ulw#;A*$R9qvy*h3;Y83O4#qX@mQ^#XCP&Y0*XP
z52_t+M0#^ZO?;u$e|pzObB^qrX<EaW#GYNF#cf*#(6vVxtChj>C=~`ZL)8yAl-TKx
zAoK?z*>@@S*$PsWwACr@U%ACwp{SP_i8p^ZevfN>X{0ALgu8$@IbE}S2aJ^a_4Jjr
zTyx{*$EAa^zq&{2efFC!)ZzzrYJrn8vg}V?@jQ{YURQ9!{uU=Ma;k5^Sis#s7(~8>
zt49H4s5u;0b)pOxaBcny12gDei`9<+S1q6#o^K^+s}fe}gRz-JjAqB?gDI%?si6tv
zqZDy2Q{S(ClUYo#Dm&%OPw_P!6D1P5@7$7(vxC{Qs#suY&<BnyK!vqa|00-4kW8y;
ze|t=B>-Hx^_fktWtTOET_*<j5t_UFHYY(FA=i+!Xfb?bhc3c??(PJ$Mt|$f)?=3Gz
zog&06Q%g;|f$yz-62qkT|9G16zy%5bh#U(^Dqn%3J7`NIWmnmZw|2Nzze%L@?hB{O
z)&?$|S4efu8gT)O*C58YHgZ|2%Z2=z+7R6F_Q{O<R3ti;_t$>~<vKY%f~+hnc^vEW
zUG35R9m)v@2WhM?eM@?Zi!!z&qepgPW0ZW>T1s1UAz1%q?F8)XV1B0H+8fDz>i}sT
zKg`EC;{H<MKCyzUZ)m<PjkL&x+~#&0Q9F$8k-Q-vM7#lVamzh8A7z%<qzJY5hFkBr
zW#4tIafgF~DF|QwlA?oCW_}C)v}}fUPU6gol$Aap&ke`2B?q3QKGY~BPeI(gL@B;j
zS@ayU4BI^H%YK(vAvlhkR`=9#?5GkR(=D#))N(A41yzzp^{9zUy~KXt0eQ<w&11Zf
z)&$W0>%z&b&iW<i!N;feB-!h43hrKDs{PJ_gZ{Hx6mBFJmkeHCfs3S>M7Yp7@vZzp
zl51B^SoQX&LC^VGLp+|~?rSIWHylV-4$xgwJ%!}ei)W3V8DaBPe+J^<L>51ND8)0h
z5+NWq1-|Ab_y1&A+Xy3cF0I^(m)_NU+IYPgRqkG%b(m6g>5-`Bbc3}$YAplNy!N#I
z<4VBB$H-7x$Wq#&M<ZrH!bk@6aSjhNbN?wIyC}&MLH!k%u3dSTl>$GEP$I~A7S$m^
zSHa&#q4ly~GAiQH{^-=z?wlAMQjl^>Kx|pWHlU{_rQYNj<?EIH%f{)5x>I)0mW3ve
zbENH&&{?Ms#Z3J;6rfJ@6|qYcQQr1XVmC*tHuHzrzpgo5E)(tl5|H&j(4Eg+QQZy&
ztS@|$LZMnDWbouxxtPAsM)sH{27VuGo#oyu-ToJISB($HKeiBG2SIK_tDj?(r0?NY
z#y}cATbf+lwB<&n{-@HliRYa=7bM+723X{ZW%>Rsxo1iX=0Fd$4{D8LL_zf+;rJqn
zMh4`!6=<!8<b(IZ@c@|E*%WSMdUigyw9VQA#Oea55-bSd?xxZy4j<sHa5Al6EQCDP
zyJ?fK=i=BJ`VbUIj93d+?ah<hYNjmBMnj^7biP{1uEdI#_Q<XaJ9k6*7Dcl$n!ezQ
zXSSCSc$t?Hlp=+UWc+@c^G$y|S*Us8hg!4l`H;7d&Bjj@m0q}@e$qPJ7x2ix|AfkD
z+%aUAcN%J3j(tqbU^BEV$Jf04k;HOIl9!JpW&gVpmm-CULjR@H*f6C`<Kj9$E3SGd
zvQQ#lGXP^j+8Pg5BAYUWG!m{?@840^5>&jsnmx{v5{c$uAtm?FkGUee)1-i>S8h-*
zQwOh~B_Jy$tACE+hLg~`X+GV_a#VUN5KB3Ot)3(FROek&3b^+rDD7xt+uus8T~hhC
zGpHy#ANpcI)jz4Cf|03^^@8K0z3~kGgt@)_jCSa0J(w|6yt3`I1D({G)No)ioHVT@
z-OkoMhBQfo{BxC@=~9f*BqI4caBMB!Emd=BwUfQMrYl|!Oo5w|Wo<9fY~P$1%yS3F
zoqWl|5zw<}BzRsdTyuT)1y&~WG+`K~)NbJIyyTCCpSB7o0OT(;dY;_*13R^Szy5Tv
zuZ~}adXqaeQVwf{jI2Dr53${%ZvW+2jWlw@sQZErfbBSB)9wB*>0qosW`ZKy1UP`<
zD!1FogZDojnF_T}A@Hc}S?`gq$hh`Tr-ec<OB!e9e%D&+MP8V9|NhhV7qLUQP_wZk
z%WZdMN9wPpR)J>GQ3veZv?7vc-l$41_;m`GyP?g?bKIZQ)svced5=~P`-IWikUAcI
zMrIqOzu2!ju3wI1)c&yZsE8=AU+utCky2Q2LEoNJ$TD5iGs)!bJ>}JpTgEEuT|eF{
z1^+uXC?@^Ij=+2vf@7F3n49GUeirT9!(+c{VPh3prROyad3-($K$_hT>$0Rm1(;EC
zk1wdfzB50tK*4Cm+?9GVivO5%)d44yg}0wtCVi_SQFnSL);ssUzMqQw)U0s2N@q8z
zZz*aIl#Lb7xnbvX83|9%EfQeJwlA)mnG!d6U5Sevzzdy$%|<P0Rz6?xN;!P^(!{$S
z;9n4+*o5%TLf<Cr>@!Yqrr|%PiOl7NeK50BA?>wFOuT<J)cvLbDwq6aaUi;rwPL3t
zvapb|eARbApzh|~20V3k@o(5c*WrtqnR&yO#j8SX1E#_LawaV+%zMQbK{(Qa>CN@V
zoeD<(hYvAwt+*NZ$jjf}jGVsp|M|Q^uHOv5RCj&cF#3b%oA<OhWTQIYp4nN*aG&FB
zkuJr2C!Knc6cf%?oa@?G68&WDJr~UgV}5q%(sGzAaHGkEbL+63D5uRzbmny;cbxoZ
zE=IL(LTBOv;!k&a9?$n>pBC0Hr*>EoTddN<RhG2b3-8cZ)+c>ilfe4@3r!^E{i{2G
z6rZG+$Xpc}uEte*RRLoyz<~yH1MKTF&s|)H{NlJs(xC<~@3Tviaa;lS7w*!5LazaP
zP4KaW36*#EZp3we;;zGom%iGcL_)s%l^jl7chv=ZN$#h=ibm3LesO>?Tw7}0G_eGE
z><U1tdG8l{Y@b}0Q*6+~2{+skI^DHJ?Bv;7(Z&PVMVCX>ee^xoK(({THDkbU9GI)~
zX|9pyoGW(nyrr4|Q-$F7I_n!PRR0c_z2pL&>xUX6BbK-c9o!RC1(=}^`34$n-P>C_
zu(OZ_6$w@V-2=ZSyMH-Q*ZrkmiNmGmQ%&;*eU;d9J~%CK7IW016bzn35tbK<#}A(K
z(0zhyWKYte!2tvApUy^E=gckw{*D;TpUm998#5}Zi|9ltnr?btZI4wZE<pO-y5bUT
z_VB<jJGs6-dmH|&XI$K4<Vt@<2t$6ZT2isyB)Zp6U?qrHN6>S;TD<ny>LltJ@#DoZ
z#xtuAVWJv70r%LWw`z%b6i`eL_QVzRsFY`mX?d5_I>B%^-U7UjZgxz#UXLm)a~l83
zt3Udb*Sed03O!-;7E-6Bfr*r*WTY))@OjTIm!EPR^CH4tt$PiG3vBW;zjz_7<oEy&
za<IcAmY6Ci@dM${UVcU22>CKtbwql!(EX=%SHV!>z-=OrN?#6+SGDyN!ZS}EGLNrQ
zi){Cwq>I=Mx_HJqf-wLI)QHL-A7I$kqQwFQE%{KWZ6}wZiCX{*CQb~uE?`ByrYeKn
zZr4z}t+sHE<8~SLFoeAiRt?tE%NYZL?m(a9V#M&G=f%ow@iC$yup%oC5@tKm@TrD>
zgOzQ%$M%-}y&7(&9r$$&?j@a(1k=kud3xN%!ww#B-_FLTRtDLk@@QKeLOMYUv~Bxh
z<i9YmU$((oG8XW=3l$ah%h84@@10J6layw-Nt>SImRRvtC~wHQ_ga@kGvf3GqHa0b
z0yKL}ncZ7rX55;}pMBVIZ~L+|l>ONNaRx`2wwxg=Erm)t|NWW&;dmSfwfTn)G_dNk
zUJc9XH^cYwNhgmEbfq5hI(G>cEbR>Ovo9vYgc;tvH74a39lEk_Vn(F4mEbG9Z}oGH
zJ3-R#W2TcKJ26M5+E59>{yKlUp;@PIFxH;J<rf_d8MWk%fft#ckWSm5hor`Pm5Cv$
z>Uw^hI%ggXJe=lJiM#@coql2SrI>ivm?%co3-nSn`YTm0|6LA@)i%}P+gXajL<IPa
z0r$kQlcvI}ra(jk4gB;X@YCHef!tOthnbz(2Z5iyoK}nZj+425XT=;~0>tP2i=`}+
z^C_eC8J(8ERrVnA82~@zc*<Cn)bA52Tnv%D-t6e_DR)3qYd6GLUmDNcy|?gEYs$bI
z^j&)pg?P#?bk7A($z1U3&xd7pVdd0PtQ_N`oGV*u1Ho`e!<9B?==sm>X;4tS0U%}T
zudZ^Yo9oRzmwEVg{hXHl;ckgeIld=j{t5R|TFhzlhTP7J_ywtegQNxj$EYvqE#O_Q
z8{2C_umDsB0+UfrcR<zV%Db_#VTN!0ukmG+QWD&6()W|}2bH`&D3Z6;-EIN++jC+f
z<Wbh`vzPY%60Oes!IQX3xhHj~h()6lc>G~wnQxZcWWqc?AqH4i7p51T@pv}MzB@S)
zTL!&scU6ECw|z@qA_fB9WUpA4HPy=s!M114-{s$QeQ&^CxeSx9je;C*FMdE0tEFK(
z#r1eZN-&*zuMc?Qqf&-6u!@g(RoBxn>r&so9gCe=hYo&v9LZSkD1e|<A^eV9x+6~9
ze@093#LDFT_1CFQ=(u9AaiPxG<_n%T)_aAr99H7@$j4K=#o(LCUwRVZ9V7zEdo{<<
z^-p4)r>HXoh;!Qh264`;Z)fRxW`A%_I=#4H*PU~4-yF^}29N8|dYTbcvf}qFz-{E<
zi6-c2W}0NH2;&!~JvmkS;gd+k2}l^-m$_K&i$T<aWUhOnN-I#mA)*i_i?`zowdTyz
zuhy1>^n#(3Z8YV79Op8(p`X8geIf^D#Vhc?_wa5Zo#0u(h*{SEbKqEVme?W!yPnUA
zb3j=`POL!_{M_W~+S7Cia7;b_Gr1Bx{*zxtU}nho6F#EcIueH*i-t<evN7+o>7$yk
z>*V-F@~_BM*Sfi%LGP6Bx@lmB{aU>Pd=~yml(+Q~PneDDYh9z(7pCm=4icQ|w>V5l
z&y7m*q^!(pi5D9T$qx2T{d|`1fzKP{yP*!v6xFfR_QP0!9Z3-e)?l(CX))pbR|>75
zD0kKVJ`<5^TatS7s}(O+=`Z&~vl`viS7;2F>k1MhD~jr{Aa^me`ec7Y-MREMe9Z~l
zU~0rswYYFU0ZzgAtY)>6;Z;axu{!2Rvq$#tx*E@OixQQ7En4MbDrvGZ#kYm1z0Hf<
zDv6UKs0ocpN_;R&eEXV9p!9(&PG$_8ik{VhJf2bce&ISudtbjGEl_X51CHw)_FdLp
zxm%b68x4tXL_&E^y$maFTJvOzYB6&Py$wi9^eh96KvqA#gf}`+T;-Ye^ka)7ZCc5Q
zr;ORLy<+1sc+T*0Fw)PcuZm#-o-sxB(_05k7=Sk(fvKa$M#qL$k&81&Fg1AQK&)*P
zT?<WsVONlftcTyX4a>mmaI<}adX}QVOsi0(<=I#|S;b&@4h%O1-hR-0*uT9EudGN*
z!!r|PCCL}P-@Yp3AB>Vg9LWqiwc$EJt_Y<FIaWlL*Llr^^YC%V{`O3O2cP=E>H_z}
zSj!Shw_rHbl+lQ4@E#_6FVu)P(jMEi)3C9P+4D?zVdIniPf+sdD&Ys$4)iB|{8(nC
zY*6@U?(76KMuX|~$8UF0kNJZXR%ldu9zC`4Zo3a&J~Pf<pViK;zk4cF9+>0D_bJwo
zKb_U3Nn1OdQlU4J^~8EO!RV<v{>PZ2o&nA4^l}EyvLITO*fs3{UaIl*R4qM20gZy*
zj<xC&ZeWgN?&kFq@FuXb(=aS&%iCO=&zZ0{q`&$grY0P)?!UYX(fhz`%8$<f=>oV}
z-R2ZDw+i{Vc53x3nh5%&w#=EP3?;OE{0>~K|FEpZ*5}DKQ;vYkgbP;d+&+2&B1Tq)
zzm`vWhL~eEio5JWA7q<Bu<0Ry2fOvSr&*CIE4g{b;&siy7I<itm;w))T`LP!34ot@
ztue#z?jmdN??|W3vZ;X{sP0rbOmzzQ_1ht{X=AaRV-!-VFbRB|hn#_YwlVk)T#T#7
zVITl<#>Nd<BO-KYMuChRVHNs;)FyiD<veD?V8QpG%fI1Wc?DwWY#W{KZpmp^&~#jt
zP>yFX<Ij5j(=m)bwMhbj8A6U{orsa1i=#K7I;B78_F)5cK^mx#J^b(OW6|MaJSH1x
zIO`XHDtH9*$7%M%6?~Bfl_gn6nNX7&rWgNYvf7G?S8qTMDmpkUnxT%vPyDRRa~#3@
zI{etWDo8`vUA1LJ(|h&ld3s_??-Q!<HV@P%szzu!gRgd6->ueH*}b&uRC=Az*BJy|
zJ#GHb@}e(_Ec=yEUk(PWS=I;SS|jx=kI0zF#xt+Bcl{DUdP%XP^}Ap$21)!(Lp6z0
zxbYB`6aJyKJvr;0|K0NrNCzE5EF*p}vf^O0oUG82KN<EK)R7`jPXE3zXB50JCn@-z
z!6XS$6-$?GiOhcNrpTZ`s^Zk(J?7+ZQ+#Jiy<fK%hOl^W6=~3-+j6tV(W0VI++t=;
z@dtI|S{-`}zFKwQ*Sc8*w1bqZLwvY%0I49%dl1Z*iHGM4EDDbjPp~_Yf4!XNf_p{0
zoGAFH4CXEBb?o#P1H&lsT{1McDKpsW75L%!S`U){250I+i5&=Q=gPupEa2xm%Oz(t
zROn(XBX&F5ls((QXy2YNN=&6$D}zKzA}`)t56=QypvS=+fa&lSoxg?m&Wm}>iuyU8
zgKd8ZtXmGmuCPJ{`1u{^ib9!toP|5gQ1pg`D_56T7EWA|zEc)Pe^PGZT7h`%2oMB%
zMDPr3qDlPuPS8$_CNgsb^v1VS5hybx43`RRCt>0E`ClhoH1TX~xqbadv$EEj*N<aO
z%vZv1?D2IG7N9(^##uyH*Sd;=fj1;t11XZnOMu3^yAx@iAhl{+nO^(uz6T}@JF-`=
z^^WTk@jH2C25!rx2i(2>BY)+l!vYs0_6k0<*<RvUD=@uI;>jD^9vEKel1Lv@Xzi6Q
zB)vR}Ty!3}T&B|Y!0<1P*E{=0>*2nBc+Fd`{1tkZ&5DBHSicZyU|r9UkJ__ALBK{h
z;o~nKESW%>m2FKG>3h6jczVIliF3wzuYoSpR6-M|#QsgS8i2(q2jDAeSxezs-+8cE
zNZa+4wj&>#lZGByS<u>nj8e<IYOcmDRK&xDopvnx-yC+Zsh^_Lebcp<$3y+{=`Vo(
z<cR+`(;mB1T-H|kBhgj(FWo`{m=1vV$+m8+iueKUQgCw)Uy*lJ&9Zm)`Rq1k1dKX?
zri$thY1Oo@-8--qiQxRR1>k&vJ>O7QYb?#4_v9K2CJcF*F2*AER?nZ&?%&jN&8Jkv
z-yT?<Jz_~v<e&(?89}>|pYE^H`*`;Ij}QsT?j9e@@50YoZ4Pb&eId&4ZxW^^9PI0>
za1yk97A6mD!h&nv8VJV*-!dEN4JM(pZ8JgulT`UO#0_ay^J{kn#|Ox-MW>p5(tYA}
zJU$t+ML92+Eqi5R-H?N)OhFa{H~H%dh0<ZV!rd!(he^>IhML-=1@AyJ!Yrap0H(4$
zu4#6m5P>$wmsAH%o9JY}0L86GtyH39VG5H7d3Fv1gX4~0G+?<HUyoR$>8r%AdSWhd
z)A(|j2FN>K&9VA}k~7@C+3H4$`O9Y#AzXXs-SSTyY@4x$-_WB`;5UqHHv?;69nbn+
z^{O4``X237W(I@Kev2y%ItGCWh&ntjA8sWW?lX?XDf)oZRZ5T{=?;LpXbmv3PO)+e
zElulkZWNe=9Q5cWiYMex!d~73+d@IcwJx!s@?wzAS-7a&bjM(}jMzLtCE|(Di|K(<
z$XUK$8{N(kYVp0U_nnVZGS7dPo$SsSwQMR2l|J8_Z&7#Kc`+BbQ~o3EpmZAnoK8If
zO))LF%CHD4n9w!=*}b0I@gpYQO}z%9pZ_b~d-J_~g%X9k7Jr%Mqor?s8~0hj{|Uyw
z;?0e#DgJA&F$Zb;Rv$;jbYuAN?SuT@{EqKss*SvqlN{fS)ni}#eA$+kLjECV*%D90
zi`AcoOHFGKOMaG|Ux}4+xh`P%oX?RaHiW{im)LVhO8xb?a`3Lm%oL~4r%vF*RIE+?
zn{P89OMW4#`<w0anD>T(cLuTK66<v8fh`=J>?->kui0K2_FrkG{CW?_w69pv^u*wP
zjCl&){TTP=R<77xuuB$<Bl@C7pD9tee!ZQ65~7&~t4zUVP=LSo2Fd&KN$}vK!cFbl
zv{l_(@!RZ<Jxb!ZVBp+q9jx-(^Kh?SK%xEgz%o>n7@P|PUjt<h&_-dJVmdX`AW6%y
zDVOL;Q~{obm<E05fxY>O8_->Q6GTt~EDTnj&z4Xg3~Yu~7}lWv<bZ20Uocz*Y-ikM
zT8nTf&dIlvRN_fC`Wp%vCo!M%CgOZ<uN4U<fpvXHm1+MtlRw``&>w=D-}SYiyhauP
zb~v70IxKy}PoH>!q9Zw24x+ZRz!MUew~x<%>b94u=}0fTQFkXCqwEjgHI1h)zT1(H
z;#%=ii67}20kG18`VF7J3&w7L*et<$T-LE2+T@Qr9W)+ZtHbzNiW8|2w!^NAbbR#|
zAnA`ijl28o5z(o#xob(d4NujVOxi2r*F$=_m!^hg#`>D?t`>=0TTyG(H^u+%+R^xE
z2|TG}2y?JHsb>I)8CSDKEbeJlswp+=H?zTpbJBS?XkOs{N^zNxO4p%^JUz8SzH6ZH
zP}k2R$3#9|k#;5?;k+C^2pd~#{(`@H_3L|&>DyFPLAg_OWfWN@8#aj@G@6n1lDqHB
z3VI@bgT_=pJT*m%rU(8HR`#SkMXmo{<4iG0Zhp0$F_6^r`Tt_GD(7~%kWSJ)MNnmm
z2aAUrodlIryBl1FI>#Wn;>HXX`T7M46q^SU4TQj4jKraxVtPcws6Dw3U<~va7is`g
z*DPABy1f2$hHYL4pDRk~uVBr8J_1llr@I|0hIZPhd{ZJKI@fB)G<T#?Y3H&SO3xqs
zbc6fpH0=RPqGWFFkH(P5v0l-NfD#i@m8TVP`t%n}K9U$DRKhAR#Zebr>K3i8e>Io3
zZsyolNZ-8*<zm=TY8IlW!NMOulNX^%FJOM=jp<0nHn~AM`*<gTHSBwne))h$hJPci
z>#O{QL#tNDsa7>u$_W2hYT7~F>l|@j1?iwv&;qgz&*Ds<;bP9RWQ})qTaZaUmlN1f
zkdJZR4NkE?8ME(Yxz(KXWPg{$<*Xa-($_fvXRhUo6JXKbozYxiCZ(O$$2TWJC6%ED
z&ukBP*9)0R+r3x|0ChZ^jI9-p{F_t3V(5_<4q=V|kj7#Gh}12l#%o!W-{rL4fqsAb
zv75I^WU?a=d)*y9Zmdlk&DYhigibbjAb4Z56n<(RXW)&(uN>?Z!~*C0ku+K_k5c~(
zrx_Q+sQ?7h@=s+IoLn<SLRVDR4|KEiRb;zVG0#8M=ste`IeM!QJ2_H`1OHxCJ8YR6
zrumTfb@x5Wcr#2+${AB~gzl#ScQn|RAsun6)ys>oK89!AI9gNkG6()u0^bLQuR(Wb
zP0;)X8t>S0u(xg`Ivn2Y(=%;+6f`ILV!f+R#e95ZO%=CL^9D)$W%q4^AtMza@|fr|
zNT;SI-EnT@65FHCM^~@`{5s2g4fj(AASNg)&MOQX5wZG7)6!ecIGK3RUg{UF`XA&z
zj<}f1HZvxz$Iu?9!M?H!JG0KrsA(w?pH7%+sp&@ks$0^pxLVI7rlgPTQ~=m3meZx_
z15AL*A^@yucsAlGUKyT6`-*iJ(%oGT)(zeTNyZonwJVIw+kg1em#LSt#59sf%G<5$
zC)mI9exb;3{rU4Wq4RuXup{WfA1{bb1pJ1R(toa&_;+jQ?>~n==JvRvLYVnz`3VuX
zCOWY`%kM+iHLLlyY~a}m2Upgz-zc><h>KdK$!BMZjP}_37Z>nf+7ff5Y8A|8YO@3T
z)Y$7rmo{gWqdVSonET|YS>18mElM~O8|sLB*?&dUNSKqpgS0x)>ox7s6VvXM7Ajr4
z6b{9Aw7)HahY)Z!1sCecP)I{^Et=qgF*i(mxb1}nK1HuDRQLc=yRb?RNcX|i`}aE4
z345xvjCt()Q{u;jxD9X2!yg#~r9;Hp9`uHwUM*<^?=D_41@th)U3B2_EAWGX!$_zZ
zD7TnSq;xL;Os!b{nqQ9}&fc`pG9TUOt`|IWJy|2xjf|(3ErCl??W1KRIkTKd7C`vg
zQ9ZWNbQ#<#e2rI>YG4y)nfWZ$&lBvh2<P04cBAS-jzT)YpHIJ5I>h`NL0ZQ5|L@3a
z<E82qV7BEBX+9j}bE64MxMqd!3_)WPI1CRv$_Dwl;UbrXB&pk&D7p<krM&Lj-1e`Z
zH1cOMbu=X8Xa_qEyn}UnA4q>I6{d&NkzeNHh7&iV7d2_AYnn^;s!ox8^sYn&ba8L}
z2aG>3M7^<`Yp0!e)YdB)>h%!J{@Pff@_D0e_boLBE=<_*;Hdf2zmQM8TDC&smTIiU
zLk!C5**)JH)ZXebSS~K4bTqTb!|^KbKJs-5_*wQ$0xtcFJ7A4@I9TcN`uc?%(=m0O
zE=XC%2zlMD5T~l#5!Zuocyc+sjA%b(-Y_1JaNbY7GA?@Xb|mr|xEb^HWHTaQU}j`*
zZL;Yta(x~a?ZLTJ9G=u*Ds=8JL70HpIb&5XZ^D8lW0MI-|JZo{!!P^WJ8LVUSX3vR
zok)On4bjtrQGIoS-doASA7&j|^PJ<O50*s(ULU!Cde1c}_(qndVsGPs^5b=>y3Px1
z6V^QPS30B@W^crC#bt{pOm?4~K;}!_PkDTb?Vxc%^M0-+b|vds;dnIl{f9Xr47#f3
zYaH!*!9l}n`9UUvvc~ZP<YY}U6)JC&^)r^qhsA@<b<@JIiDs1Bf2|_o1|Sn*;Ay8L
z{U>Wi%?GyCgutg4R`7-UEH+%IQj1p}3a3gAj#igV`$N<kbuN;K0;4}C_9uRq!Fv85
zy=nWhLwe*Oxd*ZGN6mGabF~nV199|Cgm4@nQ@QWEZ<qk%cERomFk3e-=3gwPv9G|T
zRYhUi_J8}x2&pB5Z3OM02eD#YIlAORHt1?>!}$LYmsxhu<M`&Mqv8Bg7B|uzrFP&i
zm}uTAfCW^m6QdedO7a1rwx7c>J(_fGg=}xRrmQS_ZcX2?vvMa3qn<vl1WH??c+}-D
z9ks=2p8K=us`4b<%sx5vv+$OC?6PUIUufK(oqpACpZ0q%7qKc~jZR4VxA&JqDQ`*q
z3=^G!(&ZFlr}3*4S_WKS#z7-oEdAnK22SSWRz+k(-Z#=j?9MCAq<_<cjlT|mOb=yP
zey7$fmkUyD5-|ZOc@gD6gB!8C*$Q?IMt;TH3b42X<FqU0TUI*@Z#F@g%y)cz0@4G1
zy-Kx}d8@j2#z2837F=w%TZ|Usxzhi|SQ72Qz)$<MT{7Ku{{P=B*v3ooy;N9DNRcIo
zZ{syrTsS%%ow*s+M;0ubu+zw+75o~8_8hyN{cZ_HC0eU##GS-;WzRr<H(P(MpYcDq
z$O5i`c9GTY>H;eEdqnI>T4u8K!^fJ%vh2#elv5f>yjN1MQLEy<D;e|QzNRk(cty{Q
zyk*p_#oS8xz}K;PK}B*169PA_n^GO#es*&s*2Mfb8u<hN{l}NAiKGZHg#>DrM;o-x
ziDiE60?hfLwKJ~zKpBi+q!=8t!D^XLR{KZ?jU8xzsYSvxcie>o2>e2Q_^I^zf{P?-
z5t4bD0N!tX=SI3N7=$pd{oY`CGZwCKF9D`#IvQCesCr(+v-JgF=mryI74u=jDAToS
z448>NJAi?0mt_pkFT{w?%oia*8MRTwpZEPAj*WI`NNv{A&71hGd^tqy2J7Q-bHd~&
zrpPO-XpH#BY|OwJ89b$QWP9H}*vkL=e1$Mq%e$Kg4j*5o+S3UwH(^&7e8sQOQ_=(#
zd<c546LsnEp2n?^=gjki!)-Bn&*;cZf_K?2hbOYE6t37>xV#+o2m5g?>U>sLY1k^d
zSj(@;52%z}YNUikvt=Mvu+tH?_f`*`{{|I9|A+-L>$KB{4}1t3O`~t`zW8`TYx<tK
z03{^zDegF%P@Yq*Y>mIOM;uiANahTm*|feOi(m`wk7DPMcHC@Ul&iG$X;Q#UB8nz~
zWANIeNk{^W%~?F!fz>yk{s4{GQIB=$9zzEHwX^UT{PqgXt5vydo;=b4VYgWz?hE)l
z+z;ctCCZHB|DaQH7q@3*5%(uJ{x9tvo$MaVy-Y3@Vixq(sr<#WPV-66xf|QQX;o>D
zI*|pGdnh?*ZEt>|i?TiJeZU2yQ6iVLT{$EmYH#=bm>K=1gq<a{JGb{<J$p5~QdUO@
zkKxtjCx(*q&ZZ*k%bO))p3APXw<<UL8*g%wy;P^H_rQGQE^^rCoZ1&+sUfxL<+n!W
zE)La3owL7t0DHh$-r8~BGF?6kgb9<n<Z$K>cYr0FJyo|-Q$|AvZ3f+I^POjX^kZM~
zq{=BgD&UQ7nb2QP?Bj<lUi*tJuEIvCIIPC;n?gO^Zy2*SodB;o%X1@k?DHL%2Zwx>
zO}6sXKEmUG2;(C)VJ3a=@C=uY5;D-8fILdSnUj$OaTeY2o~3rs2g1`I-gH62&LmtH
z@A{xt87aS=G=(7yc5m~e&kw=gMU4Pj23u@~#}m>oUoM%>ZOpv^yCtjycYclrRCNF7
zEiQSXOI;3|Vu7hYe~UCcfy^198^!Ssg()NUR%R<dc%0thJJcHQ{#MMw;5%g+)y8|F
z!8hK(QLFAaomn>6qrEUsHvv=V#X2oNa*?UqxN^MmPcsDkWm^eVuWN_o`@CJcL^jA|
zj|lzXOOHmyg^?|brhO~^m)aXsUbYmhgW{3-Jd*#1viA;Za&5au73qSCg7gj|BE2IR
z5CjAS0qISo2_n54h!p9)qaa9?8j8{-6zNTRm)=E6KoUsuoG0%0{hi;p_ntZ5naMbV
zGy2D!o9kZJy4JPU0Ed1S^DWBwTd|m)Uq?wWZ_c3esU%uTuj?_~v*B5qJ@l40ak3qU
zhEWe#-a;;MpzkCsy>;mmbBmBoroF=cd;3qd&!M_VR80lE;P1`i$h*9_p#eq&T_xkR
zQ)&egAy!HPR19d1*J!NKGM}|se&`Jw`cR@t1l!2%b~atfYm>0~@s#_1AEesv&9O$@
zdHgv+pMd@9Y~%RHQua2``mGk1l>@$olTFms`c`+}NxXbG`KHmYjkCQB7Pf#lE@0EQ
z%D;byMiDFM<Re%o4sLNl#6+qprU1{0G3r6^B|7MI<b3HjF|@k)Y3$=}uFT~dS32+y
z<OGwh?u(8|h^I~KL;QZB;$QoPbelR3|EqE%&j97U?Ef{^C{`*pTG&Wjbj)#$F3C{T
zSdjB^RQ;<c{SOL>>3!|pE{_R>iZDVfHD(Fkau4$kZs~+{RS`ZmLW2c?;nuFH>jWJ=
zl5MEZQ)G!OT$n7WT2)#JiY@T#&2F~(9W)wc+7TTBD<6zzK-&pC9_SIAB;%t$5_|i9
zy#B`d!P(NwaD|od?R+75vf?M2RS$LYHPNDnM;I{b@S5l3!&=@3$6^75AhAWF&B&Nd
z$8dbp$-d#Al0MNQAp-{8)~>!7k%c?LsnTDGu2k9>15W;boKz}a!#936@e236y~Ji1
zAeQ*NEam7kjz*$AqNLp2t*<7itvX`diBlV(np|0=4Ti@f0*ugDj1A`DOs!QXyYEpB
zE6YdMyX-wZtNXSXUpvsWD7N@fCf}xnNHvbx8>j)7247ef#3+O_+_M`Sl|Iq@z<U^;
zt>(aqwZdL~0Fv1q=R@SJ|MAD6$O7R?XzVo6zrhvj8&V&^xYXy`JdaDnSrH8rk+;Of
zm%lBJK*{ay2B1DX%tT%Iah65@*xY7^Fo6Da?Jkct2PAY<L?hHnz#^on;=q%d*y2^q
zy*`qaH?p@Se-yB}2#Dl^CSI3kY(81{-aFKqc_PGL6g}1)=R4*)nlF5B1e=_O$N&e=
zNN^J`P_Ojbay+MiClxP)V(`*1_l!>p#>T#Q+%lidp4}dFLa(-^L2H4o>VB04R&<-T
zF_pNi5WLM{l!HAZGq?=;^tMUy$^P_t_GQ|NMOAChdVN$i?qO$EG_FXitD#`^rVO|u
zJIv6Hg6Z->+m^ueLKN*DOcoUH5?h>cl5g9yY{}vVPYGHoW2`XyG%dNe{xY=8!&^US
zKk=gfi_-ree+sn`>X#in?4Y9i&|`g@^2zfLaiSCC{g3Wq?`nqJJ$xzhOU>ue;QrnL
zd7jXakMMh`B?>@QX?TUYZGFGKY>wQ*R9yMW{B7bR!Q?2k7NH8=NL@zS%R)Zfbut|{
z_1JhAS)?O1p8)-zd?i=Y`_cnXn{GDIU;hcl&b^B^YJ;pL?+NCp*Hf`}-oC5BWn?{;
zJRYF)&b6>vKM6KYeaklb(`9zhHx{3&MD{#)0W=xpd;1vd;FRT96v2ireu*xcv<z=Q
zv+bZH|Mr^6TWvp`)Jeq3wJz3Gu<^i0A;s+cI^4I828OeDB?@g{$3gOMTjhxq=EB^;
z5Tx6nbnH(El3Q+}rmruCQD}BGPCEuT&nca$-TR|%xQACac*W<!g8z+xAP8K&Tn@U&
zvs*Znx1{hrT<_DWnV;OZo(T=L7(;W*rE(=6UWIT>U76&ZC+C75Azy!>zig@6kC3tD
zdK|s{^T@6wi<zg$8eCI4={{$;e^`sIyY#^EGj$*D&v+{2N2UkLVz>x0r%>eqBOlwW
zC+|3hZWZZE>l))7S3yC!#ajq!Na?&?<z7fyZ781_&tD-*?7@;9*&KX9UTC%_B`y<x
z<tG^fnuef_MX8X}jZ}fBj$;l5>bsy{YUu}(C+{X7nT3Dhs;dNMKwIZXs}50v*ed)^
z65gs<%HvBnV%*hZrhh{kqb!Yghe2Bp{%%4ygWkBZxL-d2PiEbh+{)zfPBEuf)y7vX
zgbYuxS5Kf(ix9jLuQcRuQ;YwhKK&2Hpv&e$P}=2k#Vkqi68az*qcamqmECJUtrs1L
z4;ea3ABr%hi|Hg#5?;td9e1nv1iUhk1#MoaYU<t5-x$Z{tS9^@(xbTZ(%X7Cr8#d@
zQkpb|N*9$=tAiUiK4FY~52lX$?0=`nFH0yzhr#+Y3zwEz`TT{R%&f?Q)~V((OKkcm
z<F~NHih?J|qN_HE72ul9Q(_7bF=mL2hBkAXr3n_6)rbAg(nG!<zf8Yb>mWboGINyM
zB{U7LROF|=o8%Bs+=4a!NkV$)x*Z6%XA{8BA0chD2C?>^UpSELF3u16j$^e1DO!Xo
z3P&F8eM$?<fz7TS8JGKDzCHlFnRRu|yDRl%OeE);yAhxC%9=bM4n-*g>pd1HSKCbF
zaQhCKCh&t!65f3X)a|~#YSM75JT@2-Fn)yNf$@$VG~$BuWoS1PrZ70|UBvwqXRs!x
z#^HbEg~;))wnR2*mj8nJZfGo_R!oJP+j$(JtCQrKndvLE0RePjZN0tKHikaJ*V}Df
zmsC781V#)a5?04#V2;qca4ZL1OLYIU0LO0Tyh8@`3w};pewb39n64piDAq(~H2lq{
z&d;{yt=bg*Os-?vnrc0HqiMWU%BsJlcB|JG$rB+_e8@C`>>P4nuuNILFp+gOZPw^P
zg}XZ1`q!W@H_TR9di0@5^z_kfoNLyRH~IlL)(dsiyEuBd@m9KrO2^8-2A36%v+K5r
z8u=t4<@Of-;yrb?Sg8(ZxKAP@6hle+DaUd#S6$!R^mEXfR-EoO9vasNetY)}H0*Rm
zM>}KHHNh>>vu8@zObXJ3T_^3wIpSbOayb`OWbv2W==FP=mV-~9zIS|wIC6SS0&7hz
z;8y?!Skb9`giU7}eSZchYVCqF?D?}y_1>BP+iB+^K>ROEu@n)3v~XwK#I_jv-RbtA
zU_-qvorD*~C7KTz;kf$nPYK2n&&i`h4j&<uy6kf())R^NFRSnOC~w!k#!FVdOIW_6
zCmCEDju2A~0(!&4t3QVO<nSf&9aDB(B2doZ=oEL)5u+IjY33Jjn>vzSUOgg`M0=Y_
zhnmPtMqh?Gt<qIat+E#D+&+<rQ*9roK2}33CPL-k+NnFHEFXcXFst~zlVou3WE-EG
zG*TfU<#4`Wvp0N}#!$K?Tbx&K%Y2nT>>Q+x%?zIB_sp8ST~9Is5(#gAj`qI&6;xdN
z3!xe_8jZuNyOhdf&xU4`Rt2@Wq|AFLLkhgD^E{IBf#Vy8(bBly|L&cT|JgeMeWiF;
zfrTlLxd89swmIcoSni_!AZrxf?APB=+CXiJ1b>{<#CTgA%8;$>KaI@)c*~v<6g`li
z3vtycru+3`j$U&qnY#|<YLs>-L|6NrQ%O{GFYkPx;al{$f~z77b@(mIwp}TjNV)Qs
zhwtu@xRC<xzbhey*Vw80E@*UQ_g5~zEB(YS|H*Yt){l_weu#S_AMsnBOAfw5uNXLN
zj0>zj96XZ{fbeFPd3l;j+g`2pczWlj-JO=VFu4rfSw%@)B3*9k_{mQ`|J3IqsT0pK
zK=N$Y<pAR03!1-v#$}b^ia?zEIL+J|+oS<}I+f2B1)1T}4~4c1f8vi`;w?|Ogi8J-
zqgcdL;S|Y143Lp`80`*PWN6i})B*9ibEr~!!1eCHGHuG+?nOdZaEWsP2q&1!&r^zZ
z_hgS*JA}JL+hf4Fenx!wpF*78dYg2~KQd4AU&D?9?YY^41)<EF;J(KFWWK?XRyqRK
zp(u7WXyeqTM<@9z8v{9eMT>wCu^H6iI~9pPRLhyZijBBMzKDuSM3<q<8{LMdfwbwU
z#$Q_|TIKM3UlGF+?voBoNUfRN;@GvWPfutwc*k^T8q)vBca4sHKqg<jlGZLuo7cE#
zn-76ZjWw`(FC-G2H2n>-ulSr=fbv3(|AG<DaZuy^(WYGa2^f!70h++j)_LUry#09B
zLV{&{2Mjtit0NyvjeXJcQ+K9u0;n3?HBc>^L~#DZ?`p7oZ)*5DJu2+^B7fQxNV6}8
z<>5*vvmb!Ibg$a;OSK<^t^%RKR!Q`$C|EZph$XL|y>@Xuymxgme#Qp`36x-rnQ&<4
z#8~x;YyQ%qGd(P`1Q*j4UHcKiL16xS?nJ(Dp<VO2sT{hiVUOqcmp`M64E*Tgt{kN4
zUyy_2#X!S|3-qoJLa(qo*~g1Vku@B&cOOlznL_6$e#YePD$2l@Xcql5E+X4L(s@nH
zHGOX<^ex{Rom)5fYv)2xSny|REA%Ey^Te7+$fJ&+%-=b(-w?Qi?C-q4RaIsv`XB`b
zrtUbE4?TnHmk3H_n9&yXE>`o7;4Ya$t&s97<gnL`Z*7Lx`Ku!Bm?!T*qC0NWNcH(J
zqjg;#w+P_9`oV;s;H%PM$?VQgNr@CTN(9Q1ClRACa90$3=AIas0M$;_=DCyA)g`$)
zM*r{1#r-*!@&mmO)wh4RF>HZM(a5T51@+d!S$05KmsHX3p~ESmleiOzinPm%c+hw0
zo}h_!ndYxTbd<?HB!kOXwrQd<l2g@2gmh?if4Ldvf#W2vcxQ6i&gZO%<+;MfE3^p!
z9|v899^?8N9Gd?Z89!1q1jfsJo0h^a6D1T%*Lm}(A5U|LZ9u4FaC<SeQoKPiV(|t8
zA=Xc`kcIgJy4Jf#y#S`+q2<gL8)wk<2*d4ny@KX#21Ue;?VL)M!htl({xylT@C;q4
z45DJ4<g%@WzD6Zo&mOiPE~5dUrKqfQfu0q4-{@A)J4zR6;8=ZzQmlpJjdFWn7G~xS
z=c^m<e7}cj>bRjNmO=CElL1iUH%>W8MOs0{>jX0B6QBp_(1e6#!PlyVVyr^!B;px+
z7Kv3L)t?Nd96FhXuOyb4ChyF+UWm6OcD%Xj1op#I0Mjf4txPD%nsJIU*lXA>8CND|
zP@|7oqTYMy1GWis`8Co-6&oOy6UrP#RnmK{q4HGY(sLm<;Pu)JF?PO-q;g~5!_Xev
zK%*0akxy7+9o&Dxfh1h*qK)3?|L<ff7jnY-ljvHV@o4pd<}G+?3J+v>bxao6UWK9`
zu#T~v{ThC8mI+6cb*;VeQm`NGH1KDkDh%gVr_VotSN7}23b4kvQZh-@Jf?e_Lo;2E
z)et@)3#4vaQnLS^@WshPo7SLNM=Dw=tha75HHh7O?dh#UHR|<EFp3GiZ~n6uEC|^j
zO%we-ha^9ZLaaTpR@qNyzj1o)#XTpDgM8S%y6_XA##>;<m8qNImQbbK4c{x<%+s@>
z7c_hHo|tFk>&4%=I2*dooGz=frZIKt2p1P#;&7etI^mi27UbasCm-vAg|)J+0ilTf
zlPu;w9bf=PsY$rMth9vVc~$b;szGFSG#Zy>RL}Y*3irwmL+^<ib%>_c_m(kVG45cg
zK+FOoi%tDgYy&z>YQ9Rq(Am_JK)YVwz|vI}X7`MVQek~|{^*D3<PTka>!_$TjcSTy
z*W>+#?rcf;71(<rc$eyS9?jOG*YVYt@eJWO+6Z_IERRn(v52{)L`wzu<&U8U(UI1%
zxQB!nX~>p^`_}6Hu59sv()U_J_(0=IF9KJTF=%&;R%9c5@VC!WEa4BpFZBA0gxtWq
z#T0i>;zqdC5PWEt-!IFQUzhzmX(ZDSB0-*L^yY+CiaCi=Ku~a`o3O%;BtoTbc60>8
z#!Dx2n^>K|Zb9e@9Bof%xuqKsPJDqxJ#5OI0?vEG&dqp{e=I-XC<7e_0<eYxD_se9
z5u0L*qthElg4!#Qma!QXY%r(ylSkgZs*#r^D2e-%4T^z5a{ZW$EYjPep$<gXx8`jz
zWwG&|$If*J4yXs0yiSF1vH<~Af#)wJ-8c**FO$K4!2|*yA$|h<tI{!Zt!1!AKm}-8
zt^=c*hyZQ5L8!XfV=^%-^}udv+r~)T!YrzWm?vmme|rQmH-^{=zO=gd$^aFKlgm7`
zdumW;RWS}d-k&_)7wdX_X^BbHj0OM?v<RLMh0oYFln#`FmYcpoqy8m7+Qoo?RbZIK
zf6Zj};T8>|IfW!Mpzn#pYPH3-Ukp7Rk<05yLiJ4{`jS`cB|1$6)LbO_6%@PAmpWZ`
zoLTP2F%)g2kUEBVIlV7gTmLvQ+1e7r3MFK{7g;#wHut#Cg}ffJEJC&y+Zr};p~Si=
z<7tM*l(I{j>--6LykKlRUAR{;!Y6<aMn3Ah9_&Kis~V78mGzJg(hoV21jOkIKVvHh
zcUe^Ib(=>J=Zr9A15|EBDnks;Y>V~ts$2{t&+na#-{{0oXOu<9X38pSwlY4C$<YpD
z^e2z$#TD|9wxzZJ7MLioFoA(R)zVelA8P4NKs@(p7e-nn#{b|<g6|?8-jQ(o)E<N8
z5e?`-o&KyF!7maR91GIPt5J_%8~~=kjz^Un%;FAd4Q9I*1}9<ff=2vHfH}&GfVHw<
zRi(LIP}JtV^wS)RpR3?Bk#2mOR7`L6m9M7eZl_4LMrcB$;+Y8L^h`)bO6aCvi0CWc
zVptMT*)fHNT)ZAAdwHiB7e4>%PD{bd9~O~qSGdf`hbiu;vj{iqa3|6t`8<MEv8@WE
zJkz6x-Zc%&QgG#G(yww%``lKps?+yP?a^TPKc?o%FsdDX9X=sL_dL|&FV2SQFDqRz
z+q+<&N2vr51f3Bue0fv9&}50cxR36^+A^*VuhytfxQ?|AHK3HR+USM@1jO}p?o39{
z@K})a@mbtw+klpls+^3%bQ-;3Lpop`B?&el9sSt=iP1x#`is{&jjy-h;o>?9c{X98
zhh=B|VS#xQSLctdamLoAo@G;1eCv@A-*yOS;NNE5^{<QnP*mmgQri5`Xom+VqP22&
za8A+;Dx5R*moSMGMOtu*6z>fIQ$ld!m=OG7`&=VjIXGeamM%y|@Xx!vsQGNxFSLsY
zr&s$U1^IJo&A37R5c=cM8Nv3gefG_&jFlT2WkLqWHS68G)jI_k%%^@Oje?K#7`k0n
zvzR4j{g{hi_*8=+cNx3(Y0vtVUS$@@b33u{I=k@%q;8Wm>gWI3oW0E$Dy>{c<K27`
zCv$zEi&s66Z{c`U(qeUZ;<{}_229tEg*b|`_~wzpU6x8YsmF}IOl=}@Uvc17uE$n9
z<$RqbpoSOPuUo5cD+9kk#t|B@tm)Zt$e-m7*@Z>NZ}~*y7Pe8yV>ibMx=1}f=p&^O
zgnN`tf0m;7URE^Z<@X6zfgw>qE(Z;4WxnJ0Gmc5<aFlfAb@;JOpi&1&7v2yVJ{*EQ
zT5drAdmG~w5ac5Ibn*Ts0RBt&;XyroqI}tVd8U?0egyRFU+!?dXrS+Vp_bc4^m%1W
z)ppbMA8EUi>QoCX)#!>LmdlrANQAj>64VS|%<i4mwWTu6ewIL4@wweNecEK;)jM?^
z#7rzh(NCpkp~>%|@-Fe(shFyQu>2A(A9E9NHADU=KI8a49gYon%H!LTUHr0brb;_J
zX&b=L=K~2#MEWt>7N{UZz8$?9aW)c8(fR|k&~J+tecQ;_sgh7dTGl0b3s^0KQHJu$
zK&ybNa_!0y#EY@lEcnvk=ZyoyhIx9;$AFpQ8T2M@n5i7L^_dZSRar6`HcX5cs!KyX
zvoE~}BLcQ8>K?_aB#qVUQNQ2#8V7l4xV#(p$RMx58rO7+239xICS0BGf4v6^=Tz*P
za8TTOEep1(>2Px<$Ng871#Uh9r+qmRG|j<Qh5DWhGO+XFU#n-#;1&IHMS5K~6f72h
z`?OAy9+O*0SRM4pzcicC@U+_`$Vjp2S)Fs5IdXY%DI8h5oPpkr7`-z`z3dphyQCtp
zIt|`U@j?R>M~4WS)_vpyT5?Th>Vf<<_Cez$-Z`BxQdF~zXm0_h4Ja0WIe)v~*T`|l
zpdYcG8oS|KWyo*=O@kAXP4qu>-jJ>PY(@}={oFS$cQs{{XTBhy)+BTaQt?L`9(>ir
zUprg&j8^`_TWrva=@Nh5-s@>BNcvTp<=sZ>NHr6Bz65A!GEqTLg9no+KMK74#}bw$
z>NW>?O2Ds|7~iwGX$i~Zv`qJHaYpP*;v+WOY*(18`)|}{p0V@?Cp(NU5{{@03ciwQ
zptVE%=G%+NdI(mO_lw*%;>$P$^m6_ru{n{<Oqhm-GYZeqpF!r36&rvSN!_D;J$Lj>
zSypWx>}T5!gJIIy@BBSOo?sP;FU$+bC%pdcS=}&5#c<;bhv>RKA*6UL1P;TzN0`U{
zRU%Ayz<OQIsY&$DioxzOO^9oq6rCR$EOqw(rjF4yU5ybdTRkE7J0v=|&N%F7a#XtI
z#~CBmGR5~(t%D8+P|<(3VcI{?>YVC#{(7#7WyByD!BDACUV>Xs61pM`>R*u%{XiCf
z=WF3~QM^h@?}B<0;rQkSI<5XVI&M#BS-;JJe*C5mL^Zt!^;;Co4)8DQbh!j;6EJd4
zkGNB@Yu9LmYpi)e9|!OYcI73HLk=y#U=PqM^z5{<D|@c2Yb&6*VpH)LPLtT0tQnL#
z+ldkdsm@|^90n$vLhyAEwW*=Amu04wemH_yl);s?MJ&a+p>pfN_Yc4Ty~xX6jUNww
zhrx2-if*{>iV|t+JSj`MY?yE`vgB@Rp2!`EvqgEQ@8HQBSdC9M$-hSnR#m5n9C$Ye
zzx$Dk^)JOSI|CTSNGT`{F7wZg-^Jf}&u_(l!)kwiyz>4{9ob8D!ux#D*F@~uB7cX+
z$#S0EaN2TgWn{vFRx=bDQeX8qMDJB^4W4i0`O(8QdUS|TlyaPUlT}lY#&!cZgW`{`
zC()QX7ao%P;9$_M@T22hIadBJiS=rh0dgII&(~A!{aOd2K|!`alr}|iwk*YtS1A>=
zH@j>GXHV~lbL-)<A>SazCDPQG2+6L5dZ#94d+??2-{Krfm_!hFBzt=HN4picQ02ld
zezdGFXzJyRHFFQ}*k2vq>JzT1*a$=W0a3W}-;YPMX<yl6=qJXm-DGOPDct}8Sn}cK
zNoj{ShYzqT&2aGh<m#N=F;JMesj=yfh3urOX999S(Zjlj*j4Qu59ZJ6!-!k}F%E#W
z$ST>&Nu20tVtUwp0}-Sk<{l-=QrFRdO_=e=;e3!g2L?klsZ972Ub=WKykqLpwdXAl
z{!vooN$20n!<8z1?)_RhxRu^T!l?Bu_F(RLGlg;{%T|ia{sCf;hV}4ymbb|GN+h3W
z=8^q+CwEe#Dfeip&AO{}%Cn4$KIy_Ul^9**r09lo#9cTqUn?PS(@6dCclDbG?1|r-
ztJT&OlJl)Bjg{{fk-tbd^D^2zIz#QSnn`bT3IaScpG~>&nL<7jma`O`hm~5;x3H!-
zC~wp*y5?1WfT%GTyKOgu_(`mrSFTyrjYq5qejb8T?bCIZ%XzTg4kXczRE$yh><0cS
z&$Kt|(Lq)xRxOU<Dxb_^AkO+VSf>p{m<Yf+G4^vm(?HZw7kHAU1YEi(H7|yfYnmXp
zSVcfY0C8Yjl7kxM58w>Jnr%1Cj&C1WE`Z=y?gIM;-`|Td9?O}Ly1W;9FR?bu%^4Cy
zRbLc%1K1vrL>~YC_%XMfrrB1OpO(C3Ba1>#mr3ASuTN?xAHs1e=2y|wvD&v!tK~4C
z2|6jNbU?+Wpsm?PAtUADS1!wK0w?b6<m6Cp#fS;FJ3cS+?meD)Z*z}T{XD$HwMa&C
ztg_jv9GF>0JUlM!`0Nn9RWoAIna%ayCMkQ&c%DYkx6UUk4Esf6i2wJtm3FnN-l2QO
zor@2RaoxJWdm2Z%F+S**WPaKH#h?>U^(4EZPBO`6yR>z^W5L^Wp+5D3VqD5GF_5@n
z0kY$+gI_;iKT$z*R}_rj%_u#C9q-qIMMMhvHN@R14%8*@?rd4)2!$~0H~BhyV~jc+
zqCxb^X`*%vCuS;^I+~tBiGq@3?#ZEs<b~Q}tz(3W+);}?=Cv1^m8WTWI<`U1ZV2NI
zAOZ2M9maQhEMfRVbs+t={8Bu(kAyPtpXg%m+KVu_&9=zET~mcw{t)})XyyC96l_7U
zw@_MF%xy+8FzxBazgaR<iX0LW;(b#-s+HS$PI=07I<kylFTr6R;z4zg)@605ZxD6s
z`Wm&t!CbCoOxGQg>31+`KaC6@3nJ=(qpaX<=XrJNL@#k-$YbmVq!CLIqw5G{BY6f>
z17GW(h9rV^6Hu9g&lwB9z1C*d;_MP3*SB)lKAjl`^va%SVGJ7D)4uKKgPX=)3$oKc
z3sTPr>|*F6O+6TF43DKC-gniX30fJoZ0Ifsc4b>`npQA~eg_P3Os3~uU+;s*$5qu@
zQw*r>nTg^S`j#&@&gy-XdjlSS0N5wLoEc3h55=Zzf*?3{e_Uq-kT26DXNmD|0}j#7
zH4I;uOF&ifyaShO>K!_yU_UiaZyJy4MUf#4BiAqVY4@j!fWsU5gA@DN1F)+$I7BEU
zzg~fP&Giqk3k89lK4%H^`#+EV+VqvcQ^bd{phsijS*$K!DVht)ODOdv1i$bqT}{_T
z+Y_%bXxm(AV}87Try=s^%WR{U&cg+CCc1p(THS`gr;e$6Wd{>o{O+{c(-)!yDW9x3
zr=EDkQqAk;qfuL&cL|f<jw+Ukd|)?GH#AU%zrLw8Gcq0ivI_(n=UQ<eemoZsYM@mR
zss0`hGwC-r)|>!^$f0QaMbU?y0S|Fz+20Hbgwl?feX%bMBi;-(q`InCzvi)8z~#cc
z@8O~=m{#|^H>tE21kY$GeIcvEpKe@S=h`$qmi^thW$|U6#U9Ej(!>axT+K8=7C$WA
zc7^8Zxts6of4HpmgeFYCs4y%T=NcV{qlNDz+4^Gfy-RP)=0x*NKXcN64aAB9YW(fJ
zcL3xoCX40=U7?=4C_uzFKz4pQuQc>kc}3Uslw&KIe-K#2pwfzeoBmZ!9Miz_uI-mB
zz~B78eOlXYpf2zbyo-O)_q~zQ=aH$zZNn!A52hyzYlYAsoUt@J6IDML)9mvr%s6gH
zn+t*oHmHgV_BLXwAgW-6AcTG}#ySt<K2nQO|IWR{{Wd03H`9D)jGHi2wU6M3@%pb-
zDJVHTxz9K5{-@@Jt6xRfV|`CTY=mF!IOFm_qU4t2oBzs@C1I~heJ{cK;A+p*FmZd5
zzJ(MjYcQ@8xFvVc>GZ9Ez6=qYH!f%2l-ph_FUy=wG4#D+5i1PnX!-WrS>Kx(rkv&e
z#UWb8=~&Q)gPQX!tlaN1_yv5hJ@q?8W`m_&Ku7qT74`$O3#aZ^2!rI5M!cLmQ_)Kv
zio?}uA{;P`6Ju+S%(Bpe#$$m4K(=|bG<48Jro>=t>Hc&L`wtsjQ$HGb&wACuV7Q#y
z01wnEo$xeJ9p-5J-)NvALO*qtndm3R*}k1I&kv4T1#xKkTXz{os}gC;U_~Bxw<wx(
zXwtZQ>W45<LItJI4JSE<Y2y;k4R>K~0Sy;UPt@TIW5c|(J(mpa%c?Qdx^AtWQ7T3*
zabFGYXC3=F4q4F;PxZB53aZfdYD#+Q8W3)*1CZ&X7{oQPa1^bnwuhk5RAbkpA?wb?
zcI!p9S85F%bw^rQu6p8*r(jiG?4XCW+++0|dV52`)9`d=3v?=k)VZO~i01Y=RCU#S
zKsJL3HVKN3Nu;2IR-z17o&8$%72Da77DB?wf(I9-{f7YO-Gd2&v7)4AH7F`41Nc#d
zYXX1$w=&PT>wdf~b2>dQ3Q;Gr$a*TW|FY*E(zy*?c<4-tbbvK6;m_<D7LUsU=r<Vu
zelhVyI*ZhWo4~2$o1Pd-IW1Hce*E^iWpsk8RY25zX&<zBLpEM(-2F1Go<c5-MukKv
z*ILPKSKj<|#S;4NOcD*D_vC0zjdkm}1P<&}XL>aiMwVd<FN+BlMQx1>qJ}d_75IL%
zqN51jJ>2_zM<u8LgObkb)~Au-Pd?;1i8`%mn;@N)^5nzC?}Q+%8+us_Ql8Xt03@cs
zF8IWO#LzgvX6n7LW$`;4Jh(hCAx2vA?9`<z+e8|QB@7dccK-E>1(^ONm8uK~bOQ@u
zU+lsW?_N>ZB9pW0j!Q=Gw`>3)H}v&&ajAG*A7<*zSqlj))3gMjMUjh_-UD{(D#NsH
z6!;g)>{_dH;<osLz>Yfezw`*#2uPHduyDIQIW{{2nUjMDv5;)oWbE2*h+0w2D7)tH
z++I|Iz-1P*+ld=GAwzQ&d^T6#vligASI6gAmz3nC-xSQWQ}{pcE|*Zcfh23*t><_@
zdhJ+wTFe@m0R46o=CjXm@Wy6Ixx&9W{u4G?<v!&1SvKysb7H-4&$4>)Kz7<sa*7qf
z9M8C<58A1TlKQr~i_f29-O2ab#<0VoKDPOo8G!m}mq#&lLF6gPb5Il^UdDjLO}Srg
z@_dnPQkg7#4vdev4vUQ-Oc-G-5Ck>${S{zc!qVVYB(4Bwzo4kpL<fG42Ou>A?b))Z
zUBSjeUb+sYu*Blz!VfnOXVO0u;(Wt=zvz43Dfo61-ai04#lx`)LD(5<qv<eA1lU_7
zJ<kN-ZPID}_x}Q31bE;@$h&dx->x4s>5B=vwKUGFKT^hW9@(+uxbIUkOL!fuH^7TZ
ztT8UltDiTBjz+gt;O|~>YhRSwDkCpMNqCkz%ARuQ7Q@DuUp5rvt&zm<m2u7|8qwds
zo$q#SSu`|t`x}DeX=swFV>9M<hK_1kNXEkb^tbFMjDwY~e-y^JsfH2#Z{Jc}s5V%_
z&GSBQEDldQb2C_nyx4F{sTt)6)GsK0rwa_&g9O9Q4zCiB%lZ?BLI1+IUGMdzb#4Gj
zK{zpaiBxFm@l~2Bfv)Tka2r~B??4<jyJaD<js@{*0EV&C?YlywpMwZ^WpKN_mz7Ak
znlb@MK!EVPxx~gq3Lnx!{Us|B&^E&oxI4r!FD#4?V9rUj89<h@;79HAcjGsts~{Xf
zcK0_#iCsI;CNNl;rFpsFJ4O5>BYOcXjeZv%3>biv*grl^*S=4Ro`f0*`lCZ>y?7DZ
z1vkhf26jl+k9BwNnQHH0{L;;NJTS&iQ=ddmZqaX}yo@H;zDEDo3!qe0IBrqMTfEyG
z*m59r<G2mIu56|yxW@}OF3G9decU3?NxW^tW@-Cl<-^bewY*D2)|}j9o=fY=j*aP@
zaSgZ;erEg3##ls*$QSK9Z+?8se*VmtQTaW*XYp3{?mEoz1l}g}kcXg8;4H0cv-ClC
z?yLP5coNKaf&|Usq-dZ5q~-?o<$hAV9A*{^85Zc$f4-*j`Y?)_%xAE5xIeXU*o@Y}
zS4s&mjt$}L2KY8u!S`vhn!yTe5kvu2Fck%B(rEnk9&%AJ9e^}pu6DXv`H;ymW1IZ3
zuiqc;4}#s=(AO$UZiPR1DFly~I6grK{&|wurf&vbMSN%z;&Cr1PbvH4<hyd&!$fkg
z_2Ip6{Tp}G%bB!q5<3=7sC)ZeS+=zEuDX5c>Pa{i>783AjG@mUeqs)p)s`jSIKNY0
zi6nUD&i(91T`(bY_}v@ts+m9`sBEvAYu{>Fv)zdMfyJ3_C^n}=K6D)7n3>y=);@sX
z7>D$vn%)}_ssDgz%3X)I7`D3_9aSbzZ&d3-=D^1i82xnhH-^Lj^}Oj#{I*8UONr<$
zi%K@S>QDX5F%YdG!LZr>rWzQV*|Fdk%J)B8y0Z}$xcgj^x88vgfY>byX~!zKc06h0
znz+X(c#rkU4hr_X7bbhd6y`;Mg?<cdTsY0m@Ac3G-9m-0#b9O{>C`FAJ9GfCk38*0
zkz2riDFHuCfbwFTxYTB&#U>q|lHJ9VEhqg#i~mLj{8GxPBSSh+xNynaLyh2)n|FN2
z=Bp}^dS7EL?=&POf6TgSr^x0+2a9~F>#Aq)Fh77Uu{GSMD~l9o?@MFeOF3S}UbK}2
z3676~toSO&J9_AmFuXK^upW~;TX4D6yjUdPu%1y1RosCczZjSLvaKW^E_o<vUu_;5
z0pY+p^1>dmh_oj?!f9+K2N)NCNORX{x%3P<PI|k4CL+|NTu&uAdB(s$5Vf^PB)e<O
zIwNO`<&#q$rg<A<{yh04q)1x@>^`7ZaU)<mii>vos!sn>yo+JGQBUZD<&Js%@0zVe
zU)*ww`{L>_Lndi61Lth_xO^_`R2mn(WB+>_-?>C(WdP140Og`Xp;@HCDzV7mUN&z@
zxIJ#dc%>%yHD-Br03nHr+i@5w&T!p8Fk=-nywca=!c!f0ye}1UnZuu*FQnT1uSR9F
z%j%t5dtb_#?&~iL>1SpRFh<OTd*+^~6&f^!kN1i8w9|g2_$_zv(gi{1-Q2(N#ehlY
zxlOA=s8<GQDfKsIBW&ET;4Q8xGf;k;lzaGTYTJ12!VAWa#y2(WIta&h1v~i~^7F%z
z&5_;?GC^|2JqYy--9p?3War>y#jZ?Q=!CjI|7}e&Rrd770&e2TpKrE!)N#qJo9hjL
zQER`*X6AyfE?~^ma(i&gLSFtHPN}u<>r@N}^$ys+;?<E*+EvL4o4J}Ld`8PeXgBFF
z?lTEHh2W1;ZF5~~K0xwpij(5x>RL6(Dezyh$N;M6RBQCKY+<%>wg#Igzd!`w?htUl
zBJ&<hc?j{#0{AQ;OeHFuzp#Z6pMWuj!MTSAU^BpUF8rN06#e?6jD5C?t!*vrLu2Q>
zCV=ogG0@Kjp(q}WUII(*?8UXG|2B6B(^v|wCA)XXnzCE)%MrSyA14A?8k82G!VuIC
zi(#@>0x91_InVS`>lNPk<~gfXbaI`3y<vJ>u)AReWVohGq@er@EVpU<B1$pTo~o?|
zUhitwZcpj<INeN&(HK+rzmJmrwB)t#E?wfh?Dg@S6Ann2$OH@lAxE^Y`vyMd8$9=j
zEqD}vw3&GA*P~PzJXg`zL-y^$l?;(d3wFS&0yqJkx<9)+)sgNlOcv|sPrTR&eM?$o
z*R=C_1aT^fll2WS`VNPHGVkIiTv50M0EIk_f>{VwcItvIKTd10G}wD_eCKqq$+b*I
z>}$H!;c|UcP{$N4h*l{~&oh8H8R&N0?eTe^XXz(THxS_8*u533`}P+MrNzV0%YDV1
z|Cr{o5nf4erR9T;C|ro;aie!2N%mq($63!RGx_r~Jh;yhn44MWhV;J|J?BN9$8$OA
z@S@$fdp9`wZU^>DZJe<}SWT9rO1o~qVeFJw$<8%(VhQ+Zr-ZOJpBgYYyiVrkrhXj>
zR}oMyhV~Y@i0HIwCY|*z6Pn1xN?&jOMsDHObrytJAKzR2&TU$xBJO8P`&eJvoBJdi
zd}5NP^p3JKL7JB6bZf8_K{cTe26oOcUPp4tv$QON=N@*`>nPD3)q^XVgkUWY#YM|d
zJz@7L6;jjc6FD<r`iZHEJG5iIV)1?A#jgy2r3(Wz)l^A~pEm*VK~`i=4w{`Pv<Pz1
zYzR;>u=@O=HZwK1A2Su>Ee%YAQ%cIlcmDVOS176(VMtf_Yy?$r=wE6Oxs#Hu*-*a7
zQ*m2K{iY@~=@)mzv*}zaYl(;+dx}Z<?&ICl`^ZbH^ha(J>@+IfLR^|yp12Pb3`J|8
zw0Erk_4B<8ALb)%*%Jb)mzipLsls#1I6ZLHf$zDer~M;)oNnBy6Dh*Y&_5ebSXv(j
z_(~jbi7`?i>S4<T%pVLaI%o=h($von-wq3aGUBM|BXe_fA*4@l1bim$7!T`b&4pcc
z#LzF?iJlAbtc?6x0#unaK2!#D1iki~fy2%1&VBw^#$i+GTYr!~NGY9upFm<t=UlUA
z*1iKEfj#qQfGdLh-NX++pMn_R$y7K4pu9P59Fag^jq2NHRTBd{sOb^#4CnT%A93Na
zXR9K9VjITR2@U&~N51}2;8jEMPzewEY>B>)3TIZYE`?65dHlR%b)4xaYA!gI5N#6b
zdy%7oNLqT;zO!r2tH4VC?sZqS4uhS=B}j|Iz-eLvKb=39-7{j7$f&{++aFUmFA(Wy
zrE5-U4*wLZ6_u+Cw35M`-xccQUHLjn!YabrAv^_XW@r-xCr;$J$Bp2IOPFeWZD|_4
zs2LK@GI3P`C2F?$nSz}gs|ekp)jjgT?!a##6u-v13*<|aKo8!UcV)fPIu=w_<>xnC
zb^|B3pE%pP$Ssm`a$yBcjr65x&39VwUYe{9*MP;vbv@lE%4;9|#hyadkcSBllx};7
zyyf4{-h#b(#beZdYm+Yk7ClsK^zEKw%FvH2^uFZ)ppxZYMAIVGGP%zx5k3IJ_P5QF
zx*R)8A^|C=cHUp->VzlyBr|o@|3Q7olQP{6JWaD(_jwpBh3gLFBU|Qgu%PHjKPs-J
zaHqbpoRPZv;5wbb*4?ge0bhpTkz#spRvPaXj)+@G)Ue#@6gMAw$)7haD4<0Q>gR4;
z)Q;Nz*8EJDBAaN%Hb>(wCsPVSgG1wcb-PiT9&ROJ#mEf3Ul0&<2Ulkl^G1Ukax5t0
z)vK*(<7qE-6=`Rg%o%*1tFEoodEU65efC!EY;O>81*YB&5g1}qksyWs+OlBuJe9?Z
zG`7K6Yyuy&!C41N{p{pv<PGvnx2!YBN+G-9a8{%H;lR%k#Kr;t>S9el5Wn(dsZY37
z?$MqD?(Utlw9IL6$#P!UFGmrgYs@5tXdXn>5%Ul@r#M|%(3<F;h)`fdS`5DO62~*y
zjyXtjix<6)+41yqB}xXHTZ;>zn&FCtX@s_nP5&9s8Ee*s922-N=HWce>LNp?JG%GA
zpJ4zyei)z_|Gec-Gw`?OYCce&vCwhIML|TPw>j|j*M+WzyDZSq80J?wyR^+>;Y$rk
zGIh{z$G;coUya(L`=4gh40V(K1@>oeX?(cIHi@xUCA7lvaNe=GROoh#ZQf1bSs5if
z`J&G41h^5wdj94>&m6gczxhP}2eM)WUMADQ&&<=+3G&JezdbQorEf)EA;a$uJXV15
z!c-GkpA0wg2<)h0OF^*$*ha@cJ>F;kgHIRWPI%?+@R`N2pt%0FY%!-rO^iwZa)3HG
zcmeYeXWU9Fan>UUu8ZX9u+PZr1$9w}c9d5vU{4G#YuC}$In;UAe)s^3;AlI1V7Y3f
z(nPm@j|*D_4<DXJX5(zbBmmYdUEK?R<Q}v17LMv?VVSwWOg$Q6oG@DlU(mJ14F@pJ
z1)SoC7``^RmtM$y$o#%N$f%~ck6ifcv=#8D9emBIMd@EIIrQGeS2qeze@eC1e|!aZ
z@zqgxI-IBy&?B<w?4K9Yd$CIGbUqBVxwz9^Zwbw+A0uCQ?DO<{0Y}0uP~2k46J#u2
zm@)hPVg&W`Zg=M!*Ea7s_{blrb&L7EIgTIUsi0F@)elP~N&3vGv#EDKHG*_Q75jL3
zEUze-ue%v!U;5r+H9G51HHZx7-b)cq8_pk%aH<d^3wq>8m!U-P@LKlSOz0qRf|#r-
zusYti1n*ZYApkm%jv%tPS+}L5rNK8{8T5kMRWD=e7(P2W6g*2K^b*H4;|>oDtTJ)&
z97qdqcaIMs7o~C{Hg|Y*l~?LflP|6y<LMV*03nUduWp7fvaZ6E0$+_5gA*DW*vmES
z5MTkIXuXR6NH$0VnXk1%-_{KM=cm6_U@LC6Fu~?qo0kdd?~kWAin)ZgzCU+UTD}K;
zH;Yy()RHC{(Iyr4YigyY2$;y>yyA!cG2+JYfV?^0nUkDcm!(h(WcM91{Vwwa*$Zm8
ziTM$f%O_p#KkhJ(QLM5F&#Ycv$+wA|$8m9+GOSZ=>bK|em&bgSDhfh1cN|VYnhRF7
zrf)?LT;q~B+<&be-Cl6X_7v{CzBuZLMt=C=SB&t57=wF2^P>sXQMiY3_GVu;5A8fk
zg<frk#|{YV84I5oIE3GEnS`~hVr&0&SaVshCDe5jncqBaZUGO(n%B+8$7qdi-hQOz
zn+IFKaii{mT-Ed~jYft+E~JGQ1)-+{6UzB266oHf#W7t{aYppd=x-q2L4Wz;Kp_3a
zG-Bj=Ebc^5GOiH(wtkegN_)$5b3OXgyawT$SN1|^&)-fJ|Kl{vMYz(NBl~#EH~yi-
z8M<_9OmEXVFRx}&>>bHhv4<6Va4TM^skx9EtVr)wxM#Ly`9Seh?_*xZTF)-4C&~|u
z!rnT47RDyEw!HH63Xzg?@As;TL6)NfcG!KZu0)|uZ6~h8fm^VL%kmf5X*Byv?pcV&
zvY){Gj!&Zix-Lo;5D|$mr(&8G)1`X7Wsup8bRvT5&YMDgG|R+j(N)u5g_t;ysiG^)
zFXag|8A|?G@m=Zog#%M~IPRfjD}%Q(JIkpY2&_O2uL_V`z|w<4dOXq6O=<Z~ZlP1c
z02B&@ek#7V!{DDlMKOXH)v$OEo43Ijbvs0Z!$uM@JgVIekaofBqffydpR9|)q%!w6
z8+B<_2C4x&TvOH}YsVkYbe?4&+s~BP8`wB6_>amN9Q+J>xG}K|&zzH!-Nc`y;k}E|
zzL=DhzgmI6{pnIh__^I+&eA@&bhp%}hxZ||iR_+aOT9J5Esvk!VPyhg-_gr3%@X&;
zPeBr~xl-T8h`3K6e#Z+l>v||ec+w4p&g*=^;VYq<Cf5@+p$?PxjiJ%i7yC8+i$L+N
zQDX3X5Epe?j>?@Hb&tlO+#^@_vXiTfMV}osgv*t+CVyVYM()?AW)(;-Y7?wMI;mm5
z)i$%Ty%@TL4$P|`6m**c?+wC0^=?>{`)5j3%?EtT+tk=5_@Ujnw831+AcrojfGH1F
z4x6s<md43`#W0W3T^57|x;uH^+e|;PPphN~X51OyTHK2URTZ*Oo*+noT=(~Wec`x4
z2Tn%H0%fbr`4$n71$g3J8iO<gfrL}i5V?r~QU+-urTOSK@^juM>x5rK?`nn*dIMA;
z<<0H;ad$P~!2fb<(I_v_^DX6<(Dy9BWF^)K7MA&kx!%LK3|?d#$bXxvFq66L(!VOW
z9hJndbyn(`(r<7Eecex6FQ&QsMk+($_(yi{akT-=h;kJa@Ma);HCi22JMYXIF44AQ
z_<qjz37-U0$_IA5swLypV(6~|-OA615JWmbylYatjc8;`%*#~$%VAn{rnb(!FAjC$
zH4Hvh=i@Sfn*J)9mU?V-gLfK=9M=2MTIJ{OYug%0Y`0-$HX%L|q$)KSn`h3eH+6Ty
zeB#eyBAF(alnz)l34g&x5Dxtl^%L&D#lZUvwHH?o`0>l5`P-LJIn&o%YaLg>>udkr
zyZp%E`ska&p>uu^&shoZAswRiUcu7eGNBc~5tt;d+4rQbfs$z}#lUJUOi^(Ja(uwD
z;yQ#e_+|30<mU=F8BmK(PG@e(h1C_qCPcprDt93c`I&(-g_MHQIdpDp29W!B-v@l&
zd{2})dmsNPA?f%Ma<>_J{4XV>{wl#fOioewv+78zCF7laUpe?Y5lF&Y+miG{Qn4U?
z7RCyi-QZ{w<QSUl2(PqtX!qQqB};6Mxe<s~(D6wSH1VJ+$h%3oHai;ffe7Tlxk+ov
z?mBmGYy01<CBtiN^?v)cZ~lcM{)lazGQ>8A(YhfXzcZBVE%~1p+JUa(dyZ&bwM0gN
zX)(HrRD`<A+3Noh+}=eL)2QaSzuzvBQOrDkyHkV|RZ<2BNK9ld`p=<GQRvR-<9B@0
zRF!u^cF!#e+9I%HZo4gwybo!+t&W{{EnW)N`buIZVY!V<pav<!xww=!utL#hN2puF
z;1lGG_Vdz+<>swgOTwO|v)|4^)DvlY*)v*`%b8J-{3oqb5x6h&B}D0$8QZUPXU~EA
zxt`}yIC0vj_^F-mCZHtE)AQcOj{8G^<r}Id)c1>t`yWkGWdK3C`>yllN#OAjB_7lL
z<I@t+8rEN<!fF%ozUI_oe(Xs%q|FJ&j+l22W*T($i!M)3HHn<q5v_I8`)C%}zsgC1
zp_DcsD8g+lBR{!1m`AiKZ`t1IU|@gwMZD5^m87Ag+o?Bu?#ioX=5{Puntf733eQGe
zt4}_cr-6+~Va$@xQ>}y@RZwdt9m)rdxcSWKfOgn<U29tk4!%#p=Ugy-!`x+VMNiko
zEmdC@3+ziF4f}I5vPrqvQQNr)+`=N*4QHyG@ryHHUthzI1`E|PJ;cu$aljd>yJ!T#
zVjcec-s<ojk1zUGgqMhFFB4z=-H$fdKs15u0m$cUFU*`uot-}3K)h&r{Q-cSzw($c
zEjysWme!UAI;fwWN`M%+R*j_cf29Lmx}?v6l4|hLu)RXFMcd@F2v8Aw$`CxBXl)Dk
zdZWu^-Cxb#9~u~`=tp1gGOk8DP^KX}#hGcZu8Q4zt|*qxLU|9G1>9hx>Z!>Y+KsE^
zQs}M`d9Yf-3_Xh&uIW#WeC|qmN1fhhohJ)%eQQkx`eVI*+^P&Vp76{(rqzu@m{`}m
zG86S$ycvF27t*o@={C4*`A|U3lRul)9b4+cv887<EjpqE3XT+*iDQiA%e?0+7sAF1
zmo3`@YGUvc7w*z};c0N@Rd-_Ws6_eGqzU&>?xTgZi~YutBUu)Y3`v7$->jF*>g{;`
z8N?n6!f>o5;W#2J{HQL(I5t%PJ$!gG>1>&~J9`R_Q>$X~@PQp(x}VMXVgunMIrkm$
z>wF0sjDv~PU*}|;$S<w?1e;f!f6$m+=~OIY&Ze67Xt{xwx1z;?`+IMOQxP!v6c~$I
z#L78K`MvjWXY1YAL1MRACB%syShr%!Hg#Zi4sI45&9e^%gz@)zQj6`1#crBq%O95g
z?@da?n_4nmG9T^10upPor5Skdm-4(7baT=%(P;1YAOe}kU(bH&wLkrFbVFdYxOLko
zO*-wN`?ByPq>-WRUKztBhnq9s5}rcrCs?DpLgQ?5B{YQ!z72A&Kb~RxqyhQ<g~Jmv
zr97iy&lktx;s1Xrb<0oiN?rI~)vbsOU|`<hCZjc(qGi7YKjGEcClg1T847aPo?VMz
z%gDKXHGdta#@ip&wxphfzF^$}Wb2$#lXr8Vnkv1+AMwIrX?<{>un(Xx&y{@!Cro5E
zZ#t=~K6vlve!TD0on4e>Roe9=-*{Plp|nefw7MH{)Ay81UHr`ZJ;)7voP<Row-zrk
zz(DTe=kS<AG}HB_;8H7O#fijlz%RN5{drKrJGd6`N5Y%@=y+e^M35{l7WV&3!&?~;
zW1@d$Mszkg5V^+RZtztC3JnTw<0+eLg*>oRyGv-BeW@_Ms>uBE>KS2_hHJs4$@Xu?
zCpmrEnIE3cyfV-(_H<V|0X6=hnD@RL$2FKaS!L~04wLghdHpYXc~z-*D_$tvQIAM3
z)R(X~vX8hj-V3|KaZ;lo3E0tE#S#iuI%w3LKc-UkOZ{klAPuN}UD`J%Mn7Ol8vD?h
z+v3&EQOU5Fj)|+mqrk0$hc^AS%59)18it`aTi#tjLx|6_xn4g8(nj%mf5`QhSaqzV
z2u_X>42Yif@){}#vY}|t#eqWJUQj3YAN_7kdCo&y++~V_CYRY03~-24_nb@UXcFCW
z@bn1yF}!JU!h918m>g?8{s6~FjoK8mIt~E;k@OB=4E#0jbfez5WP_5Xfl~O-kcdAc
zVw&q<7fAj#HIC_Dbmym!atu(_^ua2`hN3>-df#81ZIHLsyPr!!h2uS0yRg>$%Eu-k
ztvAT@InRWsthF|C=A_MWl2thvFrIF{97WN;W;XDscJTXSpZjwcqm%6?bHClJ4kA@+
z?i2d2u-B1ss6V9Gf*%+GhjKr_Jr<(I{EP7x%U3%%XJ`T2A$Rq;qqVAR8Ra^er)t?H
z%4+E!M_KH#aF}U_gr%uCac(3|FVCQVPoaP!s|kFUn92O!3;q7QW%1xeOieaOy*XQf
z9!chILj@DEXHyhw3i4EbKR;;~D97)`L7Z_Xw2|vMmo_Kz6(wZDN2o7)IRHq515OtD
zWIm+xvwE-v1V`=1lAwN<cr6hPe{VAgnR(zX4#51R3rpvOr@q>uqe66SmTr7I4`~`e
zn1m(8%%SJUA32VyFG}IQ;trkf-ugp#K6plghkloOiaGw}RlJhX$~x^AI@gPa??YC&
zal93W%1k$l{fL@TD5z1a;Y<PMj!&%v+8q9!kJ1VSs91kb^#pB~Y2rqG^Q=I&4p>-{
zuoTgt5(<nI(`;_%HvqQN(9pi_*zPYJF7XpptU?LM&zO|%5pJGauBFeuP&iyJX64eb
z9$((=9R5qI`$Xj9K*fsSblHkNCZpefz<^)yhr(nrYy|;Y_181})n<b%l<=9iF7VSG
zFBTn7_88I&$4}Djz*Q_*atG~u4Ui<y1|Yrx%lBnnpywSBS;^3Y+YrqFX}MJov=>rv
zUElmBHTEj>x#1HM7)+MgG)CKGUq=)4v?<I+!vl=tFTdXp0t7bjqJGC1GW?9<;2KEB
z?Xs&1FaC9u{}WXFl5d?0*`{nz|9FEGx2o>m5(o5%et@=9W>NYC=UEgYg96E3O&;)n
zm?iln$~QZFtsrga&L{HpyX>loXRI+52NR`T(^dT@!|~<$AEQ=YS|&Tv*>S3>mvjAs
zhCd}|x~=atQ%o{2DD{f6C(~UOyJq>M&Lk$s@KTxC-jit5C9$<B%zAtT$<JFq-W~2@
z_?6P`^~ccj7c?kGO=#%=3>3PP6Z{I{$?aC)BNh_6oZ3<fHrgO_Y2Y2#)Cg$5;v>4U
z0Bi<C*Kb)2@V8U|Vn3_Iw{mB{f#ld9Arjf9>9*ZX?@OSrza9Q#`uQDP3->qpFr+6g
zZ^UzTaTn}@90txUh7ZLL<xmCa6utpnvw#slAr%h)!LD?gM<Reg$Q#!*@G8ePP>;ce
z0s9ecr3}FM0%3=QZ!nH=0^x{W5O)HbLq`{JW$6w5k2^-qw)J*c_XX6L#hsJjdnkmj
zCyH;gBxdbTk9hSB2(4M(Ttm|Q6W={p5+L;B4vMKeyL-*K412@^Up;Kk>?(>36(*EC
zx^GFrI&HSjIVMa@VlJHh|1frzQBk&E-&O&|0s)l<0Z}Ot5b2VXl<txeq`PFKl@{q5
zk&;FbNg29BTBM}AL1u=D_Z+<M=O2&n``&B$1((YY*Sa{beeQkyj{U{l)I9XQ_8apb
z>KP-mF``X#8|Bu9J{eURTo6i1`I`;*xm4-k#_nzR-;Tz!o3y+I#IDBDpqX&l0}iGH
zbpjrtKnHcP>HN-T;CYh6P|zh+X9kn<QF+hn3E%q-Zm3(sds%vCc~3f%FzoUj#BIKC
ztP7$lUDvRtTXYOL4ifjtKXeWqcr70}9^qFZUPg#z)gtW#NOX*q>Gjdv$VB1yu<SF%
zdG~bth`xX;&bqZ9k;9FU@S(R6Eja<B8@?dXf`K1c@=oqpVtnoa?m?taxzCHY<<XJK
zRsBKi(`>#-$bfxi^Ei&5mN^t-DT|J2^F{fh^aD%A$h&?|LjDQJ+pW~PV^0$7L&d)b
z?LVYo2`n=9U}Bi{?hy!1rnsIt4&y#lbgC97z0r4@{oOIUjzlHA?Gq_i*WQ>|z0H+0
z!d@(sxl~{cBD{3_;&5!4tCR5)L5l;ATD|@=#7p?M-`FN>`Lb_iE#WX;8W;2cSXrOV
zN0X;%7NZ;@-L>X|@%s{29LW~Pg5Ys^BDHV7S3bZ<_ZQ2n{)r~FT}|vsJ;Lc~VSAcx
zBS&8_ibJO=xMrLu!HMv-xo|2$=?92J(}UKQmzfM)HnsqZaDVciFT}%YHc+s;>NQdW
zagXxYnpr$$)%ZhZ!Ven#h@I_)0Xv!oqh^7sj<MC*nqm;e_#5MWQq!^!L_H&mjqTd*
zfPzFDV(o{3M=CNe<g8ZcgJYJ;#8qtM-p<4lBw{VZOlaB060rU$H$br=R#h%jU5LO^
zxw`Rm37pn*%AlTmk!T_c`9m9Y>aDntFGu)W(*nhzo)@`;A{;OGDa7%Oid0JDkFFi_
z55LbGsTB{}zc(~qAE%jmG0BODb@}ya=xU4D5zhTQ?G}L`OWOP;Kgn_r$5aVjic|>Y
zk6gw2(sP%vnz7$wB3dn&0;mI_xqb)q(|#fCEMKq#h<BDf;))D~+YGfGeO-C#@~IPB
zJHf)KsJ$y|N$uq9s+8%n%-pP?(W+Up%lxS}{-f+Kkv5{e0_L@IB!jT47K7IKy=kpI
z7dlAOyszmXQ&f%=*}D!0vrcK=EKIv?Lcb<DdTBtSvahP>DLpe%dzakv(GmRvf3ozp
zhDZP6df!j2&H`Y{VXQpI>{vSkbW`d+_H_pMlO+Y)Ytj%!#ebNrUUm`D2QZt1&01<Z
ztYZbnb@Fs|%j4u-Z}QVRQLV$$@1NmkR=*&%6-$#K<(&f60RRn}zgv`2I4cImhuvVD
z1(D;bsxk=1&9iDRg1O7|eor3pANU_3G7v?mxx(pPITI2mo&UM3Q#8x!=o$S0nsr8o
zvcox&G+AfEAuKs0*E!Gi?aM8mtGr)DlAo!;%(=kWo(RC@F0|tztUT<tCT--xeXz9w
zQq_ncpRkAY^e=;gUzI=gejUHW{W-`@8t>{n>dN@TFATH`YA}S@*ESn`7tRk%XEWbH
z1X#`#<t%A%Tr9Utg8mXw*VxovoIQ$vu6fCYW%lhjXs{EmroEu7x;;wGAlnW~J~L4A
z3A9h{$Y93^&2lQiK!4=TD|(IonpLw+w?Wv=Y0(bo5&w$&3#h(Q^uq9S!{20YYFYF;
zqm4KJDt(tMHeCDYNku)NuSSQnXAW$PE{kyD&!WW+jQ7uk)Aj-C6w`mn;mUA9O)@s%
z#&Qta$DVP;s3lAC-?-iq_S23Zp+5&1DEJc2=E48a7U6e2hvWLlNR=p^DVi1L%O#ou
z0?3=|o76ECM=c?DA)GRds}tPm)1(x`cgVi_$^_Z0Uy4zY!RyHNb3T*W0c^C}xh<0w
zkb4)e2J0L?alXf5Yo0!Ts0J<3eITWFv&<mAE_#7oj-G)!A-`U+!^h%7ug(o<8T(Wo
z<_-2ElYiAk>Ypmx*&LWxSI7E{CvAY^Sgn9Gi4<!V{xD<DK@KAeKB!uZp3!#AMQJSR
z=Lz#vY_Hc_T`wsfM0^9WplOHiiH%aUVIO<LME3OCVX7+z#Z*~Q)aaf^EZ2Q#a3cYq
zZ{bjE1SZGkE$f<Jt1ABFCG{2q9t_Xrc+wN{B|rLwfmE;^Bbd$=j#v9*HeGgLV`TZs
zJl#?Bt+@Wg{b)>{8dNWBaw<<26ox=c%qwh;DSTAq^#LjNgS+3pW4mY3%@8~8Li3$H
zeec)8>~J_&XsMTitk&{RQBN2$4~A_xoK0DW94yRjtlP5*oaPFC;9mPJ(%zLnzV@W6
zI3Mw2-IvQZ_~e^<@Psh%$ggVO()LMY3e~B!1o1fGcT$*tQfeowC18Bj1*KLwgo(n1
zjfG{lK^&tCVWD34Yg5xq@S;*d%!RTc9Sa0Y?xJ||J@1B&nP{kTx4L56=j&<E0JWQR
z*IXlS+bI{$qps!TV&2JS6_V7$5fES_23+t%h)eN{w8dU+6o>Gy$XKPpyn0w9Wc_Y4
zxZ>!6MmAkP{9qc<HiB1L7egk&qWH^p4V>FwBmahj<)87({RbRe11(&up4Py2h;NO>
zAvj0gXU2@42d0n#1lEqWLC*xK7_0qK)c0F2WJeu4jXE)NRYu3^?>yG3Izg*9Yx23V
z#r2N*aAnD8EYAj*RZ2^(r<%dU#vYXZs_OYCSM@X6j6ZYzP;B-MgE<TXZ0YZHD9p^K
zl4d`ExQ3t7_KE?|YcEvQ<bBD3BaFfSixU?5{<b(3522p$MMv&)7+6<4PfNz|yzL@C
z{ZsLZkESDUQE!!hbHcJq<wjCv86yrvAST81d|{**9(WK$uN^yZ9ulYZcsdM4?}#qd
zE<3uS4OhdORcY}>U@W4)uYj<MCPlhS{@X0bpmSzxHgxLsF*jzadv4<{zi-q2&P;C9
zT_p<i2?7pFJ*~x^`<F~OcF4YX^+N&jHjbkUDm*ydDh~m~G6wNU!4jA59`!)M3H%);
zrZ1@W59i*nj0t3zirl|w;q@rj=KiPoF}gTbc<^7WutH(!w$ryjzc^g{$^r5UBpL`3
zEH592k_p*YG9^T8?3OHq*#_%=Uunr@NZlJYu1tJwfJaw$Wva6>nw+!}y5G41X1Bw$
z7$s7^^8&ss@rZ+R$5hy>S_(4vb4RB};<Lo**MX6^;si?jO%JmPxg?i+XJOuTGr_6%
zs1&R`09=dy_hQMDseqh_32a_Qlus_`=ly^mz!5pCv#xN?s*82ps)w9MDjRf+AZ_RZ
zxTz=F3z9O}@*6uKGZgMpa_D%tBM|{2N9~6Xj|PN#3o8rMS(v|g8d_rN5)oLCS3wM8
zzb$!V-XR>YGW<<Du$O(<@t<iuqQCJfVt-tv9B}=B_D_{E5&mbJEBiffFYI(%f6K18
zh9!lD5K#-k_ov3yHM#xxXPh4Xn7_d@FV^&|$5;K10OPLF4aGgXb*+icpB?{(3Uem}
z<}3jp@iUV6z}My#*NIDn#CQs>td!9>p;futB;%WU?diFLIQHp%&@JsO_F|IuQZ<X(
zBxKscW=TG`ydzspFz5)h)(469{JJ^Q9%vi4zqXxApC~xhbwJCg*89>XWYre6NyKd3
z3Pcz7IBK|@z;P>CU$8dt{>fG%S`6vI2WH!kOs^NvHdxwGqI+`dZ98b*xr63DV$Wgx
zSEZ!qJwp**%7_M<S0H|J;nltSe}lv9e}lt4W)zd<Ct}iFCh0o0b*VxNye<|Cvha3r
zbE!oVERPtUpfgos5Ig-K@tvey=d}D!z)W%dfp6ymA>P&ld<$sVAzwpcrg*K&#>;*U
zN-Hlgdb4w3-WzQV-4zQ>&UG^8m7hY%eG$>|R?qpLny*9Q(Iqms?gqSJc`(Mi^qfT8
zs8ZwXuJVA@Mub#%w!ByujOq3*p|!!7A^$}6%HcK|uim3*`!%_R>kQrE(q*q|7gZ!F
zh2%<e-N_(fnRTN|sEN=e);Q!{4CpcszPWXpeC=3C-1OoLxoTJ()lY;y!f*y(LtieL
zq=aM`MijToH8sH7qVPG)Z7kRIGfo(h<Lupel8%d>-fgIbl7#_1Ewjbr0aPN4p=hJF
z^yBcRCq}H6l&S8sYo#|I<h`vd_B(<FiG6VZ+y%#*`(oF7BIlS75ATD>8@OmHksDKE
zXX*ugSEi_`wMb8yB6%G9!W_`uaoQjHuPVWg1irhGj9J%SI<ON6oS7M`n_Am?bcOH8
z-KbFDurh<GrTJr4kMp&S&HH2R^df4~AMq|$ug5%ICl66wh`Xw;AaM4M>bbI-dA(!T
z5wrJrZ?y?Nu=MyJb1QVW=I}2$Y0apIxRedkTYM)jxMVJedtC*6S$a`y0rO<~8G@(G
z@XIIckW14Refw5T>@V&M9aW%Vh&|3oq{#rpF__bxi_mMRjr0yd--VmDhn$)uCgSuQ
zleIKTXJn)0y8%(1k!DO(ndo7K6$j}m`5^9zV*myA%!V$uO96jXHwn7XPs&hIf^k)i
z_NuL8)+o$1dc(W(T~5QqU0t~Wu{it-D^4})2FACJShe6_|7Yr~8#OW%`C_fHl#AY}
zZV2)M{AzL~_HAwn0McNRo@8JSiP`D@iAemhXSD_82w2)WWsCQ2G~kl<6IB>}b@U6z
z{`3>~65J_aGL35#*&~*Q|1={^zX-zVMv-Tu@b5Ws&wo5ratYler>+#tjih+^tL6-~
z;$lnAZT>XIQ4NDpO_F^2klSTU8BTWtAbLifJZ4suMjs--QwF77R3;hVIT!uz-gVWZ
z1OE!+w}h&mGY!nW?c5qSiEQ|)g_dy#cd9aekS7`B-JK1KBskO1Kmv-xH02(%M;CN0
zZAhi?GgB!<gw6Nmm;-aq#A_OK_!m2Ab8htu5~!Q)(BD8YJ=#YcoC{-hBT;&hpJW7I
z^baNTEA?F-_qz$mODuTH!6@<PD8-;??)h?B^nw;STBX<Z8S)1%#HQMs^*2{scacXA
zFqu`>8-fcYcWA!gK%>z#AQ7G%6C1tmno?V3W2v!jybt*ru;szr`Om_|LcaM|zJC1@
z-_pTKsr>lv-2<7ZN6*rqe~oqLnE%3|=XG8e-ksW{EXSUI6)(zTqCVk`%z&F5`jyB2
z>K>x4cQJ*ngo??EE<3YpV8fMz`;03>RFTN^vBBC=;(WY;8Y_nVMFNT1lb8yWeoCCn
zI=R5xTUgc5QgSgf-hlkPvH`h!wL0CeNn#m@%sFa07y4^b#Vq!{ytW}XcguSPtd2)U
zU^hcKdEc$)RnFxt0YyXN-b&qpf=3y36zCJ9;XQt<5iVG0C96;UiCD({_SyRu%3VpT
z*vSY8C{BmG5M))ek+>lDzZl~9J6WPxG8GL!mQWB^<-UQAc$bxd*Ar^XFdtf6PG;$*
zmR&@PrL^a5x4Irwl@-w>^b{BVH|7OgABb+{h5gsTfcuf({2cvfrSEUZ-C|XTIeV-#
z=d{ATd9MUsz?P?%_H1a@_Pg#2w&7)1Fqo2tDCeefr{5|W_7lt@X}f4>yPmjUutyKN
z;~X>%3HBcNDgxFD%x^LY{9U?YxZ*Zg%*Rw%gs1Z{36Vj3^cs$J$Di(;PkCikMK(?E
zg|3<y+H=9&UF0lVzromB$RbZ%s8Z09ES6}#pm~t!0d<~yBEkGV#=X;=sYyX1ej|nb
zf5F3~0I$8;9?%v!T(Rc*V|8}SX`6SGQvuE9`jZ!Q=DqwTMoL9oa3FEmU8Dy{(9d#W
zUKZCU5LY%Q){;;WAl0oE<VnX}^`bG;OYn+2E8y<j0<uSy&Ezx<4TQ<q$J1L@IPN7!
z?88%+VZ!_9DepJ$rh6d*&J3H580d|LhHtmuoQ7VCNx(B6`<DCj56@WVf<LDg{6pXw
z8kOf`i@KtBzPJpj74y^VE*-8@BvxZOtJWg+54wwQPZhk4I?paAz?v%3kYr7A*lPjW
zI?+&L_V>Q%Silxv6H^U<s2a(C%uuDlpX%Mvt7-`Oc$lW|oFvv`p4I}$;s?%!YQimn
zBPv7&b1#6wgjow7%e8(Plb~J&IY@L&ryE^OHq@x?(Jf-Wm63)=USE=$gSwj+JVN{!
zO83?4i0_l&VhiX?GV~5)E6mAYwE{0(j?x{K{O<APlzyoXBx@=MqQEst$RPGRPZp#K
z_tzv(=>ctA#Rj@N<%2&E3k585R#fXV9Z)xZUkU9UaV(+zAvO~E7j+C2j5zA}dlFl3
z;>ej~?6wW~{DBRDhY!n@0e4J(HK|rk>fxODN%5XymF>jy;n7BP(3WxXVQ-BP=YSa!
z0LfIwzdQ7F__$aXA=k3Pe+~6czlv34o{&!u6z9;}9c}CSU)40I2UTi{NA8w9SLF;q
z+%cpx<)2I8e4F<wv2E&78<(#{Z=op9jSL1(?7ChX1DYi^8{DT<&PS>{!_QT$Es1<`
z|Ne9Kt?1+#?v?Tgc|kv&9i+a|qqAurdhN-&)uUHpZywQDqXE^18-J#JHE0;&kyEBo
znS#}y(>~(O5yz&$^@+YRt=qxX&9_5bN^~*zj^%IzzV((RVn7K!0wInY?RL$AQrG|~
zfxE|ql8<U<27!EM{`h7`q!mCLRmjnk3s}Dn=x-c38S5O?^zCc?K-45S8#Gi7-t|rH
z{92Lr>G0@8Rh_^feU&;Z$+y=ut2=Od9z8a$nWtjR>jNT?bs%C>k^)c{EM&{0u50mh
zlJo)>m#I{E02TYgxMN!m%&{XguNt-|MwY{o$NwUeH~&Q@BZyy}XbL(%pp!Y%V5co;
z*+zd6I)DEtkRW~b(k8rsYgR$#i^<{JG}UY0)eIhL@*Xs}OJA20F%>iv5~(;Ce(+V>
zN_`0F`ZD(76&6(DOoWAKh*=55lQ+N^WK(aTQa|hCMcw>h{%K830HDbhaOj?um!h>b
z_r>gN$ZHieqa<CaG1_h3<0H9qfAGVVSH$-j=Gj>`0AthwdN^-Q%z`<?!kz0sU~8)M
zQh>dT8j>;HJQvQ^(W@6%4to7tTWKC5n4;#n%Lg`IeRZFz>#Q)BsYdnqUAG)DVuE?O
z)ib7ruD`YcF0^Ls@!IC9NaWd*LR`;c$~u@cs`Th2PoWo;Xij-X7V8LULrFSIuzrEi
z?!Zr&h>XoMPCM~+4B#nGJ1TShA>HX%hCgGR5kE&z>aiE4&Bj0YpslLKmn(w20cz7j
zA<IRQVJ&c9IMgY<UtV#Fb0hxKrKbjoZ4Yagdn_sZ;L4P46T9z_sn~a5=?I7$QNCf}
z1fcT5_5lDE<K9>VW$h;_JSF!(7{NcB$`>LZC@R@Z%!m2E+OX4Nx@F%*3?`ZpB&;u*
ze3Ha2S0snjByum4jkaA5@=HLT4fJio{tu_T>xA&%tI2}cjZ7cl?q<w&Ba!}WzHaoJ
zm<~l?K<cMx{9dwjulLK<r;~J`D8{VFB)gp}p^=;zl^w1TV;Z1?B!ghLop0UmXp&~F
zz@hLHJA+6<s#^!M>8F6F&t84A_J&n{HZ>CR;`JCQyERX>!&8M`>krs`vC?tuDV^*@
zjtm_N#~f3<90qjr%2c0GN*(S5*5L#PL$S6yV)0K;nG^rV18E+q3zWIYhOC~9Cadq?
z2Z>mPdB}!39W&3R>5D$|UXAd}F@EFp<D+Dbt29((W0k1@*dET=k=^Oxe;yG1bTH;M
z8|Lm^#_kZtdH>7^wus8PH#NIY-}v^C>ZD}wFgdVQA9`tW>MyH94saZczMyhMxLB3b
z!_5u2koIMZAC^~ilTrgPYM7YYV(&9NR18MmBI%2p9vorDdE{7;$1&~DyDN#?+I%#X
z=xr-BU}32WT{A}BmQJ)6FFC?;z&wgsCd{@*QwLJMY8-oppjYgN*PEfqHX!E<EfxD%
z82nK`LcelSNMB_DCiV8GXJ*-6QI=uEK5Z;!Si99~6QKgBkW9r!_vf>`0zLf&xvdD!
zFM#n^9>m&Cq3Da{wkF57S$MEkut+=D$$TRAT|KaxfXp{NNJU|dTe@qWzxqAuyM-I|
zNxbGV{?nMWAsyfq!#?gbCR5+OgjWkUJ&LaFxa?(EaH%0IvBtXKTRTgx1p{IE8H5^N
zge*8Mt$Xf23;+D^<L(!lL^W)lY>P5}rHvv_WhNeJ=9R{gy{Sq3@hQ;@#6eWD&yMl3
zLmfq(+HT42H{B*)wv)~7FZ5+6BVh+fb1<+x5S7pzoCAAG$ba$Nn4MQqk3=L0$E;dn
zu2Z;ZZg(NOlcFEB+k~eoA@cmbm>xjTrEa!s6lfov(+C3u0+&79ISV*t?-cJNX051C
z?&;AQGY9YvjX_u)6~qYZ2`}VQhRCT#2IciaR%)KPQAm|BeLC=Ls3-=FhK%JEJq!cE
z38L?SvoG5Q6OT9sHW&&l(<R$?{kd_7(vvv7MZXm+@Izg$)A%jZX(S>vX=q-v{F7QI
zsPPkNOpCTaSmRM<$@BVFAu}qoZj#{Rhm{9!reqdrq+}VWq~xMmC{P6wvNVVLU$ZPm
zBu?4nu!;#)=A%^XN2hg;G{Viu3%^<&4zmsYOLW+WiG6s9+7cL?yW1J{ayz$F)S9L1
zhusK-7bcuJ`}5zIq^>L!LOc~Zpk+!F$!|!B=1cXTR<w2|o&UCNyv>|$mIhI%SSszl
zg&mo#r!RgM%n9z&6Vlkn*v}<D4sboNq%W>gvdc-V#T4il>7h4^Ri%;9mj?ChCvoK%
zcB@~^(f17pt$wH=Vyhk2PmSLA#o07#dO9~68hw5T<mgnG%6c}PFaU0OOmDXMbISt<
zcUSiQ62byBSg)k@xxkBTi<aS!q-Y;8<aoN*=5a^r4gQs=)u8N)xS0emj+&>+jvF}b
zJ^a^!pnv;kJmFLSq7wcVQOVGv-(>RAzmv(qt)H8E9)4?2civ3-;*lA8@#?g8aFk^%
zxJiLgJgpX-^x)YsiL*ew<k>~>m2>iv&YzoP=tLI>ta|w`PCLIKvE6qt-DBygNTVzy
zbu^)oT*n@+?vP`-BB-NW8CU7rv*Un2KaIWWnXm{KB%tacd-yUSljVa6j5r~W(niEc
zLR-e=fzea2e#w?aat6-o{g9VPN(x&*ZwE&DZnxi;L}7B~pVdhf;!flL!IJC4FrzvZ
zfDorG<jMfl-#7TdB7#|H_bL1@2${gU5DC*r+<vqI(u(#g#~r)$s*a;mPxzCuM#m=(
z+6P#iMRjRvd+r~#ya5Sq7aBZ%#`dA{QCF;b*p)9IM}85H_QRKC^dDY&#&j{RP0B^h
zLU%FitTe;XB#{vcCRFz@kvhtm@#DighEh(Fc^q%YtwW4jnMLRGUMD5|rjA#_mTqRt
z%#|NKS9Q(XWM_qS3%4UfBh^7%AxSHb>4kga0K&tdN7<lmhka2@gcGwF-><L+x6U)Y
zg10lO^>mGE9193)2a$uhc?0yw|6-2AF$9;tEW89E5KhR-U5ZOM-uS-eL7ewk9&P0x
zc;f-o@hgz!1a4rls*+{5Wtd~yj{|TsJ25P6CCmiRxfjrtTOJbw|0p1HrYd7lE3Eu^
zbM~7G?J(0!=L`Ads}K7Vg2ybciCC~w3LPbmb`}~wUG`-SoQMh2RFv+%6~-@><0puC
z6?BCSN;hqp`e1+LlI@gVE{!jH45=bR$CJT6=&`1ubfqIa$e&33AR*wUL(eQrE$Cru
zEn+vFZNV?8nEKa6Z%nLYSJd|{2CgF}g%9SHabf_`b2a}B4IkC#Y-d#W4qEJ^c0r9w
z*5t!aB8Q22ZLS*|e<cetDh~>!-}rF%i^1d%kW6j<O<iY#&=?ANtoUqY_d#n=@z=*J
zrNH|wL+hgZPTQ<V@T7%(k^kk`sxcv%iM*85=PSn`-QiW4FiL^gl90geZWVrwjix;(
zvJP6c0@%@kD_t75AN}nD;HCL;tC^A0<WNG<K?oBG<efHKyOoCM7Vi&O2JOx<wf5i=
zxe(K3v4)%lD8VRm*_1^!Cz=N>zAUTtaexYqCBQ*c`eFa*T?SK2O8FSx9HaUQ@e*Z;
z;~Nc~zrGgTuTL{__7LvJ(Z;D|@S<~c6R%og6Ox^<7um!=o;MgS6UyP=Mmj7=nfS$A
zFOn^nCRerJk=^}46_damA{tL&shiCA?sE|P<?hzxv@wsKhroksiOFK?u(e<N9@hnR
zO+8pap<7w*6%)Dix)LB<Cc$#br8kGwCc`GxdYJo7YteZaJ~ka*naQ1mO?C<SixTJ3
zCnK0PULOv^w)raD$sSJ4yhM4SP|xs=$L$xT%C3`>8ckyjnn%&47Q%U|p+>9p$41>#
z0Xvvdn*3RcSC$$oJS}S4>gW1&g4b}I@u`&qG<eFUSVfQ?K1l6p2NP7?)+J+r%|eCe
z4|OU*(S2(94XIh3Enb_ojz~S<2*kjw7|`HVF=isi9k6B2IAKoBnS(tXLjPZXewN^i
z8f1tdJF7m2e`6!^WFzK(8JGFJ-2ygrM8)#}|L3pk7ZdtTADW+(?$v!OIYYpJch!VQ
zNAeP92&krZy+-(XOQlShTzPUp?7s2`Rmmh=Vtdy~XMg2Os-D*ev2)uh)r!h>Osgup
z91rMNo=j^dM_4mehkw*4S|t37D&{J4u=_Wv_*C`ll)PPF{c#^>ISe$FpWM+Rj=(J4
zTWF{lzy%Z%YxNeMpN<xA^?>uxsI3DWy8*;-J2+p#s!efw=U7XlJI?PbfbB+kfN-L!
zh!V?Jp%XgUXwH<hQQ}*_PBu=&T(k?A(y{gnCx2tb@rY9W{E!dmfm5%qkKO6FUl<>!
z)gV;s^*C)W{8v{&=o<VL2d!AMoT@#e-SX2nk8I~}U}X7S2T7h6T-ZMEnd$2shG)#4
z$k20h3vk6Slq{)WY9~=Zb$RGv^4|Z*eK|<F8tb=^7M%$c3;MN<(Xz3$&(Z^}1o84?
z1Z#ELSnx_w^#^POg{I$_65TgX9kYcbF}4C`yz*x`k`+;3sRqa4`j0s}bTKZzU8&Iq
z8l$nuF31gK{BNT~F|4)vhn%<Mj^C?Zem(h>`pXfC7x-QQibR*Y24jxvwTu}vvm6SC
zl^tolsd#F`o(vZIi9{O9H}_ugHuqvviSKqs@Am9DfH@dEQ4CA0DTQ_3AKPalpIf3_
zU2$r6HZkuRwNeN`><D7&{f@N9yuZz%+lb-5_U|W0#LP>*e`RCG)WB^hp5lqi&?w4T
ze@Lu2*>~nEu6wJ(>6fQte`V&*%M)a%p;FVd&kE8(>8RAy=%rbUe8A}?;v()6A@vlI
z^j|f@=^M^*Zg`C6q5W>@kOiQIEd`6Gay&(MZol+*<$gFf?3TifK4LYsI7ku0Fs5Hh
z%WzG0(R~<5V+MBKlhn5qaJ{#Dt3>nU=$)2f#dBLv!#g$$?CSDU!fG>dR|C2a@eJA2
zj$Q2H`r^Lq-+MgXp${hQO}1&}*Q<y0IA0#{VhoC3BqSjQdS6<6Vz_Z7o&!>OP57sX
zWDnHpe1g{|PHWP(<&h)G!e<!wl-ZT0yGsiVRddISiO37w&kugGua45VlVv#eX?=Fm
z7Z>*8X?{^MGbC_afmzAW?pU2oj{@Jr4shX@!}ZFfKuYp&*>4tv_&JI)UA+RPQpn|;
zWPu;p{@rT|+<L`(WuLpQ1TNEuBl29^8C3_wWIJG!efhdJr@lnvz!IZ}(*`&ZZDRp$
zcs@Gc6Z)s&c^z325OZvJ2>5?CY|$dp2&g)GS4F5qtGs(rW?e-G9st1~I7a9Y_KIFm
zWH3`nmUes@ApNL%=<Ew8Cl`UR;dfy|`?yZHJL3a(O4U}VcJ(cY?V9YDUwHmSHia@5
z3oZPwLodYVNc9@CrN;F#+Zr7}!&K0=-}<l<ZAnZ`my4*FUPD{*>JNiD3gOxz1VAs;
zN#1ogPb6GgI_+tYPNah(rb8D;3A)0cVa@cjOtMZ*5GA`u+bH@#v`Q=ohodIeibe=H
z_6*yhR)GM%p6u_h(I|_M(AkZ~xTQ{wf+7J76OvqcCoi>+^g{eIUCEX8jv~_KXv3`B
z^Qk5Uy}fg<$R89h%mVi>u1NMcZ&+N5@B&Vr2=D$+(FkmX#L5%^=#mko$Rv%Zx|=Dx
zAkYNPVB(KKw@>R{O<di}+8Rep`3FvTlIa_89eg%ry0F2DcSU*TN>@LD_ZFww6(V?t
zaK%}15AG`?LRKPH&bdp;QKp)oOO7H2NYumzljc?nxV|<gA_i|Skb2%3B(T4e96;Ns
zLqDvNOSAOsj;O<gqVt;ECGwm2qT{Ugngu;iaId_lg6sibbL@;ZqGea^R~yue-L%JM
z*<tO(I$oDMMXlCLJR~1R*?@7n3Kr6gQe2|DfHIp+i?d-dv_31c>vG3R51>qa0dk#>
zZ9N0U&Bu)D56BR`TEgiYBOUmp;2&pz;l<3Ih#Ym_-YImcy&>DY{wOO45z>H`5`9_R
zgC3HBdGtkz!s6D7fnFc@uf*x_ZJRgG+Cm%LeU{&v(!F&pl*}nQKf5(M@UnxkE}tiZ
zw;DZwnni7J_B%Jq_7|cjjtF|6hczBIR|+5$6lc||{J1|r?&Q#+Ii0~o4qz&mC(^V%
zP68G0C9E1hoT=L|c8}V|whFpkHSnDQz)#fLI#`ptGm(M$bm4{=k62}Do5AF-BOF<R
zUYKpY_m>Y~3kUoBqRb2VuQMO_BmeVr5R(l``;}!_1D5b^1~3H23$UwM1nB$v)sMCF
zOeU<~K<_KPgf*8w^lin8F$R8#SfIt^d+MrmPux3l*u~ld#VBPiVUl<c{MBV@#YR`_
zm}}m*-))+ce2TDZoX1<X%?t6ibXudNpEPYP4jARG78Y^CkK+E0>ueGkbNaR0;BT4*
z>oL}cz<wqaS#<%YQq5F{@93IWsG1tK6#{5NN8WbON2h|=SAeeWhuREcjgK1dy`R2}
zMOVh3qZiOy7eFR{-v-+rMu%HfRtyL(EodF_0KL7{g)7Z@csmU*QM#bsq+i-?D@{f0
zU{<gy8OCLUSA(m>P<W9yVh_X~jjkHUOpb%Q4uD_nzV-|}@mf2+&_;{)nFZM#z>|Po
zbn>LQvg7CfC!Y>{z{Awei2+qe?tI2DPPo{9#9gSPq!r%G&yW0u4-z@f)~mhGB6+vo
zSI0db7#903=F+ndk#g~F;A{97g?yRKBu2>dvX>BPOlfcvTdod`enJIF`ox{Vy&r9?
zob+kWUx<%A2pv(OJMUIEO4zViYA^Ba8h%}Tl_Ce?=>xvqs$Ij95|-RpSk9d_EjGv6
z^*!Z&!RRtv456q{-!hIZI2=ru39kkU+IxVx98Ln<(14Z3YMq?V9fMTR)xkhPrc&Vp
zG^!;LQ`doEDTFNgws}h#4VIbBk+DG6zTTxEwj`tCx>)iGeXq+fPKZBOdg^5JweYRO
z#4h@qSY0=q3lAIOCLQOc;$hpj#9Sq6bQ?eAkK=kz`QuL<mY5f3uU(m3yo!u2kk}uR
zuG|p<Zs14NR9Q4Gnl+;9^DYN2Q_#PrqYg&2VAqW8kAFmizt?8esi*a@_LSXDJbswr
z!W+Uu)qbsEv@{W6F*v_*7_As;jxz=$J~NvyX<jreA?^B{C!7ysc!~2`rBr3?%^K)4
z_H2-nh1tKY9Kru;ewI2aXgS`3mH5ML+RMY-&jd6ZQPwP@7U?}3pr7#@J)Sj3X=<~6
z(YqMkOgfsJ^NjT>RDhuCz+2S5d)^yk=c|m4rYkm6=-^SP72=_O?6wHV%D-vjIHc{T
zC!Ap3LFv=bxICpEZU@AG-YFuz`oT4YhP)VQ?6y0~t0HB~1IY2?aNQtF$Vz>RDGqqu
zT7|5h0P@nRN^kB(29J8k@YI*~_$4?1UVnH-SMj^p*6qyF<5>E;|LEzxnhcuwERlG&
zey?&%*&$$SBanR6n{vt_NSCrC`DtFQrE6OSA*FXNzk%zZVifh?(%^jT;BEnzBjYz}
zpQ8IqTL`_Iak`Y>6i5^7dy1H)bsv0gwGqI(Zk8pa-)cEgBC*Z<k;=na`G+DC0)#pK
z1sh9YU!b0pTTCpje15zm*;Ti+F8ujtTvv_H=3)Ll-D83AIl7Y?6z<*v?CzsUJeR9;
z^6$*Kn>Hb9^$7>!{R0-o)|`!`u>t3f5+5H2hXlzTvgPxYKjtnjU<kj*bKND44r*8s
zlu88D39?V_LHfJvF4fP!MtA`W_Vm~(0hH+2e^kIal2lRj0WxR~_L~%$^0pIy9~wlb
z(q^O`aw4_)-GQm8Bt=2B(&c{q5s(L8#VaB*9b=)owfo?}!I=wdm580|i_D~T;Z5=R
zc3u}tI=#3Ro8L!^<;BEZ>45k~jk#O~Yb7La_en>Lc;B@|p+B8B{Ku6dxTvy2OJRwn
z%_k{9GR*0<#MKz&%VJzQdKAOifu7gSN64ht#D$I%dOwM%D58SG!nD`3O8(O?e1!80
zQ^$M_B*L#xf+uF$b+?|dzH*j$Pq=c+zrV`2V7@!uVUHj5`Bs1NJXHRKhV(<r^v^%L
z^pja4P4$AkvS|8#*u+XnYtj>pXNarfLO5yMx|y(olpLDuKd|KcXO9W`ZTQ0s9#UC|
z-yU#rWLJ24`LS+;<s}gprlQn^63BOf#KQI~hp@uipBO0LFe0@h2d-**j$FEn*0$M6
z+1t^=xrRzEq0gc~Wh8{D*X>D&vgO?@6_&-`ygYyHy8G=ZxlEFw>ogniC0?@Cne;OI
z_I4;Bk(HKaT?fI<iy(aQqai&TQrniDX0N6g<Ce?00%Lb3Dlq}NZurH*<@IBg-&JJ`
zy;|=U$3L>Pf@MATTf&Wss~9X)S?##X2UaP!Xx-247Q!EmJxF*Fx4Sd9GWvB@Xa25Y
z$4`TomQTKlWM^dibv^st=CfH51hQc`JgI`_RcrlUB=RN7m%fK2H_vrH-VdnOxi9%!
zHZ0P@yANFBHW(F$PgD}ec-%XqL-)enhsr;v6)<xhXZzU*#J5glHLzk!N~OXh`r|>a
zf%I&;3v*%cqFzXx`QZ+Zk&id4N0*#zj$pxC?R)%N>r~e*P$kp6YD}6rHZcpYpj(*&
z(`X~F?Rl-?1Bwh_YyvVDRGdgDt}&Hi`noj(Y%Sn~<lVE!p~f`Y6Aj4Y-nfc+u{v#q
zxX|k-FP3MI5hn|M(U9clE5F<KCUrQL#jt@z<bPe<fg&rT@d7`|x827F>0@?{3j19<
z>(@Cuy#n)4RvQ<#C)e}k#KsinSqv#%4!6>cIcJ?}9_~M=NeiJ#MRuL72O8mPHK{~E
zBW&6nr^_K|@*`CskDrovHAO1#Rwu9sKYF599Y+OSdyV&(L|D}_j_Lo#8bjMzq>Ow1
zhK)~@{sbG#!;}GRtR46obM#t<f4RK)R5ei8{&4}Ap6)z8^}nlB5PXFr23Cy=Zo1{5
zPZMU^AYs-*|82Cj(hi-)D!(F&g}nSr8(fS4J`6gMV_}dd<67S3by)jolo@zpGH~8A
z5wUZB1?%7^>}x1emGF<~cpN6o^Vk}P3EQ;dFyUPZ%7HcIU6L`Fx{lpm^#>;|0orf(
zI#0d9hwS0WY(N|wU5x>wq05fOu)_i_bbVUrtN3=-p6JqF`b2SWJ}BI;{!d0&^EV^B
zrL5M5V360edo2t=A#Enly#s6*r0ItA0pF=R*cZ5ivlJB`NkjKBk1+XA!f}%%8$(c2
z@h=Q`tYr1qI5IkRxRYBtL}I2V;Vx{UrN&@WZNNk3QSRFmXj~C;6>vqbS`t$(rCtsU
zJwWv02w}na_<3^Ak&f-1;|?4l{1oZB4&TO-CW}tK#93SVyeFP4zxV*$IIzXBx=zQs
zS$bvzE#34uF3e~v7uEQ6k^r>=by+MfZJMO5Un_8We?;Q$k7({*zCSicNPf!WPied<
zM0cJK4%9|I{`8=|v&QP*UBW&>Trl=3uGXD_p18n$>&dw>fA(y0;)UT6SQoYnxV*o(
z#!1q_C!3;dr)l(vzUah#3b4Ggcl=dr^L>)n*%iD?&u>-^7tnCXOCt7MYJKB-yh$<>
zv1JjvzMDs4)CQ67Bh|A7dg*=dqN+cz^C7azNY_E@_0<Moor<lhn-h`rK?;cd&YL8s
z4-P0?1Xt7qWU$^5F@DZWbmNd?{=Z3KQ(qSc^$v9y_}C5L_HLaMSaB{(IKDV3yrKfw
zi5D2$wvsx~obqNpFy4UqjKJo*@tTzmX8%(!T($R){C5ET*lnMtkLkyEUC(rq$H|4a
zqg)^7yt|>W27qA;+WfSC0mDEp%pE4+wa$nA+b-<o5A4F|h1MwjEkdc@yQjdgyxB<j
z0sw}Ib73}lww}>gw=Ql+Npdnnxe>pD@;%h|4G3})*0z}&NYfD4iSp8|?lTrQ47(`g
zkGu<4nD2cRsvJhW)ii;2aBs{)0k?3BqEdU?+INnHqz&9Q5+H@BD$@`BRYRaaS%Fm%
zp*>b4L)TS;*!qFOAZ%^d*q+SY;=`)(I&@4#>T~$s$|EA<cO~tifYN50*@0Kw1W~=Y
z)!9G_2pu$X&3YZ`k)-`XGfz$4cCWS<<-vI>#@)}uX7(JR?jFoyBFg~m4h&cmEB~m$
z|4J)T!cSoTr=h^r6xW;c#`DA9OwBGBXPkH!$hQ-B2t~>0*FTd%-PN&8cIdt>i$7Bs
zz|x>J%}Py);s+f!Uydr*IMf-ncIUFsQ=a!*LKmCx6E;FT9fuLgsumB<p6fA%-%FlX
zR=)H4J<4ldGN>Q2iBqyvqp#G=LDH%1q!kS=lACV(uky;*#W5}1vNlIjL2mZo>&55V
zD5H6BTg^Te=3PUFKiV-ey>o&Sg~QUMoTuCLOLlXJYB#G~PF|oX(fWc#Ch`t*$OiCT
z0*+y>&0XFXJvhg(h0x3v{9w5Qrn$jC_0IKZ#Qa;>M9hET!a6uyc%$KnI6OV&LP-`j
zU!CwA)^V8`udWa9*!}|GV`U;6^3yU6gYGza2^O3^D7};NTdqI|zD7savXgdB;feoh
z#ChC$Uu*@pp}?BE{!E?2Fj?0>!r<dr;cxopsn2a*hO*g=zOZPm5MFGXJYI5-9M*qs
zw7Vs$l`%NEHo|G%ha2e&kX#dZ$~wZ^^NH{l&<cw`5wz1!bQGi1j(NY~;ngc~cH&9&
zn&57EcnaroC!kSW6=bRMIzh5+t>H1|YswEL6if8B7T;c8zEKG`E_!>u<vQ5CSGQ^$
zB?;z)(z6o);en&}3yAlnr$WG}F4*^0){DdF3zb`uzm3A#w~SHGWCL!p^780Y4ZO@u
z2$e(}u4U59mQp~nAUlsRMHam6V7mf9(f-DSkAEEwG@P~$!19Z*rS@G?Z$Tuvq}o;G
z&<9|o;s)=p;d@kc`}BB>$F+gv@#CZa*al+Uog3!?z4wyOm9aNQi)W}+#*`@5x`m_*
zFCGCl*n-kASvzs**4Tt0dv~LxNC9UyNM~7JqiO!=)E$yvE^zrF5h;~oclB+n*;AqL
z;yRP|ea&2-nx!s42gh^q1_~GvZyn~9qn+yhhYr3mz#TsJva9&1yLo2&))8t;;TfWk
zQ$C>rl?=7u`r&_^fW3Z@jH1MT+W@H>W8?<?CW5$Rx6S=5gG#MVb5P?4$n;0*wy0A^
zLUL<qdO|GbKJxf!`+r-7i$*CVZr8@qoufX2ZAFO{>dYxj?$}3F&L@EVmtcw)RbxBg
zFi=CaAT0ZAIHTnVJ)7RNC3fKOlx?YTxGD;=z~+mtAv@g|mx}};fx_J6e`LL0wQEL=
zHv+Y=>EYxa^kT&#V18$2dAu(Hw=l<d%@B9d5)ZVSf1P@EN?2hn&=j=9;Jdf-Y)kE`
z<d9p@+~#{aUhtHkCqtr>B~iU-jG0a2O>6W^DNBcrH<LzlN(7u_4D|Cqs)AH>zQoSO
zAUmp{6RwQ6T2tYv4|O?PTIt_{=qKzpG38v>ZAh+*yjby2R6XXXXm?4Juj*{~P!z!+
z>{gSzy>zzLeR@5HW>ee(1*WD+`i5wX$a{d6Y#9FHDQ?$cqD>v*mcn{ZSrgT#uF(Gv
z`kNp9P4CM`NQ!9T8w|D<gIk3&2@)eM<Q8{oxGag1v9(H7bOw)h?E0{>ZoDfa*MEFD
z8knMLe9YUwg8Re*1?;S0H_!NKBi#}=Dylf#pZL?Lh>q}cOJdm2<O_X)N4B&NK0yEF
z4c?><O0373gYUgA^`*4+sj_b5Y^U8R_u?RnA6kDel@At}{2TF$a8AD?b<P4I8pm$=
z+ZSBu`y?NJlH6m?;EM0_IR=vWSRl{uZ7O`$2FLo!X+++zKxDY~LGuzQVn5+E?^iDl
z^g?@F=1$60m(wbI3N|*Rz$Ox44E}DI1(~2HGQ(voVi{|=fT4JO-eMM64@1sP6|P_a
zQ4Ji-Ktp3<jA`BS$byJ81C6_GcneU@Y(5eZo2Lo<5pA_DVYFk9qs*397P@N=tb!8y
zQRI(PA=smPkD@gaR+a@no#`^|VT29uM1!q4nr;cR<$e&F%DP^NzVkHG^xaYBrid$i
zlVVH_b|{~N_0rC?-)CmoW$=W55G;-S$rpU#FJJHnW$%0;7-~r?`Y-(VC4x06f7R1D
z?1H<vu5Xv0S<y#tw}@)xleIXR>xnDR)L}F$Vc6A6#tw74m99STeec~ZaaJEWJE8r#
zdX@6oU&`QYVc1Bb?d^KR``)dYm4x@moA!`ev3;aNMb5$sO64BpQ(iBPyf?GVO4(;r
z5ReAjy4M49m7%&jwe@WdbGuN*NBH&EQI9de9R`*$tUX_uvIl`#aLVC4!6HXUGVRDW
zn4@YZ!ryf7*T$PFxQPYmf?fFV6711)?l1P&U+@Msd&Ms<EDWq?a)cup%`umkzCuA;
zre!=F`ZWjW+G&a?eu-fRE*U3C{njG=e;!&mFw)(^?;xwWQi&I~mF5ZAw6bd*8vnqs
z$n^DzqxJwB-z*<fx3cMH&rcNejxzN;T;&0z*_e)j4lVe60oSFUGF&kb)i3;)3ZW$E
zST92!<2Hrcon0KN>{xG&B{|{RWp#eae3QH<=tal<)wLxsm{6d|Tg*Npfb`^_K&8}&
zG`uyktlS69<6X2_&sN6i8)<hzxPRpN@&4p1#e^J~{<|djF*-WkH1YJ8T!^1U3j4+K
znONZl=iz`op#JZk^HX!UvFzS<vTA&(ub&?0R-W4>FpRvDacW<M2~nr2S3pjPS-!RN
z=00f8Tu~qUPj2^lQ05{xmH>a9k!0{yc&_0>vc1U7lVj-c^!QFR@2bu5_CHzza_SG>
zojG23c2sP{QV_YCH#N3ErhR>A(hT<GI9AFTGpDe~@^g!;>amlezA~~`!!xAbz)cRo
zy4LID*t>!4ECRm%6Nf<mrBbhe>A7%|S|s>K(6rh+*KWb)M67FCj3et33uB$qJr$7*
z1-%iMY}`<@9)*Bt?_QWCJLW8H)O>lBg~g(@QCdA>>6;MabcmR=<){7{O9Z_G&NKD0
z8pVJ-ZUmbaikqvIbS@NrFGSLRwmal};IuiUI$l4o;|g6E0CFOI8orDgXT`&vxzoYf
zPG%vP+(enJdYMCMtH6F4fQQ{pt(x5$@WCSQ9_;MxIk+kp$5XI~hm@i)ov1|HU2HrI
z*k?`&-3fz#)Xi?ZP(&q3T^qqky|f#Q>i7{t;_!OyyJq{Uz=UPJQSR{5%XwU!XG~q9
zhj=oy4C?K)0HMoBl2@&OZhdjgT25umyX@1|>9Bhu*MAS+mgRm*L5i2L{+WL#bVaHd
zp5gSto%qq7rH4(Sy&aCdN5rKvyO*`#wE>WK?k^gh<_9Rcvs-#$-Wp=xe-7*m<{dni
zM~1>jICB^Gz$(_eexcqBD^t>!V{S1^{Subf&0C$N1>Msqd}=mdrGPL@{PwwM!)UPq
z`?=k=cr#0mA3qq7RaNh1j{Xt`FIDpMC4!|N{HID)gB`F9*T(~NgSY?>)}Ic3z2JH<
zummJswgi-t3%jFu;=cv;FLB49=)%aKD#;5<KhaCN0!DMA)mKU%%zAlU6$)vbTU58B
zY$tE$-rjrM<1|Ot?k32#_r{pxU}Hb>gt0%ddm~%!P4{Zg<E$iFm>T2&_^Wc5(g|Pd
zDc)TsNPPxpzs85RWKKEv5w%3xiqFB8=BB&ENym~g8jrqzM5eI^>$AZ(XGLT}20tl8
zIkOfjw0HsXHj9RGirg@ak#%OA13SX0G34NYaf9m%Vw{}L3t+^2#$##QKH0scYN)c(
zxx9VFSLCXKtI74BZzt#o)^B-H{Yv*Y<#@lca=sq@=3O%LZO!Xynig7H;^s2#d@rCX
zQf&e^BtjZ}WU)U!)~(=fNo<!#Q`kJ<<+icDw@uz@!~)5%EA1k^b<4FP-^3EV`U>yE
zQYUx<a5IczUq93S+0XUL=lzd9ct!rI@x0V>+J3cqT&BWOKKuPNV|#84N7I`|*&OQ7
zSoN;zAPPbdwXW#{yEDIfr(6---X&1J0|~HP)O=eqorTi~9t!$8-IZC3<JbTKXu`9v
z?tfk0Bew?oQ578fms0QI``@~TPjnW{;c3s^IG_v=zeadY;uhiQU;q8X6rMk~;zZh9
zh<Q@17v6y0bo*Cb;iS+&U>PnXFrt!*+4)eyLC;m(Q1?OUhb=^JU2maRhW{7F$Ajq`
zm+AhMGtYv>uD)qZ!kSAl+r7$9yGUs7a^IxjYH9Xa`HwLRw*IbaqHO!02&zQ4*DVHu
z4vpd9IBQ{|y%0f-5pEwenGJ@$xS<+iMyDEF`DC?pjaYu4Al^8Ze~k$(rWatkzA(x2
zlojcryfE<YfURdVr7&vt>m6WW9MMr)c|t3oOjniAa*)stok^#7Qog2YdSp2WlR9_L
z1f$xokUaz75@vqim+wn%O0{t<+Z+hh<SiRNe%r|Pp)6Lr%U<SaIU9ROvKPgR%~yF;
zg@Ej&rQmx)eleAd^utX6@X*SFg!cAfe=_b>EhNN|V69-sv;gO=!jgRjIIsJ{v6wJU
zG8~ld()8-$>7xx42X>7M679aP!L$GSA^+=%&qlq9x;QK-bpL_<>UU`=702GJj8C^p
zKi2+KtM65k41J{hiJe*^hM-eb<N>X9{a!XlZR&2p`;Vlm=dIU_;T}RHpAF3}sZ$Gz
zzdiqH*z3#1_id3KJfvRnW|S81g<l^83!NXV*329Xl<p?UUoW7Wlsfk!8~b>!Co3;q
z2$+W7Mim4fC<Z&UKy#@UCRhC*Z|w06DnjDui%Jy{wq`|9)w($l<XvKFET0(Xf*fEO
zyduTEM&U!yhmwao5$#YT5v4vPsZht$yR)KTPDAQ%2-f5n4M6zX=R&V1V`I5~iDGxa
zim-(>Vix%J#)@&u6JE>!>Le2eImEs`*O9Fb_T2SwT1Y+H!4Z<Mh49<IpH3suUvl9;
zJ~xS<IwUbI<O#tw;%CM~!7_#OGE@}YDNL7?s2*1G$o^#)X0dacpzzL@&^GuC@4s@V
z^9`XYduJjEK_tt&iM3hyF;(6S`2CvkJoA?SlIy%KKzMD3lPjr1)lwOpKepcq%l){`
zXWc4Y+oO3QcdT1CO;U1{Z>Iw^l-@?cekMJ5b|=n}1mlfwlS}bN;X-dLr-Jq98M|Do
z%o%@(YQ<`4zc}{L*i%femB?6l=<VllOfZJU5Lkxujz2!2T_cqEo8!F@HTIDaZN=Qa
zVizrjtwFXl&pN$)!$i9gY2aJ2SGq2PO~FdTeImpZzDKIoQrX5LI4!6o9!Flr{(69g
z`1-9KuT~@BhW%f-u}2<XfTY=nwjo!DE2Wppn&jv?Df;9Aa=dx!7`^c9Y<~?cD}(gu
zdl-&;GsADG8*%^V3GmnB|8;Fw8UH!-JmvZCR?lMuf+hN&=&(a6R?MF#UI@D?z@C$I
zMWCqv{CA@JePui}JCBLUsA?T9#rHDG&Ol0A#(g$-rC}F@@S7Nj->M5-zvitT66I#`
zD<+h9&&tRp>7Lp{K2njAd1Gd|TXD@wnn#QL!xTf<L|JSN3fiROl0$ZuMcp(E(iKT^
zp<Pm7LF_dJtI8Sxc^}}nP$4Vkzn9z-I&_Xeb2dF-HWaxw<$CApjTg>OL`fM~;s#;Y
z@HU;S?ge#7M&sVqk<mfeq7o^dL7@cx-pyR3139QEltuc+eKKhqD8d2PYs`B7^_6uw
zr&Y$m_@}x&(~q##pgkBdO)+qL2jnXUu`d^9->qnybT{mdBck8!h-rq_cJGUo@vG&c
zpoks!28b*6bwJ%RQ7hmPD06kjSD}dGaU^zr6y?#+adAKd_k_z_@gJ-#Ruu*PUtj2N
zM8+1OIS1n5vq7N|<=o#H$A1yuFhd@`=b$2RilO<jUoLrL^edfSUv16Ap$ZBnR!7Rd
zao?Hoc`zXxUP1Vyhbe3yCJAfz)3cgylk_;hS=BgvmKMq1$swmW_a&a!y0%;MM&;}e
z;dV@2r1}ruTvJwG>uP+*57L?4D6Zn?cMX)*GH>nC?^9#q<g56WwJ38R?CRE^`=D~7
zmb<T7{#AWcq!q(y)Q4S7R!LlmRVOPZQK<s!@4$)BUn6+O_I7fJ$`BWF9BDNUo^YX<
zL}bJ6J|F*z+Et=9*_V1x*jPN@D}#nN@MJ6>q0$LO>qQ1o9x?~m8M7glft*6h0Tg+Z
zdJ0qzt2*$6gpqq)a)6L6JTP_r`F5Db%S^|$0`;xK2*}rt9q+p`zg93HFqZ`(*u8XA
znxnXD+`Lz}IC=VT8yRq0Atc{F6P5hK7Y^Uw=T>8um22Dw_7t6{4nvrpcB(<f$~p(;
z+GuHB4-4U&;kJ`Ge)04G?y+~euiRAqbdKq71|irs-Q&!V7v#=d;<t5Hq<AB4Ch*G~
zFV^TM`Nv57+e}$kTJ?y-u1r_nzN7WzrfaHO_Ip2mhdU;@l7+DtSrpXpXlu7^!75Cu
zVKxA^`t7;wh#iA|=7M-33tD$XgQ_I%5giw^fGuLQkl9HuYY;1kG!7tZlI16Pjd@@6
zeBvG(m7e${NnK<`lEnRQ^fnQ#2@+$)QrH<Q)P@^;$UY*H=2pgI_2bxK{UzdkBgWbD
zrrOJugFHx&5g47MzY^Km=GJ?XZ}6DDM%5IpOjA%w-g~C_BN|iu=@~cnmc}{TdO0hR
zM!3t~`m9+NPox*0Oasl+Z-B=X?Ku6GatZnJW<+6VHD(&xp%^AwY)FyBcN<@3uDf%Y
zhtW>rEyYlfyFJ2^E{~GkeDd)0g@2BleLde_S_=GQr9tl>?J)3-kVDPjb1|>&HUNVe
z@>pP6RiV=Mzxvebxdz>AtF;O^J7b~WvfZrSZ<A9H#z%P~>9750ebWf{S9{<Zb0rs_
zi*Hw0trPae;psv9V?w54MWH;F!mpRt9H{DS2R+%}Vcrb9hV1ln70L?U?3RAlj1a^~
zrP>S1Ol$mfQ>1)$i#uA04SB#J?H-;|Frx^m`nlgRjBPvT9!Z%_C?#qj$2_BDz1-?9
z4NOi;xlhFL&VL7w38b`^E=^U5biOf>zkj`67c<q|x3BGO%5FwSaTG1!|B!D^0i(%;
zramt|T$rb8T2OcjwXrw7G4a;$KHT|rxO3}U3Z;@;fpgOPJ%?dhV0MnokxgrM6I)Oz
zExF%eaMdLl+y4xd5?*e@*J}v;!J%<88n`rWsx<q)`r`?5EctVIbRuiuGpWksEzkKZ
z;h%?tSK1!kuPE!wh}hFU$-DQ!jVOy@vd&*KYxFEtES_49nK3JSZyvU}<=ALpq?*h>
zvaG~TJ-*1`huzihiF^Ntv$u|_a_buQB~&m#O1ev=yV)YrEh*B7ba%)GR8YD>qy!|T
zq(im>(ji?U-BJRw!OgyZYjd7weDCv|Gk)JYzJDFVGdwqox#pVln%8{(Xum2C>tpY)
zDfOKeK9+dz@7y06nfa>u?>S66vz4i*a5DSW_S9JIAbi#-cK5&DvjrNhtoaDoYQL8<
zHXgaH@>zVR$OM_)l&IX9C|Z?IZ4+riByv{o&CVRK=YiVv8hl@#LKTh9{E7r$shIhy
zNwnB_N!So|7mULs^`X$25B2g8lV`2B6)xwUI7$ksDNW39#qLw|d}*Qb`x6`3Qu=8{
zgZGs~Od__M9!U#y<z##IIq6?eaD2u0IF)~e16fQsc+OxY^WY!Dfs5h-NsK4zhMcYk
z35Q@fRgDG7(~=?AjnELCA#Oz=6;8u_LRN{Pe^bJK4G7uWkm&2P{&;+QS17a)(nB@V
zwhltq8Hirl;COw9Kfl5m64R1fet=HD;muI2?`xy=_!r*YlZdJy&YZ_*>^GinnJR|5
ziDHkwPO`kk@%V8mGC;J2>}r`Zt4`5|ZBr^|*51y|H>(9kyqnobuC~{gnr5)Z3U;H=
zPS`jFw;9#~1nXAgeS4<0AK^yhBj<NdXhEyRq$;xZ%V1+3+W1*?!t8H1BZtyoL#cGz
ze`F{1d+1fXX2K)Uyh#W;$p2V~DCl$>X8s0KouKK=2Gxym3V3UMY$|MVwHrP=4jZxl
zX`^}O!f_7U{Qf%rh{{tKX1s=2;2z?6J>c|fPqwI&e|s*R3Z7+(?kS(VLZ7(n7Pq(T
z=Q1}-{cBtRO12kx$Si}KX2)4WgM?-~5v={C$MGKc*dQII!K^owxWu%^{8y6kUts}A
z$=inojki`OaEwaV=lWJ>8dW#@E11Y+8@~i}dre(RjdMt?e0oS_RyE_UXmvK_yS?K3
zXKyZgLE1YSi`DraG1BVYkNksBbWI!w%l-Nl1(%U4vQ83@-B`E$enuB$zH=<i!~Ggt
z4}l-Su$X*{osx^dcx8U(%LNj*%TgZoq$y6qMqw^`gN=8D-yy_sDOawB<-+!EY{D4{
z%v^@jBd*NlD^hFAdm5)FIsvQzLs_IIyz@%+@f-xNkdzkhRbT3JP73@{^|PUCYW*6~
z;wkbUWM@YvVquyUXWr2k&%xrhC9rcV{J;iZ12>yz=u7#6N_8=Eq@P3KeiCJ=@!M0;
zDn_o(R|06Qd6@tDSpIM&HFLM1meSr=c6ry;xl@f{E7ivpa0PKoYFoxLXyU-7T3q-5
zH}?%hFb{Q9n^%EndTLIoVvjo4MG$tx1YyUs)t!G1!qg!LX?)alE4UB6qo02N7J5)=
zq^1*oJuoDG>3R@Sb(t%gZVCejb1_;O2u3mlg{q5yWhVg^7WDiEv<F#eJ0xS>r%nz@
zA_}>Q>dYv9tPjlRFG?a95VYgnj$~}y&Up5!FAXctKR&iTkgDo<o3WXF!vqOy{^`Nu
z8)9Om^tR%8kquSEmB|AaVXfNz`lRS18~oRtJ&Flc_&=|VD?-aZ;rL<2;HF@KuLu*1
zoRLd?0vWVVc*V&&%(FN9PCz(Hr-%cUhpJZCq?d@6hj5th-lMUT&-r<^u>{KU+NFAa
zWo2JaK=D}#-xn~^CgveUm2o>Hw$gG2Cl+MXi`(rvr6F>_|B#JY<7wTv)6fcgUe2>N
zfHDY`+Jg0+@pNU|WDYj+2K@<OKU3r)J)b{5x57W%B1n#7x_INsW8#Z^90xBY?nP$(
zpKogO0K72*Gt<uX<dBJ`r-I`oDSu9{d!nkD@eP`thNx+SjZ?$0sL&MO*_e1_NcHFh
z=B{U#IF?SiKdF|d3y>)QPwVJn+SaJ-IF~e%X#qJwAK|G>8phP>@zUR}5v`FsWymNe
z>81YR+0Rk>z4^_UAf^@iF^3nJpLZY{T5tucg%WN!@cLPMwwId4sC`HFj;^zV&QJ^A
zRn}+iI%m8$=37kL_$|H*hp}{Ah9?ck7K^;62L7Gfe4XL0M4UhR43d?jIAaO%KHRr+
zb=4R`j&3Vyb-k!M-~LtTBiH2rmGe6n)NLKY21O}AL4Ux2{Yk4;+a*Do)`bk!3RcVc
zeH;Hxf)S>(`ZfAldN@xII%a=J!qwC%Gj!51xB2F|;GNonqg=>k0#nsctCgr=J7rT5
zKQm}@1tr$eQW+DI=CxmQ1yqk>n}pPNs5jD2U7{^k+z-X$aAZ2;?%ampU>m^NF+veZ
zavc+7a6`(Nz|W(s?QQ2j9{GOy5JEBku&y4zld>T`rfjB|Ra3*l8RGpFIdeY>zdCMV
zj|uZ}_XnyQWjAz?kl@{oZV5__%HaOB+Whb3oAArfo@6Y3qJ>JOhhG-aRqm<fhmog7
zEsAxc+6wqA*u>VK@3*|K(iqNGFBwm9dj7ug{-$FD5%CzSTSZVzHDn6>CkRVX>|Tl-
z63U4M7jw7wj#%3=DEXU-DV`QyCTTJHBTz6z>>a)695swMq=Jh~W0b>s80GNhzm&ta
zRwfq<=UK&w0&T%C{F{FG7hc_<jF1Hv`a_6)h?l#acG3rSzM%ZZELDef;2o$Z`%(|U
zdd@pF^4Vk5>^<g8&!HL*Q3cvhk!#X}4^;(`k_A?!=U{uv`5wLMw{gov<BPL=6{ET(
z2!3^n_n)WwUuC1+=fSb~rOCcE57=*^mUG2#(q#K7{vwz9HXTgZ;Lo2)bmj4VnssJ$
zRT1@<PrbD)+-IflQuqzq9x<Gs?C$F|cTS-q%*;uAz9}9(%wKe{WH4i8o6KF^c+g>w
zvq|AoUD6jtLVL5Dt}_n(Iy_{?Q8cvTAl2}e?X6JDRpxD~XG&?p$hSym1Gj|Y;dxyW
zFLkS@i||A-a+_?$DTo+j9+t0Gkoukr@0nhCg4sIzNwO=dsfi`V-Jm84&COl{$`#<)
z(Hs*GHu}R7c+SY%Un>$@w)4?Kk;SahdOt(Dz!kZMaOm%<U%zzaPEs88T~<n&CwS!8
zuiRhSfn`2$siTuv(=OwL&5A=P-IGOo4Y6i{E$3~T2X%K^`pM^Crpgvf_ERe@O&=RI
zIfEXpJ4d<q>#aK*@V05t8)EjXau)-^<>9<|Dp$fqjYtd^#B47--F_RqzYg?&=6kM$
zxc2PWyrb3cjPe!LB2Nr6x<VTRn~ZjQJo(K^w@k~Se)l`fiex;?%cn_REfcX6^hV8h
zStb+ec+6goz$@JBrTjaQ)QxsDHRen?+zG;->dsPpv?4S_1Cjx?5=>+QYHFjec51HZ
z7YpxrP`NnKYD3_&wIFw(E0TNnn7>4;lBBugFs`)yxsB=zd^&QpZ05ao1Z@lp#CS?o
zH`ctM473>J19Jhd<%&Om{oS9M&P+U*Dx(OVfbq~TzxmNeU@b&i>@~pwXDWc>N#EbG
zM>QJuAAp4sgVvbbj$<#wKyYJLc@2TPG<eE>y}PIBok*wlP|2d{JS@wprhW3rWOo<n
zKjGsPwhtu8??gk9Df4F_DmNK9zE#C!c?*{?je4wG{Pc{NYw*8oZ=avtTHv=3ajJPg
zS9foS%zsv*GMGn^#H2GB`iR{v{xX@Ofy<GdhtfYxh_yOwCL!b!;dkMFi=~+aG`-gz
z6%IT0qlcBvNwMtw=r;1H-jF^Xp8*CO*T?a9A0gX+#@NmdUV0;dowJ|n;M7d#w_3f)
zeg^VvXnLGJx^S=cW^-rG)E)lG10T~`?k1H8HO@%WyjL40EARW6Xn)`jtQbF`H;qWD
zwc|P`c^x^aIwic;kIQ2paa4f7-xkxeJ>or04@qNK2pOVt|9l$w`?&!3b-h>0ph~e^
z%}HuX|0DbS6?!;`c{0g9sCr`_9Q(Ioff%CQM~^WzILzro6#6phm3ITMh)*G-x4W`g
ze|94A6(tgM>dN5P;9lcNs&2Mt^ap!$!l}Z&;;A)#C?7B9JvWDXkZ2n~f&mqcDOTlA
z>mHAR(#ZKdEG4IOvQqEkMYN5S#Z;9T^-M3GfPee!-nZ_ASJ-U}k-8~ohi%wpSRbh|
zXLGG_*-jFPQ`{jLYN)PL;S9wc*Unvz>MiX)ub~0^vx!dTo2kDvXD{qq=eXgN1t0Wq
zSng9N0Cv|nD$YLoDcfVc+-12)QS4iP_|u3vhwRH66V*PBA`bSXF-GBEkQeH;*YLfx
z(2vof1^jdxtr-b3ADjbtw>~UsK9`)8#geCbH%31?&BXa0YJ<m12EP8Vmat*A@3kmf
zNB-OZ_k=qdHaFP#UARCFjU?*)T*y|c;+TE)mnO%WnRP`kEP)johU%l?)Z#$ULv3%X
zyFhi`Jb$om$pOBp150a?o<PitDK`c2V9xjxezD}2!l8LDmbu<Z30efaLbI!EUwDH_
zTFbS%_C$i&y@Fenh|W13`-|Fa-eR7EK?m2p2gXmbffhjkGHWB`V8hO`#T;_;VT`J7
zc%`rJ>fev}y;}`1Z2I}EzWo|YrF0?Hx1>9&)|DUqiq#mcUs>LTpY+eS$2RwUtLhNv
z>e9N6J#SZ3yv&&VFpP_Q;+W#k9#|9>OWWGpIM{Gqd#ZuD_46o_`dbR;-Kd4E-2QdN
zYzsyPZe6;SghSkGWG~6&n*E<1=F@KF!>x3YusL(l(1fd&<#S;#;2tBFDpTlF@2W&4
z=r-o&+m?9WdpEmbcK$lB<f)7@WagT`jO~qVbB-!-)Bc#BSUijXEmMD=n1h4nlxbPC
zPy?X<t-xcEDF-;joT>5i;KHu19ZO2fSB<3;YQ`R~(OVUl{B~L%a?kLX4YdE<lkL-i
zMNubS;cnFOZPDcJeS13cbE0K?SxT<=HP1E>{v@rV8Gk@c8c7jrZ#v-1=(yi=i0nXB
zu0Z{89|1t39<yVaQHPm$)PH~@nf{EyRkxs}V;_gjlz&CzeLjq%!@^aK^B?cT+mWBy
z#fV%F(hF`-IX0+{nfpk*w0=&gw4Pl?>-#hzl#8C*#c0qOpJ&Z;N9JXbe+hw>i2_Ms
z0etC_BOMIE6};Diko+TiDnvC=z#Dyy^EgcC{O8+pWzTEa+~$oRl}9>irz=h3J4EzS
zSCJo7@+mjFY3g3CAHf#px0M#G+9dYbcON`P;y9X{Ah!(%gpr9VWtUSr;55VSlIW%l
zILkRNSWp-%#@65aM#%b-+m=GXf?(n;@}WDUo5KiJ(Ii%LrVu9G&WHpg_p(`qN4a-&
z`~o0+Pw^t8!3FA{&UCaiiO7Q&8|AC0#E3;K7}}J)mF$G;zk8S)7{5!%k2Xf3cts?(
z_pzF0vNH{awmS|VEMXnT-m{f3>xG?Vt2|o}JN%xi2>w&P4-FiAyHN&GKp?zA-xAe)
zI`vVlWj6u7x_rOR^XottqA>Ltf!T6=%+LNW0?5j<LK%!dIN=Q8D-Aa>X-NpU_&wOf
z{2pwQ_W$|k-vf4Rul&594{S?!qYiRNx#*@2x;TH|mG$Vgp(xX^Gc3Lq<;t4Q%@Ess
zqgdtBycPkz(aSFDhwG(|V=@!y><lw9d!HYl9Ms6(DU%&t`XHqLCP>B3lR-U6N@n?I
z7ccvcFO{&4uIn>m6+K^BEgJdWjEQs)HY0V2J=Wg`puyX)9FhZ<g=_N5F2<mfEq*AB
zQkUvg$WxcgK0=G2Mq!M`p%x{cUPpU@o!NUsyjSsCIGNZ(3-~yuN6Ll~0n~62cjVd4
zuDuA$hF5<Xgi|WpCxQ9)(co8!wYO{#MbUsd5Uky#BDJi=Cj1uAeF_y2n91{fO57O#
zLCBkax+k}VIKFsL^7<X|*wz+3)kpo?1u%V*tzi7RxKT?e^~h2oI=>%rIcEGeV}j%F
zXOLlA%S<-UB<BA7{0n1c`(>A=`QK6f;u*hSis#CXO?UUJnVW}vYM9iXXXsOlsCYLk
zV|jX-mKL?$z+K7?3G`0gtXAv9ke4izkpfdJ!IKsdLM!Jpv#*eH1qoIR1{B_}e@<H^
zEO`q)GIINRygo-ek4c~<BkqnEAccu-S%jk`a$&u%EA#2+N4>^){>WqhQHDAfL0W}t
zoiT$4Uy%A7o3>FM3NvzeFn8^oumKXi4$^(eKxOou3F3BOyzIb~H1pavGE}2!&aV%?
zBu)$xAlb7<O?NMDlVfbcE_R6v$|MSqDLv#Phz3QwdxMQMA6B2;&ss-u9L@v7ZNRb6
z`+$<t_k}>6f}R~kAp*iH=Fx*FpQncq&8AZHUwPcrO-zsc=HMplKc*m#L`dZC{t1H?
z#!UQ;vG!Lq@N$F~Ym>R2?R%|9r>noeKB>PP%Oj-h!Et~5{kXwU!=1d3nDlLHPScf%
z;ZyMYs+WJ-U(?q25~+52`A)hi)?KMKr{^sVb1Wc4*t@W}6Ly<Z`^GuYGc(nCfF-C^
zC(zLe>{Z7M*vpv=cHE0JL?bSF2`WroX34%}N@{cYNz)xV&nq@PKqnl5PJ&$~CFyGr
zH;ELCX@ppRQXL-bG`=>GsM(b_97+2HQ<BJf#CGQ6uspQ9Zg3wy2YT@&-d680Xz+;i
z3tP}*VIu%j%AKkTP(maaTjBaDZs0F==5rhTg=!gT_47lzL;Ibru_R(~(_z3L2!J->
zI4}J4DwDw>xaqbf+wfbV9S1Z5cip%*@cu<kcMFr#F>3K=|8q_!ed}8}e-W=D-G}7S
z9Az6u^j|vR2H#!ex8x{#4IirLIzvl^Gjc=6M=nJG50)u?&iF}f&YqeqBNL5jnkh;|
z1Yz)CbGxPko|3c&1F{cl-XvT-C>e^dZx>cm?0`iUbn8-oa74#3IzEJ^{_Kfp93Ux|
z6z&uvYhPd)tnfz>;}sK8M!{FLo#m-?X*zDck%x=)VgINdkp7d7e%%h<BxB=pIVjmF
z<Ss$<h!T(`&LI!aPxXiv1|?%XYl1aae><FlKOI1h27oSueRzfTWAF)+3j_Q4?R=Jp
zzHY<kIUGE?lgjA~ujKJwT>#_GIKj3JKiz@T=hRyC@dx1LDC>u-bH`e&Sogt{flPSH
z)2B*uRM>;{kU4M}yjD!ku|2kCTj3iwfgAf2)JhNS1#QYNF0)l;@OEAu<;(srF5s^m
z`tzq---ZQiJ}+oS{x+tppjY;2q*J)NV;^!ksJy`A4Ps}{RSw}~VYV}UVD4|sTrxA&
zjz<nv9p1QkRYeWLL4Rr{0wwd$q<nQ^o3_NkrcR~UpCwmfCn)2S<n@Ab;H<Cid5`2V
zzQ@j!tCxGCCbfnRi1o~Oi(mjYVRNi4QaDxb9fsPsGZHA}Fyc}#q)r&H38Sx~Qq9wH
zNo0@qS}+wkgQS*P3wZ<8ZyfveeF)O~N4&0>mebFVF6;`#zrVV8v)p0Q5!9LgWg5R>
zW;Glrfg3}k>5>Nj4wTou&QU1T8|ath3onzNh!;=0KyxaSf@5T4Ji$3gb~*-Kb*OU$
zIUUY#0-5j@oTYCDz{39IV5c*j;&)iL)AIb+(o@Oxa_bMIAU|Wk<H9%<xnF4c4gBp1
zf{B{5_ZLQI1<$t(c06h`WJ~LvxP1)g!WIk41|3AhBRJ{F*c52x-#$0g^Qjg=-%+J~
zM@YI*E_ZFyjPHo5&@nB21mB!k<&vW@(WD^lF~!Ec6_XI#UiXFS{saLx4s|c%<e~gU
zil0Y{RQd|r*kmK{dbbDyvun&~?vNFdx)wbOJ6BQkZ&v<uWzTxyk?fn*WmP*znrMXY
zxrwaw?4PT<`|?CM@mEc_EEB%<^I3o^KjPArqv52<Ra<P=r17FakmUB9>X9Q_V1KYy
zbK%NFBToRE2`@+g%%#2#_QaMb9968mY6^StKub6DisOFH9O5X-w{K-Y*DP6C<l!M9
z*_W?g-*@BmiIPk~ms}#q=eXKg4$^|F(;|h}n4JtTb?6g`wS<8mnI58@@etkt#cm4x
z`tx<OrDX?{W{i2jp9zqkA>iQ^2%lW|?=mQ53?k0(P@LBIUzR^5*!r9KKgVC<CQzWM
zZMENxJGXpqP?$vYyi-Qh{Q2v^23_1#og}JshiA1fjtJ$5E19LvHE<!}Pg*uFGM#)Q
zMcM`#x*si&ZniM*OT^vBFM~gAIJ@~W?gi=>w}o)syE<2*`Gw82KYPL&WfDZUI=qn7
zc9zmNanMShKS<#^X>2OPCPy~nsh-XbJKcZG1_{h2SM+OWqPvF?3r?I&%z}7$BlUa$
z?ICXYvkCYMN>H#++<Pc#FWM<G^2kFZ^Y!d!(^rzbXFfDg{l#tdxtTu{cI65<to)Pm
z5Xn;s{y7o!@s^#?!3MZFkhy$^h(~EX*WN(sg=O0}V$IvEA0I-dCIjNauj`!7M-64`
zWn0Dt6GtFY%+(oW@S^|X{&Ky9$$BrnN{jvH`(FkxsXcY(+g5EEO>14sB8#WH0Nsi$
z^Tg}loE(!(bhEjcHd-%Lf86lCZ>efc;o3C&)R>KGbvpr#^`j>b)X|yQd^#E&qB8un
z!q{MYqr%0Pa^CK>FWh2#_1N$HVG9{~G8Nj<A)iR)bsY0~WO}~G8M*uYP~?e3;3p(m
zGtrHN`)`P}N#BgT!p~*o=`iw)@5*!_N9Xq(|K@_T?bi%cJyo_pGmB_XoyzaAliVNm
z1VOeB6MB;`I}Ri_cWT8TQsfM1R^ekMNxpqBclKqMT7i@&Dn}3-$bz%jyiCz;%K$fC
z7XWwJBIlu5M6}T3(I;azW403pK;?ciSbQ4vUEC*7u|EZb)oo<;>-JRf4+j+C11Ssf
zV}YrsE(0`mFbLs$%p0M5aiKBb{lu(3+PSpiNrW5vU)>PQ`)&{~aD?B2{l}QHf;w-?
z=>7J*+qFW#Ck&q+T>Vv-R4$p>q7nb4nTv(1P3!B?H#K&p(vM~ub`RWSMc<-V#U}Un
z=73a<rnLb0gtI(H;#HXBh_Fu-<O<@-3v(W=Ciwb4gygA-Tw012-S@0CZwO(<I<x!B
z{6dS0j~qy#**nZWtK5CTXC;>)@K#M_AqrOi#OlL7@Cn-kpD=e$yn8oGqPe!#P80hm
zl=cT+9n4I-lku3w;?rXZy%q8S>h`osUsu168p+XUZd#k-cP(ni6*)Mj(ib?z%d<?M
zT`FcWm0u5_-DjxNp;daH5r=e8>t<0fzt*iYnv&15^lzOo3r2C{7X_o<*}oiKQE`6_
zq2=@E4FrDrn~J2CqR|T@L+?|?rUy`|%;aqZ>(KP#Hm7>RU;V!cOxiG!qu&0H&$X10
z&jyl#!?KrR*o7Q^@z9gZirW?IQ}G~OjU*IE4~C4@Z4wS2JCQAFM|%pH%ash5OfN?!
zv&s57FN6PO6v`dDcSn1dD(@Z=pLO5mjwZ+0FC_wO<Fxt)I@+tnNkh!yQLx!3>FpHO
zJW=1bNd3B%ppFp9z+-XZ77+3T6rR0AZ0e$1Q1Zs9ilV*Mkt(k<dS>uC-a0U}`39to
zz+H`$pnDsAd65RtZGDf{z`y_kOVd1_*VN~;6m*8J{r1H7r>B2o%JP_v#5$+UL!0jr
z2{4tXM|*W$*{1B+TR8M&Pg|Efj(Fcp4AP6L<5$=fU8RSoV5sCzIOoi+dC=q*v^R9N
zHkRaap%bR-?&pruwhspHVPOdL*M|N5DIQ%Nu|{9NQouvZl<pb~pY<(JbQke$;&1Ti
zhD!3KCd->zPBtOd4g0{f|83WGd9{P@<?-B5;#|FbVUa*?_AkR?-tWV?+_)aBz$e_m
zI4HluXkL}3_)hHQ^^U0V%=dBOO{nqObh%R<Qf2VE%JQTtT-m?eANDn%&B)?1q;fQO
zC9W#PZmejzziz_p=p+6Dsnc!EB8@(}$LU8N`QR!6eDX1UCC~}8P(IKQ%Q9?j!7)e_
zP7V&diF`}o#k9qSOnh>?y=p=UUK5ay!HbSJw*XT61`~B(9;8)X8%EsA6=$EslXE1U
z$LNHA4Ka1iNczMq&dNg*NW?s7>@}S+dx9aX3@g5*q{%ruOl;V)Yn}E*Ie+3s(q=Or
z&L_a(Z#h542rB1Jw&4I?Ykh3ywVOJWUo;;OxD9N=>9_nAP9fr$dG3^+f5Stdjpuo7
z?ff5m8sAAmG^d2aL_8_Y^k<fS?X7#?ph)T2oEnd+vZ@;Q<Sxjgh`n)080^47E1X8a
ze92%|FGlZxc~M8|J=|P&z5a?LRL&2J06yWdWU)L(b5-|YUA;7e@3ip@+~KRFK7RV^
zvz0AAHaf$<y}c6gZ?K|NT?#(Zwez3Mf1>{40-9)7P}l7|aLU<oZL78(iZ9Kx^e&jS
zXYW+y7xysbB%tH6&@XQs_#SSV;RJa^cZ-vc(cqXGKj<*go!~z62?td!<}bYjW-R2J
zzRyvQjMN8!6c`M2iMS${2Kn=Dv+hvj!neiN0mvNbifn5!GCh2HJ2GGmaWBv9dU!eQ
z#V2H$44_HY*o;BXWua^D2{{+p-Ka#^0X$}q)*E2VGp&$r<Qs_i(UsslnAAKUbjw65
z0#&gsjevJ7MyCBm$9g{p?GEqf9{v~o>c5iS{}Y$_9^=ZGW(v1oPw}bhETzq`O<UUH
zBM+-u`178Ya+&4!{j9p>`!Z46p8A%HMjs5mP6UsXaHnWzL=NzwT-7O`0iiJZI>er-
zC^7OnScteF3h(fWNLKmP6mtf3HD+TyL?f;qNdqA;lwY92%})_(&}BT=xhJC7ZQN(F
ztt>WbT8~Lunh$KOL`ExrCw2B~n?*6`nx0*y=paX$p`;*YT9|RxqStAg@7{(PwbzLh
zS}G_=KHiABq1~Xzyoa6>s39`>gX1e!@2LH_@|REe+=Y9f<dRPe<IYU!rs-aHbC+34
z3|ctW1?0n_Bq6aXq6EU4#IcdDbFjd@wuwR*XzE0C(YC6<R9Va7p^;nfS*J$qD;>2|
zqpUGH;XTmFBQX2^8lw{)!{~%RIintjdhC`Yt#M!;!j;CDmbCHA>cBsqWhp#t>$F_T
z+|V03$HqMaSzn}Re%f9o`|$|h`bmQp4u(FqZpuwPdTjAs|4fT>e7LqnMJD%O0P@Eu
z9Q6g}B3q>gQP9eR2QzUGTKI&Pujx4XT5>-r!SKgr9NA>2Q=yOPeU3j`%bb&=L2F}Q
zsS9g43>NDqH086nXC$Arn8vSb;Y57qaP~YQP<AzyMdM5ficK+3J461ykraW>1lpGs
zRIhvOTFJ6cMUUALudx28-$TWi#ICZ{{g(HjLCvm^;OMVw`03FRZ~UceY4D^&;^O*9
z5FH52=!;4(Yr91?&Y(1Qi%(OvSX`s-d|1)@_*VOq_$KVxgBGg+_*UY86M9~<kPQn{
z%;&5Xl$lC7B6N#DvwIxE8kA5w4XoB3(DdH3uDw>!n0Zjq509Qho)Zp%)keHv&mEun
zUr%NR%-ts&$4}4nk2Q#T<{YcGn!%XsQPdLNM8hOBhebAA-JB&n4Wzstqu<^|>u~ic
z+<JoLu1lz<NaZ4h@d+n>1U}&^ah~@OZ){U0-b^|F%i^1~Ie2!L)G`OF)o-mIh<D!_
zYasTv`4@pK9#4rQZCFneUsAqRrOs0M=E&?$px;rA$9fvM*obzmYgX~T6LM77Dv(-Q
z%T^lALg7$5^ZYoOBikuzoJh3vNE2GISccZtt!aQzShC`va58?OB0K06I&=7f95OJo
zT&HlDJWyGcj)8NLoi~>rtPgCT?sbZx;&S=%UEV-39$(q`lyVrfeWm87Coo6&@KXUS
zKPRIi8*axozGn6Y4g-BWG`;phn&(Hb4I+iZ4WkowF@-C}t6V(ga6@Prf={m%zWodA
z{@Z)*a}^Uqc*WWsY``Pu9S}Qo3Uvx8lFE#Z4bn7%i1#YyzR;^%(kbTq51*Ge*dBaJ
z077AW437MD{AW=DIvP{ip}i0a|HMbUW1r#vMi%KLQSH5~WiR-EV0`MqFlp^MTXchx
zPvcgw&x-oc5)D;Js~@Zt-5l$Qy4>}GNbuc@$o)xXEy1s^_;2R0nx!O^49EUIkjRQJ
zZMo}r4%T4?a6_bIIj>v3)utBtyd$c#X&A8vCqmOL4-tD?C6!-!`vk4}Mj@yBFQM?m
zieV2>wI7oMPm8C2GBL@Sx{sw$rtnC@fl$c<NG1wh%Lla04TixU&aoOwZ!XG;#F!IG
zT$JQ<1M^`1?~q$5kK00mB@s7H<}+op5$AR7c}8NTp$z|E&o>@IQfhZY6@Jo!o2CUJ
z0=q(8<%_=L{%PO7J<}<Aa$Ectrlj-3DxBBiN9CUzO&PuKE_vp7U==e;%$PG1(k~53
z>C?P&ZX(0iKj5|GPu+PlsHS(lfiIfz9PW3N-3L)d2^&ZdCB3M+!=Z~z)F^n|?%U0e
z#{DtI$9)Yh86JPvao_nY(G7I##Q>N0(V?GVzo5GKhW~Qj9S3s)mIshZOr1F9;id(a
z!##A1=lxO}0Q~LVTXrQQ<THzrRU2l}!r)I*s;HMAUBLA0Llt*En=Dow(zDOQORgR1
zC&G()q{{{oAnp#H#O?y4@Z|Rpu{Y3<<8zEr_@ZlE&mwhl@gYYr_1O`Qv=hRA`jD=g
zxHatt(-w=um#7EIWFEPz#DBS+IfH)p<Bg28ZKQ%M4NFL!p)FU*-9&$(oICmO*)P=S
z`D_!)RAB>gq8b5(CdP=X5>EJja|++6mzd}m*<TxCq{6x6%T`_P*NJr+oUGN;%M<kE
zcj`rAtALX;$vfIEP2?T(1nK8(q=C_}MnlR^wGG&cD2gMwTy;yVzL)Q1$j}m*r+@#s
zSxY)JEC_fg)<QNT<}7E>1fqibXL@+4O(_8*ySc2)4`3)e+R|*5HSj&9&hRKs4<E^s
zDa%hLmC`CyfShemeuw^O4P5#kSvF|NJTSV!znQc?G9CqX4s;bu4?zL3^T!%d3`R$L
zj$jCc2B9jhdGTvagN<R*aPO`O{`b+lc;+@-#g_6{P~_;%#ZP40{lO6y-AkL#oii#n
z%T*)t{7&loCqKt{OSKIObE1yNu9UtbZKg<1k|niMQh_{wVG#>_2=uGz;kW3}k*qvG
zzFAwa3KeP->UPs{UmRMWFj2j`#1{^%!prfYY%as|tR>k;E+1Y&)upYpt-dI=#DwMp
za$2e#yk=>ZE8+sIRAmK6Hrh4>1}fCCX$drSsN5zDEIhMvzW?X{(88}SXki5?LKRqr
z!AOvusKST?6WAp1jg#iLTH_T0Sxeiqt7ql9S?p{pzi+4iI!gf&l|NP5PqcHw=2QQR
znD%A`C^YR-LHw&*>g{sTF!G9&kJQXCX{~ScGS8L`maM|>#WuZx$_L@c!g5h^1Wbk<
z*ZIzv)I@0^e;>;y_1?&ta<m0#4(D*%I^1Q>rHHqnIM{=Y9PC{wd2HT1y03L+gADCA
z(6`+KDq%9TYMKE#na9Pk<)XzD%rZm$puYuJOdfXfU;#`3)i5}yJodGD{yj5vm(#fW
zcl8IJD1ylE$uW}AIb|{GhW!p$k3<$K-`iPB!M_mP+QM?9@(w5NnuFLha61W;LXVZ|
zHp1@2o`8zYg-p0fSRkrK;0M=3QUPuy5(D=~F!jEC_>$n#e6vUYR(#A$fdlrYmxo@c
z%w^(y{g?dKuR_pLB4ryc{<%_2Pi}_2-Q2rvjP^Z47=AA2mk6pK9wcQ+zV+Z+yiEoe
zRG>e1Xr!!td#`=@yek{52sKz}-H`<^HTQQ51Haa9JEtIW4-6HsrH5fDrX$~9@SYk(
zAGuImQm@E*`m(s3?eaA%;ex(2fsR~wy$Gfo@o$8<qNBnJHR~B(v0)agN#|z&VFtii
zrKNrR2JOa2dlN@{e4RIO7QkPm!hB#tdgl0xFwuqy6K(iulm8qh%y4B)GNsS@^=6cd
zXs-q)UCoe5$LNH4H~nKNDX{l_Bd~SW`KUggJD}`qB<{%--NQ-4`5o8R=6c2}6$q+a
z(UL?{cqVBhQp|C+vs6pgC<dStPFotX60JRa@CiFhvq4>>k$Hn1$wpC;pPyy)R*i$X
z;jQiasUD)=;CtWjL0aZGmM!o~A;{nlyS(I!bhH$Xn4{lrmRE&IXo4d01xP%{7BY;W
zvF4KeLxYJEU=L}G0sy8j2&u&9<!p#WXy;!#VfhXFold1)3kEYj-^%QZU^;-w01{R9
zhJrfAmm}G1cCO!AMLVC`?C5VPts`cUsG%SLKafiXTB99!#<gl7%wtTYaE$FDM*QDL
z=rqMf4g{k}2h{WXY3zr?)VUVnVRGcOAz?ua&cT;g6LpLmD!<=&RA*Z+lytHLi}*JE
z!24FdaDdX>n=G`!!Jao#0;=F(qM~qV4x)Ub6SXOC@;R1kn4q})i)3cVm{N1oqxZ$w
zR)ZC1cfUAOB&|-we8MoT04rvi&8WJjf;D3+u&_7x%DH`>^sYqA5&bCjr++iWynivp
zx-UUv9UFI(j<5C-|NPskve;LXBH(=57(}!A%>T@wK7V{xs(md%kvQdt{fZ+n3R50s
z<4H%E-EHf=(JSytudue)JM;7yIayjbjp{RlWXXpS87*YzuOb&#X}`rTM{R!Y$SvI~
zz}=qr0l|68?5Een;_|AdFHCSl<e0F^02VL;SKWk^*EOOZczhd2xq<<^_iqB{fl>G&
z@N7XWk0t<J-1_WXPFttach4BZNdcI6Qb7OO#gE~3D29i>U)1sx(;WI+>fpr4M<Xq_
zd<CA&qk3siWYlh*=e-~zLKk=SiEZ1g-kYaqr6z6Z0}g@b%HqOeE4pv39%Gck5a>~~
zbd~ep8(^V!WhpVM*-lrsYTvHWqO^JRaAhs|h|THNSb-p^L2g#p52d&Ni4<GkbuFx&
zIa?D!H9nqH;$MbY?xyvGJ>=89eg2iPdqGfcINRq+o!?JPVd3jCPzo!H%2WNt6PK-I
z>x$sK>TH)k?y$&3#mUt4JgN8kaORUZ1a>vJzu1qNl9X6CO@yhMg6O>KiSqLwuXnhi
z4AxDu5Dz0fDBu9Yfth3}LW8!b%}gDgAK`Np;{5_4vQ|IEoS&jq??uJt3@7rY!`0TZ
zM<^5T3qwC?ArUZ(KK>V15?Y>B>70A<9py1iAo)aIUV7Yrs7fxN@uIq3_??<)Fj!Ak
zCgWt9*9Pt1eplRnx%IQs`DJReeZ_s_p)LjU-{R)@lmY3Fa1TfvKYqz1n2LN$J*%DW
zZZs2rGVhS_ktF0x0+BpYFbwaYD?X@WB8VYJPqIY2<E~Lo$8k=b?yKY3LAK~MCXr>7
z&f$M@!?;Uc`^vnl`~D>8vNl|}D&<Dx((}=*o?JE!rTafSC;!C_gWZYR=xzrV$D=(o
zX!?S5FdO>D0nc?D#TM;>jov{GBTm4cNdO7UIf1YH@Qc7@i%|Gcg`c|tCdEab@om`u
zx=;#Z$&H#c*i8yvi^q-diO{8<z3bPlb2<5R;$RO9{Z8&(K*Rd;f%E-v<-Bx{pH^t-
z!>IVlR~NVqnv*iIx5_r;<%u|;cUqd{So4)IC)S(@LoE<J=~cb*PxQ_d9d>OyxjMCo
zdu7Gb;4cH=jMI(x`p^QM3!$(H%A1$`6D)Gd#5uFSK1%VC2&*k1GoU*p8iBpz5Bt>I
z35es)ST@S~&yqx$p9h%aDsd?OSe<@J$}{@Vfyy1Ym#V_zW+7|d>h0!G)|7b!_^Oat
zPa$+zrS_ga%EFiB_f7Z&|4a2!BDfCNJU^h2_bz`LM@S8Nx%hTcRg5jFxifCRVIlUT
z_?__r&-a&;76WhFJo=FbkG{;vc)Kl(5bs6*Or+*u{E(w+p-1KuwR=n8AHC_tQe@RO
zCDGH>%L?Ba{>1c2x2$(5_oSp_!|akLbg;e&p0h9FuecAsAD23tr09Mg%%z6U0GDtM
zT1&Q|i9ae*<(#HyMb4iFStfqnQnMjkgNUHWY=lQLrI~oe@8)eBiabWF?CWmJD}?-k
z8qyMliqH)e=`*wLzd9fyA0;Yy&^YQ!z$whPcYe(KGH)fjk{~yv+v^nWKVAs#&dHH#
z>2oW>yRCxrGmiYf>z<w0=cDIa(Di)1|LWq`Va9OkV%_}zDd;fpIpE4HY*5mzubIum
z!$^iWmJ9{L<xAcyu<gZKJFQIDjiqc9avs;dnD6C_gkX80F;UAT=&_a$>;uzx&MOw6
zXY2eH0%}C&*fUXtucpkg)e5iR%f?#fW^yqVRNXY}Hy?1fxAkL7{gvhvPULa9+1fRr
z7i@#VIE67~7v7++&bUccS8A1fucEQ*hyC+^N}wL~39K0=uag)lW3Eq7C~W2jm!$)0
z`iZE*w*2cXUW`+?*H%E;kNf>+E_PLEMTodAoDz^wLW_kS<=C&Qw4o1V=Y7*y&lg5t
zYJy2$kcKSCc_5d%22eiG1##TtX|gz{2@8HCD-Ol$<-tR~gF&&v%x3<r4U<;(&G&os
zEIvnja@+Ijt#B?`gG5#0i_j8&!54=_oZk3*1PAHLZV2FuN3S(L{!-GaR-&|o4cYvN
zKfFxnDYAf5uA=jz))C)aEhI*Pt0dV+&zX{2S>wL_tC#x3;}cZ?8f-V%p|a}Y`57$x
zJ*1w2R}p*3!IH0&IMHF)T`0GD=AK07+=nfJ<p+v5AH(@U!e@~qG8?_%fMUIkOZ6hh
z@U(kOfu8v3O*C8UtX{?w`y<iu2Xmd5c_vwYe4Sn=`i;PCPxA5_j`C!6Bg%zi-4Bta
zt#1aUpY=3tgL!T+lpc-1(%D34$4Y<4lqon&c^5M{nMIY!&m$K*nDMJD(qf=oGFK*D
z<~iib!|0}78_q$>b@~t0&fvbZM>ltZT7OJzl%@fFf-l1qr96M2);l5f2QAT^A0nFG
zz;UjA_L|?L2LpZi{buWs@MbqCZ>C?u4U=kAfV72QnMMeqTh~uwZNLwqm){_BTaSHj
z;KWz#Lco-d8RUeo8-MivU!oO7nRsla%g#3)R!mz84>FqhZ}jkmx#}Uf*F+<4r;fs^
zX(D>GA}O{vmAdr}xOD+=JeswY4`FZi<$>WcUof}n;S`~^9Ia;Cq@=G)vJri|a`Bbz
zk4FXLvI#|2mgx$Pg~EKT+{F?AIR-V20G}AYgIVZir`hX=?fR07@rL)kIv#SA-8eK3
z?G}_VHL!CNMOz1IK(Tb(U@l!yr|HF;sBgI9XFeAbbU~9Dc<E<cA2C+pglK6q2Fb}W
zOAJhzRq*1ph%a(R9g`e7cV$O@r9l8Mw(*5i;n$#_aJ$H{CUspN^loo58a^a4v*?f^
zm>Aany)K$_^g#5`Y~MG!zQMyWVB-RlUrGGmPMlh`InLztUD6M9Y93XoeD<s|xcuva
z5z$kdSJgSr+=DAAgKSSWC`WD;7d)aNG<BSOo)vC=b_5j#Oj3vu*?y&KRz3A?&An_l
zabqKzxWIY%!yS1eKGLr<-dAip6@1(~>}Pj@Rd|C+(_O=eoB52+4jL*)^W{5~x0mzZ
zheS}XpA3JDqwl-sZ$|Ygvb<dGx`v>5Y6pRfe6P{k_i)zV66djn(EnhKS^LS^J}iG_
zY^lG-!UE;@Q37`R+L+lj*6Wvvi5U$Zx^!IoT2mUvmHXivU)RpTbh445j~`Ga7~TJ4
zr6*JSnL@FD`xp6nGmwOmgjZms!sS3J%v_cW|MD>aBrBEF%j`6pwx<Q3N!ZWjp=&X7
zU|73(EkSoRL)H2d_xWfTmUlSc7U|7f-TkPy4HOvic)5K>E2fcZK-w1P5uC*2>nyj<
zTgh_~lrm)u<QqHUL1H8iJ!g3t$wz!bkaqB=i`etns8i+Qa{YWCQ)ar4mpu8;WhQRi
z-bIHQnN&of>LQs9<*Hk}0UZukxBO|{hjKNZn1Drt_l>!OMkVcHp|@YVrRpu8J#0~-
z_qxay|A3lqTdJrQRzp?>(-j2PwB*LU&)var!J3=N$nR7PCbyK@_+RBLGkc61d#t~G
zQV3t34?x|B)>(nJkt9EJL-;N)M29xy5lUqQid#pGD{5Qz-h;2aqv;bH!F!KR8@x?~
z83|$XK@vY2U`^=uXPcH^h*;q1)LwdFr;GKvCH70Pqb$vfIvN}N)i?OF?QSRYI~jc@
zRuU_FY$_kL6!Kr8$qwIWUyf~X{RUUxnd3)0fwq~+PB{bNXRx>&0jQ#fF`g`wTBd%n
zXeYy41R?ij+n$qB$x?;tWXeQW2WN4=*m>sy){UV5d_y-W(IRFg(nuBe{L41XEUcFB
zvcakk&S%n)b)uE)RY3cH<((2Xn8#RS`T$%6%cnTyuDW=8e#E>zKW?Vh|MT0^1y`n3
zI(1QAb=z9$O^wRisGowpd7s)fE}2H~THVxXRK$l9!ghD%yNJPR_va4dncU+CalOh=
zuKZud0lh)5PLo${U0q^*SW0Q+XUy_ROiNjz3fc6}!d?c~-1>Bnr>;+nRR6~6@dv36
zZT55Oyf$d_N;6Rs&8@>3?&z93?A$ti?<0uwSoF#_$L?snZc0Xvn=y0WZPGt8>(s#Y
z93(4aOX8$TNB~Rg=>G?$3{G_`&^`KR;uV>!X3S!w!c6zes)5H<e6SI07H#?U{j0Ig
zA;k|M6$a>WU|+g{VAg4c4aI#+%|ocr0G>eV&|c}l)1~*z;8`}Mech46(9#1xC2dnd
zD5!gnp1cA<{Y`@f@8aQ87m3M0B3+FB@hkEL;L3b?9UMlUzE|UdbGzp%qEv?~?hvaH
zReg*zWPPlgTV1OpmE_P#^UQy8a!E2*hnX<4&#t?8Gh6J^9;i>e{NB{pTumRvXEz~2
z%QsUh94_=PtMIoAt8jWlKGmV;*OyKuJbed`l9dEU;>8`W4{gFfBPQcNcfIgh(ER?o
zZvX1k=cfHaw`)B<OPY6UZyORrZYZ`r<aRC;e*_FEcrr_Ae4I8TNg-n{q5_w*zup2n
zk%sYN+@>c$FFT{u#EPY17?J-vXKER9dG^Ti2AxZurIyI&)__cLX&oNVRZT#jQ;I)X
zcO2VfD*-!$ejijAXp(4d*$>nM6UIj4`YANpClg+Aprok$k|*U$`ptcSZ<-83yrx5D
z2C?503@_5~X-pbEebhJm&+xD)1|D`)ur0&D!wML9IGl38SHmZCs1lvoW*_-8C8~SD
z^Ijgqwq~1Fd9uyp58t-}lb>~*!^9EkmwW=onmcuaJm|15xMAZNUWqt16=%;cf`dUQ
z_YtYDV`|!8j+G7d7B9=Xk#hJXOk@ZSm-x#q@m~x*M4CZ%NKx09xd)=<+%a!6PN~pr
zbapegqu<y+A#OrPns@+xy9>m^lGz9Ae-2I8jm3xzE4kZ}CN}0jMZ>^E!g<)T4^cix
zaf<90VBC+sn0O?iY3;cCg!~HO`f}!X;1zz+t8&2U%VI@%a&Dy)Ucr>~LnGAXl)UH2
zneZ$022Q<Q92ezwA9SOFdh&d<EvonEqZ@vH#OKgKmA$qST4EszRZ%I*g95rR!@lZs
z#ut6PK1DDI(pP99Bm0lf^8m)rnRvcM#6ExGZ(bwcJ9>$sg*}EEB4osT{oZ^(MGbh;
zvafx0r<Q98`k^Lnn_WjsN87Dnf5^910H&_9k0gi|loZ8Ru;7wYM31ZGhQHj7eEK1W
zQV#j&k)(HAf7IvGU|cHYosag4Z%5*S0yXO^yr+4IYmkI}czazxpz6Unyk0SRq2`0D
ztvIMx@hPRqyefsUvTsl@24u;zX=qPfA?#p%=EtfC_!Jk^ru5w?uyrN|98>1}?Ea4P
zvpuY?>}Z<SppbU*PEpmjxSH<ZzRSLt>~$YpHRs+V`SowFFlMM}PRE2>8tkV!p2yiT
zTkO5w)^zG}LSTkC&ph4hUxUO)hNZF`E>0vl=0uX;DUAKc8mOQGrUv?<;m_qwwR?qh
z0D(YyoPXjRzO1LA&y_4^z1k-+V&BB=>-kPOU6rWBxv{+a&f3zyd^hK~?ARmM4XGYC
zz%!=YN@+sy4)I&I8<^krQf&2jjmHf{EgJ0P-^@NR)2<7-+LU&O>7k^76Ei?v;b(v7
z+Z^ZQ3C!+3K~XxnQp2&f^rI@TE6Ic)?hIb*_SBZzxG|W37{-b{DnxPQq#1S&Y#L>_
z^@DMP$uuy)tNwH3ofmIgLT3^rJ95x3aIqW>3_K8@ZS%kZT0wnX9(o8R?J_=SrhtpM
zA;ILislZvPaSLE7I0hR>+J_O<7_aa##w&aRyut>B1xeip5n#Vv>M0zI$(;7VZZ)Pu
zPtcF&iGG8(cBluBwaf)7?23nB*KQT14gH;6OJmr_R;21S3?thz$}iw>a^(m<kZmTm
zJ)Kdgdq>0c{G6h6)~PIj@8QQ}`*9iBRkqI`H?Urp3^jp(2j9J(H6t9B#Cl$(uOTt=
z=oO-a-DMxP`FN}=?>UAzzP~J#562?;_(RLi9M6j{|1Iz1*^5BA=-{89w{ewejt$Dy
zCUG5pszTRMDuZ7rqWk5@=BV0Mg{g{!B@W%fPx5YyJ^-5Q<e-Deecv}b39Vzj(Likc
zLL0l-;o$pzMa#zgpJ<rJ<Y>O2*YPbSINsv}eDT+s-$4%O190J!bpssb-H4WD0<d$A
z>Q2!TMgGYaPiENF?>=A-kB+4C1WeN*>p;JS7S3Y|y?ye=D-MvwVQSHnPR&KPmu=8}
z;w#Q;5Lle<ec0FA(lp+Sn*81ri}~P#XNuI4s&m0Uvmdm!!fR%a&xU~hV=jX6@<jj;
z!~_6Ab}duhzpkzSQA(=hz~+TNC&7xkA*3{1#L({Yr&z#=P()&XB4xUfuiL%9T|VrC
zekv<+^ykb#`08E0nTVT2GJ^c))QWA2DDZNb6?>m&bM!ID1$o^S=jv#3AU1Qdcdhvq
z8A89poxOVzX4jXn^EcbK4>e;QSm1oB(TUoXhc~W?6UE{{w0@YZ4Z|UiRT9s8{c_29
zbPL|FsF{idsoV<rnP0Dgo_;WV0+3DVk@(KyX0T(5m1Nf$zz`M@8n_oDxpdiP%Ex8R
zA2#ecjP}8^_6OKYmHJGQKcSqLufkLm?MA^S4Xn*KaB+TqDgzIN>FD@uFs>LBURj<O
z==*UR*E;+!u()$%++VxZd<{Xm;dK_<K=y312PyzFjwa`t0~O6r&aHx$hrUn3CJuCe
zU3^p3`{0|Z=IZ|t{@XYG7V;U~7S0aEP9Z7VTSiY@Bu%H<6;!9yycc*sSf>=d_;S^T
zYjk2$FZZ0sWT#1G8~b`|cl&ei>@QS^8|K`J^=peBy5~c4V#k|1lp9W4-A2j+wOtBL
zb#a94Hx171nq^NJc_i{yI*RgaJ#INTe%2rU5Dn{ncx`0r$(bk>F2TD=G3$0+=Gfp)
z`3Kb~)!)=ae$_b9KDs#^eHKaZlNjCcgJ|n8PrZi6=-5x7KS)a3N|gl;9vBKYg->qq
z&(B@}$Fb<l0SsMH5@`X@hhTS4bFetj_v7A0KiYdZ`ehh1t={tPL97m#danO-Zbc*+
z2*e_NP3xu^n$~F5^$K7>2%z=gSlSzGqy{q5uk~Y-Rt=9KCoA+kqTOHuwkCq^e|B!Q
zvfXiUB>#UdD)Y-&R0R!cWhRY|rdl$owza)M5oVkYjI*kZoYxyYC)!k6G9K86v)-n@
z#(zKYtG#PUq&V-n<^f#raM@-njc>2>-b_eu6Y2X?zA&;Bt&ci}DzK?k;V!SFyt=ra
zBONKR>-c3ADRJnxD6BU@?4eM)=}psN^#-Hh*Rbad<me=LVT8!wZ?7^`y=9SM>oc`C
z38LpP|1pOQBQK^)aSjKfBl0>rR-v%BT9!w9x)5;!*xMj38Te+6$tzD(r<)egYL;CW
zbX9Y95N=2=hF(3Q`=m-V>Kx^iABj#Lq2y^t#wLL=XjNCHednk##hYnh8bbn;r9S!T
z1N*WV+20T4&$CRuEAXA0z8j_rhH~vCAR*V#eL65u<mEL1D_?e5+W|{&xBqm3v)ccz
z@uHqdfP)bYUZFkY_#eOc{|di2*gEW?<MJClZ9@8$$Et^myIWlIjz(VXa}(FNpDZt>
z-(ic5ts<+}+KJgwcdX!aiV@y9YNraHfxec)DdXOl_ARPbu)9Zk=sVFetn~Ew#^psD
zvd>iH8%K7N%=jtPI9?yHjnH^;V~wS-`D*8CZcd<y?wxa(41$Kgk-5{v>W<fqq`~oL
z330EusNcoSFBLrRdrYABj0v8kTrbfJ<BUDxwU1#Pg)h!;@4ug;2x(%ypMTddS)n8b
znV3=#F#_saU1AYw&-X+zFmPnH@LB3FzF15O##ucN1d!41cw*4C<(y8xYtht-$=Z-K
z3X{buPoyIDk#qi5z4SlMLXHWK{rS6+6knxu?jI5Q79G+{8v#Cf-8k2gxvVjhV{NA&
zK5+CT2$#Qg_+Y#zqvglRA4Q-fly+bL9<PuDld?v0o(jr;c&DK56!c@_BvzfKmCIr*
z-IqO~j1?Juo;AkwexPu503Lnw5rXvrgdOEV17B-JTLcYWgb!B=%&o|`k*4`~`k3NY
z5i_j=`Aef8W5jIwB7KaCop&k9?1_^4@{BI6=4R>NYc$@<d8wm7<`!+lW8Ey&EaR^`
z$l$BrSW=-MAM1~l>_o4luTN@9u>Hg(WqHOXC7Y0}t#gH(l!0gfuW5AOFG4^hSOD?z
zZh@l5t<#d;Ao2G^FW;=t|G;qPm`eSZMTf253{qG{F-gQt@_65=Z@#(Tl=XuFYR_%8
zm1|SOzWhhE@r1F2s}H_Y)S*jeW!6pKxaoT~zoMjQc?aF4mo9MT<FY*T_A)3ltQ)J|
z7H2>6fq*x1-GGGEFCeM8Q%Uq8FKDIwg&x<FY#YURBNL_`Spk4xaQlQGGE4Aq9!M30
z=JrtVJlfWU{bRju6gkdh5GWQAm_35h=Fj@Tfp1MX{L5K*I||c~Qfq1P|69hpz=j)`
zk44Mf+rRM8xb*4LMlhoXe^&gL7X5Y&F&_5Rn{1W*WOtbfvkiM95&JuphXjebbw<>5
z)*DZL@c)MSERO+3Ile9@U^qMM?y%m`r;w>9<wJLbdR%JtViko)h9k3RED24zA4XM<
zu-qZEQ~UveO@|~}&av*36qgsCQr2^%+cAZExFjJ$ikL#|7!z{XXdBevLJa8*@A!L-
z#r#TiQk9{OMO;OR-+)X;9lP99HD#~UL7fWqW`ESZb7tU+Rpt~0Pyn?0{eB5dWs=av
zloB^Fl>B^_ly227j!w2_=Yq_V&cKtxR$2IB6CGpL1C}-d=_a6%@8S*t308eE{QdYW
zu2r1*mc)B)B?Z*FOd;D)V9Sg0we4j=Ad)`Y@k2xBkLEMdYYquH_6MIzvT+Fgejs)6
zZVT|S>Obr1F4UPg_7QZ@5)`gb&;FQ`#04;9q{Ml14=Mn^X{KnE3e&YRlmT3ak<86Q
z{J)1EPCQ%oP?axRw6VDq<i(lqj-|`45U2*pQNGVWrV$0&J!o2P4`rA++LdiJ_&xQ|
zD<<HK9PI%Mdis^`^3zkP>1U<h5lq~6KP6YI*vzOj<Ek1GxMKOf-!faQBRh*+G{k};
znYF3B8n>wvpHQfRxqszCHx&Z2&yFOgI9U1_?fM*+5(x|p*|vk6&|5Ssq`~c80RR6O
zd&{sW-*)X=DM7lVTLh#_ngNlJ4iV`V>68)}Lb^d31ZkvG6maO4E<r-N8>yMO-fR5d
zYd!b#uC?9o`^63C1DjsgoX2^@zW<IjIfr+2I3~y&4Y@}1=im5PZ}i|zdtfyYR;Hl`
zkq1T`{+1wp?{#bLdQN3d)!<F^jKm-pPf;RdjgTeRjS>FBI8T}B!xZ{^VDiD0H`zZx
z%BQypwG{Vh<-ZY$hk{%WhEL^vN2bC<pIc=6;a*;#@k2^&FsAS5L7OB@V&jr=0J;gI
zuBVpF>W7J%Ui2D(-t;9gFvdqX;TlfC@@d!eY~We9H;Nr8f1S50|G|>SCu?>U%1QB}
z_HLR(_>YR!4c!L{#mHvwAEmi;Hs8}scUC&CRuZ~Ik<+Ucr!}i0`@QX-xm~#V5H!G7
zUhja;D*lE~Lsu^_rg9pPzv9d>WP2DlBWvgE#N7uNCF>H;)26@O7tAqvw#vNd@wA`A
z=P@`wSq4cR@%hC3*2gKLYo7)nYPz95J;n(bwqoq2r=tQDBb;kXIZx#i#qqh!ud6>!
z0xbK-(_2a)rf~oGEjlvvTqe{lf=nLdh5R630c`Qgt$vljH6-;Xehoie(MB-&2LoAp
zKxPBqXlb_liEVEnp}_c162wXoK#QaUHR5p?oDzQc@1u}y(g3)Y^vnT0oOh&Lkz0G>
z67(+jn(lIjZA0(lXf~U}4c~WBY))qY>b;NSRNnpdk8r#@a(yAb5igK?&AXn^z{1CA
zJ_XIxWYXxOKAgO!;CCpXwTNrUqd6D{K;oi4z!y|}mF!kH_3iqosW<m)HI%aUpV~V-
z2#iDoA#vAa|8uHx_wM1d75Q0%rE=CeSN_yc``@}>oBcWxHH9kPe5$G37tk20A6B&T
zcG^*Wvv)aY(^Apl#%1ikt;IoZ$<er9=KvPSxh<}{Yt#Dft!Q*gxJJ#%@+^th`E*oI
zG-DHWjJ983Z1IRklm7tGnt`cVIrzTyT+z(Bhz8O5@$cq4rp=N&AP!X>gN)hyu`P2m
zzC(A_y~!}vUV*xj{JY~}m<BWLXqAZPsZlj8y4TetmJ5Azz@rvoBL4BCzzfrpKxzAT
z?8vOiE!dS0Qc*mhSu#0W`Az}Uh99MM*5wM=%8~F(!6O6eLFZpzHP}`L8{uATZN(YC
z#KIcN6Jxy|Rm4G6hvBo5-xon$M>e+Ki*K&%(gScW=-=bOLGN|-7{TRg$^sNIDXY{J
zO`HA9OQjro22{`fSKz-Ayu66l#rdK0|J6bI{&c0s83>=&$slhi<znNh;2zp<Pq7Yr
zZB)*>Z+=4cQ&M4Q$E5WoeOsV`?#t?IW!ku}vRL*gu-w*ncU7ul>I)^=v1UBa!jhz@
z<H4?uI8ThVoS*Bp`OtEd68iM1x<z><pA)A)dx`kB)V~m8f;JQY%0rd1GV+Iyy5=G~
z@R<3$<kb164R;`?ADu{1`d(AY)_iBK1neuCGmqdH&<|47#KIKC4R{p5C37?nUiTVa
zjh2!FQd3v;7+q6+Uu=?Ka<foQOLVXZD!u^*u2Ytsf9Uod)eT!A$q_26PcoKIt|nVZ
zS)M<UdOYkfw?ySbSOo1~9tT40KgHM#vpu|hh=Jyw3f`eX-!gd@$n?OCYPtqXk%Q|J
z3SALz_$$^ESt*A}25(bOs=d^IRH0PQ_M0$}f(W^7bDsTG0ZnjJ@tybb=pU7yMI|w^
zKL671-AC#7$dotT|Mo`zuV7D&##`H!rur!45;N^adVwmug`dgz+GyCx^8>G_hDO+<
zL(1T1G)x&gHarb!OrLk%a1x>Cgk5Tmr{rPtJFp-(w6!U(Sn;#gope6pne&-i&O-YI
zw`h1?sY!11fWQ|#hTfqe?+^8ihgK0xF{+&oAJ5-V@OwruZfY^F?wFq5qx~;^^addr
zaaE`=CWGr?zNZ>8C+L)MB<&IK2+!8ix8~6I+ADkqz>V|RUl7~GjPoF~19i65QsDNq
z0F&NCu;T<$pnU0YkT*Nw{}=#$GuN25rfE6F?8`V)yf$IoA!m5^;sK?FQ#kN{gwG&9
z2c^Q@wRJl^4=1xa9)^}=j-KcuY`AMGuc1IZdmWN#*Fj%~VRr1lSO9?Xo>v^y-4u8u
zr|0xHc7({>IU`pWA^pD|>_2~5(IiR-W69_X7^kyJt;_0)majogm)_GVT!HN&rq%P4
z<9cHy?=gD1z-*IE+BCDJLH$67y>jP_9l&Dq?{)!~hjd1Ofhw(qiESL^LteFmVRH^2
zygH$|yesq!?AK_<u8AkrN%{#;vsQupF_-pdsTx2pY*zGw01O{;<EsSM^`N>R&H*G1
zzK)>%%;d(rXl8JwM(vt-=vGJ)QZ~&A^iBO2t~q#q>|uK<i%Lj+KIm&ga+!zo((hpN
zbU-+?T-DE=2h>O(?L4w)SNH;|Ch5tGS?Iu+B8Y@<?Gk=osyZf76xTkK6sQ|~y=FRH
zd6G7IhxrB;$u<7)VcXiWg~tWKxyCv4<j3L-xG077YX#@km9a8tP$3yaX;10C*CAbV
z+Ah=xBk5Z}01{Y!u8CXt5TJpNn2QRfc+Bwi`K#jnKV}oKRBFe^eltrc=E#`bE6;U4
zUR#`surg07xW`4zE<#<?{E%I+&d`xSOsLz1%Ac$6Pqyf~8oqFU!z4CJy%=y}w<OW$
z&4xxyMS(GvOrT$~O2c-1X|a)Qv6Y|y-AAtY!NbIhenm6N@!XVtXxB-c!+W^b|Dv4E
zkVr8?M+%?X^o^n|dX$&w!lPdeH^S>Ru$CquF6k>k?hq(f-G0WmJFN>>af%V`DPt!^
za8{JDIITb=!pYs8P_ji7rx`mNQUYPctR1j8ceW(BQx}5D7X!a#9u=I#f74fPSohTh
zp}r+&gvU+6tRGJSA`k$lF7>0z^i4Q#FDQR3f(GsL4W|6cTqWB|B3*-_2+f}P(0t&U
z8~4)==4s+wXpHzcYQu5u3%#p7c;CQf?&qiULV(nJWF(SpI-;!W1+qFGx(LZ%srriU
z%0;CQyb&eZ9Ds*}Y?tcw{A3Z%{FS~2n}LNye0ttl$uE0S{ZFuY^)IyoN^(XJnsiP3
zKkwDBKriNY^q0HMZwA+LoBOx$KQ?lWG>V5B*iF~j8)APLkl!$5`c-VJt4D{&9eaR(
z{kMPEHwymWh~ffX6~46$O}zVvpL?)n<vwouWajTs$&e<O1onA>w3Elu_C73oLUGRl
z&MPR?Q&<-9r!*Vepegam2rIyx6v%tO!EZ{-XFc)R6!{eKUrl!II?iT&aHhq}=;N24
zKC@RgioLX`QqiMJ>FfG*vFw;VQN@ox_{gJs{^pyeH8TC>T!!x&sY*QdEz=FO{^V$N
zx;1`@H5L?<si><UGnIZf`HrzzgXDb?u$$ms6^Db4uVTkh?I$XPC>aeWEaA7L6jC@4
zN*e`0k+yaoXDb|k<m(mo#o=s2<vb;XiGE>g6#h<P8M4&+@iLyhpXjepyZm~cojWO!
zT-KH(G;72D9oSHaf6jKGj2e!ic2$F@=hui3+GxIl6Pv?51}v_BNF?Sc;9^&vrL_G2
zD74IEWF&}%w(uUsVLhf8cb3}t_1k81_NE1TAXX`RkDitz463W%=<vy7t)fEH+Hpmz
zKq<GW*8)s2e#PoYK2imh-m7R)zpG3SqN-v~4P>yS!gJ3C@dU>7B)cWGU!i2)-I(4l
zViSK_5qWYRu=kx4Cfp%&nOjfiBaxM;*>UKLgbR}-#KZ_F8I|mO-t!yWd^J(T3M5>T
z_y~p>nBNsCeZmEJ2%gx;EHyBYOUEJqiwu3AdD*#x!K2JAxHh6Ti4w6_Yu5?tsc80g
zJZG;2y`+8+9uuo$kSMskmsYuTS<T)y&8xiy572Lw7XBm?(yxxY-&q%#0kEqM&LQdC
zp9>uI_EUl>P&b~c<ZsKtMgNPV%CmTyUKnU3eZu|P@4_j@g3>(YnwQ<^F@cvvVb*TT
z6>=QB!Z}`tSNBei!$JlZX5lbEsSn0vuc^8&i1E49mhsC;gv|+)fO6=y$*{h#a><Fb
zP^7ZOQf*z7-cRRTs8c@hrxGf%10p4M^SuxMo{}Pzny^TR@96)fCTw~ygZS3?priR6
zh8(w&8U-x7YtM#v#f=+J<MHH!-i9J+ir<vGAk^6@xpvpu=#>tQQJ#GY2qOWLn?XL-
z^dLxlEIT~5ByT2@RSQN1)j56*S1ZfsUvQz+33~-rY!rgav8Yi=0?`y!8y3B-_PsKv
z36V`kTaxrR8GYxs3LmxOFy{YsJf|7#Jfq-BD8JV|2}@v1$`U1}gl(Hr5#9YLM(lOE
z{tO@=I^pr`@n4^&wl*@gHVD{guE?Fc7xFcCmKgZqaWR*)@Y(BgKvg_+Ke>2qH_N$$
zuNaTtuYVhwhq?DLb||*#r7(UYG4OTI_WSYnv%J~9BG*_--}Q{aX*vcFnC><yn%9B8
zPj3eI0sX|7hYR#V)gO~^F~59*DkCq8fK(9s^6`<BRCQBT1ac7ob27Hz9W4;nv@i*q
z_(Hd~TrA<IrK<yZ_k!El*s{Xcv4B@4?hID64G9P?Lh<*+_ZSLov&cuRP(W#6Yo`A^
z6ln@<Z#iQyd)V_ts*$!oSMEtTH*ati4|}Z<l`LGml1%Sw;{xHHrEOxbS9R>*9erz`
zeM7{mwyn21P<{obYhJUI@J?L`#2xo4cn+HTc=d0Mzzliu2ARx?6<*(}Kg$}>w^6BJ
za}9D?(D_$wu3D$|6tQa3ClDkyXgZlR+L`Lz+b{7iVs*z~Vs%i4<2qy?_pXly5So0{
z)?4PvhDKv<oYjfn3bM7qt|quf1jPC0Lo4Ml!g#VZ_5)B2d*!|(JF`e+QEyfkYO1Va
zT<J<CJu-R1d$x=KyTlhlx)XWj#B8IsH^075^7TygCk;0qx(S^fIr;&w-dD{y#jN9C
zonnG`Sa>JjG_)lXl}>5zAb!R-`}&PjWdFe|{ZVn00G%0u8^V!+%aA(cC*l&(JpyFb
zGO9)(RCf4BU!{qXpM263$YuGz8~2iYD3xbc3QZ8#gtduW=4{1c`Ru28-zGICPpRX4
zHZ`UZ2L+<0pSyZ-N9B)5MLvQC(vmdjgNI&A{;KIaE0}u%cxO(^Zw|gWR;(u(&3Q`G
z*cfp7#3HL+oa^vE84Oh3Ovo>I$EowGu^O5F_SxSCKgjwEz`9U=nw24EMegG&c8wR#
zBX03Hg+en+2+|Y!%sP(bResT@)`^VZ9_UHVU~>eu9C7jgfnD>_d}QvPJ+(n2e;a!_
zYgOvYbD;3NO@VQ&`fBybwbu{3K0mWKCI(s!04#{toWPlhurywU<c2jr?X`0YIMKKe
zs-^M-SB;@M{a5=@h59shCdN1Qu^${Mx!R}@L*x0KpcdUk>W7!l^&Z4<7P5X13mRZT
zqp`1qQh|feYSxNPziaR7<Tw3Z!*GgLuF!kmO?3SvvVi}lRu{P@KvZ7uIHyk9{5IBa
z{Xl;iF4=os?cvFCD(8=^fI2{tE!+syd5{<V59fi3<@l~TMx)Ab4g1TZAyXv&$Q_es
z?hjT6yL33aHUtl7b>4=7glfZ(3txw{M^oqV^7)-u34f5C7gC<gjJmBd3o`=G8LO~s
z+nUl9=lNndd>w4t8StCm(KHweZ@6E7et<sBCAg#cRGg&{Ec)KX>Udr;2ei8d!`@)M
zk(XXEygGAg{mK}BMGe2UM<hBGr%<=|aF?-l|2;pLqvMfz<GEI`NKBUG$xA_`Lq$2;
zsiIFI9Hi#iz<;WO*0W60O9*n<Sc8;}9>zJ1ckNk-ze}29TmWXLAD}A@;%ZdAO_^mx
zdf}a98Z<G`_)*^SAc>=+?QDBjhD}pdq8)Eg<XM7JsrZBXtWuC*{csTOwLYiYVOOzu
z<<oKoy8d^;g%A?skOP%JdG%mAR{3Y|B6mqlc*B1#Hc<|Zl(Mk&x)8f+6X2IG@)&^M
zn!}(DPf8yBXex(FIvr{JRrYW1YPtPG@a3XGWyDzlKDYli{~o5lOD|;!vacV6{NBLG
z@;gqeGOAcH$OyRjywQ+aOq;40P?>M5RVjtH^tcjFrBmq?lLj&-7FlV=LOriWG&l!t
z0>GsR1-E8fI5>`3uwk!+`hSWNXW7)z;7gu79~US;w!tHwz*W4Tl%9`cI~^BARvSg(
z<jvAp^mHfpmO*MyNWJaq;As5K>pCSq^=7gAo|d~^%O7Al$XK&Cux)1!x!YJI7n3XN
zb-HIaV${FG9ZV;Z?z<({6lX=~5st&G>}3RYI~~C;5sH_c{%Ranfc&|I0uzlUu3&n`
z*5*Z)KT6v+C7@}chi+Dt3^00J&4VjrQWy}<>SP38VxVmjer<hPJN{?9f3S>GuX#m3
zUb&x&#(gEcK&<I+RejwyA~4OQIPt$y#se|=j$p8-`!0?5EwdHPcI4)s*_#`(FUHlt
zc@<Q%VL4Y8BqUvbUl@m|dCx8Cfmz>M_|Q$C2i$(YE&=R1f-4U(`2MlFhN5`Np(H%j
zbg0HEHmYdB2r6RKZ0T*-km8M7+t^9(33bEjHJlo4_Dzh9ANds96E16xgGV4|@W<?d
zRK#)LW|%@_dcP1&KuRcX^jYUEO5L*ZoGloOH<7eAUxk&##`~B^<AC{-HkSyGj@#D}
z4t(Hcv6Dj-{yY8ni}O;6$#TwAwpeg(SfZCjx1%H(`AdnIb!B4P4IjGIBtK(#($^_s
zySzOrz900dF-n5NdKXmL0U#DSblixy{M-Dwki07<NHB`TYNl3xPDWn877Y2M8#0I<
z2dWNxr@%O0S=L~P#+~E0xW{xu5uuq9Sw|=V-YN<!EOb9~isP5Vj^3VKfdY>oC~q+<
zEom=<c7oTJa%kbNF|~8x=A`<aNo<Zg=QAKF99&Bx_UJMOssTO9(n)>|ib1L$fcv4a
zZZPjXS89m-vPRz#gA|ev^{{32r|C5XHM&p=2vp>W*d*oF^rn1k-t>i5H6&skopFVr
zJL6@Ceyf1!7SrE=xTLo=?MKt9r#3i$Y5LuK1|sS8==A~|nRr2Xx-8I#Y62#B{Zp)m
z|8K=gItSVs$k@m3bbeT?RLrv0;1Y%@RFRsJk%DV8d{zyCSP~zQxjrz%;ciruW}TP)
z=(y*gkKcG93mJMp4`y9pa~8x)vxzRzPg~e@!4g%jGS%PzdcjnMmybDwYs>26NG7zw
z%!uwj{eYQgS-WvJTA(rgv2V!X4nsN-1wzVq{wqyT+()LV&aYNB-`HECpmY-K0uf5Z
zKh>_1Ett0y&b?TLji(1lrYWpxPvj0!3JZoB8q2?IufO2!s#Hm>JW5x}jb&RBk!Lw^
z0%}Mu@05;jAnVK5%$;TcIJp3ab{DI9`@P;6H}QTPp6GiK{vO~q<APDxdt=LH)J*s+
z6L78re=BcTd?XyTivk2|zwj8*9cytuMT9q|egZ@ZVfJvFD)K8N1o*Wz^?nzWyE6}g
z6M0|{(gvnei{L=@PIZVIvf$Mlg08}0s7N9d?e+e9R#&nx?vy|QS@eR*=GPwn2&O%T
zZY<e3$6y%efjhDd!<gIQKWszzi~LE9^OoaT$HSEjSnJsCz^fMcg30jUSspPF@Phn#
zZaGfj<Xn**S8rXHGFI+xkdKj&1yfqC702^vb-U7OD%1OK3F-kgJRNk8z!b&T)DCif
z`2}}FrAcTbMA$=tb^<NwfuQ(ifpp3!662J~=@<49E%c8|po|zq4fOY4<-R>sVC?$P
zE)s_Jp;QE(A<VqA{r&?7vO?~fg{d4WhQTDaZX^f27}(t(q+1Qr*d*B^qlnS)6n4##
z13{_NtsuIU-@I7lxo7E(Ms6&F^Hr&phwF}z>w{dF7ua%v4G|$!<OSnTZ0Z-W2~2@V
zS&BRWRI$}n@qD~nr{-^)M^9fth$nVmQ|L2ozRDO>1x&gQX$5!fNY?&b6K;(@mYv}L
zc>TqPvXnSI{02D=`p*tjM7Qh$lsm1E(YB;1gOtT*@e?BNm0g!Wz&6ZB_{iA(IuY_+
zs&9*}>-0>p2Rz^|5&osWsQFmf+splFSUuQJHUD|76Q+)7XWOO;Z?M)hW*|Z=2J3%k
ze3=F#+^EFOg$Ok=W$aHU{P&URHS;kJ%xMEn3|dyg($Ua9yF`KOV|2WB_yGJPDYw2{
z(-5P=t&;A6ji!(A*kB{ME{`}=;T>0Tv0>Y}bL%?_$vq`SOZ*u8VwVl;R|VF0F?2c(
z#ZCLPaw03j0fSjx?vh%8Npm?Z2-4c7JmjIP;tllN0USvdH@tbZ6b!AbClo@XYanHa
zoH#Bq5+-3L75a@HtdgJ*6R%hNf-c2A#W^3~KZo+46?EFM1?xryyX9rQQP|b4ei9s=
zEw4)QD%&g*^nBc7d@5$!>_3FEk@gkX#6WE8BjMz+1yHqy@V`u6lqBCr5~KPVKmv8)
zq4YLg8$~g{3;Kgiqim0f5V&}Mz%7KkyP0~1iiB1}aEgl7_>&Tl7t-16&ST;}ro}y&
zwc+Z<ZNeo^_^mC{p{TyYG98qHoQ?Xct9@T_iUEhJqc@;R|IvYH_3Qo{^oD7?8PhuC
zZ+(*=i33^G@!bV6?k`T>I)Uyi58)Hlzjx99>fko(#U6{433F<*G|X(ze4omCj*X6=
zI}|OWV*f*}+n!?B!Cs9$TKq=0mMhrdLu?+X!IwU_W~)2reNX)|p5C#+9*=SQY1@ND
zX>_Q#Kjqf9&bE<rof|u<Kl|bji#g19AN;_&A7vMylWGrPQv=&Fkd_<MuAM_es$%xL
z1#;HEP2np~nk6H&)smvrXHRT$@~yQ3xa?Ek?5rl{+u!h_y8C$eI<bREo(b>9$|qX#
zA<7AJ<FMZ1=dsS4K+<DNeS`S2VZ}RD!7t+z2&GBQLX+evc9(>)B#2$I3zxEY-~1s%
z4Dm-tq#^5j4ViK}`Wgp~XQ37KQQYHL->$sQP=LbtPtz^TpaH<{s0sN~Z@kWzen<fz
zX$>2a0-X<kx#A9NH+=$0*IuJnmsk6{Jp2r&VCS7(f2j#s%fp$Y^{taS`rAW<%5Uj;
z@O#gi?cR3;3FWu|x^@`ib!Pi=w}Gr)*w@iAj&tNVjinU{Mh(7LpN@1tOD+iA0M`Ll
zhtolQXCX?T-9}gsp5y=-gX1v5YXr+41Dt1a^Xmk&38!HvSkD@(W&8&_KKL^I+Fh|s
zHoHF;GaD?@&Pcau#}P7CGxJ)hLgF4MwC(Q5Pdq@W_KV~=iygYcj56M)4D=-Ker9ei
zSKpK8WUVN_UpbvR`?0u+kK6`l`jr_FKMLL2tHytO9)tTpS{HwmIW2ZOR&<bzJa7oa
zi`N*=y_#X>n&T7v*-h4I+M@g7thv4q^a!CX*=PHLJZ@$pq>;;`)Itgte8oT*0o1}&
zsKzSxSGA3h*XhO!$MQ;MRvj!9^BsB^hp}-iJea*>`a)zYp?~wf6t5=0{Tz&G3de^a
z!*`J#s<yu)qoVNeARk)RJhKaN_jw-D+(sW$u+r)e-vb%uBVPk@722mw&dCs*?f~EI
z!sb<dKi&84kDE!u&`+!P34d|xl6M&2RO3*s;bphu{VF0e&VDRATdHTtB$y&X1$@hK
z|7t%Bwx~Ck6T;(;7~0(JCbAWave@$C+aG|bq8Acc`+<dSKv-M2+4(u(640k5a056Y
zmRrC%3&X4o*M+M4oAd5V(^!mT2d%q8ama&7Sf|dKIZd?Ud&3wJ^k3!W^++waNbW@%
zn1QIKbC3Vk`>&$({;SW^3IAPo>BFK4bLy!>N2<N-32UCT8nN#sEo!(rpB#VGwQj(7
zar&AYlLI~DFm!0_)gFT<g$lreY=gN>yJ7|E_6joDS2Vi)z!Xmk?nawIl9$Xj#J&?8
zOL|j?Lm?yYo@x`D$gE7{s-d-cRxtXix%ez?umm87=b9imnrDo|j4hoymOmI3VefR4
zA?A&)(i$sDHw=pFc1ABL-4QQiSBqV4?y<{|dz-W(NACPMzabv^D)cNmz}vL9Bq&}r
z4$1l?|8tE143{#{GQ+nN)qdNP^1RCD*))6xxczcA*m;1^5w~A);lJ#PgJ_7%#g1nR
z-*^$A+@#ET?o~DU;;AzrnNfC9E&>UA2Vbx<X~RVnQ2n95Sk}Hd0wL#&{bG6r4x6wk
zQM{g^fXDtGmsb+029=FvNYglXr(0>xC4Re)mHO{+E1zt=RNh<;v`Np2d`@qNN@4aJ
zHm74|vCAn<6h?jJyLTMIMJEM-r6st;9s>*8h%9jG=z9Txl9em{|Ip6{Q$!(EAK5_O
zZB{`sfKl9dy*q^Bhk-4XL+!R$#k-IBcNks~L7nc8s%o1t2cH&iI6QFYFF}b^)iLHG
zWn5M}8S3s!RoMCdCf88fD&{x2Sr=gY_`Lp4fnGu7`fq{4GUh!qrfkO=V>pWjd*$LJ
zgrDvN>icu+SeSlCREGB^(V&=uBZy@%eLfnp9Mb4x^VTQ|DLxbPp(O-a;f8{RAk&D_
z_cnAUr`O}RO<lx&GFD|-8%j|7cZYa9eZ2mJ3@)-lxfpA8G5DRJ!5_9wWokoFvj&tq
zAqF(&Nl<|XByhVPZHm*zGxOJZNp>Rlg>p0Ad&gd}BKo9(#hI<696kG4N{+?tWsu(o
zlFC;vK<GKRb=od5>L&mPR=0dc%LW8d1_ZtH>b@e!9aGs~y{#?1FtBU!r?k9e878bj
z2CUVu@CftlYNxGT#~r~2`$DU?qsD;`IOcO=z{pLX+pH;Slv(?)i|l_JWttUZyUeDO
zR;1e|mM<j|n%th{5WYGdAB-Q?nXO9+TNKe$h*LD-mtzr1vS8!os>_+E5pIxZvM%{>
z&f8CoY{UIH8!t#S0D{T0`J$h^YLJ_sQxr5|(HbF<%g7I*xPH~~_84y~gqQ7yj;a)2
z$=hP>%J|emq~`kE%@GjF8TaugCD56L!H4Ypx~h~UXsQ(^pI~E0!JVUER|M!Hb8mxf
z=yu)=Qrsg0^Mp%q1}_F<zB_|;*vszvVqkmOc@lyuqjq;GV93c{*!FVbcx5PR0tWUY
z3MQtrLAAC-{55b1@;`5vh?7Z!s9JXRK>?x|ungk3MQCBsjB1s4ML_EI2#^L27Z2Q|
zOECx?lX0FjbL=q?ecpKk3A$&HbWc}$3f|&{2suBc?ziG$?)U<aw(|Yb^n{=3n07Na
z3kn3xT^~KUIm*%5+cqAvtG_5wFdBD#>K;2mThNjG9$S`Ge_eJ5ItPl?P1K_P1LuLy
z5<_}>o2oT$Mx!;Nvq!D!;AC66*Zz!b(?Xx{c?3T$#gR}5-iH*Fm7!2rJGF}emp%90
zgW5Q##LklsxyZYT;981_6}mVang_%0A;)AO$$aPGO1Nhuql$PQi7i`Zv_Nr)-f)x&
zJIl>&hL!aSuhlfpf}#+Dl-yE2;v4k$&BAi&YfT?=DeOncY+nud_@E5Z7s7{eOGPU=
z35jHDjU4(FFSLS^Do@!zAenGj21Re60&8ct3@$sE73d_71c{Gv&{1(OORlvK9`Z+5
z_m*OiDZb<+*(wzykc7)vP&Jpq&^Dy;gYLT!5bW|!aul9MQacJ#by^J+i`I+<feXr_
zlL6J%v}-^Cv@o>)#yxwZvfz2HF?$KFo{j|<PBSOC^W3xd&tM%5fw~OcgxAhppc1=c
z4~@6Nx-VuR!+o~~z`%`S7c}Sw3eI+9p8V5<`@37gOeuqq<lmYNKFoQb+}qCNR|qEE
zUXBruVuHEx(3=1s?x-gARihUcRPzTfmh>xSR4ShARvEo`S>$$Wk-wk}nnR_eTLi-F
zt0vd*2EVC_C#ZJot$(u5d0Cy<E8zw*(bGt`QkYW7HEO;M(Q&*8+j!aenJHBNR4WsH
zd=ApRh1J{O8w`PZaA(DY&fL=y8Q&PKhg8tb$lhjL0H;;~uAincm!VeI(|P0W=-88n
zjEy+?tBqNxXExLM!T9F9I-JDiLOBobw=-o|lMoWbHZLL0O<Utl`oEDMf!%~<l<OT@
zQ8VmF=ys+3`rP>k-XAe=Ha$yunB9VlAVn+#h_#cm@`Y!9tOyXX-jt+Gm;q=bRsKW{
z@_;%-cf0SSdF3Zb;Hu^5O9{X<%Dw7?@K%IA(4H`%a1GcTIr{E5H*t1ki6u-eq9^i|
zb)bJXZ=WGGI?N;=bE3rhFc35l^!vBq=X-^ZKHgAJK7pfu3kXt~Xy?PzXlZ|c_qE+h
zow@f);=QDA<&rKwj1!M`5<iah>^IIVNiaJJKt_sAm)@-|y-kl1b(LnBk-la5BX-$s
zCdk%z_92+R&Lv1GV}2dR^unT1%Z6M=Zm{}N<B@`4H*6Z{FyJ*qc{p0-mp<nEVLh=0
z?qM?Z$YgiXFv4d~kmHZb4}f-=qhVmO1qgv3nqkBQ63>xiw656evF~j0p((UuQ{H6(
zKb9#0BP6Bxz&dO47rc8pmI6%^_MhdXA?s&pA3RFkYl)yZLytd)-G40TZz8tLER`f?
z8~7kGAdU)shUW1AasM9T02m3G4BQE`JUCAIKGIJ7b_rXD1V@ZndCZ1gPhW|0MWL-w
z`8<=rU>u91qP*C9<VJPuy~dQLbiwT}=pGwBeZa|CdmJTNeY>6!u_zk!AWC3eb&wTV
zh+eOs7L<n9VVg+RaQ{Y+{`%#*wK3VyYnP=X(BbTDH%m#RCDaH%cnUP|TxAAK@OzVZ
zv<3tHB^`M>-*b*RC{y7oy^Zjq)@T{A-m%ZnOtuona?(RlHe?|SDbFA0Yl(bvOMme=
z^CPB{zU|zl4WYi3E4lk|#F7xo#$FsMz03zYO;8aV0P5|hF!77>JcAE^Y^S*quI5Vd
zN3rhe(YZ#snp$UXaP`Gtt}N6Lxm*gUfAn)<fjPDWT3zs2$-EeU3q=qv1R!4m_xgS$
z8RwvvO(W|O+>;Oc@%MKTrhMNEkoGClf1r1q`?Ej7dEINulY9IHP;Uq9*z#UwO5q^x
z{P(L0B;KY8a`Ei#o;-Iq521)47Ka{(2>GyyI)*~T+IA>IO<!2=@N_bBBR(6L;Hj_E
zP8~(_-VI8j>fGyy<h|6u8>OfO>z@va-_hteWa@U}Nssv+1Hz<X{giBUAsBgj$QLiC
z%lG+Of_D2g=?TygoU-WMh-Juas?stgy?>-UM&#PttK@JYT|8f-P>4!`?B49K$arqy
z#w!<qKMog2?(^4B-<cHr_7extN7`|n)!-v0ztIFeApW4LhGgO3hTg?;8S6~L-^-oR
z5O;HnI-NMZbVI;w5rW~79P`sn(AzrTh&EU2bcr(6s_|f1h^#sl%0xbMiF5@T98myz
zrhU5w*c#DRR)?9V)qA!_PMyfyA*Yx>kn=UD?FH0@Qsw1jU;}WM(qS2}0Pa;t6ZGLN
zq{7NUNC!~BckvVTt*^nw_U@SF(HtNBdo2Dr3_)?k;0EchGUeImt;XC!L5j@*LB&FU
zf$`@L1lL(Ll(jK*0>vagFVa4)BG8oixLI0WdV4Rf79HdFyR=A-8B3+l-I!I+6vrPX
zk&)B6jLD0ZDg`%>&`~r5vL8n3h!J~NTKiZ+em>E_yIq3b@*2V7<=yX?5InuZP94aE
zrczl$_A!)uoEwgQ@L90tHzu`_29Q4F0fa)2l69(c>GYAF#ZUvb5(^7EqS}a8NGC%;
zNJHJVLr?kT(DE37qqJM`V>dUM?MU}>@uRJtj?V%zrxkZhG@C1r8E~Ee^JTvoArli?
z{}?wT8Ry%yhYL*zVEm_unuq15-^p>G&2KNiUBcAnfcm-LD!I$}Ce>ctzp;LId(|=F
z51Rps`LZ2964E3&o+m~Q_c7A=$yQ7-0)_~N{qF(isb6GMByeWWfTn{{<NakMVTQjL
zRy6}OD*;yC4=f=VNP9h}P>&w8*McGF&%<-{f1*zyG5GZJoH2GM1u5t}qvd=>_LhD(
z$O*-H_;x1V)hM0tX#TZW{G=Qwhb2_EAzOawg-PhL%P!xqEu<mcM>o$I0*G@++qxlZ
zFEjZld$~MM{u`n8UA%lOTw3NF8w~mo<!>)Gu4vh+7NK`OFjk&s@V%U>G7=?hm2bEQ
z*JplHLyeT(Da{a4q26?SU496S0wzZw5S_aD(=QB&$?p*>C8@9k*(8YQc^>N7r&16f
zGr7S}%z37vsjz}gGzBWOlp0oSx*^pKUL8?+&tzoQD1qrog<lo6->xh1`1P9zOjEo&
zpRx~*aJ<dw@SkX(FpPbm{XJZUc9&KzPs&7OVLCU!;`R&6Eo{2@QBfm}rQJL=xiA{m
zpQlgP+p^%baOYX5`>SU}E&NY^;h|#`be&$#Y&g3t{<x{uRZj*1oKeOj1H*6_7xDnO
z^N4oxeVDm;>#Q~O%b?x-RBOFgEx-j`*r>+N4G%&-{TMlPb$JN%_Z<)XJqV<zgP@);
zPH2w0AO3X^0*EOoh*vHk6j$#p1N&Sas2O>zg_LS~|4j9Be6zN)f=?^<`}KXc-Jk$o
zQ<dnepHC0`^8OU$rr!>VC!fE`vbN|e_>JY+3`q2qbXl5E__&_qxDXVED72f%#9gIj
z#aHD7J&NEynhY|z;~{!ii)AgCQeP{X`x#LR31BykxMxr<Dp}(FVDp)%LFTIZpm7-B
zrcc8ENG95%Kdn#1jsl^-XYuY3S}W=U4~RRK)?h7v>oowC9iqIKNHA!Cw0ln)|8_6e
zQy+2Str4M*j5}3b^TfR=dD}7D#alyDu)(X~_+ASL|MmN#6mY%EN=6+94>x#~AlL|b
zb;JCzd^`6m*A=tDUMeJrHJ<8D;Rf&qP(J~VuPQXZ{TXJ^9*bix7H;B=o9(Anybwil
z^!FmmQ)(aTZ~=Pb&`P(=Xa4765Lk=Jb-m-YNC|SQZiVwaK0)m}VdUV4l>@{H<o&OO
zrL!;dQ|n%b*%A}j+6M0%;5py-bfO)VfjJ;ZR!j%+|Krkb_4RxP)eQ5<>6rD3crLC!
zlfy6(*}Lz-)bXh})X>e&N_YKC=bqbRCP;wGsPZ`ds6+WY?X6DzA%KRggt;Ji*ou{M
zie%8TXHUho<AToDZ0wkgXSo!7IlP`$7c%K6&C120cdtF^dMhg0nD4r--bsJCyn8&w
zjCPlyG>N{${0<qkkx@H&BW(pLgCzZ<3)vQY$RX$>z@d8Mu9CT{=y%^h(FJG&IxiZf
zNnc+tBqMD>oW9uKpVN@~8_97W%9%Liz>C631CY$=V575i<xy0Pp<RG2d;yN6MO6&9
zZb-di6LC~_e<ByZfx&8Lc9zs3sT}Oxcy@SkR6c*cW4Zv#Y6P|#M87QMUG2X3e*YG7
zOz`~W;1>lXwZq~cuW}&7OE*{9#t-}5K9mT{&@bQ?A`_Icn+wJybf|>I-?Zp1Dkj~F
z*jD`av}hE&RZ=Q0QdRWgs*F!u{A<!fe?Ql~)2s1lew@dFLhH5zL?Z^4ogQM6%+=YY
zp9h+j^V%Om>?d=#k)!}&ygHkVDP1gR#CeEhXUlwc&9A)MKA^Erg$-TJeL>*%%xSZh
z(y(8geAc?qUWqXmw?7y;?)jvEns~VyY3Rigp+Su?@GK64=S_VKz-N2$0I5}V#VZp(
zW1&UC(ZBa<y|2+Y-;hizUf%j6)gMsQ!&b5cLQxwqWuuX)Fv|9>!0%Qy?k>x9$ggL4
zYA^TBTCDhf<!<o0VwWi>JzCw?z7`}(!Ax!kH8O`ejoc5t!CB`+jpf~3FTvAXC+^(d
zQ$4d#hi}#h_tPtMg7;Nj<u-juKU50d;h*-MLf2tg=cap4gDK{Od;d6dXxHiYf>8FX
zTEGyDkLq1j1ZBha3$TatfA)bTN4vO0z2hL4hy`Fg94MIvWaoPbIrw_ThB=A_VziX+
z+(Mpg0JQ)M)++4*AlART>YN*kibd{<?YQ4J)n%7q{|Zc~bX8nYCK|i>jSOi`>(A5d
zX|aiBxw~F;)U*Z1rEMAeNL>keSG1^+IJtnCbxBBw%u&{E7){&=n>smO2ZHza^3C&*
zt-jnlPbbtBd#G^Xu2^dK9>2DFB`Fjb-%o;^jA1lX8zFS8y^jnh9S4nw&N<l=>KK?8
z<X5C?7~*Bhy9z{fgu$@miHP%c;gmaz<FCgcBPnh75to&~5(H8*@Db{*}?6INQQ
z9>B!wpbq?UG|Dc=G;N;My^7CPZ?pw-&L@K~hH9vR^+f@1KzsUXX&s#I<Q}u(1jIc(
z-Piw5*}?M2KJdDQ8u$BS@Rm0Pen}vT`(4*EB(}7N=8@c4>>~?xz!qb<sqD)IK$a%@
zQ$@D8;`}271Pnf2PXYqI9Exx?cKt?(I5rK_b3dOM39Ms5aPp6&0_{xDY%Oe3svdOO
zwvd#!zwC4t0OB>3KF(eSe)T+&Q&7MLSO%vTpfb*&pDZhwruA%}82+nLK)n=ymg8MK
znA0=}!+%V88O;toP<ECRqq_=imosuDP#l33_2ttXs0Co%6~5BB^}velNu8YLx$Vg3
zztkv=^m;#&gS|Ri*`DCh0h`N-ouxomQH7L{2Ruv-=GC}LHgYI<V{$@y=i!UZn(S4r
zkJF1l1axnG3S8HQ0X!8lK(E0th356LUe!y=i5Z4tW+<C;k}KS>DeCo-ATB}OKHbOa
zTKgC<!gHhtIS69KT^wxZzSK8v;g9;{y7m@Gh224K>Unamapr_!Y7x#mWE%5xOtA<6
zrLiBS1E5w2bxVV6iShPT@4Jd48rLAI^p?WgVH;8KmZ@Wcz?5X_AsI)rB+{0JLYV3c
z0$&L2Gp>=?d~VS-2B%n1BF#F0Ab;RDUPbV24vEPj5Od``kId}}=AE0Kyacx{-$M92
z-5L@qMYaLk#xk*H(-oZ^+Qn3u|2D%dq!dkzd8-8KdkHNRIQ!3+Gh1<|ZEC=X%d0WV
z7>on5XMK`uZt&0o>50j%ct0Ldu~CiJWsePlj?g)KT=H}nqFY4u<6nu(6IoM{y8&)w
zcRxxHj$jYR{uc9MW;`rqd4VIs%#xpIV(t9^E{8Myt%0Gbqy1gf1J@S_kJoDAu0W9B
ziS1i&H@4t*(O!RZ)nS&LT4HF?Zy9hw7=KWqZ0VaZ8@E_Va@o5R5tv|$QC?i28K7Qk
z(@<{{m#BjusDftzUdTd090S**l^A?kySh%)`kaSu=oL~s09-bpZ!Lm0j(%@w`Cu<M
zdGrP-TpOBXsgbycmp=c;nc@%d*t2oNnZ1Did=x})v1Ojzu%lr6%J!$j7z?nVu3|7~
za4|<{6fFQUCcM()!Q}kJ+O?09o7e<Jegn2ClU4_=;k>`-4-?mJUn8qYSV7#rn;A|R
z{ko~7melgWPguc+7@HjW<%{bB;0+a&zYc9J0M6TX3n^ZkbVXIj?Z1eRgR8&lj9oha
zTb49J>>V4)Q8}J;8FcR}VqFL~oQRHc<HJhVvkc#gB1xawKIY-Ba5~GN`=b@l!fBbc
zgzQ>?_QD!R1nBDWBkstfBH9?;z!V62{lTN_ex!1th)B+t&Ro|IL_M+}I`j=3*Vo1O
z!q<`;>j;0))=1+j(i{S2u0VOy=%^(8bW1k+%kjO`(<;&#mqRQB$y#@Au4~Ns-ZA0e
zGcnqo8dxIob^&@M(KppiR%Y-?^wn$%t=!XA+v^q+M154njKh2^oUN8<J?9wm6|(Jg
zP4k#-eK~#{QVHVxljE447idJ94Vjufm96(d)n;XSqj7;T2g@JNM;F%wV`j5Lq+vrA
zsW9rU^%KY4+?gE8=XE<Joyr2CF`eUoj(Sn=<jVKC{*R7eS9)LRil~||=>}_1t}e<h
zZTcVRVQ&<Cqy`{T%!jpJfC^Ycmwt6WG|TE}dhlmh277)E;18_33&bNB`2SeId-}QH
z?`9$33sJD56Z~5mGoVUi2tESi-%DfapjXe@4d2(tB&YkVraiEXeCf;YT8Syus@j(*
zecS)|YMA9rlp~Lw=clH+*Uo3{smw<zx>o%lxgUsr8D8RfMN%UpoRiZ?q-2FA_1@m%
zF-cN;K35qgbx+Z1Ek3&2*JuKP&-hR1&SibZz{Kt4GiJerlv(yKM~E-H%<VRv*o!Ov
zY)j6aRD|%xXo|DM%tFj}s|BCebGmcMtRkn670IUu=MQuD8J~TW_K8n)3h}fg8*pvU
ztAVo;P$CVdAU~q98${E-(4gu0IbOHuBHXK8x6;F}3JXfX>?suKbMsSDdgslp-Vhy@
zKQ-xb^RFbtvYj6Ea;L8eDp3a*ZV26*X93Z(FVDOSk2e!=e++3ShCf_bwr>J*rFWnr
zy}!fB#FWmXdiVwD_^Bn8yYoIR+TOl1K4Jhb4nNwK?0Zs5RsNR{hg<L@B)m*i2|Dtg
z5`t%iAa@o4*#NJ<+0p)ew6WP2_yqs<<iXF)q_D?+a)glH<CZoTIn`Ndhlsu(ihKyL
zkz~Xcu~8*M&<XIph25C$Tg?BA7IX9Ei!b8m*D$r6TG6L)eKv(4W%HXjxq7jcMF3Z&
zB*s&I*xW8YKC;Ud)VlM9GQT}^vk{JaApaUctyQ*DNewzxG=<&*Dpy~`B}*iTF__SR
z{MWxn?R{~@c)J+mh>>sU$XH@`0(p`5c`B(7&X>248sfRxpGS|d6b+Fxlx+46DnNs}
zx>O%?@GE?^O>PAO`Z&vDsEc5nnU!?1Ha9f%=N%fRR!qbs{CVH#Nd}*1oY^%l(R<j+
z9eREXkuvCJO&0}ZM=8$%2)2yU`{Dq633#k3v1NetEPzUj--^Fi>pCHp-ckC_xxo0a
z>0o*064qI@Pl$8NaDwEg(Z}bv2D-ZYe@7QSyW9ltZ6sk-$lONuuaAS6-@-Ibs^HGI
z5@Vc5(+flwk}z)2=Wn|DY^LLH&BlqU*{U=rM*qELQz!Pm*r!C|_(-^R(#sJSU6Psz
z8W}x!oFT7yNVLI>6Bifh{)$w}!H}ofrE@%oEbdux-QCw~fu6shE6~dhDy?t)V?V#f
zKy`73amqL66I6duQHT*iL;9IWUXYx)?e7e~jzSLWuz+?Ok8;jS2pteDf5wb<NG(XT
z<qLn_yvnC-76m<t-HvFjrM-S0DqkLb9^YfGYE5-H>Iyw=KFYBSTcnJ>micattcNc)
zAZ?A0i+aAM!i;xv<JDpK*Ir$T(bx)K?XFuK|2BgL>~=4;J1H$cS(D=!#=Ok%5F%)T
z-ktVch8R$gXue!PZp^JWcb6{KA)wV->N+tHkl7mZ6UjZ!LX7?l%e7FH%Va!7vvL%f
z+vgOcxO(|^h+zo&-P-kd<5lZZpz#JWH|5ors|wdpHIE=3J%Ql47U#R4V66UuR`dau
zV7HFG3x2L~gp6;WR{VPjI5FPIN_N)&pI`je0MBm10rLQsVRibQ50hVcrW%dj4o*ql
zQJ%`o8#+`?4%VuHJO3O!4R@NWyfy8GT?m@<#?`Jw3k~L3%Q6otbuquz28-CJ36~Ce
z7Um@N;n)dRyi<M-+S1z6b<MTVlYOJcm5HS@q#*=C3Vz-9yJL)G!=h$kD#se1F}{0b
zqkE@-zjh+ZQ)M_!Uh#elwDZ-~CGwgIT=vyzfYg3^@d+v|DNaLmoSFWBSoJDi>*P%w
z<b;Ftp=QXtwNhI0COGB;4i;xQR6ogfHa#y5OrThMLCo~%z}k06o61svzc1}KBT3`o
z5q_&%ubWSOYV$y4wEIOyO`??$F&C6S4$PnqbNuD*3xz70TDbCHzx*cHu;tyOpft+F
zdQp9FbkPcx^q&B&E28u!tBTHlph|=ItxWX#79Z9rvu6;<@m+xNK>4Mhde+1J`W0s>
zVrQDfpyjos9u|DiTOaBGr4T@W9cmd3{{o;FxY)K5x9x22=>g}50OP3m-&T?*CU_~#
zlPlkd{dXthF3T7P+C>JR&&_uF7z3@%MoD`H0l(PfG2YFG0!eE2i24BGGMfUOkWhX4
zA9#?Gn9N~o&H7<EF6k4|nW2#aUh<gxw=>W^b&P`gWYU^7HUydpaZrMyl4K)#+;{E=
z1V~2gC_V;rWj5m%;sS$-n1gD{b*Z<!@CAtNu~hq`(r)K&sjBp0Ozz*374WABWw?C!
zr{_XZ)T?q02?)OYcRw&+r<izv;pts&_Nq-M7j%G>a(I=_1r5t7#+Y7g>7=hgl+m&6
z#wKO-{fPoMR1WEqamffId3=yUIb_r(Lygx>IRM5Z<@F85h~Ce#jr@P0K!syCFIWXQ
zv5vG#^gjNex*>LyqyJV=B~sV(6wWnxnrwDWm|ZmnZ5y&pa-9>S_(0B`+(a$*K}~g9
zuhh+DDzLA}Zi=YOXL*15trT=GxccVB&4&z~MJsJ$w#fSisR%u;WoBp-H?TPY-Rm^c
zX7RLC{YJ*0)WbY|>XrNl7+50&4LBbyL=={C-}A;L{ODOth0)BS!kjjjA7d)44VCb2
z+5@8<RgXp`{&{<MQEw0P@eS3#zde4mE}Ks8csHksgRkpMida>n{IX>p;_^FYoP7!(
z=%I~}&6+lTKDr=v@2NO#70h4I)#V%9S%YKUS6H9lyGGS&QY^_rH1F5zW#SUEYD%*y
zTs_C#<e1`kPUa|3;>&TPSUy~x?SBY+D-xWUPBPq{oBM+U9`Z3*d=7Jb=p=XIni(lu
z#xDC!7xcuW-%=pia*h;^cYhK`M2z3{#kYL;b>vipkHfZbeB`b)26LE<ep-60WDzZn
zP$g+r9*tP(yHKQ|HL>iktPN-08yjJL<B+ljqk;A2%~zkZsr6dvtsDsW@daY5pAVmU
zu_9($vLBW8$z#Z0UG(QBS*HRbLx2RKdNk$iM{7NefzXMb$xsm*O4g_Ayl2pX(Cx>W
zYvZR--RtGYC6VXnXDz9)g;@yLE9AS9X&)<}hq4lKL7h`!N$>IhHbM@xUg~`Q?g-UK
zV(~nc;vTu!18XBkKyi++5PSLgyJKLuqbhEEzW`Nkn;}uR4BU2`?caMMv#4SE9Db3L
zyFlzGOqrCpWui|0lj1tTub$cCPnkYcUp)1ApQ<7UOWz>hrO$7l5__y@?5$tlvwP>_
zvH_Tp#~6)=U>($`JyTO8UU}FaAze55soPB$hs4Z=W3_tm(JM%f#&9P!<h8DTz@K1a
zyt}i9P%7ZowDFKSgzkI8B=sHE;_$qf?X)XWtJnBf_iWRSDWuYFol9WiKh%FgPL2>d
z=GCKdTu?CwHuD9r=jUekE0Xb%`5JNmuYvVc*wCpnI{4`p<>7IU%(UG#C;O?)JBza$
z8v;_Du92^{u38SR`*Yw5G{mxlo;z^|);i(C`-+%ImnJw&8~J6>_I{D<%X@WCDe*%t
z(e_Rd;j~nB(GP17ZJ;YpzuyjAH;Hw>HwX)Tl#-WdZtXS=!H{(dGz@~Cm!5Gp(O~t~
zyx>M4MCrrpuL?*t<;8qtEfC^cj0qr7R}JCay^z1pUXci_H|UrO8M;sx{=a4vZOnPb
zmMcVr6qx?6UEqTv>RVJvoY85`0U_NU$!S??@sC9A5!`d;`_vUG(}ivsob{H_)y#01
ziq<`26Nyei8wo7xf|iOxYy$-uv`+!wAo9FVD>X63g$ac(I`C#9&iHUTNgN2|IoU>Z
z;~RTdkT_Evt4li~EbO!0tAggT;hhk6ML%@I)lqpb)rcfnwnu^($i(^>=!?fUWyggR
zbMd#;<3CQ8Z`Y^nLFXp?Km`!uy%I;3&GDsscGeFZq{7HFyNfDtBPScdiXjx`Y)OUD
zmXPWu38`es+z+3(Zo4V*5>r!P@Ql4{&;;!mqMHM};zpD*X}W6Md9c16EY~IMw))({
zz`l1){P!#KgHxM@Q)hJq#2=1~g*P3bRjWgKxG_Hefd;~GdKQ8;cCx&IH{ej{7hYXv
z)0!D{EN*xx;(^auQAQ+d-+4_b-Im&XuCL8#fL9Up7Bb@ugcS~c8g@0c^q~ZfUPDpw
z$@D+Rr(eIzJ)b8Y+4@$)H<oKC%O!2N0-H}MV+A~NZMUjE3zC&#58|tL*BYyT`UX|M
zkH*&CIR-{3in+);jT5q*pG&{bo4+IO;HRe7$)}9(WQfb(mVVSwpLu`WUw1^QDPNT0
zJaOckC-9q*5N-(0emeA4pnM&sLg)?549DDXAA;7M1VuO&xG-T(=}R7HZecR`JRYQd
zXzG1=cuY${{1DNTAez8%GI}bqk#xoK>X{!)&IeWDj*8|zaN?O_eeoU$&iw>Dg0p>`
zmcsh-07$lHp;8=LHXMhP57-SYQ&xi^1-vL$yBV~V_B9u(w2*U1b)UIB;E*;qOtbCx
zRd%%dq*5I`XFjnk^hpJ(n|P3^QfC*=iyyh;uj3BZl36!ozHjrP*5n1v2S@GYy^vA7
zhNmxmfb@_GFtGqPK3%7V<IulE$I<u^LoFQi^#0|in1iE*cfgy*vgbJd6Z`{$no6^+
zQFs$!=Bne?#XphXe_X~-JJ?NhpMIf02;S3|a;viZ+q3dP^{mc{bB~t(?c^-7v}=sM
zuNPdjaPgxzOOCz4!1A}gncl<n%Akd*a`Mivkm-hcJBdzoX~nQS!WlW~5`0+p@Cn*l
zt1#N>y2D!_@Vy!Dp`@tgelMd98@7zW!_VRrn`jcY&z6N3UW;vp^Jei|@zLcru&6ql
z1^yQcfM1?;;xUB0thhknbeX_TCG-^ps^2f2p<p360+DC&PrHRJi;7(w4;}T%<Km<-
zvb>+eFTe-#;M3=dhIpj#kRgQ|&IN>h^B?d<LP!;oiXFJ;Bk!F(^+OFg&{Ep1j8XCJ
z@dM)Sw*5Ld3k&&X+YoSD;)shwZ!t@Wqji6%$n2Vi`lBKeRtk`E;oVLEV7J#dZPIUd
zPRwgP^zA?{ne#S3`=|O1C|)b>P4(OGYKp5DzF7$Q26Gjh+o4|Qgp~kNzv~p*00~{h
zKVrC~@%q6!p5{5i3J3pz&T`0S1>-K7>59nZ7<iTr;0`Xb+^A<s;8nm{4h4r%LsJrv
zu#4iq_04HVG)uqTkSU{U2Hc!FlmSU5kFhaR2Hgj+{p1NPM)*2lHsG6V5x?{QQ1+H#
zQMc{8Hr?GI(jiDlDIhYWfRrE&3KA01A)zqRDJ>;9NUC%QDChvvC<uZ`ccX+fGxJ;5
za6fVX-?g6SUF##;Hu_<%x#B#|V?WM=A?(s!fnzm!jPySJzfZuM`40(P#<F}VpTnK9
z%9g+SOjF%eXDrz2co9l*vK3fzos>Lk=`a}{JCo?CLHVrjtfSr&Rd@FY!Gd3A5-rF~
z>lw;~VIcZo#N(wn_X&o&jXPB7t)eL5=<@zmZR=7;iP@EscLmQvO<Xxn?3xqw>5i(Q
z$Bt#-2Utxmcwe@X9EF47!KA!MOQ3(9&Eb2WN8n{l@B(Mx7fOp~A8Rf~NvLPaZ&^e7
zh$mMRL!xUO%rEX!#4V6`cXscz7P3eeyw;4t#~9^=<ubD~lceH}>#V3<{Sy@tQ}>1}
z<vF7d!LNr<#W_sV;o0Zb7OF$LPq^#(sRt_he4B~3f0nkf)d8=M5-j?)u;U`T9V42q
zy=(g@<3*2;r>pB}a!&*jU`1SYhBHKiR4Su;rmpqQ=<lwkcB@ll;GF%;-tK&-!@&t1
z`<nk=S>Nm>PBzg6R|h#7=&|*?@saX3o$njq%2&8@K#pQWB)36Pz&EM#q#oV8u68o5
zOj*kPsuq(I_}=*A@q0+YiP6u;pZ{#6L=)tzUJ`8NG*^&R8~8MV+zh~&$m#B9{&Jb2
zTr<46OL=Jh&0vIMeekAW+a9M4&TEqs4^Udo2K|~X=mO@Fp00q9Bh2F?xX#aEgYznD
z3oEsar+uUTduRR=Eeq@hJyrNM&(!ra$~}SBcQ|k}(LX5<DLAT+4^}UTT#;4FC6xB#
zjsARG??fE9|Ni-9o*Q(Z-<8nRQpE2f&tPU#!JcG%?o{571TX~px5L?$1xExMG>zrO
zS?`~46G~)szq%!nIi51Gr_-u@Z@Vu>`iIbInI4nFbF(Y8aP;iMsb@-vm)v?dmv;mM
z+MH?a<ZWZ2>R+&7wf3SdhvPcM^%^#GAu#!DJpKXSrW(uy;9l+cUM7AWHTuu|E?KR7
zpk!7W>gqE5fj~2_%L%ziglR!75im#0ZO$`;4_~E<D0DBh-(x)0kI=D5%6a!P`g!;d
z5yumKtC-YpBz_uJJMdeO5XR?x{KN$2S3kzu;o8mqSuj<&jkuEG%F<}EYvwp2)zfxv
z67*q~rMKhaeHcxCBi^~fwcnA(I-Ke+ZwmwO^5jU-X}{G@C$}AhKeN%hIMRBY_LaAS
zy`?7hC<)k9oR3i7le`dKRYja02Mx<f7FOZt-bu<EQWmibFhwdvICT*L`-5R5nzKka
z%&9RB6Mu^bwAj8ucbB(iYh4s-h_ABWXNQBy0QP><jf3aM@_X#D>{FV*J^Qy~IgfY8
z0^!@CyZ06Dd8I>_TCa2uB@w8jmaC`K#4yZxP?Do0FW7AK%-D=Nlg!tOlKD$~tkIG6
z-2LdP#X5pb&>B4EDUXD~vRQ2wzpU&#w+_Ak{3FIcI4FWj-9VMLA%e>zWJHVEW4}!V
zr5~qpgTV)CjWDAppMxX8q$gb+QdH~hr+PcoWDE;hrR2wTR3}L<=QpgW0_}HRE__cO
zqI>ZmYuGxtHpu=Gat)b;MkxG#yqa~aWA@Fm18+za4&XHl|Ko*K|M0Ip=G?DE;dd(C
zw4m~a3(7scZ?GaqA}0HDhf&W4o0LtUka?~fq}NLFm`_I!M{USVmiDdkD7R{423_Q`
zFgSX_&<m445BkW&EZ%lE8PAx^7>--_DX~ep!tpQn9cpDwAS$r^{<;?$X1Qfr?sO|r
zM+i0#5&2<VhOhlG-Men5bu=2`zqZRj`l;gY$75Ehn9(}BZ=AkMxA|+NrW#NNbs5mp
z`vlp0a|I}m131m#ws)pazjgjZDAbKfs;8Fia4f-gAChM;o>ySaVNaQ#{37E=v;R%$
zTVn_vJNCPLaS2u2phybzfzQs|r7bPa-fsPZ^1F%McZ}b=YAEV1eOvXqlJ~{f?fv4e
zw#Ca7@zVEahy~C*F4vb&B`MH*$|}uM$%mtoA}xjwH7*tzGl-=+95~@H>qJv)tHoO%
zI$q>^568hrZC8PK-E^i{{`Dv*#}%iN%rDQ8F1>zKyvCe`dpe=bN2f=Uby?_-no?&r
zPjs{QG#H14AS=rwQJV@aP&RwUdOPij^T`E#3*L`eT3SobS^fZ>0yN%^*RzENbr;9}
zaI$73*`AQo**=|(0_|zJmHA8<bsz<b_##lsWzbSd`tlrF-Jt1K@(Slu@JYXk_SLS1
z(6R&TvU0KJoKsb9_sKnGSyxr;8-F<N`f93`g4H1g|LdzS`EJdaicVMt#M>ZxzU$b9
zr=EG8V!lcfQqW~51ywabyt2_fF`qrD#9W>uEB3&v_K%*Esnkyh$HFOBz^^hHuIBP&
z713oNPHI)9qczQAnTDx@7K2caf53c_M{OT*o-fq(r`UzUp+%GYeW6%!#dyA{h)>(g
z@6D~JY-?nClI?ylq3wB>A;!z7q8<>_J{D2yEnVEGbr7heoF}Qc&!}f~#2HXpMsod0
z9@(oGR1>i{$$bXIPX}v!bX?IKvSYhA>jj)Q0-5`&S6{2z67h$9P_N^g_LMRfgX?Wz
zGIx;h!(UHF{8DnVhE*zT*lri>RPEA;CfCmAIGh?EYs^TZyK8xZ*;8lK$MRv0-G`uE
z2j+fv7#W&RRled9XhcnCV4Tkh9jC62J%23?F9i>6XbN_ADo=Z3iL$wi?<n$R%WGg~
z(a!9wR2(te9D)hzuT($$VvyrQ_w9|VRAma|u1d^md21*`L7sXKXG_V`JC`i;$!cPj
z;qfS>k8cLcJ$MkXqueI`q*bZlz+cp_*W3d&Ajb|cHGN@KU~e_MpZLh3d*2lI+jf*|
zW4^w@Hn1iAP;1X>(rfrF{J_rw+^RsV>U8lEb8Zwnu{+TpH<=GYJ3_==_fO0t*n;Kv
zk@H6DS4{T9ZwSnD&ccIskYAKd#xCujd(W=;V;?rZ8TQc^{#DxD%=aO<x8Osx_i$M5
z70JiE7&TjYp&c3v<BpD@gU=d^jLakwSD#S_y2QKHa^-oyFE6U}N~{zP`}6V$zIuGQ
z-QVuD{-ByNuKb&{fS27o1$>En>V!5=JY!f^Dr{>-ctx_L(+V4j@WkpZJH4ryD_y=_
z351=2=62Ar92Uj#J*E|1=p-5FunLg6A@Pv_Qp*-hPu;v-Tu5kDmi@U$nWrkc79Twg
z@?6hU!Yp<ejS}p><wT5<;Gua3td{x3yCavy4N}z@WLxo9ZQ|qRM84q7rZ^jM8+=vB
zweaT+zz8utx!VWVyeThO#=!k5^+5)*qgY8bp-Bf%NHVHGzj!XEvkI7J5QzDOspRf{
zbY1BO&=CB(jxhhdQ#|i{@_2IZTGPD*v3o6gkc{}iGDzP!G~Er+%xy}GDYb7>21N-G
zOu7YLKQW?~hqy(3%ccky!ta4=3Gii{(l9TyA70zFu0q2lGlA`JMDydSk^wuu3yZYG
z+ndh@hnP(HeF`G7LcYW+XAz1*mP>G88{tY)OfE>u@5UYDi9~a@+u{w@qVRQE*BfSF
z77p*woal9|hoo~{@q8U0{RjQ(VADjee3&-<o__9Sj#bZ`&r%VwG{xz>K6|b?`59@?
zmXDbo3JD!v2jBZnNo@`P?c6al1-|A}a?{`eoNAY6$;p&&36It4_tr)<HgR4$hc;h|
z+s~(hKW9M~;;5`F<SKXEP|Lm+f6XaA4P5WoZRmdp!Y_UW{*+lELxrdIGjJbd88*L5
zVVbWHb<tfDPmHr<@!8A~b$ELQP#yfmlcnCrW5ceK*;UoDV|$s48CR5coVF5;Qoy^z
zjsq%i)s%IpF4>p`oAd6@0p9f|=9oB<={3aT%UfM$duDdGr0ml>f%5bX=JPZ5X}r%h
zZurf0;+e4gLbH+ks@U?p%GNaVtNqzEq4KtO7tfG>w7A<w@jh*V>0=6#45Ev1V-6RT
zqWYt7N`IW`FtXh;Rxzh_`nqQ=S)vYob@X!oG^lq|*X(lIQ0moEL-`umL*KoY%jd^s
zc7NrMKZ{BeG4433hDxM|aMwN{-D@Pm5<1?Wua2$TKhDVW%Z!8@&GPp%k`Y%=CsH-n
zpC!>-4oPbqBzJXQHtqW*?M?shJ!M;+++;g=C#d)m&#!!h0z8*IEW}DW3(ShPA48x~
zS4|%}*AaRlo8(5hFiE#~Wpd?v(q+$V=1Q$P)X>9x0P0-9f;ygt1iCeMQHxo(EnZGz
zK^>(3L<1O+RkVon@wHk(WLa{0tDt=LL>z1zkU9_i*yAv{xuh0%b>SxCplS88MVuh$
zMvI>L*WLRk@hN)dHxK7pXHtQuXRhA4Ndb0hwk!g#%777Q+G^8wG@;22e}MaqnXbdJ
zooyenynS<jko^%)K%{<gwC94&gZ$(^dSdA+0O~lS*wAP0Q03-{<*$unqtnx6{Y1RP
zIBp7Ox}eSHa8+y7Z5h0jZw3(P3?}XF#BaMnhV?N`z?Y@3xH(y}k`+j5uG{3YU!{$C
z9iM)Z0hw+h!OLGksH}8*cJGc8D!f2eJ0bQ%dU4ar^>8$2DCN$aXKs|TU`ruT@a;Zf
zGQr;Y?zPc$u6O;$YK>DuYiHN6-QB+mKBZNxEgjPr*t`2Veh<|{e{0n|Xi)lwAGf9R
z@xiq|r}-4Ncp}-n10uZ!RoUd9YJl2dj5nM@a`o2?Z|t-j%i(j21?k!_xy>DGOxkhE
z1wK;~&0y(cxM*AOl(B%SQsN{eB>AP`w4hI|)bl>Y!xre!x@qCN)a6C%fHa&?*qtz!
zBHHue9B~0(bSKo961Q(b)4u_P?HD{<BU?2ffhPjT+NU@WlvaLsBY?-`qVd|@lfk`A
zyBqDYr@U@{=<mX1oFTia8_4Mlc+Gyy8eacxk=Uz#<Oguh!C4^ohVhrVL5Ft0Uq1Cg
zmN}k8>>{wJGz!5jf6YE;dJ-HvxXq(!sAUWUNl-;cU;N~f=|joR(O3UkN+S);Y4esQ
zj!-*~lm(U^VtKCc7LjpkPR0J#OW;I_r>@VqZ=%QcAc2!vmphx^oAXCqFr|EAqzE11
z98>nstr+UiKYFJgRFC`=@{+LMd+5ozdQ$od@y|uTI)3i*?ieCowu$yRW{4>S^sP-U
zmY%Jxm4CgHMc&AfN@K@FRr8%f^0o<mR3S6mY)-wD;<D4;<;@O{u|mq_W@ta;qKE}(
zhp;0^FR(vo=L{e-8&2}2+fi-y?n$M3tu_(6QlRrq`5B>eN}OZ?OengHizlv{L;n65
zfOTxy&1ef&teZkk&^W@Iu^a?{z+jwFqP;QneWhx|{ZL0bU>C)FgbzQxP7B;5m4s=Y
zA|95&Z*o_S%lyVN%=50~n#J+p9&{7!4<7wpIk^Qz9zTF{SEePBz%AC{Ec{jyFYFh2
zdaA@wKJ@YtX8o=rC@WIYo!z#0i6cEzb!n3QhufO|^Qz{T?C4B!5=&Oykl>aXJvEYd
zvOmkL*OZUd<fHw5^&wPMKey>4a@;OU=1#vDyX}4lBYguI<OcygKELz*g=Fv)bUU}~
zHkZ`q@bqD*pDLX+@}JFOk~-i~dq{tXIN9&74m$<jy`6VbMf>4@mp=3f_Sadb*)E>!
z1TAPJoEG7Jm#z`JW--;0Tfn-khU4gP`zAS@Cs4rzi;6r~a#$X#XQl@?Hhgu>wPQEu
z=e`e7Zq|L4*Mv{hIrKzb4imCd`gwO@u$Y#dn9(IFlOr#UtIzMhFRoTdX&0%$5<Gsh
z=TC(F6Kb5QF1QGR5ULkWwot<zr#Uy6&gVY|<Ngr?>-~B;q>t9oq%~IP!#9&BzhN|_
zDA3FbI@o$ml2wrH`wqq5+|l%7E>V>~W9ChrcN;}t)$U+NtG6+FOkVB)^&L?_K2zLe
zfVRZAn?h4(8xNmFy`Zw#DOSlqI<UIlfxeuxJeOY?d6y2zCAw>4%rK3cNZ?>{xAj|w
zcMnF~wU9hUb}7dH#6${P)&WPvoivW{a-tMhV0H2s18~5C73sr1r!!c-p<zF@{AGR)
zeIXWJa^{%yH@cV$D*PrF{7i7AeV?JHHz1ok0II37#83?KP3H9<cG;XDMIcM;ZBHYe
zcY-aC60BCU&d;p>*dJjz+3Cx^Cc9xP*}xPxViX-WG+62?OEllJD84&R1o^D1$;c?k
zPOv{>G~~&9;6%H=x|8)Dj+=>N8(l?Qitjt7?h;+)twg3_6u_(F#V>eylIoOVS(uy@
z|Gk088@jtC;SCcaIUBhLTC#Eqg*U&^?t=c+K^hS?2=uklpa?aeyIe`a9*TU#&*Ckx
zOb1!P3h4N7C-1Y!(X<rTDnor`)==D|9>*I;NVEOlJ0q16`OlqPPRdB9-2jzAX#HJZ
zo=86|(MG<1wsDzy*=Viu%~Vc$nhq~7U({QglZ1v1#7eJF&qi{RkYF9W-}>oSi6^)a
zFkAzCW}XGEshr>n9ok@#nGD{8(==fBJ!8i6`?%L=485BeDRL1xW-Invn^^bxCeZbQ
zD$Z&Y_G{j(r#)!s$Ik*Ap@VwH6XrpNqZ-PS=39`Uz9Lu7UE<!2&{k<-&*t~>cT+h_
zS;&QNqCL(YyPY3-;V!+E0|q}_pu3%rOV^$?-pyK?rxaVE0lSa`_&|9BgfELX(mw-5
zoG_ta|95;%d)fMGd;5Ct<5GY6m!#;p7nLMILc{BywPh8JnWB9Hln=^W0wvnjbCBXU
zU&SlCQ}RWPg_9$vU=j-5VO4eUHg|)*5?b$dtSjO5l8b5>8smJ>E=t(5<H_?^#`|^|
zF5EA#TD<rBRW7^H7gbz-$LS{0BYo61B3L`Zog(Q|2TxeXt~@H*IqKm~lGYp4Z3Yrm
zd1D)2t~)maYzxkkysVC~T7>s4#LJ<tdf<4wMkGtL4_u9#?gSQUSFSAdq-P*o@l<Y+
zBd?XiEnw?Cy+VEcW#B;|{tgHwgX{t&PNsMKfra96tHUe9PSgVAHwii|gJR7ojl=)u
zZs_KMl&uEOy3yYofWNCJx*A+oiK;oNfmg0e@(g^wPk#tmJ+iBZUU32s7ZhG-RhvV!
z#I~$|rSEWQHuv;j_xzGS@wv8F$puuPW)&@1G<FC5<BpM6SGxXW8Lz^d>W_(>d1t4_
z0l9SXD;)oehWU+I+~R?TsXVV=)D@n}_;rrrlJ$IN_ha;f_5_$P+maH`?UOgt>|>LW
z+Dy7U>IQxh;bQH|>U14`y5GF;t{ra78CH_C{HUM$so(IG9{&Z$XUxGY7^<-SC$n~)
zi{NPiE(=ff2$}Z0`A@p(cI1^rdHf5)fkkZx$K6&XXJZ81O231>HxBA}`)3Ehm%C)7
zqeIDqRbs(L0b4-q_*SpW$%RuGu2$YVb6}KS`nd6FQtFqBu*W1a%^#OdAdS#y2?ACL
z|D?g3uKxJKB!huN5Aj1t!*Ny`JBdYK{%zDR>yO=k2*X;U^Isp+{EUHqfVMa|2^_m|
z1ZMnVslz?PJr~YKlZmFbAWi8@P57_pi})Vk*K;mHfOe{0JRPzey0m@ks6!b(eN6oz
zX1D6dY5Mx+6*CMH>y@yhdB>tUmW0&pnceJyI<Zn~jA-y**9-~vn&FMuxBL4wV~Qnl
z);Dh@U8$s6pDu5X!lxdR<zXE#2u@6K?jvCyFA5WW`_;uZwO!a^fXBndcL5W6?`eyZ
zUqIiu`E^ta<>BxY+x+0ldJ^S_*n%OPGN~9g5ziRkKxH3IwMXpnnDQkIK9?=-j>(A*
z%jJde*RL-%=2ky%4s3;aXK{acO+a;Rfiqn!$K;*nvj(0)>@b*#lU2uCCYG4PLihwT
zEy_qiL?SeNe}xVr&Wsx*#)ymEeGU~?tWS`YP@uLkkyyX*9uFRl^9eag{&iUe@lj}V
z6!vF)eiLstY&KiP*A>DerOKy?2>oKDR56b(9IwRe(@!EVjzbk!Wq!K6nr6B4J!P`w
zKFdAL1cgcDowH1{>BF;bcF<^XPe|90yJ8QY4;v$67i5=8u{nGE5u7r)0?sf0!QsHZ
z#mi}`usO4}aDZ_y;lDh%33~b<t{yy<XYiu_)@Fi<hg-8I=DII(M3c~h9w`>8kO=LF
zCC@JzbdtV-3~!`H1NZ6bI9hxW0BhmWzJIM18FsBMr|<jxeXZ^gbg2_~!i&NM*?H$}
zNr_1pYZkxlun04}Usf~Vn2?oaf!WjRjo_p2)mmv!$?06@%x_;)^MfLxyv6UhE1Nvx
ziHf20+G0U@u_Y5(bJp%+3+(*a-d+eg@aURZ@+ed_;=5@wB66#SnV(0pp-&PRP>Ttz
z)e@9Z;i|H)#6RbyH&Z~%wV|*JB5~Z-9NVw&?mJe*|4KOkR_wIy?Rbul9Jfq1F&*w~
zauOjMC8F?aZ{ZUT2i*(usl9uR)c;byhxSa_UM^-DYB0y>xJBr$V*wmG9ih@c;h7SN
zu8?~t2GBPNeD5W8q8S?CnSp2CCylKCXag&*uKK${QIXVa?&ILH5334*QXXjM-E6z~
zL2SnD7eo@vWo9T_zKSj!Em_I)T%Br)uU6ZFaM&#Op>lxc$ch9eH0F^iYf;tg-5HVV
z;#kkqu@HRjpzZQ`50-+jbNL)!Qx4`lXA+u2v0@w1MKr@d&o7|CtuciHdu;n+!S$lc
zgziiMR#fk^h*SQ2_ATg%Y~2b!*3HiqHfLNud4vBRdN=p<*)#VG8Mc<AB?`VG?YBF9
z=P{(3n}7!Fj}=nBc4Muy;$4dv*NWGNExoa)FPcU8Hk`O^i)wO|$ku~;yprD+@K^P{
z=}L&dpLFKo7vMr0I=2$aJ@rC?{GQDvC$BE;R|^J)Rz#;<qGW1*>}f%)_aE%puOwrp
zQX_do^OALn{0NR_0eon^DRm!MLtOLL5I-({dbx=GYq2&n6*f5nD+4};A|d|b<tph;
zrh~!ntl)2^)r^}B=*e(2a0{AG$d4hK9wz9RG1Q@(gU_JC?^RWTKl#9rmDpeBxBs9s
zsM}@NKfZm!cnIG?r%^C@JbPko)rfs$&D|X>9cJYWi)ci1wn0VgpC6!L0D#^23(8^Z
zQat~SKO(>dh9Fv%Q;EMDf-d8U>nANE@=aq9!uf@19E1D2oo@Y$8sc4iuW^QxyB+u~
za5|(hjs{|UtQ&8xRHhsf?P`VmFu8HWc?=KmmB&5ZaN62A5ZD!5u^R_Sj!g=6jUyK-
z7H%llL;F6v+t!tP)5t0_MQoS{VYx?)F8t+sQ-<f0HM;TlO&WwWNop)Rsnxd!tdNy$
z@>13U=W-l`x9=o6{YUwQ=a6+@>i?v1P|hbW_K4}?!vk2ph3a0>O?0RFZTSP{&r#p&
z5*(6A5oK8eyuaL|BwCsxU1TqkAEtaB;S(l*fV#Rr__3}bk|3!XOi^67M0W>+jxP(H
znk&<YRVhX;5QR&uGB!V2x7cbg|6z;^E&~0b#9Q13mEMQs^DIR43rSO}29A>pEYazi
zYf9FjGgJ{z5>R&Ew9bKavzpQsq8qRH34vx1yHwR2{;S`$&?{<4AKc>g@qlo{w5pS)
z6t9JxaK?~+Ch2^D09DGtB$9n=7o#elbf`~*o<2i_mnRPW`mbgoCtBPV;wds%uG+F(
znUn`-V3zK@Ba69aqCxJg&Lik;p8Q&DKKVlOWFKPHk>+Rh#!c#X7k=U-oLRSHU$nq0
zgd0(mml!9-bi7EBLuK?uQ>X5aEnUVAqKKD`{)$~iK9>eTx3csGMgwb(sHay}4kpAD
z7~<p^IlySuQo`q|e&UJ*>hdf3*R$>?`2?r<71{eG)}1j!2bh!W;#eUHg8o_WCu3$w
zH7#0bpGGgQN~R~F56X5p<wo|Hxt12Hzg<;Z2DuDn<sti>)EP)68&>$vRL1!m>K-E4
z<*ZTa3^`s22yoD2RkGFHYrOKQ4`Z(gbe_|%xA#j({0E93RiiDh4m~<&n@p1d>-|mA
z;gYEJs^(-7R0>4@fv&;rA86Z_MOLclquD2p1Fn;^q0}`O+8MW@FV^R;=0iBe8oqtP
z&o)TD#HWYodSc<DJPcNT<dsZh#mdMlN#I=iiiirq`=?z9oKL>bTFazRAB>x4GcV&d
z#$RIg8=AbW7wwZS*TJ~zxrDL|5|-ziSrWWp5Dt4stv^&W;{9R50cIw>f+3aJP5kzO
zUK#a)ktJ2^n7O%b_tZ9-oMD}(UNRD9yCX#kPmOnD_<ETG|7k=Yd&V7wzPb;<+zd2j
zwnxV~bJj68AL3-pZBmak5x_GXxiIoXv|e8s8Vv6j!M%MQM26tAU}nbr30rfQ8OAo<
zdw2Ir80);*Y$C?_`RKa~g)A2S?9{+=&7Pmp@Cu&e&wEUV6&`PuChy*Y?pR-OwSW|Y
zQJ+qKoq*b!o>Jr!v*(*KeA!9lg?hlC(TV)p;a3m$3L%A}^B5NqUX!y7RHx0YIfdvG
zvD0k~8TltC(@6~fOfZ0*&2|WzgHACO60Q(&K)`hj!sJDc$_k<4^MN7vlgVhTL+iAP
z^W;KYzzA~eyaw#3J9awf1}U*p{<XEW(tkf^6O<ZB66o*DOKZa%WfSBO(E!lth^6j&
zw?nQ+d3xwIC;n!BHs6EBt3CzsC9Wt7$Bca;F6~3s)V%!-^5{DR1}0yK(oNiI5*E(J
zpgdwnf|ahF-0QS7{&Re<N&dy@l>C#^QOZ$^mwg)|Unh{)1hr-#*UKxsr&qRNZ@JAv
z|3}l&wZqz3HW}IBsKez<Bh0p+ZN;mve*3AkZCgI<VM-#!Z)8^h7A_wEj=AgKVg46D
zt)Nc83o&*l)*6?~$`N>^YIjR!Ak(cIOMxQ!OliNhZO{Mgbakl?16x1d+b7u?z{M{C
z_d+BZ8F$z1<U-$TVzdvmH5vP$MX@i>PP2J9M?tFX&||LJ`3z1KPqJ`leV4;%F{0Me
zjJpj(eHthm^0bp^Ya57%KfvIa_FwZHT6Hf@khjv)ZM6GC`B{RJ;k#-FPZ}MeL_^7!
zenC&hRvV#02SA08C7kUdfdM!xQNR1Y0fTcg0wwh~wP%Pq))t11o-ecn%fXgalFB?8
zBtp3LXZ-lzl5Ea@POMQ<ui#0dlJlikFLXEpahtk4-+>G-c1(6F<$+Rq+0IY3C^cr&
zY)y{_rgeTyLfM1in0jUBtA)vFn>v);f^VX97~`7Gm|5^KM@W}5)R)iVaE|%_ayXah
zwtW7}IsG_7N<4T-mjp@86w--zfj|59Wm{Q20r9N2HFKCq9Oj_|FtJ8g$4kWM64ehs
z{_f<~+tsMMjTm!vlkPsN+l2dIIBQ3S@(d|tiJ_CgP@iPh)vzt<Fe_f(K>o<HJJ#xc
zm9#axcg<N@p|S(!4dMa5+_&8rt>b<66>ENnYIW}@%%!XDiK4bS5E4gS`GecF>C07D
z`0~M(Uje3X+JD!Nd~W~2?d0>W!^x}wv|2s4$?qUTOio&sqW7k+)zRMN8PxQ4zj3c1
zK&CJ#11dX%Te!I_4CeWSKfr;jpKb3Q%{oQ7Sz<B;tGnQPEkF4d+Jp+xZcy~iL@It=
z6|T9T@1bIUs5_?oFT2)%5FBa1RTb~4?@6om^0=7s!{Y#>+eWdM!lh-$_Qy3$+8jCj
zp4G-Zh_rI2Iq;(B7V6iV$X)sX4|{Ls+?F|~9aQbEt44GG!}<tUFOx2{G$k{~r7#61
zatogeS36EF-?KNmuwQtSsITyq&u=r@*e^;Z_X?*z9nF4<!9kGn`hMP^UO?a6=fX^M
z#-C`4!Vcv=tqMM&hq5B**OZe4P+HZlnT|(;ZQ}pNct-L`L+Y>;&u7bjQ9RoB`*`1v
z?fJhrUA?zBAVIyWrD5=OuaO-6XTL7+$?(u&RmOFSQ{Tx$IN)fv712KrJv0vCWw|R#
zk0bJYK7Y8F#4xjXrccrR4cP!6bAw>P!;pKzXKEqu<Q;pc+o>$h&K@TvrRQFKbDOgK
z;$Frtu!p{$fiQr@ke=1E-|(M@e<mSrBGRk9i&p3E(q8U=HbZdsWxnd3*W~|X{y`=N
z2f^A_dRuIGE^_OpB02jpaW`BpMS3$<?Sb98R@dz$<0fH#=7OD^J2$|TnF_P}&ay=7
zya{Xyn>>j?spaV=v@VsO^__EUkFmMU(?)nvEve(`m76pBmJ|kaDt|0Oo?4`sBX1hH
z$hSwk#IPsNT>6E{+yyUKJ%1h&6#~qt1ps&kOq}mW&~z&wn^xqr$fZ&pFI44I^e3zQ
z4}?cu6#g(ln+|v_{)X`AHt;>B;y#8ov1(kA^m_{s9@Q1x%Zcb@IPL<#+P)%uQ9yyV
z^Cz)`02~C&8M(lmL1hiNMB=x<tUYzc<18B>OBiEGo{uQJc?iat7ksh3Vg8r|;mbGs
z+|A}JlkMT-L&()$#-<j!Z@aXkZdma654bEK;!fo)Z#{SYWxTsm7EvNK6UU9{i)$Y@
z&!W+s>^^@+u;5xu`CrCfQ>?M~?yDli-;KR_Jit*dfui4gv=DJ4is#n0;1dVmjJtII
zW_PGM9(o&{!#lDcX1u)_v~uxNAL@<E=BJ)fqW~!1Ot9aWwzg%}h3q3SN-_3v)55(!
zAQEvczaLBQco{|=Z+^j3hp)UzDF-aw|BmnUiijB8|BK$ivy&UX(_yIl-mBMVTt|e;
zPPo4bo_Gd=UAEb!K*LWJcgb=E)<3Izt`!1Ub{_okYx93FJRizIkG|iXOYFrE7HLi`
zF9{Ln&;3cmD{SvTE3xjx4l3}s2cO{eVto1K4e0)wUN6#K!RycQW!gtDRt>PX5k&8z
zKE371UJS7z?f{_*M{LW|kKwPue1bZ(Wr5)9ngpn13kn1Y#e5;wji}g05ei+Jjy-F3
zVG+Jd0;g4gF&Be=AE(7a;Dy(C`5aXk0fr^^#}t3QW7h-E^q=J%j;r1$2f4ph)!07B
z0oTsyI4z7X`sCS_>z|FUp3X15F&*<A=LJwtuFE@tOOhE$ISk(-aEHqbjIA->D4YGD
z{^Q0FfvrvnrYD5s1fMbpV=wWf*)XGT3%n`yHdc3Tnk#x1Q$+7m=*u4dM2DxsETigD
zwA_n14B;E94&=cfsTz9aRz00wM*G(R=w<?_D9(xS+qh}E^=~ZKqx^9<u2_StI})t-
zV+fV-=on9^*QW?J{-_VWsA&_D$e$XbvJ+sKh#<*8!t|gg0lrrw(S1!!wDw}XZCq?~
zDhi?@Hw#GEMugbjD2R!`@yjNMVPDhkOvnhVIh@3YHeo^krfvQ0)JAIr2fl!(Saudg
z?ibgS<{O}4>s6^pe>GKSi5Crc0#i&JKimH7$2t$CXAL`R+9CtG4e@OBEByi)hPCj?
z+08&G;IwF))4)@+r6Xx(3*s_Ocza8sw&PMbo;Jo}_Dlm*8Aq12IbNUWBZIHLogPL8
z9*HpRSuHPuF8vU0jUQX=y)21hV`v;|JNo@JwfY%5qp3%6zDTz<H86;S+av$K-X6C>
zr^!)ft$eQPgV*FbF<CSL9MPAgI--JSUvL{`_&muha=o?Hm_2>qFWkqDb0QL6UV7Cm
zGw?CUqdJFujuR0^WC5gn%3qQLyy;RM-hbgXN#M%iv(dWAYUGoXDC<n8HEzaW><sG4
zM)>gD87-X4tckg3fmIYIKAorY-Ep`J7?fihRU4C`yK0s_%qa%~OV)zD;}E2ESZjPa
zNTQL<UiN^Uu42q5!9j#_?;Vx3eg$?8aW3%1Vr{yI_gAgBY|pN3(Jk-{j04#7!(5r-
zbH$5a-Zh<CV4^=73z&W)*iGRr>ZwLq-^DbFec}IBbHUE`UdN^RilsHq$Gzj2ANf?}
zBtz0uKhC-VmQ(We5o^*kOW;E#W%$`I_|FEY80?j}NNT>oi9P?#Q{V*K&(q3PTJNWh
zBj12Wh8zsCZ*8ayIiBmc{{e$$QKauWV@J_zb9md&ow8GTe|`Fm_g1O;$(7?xX)(^Y
z`b%ju&)RxJ@q|0+=?kgy`m}K~=Q&v|lb%>@g9bCwvx3k%lxf#7!8hAhfh&gOkFpS^
zAlb~347RO2ccK-4{O;j{oemWe-io)GuLl*tH%^oNW17Up_7($v56V?_o-=@`dTf!C
zp@kiqt1XaNXSlD!YWv&z7~%WrLR*=m1{e2hN&xE+NOS?yULoAXT#Y7m80#Fb%PB9Z
zx6j%@Cg-=sGaq=Qv)j8Gb$Nk*oq@c(jsy)2ts7!Im)d>OxinvOv#`0n?OFH+syX(X
zzhC2Qxwm1EpbyX1awyUqbW1Fq!&wGI#jJQ^#MzP@j_n(ereoMR7)^Z`Qk#rI@@mi1
z#VpAV5x8k8vX`xwUS>f2JlF@oI#;+v;A4XD&M->s+0}M<sSI3Su;&lR?ixQ48i^dj
z=8S<xhFw9dJUCIS%j~4j&I^z%7?1^J;B2R)f7eK4@TO!09xc9|EV_`3>n<&d6L<OE
zAE^)z;p=G+yIDmU7JeHnQ|eu_YNGy3*m<#l%E0D>Y^dCC&V0(IJaG$VS_K$}<)>F;
zWo{x*s}{~x4Z}C%zLz8--CI*4%;WC4|7z$ubs`^gA$o|)VhnZuQuZ*{3wxC#A>++T
z2@oB1y#n{PZ4j_^BRddRq#m}yG@B!kYwxGv*U_WSj}|bo@NERy<KRd3C;j)YlTQAC
zRixcn)B`LCFX_3ZGI5^Dnqnk214+L!nToYgj#T@&TUfFGVZYu02Xz2QOtMl_8Ui4q
zE!=$&<MI4^x=whx0CFDrWmS6?o^_$2j%)&Rum7xD#9l^moduszRV6%h_4eWa#^X#t
z%FY7#cKsNDqZN7a>8+eQAU$LLfqeW2pm0Klh-!B)fGR$?{+OWmd8AaZmd9M5=C#20
z<NNmWW*XVovcAo$<j)66w0->$En8jTThx_stK>b@N~o~__kW&8LLwBFLF;rgM*{bw
zg7RGg119RMRs*>Q)i-mawP-Swh9^JgZ}7_*B(|aP@8CSz-hY9!6lM2BKxFOqG4XPY
zhmAH$ZoMbs605JRB>s0{M^@-Hm{wDxRb0b)B<|^hI#QH7V2cOum@{z3lYdHt3ty!u
zq5MQYD9~t$F;t?>tQF(a8+(g2iX=3HzPWd<o{`qDh#Q^|d*a3cV2zihS%ZJd5IK!w
z2*v$vpjkP2UT1d-z(=vHHNg8--u_wn1%ZOCUGa1w`dvT5tsg0O0PXLdUE7;%Aa9XJ
zGc?k?o`7&M0t_DHip3raKzoJSyrg<uCKAKZ%GV{uK`uNc{hdb-dL?d%7%sGEo%t~T
z`Zs{1aIydMG5p%uW2#5*SSJ7K9Z_L>N7t3qU}1mj9R=Nddi}|$n9ygPd(&6oGx8!`
z!h@_*w5A+|22sDIeoYyl&z5=L7AxD(FJyEysHHqdV8&trGC=XJ)8?DVJE=2{QqLB9
zd*09+N)$3k(3q#zF!1H1pD=*+eFOZ>?U|PgR^K$_Hz&~1;R-uI+=v5571Txmr^Hjz
z4!XjCQILwsS?ay8v+91IciP+L4PgT<_gd`V@f&p|_MW6;M<8VYIL0JnbYb3Jsq-)3
zhO*1()dDn!Qn0|x#9_>JL65^*>DI8TIkYcU8cPq?<Tg-qD<jGJ;M$UE@MhW#Qt4y7
zN>J5KAeh)9!BC>vD=B)qJ+Qm}vqPq4)hhSJv^hramu7qFA(58`m(HCt)7W9_J_FK!
zP&3oz&^epaPxS6{rn^_r>=67Qr)c!<n-Mi=0wWNRZyaW{h;WQDUWVB14>rFqKI;L*
zbOLwnf?8!7B({MBy2zpd!q?~pw{;d%uxHE7uB7L%)Ihcq3CzK#cgonTE7Q6+v4SrF
z6RED6IBuDal(6k3wj<HHHzlzE1C)+p<o_R~(_ydup>u8>u(KCX#h3F4=TQzbYf6i%
zL!`y^w;r?!O9jMt2d!rDDRfg0%F4B-{v=5Vks_!s=;b)lw^};^zNmzn3*otWt-1A@
zMt&SlGTE`D-b6K7oQkGhw=<6y(*z3M#&N;0Sk6NcD{l>#el+#*tVb?zLM|UI7DRHz
zYX-D(I@(@KxQYtjGyM)DjXI3Sv^nsi%~iH?RGko5)k-czKqr^1lJ~x{(y$nd%mB}&
z!VF}4jAq4oMbgO=#+6|&;MCRW4B}>NwZR4NJgE{E{ZmGOg|ZYNIod;Yz(U%byFwF-
zEjGqP;oyl|0cO**mJUyDiM!nWX*m3GDPqdJ(7o@lu?T<u(pg$e#arn2wck0~pDZWj
zGWqUj?R%VE<GZ>pfpRK!)hwCb1O2H-$3Z7sB-9+d;SDjs9F%QiR$I2f-`%NW`)0&V
z<`EzMp9Kx@=stz@Oo%6Pm5p9=2O1{}-`$vArhO2#aR()*Li?ZqzG7oaoUh3CUsOe<
zUu>%WtkZtpdUHmfcmyOW74C7VnRBr6X!taI2zyAAw(0%&N_^Reozmga*SWDe@sWbA
z{1Zm_E;Vt&)Y(JiRd2U`9|AY{5>Hr!KWpZ#bW_9`W=EB9zLpXd@x*q(%J3npU51^_
zoi7Q_6;5`S;x+w>Yh`;*EMgx6$+~V7oau}5xe@bC0;v>QndHZ&_vW53VzS<GAI8Eq
z2_mCS(q0z1EdS^zrt|!X#2^$;@;N^B|EVn(V3s5x6oqfvi6k2&!(I%-{-SM)ex^@`
z<A+}lPZg0M0+}bu22t?P7h-lJkR~>~QW-QqPO1Bf%3EsT{j+`>h*JHPo$M=^<S;vC
zVw9Ferh)dyFCvK>F~Zg;&Q3QtJu@y|F3q6RLf0mNn2*K2b=_U;U>h?G9_c*%Z10+;
zPT##M=y&kw12EghBDddIy8(`mUoj^f5uSAOjQ5<mxk<yAD&~ck`SAtj`pMQumuk>f
z-e8GuQdg+r2-L)pXM+}hX)q;Gh&<!f+m+b(I)P~Z3A6)&-~EqdhH&m0uHGWDL8Yl2
z!Xvh$vYw&zTcjttcL(v(s^0<!dHABo%ky`%y_viVR>#b`fqh0iraQ&``!nJi-eRfa
z8g|mn{3X~Waca7?9tu+fjuenY{RCfdf_p)vwZ)4};3dm1E^&eSpS5hmLP2qC#!>u#
zB}zCe1$;Ib80bLE(ZbpjU~+G?TKKQsAg_Ls-oXlRn@~L^q7_ND_2*BG0zY03k1Ws|
zvi9Q%tU5!EHC+f*yn)|x6Gb{RBBcF+ON#*a$5IWCYH`gI{#0pKl|UD=UE{UEzO(7S
zlQz5`0cmr8(XpMctaH%*wwExgdpndOaHu}A=WmXw01kbz<Dg?g;8#HXy7`x@sLQOu
zZRfhIB}%-qyX~+_M04U&x_^4)F>>H2$r$^=yGRtH;8RieAjs+Ych@Aq({wy+fOlVO
zE?6}_BsKt@FQosA9DTGT^OFioU`)c_lJvydm@(ADzfVdz?S6)N@0K3Qw1Xk?CM+I<
zd&njAg5fogb*B3HQbKn2iS-JIqrHnb|JLiUX=e}gRbc;?WNZjdZ_d3_T#tEN6s@gS
zyBk7W$}H7X9dREpGE}xr>E?_e4wJrV&OY;P4cVrHA0xY1R=4`)@yau1y)xic_?Dt}
zeE<H_ivgT#-=C^*lG57VH)SY~y(-GZP0Jd_5*KxbOjK?!YWi?L_=vg(Hw^Xa=|Kpf
za&xl!cEy(Bbz<RGWYeu#MQwmP;=?;hk}4TRGGXV}w1i@mJDg0t41<%-V1ZLd0}{L0
z)%n-7)C<mCT=f919^8(zx0UX<){^WI?|bjIju?H!OCkr-&&<_o_|FrCl1F78$o6z%
zQpXnwyJ0ue8Lm^=Mu|kz`^mbAIK2L5$df919;kS|JE7Jhf_6Pl`VOWM{bKQjYG$pC
z?s;ZS?jA|Jj~zF+m@5?J&VPlqWE;-!A=}5edXn)ADor|@k;zO#0d3_aB0OtwFdfv!
zPwhYuBq(`=gEO0RtL|VtHAlWxqd0#d`}YxO&RA;VuV2o!vc2a2F4(Xk-HXF>uM@7P
z5qR$`e{FYGE~WBs%;#-lx1Pt?vX*pcE!SRT8niz)7+1>BZe-0H@c70N|7N{>6@$NR
zf_0EeR1<aKf`K*4pB~$O!6Q*7)Qz^H)0eE_!1Rw_94){#dLaMAtSND~Ju1GQ?c#5^
z7rYodutrm)sud}~8F8&YW=e};=#eY=c)_^Ce(C3&Yz1#Mr*?C%Blz%1DVS4(M|*AE
zRIuN=fX+MCHb$BEV??%L7Ofqo0b5LPz-~w_zjsCjVHQ7|VP-1-#l8f`%-fxiGvQGI
zP594o(c;j}rKApvz9_dI@Wz`JF6*zPx**imyu5t~{+rwtv!#LehLBKUR>{L$2Wp)}
zof@>|HXi*nQS1IE<sFs{`N0<V=h!ym6NTShPLKu+C}wiH6GN2`sv+Fr3VbLY%}JLj
z|K%$rNo+3q;OMV}8+@!En2Kg7>F*HWOWHXeJkk1L-S)KpR#LuP+XZ$#uX|%6AFLj(
z3z%|ebj#$m9+yfvmMqgguyds+^y`yNlgiU=f^(;5i7;LM#fGmbLrSRt(tPTU_l@SP
zG(@V%PgBIsaTX~<p4XPrmv6m#tMa@b@uwN$$GWjBmF_;SjD0i5`jjL@EAz3}m)nT+
zXK#cIH-%BW9@!W>^AtrYt?hz`-)7MW@t|3`DEd2E4qnYsVOdjq{+6eufnzLNYWa+o
z8AxsUxWzzA@Ddmw3s3kcCb`9yHpR@;ua0&+vHT#SD5w4G9S0C#?5Uc&*dxP4-Cla<
z6!L}95k>GkcP43?^UIoQO&RlgrNeXC)i_2g0}|bX28LK$KghMno)R46PjMOK)$%B>
zUg%c`TjXOUIKG&k+-P5z^6N+Ruj{`Qy~~8}co%EMKXSj5_GVGTBf%U)M;NDg3%fIc
zgz52)X*KoHW-9Cr(a(!v%h2+k;0_2{kxfT9NSdt7!L!55sW|rkE0~MrSLRnDc(rMR
z&S+vgV*ZfUzm19Y4R{u6nj2z!Dm_5=!Nf9VJxuSF)epllSq)Rhw!pzNqz{U5zFA_V
z)UhBRCvQYcVbDizn$OY*r;9i`=KhSZK)<Al3jbH>Av1hj=7|co_tE60GEP}72y`Qe
zZ$hzOrzL^w(P8NoO}bnEgjdX^Nd|=V`1D_?8{NP`k2g4(>q<D}BS9|9V{6UZ<e$`b
z6gr%^@5_WK9mPml2KM%Q(H5%FDQ7oz$wjHFdz$2J<P$v;wlB*z4Yo%Z*dk9ork~B7
zl$z}r-xk~DOidZ+6;{`LU6dX0=(QTh08yMYFDucl1T80xqG}Yg;K&Z4(gL2%_~{9&
z*g!GP#~4`6gF1>fbKawMwqr}o%qL9tGL<zOT;i4pyzN>f@c{QIAbD~?CgFnBZ#Lc#
zL3rtxj}g!biQSxWhOw<;(l3vE<($Nwbd*~#mWU_44tvgVcN{}0wQvaXJS&yTSu2O(
zf<7ljyc*AbGOx<?a!&Zy4~s&$HCYS+3|sUlkkQQUUr4P+DJi6S)%Ap2;b|wqOXKA#
z9Jdj8MePJ8X%Bg@2nQKo2D!iL&dsgN=Q?$M0pff={$m$j#15WW?jX9TyjY~(xu~Pz
z5d6|WXkPbTkmPa1(q++T=ca4@V`?5Q?>c8mT;d-8obE>_9XDdyjS*5f<=!s^mSd`&
zG>I?TrbNr;eeJPlI}+$|#wiliRT|8PNz$rqJIBR5j!8dfS?rXTry)Ab(ZDBC91mHr
zEv!PJbkl-2`Dp>SE~~}BD}G#dQLo7N{)X`Do^saC-oW6J1uoin0S_L|>+g#othC8^
z_?h69mrmDaM8vaQPqbZ+navni3H&u(rgQ#JB#T}59<#zb2XHmzku=Hr)9*Q^V_s2-
zKvh5BWj{xqFKUgE*$J6~XxeJfV5Uvhv&hw6rtaJAxm}-7mb#QJe1R;Ntc$dN&i2jn
z!+vOV33{;d?A1TB0G=?dGNHQ-=zrWYOJWms_L;<#RK5z;<?HgaJio|pWZwN6;2GTI
zDfQup=mb<Fgw4KN)MV!;?Y6ypP=+-g)0qEP6XjFTb3R?6jOy1J@K7t&)3fR*cqLLn
zsOou28G=-0m?I<~?k%}QrEvKQt#aUZZ|_%*HJY<Wm>0~nHiCF<vH0*F;r;5jT<ld%
zDj!xJJpb%)uSwwRKK-Id)yQsgdT(4gzA57i+vekLiC6Us&G3AQHfY9qGOTJhEwTS+
znv+@%%jTaK3XKbcXEtsx%`Zk5AQd-zO{!kO%C>xfjdz9_?<2>(((;w#YE-A@s+qv!
z+a#cYGZpALA#L@NGq~J)Y?qTf#*I-qVN#r@bC3CUxiWaM@x=^A$gF`5!?QJn+?Lu7
z89=+{)gGLKEUY<WIw~#FAzsj@N#te_#-!Pzxl41+0&@tGx|gNL0araci79qUi5dTd
zhVPlrUX|(oqTS=t?r?G;$ocM`DNX4>vmW%W&$&Uv{6;X8k@tEx$+6Ac)@r0!r29Yy
zJ8vemk%dtd>ZNb59>M_q?JsP_Ui-hVM01yCrl8#W4URzCPe|E6cvPH$A^+r7t^;nx
zAmGb45<#t+H0E5_=-i)d6Bj4r)P;UqC-Y+}s0QN^*l{yT=i_EPC%E0Vcm=CcUUy>0
z&8W(rm~n5+vBgslP<t0=1=;95Dlr__5O@P)7;nT(!sp}k2g?)S^SXEh=i_FEZ9f%I
z#B+(izFVIlZP<?%Pr?SI9%yyvKY8}Fpk{d+*o;7$Bi&qA>oiMeqOkMHnfu=AqXVql
z<+zjj4bY;yLpcncE}X}*B-}Fjj;TRUMx&wm-F4X~Rk6;r$`o7a<3N;iKBphFpM)?g
zr&^GW9d2I*ZkMec<K|py9&V$_j@B-xS_of9X7rFgfQ$d+Ro`SS5}Zm!rpHPG4KXaR
zV{5+eAB#O~RpvOh21jR+z_))_1K2U&DSjlks91sDT8K-e&(YStqhY5*$3yVkKE<*a
zazBC*H|H??1De3YIwZ-B_ND&T%gMJRxCiX4P0A<w?XfDW0>O__zMPBi0?B;lf2h*g
zere}^l%q`4=i)WErk<LVzP!_o*9Q;h1?-9-mR*4>F@UDd@EnM&zAF1ahRrM|^IyCj
zTTlO0Jv(1D;ICmb-7>v>t=2tn=^K(>p9u<wGIG0MA~!Q<3oV}Z6|N~?*=2qMql4E=
zCL@1S<A<Tac*^PXX*1f}2<)^OV6u4F##c1na4<Lxa*|R<?_lsV1;Z;NAyLR<1P-D%
z?8<wEdgDyc(KXH|mv@EvoU}zIIeQ*!p#RkW(cj*C^Zs+@miQI_HJ?f3Qj8l2XnXWd
z@p<Cocjyc9w@!tKc1^Y*EL4<8XkA?xKKX+eo-+fCW5v))b&OKH&ztfC$$p8X-EaUF
zg?3^?JG^tIZag1lFgS#F$j)S7De?aYP{l&_=t?*hTyT1sB*!fCf~fmFy`cmiy;8b<
z&l$aC-pl0nSWacRpjLRaRve5p0o{=%A73Vx8PD91f&2%VTEwZb$8ic)7J}O{!vQzN
zlD_<fsyKAw2l&6|jqIzQGBkUsc|YUFCEbvK4<LgRya!HE#7|8ZVN78Rm?aqDuw4R=
z(2nCvr-Zs41kJ+^gkNA}-gt+lNICL#MaA(HoQv)lE0u1H8_-@+mm&dP?zaSmK|;yV
zJJ;5#W!4ctdf%)fFbqe97s$rr&Opz&67&9ObWB@y<t@4Bv0=%whdB{=CSxNRuCKbQ
zV969D92{wn7~{o@v1p2D@krz^>pl3cqQQ6smQ48*LS+8zMLoLTpylovTrA#)zucJ@
zD5cL+EkdfZ(-s47wXv8tDHZ9s?ak5%*G#;wBg^Mr9y=4HFBAp*JY-YBTwV6A?9L32
zhrWR#{)>7jJS^FkYha#|2D1sGj<AzNZ1VkAp#K@z`G52dH$9}tv)@bYwTucMkYKf3
zml@?oaPFOG-;y7BZx(}$a9}i6C48Q(Se5IM!qJ;6trV{b>rTMkq<p?avUGqsgs-+a
zxB7mrd31cs+Ho#M-XlUNHiDg&e@!aAOoWfZv+$lFe~>P5g;O}g592!uj==&>?Sd|s
z5JUd~S5arh;wtdr(DCyAR_&;{`W^}kLiFXNCztnX#o`>KWVijHkJlC6J*0@almmh`
zUjhC4i7srNMS)cQUK@PryUjPDS-oQh3T+JkB1k=)QO{?$a~j2VIg>|8{d4Kz!zrZ9
zXN{<fLvb~V7rU#El8@~`vFDv*=ZWGb>?V21nP9OPk#Jcz-WP<**Fi7^MXc<7s*^>u
z<~wD9DQO}<<28Rm4+_W<f%|rCV?oP<`N;F#2{@Iel-Y*6O0_{Ol3Z`sJ|HpCZZIrG
zy!Zg%DWM}S&^RW;nQn>b(E&!BW!3;(J1%Fir`PQD^f8%oKSz5P6wiHp56*pjFC#*5
z{^sM`f2JgR-EP78^nG<3c?U@z>{vOcg+8~T9g#cZo6s))w$_*Veo97&v8Xtapf}aV
zQD^4U);@~)gSn2(Gr_3Uco4eXG#9_q8A?DVD05MZk>xp*L#k?@ke2utj#9Q^{3vT$
z$<O{0H5d}{JRf%fgN0FwJpK)%^s43v6Wyq6NAUOLA3R)nv8F%lEz$(BbVq`?zfXdt
zSU7Df0^Arf8G%2sQ3}sMPW!DA0VoBqC^-1CPV@ay2l|_W7v<#C0|C$d9y42y?d>r9
z0^?kQ$FcOur9ck~?OKre%ob<bk4z@pd<-?gCmQH^NzZ5=fmtwKF@L_=0Q97tp8nc>
zXb_~IoN=D+B7vYOo4{Y@tTX}PP&91AYao0R&S<+RxBgk<({1pm%{p~Gck2;i-FnQg
z*Bt-u*6Sv5imtNCHy*&9sbtD&<+Va8-C-m19OEVoif7$R733ER6Kc)9IO<NtXi9vE
zOW$Y2cm$2#{S!BA=VN~d!=?nE@dBP{L7@|!R$LsF>>3Zd5udWvY9hIX62no)n_R%3
z)V9<hTwe%A1#ctK%$2j<1#&pz#N0Rxl&v(O#U^HCgY6k>OqkF|qpUx7@F*)w;a?m+
zK<&t*-Zx{|Bs<{C0}V0>37Hr(H2|DeV5b;0&0Xd%TlQFk2>ASdJODABX-99*+z0E+
z-n4*}kr_fKc0H!sDbO`%X!eHULpE_>^uH-5mfwihPIa5aKx<ns1U}S!+tJb(twukd
zKARQTg3Pyeia|bZ)mU=|7CE^-o=gO4bl#!-xd%PkFVl;9)K=`0DL-+UI446u;Cf^Y
z3+kWD@5_tWJb_Ohu7YVwQ1q8i?BQeQu!->!97Ly*+Fk$u2#i5$bE9RZ@$lnlxEX3m
z5N7^le@NrTOu_bf;yEZZTDxdKmVuS;Pf;v>GWqm7)sXD)I;jSc!mC@~=e_-3Oq2`w
zCNh`|c{2sx31kfU{N}SK0&9f-Cw`LZ@th^zNTINt>3z)K_>-5k0TjaPhR+v$a+@AO
zxVOXV`yAWN44S>?6?T}0ICdOk^V?jGiHi##42w8tHD*_Q0HT-vbqJ+{O+gFU*FWP4
zLs(!`h_o<s-@-A@dASz%)LNr^cDw%<Wp5o7W!trlE25wX2q-A6lyrw63<%N<0#YO0
zAX1Veq9Rg)($do1&7gF5cf-&z#N>Ak_xrr-7k_-uz2+YmYdLXV``r83$KLzc!F9U8
z3X6zDqK8(5x_@IC-MIu~psLIFr_~THQWAdc6~}c?4gpn1xy{VSlz#lzKE11kvMqh~
z5SmhKhe+$Z`y`li2mkHez_a)Bj`|Qp$lhYaE*7Y@{G4zO%d>OP1=i+F5#5eDcLo+O
zKuxgyW@IwE9fYoeckhLf06J}P&_5ocSHsuM0T_1AXKaEe*zyY?d5?dqN<sZ)`_MEF
zJ<m~Kr*kr}&<?25IK@Gq_|e}&5P7rm;6_ZC7+Tq`s{86!6*d`pP+K+GMJV(lw7t8#
zd*uL&Ie=qRF-Qo*0ztqsRe;&i0j360b5R4N#Z=HE8fe+Y^kBf3xFCVh8vo7xD}kR<
zaXfd=FE@Uv#~5(5XtWKV{Lz<C%g&wdS|?%*@?#PEMDb?jVV7Qhm7TX`o_WfcIK>>u
zmA#EJ6o99zPw*Bzw_4KdG>t4|+al<K#Z2$(K7F@p02FcwT!h}mlV44=G{;i9s@iYf
zSaUPZr=@nq%0&aRB{09yWk6?frJ)6s%X*jXf=X25GBwl9JC)>=8I{?Y7#v19{de{l
zPbpUrm}YUbTttG~mzCNc%0byNF(OL=4=^w1)3lZbdtUPYHp1M96hV6(U-(C7&T5t$
zipHZ9dI+$aPC?$l981pC9E(!k(yQLZ5!5s1oMYbAbS=D+rKP96NA$3bP5?dJ1d~Er
zoWm|eV<v|o3tybs_9xDGTMQ~N3B4bAiAR5qUjA8sr~sg6^Q--0(Q^-h-DAhDZ%+|x
zq_YfvDc2T_5kPUf?Ra<-g(}`#d~eY*1;`!a#)2%qczQIR^pq<$lkUG!Zk(OLl95M!
zeXJ;UOWZZg67SLiP)T;OKkG_%Q=m?FA$wPeJ(0I+YERN08a+F=9sKt0npQO@&qt~|
zbuByZkmpXwSq$klv8%N+va4hNq&E>)_V7Xx)ZuyWViSVdY(GFR9B^aSG%#yLG=!NR
zD;r&+F{`Vq?F`r>2Ht=pD#%fhxViydpngtg?r+h*%c`BNkJ-Mie~7Pq-Tw1uiL+us
z`DU<Rh=p}?@v@T~5rL&{Yjp%y<8NjU_}_$(OO7sW<N}t1cUW%FEP(K?1liZ{k7d~<
zYsQ|f+_&w&J><GkO-{fs@S90MY>n(X_H!?%vUTVh=*ViE)RZ>*=WB9(x<I1y65J{j
zQ%MQs$2~{%?oYP4AhI?%eICqTc*C#wC+o3zkC2VAlTj6qy5OsonZ>We5ZZNI56a%o
z$V<Y;TU9O~Z|z^M?!LJT^a2&M>Vy*Fi;~`rg=WQ7XF+_hpLZPmS^LghG=R2E5-?5~
zymztrbdV#}ooi2#j$O$K>E&>S{hxuEY{_>18#MWj_6e`kPg}b<D5M`9nZB~BF@x;q
zUyaeE2ka#@baZ{ndS#9D+5zSQbHHGg61!|bihlNUo4TEv^2dg=K|p706NqDf$^<dr
zpRB=)d!QriO*=T+&QAZo-fS~Pf~S!xW%Atn*zyR~a3E&$^RelQyOvh+&Xe5=>^1}g
zro<kBFyPQd^!#GcHz_C+o=@P$3v#jcQABv`#gpS(cAr7X8?{R;w>vb(53_!zSPp9t
zi2V7gTxN;C{+QU?KG2E3=vzJZw(};SJW}|*;S7ZrN+=h4U6E2lGgMRVmy(#9%0>$)
zl>d>;^f!L0&@e^)Tb57nAedvKK(Ud}O>I(Ng=C!pBC&B>3R5AB(ocK>Yiuc0nCl`e
zEVGz3(|^S&C0U^ea+ZxQ6zbAIhMG)VYZxM2M$20!R?}sV>(#$Jw2h%_=7RX*YI|gD
zY{V}tzqI|#s6eUlWg{RuKpy*AQe9Q4{Y!BTnh5<_rh)|t>@E~}T{L(Q-EPJ-jbS0d
ztscTH#7Q&Z_AS245>YD@-#QY0WP^;m@f`pCG<2okC;$s=p1Ljp6L0kS!q8;*9iP7b
zwLmkP;hEe=8!`2-_q=W94;cET?(I?oE2Z4s8hh)t7TlV50A2@krLY#PH8p+)D%8P5
zlNrLn2LLY-7Le9>{gSB_r&MqW;C#ltmEimANu#V5h=*ALWms<=j29AQ%iIUd!^`OX
zsQDj>t}Epyya(vJGzv?pnC(<b=rJd>{9+P<)`zPtCQ@D-Pi~CSd?a?_f8B+UAUHS_
zdkn_dA2~qJpc(>nCJz9OD6uu0tbxl=@n)q}p+Bj-4DRQo*O%NlHxzFNy_S2_!${gT
zW+OUH+rzMevMxvJ@9DmHF*3c3Yxg->a{Pd-?`j9DMmY*U)A?zJR*bkW&C?e&^cgge
zwynv=#NXc<sw91b3C*_swI17!6O-*tGj`1a*L<`2AaN;u(fJzWd^1_^4_KFczQfS=
zg$z5vpT)8p2FLYC0x8_qYB>k8Kjc`J%!pqFn`OA3T>w`eT^_Ne5nJV~^LhjT{eXZ1
zrlqiRf>{XmiBh*mS34Q}Q`Fr$=S!leVwk{}?6nMl<Ruo>H_~?h{lXdVY2n@Iat}V*
z@6xYJ*^yZ1)fHpzi@!|~vG?o5S|6YWjc1|PmbEWC*S>$Hy>ajanEl5s>5)cSkY*6+
za6y!Q8U28oTYXFl93!rhdPm%Q>a2584F(2*fSF!Q>VJ0j;DrcxFaC`O_5fUsRPDKL
z8s8yCf_hlLxg~UFGSdwa7{{$bVo+B7de}jr*@CkR=876O(n~(E+k@QFxK1%+_>gz!
z6bk4KYg+@Ify10!RkCxL=)vznEsCD6<n@*z;eO*Z4l&`B-NR4QvLEE80PY@WGj{$W
z+ymAEMqOTFfGuxNYyCuUbJn~6%Z)hF1}-9s77bCGSQMnS3%rlOppl`e=v|voFcuC5
zqhKW%*a{Y1g7r9>guwMfLD&-z7}W|vx|~}5en|_jwcS0y>>p4bK(Uy2vd#ALj8YVy
z*A1HQeHUB*ZR6ml)sj(kasBYqTh&b?d|2s&SM6TLBxLENTA`%zQuv8gE|n4w!N=L?
zyD!HGXXZvRMv(cQo8Y!zCH;7&_>3~q)HhvXL&}d{JZz}Q+ahc*_oD;ofS23~BsABY
zTI25d%O~wa521^-OFP4Hz<#>rV@5TOunRVYSMpuZHc;*WxeOWxo{SrUJ~iP*2FVh_
zGh<-ZkgHK6*zVLZ)oQBx!DGgHHS{P?!DRy9yYzKl7QHJgvY%RZ$N=&eh5iR~gl|uS
zPl<trp#Q8B0{X*mvn}2->nB`a^aQZkYxK$%#eC+i%6&0<>4Vg(2rdffg0pyD%zVHt
z?E8V$U^qWW8eTDei+addwu}96k-4URN&H@98*68xcxYbV8h4-A<!3Yv)k}HFm(PO%
zef)nqGaM=l6xjdXzoq>Y1%wz|X#y^}FL4g1S339iR-MBdUobYzV(&6W8$CVYlLzD^
zT>y#Pd_c9ee*M5_-Gxss30L5^b(M=|eXs#o{yF4+;{R8mO5|HhDtbDVJ7Ci*fbCaD
zW=z$^#cCuKs&ADSJ;K}M0m2^h0uXX<5wq70#;k*zW6pyCz#a;^2m-z5oWJda6XuDQ
zDG3eR+LW&CSm9Bh;A0CjuU3d`W#-Pvr+KMGZhHIj_7OQfZ%Xc;I6y`0#kfrR$((>h
zv;kp%1GYUmQ}Cm|1<=l|_~6zEyX~h;zQMRhlwDj1`TMUhKQp4#$#bxBQU_&iQzFR3
zTOxx?m_e%Z!DeDUXR&vG^nt?mZ!(jG4$ybvupp`+)?pblhOCeB%B_K@_Rl8o?h6nF
zdKV0*R*6MC^lZQ`fHOKn^3r6F@@Ofa<gXuO!v~cc_&<gKZJ042ma-SsN_S1fJ~^5<
zSR_!QCxM<6b$lcv_A5O@<=Qt#akKeU)B&IYFg_i%2UdF?$jhJYQ;Izh2^ayERdf;X
zW4bTb!nCTtvak}wzA<=(sho!Fr*;*W(*$k>2JJ2G&0^V7;aH{4xWm9Htjw3ddpm8|
z@^2>G0X0`kYZ0Ja-a5CtcKV4in{`vEjjz5?`-a6~^r=bQuG-YUy83a<0}DW^d^BdW
zakGLyJXcA;OEkv>4^tZ7Nn-UwIND=lZ>aFy$MP>6W1t7<S?sU?#@w>~;Z7%Z9sr8D
z|FpJy{%<Rz$d{I*;o+6tf|6JSavgZQ?YN82yFcGh^HZwnVmX+B5_*{gt-JW<u|s$5
zX29`}R4&XB16GTOCVQREMOK=3$mdABC9-owrcbaykdCa`p3FV5!7wwcj}cD=_AXC%
z%;3q`(2EU|r+@W;P7vrwiO~(edwEZ_cv<itCOpw;3)1%E?hv+F&fk}&=^J{Lp~sqb
ztLLk(acNGXYh-KhyQ*bO;NYJO54MlNsTRQ{e|faPo(4<b;=!Syp3-Zr_z-PJTcT-H
zn4Wt_dBQm*nkmQG{-W<MJ$$jD^#mC80=7}S5MFcO`mytnIjb#*|15#h>Y>Hc+u5N{
zrS6+8GywC?;Mxx7Fpt446gTFF=eP+B!v*;~3_s?5bJwT17`qP{zqmBn>oCZ)$;a}I
za6xWjv*8+7tCV!RSw8AAJ?(I4(R%_jS=R_hz-;m&?ftg$9c7v4qH!X~1zj+M1R|lw
z<u_yYz+u^?bZ{oT4^NyrZnU;x?xI!3zURFl+ni!f1pG9%YM-`ML!89l5?c-VK{>g?
zkbCpb+fQg$nga%4ZeoRJlQ!wV9NVviIeHW%+Wth05nsjg90jOJtYDnQU?&Y+71o!s
zzWzzaE<ETO1W55}1rtu}bNQkg=Y@bt2Qh0?Z9TWr-%I7FEMs%V@Q@z44mSCo+IXsV
zBKdJ&D)c14O48Tjwl5XgV*6iW$cBc>y4}bf$w-y?sV3>3<Hjap$mCd$G~bRoO+zjJ
zcmRG8&G5P4A7xh)>1*$d1@jsXxdLu6m3rcvHv84pVu#V~*b6Ks*i=X;C&Uyx3PJcC
zP1|TV?_ymr8!nXC&Jr|rUpe0(?6d}gtO<o+53uNPxXF?ezP&lee6nodd)GFrklNB9
zK?W+-K?>-p*w7~ygIHc6pTUqg8mET-t7T3XBbiToGe06HcdCbM{X+r*!nn)pLh+t6
zeoV|I_a(a(|51Ju)$Yukzn9o$9_2%pSD&v?9;N=&M2Ofvy-Bgp-Mz1TA95S(8!q7|
z35c#F9ce^I75|3uAr-$hl1YArvM$9BzDo<~`gEX_hyD$z{?JED%Eh)o-x*;aoqYp+
zUVzQfKrDnaS_<0<kcU(Pd;F|o<O?5ucu1E2@L&gMzAaB|=-?}CgZO-3a-ijg1Tu89
z-#%Z317rdopw|4m0!5rM@^!8l<BeF8e5X?*)4=a6{g1%W#!LL~<1F5?GebV*sQGM+
zT}^?7Xa_*;n=QIbV<7*mg2J|LLDIB8SN`a);0BO}33@(<fwu_)?DQMUWcqM$o^*!O
zC0$qtmO}MLu?n(B*3KDWZ<I}KoB-%xDV`nTK}<di-8<uby<3ew00{reQ7U#IT74jr
zf=*)%T^{dvV@t6X`QA^O)h-*pp@1yp|Fs56NE8P|ZDp1;u%{YS*iFa<>HCAUKSUFO
z%Sb3D2~u%!hQ0JWTJUwE5L1|X9M<3$!>$^P!f^_`n5{@m?mDIM1|D9CMkKsfk2>jH
zQ<sIjg4Xx1+hi}sw!a3kf)f7V4@a-vW%~ikr3ULBzQ~V#rBiei*NSkzHozsX&$Lqu
zS-PgWFBIUj;f*oR4+zgtV$Wy(pltkEZ1ts1x@sb`D+M-<T;PXc*|*TR(J%2}8Oe0_
zQ$VwXvGfT&Fym#yXyOYLM7aL?WZYkQ9wajAl1}o;a{5jnWSkrI(qjlr;r3K~1OzK%
z5b2bvz4pb?$ceA|0Day}b3Hlv$s>|jD1Y?LE<hsz7gSn5&T>O~F+@k%T21As+jjEa
zqCMbeTEn)LoGm3lshwnSTOHq)EKsr{IL#VfVhZpG84&e)@Gsv&*S}Ff>R9R87FWsX
z_V`YTs1+7h(CB=EmuQ9s`EFj%ed&tctBdp8=3xBxamp^^4GU$<mw7_Du_<AD)nS7Z
zjj-Hd19L|uDd976n%Joa54f~4Svj73{YHF~{o}U>x@QtL);7IHUc6MbZ&P3GSKA?X
z8=wi#d_BLMmCVukwtjkP?~16YMHKj%9FN06CqE+u-tp{#y(+j~HGO`(U^R?!*>$pK
z$<h&rk(#{fkvb{wz$`;V5uacy0di+0sMjOt$U>dZ(@}0a)0o~&P(&cM;S@f&xjPXq
ziY+`NZHh3lQdaQa*NvOYaM)$&?ZH&c*}}Dl7*0V+7@I1{*2$jyv&kGfB*7VJUTfiQ
zCKjHDrEWmCj&GtqU&CiCHNV%g?9g5tY22>;<ZajE;P@oxMt<0WLp58fUwPfqM<Ri^
zr5T43VFg<6PY%4nWz|#WM46e8vNg7Q;nWt%zLhuk@A}OURvEqPN*R&#HN<?1Fd05I
zw>RV6MCQA?4>OV%>;0{h9U0PSr1eOcy6bqD{R=XoUb!R9Ir|-Ad*LqHV}5^kB5W@{
z`=a4wSq+ZX@YU1~bqRLljdpifS9o&}v5}rPgWNKm`VQ%Xp48SzPJMF__C&DBFp$h%
zj$1P8J%()l%2I$^T48)vSKOU?MAWDz7i;(;x9(nYVmqX$|JZ1Jr3j?dO|kID41q)c
zqRHr407<>nqMsOHRdmHM$*PLA(R<A*@nk4z`c3!2)QO7v`%>7ggXlbnfCSkbV`hh(
zC;_@i7t33O*o=~;KJVgpZJ3c9MQG-vV!sSCI3Zr{mGBje+27t-JpKvM3og+p)FYbs
zPKRi)Je@?VSBygnCJIcP942yOc4pOh6r3D7NrRgv5n-@ZW~}Z$8l8hK;@$lDjSG(@
z62nW%n_Z}<AUbD~)1X{udMPudqt`MIM=p}>xm-Sb%7Y$l**ho{xi=1b3PC%~EapLq
z*MFim&qv@TKWBONLndt`5h1-tTbqotTM$q5pgmUev|);@*_{X(ROE2VUoJEPg#u$>
zLY~NAjCP@3=)xPUE$vw<&YHX`Y8Ls-u#-&nX;4#atS34MEer9j=YqgBENUQP#s7bs
zDTPlMC2gko@(ien*IMj9w2+UWeZ){uE^If#JX!*V>gK-Ed)?lbEFtO<mh6aMd4s#(
zWXF$%a}iq}1%Ll84I-@EB`0MLb|bc$d_|Obk<6D3&ZE;*bB=NRxR$z+ir&$C6w<OF
zgq@rxl3cqR&oe(Wl=!o<8z!lBBloNZ?Q~94e?Zk#R6k`=Zm}vCby`TYw>ouLKB{|i
z@EpV4TQI{QksNsFH5g1vcf#e(yjG%-C8g>7VVE4nQcmx2t`8nmY~$libv}-VdhJhA
z9N10ihUgul7^Es)CCL0RZJMY00sY~5m~L^kDYn_r);KBE<%~fF1gBx<2(v0z;#;OX
zY5D|7Rte(QSj*&qj&Ql9J9(Fx(B5W@-R|h%6g%Czf(lYU^`K)Ib0Fs47`OP%VMp%X
z7lUX-5N78_v>H|r`l~#%<CEPty`3BJjynjN)aTYal)f(xJR5U@wG}K0(1ZF|$#;s?
zQM23_t5XGqXEpxpP4Ez!{bF4R$az?b`z$hR6f*$(RluOVwQ>-(AT@87e->=zkAA(!
z;FILpBqQbX0}tsN=W`+Sfy+m7*n3(Aa<UDyE0>hbf=<s5^ZZUH;t^QHY$bGfj8qua
z-edpL1&ebZO(%v$vs0khCNu#u7<Fg8E*uu;?vx1{J&lsry)vQCoeYH1*YG$AuhTZ`
zeW&TR;-bTID6Y#+kn*p$+-OkhXvAWW)<snlJ;onihEgVw(22*FG$VRCj@bQ=+NrWl
z8axw5AEz;;jXs(x@oLifF68QeW9rOo&xUQ}Sy?7ua3%cHmvU_HGa5~j?wLoHYw^1?
zd5(^uhIl<Qz?v~FG`eQlYB1$(h^=b>ODf;rw>MI&DE7dr>}-)^&f;^+k2_yQHoUy-
z&_O(F^kH)1kou}N7}1j5>Jm&MzyGDn`Js?^YS*G`_lddk<;$_;&*B~o@WLj2B@B6|
zTbx?Mlnv|h(E6yyj==e1wi-qW6G^MR870zF<?&<)9L8NaXM%*z`9nhCKRpzO!T5*<
zAy{f56Iok_`e{ws7-%}j9i^8d>^-&X7L2iPiZ+gYi5{H-tJ^jB*lk7nyMDwp1eYtL
z<Iu8q=!E6zz48oR71(0M2{Z~Ih(oHaw0Sl9nq3t45lFq{A-n)vA2ulRu2>zq6S^{C
zbsQIZxa`<%3vpN-UaUN{{(NrFk6;Kl$$`JfjB;D2GTSNNg56Z0s03}frHA^ziyemB
zcwCfc^4ptHkh*PcKBDJ1LG_;OGkBjjOuk(vmzTlLpQPqsdI66y;{5aEdmZ#q2r?Hc
zy~|);V00Jl5qk|Q$LTPrJGuX;#!LD_6mTH1qv0L7M%q3|sBt=^%_<x%ONTda0;UF{
z7T``S25ymKFhh%0rr0-F#ht}yTkOX0;`2*h(mi|sjn|CVt`im#;=jExB-?*7S*lCO
zWn4Q{={(<o9~$s||Em!U2I~`D^!p)!4NM{tDrrP1KOFQhVpIQRAatz1|0(GW{%qjH
z&Y5VtfM#}iE}49NWW-yJ55xz%t^0+<F4(RBMqWZkuIY)0<lVX{o}%|-r5ATEJMrC`
z;zw-1Y3VO>ze~RB+m`z6P<+Kl!<c#TvC>?~VHkasL>38ibTO+_>lB?NEB|<TekeE!
z+Ku>Jz#!bQ$l1h@QWlgCS!EiIJ)E@Td=Bx|5XyDbZN~E4q-s9I05ZxhyUAOM1+A&K
z5IX@i&7N9ZXEFxg;2L_H#s`P55SODlyv=v!t0x07Ji`dKNWDzxK2=_fszok@-fk_E
zKj5rQ^ZYJR$;Gti;Iln&XB#@>v^$!`xQ;I8L$S<i75{^lFM~$db=N1-5IDV1${9qe
z=nVSceAX4jD@losMLn8XjEFgY`)Q713)7*HQS=M3IOSnjBIx<#aI)F*v=fUN!om>H
z5t3C@e{OB{9&BwddI*zn)+l`aoYO16cyn|RI(aiLMy1$wI~9U)_W}9y?;^3)ZQ)yp
z3Rq?A>@&YQiHhH+Y;qJBR+K+aBX&P-<xvAT5EC9LXY>s2;@tzq`IQJck$ZKIy=Nh&
zq(h4^L_uLiCA4A&B=l_D-k*MG@df&*{`XH~x^)cC^qH4l(OE>DUa#I8bg0n6{H;qG
zhLCFNZa82mFhVApdmpah$GYK(`EhTGeqZ60#2d|46O^2UJ@RZcX_oXk<zw(Wl-Yl=
z+jugtQPxTC=49>^(tX^;6d7;U`lT-U!LG~s{(L_b*pHoeW-LmX%FbuUMh$V~{T?m6
z;%AY=Qkr9N5(3KJ6n1pH`$Kna3h71qc6<7h@pH8iR^H7Y$riI(ZcUE;FJt*mN}v1m
za37nb2!&ByjxK9q+oL@0N8hbflxkE7MUC$+Sfs{rQ?{>?O^kKL1>8KxCim;bSo~GJ
z(WAlC4Gxx^^nJ)(4$f??RQh&SnsV3W!(it^M5%1%GODC2i|osYD)S>@v3!{is8Q6U
zuXpS^44nGY7jw&G<uCgz3<Zz)OxiI2^Z5^yFxfNuMTpU-Q*_OFMe{k-ZViiaE#T<3
zJvx6$`qKx4;&$EK0oJ#fXN_+>oqV6khdtk4klSyd9jEXdMrMT4eqWUJ!wxS3K%j8F
z%CK?_w3A=(c@R6MrXS<0zqrZU*ok?p02xK(49ZHvpm1=qMfr0`5YL06Gg1iz=hIk!
z^a-k@UaqE1itbek^xfVf*ZE==cn)(63empQXt24q2L?;BGro@XKtlHXP04PC6J<d|
zw9g^1mSSzg8E7+Qh3H_7cAORxpw%}N(S}V}iLH_8>e_b>cGwOO{P5g(@hrIL)RFq^
zs5>3otkzTsuPH^pV#nHHXZlQ%P#wZ4rbV75_OBQUNxA|L@9w#HmsU1eQDJYmks8UK
zfhtXkrsbafEQQ#jB|N0{Z8~xlkb^JX<1#O&djDb`L;V$+r4Xs(m~>Doa9NSYDbNMF
z9FsGGC|QUQLyfI}GY)6(&rpO*KuX4&`(CGjupjc9^<Y@N5wP!o4bXnogAHmBHQ@l_
zxJ?1Jd<Jw%U!b?+_C|PqhIfyGVcYL=6AZb(Tw?a93Z&nLDg*YQqlKFhHcHbV_p{%m
zAQWHvU&j~M@Brfm2GwsdevfMT$*!Y%u+X$RG%`V5+bw+pjl)tg^wxT`TqlS(a%p<?
zA~^ZlbUkvvp<fEQ`-A`UI;TNr1YJMs@<ipncryn74$VMv2yy(&D3%Y@yqi+|Q}Q00
zRiiwe4(pM7F-mE<7L@c9LGi1_In4e^QM7K4<Q*h=itjF{t~PjUO$+(;KjF_+_Msl<
zUMs29pSEjkP0!`zstE2H6Wo`dM!s=*^}cOBV#+d3RXKcy=c99-pbu22*#t#ghD3Dc
ztZI9BsYV9NixHD`cs&;S65H2#lJzsFys+!C1o1n`ht(TRU(n!l?iIIVmB{BEu=^I>
z!@g!SXm4r4oY5eglBqK8=Hbw(w^}e7Bv<Ms6p?eg&H|&F2o06PtO)tpie*`Rgl6jR
zmozY(IhyO7&lfkiWbH$5X`<nSU=D)tLKT@d4F-mcsI^Z-A29Ws9phS$c?lMzG*mKy
zUFe-h(Pdm_&CZ<%9p{j<?{GBxZI1IHskcXH-g8t|$}}Iv>13;EBf@6<NJmf-(prq1
zn6JQxcx|3yzIZ8$4uO%aUU#y3HZHbE!I4=3y~77kV2VF8I8V<Pv8~qMog9)1l8iNE
zmNrGJwbw|jpI~=G9#=xvc`n$5UJtShH_50pprJh^8Z!;nSlwD|Mer9xCrIV~q@%C|
z>Qy=v<8^RQJg4%kk{QX+Y(lpvJ%7CZRn&{D$kWTQ8%By?IH_uc>;25dc6?Kx?NoHp
ze;0MAq79knWl92Qi?;8jmhbelN~b{hADir64CA>9%?D+%xq~%sgMF3Lx36DP6~ix9
zDxvy%Mb%c{g?f2}t${<>on|QlZ9)9pIL9HTIrD>Q$0s$tRYYr)lO^b2jP)qEiL|~}
zp?h8yX;e)b=G=~=X+X=Ctm>5M^Ae+7TRtraj6l~}uJTZ?g<H`WbR9T*(uJ5xk9Rpz
z<S2=lGfo+?>fWg@s}2!TiVJn#el{H%fK?NP>@onOnhNtgw};2>4Me1~&6o5ZG*ls-
zwg>Zck)lyQ@!#70hCs?N8{H7#ww#)*+NAnh^B)r6?g~CFfHW^yq#2n4Z&R?wtBFjv
zRVSP#SBd!FBBggs@jLDgV{iKt@oeLv+j@+C{f%N`Yv+bN5kdR0T(O}$l`-xY${t~-
z)U7+bQ$SurzHp*3A9l>IZPNt~Xh5vOiBR*cT+hj$K2Kf|hJsR{n%Lpgu~tn~;ZL!T
zm<;F`D1?AXYnxew;`$vCok(N*9L6`>CMh7`1#PQ{dcQ@U6msomQl0`&mCn7=Q=UNv
z;00cpR($Rr(l7VnWB(Xa+zdORflu6KB=F)ra%85jZ?k?P{l^*>rLuP)pR1|&q!W0>
zy%!-TH8B??OM4>-@5=JLgBQE7ez^*~RG!m>1oM<75Neht_^fG5+`O)GaE#)f-qcXI
zK%h<rRa!Tn;xk?gWL3TvT*B(JTDObBH}%<Osc5G(`b1_j`nK!)V{6pKQ5}WE*%Uaw
zx1@Hi+Ryk#X~*3ARpO1bAd;TsiQ{|etI7g@CpX3a#<^%a$iDW>+FULHd;YZu;hQKb
z9(#b`3vrA7p3JKLL+w{}(s1+Q_o{hj$i>beAIfCvRV^J_j{;g|I~p89(sdBB$S|0C
z^7|CThW7X!pgruO95F+&MGE-MG^m+>G<oQuLB_WbQI%hrm~=Fi;c*^hP095YKfEO4
z>)KzJt{LYMOd47S8FZ$QOXT^$S;0T1cAU)RlJAHN0X@ZTV@I+2FTc*xOo6ncsqOM<
ztJrKI!<Y>8OC+bGMpoOC=drft?MW;HX4tZ`R1!MNPbGtomN_v+r8ln=N=qmH60q^M
zrMyXi&!`#FlH}5C=L3g5xH-(K4-H2oO`0a%qwB3d>IGv)Q0N1Q#!V<Jzo0Klfj&XJ
zI%D^AYq-^~xV6y!Ks}cFHLoG_jg84R9sAt-+ef+V1<0x@*6(l2AD5Zujl6|%u|8f)
zC21@vTT8u*e!z7{wdmL6P@;8F$M}*69Wh(O8Yv_I@*Hy8L<TYhhB3gvPLHQzZ|#xJ
zfGa_EkWuV+5(;!lsoqya<SA$yVgYHvd_><y(wr&m33r2bOQurGpeLBXbKXXXw#1CU
zd+bEIKfwu5c$f*GhTxF4bPHL_awr%NUx55#5Q4F|MNG|{Zqlij=~Mw1*+{TS>5hLv
zyPKOCA)p@`q`&*uvd?1@>_Q^B_Fe3r5$@hyy~$O$`oH}1DdUG${p<HUA8SUm^p-T5
zi_nP;rH~NB3jn8E=M5x%y)W5)QTJST>yNT&elQRx89CRr?*gSytJSx7S6BSM{U|ZN
zBbpD=@_0YkG(`N7XX8gr8BukJ^8*)F)h<xhY5;Q2X1^Bg8K)k<6whzb*A>Mi82jbT
zzxgq0Q9y83J1TbThd((yn{`Pd7MYW7Y_flmQ_|vC`sdb0zvyCL^1_1efzy7&K|n~(
z4{`Cx5+Fhk?$&Ll)QvvD`@bC4|MOs_M;`+xH(EDoylqY%FY1!AJ(j1#5mR3;ezG11
zPPFb%%!<&T9gh0L+APIS&!PI{`T3li^-!A3c*Z;We{pbWw<%hd3vMs^@koLKBj_X&
zFHbgSzXsTMvg>5W6N}FAxtTBepC{W*7AL>1eQU-saJoIJR^vRc-8mKbFYMrhbTn{y
zO4N?BX?Fqx&>{jqSk<CmKeXbIPY&0LZP>5xy~`jqw`X}#aD;^K#NC-Lx8gk6t67Xs
zr1oI4_;;UV2P6kE8-KM3e=?T>bW|b%{ri@b3MRkqR6+SvGx<n*69K=xAg?*s;77vt
zy0EeOonnV<_2fkH4w8TIfUe=+xxIAwO7W9Fp)TWLjvK}@05WsnL<rbggv7orz3*<>
zowAXf0C)j70gB!`Sbl286B9%t-UYbU@a+$~GXKr>`Hw}U;c($i>8a1sEO-=b>OyGb
zIq#i}QK7T#(VVUK%YnSj7k>-O<V8=~slYQj;z1USI<l}_FzB*d<4#ZoDRATe>rV^3
z55!b$v8h054?KsGrfP32`dlp>Mb!nV-)%Zy*Z^Y@`r}=|FLLR>+)PcqGg)q>B4{%}
zJDM&VJm%{8-(J_Ni>Ijwgryfxc5z1WLtVTZwa!bW^wjk%nylU)Z}Y<A&m~p-4~=Ja
zj=R1GO_RG7gHi$+`r->f47aBDJN#3ABzFDZ{Iu&D*MfO9J__U99`I(W;Dlz^j!uSP
z{<a-X+U=bWH%R!XOx&ZkXQ~{2&gg^vtpHnb;%Pe7K0%)^_8##y7Qzh26L3g_SY(Rv
z&fu+oWiCB{&n=*R6d{|27l?BT>Z|d0SLT}&g&nLXOnse3Gdm5fCkhib_VQek4k)?q
z$MG>dd3PzWne3JF3_hmwqq~&&-~8CXa)3?Y2>n8FFIRQxD^mpDq^WOmt?SL@3scvN
zsVYvX*qP{Sv(6Po-pAc+YEDj0u=^7K#v9&yssKE^ca~x#fp_tp>$h3mP)D6f-X|Lm
zJ{PmQ-y~{kff-hn>pV|!Kkm*3{`~St3>SI+dpmrG`~C`FR_}Ewz8U!SY~vG!I-f;n
zgm8t;V3a{%=yvdB`407H;p?JD;_kVZJ7BCm5UpeQJwE=co5jcqxY@ps+=N^rff`bm
zZK^5Hr!mN-AX3#N5hv9g^<-5XT~Iyi?4W08m(@NH^sk<IA`ftx68D=ndmjJ0AHN<3
zT=?E;;>_BA^-MlUkFwyxM$@_Rvw!tP8B2cw=IQXP!R$WOzla{d)3mJzo|Aq3c?xIn
zzljF`i6e+3aco4xR{lwR|LgCo7Ke+&=L>G@|L$yB-pK<NyT1EH%lKc|Az-s<+vLF2
z-Cpo5ru}zkV=zFtbXfIkp6?sczcKymghD33*$B`34#vp-*B9pM6M7#31{YgFBk%ez
z4K7;+IGb$WgSvND6#o6dK`CTkU!rg?mqBSuVbj@6HUO$P`{3M$m0f+mo{jdUNY1)%
zkF>k*O!SY9jTQ1uc7DcPZi8t2X5_wvpzBK8)6f__!|uY00rBAgS;xXI4^7#_$Hauv
zZ>Gr&sKs;ja}6&r$b+PDeek^d-i$|x>1p}Id<hgYqnaSVQZK*fTL{G<hNZ3;zjE?J
zpECdt%!t6EtF5@yig?=73_ucJ1B&%IY>O>q?!`w+A-eh!1UubfW8DB2ztNz?^AZ2c
zHT+}prxO)l97uhNOr5I6!!SnVy#U|xW@8}PVQ;%XsfKT-sHWI%IHNtfizCL`aWjR<
zbAMo}PRIQ}_-2g_xKRPtZC9~4P+nRZY&`FNqPEfOdmyOsoPWUYa-Awqdpk5+stFk{
z<&TWFh<Mfkpd>aAsonuuQYqX0=FJpc%^h2oFKsPjUC@`uqMnV~H!O<8%x{vnI!z{?
z(aj`*Fh`NuPP5f{?)iI-sN>#^aur@-4M}uZf)tu5b8;unGkT(ar^v+zzA*I5eMQN-
zqF>}B#{g#?w)gSreYVWp1|_|cq-pim)k3*{z;dY1qDcz!W4isxG<0-~99F9ihK7H+
z4NPaNC5eQ3sMXASCQU=I=pw@csmO=?BXi!H@$Jew$)PTF>y&N>P3MVh9g=3vsFhw_
z9|w0MS)uy4?fHVZXJ=GUz9!#u^<*1d0^Tyh{1&-alY~B-<?VUb_LVFn$V(mxpMNOf
zS?IkMrO>yg^rLU**adOAUF}rJy*_YU-sA!e$aYz5wr<#~76ihei{XKrb^Y7r7X=OH
zs2zI2foe1~`o8Y?d<KJ@W9{;nZ17@0Mnh+!X$Dl~-LtjtvVGyA3lFXMExYas2B0o%
zQvrjzhPO`mdnQUlQQ}x-GGdIfB0mw9U>`rj7w~wmt9`sefx&<5IbzE|&~9Ysc+jpY
zR_$bmnRf>HZT|P7JWYaA^}<Y8bp4ZYk;xCljkEx;*vL+D{x-8Ady7<L6{vli)}fG#
zSdlnzKwZLf@A3r1neNHDK3IM`f7KY27yNVeP4{XJ2%VP~6L5<NaASdAo-EA<+0#s%
z9jt66xab$C`xbe^UxaUuc|Vx#825zZ!iju1^(&Vm%t|P4UyQz~9q<SrpB&hv@Gtsf
z{eLI*CyCZD@Z~z&n-b8z{on)^#V^4EIp;=sFOQ%G2wj=**#FDt+Y(+zWtW9Dm|Nze
z+g9q&kK}geh_C+=y24;-c4Xh<xVZjtk%8;q07B6P{{{hxu2C+$A)2Ig{>))^kR;MB
z-nu`D05~Cc#!Sogdca^Mg_4W&TmwwgHS{M?S%OK=7TH~zAECAsj2ireK<p5A6PDwW
zyrSD}E%*B3`I+44@jv0Z2Y%mvkFFF?7+N(L{N)@PGh$aqoH?65@C6a40O~av1@a|^
z_4LcoOxe1$goxg+JjEo@^)28?Fx3R8n#6vh(9}aS`|mwMz^bHg;pGQKsSAG~2>_r;
zV568ujn0p~)ld6KpEm^TtqljwN$HjCGQajozE&DnO#tt~2Cwly629>tEr8c2ECme!
zzE{w~$Oe;e^N1<nPUQZ*QlIMW?6Q<;5tn*2&35812kY4=h4pGH+iKH-)zkbMsMnNP
zu#bbspRCxqS7<IrEa{_YFbM1qAAbYf7dP61F%Sk(Gi4qsEmg?*nui1zx4KkXwqNHK
zn9b@-UF%9xWn^-EP*c!k!kp(4a#m8kw=-JYj15Ev0o{MW9-3w7_MNPnULglP2d9D5
z0EFHC&N10SrRP4wXP+W(AM30WsvL`?96!aCA8-Bx_M{)+g&neR-=(~YSq1@w#vbFZ
zHh^|gQE|s=B<m~0w-KhQ`Svj@W|E$4Po~*w2VU^X;+wN+n^fJdcM?g?^?Acl)zP6O
zx=c5QKB>ejJmG|}zJ7}KQG&MQS3f_SvhX~;L08>i867QMhmt57=~j*B`%_rfK|Nl+
z{2XV^Eb5FS8f1dwV7Pe|O`_ZHbfv7Y(4S+_oD`p)h`SW2{Wb6bi31!amX@ddp<3jm
zC4WI}ij8std>SNv5w#(zP2j)E!~A+G*ES?**=h$~<1jf9aqCAF$+u{W8#q5=eovEb
zEbo@b*!2=<sNmnYh%FWdn)qoqYlZFhcFrP*DP*?V&Vi@-!u+Hn70nf)ZKdLIjst{X
z|12h-ZSsw%t{ys#Uqqs_HBL6p-?eI~wY@-?3kwPJzDAABtYWZ2q<47=(^%WO4G|Ed
zqHbS!u8ErWDhdVfvBdopdbWzh?#)zHWSXNdIza=Uwg9d~B$(bO-WJY9B^70s*BIS7
z)Ae;FZfy+4^*K6ntWRDbdMf8+BdZv6Ko}z0t9U@b;P;j8mE?e^>&^1l#n{Ir?i)2w
zbkQ^9ZpCV@BMy>wDYfiOKOe5hb>c9R{X)XO+8I9hE72Zq7EEt%-5=}e@n;3TNJk>f
z^VmcDN83v(6Uzv3SnkL8_J#@;G>}sc%<*KVvgO~P;fu25Fc*M~J99gjCY@MBh9)D~
zkTZ6R!bjTPG2+=~5?=d`ZFQnvDb#T{|G_ImV*m~rAKMq3#Nm)P&sg6A7b}T~w~^2P
z=DN=pat`kv7q6|MKjN-MKq|iNe&;OUVwu4E+2Vj7;NG=fNQLVTMH841LO6h?2XWtS
zs{9S#N6HeWfA5bJTvR$=s#@yEQ}RWEFk++kf8Q=6xK7Arg_obY*Z$`$$z`0KgGHyz
zlBKfXspd+Hpg?<z$gHD9<QiJgVS=jym!M;tPqi(_#vFqu-8d@#Cl7>#glfGfK1#R{
zUpElAs%${8NN~l1&koCrirctf(S2_yP6EiXgT0m9`9_{h=a#G%s83JnXTQHhg<i7T
z<XCQJ`mMdmb4pg<d7bdlp+F`mdRW;xm072LE41)0l}_F#B6G@(Y10{u_-9qwtzth-
zJI`q=D#5d|vX_&olV<5@Q@g+w=M=j>8_}pZ6Qv}hawoP4b13ceAZq)m&slUO(|AbD
z2)P~HZ`l5IhGRxJpXYKjDSM}b6|!zKRTQAItRB8U(u)e{(QO$f-$+#eRY{H7eJo05
zRqk?P=FM62TuD>!Z3Ysv2ftf*a90h6ue_ZQ7x&yJ+ptK7ZJ@@GF8jM_7SXDp5^YhD
zlXZ%}-psg>Q;b;Rf-W*FMPEX$@_ts#K<!qgoG~N_-tRzRD(K<ip(PL_qV2i=7oX^y
z0v<W>15-Ium-;;~GVWBjHKsVo?LRRg5SL@Lnq-7s{o&1m50ZPd#tWQD_lhpP#ounN
z{)52Fu<3wcy4|OSoF$FrQOg-l!H2Md5&?h`3L0|LdxiCgj(C0o<kE?HMTRjfoI2d`
z3r$El*Q%PjS)PW3;sOuuZ>tg+^`^hADok{y%h>GKh(>hw2nAn~wcbDoIIo6_id0w+
z+*N_ePQ=RuRSQ57<0Yyk3r&4JoJEdO7fUxXnHG%S{<$eI5*YDrgS(^nK1BRnV*?w2
zgh*YVz<uvn@j%gq_pPVfoQOOM)6djZv(62P#U}a`CO^FiEi1W&r=F>uQD^6@{c2S8
za^UuS18tckt3T#((eR($6>!u)0l2!UTpr%}LVhOsyP{qT-ZsU|i#{r_v}XSzonjTv
zW|0K-KV7Gy6Z}OwIqLE|kvL>jjOZLE$24T12uNt(r1<XFee=*P{!8`{7rt)exj?za
zpAIMiFv?j>ylAqBTJYT(p`dkNJb4)hgErd?z({H0dGC4JdBWzpccF_IYXF2+XM&qf
z1{yNl24|ixK(I|2D5Nr7dgfNO<8-;>7d<Z|TyPO`-sGUIl41&}Sq#XX*7DGoAKI(z
z;jljbDcV=d*71n$_63q*Z8by=o#}e(?nB!Ys?N{KmoeOmD{WSBCajrEv|x9%Vawwn
zE4kpKRf_a!T<BgR%%v}Il5u*Z4odcffvYSWLGHd4^Gkbu92qt<5fgE<`X=0aUF3+k
zEV}JVM4+Z%)m<JaPjW*%`fPC!00y8tRS3F*Z~^Xwu?Gh)mxaE_Z;uv4dasY`QyNJK
z*QxL}^+k-CzDFA-S$cdQihMQ}o2gEYNu0|Xj|aei|1ZDeD?BTTtKJJFXC(-2d$>|J
z8_2(asQq<Nj|eJm@GzRkCFWb553c~t@x8Zue^y^8-S%iNwwD{cW#demKe`Q&Gz`{z
z@a}jXllpiqWQ2f`%j)d-fdMkGdS1tnZeuz}!{wxhbG-@y^ahAx&ms0~5gwYEFa@$q
z`Ec@v6Gp+eOVSdF#p0j06?(nqJ-l+`U6DqssZTTaP2*97#dm9`GbU+V%0m*K=_n+G
zoxHML7?^MHa;X?m=1i~~&F+3$XTK>02<gz6#xaq{1Au&T=5vo_yV-xT&7U6{b<O^V
z%M|j#YYiryq&FXio(zJZj?-Y@y{b2%+|@%1d2J_HX5u6Ei}`~9eUX%uS^!>$9wW9g
zBRM+q4wU%2I=q#pQ68ocG~DXPPZHPCZq_6@vw3*Jn||R*E}CG9t7~!V`nU%Fz%&0l
za4)58n`575)Uiszyuhp1L-igzx)@WYS6H~;LY5<)yUe<{qWsJ6?|$zvjKAlv-QdPs
z#wCxsENSBJXPs(ZCp*k%t0|$ih^e2wvmCFybK|EyO+j;rjx(PJZ0zrnCuD!F?hoM8
zX<)8%9peIF^pf?Kv=Gf~GxsAufAV6%Dzvbp3=(QMySQoO%pGw(Fw9m{lXcw7%R~TA
zrY@Hj;0+SR0?YThxT?eOh;{of0iX+x&7HXZ8IiLGe)NFrUS~!0QQI@+N}nXf!Ny{%
zVOaESO4?sScG|bmr~Wr7zP38bgpD-hqz3q8gq2}>g#HAoB8>#1#inm=s`yHJs|7TW
zcSeV}-UE`QV|+*7(pP4_Ljm@0JggNv&eBF<?z09}O7g#privFljhys55u5Nr;|4=1
zdCokjfUpIh+22p)=Q_P?6{h#?Y@8})mf(|MUYfzwDYwBU<GD}aM!kmp1KbR<RO=zh
zvpQD)gWR41Y-7o>qu7nB#60}dhp-N-<Vrt<2?=sz*Y*dmRGwzq+?=oX2u&va8&T#5
zf6c#dmy=Anu`NDw{P|V;AzbnjaJ7^k)>S6~J?uHUYTEKrgfq$nb0>#>3;yTx`L4Hd
zoYEU5vY|M(d<D}aJ(Cv*cAYwW#K$HuG~f^LpXr5M7M_pk<jhJzpl4r(8zV>rsP%u3
zc31KYY7+D(2(lUIgypHHTC_R>Bv;)1fDxb%IPu=~k{}wVN-brACcVq5NzkGm?Q7Cb
z{YQZ;SyqYFs!{a01FsA#!(8Vs+GDE+skjYw1@fJcDhSx`rQX6z6YupHmR2@WdvD7%
zN*Z2obZBM+5ha(RpKVmJh2P~V^;N6aee$oTs>0kyyrJB!+pljM$$$3z9mb>c8v>I~
zQ#XmB9DkNGZssdI26nQD8-*!vU%J_$RRs^9my7qE!NwbwW>nJ#NO5(E!Qx#6z}qSw
zO*VSX+2xckKf^J?Y2QCo+Y$VhYJ?|J)(}@z6O8ls*}W;O1H5cNwkLe)kNRfQ7kdc^
zd|aibz>4|!$)1i)3BU@J9v*)?gEPS{EP^kntth``ly={MDaO`Um+niful@M(N7=a`
z8PFs>(@O9+qL=VeGN#2jP~b7h0VLQr&C-A9=<t}9fXuT{iO}?A*Mg$z9^<)AdS5OW
zR|lOrMdR5_Te;nZR*m20yI?v`E*ei)bz1i!8Ler{o<|K$^PaO-0*3a@Y$@-}?=M38
z064ZonWNZt)u3&FUl=eG_3BslUhQi1@SsccWws*rNw~%u104}@xImUg@y@K*qE>X|
zLotd70)ai54WXV%nG!4Ypbt2gpiK5w{9*Ic;|Tl*r-M6Y3sEGS{Yi0e^WnTTs&DtI
zCSy7DhWQiJ`O_z<7h^7gN&?570)jJ|toof3S42|I9u`PC*jAA4s_jvCl_{B@zlY`J
zWRDoxBzxc`Gz&c6D7Na-q;2NlU!%oO<hY)|!<og95Ai<b(vZ-ust5W#Z(b%a7Whk7
z&CG&+-I752Z)c4dCa<SZ?rhcvs<PYlZtXF=re${jUA*E@aJwU%CNArtIntXuYud-Z
zpt#;XyB7Z=aM)@1BR+Z*I7aGEWVNTyhP>WSG9p30Yu_F9*@Q*G4Kr1s%(L=Jtd)4j
zSh#l1GEgaFf#Fka{sn>K@95iYTDp#WJ0_gn5J>h(8*o{V{hd79Iq%%?Ct+`n?N^$o
zvp)9XB1Zler#?x<|8Q@K<Tw&{WPZaC=N7qg@K3Y89jt8|O0?B1csdjpY|quX1s;t6
zOCKSSX(4y~Qp*&W!T3-w;E(LXV_gZLtXorM7JL>Roe34?yp?UY|Kz0aFhz(7xcTAX
zQb=1{JW1cPFu2a?_U|9+_FG(zJ`2m7c@U-f>e=)kv80)u@?m|VSr+9a<=cvhH4BLq
zB(O1d>nB!2@17sF+)(*M6au`BTCj9FNw!ewC(PxEH(j$cz~f9#j0v?u=6&5sDS3?J
zA5}*zt(;5>>1ufR6l<$!d-?xrte<5coBxfORPX|_(e1SA>eue*s;a6$`dKCUFGqGe
z;=M%8+LJx}fSYM&)%<Of0fCQHe8S}ONEu|%<rz8&>$ECA9A~X^7&9thY0f?g2BFeT
zMYr>EQpnMT;gwK%T5?@b@Lr?a`@UxkmI-)H7}TXqZ!C8vDPJQd04V<^^km=NwQVs&
zTRaPq4HTCv#c_za@{D;l`5;%{p`C=$Uamcvq8q4A(txH3Lx=SV`;s8_LDjc2c6LP(
zuuq!3Rp@yc`9W>T{`y?E8IF~u^;vAQv-ja20u@$ZAS(5Bk$P;AY$wSdfRR)A!t0^A
zH`lyhx~*6j`J7zBi(QX4=${Mx9w1!3$qTPo(=;vep@>`v`<g^qL1E*&4c;KsOJW)k
zE)fpH8@6h1fhc=1npfqi>2T_hA(wNpk>sNO+3ssHP@@v{@m`bY2Th-|)kRNi6D@de
zCNBXODGZ~QEH121jpHL3hcxkZK}{*%#7*1TNz@#dy5bANe>Q*giQ2sh8*C_%J5k-v
z_0SaHp4SVqNujbawACl5Lp5oM?oc)6g<9iiH|dYk+BoOzQRt7r<0t}oyK7cRY%K+a
zR`T_pu7+gX(g(X$aRzVYa%Wa{0$sBwmiHO}QJR@saVHo<z36HP+I_Og?_@Gn-wT9A
zok)XQSa#wl)W!b9x@~ru{aNQ1YW+&`lByRMT|ToeopKdH;Vuf|MMw1pJ+ZF9WQakN
zG7Q_zmKq&e2D8+L2p4fzPd(A6rZzz0OBMxmOfqcS*d`r$$WRi50R7z@n!)@KG={oj
zf>@l}sz9$O1JSwTQ<cZ#1{Na&lAZ?<A6!UT;hj4PUT@%=<y#36AJ*Z6+fHiVt?FBu
z4T^3wMqGNN>tO*l!9Bt--QVbh_VS0Hh)uuS#o=;ZqYxBem^oknvNhV9^I1T<A)p7%
zNcqhQM?au)88}k~e!qOvVRJ%@wam@tvBJYEFlO6>D_Zn;r8?k>7NxZJfR+IHjEXJs
zaDXA}GdTvl3ujD(I)kROJ@62`Ss&<0<O6lrzGRy!LR^Z%!mxc!A=!+APt`-%{ul5b
z)PjMP0pk9?X!vBDVidf+|I=-Xk3b2rY7Ha_6y^uBg!JGdk(sTX3=re{kd_bE>q=|}
zCC|T(9F0z<o!9Y`U&p)mlrc@*K>0?Kh#{Td+4K|iBYtyB8RZx+^r?|+QWb0M$$$i>
z0*6?ElU)g<8C?)T`dNj2zDQ$sQDcxkZW>>7?yanu=Yw;ioZs7k0#KT|XZ>#})~I?G
z;0}LFa3u$Lk7Yt;c;C)qOJLGQ^FU=v-l&e+TeQdIgRIp`aVPLYL2Fg5T4jQjJW@1~
z?|GSQmRQyR9aqF(CG_5Cj>aK?dS=r;lT^Hh>nJ!)BA75@C9Ll+*W*;I!0xJ`Xt(u{
z%R;V!wkw35PbLPqCML8ddOO*cG#g&Xs^eQ)YvS~=(XeO__CjrEEYA(5{j2}AR9Y#6
zWI^^MK0Kl6NlK5v#3E2+mWYX4yo8sG5713A)F;;fA8-D@beJceFtsj+=|6PX2kA`a
zLx8MFq<4UScg{O|xbDk**|16P8;{(Fg7m#7O}tES310R}SFVA)Jf$MtXfpQv9GV11
z9mkDO#nN>H`+bDL0hd`(?AII(5=2_Q-Uy1~4GVinr?%MaAFdknW^nzo0(VjQHD{hC
z#`u-lOQk#;Ev$#-dq+R!!sYDpvP;LkyrMvecYKKkKxwo$z?o2~fGSK&!*0C+YRPeT
zcu2tVck}Zs);$6G-P+RqV=jj5M#;2)<b1g5ZDStg?$w>*hKMnlgRFs=f*|d{tI|Vh
zJU|Adsczf0nQ^-HI22FM_r*(@RPu>1_ygXpZU(yY%NnRjvSjYrLT!=fCzmH`$g@54
zW5K;vJL@8PhiRX-8ti#)+&*Ou+UnD_ifQYlm)@^tY5^*#FCbI?C1Nqdo9JZMZBX8y
z!1Y1^D6PgYS6UN>sVZ3m!B?MMX#ilY)WSZ%EC*90b@ZEprv#3xnI?_AB!MjT;P(jo
zc!Y6<ua_V?Gg5o`*uJWnb!#X+=u&x?VF8oDaej#!)G<u%`Ta%#?)Gyl2b?JCVx0oP
zfTA$@0aC1pZHN27hpu);p8x4B4&HlGkIaP?gYEme*^+H}E>2R>f#o}bgU?5Qxpq7n
z_AY`{F9!IYTu^Apbxz?5F`9~L27`$-m$_W_|A)P=4vTtg+qDr9P!W(AQ3(|glxAp=
zlI~_u>6GqL0VSnF5Tv`iM(OUM5owsAJDi!bhQ0R}=e*~9?|%P0*Lz*_2XHaW;<wg%
z*7MxYecy|C1AyX70}?dBYc!gB79cuvu(iBm%q7l{s;zNztSzxrcUm)PxRGBG7!dz4
zCy1HCbpNIV^(%wnRuw^Vm~AmHrfOcfEoVK!<6vAo<3RQ@ip4OI!H`8K+|f$lp^+89
zsVs)lrDGbC*Z%6p^?T#uhj1(%jyHDh0-?OH-wHSUsD(91MmH-}sZ;3972==0CT*Ok
z6xL|yONp@)9%<tt&j=OmH++qt&GY2t(y|b?x_T~vY0YMxop{VX*=1?fxO-(FwFlx9
zWRp1Y6ymX+ex=`WeNwZHl~b>CR6fH2H6pf3c6Vi>JU33jxMDpr*Fk@`hnwB3YQO7r
z6V7@({c3?pn-+#A{m><vCkYXUP(zuGXc|LG!RpeYHPqEj3~i*u!L4lcZ@c?KpQ-EF
z7^+;Mh-%?S?+uB=O0pslc$;4Lawr;pD*8nA9IS)>+QGA&Bplg8R@6+ZbKw5)`Yf%V
zv7>!IbR}l)T%G=p3)<MMAGTY+ek^S@5r-BOPxy_9AQy@rX&9hrw5=tdBPFN6nl~HZ
z#gS0-i?6nSWX!Ovtq8-Iymp~(P{BhXwF0lOl(3eTL3GteJKfPe+|^y_4vheBdOc*L
zDiR#Wv(1T9k3D)ab1+)&dHvh^p=ITCFSU!oK?NL{3uPXwfM+mI=xPtiB=js959)g0
zk9y5xjDAkJys9?qHqAu;Y#-)P<aMH8U_EN>Bxra`!KOk@)*5_Rdv@AuP_-7rc}AFZ
ziuwl?WCtA$?rQrr)Qvi(SqcSUkTsf??CfyiJ{5NDKwbSXN|{D6%UfD_3%XYV(DP9-
zV!7*clxrtHzId*vS%%%5=n<!~G+62(*jG+TIyfB~JTZmP4cka}GA{cQbu2wS1u_H;
zs*iT14x`!o&1m8&YsVSO!xAxNV;F^ETRX?A?33m5!If^u&W0olmq4mOch#MasEo?G
zt~l;+wJr;S_8-FN_nypa4(yXdQ`GE^lqMY$O?FdgJU{M9uG6r+vxqbrA)GAmR4s!>
zdzh-r2TdTfdfDZKnntL&WCteM`*?;DK&GczS~OdR<~!LN;DQvwd*V6m;<Y{xn&svj
zS&j<Ad$=e04VTp(E=*dh^BrDF-ET?YwOf-IF{8nO6g@cljpNuWJh!}irjCELnX_I9
zL%;mhZ6b~LcK?(%4J&7;DIb$EY%VMrtpnn~wpa9efKhRQ;zw6&6sB03Htws%<h+?z
zl%-AUwdOmTTpj}3O~gW%*j{VAhF&0{wr#)96(Yxd5B7y&C(OR7j9@pW*B()4k@6b{
zq7>b}n&rvhz&O&U;gxtd*UGQ$R!z=?j;zqGj$hp#aXCjXj2f~-S3LGg7jk>Ka1;)(
z<0p<abhx7V9)XV}UQ<PynZjL2B=HTUnu7IJ(4sN!sCm>a$G2|4$(xNO7T8lztZEU^
zci^tk%Y>thhIJDe?Ajns_6`!Ut=_U+_|0(-RGAYSAC6;U$lbGd*KoP3$<+(RPiQNQ
zBTEeH4u858qwftDv<xDj63!HO0q=)gy%1z3;`jr`s>H?=y@x)pOALXQnN0|Ocbv2&
zAE7M{PBZL^_K~CIvRjg&=`Mf+>_?cI^c|Pwq_~AUx^%r_GNZ8s2Pxc>x;I$buBJ$0
zp_DCf$1iu}?y#eS5?196Di_gEk95!)k~Q1Uulz|l9=@M|oWY3%>bhknLCdu;lDA^|
z=%CPjEIOdj(aPo$L1CY-&|Z<KWh{tz+8I?=yE;X7nMG$GgKJ#85QaM&{E1uT6<uPe
zJ2C4|5$nnlUFU0*xa(rjVLl>PFN0ldbQ4X7NgEII=^ck;p*YO(IFI`rEQpU*&-=KG
zX|d5z<V(*<R=t>DE0Ilx*itw!!!VqjKepRwXtG2iaIK2(B@dEM9;Z5pxvuP0e1Ce!
z)KCjMNNIKfA%A@0P|QkiYyUCm(=j!Ktnysp1T@Rna1KRNKSCSwNEh#km1i?9ms5}w
z7Kbw1j!`~itDQ%IofBqOnB&*+@q+KT!Nd&ccn40vK|&^<QKfk?d!wSv5^IlEE?9@f
zZqUc*g`4^3()t8NE8JmL@Gmr&<}{FW^!H6^K4UH=Upk*_oFXEXKd`Rk%4OJjn~G%|
zqsO05v`+P{ehwrVEQ*bL-=tZp>GG@-FcoG9#?J$tLp}zD3POg%r72GriEz@_6_A>p
zCy9aWlJ&gbAJv8lex1DkS<&SJg;Zr0M)_14NIovg`r{skFz<yNV=1@%3Os4lc#&kN
zidSwqo@K38BfWzMNFWyj6t|s+3y6k>en)7bK?d@0$L4^XLW=2g7!6a#7yj&5wGJq<
z^oTOkcA489$v-Ttm*pZQjJ{u8pL%U9KL(E&wXQ^0zkQO*eFc8pV+|(!YUv+P)s6p-
zTy^oDcgUHj!nP{54LdzB42Vf76hfy1l1FX!mel*~%`vk!<qXwFPw&<c)e_J16)q<^
zHONEDqaWY25EQH{@cKPZ(?!6zeCOAE?QXC4YEz9J^qbs-Wbzf=ga)P9aj>qUANV{+
z??K7(Tb!5ol|x+!nRHIK0PpSR>lne+twq8hT8R*yHTzP!r)~#p$zE8DP2w#|R>OeV
z<bIxbdV$8XnvwW??t2XV%Q44p&_9GyJL`^wlB$&M)+NYC9|KCpV>9eeu*Ev=1(xU6
zFe!hc??g>>LZFWAX|)rAF{TcUCmrQ*>|Ul&6tnW+E0H)As)26?56<8n1wU`(cx#8F
zK^EMs>k%SE#=Lx}!Eewk|DqMxo^13E{;EQVA4~<TG73^c5$jlfT6a?`w)Fx?w$kp-
zD&4NECHb(|1o5Mm!nqb)8g8O=Nu|#qVCutHgvxMf+LDSnagLhX^bYF8Y#*dCiH}l;
z&<3?oLjtLlMSuuKAA5cUNl>^%F<P_m7;ubb(sygz?}ZtBJ0Le-CW6GE_sI%8sGayN
zv1)vW?=BN49T)vMk>0}30CW)$_x2K4Pv<VFox3n0vN>LwiLxNObIek+l2W4X44M=Z
zy_HnR8ISXed`X~_VY~&9W^p-HTz`6NSXo}G4Lg;bdPmQLM@ZIt10|hW`O`J`5gb%9
zQ+|gKU0OK<8+Y#E3dbCEJwrpj`guhO2<ZykvSOUUWIDs)4p+$hd^CFw8L9V0+!!ch
zzr!*6tOI=)bOAD5SbuP>H3ULMfl?7l0&<OCQ+8rP>PQVGEK)0Gq>sOag~K*SZOO*I
zJwlz$LIsa>SSa2r9r`Hl%H-J}dnf!DLM`a-yp#s2JYUIFUAIz((LL>sAWzJ0(qviq
z`V5s-)qRSzpOax50lgEp410%wa8VFn&>%xu)N$?;RVB7^!YsEWRd%CHdG;zyG-pZV
z94@2Nyn1^zZj^H%({{ZbXa%2*AQiAXP^IG%mwd<gO^*FmD(k(>z>%CJC_?kL-i|t&
znR}wzAy#%wD2G?_wo%rQ9~y1#6}ZDRiv8G0vd#YEzCZP?m8*PbC*;eUj|SNXLh~yq
zRW$o<Q7X&!<*7Z~lb>v!T#g;#(lkOKpQD+Ek+R&=9N(hk`cPVxM4(u!$a<n}$tEXF
zu*R>np|0mbAN8T!?%~9YFXh@KHrF0$aC!?_l!2|7Hqz#Mkct&9e0tY$<F?UXj9hvg
zTlp3Nu*JnCVlE7Cu@{LiLEi<Sm|GK-)^z}0r1qoHm!M1Ur>1e@;+*M2Rg7r@0M9PR
z=sIeS^SI8vCl<WwNI^!lxXxGQs7rD^`tddUoYDo`t|ygaWdwXBj<7idS1uvo%tErB
zmA)NNtkY=|bpJ+gSjL)ZFkPWLsecl*>Z>?*R&h7uC%BBnZ9heS!x#?qf#ym2zVdvh
z5MoB@ByRb6UFv&omYmj*?q^Y&U+dh*@Y09|2Mp?1%)bg^pHxnj<n4Ng_Te&FVbNP#
zP?PdU!z275h1Q)MuQ=6)bZeFOWh4f2`fS%&LxT7ED_rRFaVm5{sr0!iZ~&y#e%md2
zVm^gY&wT&V(krRIBQt?||NI`Mc2`$XV;F(EhVEuvX~pThyWjAxp)Q+j1@4h)2oMM{
zC_j0L+)NgE-&sMcT6k@pW`6|vCbXRODB>7L5;3$6ksCMU6d0Z{JK^n~lwCOC`1$Mp
z`>qUK=Q&*Sx(JAF()LDmD^Wsp4w_w?D9Xi^LvwTU_i(^__LVxLY2V{nj8x9Eaz^K7
z81uOa5bPJmH^m^3AGoqTEr;Qr>&PS;CK{zN_cyo&Vl&?giab=(XmeEBw2rCPe(h!r
zlLi3bUlYnk^IBl#+yw*{t0$^GPC9<qVR8<TA74eSP3biWc*zz5t^JQ63icxh90GY(
zQQ^A5q`%%?JNq`8^#b&DH>j-dIz07|7%KF)65==lTZd6EG9(2d1N|l#A{Vy>QhxQI
z2>$A2JvVV156YU6u%_^?8O*_<3eRL5Azx4Y^3nEu#1CmJfI;=l1(C%dmL9?aAkM#h
z=$q_7H^%#WI9>8dwm5&G2XZe`TFCIW7>)Yqh~%SqM*!|4YRAG-Md#@IKHiizdf67>
z@Oc534&m)C_Z1$N-?)jJe~+tKM6Im43GbEe`h%0lORtRA(L%{%<fbvI^!J*v1nKH;
zhf!Viq%?~JNhcCodOFli$TUOX2J&M>#{qn&W^YiMDVm8lJvC%ld;K!3rI`nC3piZB
z$tyu^^I{kitT*R>qMZ-=o>0fPC0g~C9_1&-;3DZ*EI>nGJ9o1^878su)?{dz384~X
zW(b*1CuY_Go-RW*Vx8*g6VlxP%I@(n(`zUp8nsYdkfJaA^d1a+h)#cBM({`$X-y`u
zTa$t^11<l=VWHsC;GnmvNsGopB>B-t9bV}Zh$jC~NnfcY>`)ShL%|~n01S{~6b<%B
zqG-nR)OU7h9h-KsWa${lPSOL=mCfyeolmpjE^AYOLdM!j!{>C3^G{y4&jWyQlHQ*M
zSrQJ%4!va0SPc5Fx=`QC5T(@*W)JP;<K)(WFsH-b6iOM>K#glTzN@xdHVT-(=mC<~
z0~GDdtRuvfgxaI~nczoVl*tuAnKB}H_sMP&HsPAKsYE8-$GOQ6Pg#&22A04uYA*xN
z$OdFF-lKg$sxT@ogt&2sN+WSw3DSYA&5E#o<?2EWDOt*ewbei$aaFmZM}P0PD33rN
znScF&LuNnw3NIX)O@5L~6%Xm&ILb-QHqmB34mSpMZ(q2BVk`(sRmsz4@4jGN>=dh*
z@l>2}Sx<)g6lTUQRDf-@0IHc+KBDz4!goG3<h5_=E?M;B#VvX?F(fSfEXne?+FTJ-
z1&LdmH#U_J*UERzcV%7g1E)Iz>jlmbu^1eYN)s&;%^BnAcM!^~Ebj?0K9SJ^-9Y4x
zax9?0(pM=Z+{UFtDijVcI{-V44^;6IpPWMmx>(@}0Zl+u($9fEtlsB1VVWrkQ*rZG
zF(uAf(&)LqFF(%_t0t|5jU;|g5CfP84^8Naily=oxNgB<X}zY9TNq`uwXQn(9umO>
zUFz%kD!>;NH38~lrys}5nT=`_ymSMwEi|#i%9G)vm!@xt{@BX@8{OwJ?Nj641pVPG
z#Z@A^F&nk{Bu|R8A&#74Yttvo(h?&M-t#f?03xtrbmK&t#l~O2EB`9sb_HtqW@K^N
zX{MY=k4wn<>F0f1GR=wKh~qMua9jeEuQ~;)Y3VzNZHG~5l=0WbXj^*|N&mtuqL(Yy
zN>~rb2OG9NU0Q&V^?$yUH*Y@I5^Sb&7vc^cUm}Y?G;{m(a1&Pe;DpPq1nm|hLlI%T
zU1JV=wCX^*Qs{Z~Idk2sUj(n((JfZa|6n~(ocWPzL;EaKP)y6KZz@n6Y#+~Z#@HWc
zs|yi}Us}7`0Eugfo-Kn`2xX~_I=yVu$eot<@|YC_yo(enhhHLi8Av?z+tjzsF)5O^
zfYMg4cdTytf?9UI&<mYAWQLzKj1_3EuAGnznV{LyO3L%%05()JaFv_9S3pd7b^)(L
z2ERad)xON5^+2CrEI&|Pw$Hs*YrhRc;-Q=1Snl+3hel`yK>qZbQ$VUhm8A>q<PL-1
z4TO|WIXEX;B689lj;3w=Ar?AbI-1!P4CL0!EZw5(Q@H-<mHev2GP|}>|HRq{9CBwt
zSvUg6JdVKPpTmXd?>o}JfUu|tW=BT3?!H_ipY+qmbG4={hFs72hr@^Ti)(~e%<^PI
zZg1@=%Znwt!Sw2hJVc_L#zFiGgs!{5zApAeY~ezE^riEKNtnwG$TxvYPr!Nma0Um`
z?<+|Tb<M9RJZ6RAxe&@1Af5ugM3T4SMaG+;p?9x{<EloRx@E)(IUldX7N{#<wjk`e
z#`NF2l6tTT?YpG#yJW1(A^`>qM{a=AZl>F?7L@#ff(=zC$XxaoR$GlbP&#G)3%KQ@
zZ~^dFYRe7j%6Z2z>yw$*EZTl%<sQ$h_RCu<yikP*H%-eRy|us%a<g)zan9N;@yrWd
zCC;Em(CZO-KBNO$jw!$0>&i!v6f2K{HOU;f#_We=X&=6NQkyX(lVQz7oz5Wb`T>*{
zYSb5U+N98`mQ^P?xoo<>dYc?8CKmQ9C-BZS=&n0-m*;YKN2<R{t~X#HC~OkM0vrwg
zRHLlW<6UZ~;6#*q!9ZI%>cyl^SH?*p?JDwiWQ_@xcGI!#D59LDWB`Yo-D+)-&2RxQ
zo48?nO)-_@MqbucK!C&5Bt@6Z_6k+L|AmSy`gz+c^}O<j7%g<sYa6Y8oT$_o6se?g
z$k&PkDksG64JWKIFgl4oarpG!XSM%Zs%F`WKacg}$-3%YK!_@|2$cmSn7#E$Zf0AJ
zx0)R>rA7-M!))rVYlCFGt{R+W>98k0<!(=MOEJzByz6B%#vf&G;D!?H?~Md(BEEQ<
zcVZ(6cXDdORvmg6bMwM$lXKeECn`&qWXk8vTxrEao;rot>`QE1qJLgObj^@E9sWcc
z5vMH2tBFjWc@hE55*&R&jq3)|>pK+bx>bKK3VXz4;~qRQfGBYPS?7`gC{k+6dx+1!
zStmv&-1iYN85I>3T6@FC7^*{V64;kmGprz}zi~>%?6<yrdQO|+74W4(pkLQ6x1g_a
zducVj4l0ym{Ruo45f5Mb>>1Zd%Zv>1!mi~7IdTjdoPq`nGO;&EU+8^^8%XVwY@vFp
z@2v^AFa)V30Au8MbwRH$?}@0dOpJop9-*6V7qr-L!CE3<Ub!Zp8F0V3R>=n|$TAsX
z<Xhov6G3r2Hm}pH8LRrxr<&}5$`qjVaZfo-iB)uR9B_VfS28y`p6v@36;e-W3S^wR
z{stWSXP|0{4}8?-c8Gk_%k!q}N?42Uj6sBK5KrDXEgK?UQ*7;#UX(|GJpcBGaiBZ3
zjAiy}9%yVYGf*NTgplb}eDy?9;E)^xsdrDo#=a&goN>T4B$!}SK4$h_XWp_xU}cZl
zr9*H2H<K#3wlC(}b5pmiAogC#RhUxWgZu-jz%8);O1S(^O!iSJh@ZXR7P|)v?h1L<
zRnN9kbpr^^!l*BvHiv5Le#4%;rDbr~!2KTJg<@`C@M5UO!F(6|Mr?)UB&AIPA?ZYH
z@QJm7-j{pOuNrlU+83(@-z%G&FW-&p>qVpHJuNY=<1ovpU#B#<LcXQfL+gPA*I%ID
z7C^yw=+0CK-rsFSgo8b5H!5<E`*vABKxC~ChUP|N1rElhb!<E6#r<;hGDe50?Tp!-
ze(KCDR~p7a9n4e;G)s*DPOaTD$!<B$xdl*PGp=!ic^$)16uQUSlO+9{K9C(N*iAc(
zF23~tepd8d1mWw%GAs73Wtf46SLwlo`7+B_J9`uI7UhWfw>CL|MfYr=n4d}u4?JTl
zB|P*)#xH@JL2=+{u66jXJK~`5)%*cCmWp1JC@f!yi)OIk=F9J#-%7XN6ZK<lNMMy!
zbqE+FGuO|MiF?g8^|q;d%fL)~NRGM>w9!ACVdauAVPQ8fb!O}W0pu55$FK@4L0{(1
z;Z2aV65EX%#!ynATWC7#HcF5=OA)+wgR_elBKAXT_1uaKFas@B-U9^;t^;D^zLtCT
zWH=qDY=rkvsctvOH)ziP5~DRLu3DFjQyOUJXs~%#f-5yO9%c^zxW<=*Sf4CsCm>@9
zLJ_)53cfkHUJ7EX#4~kLbOQS_$v812v#dkUU5cxYkNRs+;|nUqc*+^nCTKug>@{)e
z8Z_(RI$6FQwGBR=V-J;iDdSqXLy}%eCq9dmDk%ai%2Wr7_8W=!&er62<oLt6qsSs5
z9^aCb$XayFi?mf8`)&FZ<2Et>$;!=pM0%$A1@g68lZOOlx{3WWAiq<$P?zxiOz5Dc
zjK@K-*k6Dk8Ukv)2fC`--L9wt<4LR_W^0zVTh}RFC?#-hO7L^u{t3);k+j&(I-Ez#
zLs`g75aV2Ir)|wTisd=#f8Vn|stXffyMHEV0M9ARTV?G(qUk$tqH_3lE0wu=#Y$2T
z|BZHa(cn2~b35bEW0OLa&c~Q*gFnhA!R3}J4ZZ~A6RQ^&NJbwe_>s?B-)2Cy;<T$v
z_g%ieZN%41CBY?5!m0P~h2R%^y5^H7${;~LW9F(CSbrB}5)#k!ue{EZL0M|FwyNMQ
z&WdW_BH%-Pho=K~Rb{(#-?K4Vh8=XQa8&=2jpukQM{9wtut1cUKw}M3CJtcXHIs%2
z7MnQt#0VW}UJt&XBlI11zLUt*wNT%>mRI*(cr8F*Nj6qE&G<6f3lZ#RCUJloWi|Bb
zuh>j<LMZ2woB)w+v6|Crcz@rm6XSh_P=D7us5cXsn~=j3ih_s}7tl&>KB4?txM?2+
zxTVR`Oo~Pn#l`qowrYET6sH^KJi|vbWhxGAe~3=oWBKMKC7?W^mP^2~Sx~#t=|G#}
z=<oomq?O1tz~<BPxI*&1yuki&O265%70b|q7S|_~^$ottE$DVLN#M@HALRo$R6@L6
zZA)?I91hkI@Ue!YxmG4ww@V(c4I`yv1rEn(V|neCUc;I|$$raT`%SQZu$6gLxrNYf
zH+%IEdIpP8-?Pm+jW-7<rMu4}j6oMfak1GP2+!}LrU2-Sx{i%lxe2bjey?92P@f!8
zWO=VR%e3#MmJdq{ffNI)bupI`UzOZ}E0$Aw#dzeMHXfT)n+GLQuQimKm(6_dQ9}Ov
zx+c*S%z*92fUtu?B*mGSjPkR0kiEvRTsLYf9ZFtbhrh3xk=vo#IcW{A&!T3dd_B&g
zRAcTi#Y@DIw$aLZ^t`R0|7O#u&_TS%&l;9~Y~yZ&z-B1yU6cCwX&3%g4th{7`m!F?
z>#Y^HeI<O|rSLUO%^#Y-K67GHUIDKUS*?jwBZqdMptiTCJXfZoh6mpA5y7;)B8YC`
znhar>&d4|WC-{)J-_IXRgdcaA1ns{JlmxplKtx#FJ=p%60yZyW(E>|~OzQ6Ze1X*5
zA<ql25{wvS$`cK0Cs3M?^9!QoqIZ8Pjx49j03^C0ToJDw&(?R;tBH_1^1AlwuUGyL
z*QV}!txZLJwE3e-h=<}>O((40s53w&;RX*lmPJ$FOqe*gQXE5DqF4^WS@-Gsmj>EX
z@TeP<BEy#p058H`Gu@6+dEmCMz`?Rm=+3U6VqARjBc1Y#Ve=Y|Pfwdk<W&M&UCz3q
z;==HtL+H{7r-;YiZoGNHW7}8|$6uqnwcbK(p6M3+rkym=TmG8qpiqO!?vpOdTVw#4
zCQd$eBiKIHx#R2TEDCVTS^5X#_5C4E?W)Z%%uWm!;Y_QVrQk~P`_7D;m1xBFy^)y#
z;~*BQhnx5iUJXdpsE)GW!IoSXvMS*yQ?AWC?;Dn5Dt=btzWEg>G%g=@YkkwoJZrv`
zU=k?VegUphr#4+9*nv#<&KDX>p_S3?s329so^fOUgP;#A>qVkK*JLF$dg-JD{&sZ+
z4oAGmjmi0jj~;q6y+8}CBoex+XnEa1E#nPymU0lG`A~ROP7=^)*+-^(D#S2{S^>8N
zD9-jf<*6pO!OON6TD`^VN!Z`UBP+NPS55c=nj4ukcn=sitPABTr?_nVrWDDoQTvwh
z!BTZ#CJjT1jGL35_^R4hW;JHh2C)l6BMLoLlMU0$U4MP|5n1lleb>vz<R>$J$D(>M
zy1GnRh<ZTrQR0<a^Wm`OGA~85<Ktq&H_LZHp0q$wiRHus+$gFu{LLBc1CRimUE>kO
z*+v#leF%7JLCLzc2V!#PQYAvI`b%a&YJC}EM3Qx%J@mdJ{O5Z4tHLbywcwh}1lyO2
znwwSgMC2?S?Pg;K>yyhTjia9HjN3ZPlZEE*fFv{Pkq{B?FKZ}WdX3%_ua0+JUG}50
zy0fZDkM4Gb;P9Om^`AEz1c6v5`~8I2;*KjYQOY!aa%J`mq+q|$LhFvW<l?YZKB~tw
zD2#ZXD96yX1ETjNSHV!<k#B5>oPwF+wB=+$e$UY8Wy;EDs~bg$PIAvEpav${(zAC!
z@My+Qn+z+)Bx3x9ICv-!$TXu9Mf_b82fSj5qlyA~>2)%~>NRR)ro0!h!y%1GVmFrC
zj@(pwxZseZN`!+^D4*mB4h5?+KYe%~c<G4hkM%Y^@3BvHP#t2w%nsPWWorIh)BIZb
z74s^!S_!EyP0K5em1mn-b~e4Re7or5$r%h80>8m3%RU%9U7J~wgBaj_c}a`hNO!n*
z9fg;aSpdjM^pZShWrIloj$9DnlxU8liveLa&+o0K^wQ<Ts|m_OeTMdVetMv#PBuI|
zGV!=MJh78M#1MMDvQ^ee&itJct<@8aqI)ZVb|SJ|&A<bU@7Q%G05g$hMdeCEp6AN;
z%fw9?e3CPro!kXAArj?RF1nrRNydq)Y4CWt$6#~TjSYw(KNq=!YM`BR+K(@<xvUx|
z5G>%hdL4%yYu+phDg1^xj={P}91WHnu8p&s*|`SK0R9`h?g-dV$T<c*Oc`i^DM%D4
z1&&S|oMLeu7*hK<NFX&?qj?ItzT4_Dg=n%O$(HNVzWTnM)EFE8rA)+6$GHi(+j^98
zZoWWM`ZwC@$RV>N<DdIQuYQU=x8&m#Awy5S4tx@TOm-O2d}ue0LtXTN5|-)Zn{tF&
z*S-(zr#U^)tbxp~|3H5b!)I?&L42-G&5f^MgwSOI(cv8L#hZ`8nW2lx6*Qpr%!XT)
zoJ^Ox<LE6$82UAzbUq;<XSqJ#d_bNLT->Swx%u|WM|1Gp1e~{i*=pMUhyELwvycm2
z{A0R?FJ;Il>FseE<9d34Mf~<tq#Ixw6Y-%oP|WK(BLO6_A$rP|)yGR;rr-Yj6r)KA
zACxYiL2o#TC2kspC&{U16X`CJC1zLvh(;_?e_e9<SSwfO1V@a&_c`YK$fSjCnla~`
zLGpx#rD8B52HUGaiL(BwXmV}$F=zUluN?bKO4bHt<^yA7J=)C0(5#~0XG|3hoeKk!
z{(}tr%Bd=5U!uD<L2$kud?0se<KlHh3js@2vd(R{-#9~$lqWQ80CAm`ZXM!ksHl&a
z$fYe|&aUw4cy7QGYpe0O=6+z)b3X(83&5pQ_)-}p+wxVWu~mKAzDzkZJIRwTLM^8M
z$X2VKr?P{u`=o?0pRF-wX+H-3JAJ`n9X((F9;nIlUfVQaPRvmjzjmdeGmlx%7+m}G
z3R%XQkcc`p@(H0{z(x!Zs|IcJc~>#Zz7zS0`Ngsm(D91gZMYlIq~2u==%YS*ff-2_
zSk&z(<glt~v7^=G<`&wQa<XESIE|?%`I|@d13sr%A;*oE&Afls;aA^L`rZq^@Sl{i
zw+L1BxYQ76XOygK<Am&VmY0_uAhLDoKN;M$>+N!!gn0|Gd|UjSyR;X3iRe&A^7!TO
z#~%oZQWVetkAEDB3CpHXZgVgOWB`uWCLC-zYK(B4Uk*;Y#((tdy_k(btmQ69++#UM
zG*VX*?jG$8$y>UzXUNM($@WPtPv`(3*a#<n0&@C(QE~Ch?5uTL@4^b-^(=5RoOYEi
z@AZMq%*@~j^{2pDI`BJ=5wpJw;MvEWwwZ~JY&du_SSI4jp!_poEwuYDAH0A$m7%_r
ztMo+eS7G8AZzB0EN%ciProOKR%t7f7EP}TYBCpV?oB?`Gr0P1w8vCJ3Dmg@qEa%VU
zvCMY??_UupegMB`RD?!eFYAk-$4P9cv8T>3hX0rM#co_3p{DDgN;75BV4Ps%hXWjf
zgYs@cMweW4bD_IHaDb~Q1n*jj)u8$$kAhFFEet+faBHsIEhpBWJbcCDRi|_czwe-~
zw-pEIa@{e5?H`GZrmAfI6!{l4oS2>IJFnb#WuT2wDKl*LnO4iQOtHd0ZHv~n(lqGm
zzZ0hB_1h)~ox7}(%UZMYfHpM<RiL|Y*)0nZ6SoPMHz`lZ&&_3L+m9XBe^1!hPgr`(
zUvqbCtT+rUN^R5w`VmYFEHq7;iBT~avDq5?)z{5o-`m@_#0Bqt>djp@$XnP2Rw5HP
zr{WIe53PWc1C+5PljTI&r@1qB|2<+KUB~ejASX{%jAVAr@X1#@+<3B%j8oKsE56Ka
zu|c#CFD->jvpk#QSbRU>U_O9U?<~vZ6t?a#?);*ha!{)xrsj1xVa{p?kY(*JWwMf(
z_N&@Uz((vDt`FZmX<bi>ENfPHQ}?^@m<Fhr1)V#}^}bB>w5;+9(ir79T9xPK%C<~a
zBQQAeIF#?upx6#|%NR&cZqBlvLHokV55SSmI5$i82hwkT0fcf6rx^$JlsgUP@%(8s
zRIBFBt6mIplRYLgI9IhRMnJ=36s4*rMYq6JSiPiW2h{twAx?N^%`VCL<3;MMLpi|a
z$7Rr9cl>u>*OFIQ-ES!7Xud0LkqFh5@~ZYa`;Q0Hqo^%Ihh>CGHtSf~ynqxGKJyS3
zZ+hd|Z{&WZ#ds$_Hfg!X`}{?n7Z@DAsnL0<pn8dNppP~&Kul?gJHhkd78+>Xe1ZOp
z5La6JhLii-Kux+^Ud!+|1UxIASmc%PKe)v_tkz-FyfqsKX&B~R1KBQ7>9(QF0wuEW
zZig~9>*B=2q{xEzx3d3Gi5BC(fSS<DB&G#X2)d`O{ED42?K06dO7b|c*}k?{2<Tln
z0O8AB0oT~LQlt26Yk5ur@|<1aJow4B?~>O>^)f}=Qn3N1Ni1~}xOly@tJqq7bNP7E
zg6;z$#pOe)l4$I*=Z;iIg%@<z!tLiP5z$->F@?_btNYrWAAnv#?e>KN$YUVwZeLAu
zcY6}^u*O7vwqo+J-MFC4T-geYULfM8es`!>o|bMNyu7?eEY}T?SwE?HF>48MW+OiF
z8|;E>2+bX!1`Ps;zT(hehq=dw)NBS>JnpJau~7pv`T1``Ok5tHh6WLt7UnK%3G}Ru
z+jRqn(8qEXb9Y|d%s8$zooub!o0A^t4KLjLfXgXvkQBKxgrw^Y?v~*VX+bixwE+bT
znCR+F=FzVsLtA_fakxLtIk(e7ziosRXZ}~vbvhmhy%Kl43m?`H+wY?^w`T_}gM#d+
zir>Ik;7(kpVnhtRF82DQORNi!Pt)n-^XbkOmfCH;77E{8#~PqVa|{z#tgVaf@QijA
zq*g;kQ|!(2rdPl`a|6zGP)NyNR!;w&vp{*%2PjJ0V<fG4v<)>_Mi(RDv*hBwi3%`x
zoA==bd3O#LyyKw`QdTi`M}u!c)9yp{#=^^O%(-*I=cHbUs7#pa))}Nd3hq2cjTS9;
zbmhTl4$W6X%`Zg7bg|TSC)|T6pzq1)dVVSi=9;3YWbz^zoyjRk=V_g`1auK@nnX@q
zMPm5;S5XrYJGYLPs2KVQvxd^r9@DAag7naQvnE%dCs{_+)TknXecDr9_q(*>22S=x
zu%^yOA)b%Z;WmWxqNaLLC>lb|Y@z_=*Fb;n{eA8x;1?aR4SusGFgJ4ngr#ou7A;W>
z*N%hjSgslL1t)qEeEPa1%gKY>;X&<$cbV-gp}rm5A23{L8z<!M7|N26m0wMzU^3O*
zFHD5ED+75lT2{~DTY&iKfFFVGOYBl_{@VAPgCO&kuZo{l*iKXI6g9@j9KUpGCBA&L
z971z{V43@cxH|=QAp&4cVa7P{%I212=-bq=T!fI|#dX-DFTf09nC6Unove4s9mp}{
z?+OfE5B(g5Us%6UqY+yF)Gytpb_d)QEIUC1Om=Cz$$Iq^ou46fGH5K1I=)^j$&UBx
zD<5UUdaS~6(6Fym%rq8f)g*Co*M;GtQ3q@t>C}PuYpgnAVGOr2K{*nMb7V4Qp~gv`
z0J~aPSfb2qvLaveUK1Z5t?5_?qQoWFG-Za$;ZADsAo;Aky6b8xWDu1$1mvS8?7Dv6
zdLyu)M{hBuG&CH^62|8W5}P=*VZ3i4`~<qzT^oL~Gr<j;=oqE9L3^C;aYA!K<WS#1
zW&<)?5`}e;qoDFVGyJw=-nF=rMJqvGH{GP%UsKbzG_<OkJ-PA)+8S&z#PU4ln7EH8
zJ$2-wrB>y$8J7OXp7cie=zYi0m70{`M_t3>JNAI6Li!1UOA-S`zN7S&yvW*Sr2}n;
zWVy$0S^(`8Im(so{jJ6Ph@0l}F^>B{P@!{>9*MNl)itM}un<YGx`^C-XefU8L~$y|
ze~#R}K(jY8L^bRt0E+^=EByCquOzA!N!}%xfRhDTs?>b8aUN$J!Xz-A)b4aLsT&~6
zPn7XdJ$NjvQrrU=xh0tTL#-!6eyW4WfH)%_UYQJGDk@nYHCkHt)vKQn5=T2ya1aWY
zh`o;9SHspG9@M<n7nC$h<gW)!cbr+kXT<^K4>)Q%QaZUo8OpF*3o_dLH!3;0y6Na#
zc9E?5lSpN`;#42{A*M<XKnsb*o=$IemjT|$`z&auQaWbbysthl!s4zmYfRBIv*nS;
zjO9YFV}kZ!IIg{1VBW#~KXbHmmvV5ngizY&XJzSkwg<_yrS=<B<AmCf==%uny^Nu>
z%dCv%bIBGpF*~z<A514R^5fWzLrj67@^<QQD^R_>$t0~^8<g(J(=1)%;0UuUyNC5e
zpNtp22#t8$p5vjb%E&5^;R2+XQ9GME%}*z6Ru=Yi-;c_BohaLNKWmF6d!p6sGlr9#
z^<TY;>sl@;4ah-$yj|+nY+=ny3)0K3_e9>VS`_!l?xSy^ooqhw>K<r|1x?>NGg~b@
zhj6ILx4Nt$`fRvj2l{fFk~H=8((*U540>bP>~_)6M^VgYeR-tFcc^<gF$C7cjhS%l
zPI(_yGQ@?FdYEW%7jl{vN2_dHovwj3`@+c$yiA^*M8V}&wM`!*4o0^Kzrod<(i0F9
zn5fPeer}7gUZb_KwiMAo)C+R@W?)WN^!&zK*O3-xjVv0X_VBH$mygo<9(qYbh9c7U
z3iD+oc#L}=#_d@gtc@=<N62x63O>+!azOkg<G0l9w!1?ImCypC0W+#s&oFC@M6l2o
zM6lK%>SGe1V~12L51Td&Z=<9rAZz|K_Gx=tIoBp;%)YF`6}{|$foME{{b`*9Uz`z3
zLcFPpw`_RV_ly_h#p9njEhKSGo9%x}L8JtSsRc%pP9%Nd%q7kL+3-^f4bB}+i5$CL
z-Zp@y)<oub*-wZ&d=}6Vzc&e}JD`h)v2omBYVP&$jiVnDBO4Lw_zetM2<$-i`+B*6
z)nX3UUi21@z`3AB)bH!?oQ0GH`ec}PJ-~{M0%W?j08>KqVQ#N3^b4Y<z=&(c-EIgV
z9d%OCgmB<P1e~j0;Ji{PA@_~_?3+=|y!qy%1qU-Aap~4F&sXIVe29xu91Itr@lj^W
zU)W?vK_~o;R)vLW%|~|C7*+N1R1&T<Pp%A9#u-)}f8nf6eP%2-q<9f&VBg%E$QK9b
z##D)BM(I&%X4mdiiQ}R~aJ)AH2^^E?22g+z700!6{&}XVTCrAr57(p$<XK*^-V~_h
ztqUbi?<i+B&~inZR{R{eIl4dNipp2tB;KJ!aVoQ%HRqCvU}k)AbgpQhvRsEUNr<jF
zP8RXr@Y)ox_+&X~L1oo|X)LOU%eIDF{)}-S?l2GL9C-@Fs`29h?!g&+lJO-OTmXCY
zCMBZPa(yZs9->kaKT%CDzcwYMg#)Z{@X-!1jbox=eg5aSf8z9i0ZV6l_m80dzkR7;
z1RxGAqm>SvVZuNBjsMF}g#`hJWpQ52<gE1mzx>ia`h#6(hI-cpDE{tH+i^CB?sZrH
z5+MDfKmL#Y2mLslp<dj#hj0GwP>+FGOH?TNx4*h<|LlLsd}gS}zGr|#Y5tg>e{MqV
z(SUMcXZr!i*=G6Y-|AzCGt@Km`x^9jhq^!u_NSoUSuzSX(?9)@Mcp_<z3%>qXMcC7
z?qI=nwgvY8qpA9TJ*((5Lp=&twm-9ze|o5&Ljj-e*!d=orufgU8Qeg=vw>$b$|fq`
z`uE=V|HEcH!mcIaC+iN?V@K;nWbP|U$*3ydKuV=Vj(m2r9`rLK6v>W$<e{ba_$J+e
zPjqbgi3;pgB_&;}^npQ@RF0RJbbtKotQ5Lir1`g=?_fQ=;N=H;rwJat0z*C3g@?7_
zZXmyzLY4ZLA6TOgti#UXfBv6dDiYq;V2JJ4X1DI0|JyGU%JYowoP+{Q41fQ<_k567
z6+i~0CnI}-PWW%XWNC$c3p2hMxBLt5rBi@_ajUikW=nZrlKi``WYbGoTL_N5yx710
zUhhjexN#$$pK<=rQ}ch$9Y^pv(uR1yucQ(G)3pAd@BJ+70LHBfVa@oLCM)A+a-;fC
zf9dPLy-2{i$_fGF&Y5o+{#PcepW}kckr}0{n#|u`B;ex^xN$qw#m4_DlNB!xHixWe
zU%<b#PM~1iIU(fA|I%a);WmfqisrL_Wu5$cQ~!HY{}cPv=ii(9Z?BVoZ|c7r!~cC!
z|J}p--zW9oKCJ(JQvcly{J*B({ayHV1LI3t*C_HLZ(J(Sd91ztAwMJHX)x@*+x<-W
zp@6g}Wbq4OdvC{%7RU@;o~y<){QmG=L43@t4lx5yWlvj*{%gjPuUrV6tU&0jd@ZjO
zrwQ2@m!csKb=)TlgzaIOw3+4fYUmOf9G~QeUvw!iYotjFcUOE%P-9EM{y{kR{Atvv
zP?4R8%7gkpgG|30eo)*=!9n&)&1_y@Ifi>WrT~Jr9@fl6v(|X5<x8G8+9RWfohDYW
zoOF}ec{D7ME;k*sI)9?&i3$IQ`_B7Daw9qGMcnb2_3k@W;yPo6Z!B4g_ErD-_K}wZ
z9qI&?=VT@Vwlo^m1DZ94{A6w%n2C6%jQRU#>rF=;MNvM9sA1Erd!E+Vo-&@3<t2Pd
z%}4#8pEdmA+%?#W*Q}aJQiz<EtND6lzL)q6p}&+)aJY9`twdGuH4a&Y8^neW3du+p
zCA{T$w26ZKD<@9ydcQTsNVF54<$J=c4~sStU1`?ha7zJeJ#22L#!8bwZdsw6vNp^b
zEfl`$2EP=I!j@A@DVU>Gj*yGwtY75+L4JH0m-C<dP>Xw>;D#cl;;r<d@)X~6*glfE
zAlu}4)6K3VbM965$@A@8(z(T_Qb8tBbb)xU%vzf8It#9a7gGt|8mU8oS*~YauDbK$
zua8i8?9*a7DW#-5=yyx)?N+O<3vf(0oWH8vVr`uxUFt8x^(N1ku2WZ##*aenuC2be
zf;O6Jura16mG;kty4)(WOV*&WNB}AL9Yg4<^_y;UhSIlR4Zk1{D^?l2v!^<9IGcXr
zrOZ3@lCc20k!m_i@NxAYR%=0^x^Sk)l{~_vmy);AZMEAy5O1#OFx(0GO-VynAFbxw
zpeMp?Ac0_9iINO{?#)BWBRY89Kj^KedM@E*=gTDp$juqSw$5Lye`VXkvX`Qd=pi13
zYh7E8yybQk$z@5=ku|ge_0Fm;T@tU~khB-!z0T30q;u1IqSM4FvrE_H&7nifl$%7>
z5X9p3XThL9lf8+53FVb9bS0t;OZcH|=0Mlw78!rI-psrQO|brcigJwHjD2-JR^mgH
zFaO&Lxl#s%$8Qxip(pP2-&gMcqfLDSlofM^XBD&R4Gl(VX#9I;E{Cv$Q9P}yO4&T*
zxAeLX0)}GmO^Z3_u4zQJYDHci8~vUl>30V$V#O244QFLvTTwF~P|5N_AMnO$Z2U*9
zs3M_nb6Vj$U6|Bg?s+HOR6RJ~zG-AVm6$yD-r%XR>)UlKJ+*f5lSh;hEw?fUq6G>|
z3#%|bc3%fvk3|1m8TE<ZV@<oth6n}hmU?QsFr%&~{h4^tQdrjW-;Cu?y>dkm_2C#H
z2!)C=c6#1qmg(v8f5eXenv4lA5R{~8h*t<d`Xdyi<JUbVYY6wxxN5GgHRwydZ_1do
zb8?x+-i$m{gBksi|A30YNA^0kKW%dZdj+d!L~8#FGIO|KM!-pR+MDMQcmK?Xvp#*F
ztGFLdJOWMQ4gsqR(OCA()n;#aBnh?CQ?u;#xrg{c+12eyp6`R&_|a<8+i0&xh}Ns+
z2zdh7_NU~evpAQ`O)}<$tZir`5^!=>mSAP=b?UXDu|&h0r6fWc?yjmNFm9fFyKNx0
z)QI8VJ0>%C_@FeHZ!}`tg%CpDVRWZ>{GS2iKb}7L!W*}cA%8C9=w2%lb1J<orf}`;
z0BNxwH)fu1O<tpCHSspBE4)WHq)U1eWfpvYy;JQmv8jq=kZj=iCB<(gtS{%xv*8b!
zSC7E;c(uSQxaamS6+StUM-Lyh%v!eukQS7ysw|~AuN|%qwe7D@j_#vI`v~tm{cc&@
zLS_YMU5+@AoK&f06L3+<oxl$mcoo#EPa;0d;hVnK7Yu2lK==Dm)v$W3O540`!2rO1
zMgWjSq)vg{ZL<Q1%>#DT<8N3x?<3Tw!Tm~0SBR)rfaHP1(kuPAmKIVF3|Pnlb{NpW
zvg3AK8>`7Zou|RBYyelRH0lHtXwEg!cYOu-fw=)nX~f?9D}=T<3yu;Rw?@w3r?si`
zW9~x8(8A5PvZQ)gswEw1yQOZMQ2w2d{P!&$WKR_@j_L{S0m@g5c9Fh_t$Wps-*umN
z1Q4*XuQDG<Lm3IHW2QTYZDSl~T&*+AX2ONP7OwC;wfe4i=c<XZmCe<d7x_CDM8(kP
z?|QK+;xE4CmSDanZ>gGN18vS>Ms)0Y45*71qqWUwG}n3Vd0NG|V#rcjJte8W!foQW
ze)qXqwXJp@1FE3FA<j**Z)St?qJBa-=(|8J{&MD?wjas}J^xmB3aH9Ej_?v{-dw+N
zGE>m`mCvc*i;$P6ZB5Rqc?>IT12B=b)^lz0>wzd4>NjnY8axH=64`7xC@(av_Cljz
z(||UX2W~f}0ZuhmfFdZ3%pWebUsb9!8F+9ANF&fG@|9X#yzC@->dPuc>ydp`EbwH8
zY!5zhIEQb|!jH-qt-Px(zQ5D=dTTr;qVmuH_Ffdj0bv|kz8R4J4f9ohmT4~?pF~t;
zhnwH4$qh|uo;N!J^YZX0#I{Qa&Q**XQF*9EGv9l7q3vyLJK2t(q;H2P`ab{O>g;jg
z{Bw=BF6d+65uCZ1d!MiZQAT6w_xm-l+eJ7Hn6<Im)Vp|6V?i%*jf_=qO>plUv8fQS
zCVfG!5j{skW5=Uj_5P&nKx*MRSb$%fG)$#e5AU&?a`YYDVchn2dmuN7aHf@;j6j=A
z=2uQLj=CMV<}96rZWg|kv%zqW>0fjjR+KP|rwMKP><GM@=og>G1x@|8aO$Hc1Hhg7
zqlMB{5MdA`&Lq;%E9Z+kT%IZCzM=@D`C!g;uO|xXUYBj6Pa~3X`-)Ua0NK989NiRK
z2Ln%VDO9ChNfeFdgNmBUP6FSrW2?dDPoEUj7DERs`uDnhe7-mdhhOcz$zb!j4e-8@
zRzOr`54KeC1_A8oc?(Ija)yizxFqhKyDn2<xu@5ofo*M{;->_rj1HqPuX?_@kF?Cy
zw=WxJQdw6O53U&w3GDTYZs7bU-?I&Mv0{7bv6!7Sw0U7^2;;s5(2ZMxYc4n5(41<F
zm(<YS8f|@1&2{VHIt`~;<QLvIMz>UQm){YvyalxS45P!$DPV$G-P`-y?KL3jITDaR
zk;$PFv~U(b&;02|-(~UyRzv@JqIBsOTS%_8v>%UB!%^1sU+;%a8qCWsjOp!@DD(#3
zdDb{P#nl;1#oY_8T<IZFxdy53lf<KEhk{T`Ta>0R)wV|@w#gh(=lVuk>r;{2RK~k7
zK95@;c<4m&Kl<!?PIJPE@agVKKi9-=Mwq&Gz4}>z+Qh#G3*PWjkFfa;OvmZxy;-|>
zeqB|)vt#no>g{!K^`Vk;vd%)|>gSJ2^TZ3E`i$$;5AS>j$_0Iji&JA`&%RGp?@j@M
zehIkY>M-J<5ZKE?&(}8*@l`3y3zQ$fIX8n*gs@$S9j|&O?A`!{PE3RDd7zxf)iGxh
z3cQa>Iy_k5TFt~ubk(KEckbk9NY1WLkhUw+%-1x3Ykj9!cbW#Xm26bK5H3As8qVcd
zYSd#3fk12&3TQfgfEiz+XWdgb$4neOmM#$<pPmA!5W|+0Xh$pmZ+ula3ra!=l%taR
zO1Q8!!&9Ri=G822M$dXSGAorCa&2s&0QQPC@uj4;%uxXYw#7~UFz?OrauumN!B|n5
z{eAh(^U~0aO)C?xli!MkSJG#*S*?Uj(ug+qlcshfHmcr)4jsOFr0=k}b|+29S{SWG
zPBhGyut9o*=LJg{uN85x-jn^pjVO5X%DaYSHN$kbO?2XE5)EwitMiUjrSQpDuWv&A
zqGmT2XB?)16n91NWFR!4+kjOy_J@SMz>mw(ac>^~N_vPDIr->ZQVFD4)dGhT+675(
zra?bhZ@~GfveQM!Uh=r67wy#iUOMEAeJ0;EGS2&}tKVD>?0%CH<Fc?l+9>*B*z<$G
z{{Cs=S|WYgwGGawHf9|<%{9h5g0HLE`3H%26v%M#`oFw{qru5o!e1qU%Q6h{nOnj`
zek%DhdVac^O4<d@|Dd4sg8Z|de)K58lXsj2X$oO?Uq60CVtZ*@V(dXwvy>j1lB7lI
zL~Xp_W1bg6$xFUyE1jgCts}m@i+zrU<cjI3K;n0qRfDoOul4*+x0*uV3Sp1?m`>Pu
zNS~5Lrjo|D3k?Tb((kvi<vjOM)luUWVmMy1?+qK7L_Bj`yU#Z6CR)v`+xkO`grN~T
zgFW?%R-;r7@sgI#Jsnhp*#f7ZuUZ&n$-67`{rJv`je&jGEX7pA5-0I#&gN?{o;VXP
zZMWJl0(@R2<gm|;Cz@}Y{A1LPt_57&J`UZ)Du|b?zw?^%TzFg;y*5ThD;~(~WD%bn
zkOZt>et2Mt`W}wblte2B<))Z0(EE5%;R>*bieH%0j-P5S$sYtYH(#zlZA=VGY?qoS
z!-h7tP%-%*Kj82=@xU7QZQRiIKnsY>=JK7w9?7V)5%m|hr*$-reoZ}RX`F+>&^)wF
z*XYk_arpR7+1Pl36#_iVi24>?sawyrQQHLpDb8tGm?Po0=C=^d)o<vltqXQ`t>+@x
z(S5)qU^<9`kouzJ<nz<XNj`(?Pjl%ee!S-nxB31cm2TPoL(s(<C2)HT=5$)mZFBu6
zIRUI%4mo2vIz4rfm_K;{*wPe#dHU+z$we$+E;?p!?E{iADor2+FVwG~l_qvdXrpR=
z3b+^Kld;)`2K&(U6W|Ah(L?rsHib5P`SMz1g`!O2^WlR%Qls<TaFqGX{g)#<!o`@G
z3bQF`Hf{OWP7d!>2bcD-<s)8)Rh$tiUl!xo7Wt38uX$uUi4mvWxUX$$;*3Gck)v4!
z7iyN`&1>8XD>2lYp;@Ag9sD<*jLqfb!U7|bp8FP7HKZT|E-)5))vo*3FclnGkFsoL
zM%93;b6Go4#?>GUb(N?nU<+#3mpp&ZV{H^(TU8J+ugjTsA2B6(1>t#Z2g34f(+m4E
zV%On;jiZr|cV?H;w!*Y~*VTUP8~3RtY;Q%DXP);|&+86JR9(6sjpWFuv)+@i+U8C9
zBPsnWd&cO3)&Xi}QKkd}HcRdYT+w$~nyTV@DmnA>?>F&TRP4|Dq!O!(M6bQ>5I;S~
zLuz}EDzfIY#83zE0wli#55lI3=`6c`p%{sR!9C<MB8}E$w#gbgM+mRJ6XpJy6eB2~
zVZZXtAISzidR~>#Mbb*Yipmm-RLn{vY3#Br&O0C7+jQI01pA|COV!_jSfiC^ZP&k=
zdqyKibw1ZFQg`H-L!<RV7X_lEOksRS`US%>-Q3K10vO@Z1F~zdqX{l6p-afc+YMxa
zjp~vxxok$>4Vl`K<YGMX&eHl@aK+QGhW^G;K!G!i3mFaP4YqAlOYxif1uTPF*2{+E
zHa4w)3shqiKn=Ax_gbHoVM}ZB#yS?ATH}3*A#i??I-gbaK|MCzdbB~Brg>x;#ZDt|
zcf$jg)%puV?D6dbEhStsblcxz3;sQ-&`W>$-MhV9Pi}17YG@OsspUSVj<y?x6@@;M
zdc`Zi{6(cRir??ba41Q2iT+^Duub#qor@Oi22=vsu6K5n3w>Gll9FQRFBDI~%F%sS
zN^akdCe}-TbI#h~Nf}%;vP6G7N6*P8NT9Eo&JZ{NmIp@37YC_!l0Nn%CLw?g+zNF$
zE_yKvj6#_dO~p(Ibbi4n3u<fd?s5>o$YJ(jU*4XqMxTR+JCAhlkTrPjL?nGAtx%qV
zWA+{6S?85qr6hH*HK@GYYq#+r_6oP;iShT0E`0K3tyU~^iqftu!V(_Z4H7&=vp)aK
zR8@v8|0(bRh~W;gk60JV>a<)C`-hhZU@ab|tIP727%!8Yx~ok)3!T_$urVL%B?<Vo
zj)XQbxo7r`kaM2{v_gsFAB+yKR<{Ks<B}5+1sXb$*6TyaUVij-rvmpWgiMU*xqA|`
z#Jb$$Mg~O1Q%9_io0qc;_+uZ}o##`CPU0Vryf<*Yibzo}Y5d%x15>^Wyo&1;q%u;@
z@IJO5^Az*gZ6Ul$VM&pLh-vq{-gpUx)L(eSSvg$)5u6gP106=np!`NYEV2h=P<DME
z>B}TH6eNoor+g)PY&seqImO+O6KiK|dwKc3U^knY9_iN)tiJc}`@l%yY3{@Kg^t3<
z)EO{HBu&3yCZ=^%^ZDcZ+4$Y(X>IlIiPT7WS-?`RUG@~L#u-vi^>9y9ld7x<Z1Tes
zB0|j!&^+J439)_CKvZAW4A?Wr7<ytpi%atE%N)E8w+GjWl~_dRyl@>z6C*!)Sq`@}
z_##)KR+1+EDh{zhu1htD5ocY#O{47M%UZlvtPy=<YjozR=JQ{(x1_Irm?s*ARS6HF
z*gt*B?K_g*mOyKYG}uvn+AkKft^crtK(;sSKe6_PzRfpa&OU0gqT(HNu$5}-KAw}?
zWGJe#oLs;VI5~~YVGd6wJ)hGxCwskzkNNh$*p?0-L$7Ck5WQI<>vwKWKI%uF!EnDS
zTHr^cy3--t*$Q_R?FP6pj?r&jG$Vnv3o6eMcyIA5JU|eQA9}d(+y5E|zGGeHRyAZW
zYJx2IygRS|$H7BFPDT&WadVKFMl60YTy145&^etn{9lB<RajKvyEm+&Qc9OJNVg!}
zB`qOHH;8n13^mdX14x%NLw8AsN=kP~H$x0D-<rMmdwuVJpS;%voOmL`de(E_zuenQ
z=Z5HPBL}gG{7caSNySw#{_gg`lWuUtfaY0S|7$853}gC%r}8=Kb4i;`=o3WO>)4L}
zJ}8PuZr3BL-NTCNfZzcc3um24=N@bPt;D}=+Px#}d*Sq5@r;*<?~(x`ReebVjPGbl
z0g*UtT4SDXG2DfQ0{znE*<B=<vw0k`5&QEs_*%e!EY=}!?e(>GE%cV%xDg>|x;waj
z*s<!M@%nWAAGGMP&9^%-w(JS*OnUDPN_O|)!gsO7ilX^dmxs9Xl$;Q(<ZF0_QrB$Q
zHP?%U*D&JFDMr}skI;i$br^KQ^Y!)6z1#>#FQ)fTF*B9(EXz@;2J~Z_n^i#geC(QY
zXK+<m(bFLX(Q7~@{R0|@U}a8S%?EuueL@@4M8YJH+7>{%yrdbyE?5i704}uvPPH3F
zy**Lz{pcBcI|6g?PY6F=)Irp*9^y!<4`8T^z?=V?j{X}i0e&h1>rFcj5-`ifc&0O{
zZWwR+@@bg-cMDTFd>quLjyNJVFY#@|U+IGLB)P3VwaS~k_yVTtbow1fE@_fUhu#0x
zpat&Rml%8gcazQ!M#76ZO0ojjGZ&=LeB78v$BpD;`b>My0i$QJFOXKW3<zk~fV@Bt
zPe2&ktZnT7xA<4>-r?bN(Qe|<GgX%83tSBqU;Q>pIim+w3vUKxFCvV5j7XJ@SwM8y
zQ<8XxbWq&^`GiNnGUcs;pR=i#>e{n)R&6SbnFD6<k{ebdNw&4S7QJ#}%yLT%p^pUm
z*t(JHu>)Z3IPSsZB#6RAOdc?0`#mq``6E!Ub;i#^i{`(1#qXgo7_ku0F}mf~6k|8W
zmPOd~_CmI_kfQN|tHFb)82zz*b3o=Ch+`~O=;5uzm*YdJm=&mE4((s3v1cN42I??g
zx3#{;Fs7--CF<npx_9ve+Qn=W-hrMwDGW+{*T>^}7S-RA`cXpnx-iINRib4Ewd@Pg
zvEypp-417VrN`Kv16OiTr|QKwF-ldI2$^;<ew=b)jo>~>O!BrB8Z3x-Qa2vVQK0yi
zsMF>MBo4GfUB`j$-+Gdp95ubuG|_M%$I&-A)v<gv{_ptJC~Z(76-aXt@pkpQ?dQ(v
z8Q^j#z-S^tA!sbfe%B4e799f0-tcl+XsVN?{|0`J8n_@Y-|IQ65y>aO>p5n~1mOaL
zwJ6#;pUJcg{1Ltu;deG`4OI=dxg^@zV$=Q3LJ4|rqaYXU_$S#py0&^z9`f4;pEW0o
zKBH#h16hPi19QOJiSYHeOVZfln%trgJa9wGTnwe?$g=BL^p;-a=33Wb?dVeJrYZ~@
zeW`;z-z2rG@Ga!u{cfyq>$V_<ABpRlBd`s-P$jy;f&9hN7eMC8CF8Mwi(}6XXFceq
z8u$3Rs|)(oPu<IJi7^P|`ON%BM343~V&hA_RXnUlXJ7)Z<?FO{-31w)9EM(YT_q6p
z^lx7w5(lo|HO#EK5_Hxooi%z636E2FD%RPxo10V8=Z#5Smfj#2vuQXToSKPp84#7G
zUjy-6K7k)sW)=VI80TVPjY$s?sd*YbfY;Y`qyzeT*99<F_b?B7U41PU<^4}EZ{zus
z-|QPRjOePtWqpA>NnOwF6mbvC?1Xpec@sNbhQ5D}fN&&-SU)@b)G+RK;BT>oY+bts
z+VWJcH=TO{*WBU945#mlF!Q#^+Ox3b18CQAw$}!_7D2$}gxnDx3`TRokYSg92K+-O
zq@fIC2Fark@HCAW(b(h+9o&>O5N}#?Dc5g4{ac7G0A7FW)tL9nF(l-uXRZQGm&AV4
zVKXKiG_HyrZxm2zp2Z<Dh5|<U@WB@8RwrX>kwSKIVQkX(@)h4XXM9ntk}!;OW&iWd
zv5gLBPeiB&kOYdvx9&+nS}weG_E4AFjePVZO@h4r&kB0@1~ihi!%7>P8{eGJ3M@iW
zDBmrUDe^#HDv~M$!rfTg^sHg9>ytG6zpLb&3_gs*eWsJ2t<5TxuG#^$y^w+r5X)&}
zj_0!>cp8YJMN^^4TCAL~*?lf1hR+wHCswVQ<t#u@P95F}1iol8JyxLwnKOL<{O5MA
z26eW+@NMTz$0mQ5!sum+Y6f5BS1E3C;?QwqlV}EF$Pe=zpCH4bF&(2sy7hR0K}sg`
zl>i`8f2IL!+cTntc5N|w_ZBC1<>hO{QfGh}b1XDL$~miH)?N6JtDZXL=-cL1YNFAn
zm|A6s_WIj6v5ghuWA?_OUh^aOnFRAi8cu6jQ=kj5`EG7d>>q?ZBn8En!TJN}@fvrO
zF3l|$U^uv*RiA=8-zq;JHGJE473H+rz&K`nQ2DNf2Aw`#^gvb+)7r9NRQis4tML<6
zHi!vC&-P7^npUm!NU2qCf3;eUZEvpmadzP8UF+)doxUiF2?jPFYPeRszQb@6M*i55
z*R90x)9uXY6zS7r2I7Gyz`zGx1{y$?g#p0j`+ZQsN5p-lyRKFA(|`*yrb(srpXkY@
z+N$nIn={(59Z(2hc|AYo|2ft96>M|z?(_@Q=l)aW4Tm{(S&8V;HNQJyx5Rn?Y%qJp
zb*9|re=i%XFrqOkw29+JUTrY&G@uaaq)0uoHQ#1$d!@fL7T}R0{4dR1&7i7RU=!&u
z;GU0A7z5&I{s8bYl<7gI=E{So$|_T%w7Uzh8F1!~X9d7R^DogP<^hjsrZ0|?JcIY)
zE<=P?HUJs&^HZTH<Tl|1f;roYasw^>2S-vWXS(2zv&l|9uPq{4k3liHFocFf{zDKB
zlgLnIv}vH1O(h84JVF@NcHo!KnPp(IZkw$0B@{etlXbOo7wdfec;$KCTziqnD0`1e
zsRQqL^sAW@#3d32Gn<%Ay;FpYX9yN7NS84A|M;DV0*gtczbm5<XEW|JNSzY}GAl0&
z_}J0p*6zBDhi}%#rLkq@2mJ3AS3OomcgQ;&aMm*_$KX`=Yp7WMT6xl|E<p8*l5r4r
zvj|<+&oHmmyOnE<x{AaHW;o57?%ulyXn}vS4l(o$j%)k33%^B;_S0FVPRi#_WsX34
z3TLYf+vIZ}CaCTrmj~7+Ho2T<;}H;?>pE&l{xWRGNzgl*VFUaBf=7^0$<~{+@5o0F
z$!msoXPB*H8XxaZ#;_zOaY|^FY-+XRpw@&keUYR#%NMXaQZMg1FX&mtIizR2S>Q)Y
zP&u_S`+_T4AXCWq)95wq+VN!i{`Y-Er430DoI&@@{dCatuGx2Em@)rLLvx&;h3i8w
zAI2}Q-#{6*{FRx;d^FLzQ$x>!a-O9JS7{ji1pw-9K-hFEj9II2$}ycEJbWI`Zfd!H
zdYY$<3YsRm%vGB{N+ksQCEznCtFancVokB@oJRZcib!>}ew<Wz+{y7>u?NB)6x2Mu
ze(gu09X`0n3fW}6XA<d@S!a#hwW;M-)SD5Og`j3V?+@zx>#4%nU-1mPY83ESEGOSz
zo+WlXI-%)AMq(J3uUS+_B6yw?HQ6(cX|<U8A*kSAe1Vw&s%J1semT|OL_kYN;@>L!
z(w3SuQoB}w?A}zFM$n#a4V9R8dX)#_7D`_{0PCtguMX3x3MGts-t4q^-<#2fJac%T
z54rJO5{Z^S%E{R$MfVpGhpp+}?Wx91Jc5StqER5>+Zr|Pp^^R%35?9SkaykYaiae`
zh%j@QgxD8b6bGJE^EHl!8!m~j!iy<F$jPd@b`a<7i$DfI;^sb<>bgcwNkQAd6N#0X
z6<{oeu_nDBN;UE+B@=&qKd&;j2UM?Yior@%!F0TiAkeD+F@DZ{p?H>$M&Wm&>rGau
z+aL&bQ@g#P7t+v#Ow?5fyK-c>9*p9A<V7zCd+zf#$iH|Pm49Jy#E|9z+WxJ)cN3pK
zvtw|xs*zIP=y7`nze`ok7Ql=el$lFr;KkR#)46%=^MawWY^UY5?TQ!Y#G}D2S@CcF
zlQQu^Uy_g6qM){m-VCmQ>V-?GlJei$hVCg$w*LLGh5;eMu<1{KxwYr87xT`aPJU7q
zJJPp>^|rpP$gKouBO2Y~tMIq6PF<~!J|JGQ^5bOC7NrcqSGEFIMrW9TI1BND;g(d~
zsMokLR!P+o44>kobh|=EKD^D<Z$3Vxsv5^WBY-WaiNu#<#<i2B0y6x+xZ*NuKzYkz
z@{SpKa7ubo&2VTe=*Sy~4bfJppGAcB;yu@`jr{PQ^cmlLQv2|EY&@5FKK~flgf79Y
zGVxDvSvf%UXSPpjTkFs=CF;?z$qDZh5}39~vd!YR0Y>uh{%Fr7WF;Lc<0JDq@2SNt
z>70OKwG#!qF`Z{>4>819M<3!n4L>}WUx1<#{t@3u3~@1AFZTg+vN4Aifz2d$0N+j}
zgsm0t`KHc59MuVz^P9GEXy*ceuybJ9G>g|9<2H1y*uEvFBj{uR(s90l!r0aYZ9O`{
z(0FK~J?&f!9{+xG0c~49X1SF~0gu7C!937-Kn{s+Ke_Hw-QP@%MaS@wmq0H8LqlIs
z>g<k9i0kKy4QvlGgv!zwh*aX0GWSyvw7EsG4PS^W-~Ygtj3(<bYD6@(R`iy%BWbX|
zfcRoUK;q`w`sghLGj;o8;FoNG{O}ZOawczO(`^3W)DikAEfa(;|BSsBLQmoT>J;-0
zN@#82AOEWb{kt~cfd`!ZdJ}XPWh5vTe<G}=m_~6UCO!MQDlsvQ*Dp0&dBM;#2ssQ5
z6wN&G%?F7qADbM#IC;HZFU7oZt!`{s6$>P)%{$8+$qA}q0gZTBBbJur==b^nBfH@1
z0v%ai6F$4dn#vY!S?%!^J|=7H=*{~2!1*jm;4JKMHMw_^3~GRgla%3wtuCXm`OOas
zt#B7~X_%xt&-_|Qv5tbfGB?FBakD2~!`sCwN=|T=mZ7JFpxsYhY$y-Uv!`nAep!hX
z%ye&ZvDwF1o~&@{ED%%uTSLjJT0&;tTvMI@!q(=jI`~;@w*tgXw~Z{YpAEP4vTtCQ
zt<=ozS!Et`w4Txm8-X|*BMnNXy{ac(k1|QZ;o3!nw;!4qIXI{IsMJqxBY}3c|D&n-
zZ)7hw0I4U=p-}?f@Vm0lA(3FHRB=)*3l}7f`izjXXA?|G^8^AWm+AbXQ5mn<WGaUl
zwxyJ5SD}N8>)R!30RqY%L3f#UKNt?PIF-beGn08;jiI?{O0*P@w_~V+rjun|Kn1%4
z5WPPfaH#M+THQo046Ot+v{K#8aD{6=st&^xMEjSGi8p3~XCF&!3&(TwH9tWiOUE5h
z5Y`%mf*N^yxBVS$8>a6ldJW9=EkKcdv)cgEOC|D{Z?chuF5B=ZETv};z>XKA!Ml>7
zXDB^C^N58TIF^h)<ztw37<0@2t4F&%GeUga2fF98sITzo-IY~5T55Lw1d9k2Fq9UL
z`T!Wpr$2$p!U{S-8Tned^MA2TPry(oQ0}He_Toy2iqr1Q`g>)M%k=LSQWZs5-tRjO
z=a%ZGX;A_{Q*d@eW%pDofo026Qcpg~Z+QZ-LjAA>WL83~anm@wAKaUMz9h5gsd3E)
zISk@1-!2W1bSHN@YfeN&V^PP{JYvs|(BxmnG-~Z{ypv#Z>>wR@l0*8yTxY;aOfe$p
zK^XmX@_^PJR6ua{spv%F1<^98@V`+eKXjgHH`Ne=PJlGEz0TCpi&^Fb-72BVRx2)d
z0|{1l>nDjK9go%Z0?8P=^0`fTByW|3HjKcSp5Mgi@9(hTdhLP4S6wu0TMbKlMogUO
z31+9tVVd3N`Fv5Yt;>MPXTo1y<&z<N=_7W5k*s0+EQoT9Zx!(b|E9|*UwlmX+wtL8
zL+7u=aTu$gA&^#xH8Ru8&~xKpdyBRMAm}PTBe988&ax4dkQRJ~W)G+3ZtjKFgQ^UL
zj3$)ZZlQyUq#<;v??92W%INF6avQQghF?+j-s=&(w3@D_`zr7yI$ex&^QV(I4f%GL
z(FYEKb3QXn`XL2;#xMJ`HOdtBW!udrM~8dGIUZ3&?33cjLAF-oYmyGTSJJMjvBzcE
zPMtkq)T-v$eL-VGAjRf?#2Ubpj{N#xQnj?6RAQBg_X?Gs^KtR&@3KZD1JQeaSO_v0
zf|-98Z!pgPCI3FoohC8YdG@!6oM!tn*sAj73seOqLgYeHAeeER)MgQ<-1@shdIH0^
zp>cFl<ip~4#k{A=_0^&yCf_NC#~E?tV{{2D<hcH{VP?UWK8%D)y+j-f^U;RgZy89u
zJPt&`9-4LqK=2l1x=h{>N7IcB$*75BHC7GJqj`@Xux1`}JBp&8cj7rFcN-A`FYNSn
zjgRXGfXkkcUXU%-^k`7y4FbrB%UTp^gZxPpSnAuLQ}PVZ#VYz%obzq+WwM0+665uK
zVcSL*bi9o9LATYdl;bYqf-S6E4r|l_Nsh1yU+wi}QSY_<pDch$ij6B=Fek^J4zuo$
zgV)a?Ly57b3XGS-0XXq|6miecwJ~#R@A0SGE3unSch&Q}FNbj*WauW^^+M@jU~9#D
ziKmQ)8f6P`-(WEGco($uC+ol@hRZt_y|mN+uB>RR-B}{7KC;>qK_XG%N7UN<*xnpc
zUVna@v3ft?4}e-xkv7git$aAJc2Iy*uFYPZ0=P^w|KsDgv*fMcBq)I@1r=tQpr5y9
z+dQ`5A4?=pLa~5=INg6jTJdb;s6bKju*07vK@af}(byt<Aah>pKutZcS<L^ltMGAi
z$nu~|-Ns5M5WBk!q@|Re0fu{bAiY2RY8y=lDDGJX3I&qY#|LG^lCLyoX{mTFJ=1c>
z>O7KujKZr6ccb52TRelpR}t>UVpns@B%boQ{J11M?&&Ka$vjq(BzygQ#NPmc*1sy!
z!cz!3fA=K>rcN@M@a5TVfz5wOYDnN3_DNydCv9Z#3y(dZ`en=XXPH*%HT>d7<_^QA
zd9BkqooWH0&!#KHx(-^u!e@TF`p$fS?r$zLdu8RqmuSo2yV<VCqDUbU<L`AJ%;$GC
zKL3_BvO7z_Cg@!8pCzD>W-dzJ+h|{2AFHh=-52#7_Tn<Y2`)JywH@)Qt~V%2uvc2#
z;*_6vtM^<@Tn7(gS)Ll-ZxEl?%#hdETz&l4iDnHzZ?A82ZK*u_h&$I}#o)UZukbdM
z?57&R1nk@&@uT+KbuKaKx9xd0xz?L!vwfpP{s9g1Z$A77luow>U;t66&ycf{rsUf1
zKOVTNW|M?}03-5i4W1kSipsrQo40sb3b}hKL98rqw+xx_`}Dr`MNkW>m|FhTK){c(
z^n7?gnrd5Jk*^1X&^vV9{a`m;+@`P;s2@E+@#0a_>Wb(skj+r-#mHLC-3r67-8{#9
z!1-miM=pSWxY+Onfl*WVmqp4D%~|{`LRDs`6;4l~Dzry;-AnaSGOYm<KN9C3%#HYu
zf=}fvqTlVSE5PdBbq2d6b%a>zN-O_($?tdLbPYuKhb{ji8ckA3+?F(-&6?F%uR&@T
zka7xVm?1pLQk>T^YSA<-PwypzrWu%1rm5j)&uk{gsKqn9U3Oq?c%88Go|0~N;EWC1
zzy0McNnb{aOcv&hX5xzKtAk0q@y9?+vIC<08mSU`3M1;$k2teBh;ObMFNiAJ(8bqv
z9kukM_Wi#+LUnTQM?jBO)8B!<x~s($a@4{>6Bx=4lR7z6?pCC%eTZz}2_SgvGk;aR
z!zf;=t;w_&FKx1ZFi^d?O{6FfBr~-XUK}MJO_ypTa)9xv(ogS`T-<=hB2Sy0r#5=x
zJBe=EH?7|_vHb0axczR>-k=uxvlATTnxvjcaV+&t>>3B>_?$BXslSWXjZqw}bI28Z
z%Z-M(N|IdPNymm&=j-dDnaPubvlW`bgB%_v#D014h|98ESXncDkdfRIqR!K1nb}vV
zE0xZOsG;$(%`9?Hv+py<E^RTqJ%itHM%jZjh839tMlM#wj?bISV_??;V9wQ>iGmMN
z{W))#BU${bnI3XF#j(fQT~1)uAW9Y++;Zv3oLL%uG~kJhIyh(*A7*958vW!VEyCsl
zKnI*3%RYb4H6a&nFW4|_U_~}fwgAm51B^Va)!(qncYiy<Yi8-x2Ot@AnFsK}5L#lm
z>Ja$wPv3*~G2Gg%Z7hct<KNd$nxGE*vMrKTug%w<6--X6E~Ax+A|iE@jC#)mn16*!
zPvF$jMtirqc+UoS>Txd0$66<(4z7I-OGy}Xb3cq`{J(XWK`T;7;n22nT^4|{zow~n
zYxHxEjL^fK`*ypashon5N~Z@gi%fN^x29^ic~J@$LbF(_5Mc^T$0wttB{9Oy_y1ab
zqowp9YN{ZIPXJ6VMb48eK9w{Sw4;<gv{Ft2jB*t2^%zhoopnB3ouDQwgA+Ugu)Ahk
z9B1m;-%7!j*sc3G3SW7xQNbgi8PMVJJ}il|UteEuxg68j9T3cwT{(UBhF5?&Z4pe$
zr0?w%<j{<rYqtwW5seA<`9tr8o3eoZ>jWl0c0~cmr<=&izqwonAPS0tR{MM!WvMU}
zDn7ro48-X@@Zj+ovAiJvjE_kNB<AES0#WO{P-eg3#8{zs+e`9#H~Wdq_?H|=dVvZH
zAorToy=sS0x*6PX%?g6L2a2T_p)+C=h}&UfnTS}=B}CXA=hlY>NJ=%)KSoEc?vnpf
znUxE}jdh+j1L*~X=;Pfn__90YmUygVr418xGU~I%vfs7d(WH8^(Y(SiZlp_sfxgN$
zd=`865^ZF-x#G!l3~2;_e+|0_2@hV2#|SY3Y4Lf&o6$T^WDuK=_9tIG>#Vlpo(5uV
zK4AkBETO62_%LbQ2mx_l<P~KDSWOnqkKN)O?L9_OI@tc)>Nmk!ztQmlh`~kYGa1I-
zPb9x^{FdrA9F(mS0c59x-dNVf^R$6eW_EHAN|OP(UZ+8*NuA;u-+k%Eh0zW%0cy2!
z@==>AW7yx0U_57b0kM(Y&<>aQnzBPiiI<0vd%=rT8%&=kQS6i<RB{%Xn|yK>^Tgxy
zd5(9TCv%J`Q0X6?UY%Vu38$z@`42x0FJ9q8Y%z*EBO}n0ldnL#O`Bp&)7m$tSUol5
zTe8H41c}K@0XxF0&Q>}8d}RtkB~{5eN<=e(;bssR>%ls_MPVSM`QleY_~f0s<+c=!
z<=e)uQ(Fp2+%V7LXSA`%VhwjdVp-_BSCm?hKCzebP|c_h+qixElbM?0q?<*j!mjEC
z;XOeP6b^j|X5ns={72&nCO=lxvUz2`q%8IS3yA+)vqJ}H&Qo%8!;tGsE5qprtBeta
z)2Ui1f>tiv3=90A=UYsb=LNw85fe{1btTgZ`4uqpsppamRYEG`64?RDf7nR5A{Z+|
zD~#>$%xYJ$&J7-o&i+3Bp`q(<-WCWerS}$Y6Oi~;{FmTHBPd0j6VDi0-IyC$<~cQN
zG#dG~+(WR~B3OeHXGg4YwCWfe<i#aO#5VODOr=BS3ro>nvW+4>sMW?PK>|$3hO4QF
z4>~@2e>p@MKl6Ma=ZO4N_l0-7Rc^WjD_t#%9>3fhON3~YxTv{~#*Ex(AoBCNb3wbh
zS&GFMl#0YH3VpxWJP`~qR1us50sz4!(ET7t#@MB)x$e*R!%iR|hCvG)@w|;kZ)wQ?
zYQnNS@%d#uQr|Db=4Xw{ex0M;Rcm;;>hyTany^vvNlr0J0;{bpQNg57UhRylJ6k-D
zz;3;s?Y9~r(;4m2UQ;th=)cE0z0X4vwwvP$`Fv_SX0I;RlvG&!!)W^=n5AqbX7I^}
zAN?_+HzuB&Dk=Im!Sp5IC?dUOmtkaF_;C;_rF<-X^wURR4{Oo3cojHoad|Cj{|aY8
zTru^&3iK*oi;17yP{pNq^NCWNt@Y!*W(&L<1=|g{IP`n?&TbAk4fGV6uSI~A;|Oqy
z%=##41@jj|og2MQzZT*3P<%IZ4xJD%Z!@6i)o1r6fEe0PeKvb}dBRDRuq&;Q1lyaE
zs~j|WqacL0Tlw5%U1_zq!_@wuv@++<$_P=?5s3ZMZdZzF^03eDc-*22<qE6afgD<R
zSsq~PnE48~X7xc$Y@D7-%@+@=^%xoc9&Y7W`Y3YYcYQD1XU3Raw7fq4g^lLT0H~zE
z=*=^>?>}qbWEwKFJDdQF(?x)=YVSr4pjravkCT9OxT)wRAaonQz7eoLMD$WU=0f^v
z>|9G;oDE&4kNGitY#q7xc_7dUfXeBr4u(|grF*z9K&rN+V(gqIL}|dYzW&L=oj{h!
zp4XF6_b;pBlLodGovnQh0&EY~f2z1)zMKQZK+H4vZeP3W`xmxT@WqQ;>E_3_i;!WN
zf9r7tOQ>Ei!bS}LW8c|rKpM<#RYl1@0;UbjprW$VZFbLyZfU}1nd(9#`LyY`>%)hY
zss&T^tg=B$p>#AHVu5N9Vpx%So-+EGHMQ7`C53c0ox&->>?%H+pCps8!mlMc*N?pD
zKXaR1fc*9rs)t><<1EV)wA5sx3XUZo)?f3f9&b!DbEO`S2?kQc65qc}>Ufwj<adbF
zyJ-d9%#$*femIv%xp-;!STPy@$Ifsd%P@?doLc$#@KBimGcn)qt_J(Wq{il*Rz<$V
znU$7MX3`;XO^h((u*=7<1zo%*k|>b(=SwEHWuzlcKU46zzA%vbeLHT4)FzFyJM@LG
z69OE*6&A!kjd`>p^bR|T5nJpgSDzx-<RrL9d#)G^l?X;stxA18+LPP67x=zc)UvCu
zQorq1VJAHrQ%cM{EQZ#O{WvpqA!(7eCNVD28dv)vJ8gi-6&x!>ou)G`fUhFsEg#lU
z#mCXhJXaE&CAM;-T>-vYhDO)X<yAJLS)1!5i@xup(acD3QRx7H+JzaJ$M9kRVz(iZ
zH7lW)fiN7MhF4>tN*Wu`sTq1!DR)qF+2E7q`P6UkBz6%KS1#sH`mibOAY$ubtOk>~
z{*1|$6HOb~o8@y0Z0CLt2aGLw(oJF71C;Cz<K01JL(r$^Nlbn;MvtdUOqw@|xvk4h
zoiKK-!`MaaC}V&Uw_CQXdxCe<r7V%^mi#jGkyu|qOP~i;&ueVPf2sFh4Z%jLzs`Z8
zubooVr4vn-Ws;`GlkBd~sN71VIVBLd?{{DdaJToj_JW4>4A%l01ohUV{f^G@Np5cU
z9pAJVDj8D)D7Nsl&%U&HEs!=V%yd0~b#jEb5J%txH<ACAdFJZM-Qp!}63Hb(Z6;mB
z+hw3^2RJ|n94%XTs2}5b={{C<bzCitK3p|vd`vPRiJBKRLsiFAibh4kNf`zuCL(ix
zVN<(5c491BZT%uF$$@@7vg-k|ekB(vBPXC(qy-tKbc=>Jf;1C!#iE)#6%#3Z2GU6f
z|0+q=nya@@>@`LhGLOyn65D(I=G+V4DbmyPXEM}l%dSmN{STDv(Gb{M4pw2mY7K$m
zY*`90W3d!zF-OQ#;aS_m2~QMpgDaAu?lj~OgxQN<TeNdLmYm}3Y~GfCMbV{MQl6EI
z<G%D&h{niX4>KWAfDN8#>L3Ce1&8J|iJ$xW+%IgUI4s(S4ROEpHpVB#KZQ#$a1#&L
z%HW`Fo&?nBoCO4#5L92!J!~EWG`tJS%U4W`A4Rcn!#9@J$%7E_G>Zg&f751GX^ll`
z?eltT)}!Q`ft_`l0?_b-6<AiZ4=5jIQhX|mxJZRRSNIOqK2H%q>*G?#B#!LH`=GXe
zI|2&m?|_Xp(J`<uwp-uPr^Z2SKP6`Qo%1i<?x?EMbF<fHfDSWFA}TLQoIi3Y7@~tZ
zpo-+?JDDFncPKo+EUd1qeAZi6dJ(O5Q|pR_X^fqU!^O~}8uTrKs@eYAE9Pd%`{s=t
zAdabGtHGjY>~iiya*Y^~Hd=qUqu(Y0de6JYp}y|@V?doyun1tU8x}&tj-y`}yt@l~
zmRekSted5ub)*9ho1?<pm0G#o<g@WmD#Tyg>-3KsBc1-QIVnZnS|~8m^%<ZCe1;d1
z9;Mqk<9*ud3FSIo)jOaP*xAc>(RP=peBi9m=%&>Y!fG}icv`JV!{PH7V8HP4q_zBB
zfI3>%X`$!FHauFk^Lw=Hp-8TAX+47ZdZ{sl4Pi^v30I(P^QacpVe=zo!A)TSZn{Jc
zO*{tkKqzp8*_AKhwGH0458{*cL*18KNvbYu^<``U3xWyGx&j~Okxq(ZTeIb|@-yw0
zN2>PRXmE|GMq_<f2k?nL@#e;lsWtw1Mz+V<ajD_CaC5eY^Py)v)hxd$nFkD=qSyNq
zm7PS?V6*VIR<USg6cYpx`tI`s+dk039RwiHR$&|2+BPRazkCTu8hSQVd=T?I`PJL!
z%F~)R{l=z<lPRTld5ESazr9s@@BI2BM{$CQVA?)XFUto|08*xV;J5ddVdT!u3nA$n
zW;X>B#(`hV7K(#z9ij3$kF~yQKT(&?ug7yx<>!Q&+L+3h+;_4pR}UsU*FtCj;nVBV
z1JR0p`$pY8PdHb=IL6sql!ytl7?*)x=?ZE^*#LB)br(LE<0T^M1!|BC+5<kHKkPQc
ziS#ymXFz)2G~E2L9H#sGzpO%l=EoC?007gqy%)z>p1CqR^CN;%Ijy}|(w5BKt1W;$
zhq@_N3pcHL+I*5KVbG5CWPEUxX2jiC>haljbN{(1(LjjWm5&-UXM6Z|u%@6s?u2IZ
zCkFCzwf*{OnoeDUypqa*x0W)Q*Jyw|bp$p3>~_%lo0Kr*Yv|uu5mXPIe=hJ#UphT*
z8Pq!<G5)uq_jcTQZNy~ml5O=6Q49Q-m+@NfO?sGLX!wqdlVMD+l#s)JBzXWnuJ~(|
zUt;6`os^@Ad|i0iNc~2o)nW3KtS(p8__gNK5@GY(M6M_<z9?;t*gZ{N2~#4Hu5>_w
zt|z#z{svr^rU#e@C#+@PpYgwa&gx8#PDYh;$VeGgZFo!guaaBvwusYTf!)8N?vD_?
z;>QqtdVs||Bh*aFE)NcG4#<Q6W6~g?%i(7()*Ax^Yuj+xLFx?S3uHO;F_0u*Qc{8x
zqsxv6-;}w-4X2iUM^{Ia^~aNyZ=IJo+734YuG>~C6VX>s!5#+USZwpEA|B0+tX+@U
zQ5r<`awqNe<i`Kx(+~UUoU+N<6a3U$cX?x^M0>vhD&yW;<K%4qZ4twTf(7`duZY;o
z>84pR-!^a8o&KK@;>uEQDY8X+znvUbyXgvE1L4}uhBEK$(*3qyhqbQ?UmMgkrV=jD
zRt@!}a_k8ZtGfZOTq%<pog0j6<MaCoUBkKMzPTr3<gP+|<HCg3tWv%2t5tFAPno2D
z%3Iy$7e|IN#tA9?C{66fIG~yX$QXAy1g1u%zur=6r_Q_p^(8-U0BpfqMLOxB&)gaK
zorV<>wo|WwZ$*)&DLkb+mrB3=#9026KLq_-F;B6CPN_Y23_V9=;J!XE8gI!@`FN3=
zA6`)%AKyKkXh&@D4}^g~izS(>{P!n>i{KKAB}?u=Y_uEd-}GomrG1_D)h#0hd@%Fd
zNqMK>;n}(0@kLp3qj+OQ26pS1*5g&_FO-BJBl-#g4DNYOS7}WCMPqb3GbuIyfLdDe
z1$jB-+b^wDEi{2qRuW;WT){sOlP+TOz9FVd%NElgdICxq^zrf~EgGXbcwnDNwI&>b
zY8fa!cwVQ`Q-tI@Ofta=kD5Ny;*2$)vQ5fNsFH`#TXlJDB^+baY%Txw^^a$lku7b?
zhL8Fm<|{hX7MlC0*N01~hA~}g{s2_oOKO_IDL_?nmqBvPFDdJ#06&>4z=qZan>FsL
z)*Tib36z=xE*v(~e$CMaM7%VXyZHV3w#o_0hAVLsRgCf#ejM+z$2NZpGbSE2lxdy9
ze|o~@!C?I}thPT-<sBkcy?~`ttRlq_A$GS^=P*d&k!(qts<d%=j|}Vf1o(l<UQx|I
zrWm_G9=@{<9v+9<;af!y5MKJ^c%F1rt@*u}b-vEN+&=exWxL<4l2761d!t%6Yi3qg
z8*BF(Ccj(dlz^?zZfmdp2m1dX0>DTH3AyZA)Rh8{7z|%$VV=VUmC#)V9HArkoH72E
zfi{U+@Qjuqs|RL{1MVVB%%hk0rhgJ?z^8QxqW@E&S`n|X_4(|rB#VTk{lC^Kq(71*
z)I{Ll^Ek&Kn~psHeH>OGyA{H%!YO9Sa3*#e`1)nD5@*Xah0kuDA;vd#w%7tEvzl!9
zL|Vr+9(|Vgg_@trp9<e_FsJ)hc7o1<&lRWOZS<N~&s<m_eFd`g;?)7)IO)Q|w_nx2
zd1lGRc*ghqkKp~-!-{w7D=Q5Ne(xV%H2H*%N6^drf(f5e`G1A;5sZjH;f2NJ3Jt)B
zhH3l=XKk{}H<~(cux)@5Q)FjGb%Rbil#G?iP8;TuC!F%z;0rL(^xZ)Xnv0!cMbo)3
zTH|c4IfBmnLhyT=EwQ)DK&SmV4R`W$ADW>4qaUZ>JM1yNrPgLo6_wGzT^)&>;_&xY
zaY^q9&l$$5wt}eh;B4`Mr)p>PVUNZa)Q+!_9NAMBzQ4#rsfp63_yWQnoc|tD#JpqI
z!>8dDlilT`5kLPi&C9O6c!xEUt4}|WL2k0+p#EE0?9+SRx8PV&FT4KFVtL&niWSkj
zru4J;Snm(GUIx<6&Tzl%E-Z<CS_twI*4<~WqzYMJ&Y$)Jay7qT?tSZU^GmDj(s=yp
zIeFNZ1j-F}NO~txYMGUQIieP&dA9fm!DafbMuR!ba$qpW$L1TzlBknmC}<S-_KZ0B
zB^RW8cD?<$3-NdiCN^SX+2>aOcsIaYeWL&z&W!HPPpb`{p8ifF9URxR4*i#qdNE-r
zLdykSQThWR$KgB3pR)uXz9?&j7snlxhmHTrX%<)gp5S#JSHUC%W`Bco(6>MR<Y<i5
z8;b2Or{S<MkslclneV#4Dks*?Y`IM0zd^_3_fx^t^gihNU2;s69F&-fY5=2Jv52VD
z$Y;2N_2f&$Xek0v0phT(nkUhDQ?hOPZKr6rLJ_kdYeI!KJXIr(w|<w+&Nffo?vCt-
z*YA^;smgZ#=o>UGB<D)P_&+R=pFQQvTz&pHr!pB~B>_snR1nu5h81k{o-&>keA9!J
z9`46?-t=>}+Ctm-ytO3m@|GIc(yf*NRpF$TM@*RI9L=J`!Q<J#3^7qfXEVdAT!VvJ
zk4}rjr56agl~hv4#EUJIcOKc=J&9f)&W!$?p04+f#P)9Z?EWt#6sa-pfc>gl296~!
zu8vTf%8Q?@GIM!Sn7GrD^O(YtYsqLr)O|`G@qN#SS+a&dq<6nG)r(Ipl&CIx#`#=Y
zg006ylYXzPvTKLQFCxBP&BUrQRz4^skqKkU$+|0tOzD$?4gz*p9*FVdaI}RsNJ~$V
zf$U?LIR5-;CVZ56&w@L3&in4DDJuCd^<kx(!XV(V6uJg;lIyDZECkPpRUT(p`A_*D
z=&ve5@?J%(5{{}CkI2w>^2JiPK;Po;AZu*;dwK;xypJMJt$i-%6P+G*vJe|dgelZU
z9a&DG2-Ml3L*2T%7k3>Mgq7!tTEotAT1DUwK`&so-*-16HTJnFijq;C?V@DYS?jyX
zp@ubuv%rH_!jHeu^-s^X4QT;VDs{-9{IV6r?zf(H^+o{<MZ@l(TZ4oH`=#ALfeZ!Q
z!tguua~&@DUG*h?tMg&$GtWuH`co5cJ2?NnUe~6t-zekod*1!vt^qjzp@9kNY~LzC
z&%I9MEd$Ep7aSq_M2?Yc+9KBZ%6i_#I}C@(=+c^{kHs~Ls~(o*27gJ`Qn_E=NKHqU
zXl)!#EevJ>?IJDaPnceUHpy1Uhd0$EhF)*C6leP=rdDD(FW3A%psf{pv(H&kPJpn1
zgOcZ0X23nEmkm)C-0z#KlFNZ#qyeB7uQjU+H$!jEQ|KwIT3luw1hQQgSMUGvA4i!T
z9aAArlbWCLM1wq1i^c%4rxCJ;JcTvbnQojFO=6w($oZ(nR@AkykNz6_>QsypZtPc4
zRAwT-B#WSOaH7)^Zxjk|QkSZPsZupDDB6^b9*i9Y<}-R--j`lI7~frFIJn%%7blN0
znb1e<mv=1*`!8tU3gwq2?)NIlNu(UD0(D&$=o`XQ`7VBHVvh^6oeWBE#L|rF)7>T{
zQ%a~tVyd|#t`T1ZtJ&aBRIwO@;J@;;77@Kt3rJ)2$$ffotMee+P&Pg9E(J8^&GNgl
zAN1m*6#syn>}{Z=smsE|{~}}6Z70+9V~d@vGqHh_WP%1_O)_Mu=PPr6JadBg*FbTl
z7DiSK0!I?i)-vMls16?Xd72tXn>#K(R{uXCpWjHxCu2-(5efP5K4tQo->Os|jO-@r
zVH5~=*L&ZC>DAthv9UxbALex%gng@*Ft|eqCCZ)ec&{`%6&^p7%9^1EI9A@tDcSy0
zWuc4N`t&v1QEzg|Yt@x)&LRUhrDlNCwi%EUwVG`3WQe>qi9VnCgb)KJh*D@!UBk`^
zn4UsfIH8Ob3}vlq?s-V{+t%M5Z+E||+yNj`K7Ng4&k;ekr(7R90o&lib%6ytx3gE_
z-R&5N5<Fb{rB)LMQq<jGcm336#M4;>@=ph_UPR(CDA^Ha$35u_BbF^FT#_iKjg5Rr
zQ|s@u$sWZZ83n1A&DQJOj2QH)=Q(G!JPP34T6AS@1nN>GY`<;~DT0J4>QDqt>d5(q
z-I?^GekaxPN1%!$<^LK(M)edtU!vP~vV9Ys40!5KQ`P{$*+OBECXZ)U4m)7$T?y7T
zN#@bMn^UZD?c1m4!#OIUU-@23zYHBEuRUD!dTPxZ2VuwMODk!8e7$q|3JkRfR$WN(
z_8>a8#hfrw(k5X4NrSbr2GpBCqfr7ryO780+_11!JBbq*Nj_{dg!ek9ES7<x0cPp1
zc9z?G2A|{!?z%d*RUW+S23V4vSfPEUqs1efisx+U{y;WyXz%1vBE1qh8>H{Smmz4C
zIhvFA<<N;z-I}y7H*D*rUZg<T_-YE6lMIy&5_||J>#Wnc`EI<>qg~UJiVudqA!PQW
zKS~J6<gN1nLyMGB_iGcy@8-wqhkE$TnQaYFb;L4bpUZa(Hk=Xyr@7#vW_9B%uG5g+
zx!@BK@pSiT+6+$4OJ_(A<@%(nEm{EXGu^=3N-VlR2Z-`g#4#mc_V@Oj^7sPkerxTT
zE&egE0(>Wy>$uu^TvlG?A~ymU(KsONeP_<j$iLH@f(Mre?;o8^ss^5t&dw91UDTWv
z96*ES8J6-W`!*VDJOCv@VN)+GbB&@wjmsY4i#DU|%dF7O{pzKQwowRS4rb1EeiR3?
z=tXaR9SSI8CubC@PhV(H6H&!4>nxn=Co{?v_Pf&)2dA(O@%(Dd=`!CQ7!)IKto|{>
zqC{aPg5P7J!m65$TNQURPHMl7Nwr`376uB0ir=ez+w3LDU3jEWitIT*Xgi5Gw{12p
zN?<BK6?$d=2@OLkAYVkG*=yr!2kvju;;S-fT5ZGOPVKD?`Me3=`+r?=hN0r|q7o!@
z`to*Kw9y;Xv8Nj7gjlG|;b3!B!j5>&sgV!Qa2;Ienep!$3lbvIbnt10K&A$k4-bpN
z^scTH&t;Qj1qZe)2MP@dG=8XdvEt^hqlJvKqb&vp9s!amCurp9IZ#D2U4(V^_a9Qv
zO&*A1WqGAr8K-}G`78`db&-i)L2aU(yFZ)bx$K_Gz^Mf?(xrJ7+z6DNiynUNcs(k3
z%PN0iZe0dAg!A%it03r|oK(J<g(-a2G@W$w23|j99eP4XLR-{szVq4ZX<9|0#c6vA
zbk5QC0b_dq`Rpf;2jWxC3S%o?zwZED{9E+28%6#0-)kuu&@?Gp5nE=G(_<8O3K~<U
zN#|NH<}%=IU{|rfuF$!TueC~5yBT58is+d|1DtDl_Yo^jOCC%fm-a{Y4#58E8plDd
z-#ie{bY)H@g-GM=2@W}RJ+qAh{}2Uzdp&CWWnV=%+Z3=&dw~4UpzvSa?h_129XdTM
zOWJwAyvX!#q;7@)7W4G)y_K9_^ktF+4?3Im;>Y73gpZvKzGC+y_3^OP1%%bq8m01y
zI~n>Mos-v>FiN(1tOxwb;ZQ8fvYR;SvuQm<x8PWA;Ez_HyVZN=KsJR}5>f+v?B}nJ
zHnWXR>y>?o;!u7l3$Ai%9bU-)xVqvya_kYr7#mi+%lQ!f6du9LF53V2N{i=ZVB#3i
zJeyH3{>6D6Oguy)0l}*1rT_x=9rmY7#fr{;hFY(XAWQflZtLN7_nUBO(#x*4V?Cc&
z@l4O)3-83*530@~AN?jR@ZocG5+c<mXGe}8{bO-KJP>xC_~GHNrt^sQv&7iBlaX4P
z{L><m=>A{nJQm&q6GPbtCWifgqKXDNA>SPw9miCRIZkKv9o-qToq~NW*UKm6Svi+5
z*AVk#0W%K(1L0y@+3>l=j0Bj3BfCi&PAyeFjM<zlHJhL24jq8l|G*=hR)1I@&0MUX
zccX7SC96|vetx6cd3Y85Ra+l%S>QX67@IWBZ4Unx1nWB$qu^Ct-R@?;tO*a%7zWy#
z65D41W}O|22>Sb$+tP~HyDLDCP32$5Y{s)DQ1L9AHRu>~Qe~HH18y#k2YzrlF(v6u
zV|yzqwtDvOs;k+o!cO#|MD`H};@~-N8LBt81iVNS>I*|${s3y9JtJTHgwt2I3GMx_
zDc_H0U38qZa#CGv{}QH4PDlFA8aB{x6$~5&V#BBc<RPPEMzt%)kz7&g)s2zDW3ewE
zRxkIiu$KScU~c{d12Tc~`hyP%sY_lLDh~6)D>gYqUyC-fNV%f`EplrKbG%h|q;-k7
z3J&<DmhHLu?Ya4z4n{x9`Z7Q<rnUh<pNP{wr#^OvQFgJ7?67_)=@K$8A=_^Q9aN<E
z6kFsp7f3oegppWK@<F&st2;f5Q**$+Y$rHU2F-)<F9YsV5_4X!e699<tz_#9F_?T8
z$JYBFdSz-QV6@hyTtX`PgKD2cJj`eZWPYYq@SY_<FaWt(u~zx|;Zg3v3eQO;Ps>Xa
z2=fWi4yWS`PXO5EMqO>cUdPz#P!3~xwzjWWcpiS|`AY%QCXth8;vhr&&!mQS7<v(e
zGvF-h82EpmDt!8aG={vlPG?t$E9hqVQ~-9KCv987#4!Fo$zMQvK#B8@(y2Qrk%Cd&
zUEi(-jKO^dcB97{MX^IhTip7N@(kzTNK%L9|2zT&ZpQ#r_^K@+;~X1TA>05VP!DK*
zjLt=I^ONjXv^xQgzSZDoUj#0xdVo#p@XGvcq_Qp;dJb&8>&3i%dW$8SAJ=5ED_}K~
zgD+Xxt4D}#uJJKB<3!;ly)d;ur9G+28eyYfw*d74%V3mqM_i!ZjiQi!y)mVoPl2_g
z<2BY*zcnY@gNsu6jf~?cWyng1U+36o*%P%Nhr2pGk>jg}pD27ggK_jtA5eCrB#T?B
zCLW-%D4lehfvXbln3OQaepWoIYj|j9-WleWN(0Js0Dbe6p5k5NyTEKECGFoNPaqjs
zA~@!O(|ILvcIwL{Yrw$ow1_o7!wwP`eklQpdlYWwQXsv0P>DZ|<g|`FLEyG@L~}+C
zV*R1_w@w^xQO{2ktqV2t_>s@y!4Yp6GbXaBoTFHUTUt;~Pr-U*EBzPnyP?I~YZG;v
z=xzhC2XQU}S|e2-)-fS7oq+A2?Es2MNlxl`462P&b^|FTFEb32peepWB5=$>@1`L0
zVW0C<fA4`3gqC!#zGQVi9CR*yWoPY&$K6kLlJjRb+aVkkX<0<acW4x+qj_h={S7Qn
z+<mx~FOx)?QgYNuMSbeU?rciWsSr0|!snQBt-{Bz0Lcz>5!_Wjf1cIvb_+H{G)a2l
zKW}*?^_@t4e@{Mn!2}m0l5KzEf_+xrHs^EGHj#cSjr6y#Ig#v-x$jp}SS^41BE}9+
zs_wSkEh~jhqk}A|L61>6#l-c{Q>6C4`n09Y5LZc(ugpLB@{Y$=h#(r(G=X50G^Eog
zVPv%wHVIIzL@60>M(NTc6A53Wi4>?|(VC}UY==DCb4<!F44@bdw`}~-l3|hl$qy&K
zm5Be#uVQ@JmiO)4tCFY`Te<JEDeFJId1hf(qjYlp-fjA;pfL~WY-7E}P8mg5zxV&+
z@?V($af_E%^>NPUXH{Ps8h^Mq^>IIcPxYYbDu%gnU+4Ll#z@t-LJcB9+^8oyBk_vw
zQ?B*(?kXD}VT(I`^BEheFMNaI(5^m2<d$I3%yH}fFj;-La=rNXOZ{`6DNbf5P&nt1
zq{4?TIn1YO!mC%o^(!!6Dan9)K4W^7wx_*@Q2(YCFsxDh+ORT=65h$ITjmEk)$E*z
zeeaH&pl!Zz(L}{6F;Ojcq+Qz$mCDa4k%usTp+$M@Yod(EyuhJ-F@~|=GmTrbg-$%|
zTYU@WLUmqM(ol`+j<w*6<#ekLJ=%g*R~~g9gvFSTk|C%Ogq8HQwntQAcA>qqW8NGF
zO3rr&9X~Gi(M=_9-t3c!wcpH{^lt(pyHqx2M9%$XF_6h&^0cb`uAXgAKpGH1wQJt6
zj78OS!~v|3{X-VGanzqSt7e6Lw_-cp)EK(BZ1vFj5k*M=PaeVX&;moq$C2-aOQ6)d
zI;Om5UBXCb>d}#yQlc*t;fOdqmf7D2+;JCgD)ZCD*VErnNp~Pmu0=$_VeJUJP!N1l
ziIXS><vcb|IA_sWU+K2a{E2YyUnW0*o9uGGJFHXHY)^6iGY>4oGfnxYRUJ9EzpQ>q
zMG={9cjvP(ya?ebIw`i8HAc|T&g+TYsX*rwlM2({1#a6+3kFSI_wkG+zXjLTwrQZL
z^*h$&<5gI?Tdp4fU3knoMCGyroY$I7g$3jk1HE58>9L4RR0Ymtu3On^?Cqmm8=5IR
zNLZ2@XT#lV4L8u>01qJom~VO%-5TWv#H6Md8n3!0lYkq0xAJT{L@CF4PRYE+%DSFB
zSvw!0`bjst+F0R1)J#E6JzGpExp;F&wPVblO}i<leb%$5m1~QN$&jy-z4CZF)!2i~
zvE4baXi;l9rx;eWL-o19idRBVaH={ptFmcj;bHrBhi7(`NZ*d;qy+n*)=yBETM}5%
zvljtx(tCgMr<Nu<YE%MFCge*;kI9yLTA~;sOfQhGQ&$;|Ehy!3W4Hm+34GqP#IO5G
zlP&eM#SQ=Nh(tPl(NX+80)>xaR2kB@H&?R4r~7*ks*4J)_0mI4z74o6pel+eJI!()
zcylbJ9?fsX9(uS7G%yb*2XB8EOQaXr+%fLB_WcnYoCSo$XxMtJ%k*GSRJkJs{{Pok
z&?GX_^80IZ<aL=F{siLYR!O^$CM7Q~8mxw<@6Lds9)T9xclx!J8ZJwVIXi58R+dX*
zxo-irqoeAf9+^1#M$aKLs{uk>3Hn(4z8}UvG;JYc^ZgX(3seH;7omwbGFMOc>7F^y
zrMAGBvj4i?50NqWK4b<PQJ}_A95}R}4MawM>wl%({q2CoqE+&oigsIb^P||U#dYr}
zXR7TOc{q}iaZMTvellfHc1kGK!?z8k$&(Z#o$1Z?!At7X1vIPju^K}AnlE4RN`#}E
zemNXbxsociIkSb*A5qaf8wgWGBl$!>pym^Gu>19d?<YtI-5$2BBIVe5`>*V<gjr-9
z@CJ){|1e>}93#GawyaNo9)_&)YelwBif3gSebT|e#2S4kk$|Tp>(F-8D2B>UH{OQE
z=5UQ+>>~cvv+|k*Zg;uq<s$hT)3D`o{_%>%i$$;fexK(v4=whwQrnnQT>kYeSJz+B
zTgh}iz!Hd^StoJe@K4oirg(5X4e3pNTc}fH(~mi>n=cWv<9}E?W-lyUAY{|Tf}3hs
zPGue#@L-`JCwTAFp;>VHf;Ch4_(r`{Y%ze%HPofdnK8$$w(UMF)fVE!mUa`BGI%lg
zgr))dn`2k`FFj%BpCrrLKehB-(#$d6<uzGz@=podt~{9hqogF!wMz_Qwv%?oI7KJE
z5_JM8A0u7UB=0r7&-zn=$WC@_r{9@^<-0zw_j<38;RJU8Sd+;k_TI&R_VD0ij#Dek
z1W*9^orbIlxGY~hA5ivJmFgqK&u%?8PNl$CoqyDJ(DoM|{Ol;W>S@bdTTz#+Q-zQt
zGJd8pVr&mv6|K6x_GlE>CaJ<D0`>TIq@Wo&v9oY8X@;xZUA%WMfMb>O-(}d>^}Ad7
z=XtP!1WiJoHoI|G)zwXzMr<f91Kx1f6DR3=)aT4RegcgEfbm&RS$eXct=B0J*Zf^a
zz=t6(9F(%5ZP>6W8~(m(#N2W^g!-M)hcMa%?9D1EE213+1wDn61IJ64bIZ_I>hcJR
zf#-nFPK#W#hRVFnb?hOV(&4N47DIEB8F3C2XXUG9qZ&seS%k=!gdy`^4bAIszqN!>
zv4~<_ptt<5NIVrJo(sRMI|DwNzj!(ef*Ka~X8zKl0zzkHiPZRU5;8#WZC54jmin8x
zmrBpj{HgPjfV!d#j5s!(?G7{<?tALGG5p|OqU~wq0h`3vFLBorK#Mi`M`@2<mARoJ
zKlS%21Y67rX>Iq_XHZ6Aw1c*vD_#TiZ3B~ULbqak9AtRqjk^vhYgyq)VD70UaZtm?
zS_n4Ys2;zT<|N>#T4p%^>$((Rktq)9B;&Kwk+lq!5x@C~y~N+?9C?`k-gR>z#uZ?)
z-RHEX5RQ+Vzv&`aP}W;jHP-M3{HbTl?|;U1!7QwQ_E;9j(~B}b8YSH2sz-hjdc!pg
zaeGzN)YNn^sW7q5V5C2Z`|N|QN(-kVtoh+QH}7yFzhuU6Rzt@@;~+6-og3Zorr`hp
z{FZH_LIS7xN2v@11`2CV<xN<6v6y@Kk~BO4dX~FItW%x*Cl223i|2Tl#?7*Qds(z`
z7$Oh3rzst-Y|oQ=ng=ToAHH*8b-SvEJWPGKvOF40F&qB>+WYQkIQy<^U9^yb=n)Z}
zAS5~wDSC(=gNW$8*CB#LO@bk6qDCFPGfEIdH;66_VT?W))4nVBbHDHVJntXhTHp8g
zZ7s8gWmv8`=Q`)N&)Iw5Pz1ZgJ@63xHnSReCpZf2K6{0^qkuHwH(2uGZ@{A+J7eJ<
z2%-I9LsRfaRn8CRKDR8Wt8_Pi+ca9(Fd#tzfIk~7?6ZCO8?$X)s$Vje-!uC4M~h%g
z<u{tiPL&w0T+V(P%0`}LupckMNp4N^L|e7TXpYFo)#nswOOjca_GIEK9>_jm?N&0=
z@9B>mCFcfi@Y3jKwHI54qV|*~Zk*wF(kz2Y6nt@K^u$<*kK)>~Hc=t?M6_CUj=NNB
z?lq?4#qchHdRK4lm~ht%8|N#VrDFQ}3vi|p)U#f8+}lowEUrAM0Cr@bT*^{PLj7FD
z*F3dzdy*$4<IRU94sOG+zjJB}@((1rQHRP(Wqte@GV~w8QP-+2s*8ga)sd8Zrc$w7
z5QmG#echgSO84F(X$b4>5)|Jtdn2bE`V32(2Op9e#;tBRpIA=Xu=N0T#e6r*99nx4
zU%<C%s-5@|Z%$#(m6&GHm~_i-XkPdBV+u&Jz`MLgh>bm?ZS^PVS5r?*zv>Tu%(&*<
z#=q7zCX|0H;#J8bYWKGALF8|S`P4(sPHNq2wG^x`*;^!{DqtXj>4u^pR9&1YtH4O4
z!M8!^Y7DA=#uE7v!Gd5KZy4qB2p1c=CQv>G1`5vL#~fJ7l!Jt(#a38`Et{g8)#G25
zkAhFl8tGzBzf>xgZSejttQZhiRE?4V(EurWQ=c-Tuy+0d%P#?!Nweq7S4&p3hiU>s
zYB?a@cMjg^Ol2tc(iuHSe?wy>y7=b)>x0|hUOW|GmgOBxE37GI|4ZR0*4R(afcayr
zwH~m6WaAQV$H<8S<U@D416Cr|JG-#QS<ovVRFl+Gecsc)?9F&aP7yEIYRr1I<M{19
zpW9B?p9ND7P6+4iEe3lr@4=VAmgr=aCU^=E&9Q&Wbk9iZ9zgr$f{1R5vv#~qn1@<n
zWL0S-v`>oqmwwI7J!%=bJgr3Kev{VlP{YvyoWoq9Z+rtsPg>*pZ3gw!lg*teEcz((
zE{)RTj<=S-#8RSsKHc~5J;O>oCkCXfHWzT<-qN8OxpFo9#F+s?q#cP)56|DZ=9EH*
zzkBYu>;d2Y7zQF6t4)91xS+4R<R*<lOS|}$JuycS<Mvbpl69|LFHmRDDnm@^gOjTF
z;X3%eeiee}DRA4lh3C14NFc8g9bO->e^QPnuFPjA8>cR?q^CcJTB#__JWkr&12icV
zOt;FviCJ|lRMbzK%TtXZ$?d)`@9n{sBeY~L+J;Myc3ytF6{SF!I=DD=!8c6{Lffqt
zr;dG;u<h{n(8rJdI`en@3>>rc7;@SC4WRa8oyEaBB%pGvPnZ~a{DP&UFJ@g+d^F?j
zt$mf1!1FgWej}ko1w#m>NZn)BLse4O<6>n5_m#4S0lr5!R*7RV%Ikp498cLt>&K&F
zfQUyRu%TaFNifN&*=qld-P9&Zpz|Xy#aN*{*{6uhq1vNQ&nBqOa~}$+pv3Xh<P*8%
zS&xeF7)nc1F0xHO%s$n$Qfp6PO}vLLq8+NnQZmFPaW%QByvFho#0%&yeD##jKFu!8
zo&a$nMf4QwEi%L!kYgH<^HnNiL3Hu5>9HDwQ!9T7Plveg(8(rMDM!Q!Elxo6e`E5#
zQ6(M(Ivt}AfJ;7(0yu}C<TcGW>H3Y_S^@vsCd{E{IQZGtne%R2GqA#YI`UFMZ_dv+
z@3Z|3IKRBwUieU<t2D5Q7&FjMcTe~K-YWuX@2<ku0}oX1=Y8b*G5eOf7B*XY1=fe$
zDQ7%#{Ps3ReCzGl%_LSX+qJkV!J#Xqhm=2WQAA618{1Y82wd5pM2BZ3=`Ox-98kFK
z<PIfjSzm1&mQ@EhDM}*Bs?q@fC+`D?1Pd#zCDU}YVU%LJ=?;ySd|$tLf-mEG<l!h^
zlA>^j$3eNzAKuy@z~#A1T9_yulf}_}D(#!x0D5k@Q6ZytPrx$_f1_apq5KrI^f3`3
z8bVoE>LD7aX~a8{7+eF}jt}++X{B;zn%y$H5%YE}g6{MUDr))cTyHXZcESRHMTVm#
zi3DJ^nohjAtbhm)rqPn?&bMsu{l$tW4zk42#acJXSUhOuU{RA{n6S$#jpO&fo#)fd
zCxx?<dHjGG&A9XYeMupk!m1gM3YZ=J<Bd`NW-ge!j^tq`wT7LbX#d525<2>8!clbz
zIq&0;W#4#GI@apoqFZl6WT^rkgg9c0buu?@H(Mkig>1tgW+#_-r+xT6*y2)E#MYF`
zL4AF=&?`GZ#r5@>$5S?9rC1067H<_}6>BctEql{F^JlyCwh$IrT(Rh0Z|n^{sl}Gb
zT#RS@4Yd5i0)T3&+1Y(Cg?)Q88n)H3Fn*cW()HIzf$}%%TO3?4VQ<X*^2Zy=K7c0x
zUYfT+jhJ)__BDU_i&@VOm?h4HL`j%R%R3*wlzXx4@l$rY8IT9*qk`#w6bpgs+0m_*
z`|K%%UhA2C`t#aFmXa^!^Vx3a$6J4B0z=OZ#*=o}fxY>DDVx-tPWh?&8IZ^ma%n?z
zWyDp}Q;@It(8RXu4*G~y=;}<7oM^scScsqEwO{P@!wmyxvU9&U%Uing#71)ARR$Gp
zT?Q_>%AuuBnpJJ)4mymGHM;(*KO4Seu-px*GnH}wjIKFb5&01KZR<Zl3DV2&pSI8+
zPBvb@{IBLsP-`V&#E-d>f>evi^VTPyLz(Kh>s@E(CT|)qcNpC4`(9ilAf_7T<DA^Z
zyYQeL6e4`^YF+(}{9#ix|LvFKF6K3ur{yh|(WjyS1K*nC$Vl{-Cbml|+RuK)c1P39
zM3BW)X5-$3vC-b7Y<X)Xh>X|2KDk{WCm;_yo>Zwlm*l1U;hmfI1LL)cC$<B2tQp9i
z9UoU$SNK&!BlE0eA2aHqIl(XPOE2AngEIkLn@_><Y^3>huq?ZzU(Qe7<TBJ~e)RW<
z?A`wE{KEI<b41T0V6@C=2lE5;xkh%;FrsBzox=xyp*Canr=C$mhAY2<LkBK5YTtrf
z5kM`wOb#C}br*&o-hNUAtRL||`OO=#^x!%Win7o#Wy@XqW2GjwG2)BJ$DV2>LfR}b
z_f&+d9%RfM@?MV@@<GNu{9+!B7?WQCFdA;#)w0gm2Yfkb*^8!nBrkp`QOY>Ks_rci
zs-c)Fc#^CxyqLJnRJiAsQ#Jg|v?{rH->72Z^ql}9t@ZGc$;&x#tmr5`(h%PClKwrY
z$}RCYjjz?4{<`tZt#`jvcU*2%hlz*<+J;wN?eO6YFZ_UKkh>4j3;P;xM^n4+o3W6&
zbp{iOk`R>8W}>t5AoUdj0<~Vw`-bRxpZkT8&_4tpK-jpko}TcBlljPcxoo(bb=P7c
zis08+u?IHF)w;n%9TUtv0xK*VJwI>aNy9!Ps}Ftz8o0Xa8T|09?js3H-~D0u)bbTT
zA8Su_I`N@sQar(<9eq9!9M43maQiAFBv!O8Je@ld&KPZ}wCqW6@^uV#q)E8eQ&5P&
zRw+uSgiRXD*KgBkWILsX{80yfl@jCW3JPBI-39Qw@6fi)`{Y9&?w>Or#`jLW0Mw0}
z3?P2-mL&ukT|=nKQ`LBOIM+;nU#80rOGnFX`2`k#^k}8mTB6)7TI5_X9zD}4@jb9V
z^vVLD%~h-w6^*az#bk)uZu+E5wjb7(M?qQeUsqRSv@FaS`Uil*_HP=&fUYIrgE4HM
z>k16;zGnQTdb+iy`RUmdG{`8{`O0kqb}37W7abA#&EPP0T?$lrsard_?k)9%P5rM`
zayt{sfmsF~ni+lI+!BId;e?lZsa3_CL?eE&^QI^P*wNz8WjO`D@cUeGRu((53m!dr
z8#b8Efh&9wsRc&P28+j0FUCpNDOSpU0NWKNFFd+k4J*255^9bozr=7m6=J2%-(CJ$
zJF2v4jV`%dY{dkA*l``wbK#;!`|;tB8{nDEpvJJA5k7TGS{+LS2cL2~o0x5Q;!qJR
zqSre0#iO-^G!ZvuI@2R4MjXG0z4xTJOv+F0mHU*BRc$w(R%7gayl3nUwsaN!7&e`h
z$IUFS&0k+n%e!z*ZK&7anxxGQvAXQd)3&|V={f99`{>VN=WK5$m<S5w3>|8nKHq7d
z#^(4dVMCFn*SV&<)3V3LR^C&FEG{1?wrTY<pM1W{Ky5zcxX69HvFB4y)={}Of`um9
z-k#IUWQGY)&#F5pQG9hy2Xy;=zxe7MV0b*gU9tfdhkg0=<S)4FlZjB<&&!#<Dvzm4
z#TML}bZwM+I`T}pa%h?y2H%cpKNvoGm@_`l8Ei%2zg5w0BFR@-E>?${J%OQKI;@s`
z!I>aPC}C&%0a5$Wl+~rJt=sSS=Hb4ZBT5tIn7ZI)3I>f%6U1!)bE>ZeU#R^@RfxDM
z2zyKRt6-5EIVtUJhle7okvur-#^LE;LU+`R8B<O@iG|!r%i#nhTtdqVG1|pT8R{=i
z{`;lZioY@u6tQd$n=7<cj`={=0obBbA@hEipOr3LkXL$dti=($Q~l_zn8(fQ9F~E-
zscOr?R1{3tGCOhe26R?_<3WioGe(U#6K6qUeJ&`lPtkw?6)`$}eRUEw6WEA@opIzP
z;Ux&g@7!0*Y)RhMpLy>oAz8m>9(l7t5V_VW0f9<@%7uDOuSYh5D@}0kd!*|#lg4nl
zf$iBL#_9VGmHlJ(ckMaNX-v#s_0i+Sps&Al6dx#H$6m0y@RN`T$v%0Y_Dr3T0T~1h
zYC}!_VUe8uex$O!s3O8eG@t**mn4d6#crO|VMDDTy%Q0$UlY&Mad<}+Zr`|t642Ms
zc61<AzDP^yUaNLtWsUZy7Z>PQL@&g#N;K$+FSfd@{+V?l+5xoD{!S0z4Si*bd0k5a
z-({S%ERryln%`l9iZnkqL^ho-t?yY`R#Z3cY3=z}8EcuWcLoQ;VDLadzcbXAJNsln
zk%CE;g!EqzvOc6_9kR!I@7ErYM0hgmcc#I;11R0Hs$wC}6n0=6!;__~m5QD4mV>ko
zkk&7>v*x2nlj$>PNc%<pky;eG%!;S(OZ2fZUoPTG{ycG7T}$!ZyO4Lx!A@OWBmGH>
z!Y7G}UhNsL!GL;W4{r?T<)zTm#q^h&{u@6&tpWr0oq(1ZbFIs1mTvt|s}l9>T>B%R
zNXNyrZ?)^LHLWEv(*9ycRRy89tA;9D-!OyCS2J(Mdi1f`2l%V-71mCN(X{5)Uqt_q
z9xX@`DDnW@D`Nft@CCy&&VgoI9LeK3n?LGfs7@=F)yWGt7=tA+2Ip;Satvmt9H+}O
z7#k?}ab;Z$E$!OWLx5-sGrFz(r68I?15qq?P>-@38ROi8IL;2r{18kMyCrLI*ATyZ
z_%%P#>nLF3M1O&-gWFW<peqlbLyhDcOX=h&(dthLEm-{ad2c)N2`swq;~>S>#!4@_
zPH_}_FZFlHGp#SrJ;#C>=|+D(o4b`*H)49C!e)Nvbb}~1NV?Z<(fQpk&C<V~+DzhI
zncY|A_d|?B$DPI%5js<!S%jtxN~nlIvq&0as-`vA-ZeCD34Im7$i%82q(5Bnsj~~0
zah?vKqa78sc(n|biaNM}Yo9i^G#t9beu&|5v?F0uO)Qq&n2}l$$xcoPq6~F@)GRaH
zHu>Suobjw>tc6LBwdlmB5&u@zZow_Gg#(}GOaAG;N@%?<snt^NMg?)zu~_=TpVceV
zI)|r?1)q{g$nu#?J&;ZoBA|$thF|&{u-+`6)l}rww3dnq1!9sBDP;2Zk~PgYulsF1
zMOFbA*L>|@A8f9+ScqH_tQ;Jt0vutU-8-|!TS7-~F+ff#TYW}Gjf(c&_$LYV8dpeV
zG5UirlOZnIW>}k50&!$pfg6vIm@IE10&+x{<*&56+>ZK|1Is)?4gsQtABI%U=MPg&
zKwGVaRUc07P8?3uJg*%I;?<K0)o%3@VuS=&+G}o3O}uDE!QPR418_>#RbX-|rYD)b
z2|)PV9fzsV`0baK08sfET^9MVdGz&aOmuW<ZD<SnC|1GcuocQ9BK%l_^-(aM#C^6)
ziu~^+0@5iW;aPY%G~qaMv+k0rwF{ZMSz4l}L+C=rwwds|J~?wOM$eg&=o#l=!*-3*
zGjzr>X|UnVVykV(?0b9!_Itxel$s{$)e$fbN1_FT*a7<1A-~C0qa@#vtmY*Rby$BP
zb!JfK34qwi2%45=6ipa3dJE)+c=f+zZSdiGX)(76kOiOjXZwHp$-Q3)KX+YJpMU<!
z@<z#_Lg>&P5f`bdCryW?BJBq5s+Kt4YsXb)j8WkINS-=znVeiDT&>4fBc{7@>w6D<
zd3=p-fQa?To?fe(aH!Al?^Y=X=1+^}r{Beb&#Gq!+s(%Z`z!7oF`LbJVhQsg+^jIr
zHE!|x%x~~@WCLz0O~4jX96Iq@9ZLsy`dRQJPgU*fx4XzhVon4lPJBe+vu?u^?VWox
zCzoqO5v$X}Z-?O_3}G<+X6w!k?3I;mONGk(1iZ=doS@s14S%NHkkV&G*J)RxG^FkQ
zbb8a%Y>a!b+-F6Rhzg|n#qX!C{ZrI0i5YF~Y5nlfQd_na3cB+S+het5O(h5^%UlY$
z2ZxpI9V9U^V_S(8sjS+Ib%|yaCeI4o_@uUoy{i6_o{_k->M{EzhPgH~T~GyJXgAB$
zUCo~w42K>bj`#d<vm<ex{5X@L4)qG3c6VGA0pSU@(I8T*y9@ii&Z|Q$KJh%M9}lb#
zoYL%qF2E*76jG71)nBodX`m3Hox0><dmuU_>3+aaT(AbOk>|}<QLiKo2xaw05CreH
z=7Wz3{_{=*p3{|8kOsl3XkwJWnk#PxFhJ{q=xW=1Tq^8@uRT$#qBVY6HnSyb9^Z<$
zRB4Ob5Mps~?&@Z!pR;7poi45%ZK~uEOkK_!F}%AJV0({<JF;MLA`<rxl41ZmA~6?W
zN!pKta6SWz9tAXGR)O7)CuOsc5}bDzB`bxmq~el#aR|6v43>uIaxMq`c2!~i`E6{_
zclE?`yQU^cH<0Mf;Y7L%uu<rVc4%2Rwl779{Dezx6#2SHCv*CMFBE_Yx`Etd>+r+R
zbkE70ujO$j%59{3082#>X<<VR|J2k#9Z0stpiRRl;52Ysk#Nsa-uUd(ga4{TOsdxD
zB&1e7kJBSa$S(M>p2gMg8$ByGwL_%{**!|EhF1u&tQaZJS=vjtBP6t>qFv1p$urd@
z?1Cm2`mfsdzAsXG@FNWy_X?xl^4v9*1zk-Xm@c$^xPVvZp)Ce)NF$nygzkEe-BaI#
zlfj*#9qeJ)1#g>A^{Um~EsM#H2Z05y%yoHg?^+n((@J%T3;ks^4fB#)HqaKU<D@y`
zi2zYjt3Qm3_A)A^n=U+425G`R@YtED62s8yjH5rSv!3S79F83Pu)FyQlzGao6VT+G
zJSlJjLun^)akBwjQ>P@#R3Vnnxoc6M0ovhGRCxV<Y77V_SYf=gfzB+)xhORVO|uUX
z>ixF@ph`CfW~Kt_c0Mj%<E!GmP|k^~;wnPmb3UYDz$3%9IR&lh8|!{8IM<MBjiA0F
zUI^tO#4}V$d1NNHIPWfmC?wXNB)2sCSBcq9A&$(2o&v)4#c|(abTP0&1NJ*%@Ilh;
zqJ2Ohs=-UuWqg8lPU{Zs7ZbV9LCYh^{^^3<;emMUJ?ap|+BoSQ1FEy=RMHbJ5Y?Hz
z2sv(UkOYoLUhAu+_t+|&)`6g;!9?VsRvD>PAT=MrMu}KH2zWT+?A>XCK4Vs_FqUp*
z5Kgi@)&SnqF!y<2qK?_f>@l+koB-ueKu2(Q!Dmt@Ow32i;>fpxezs>BV&J9uAV0FA
zJ{_RW)R|lsRbO#8x9+Ma!=1myR36UiV%*yVdbI>V6*)2US_?No&V$86S>`T1lS6@F
z&oiLdHW2qCFVyJA{nWwW2hGG*-AqnWs}}c7wf?N5qovbaL8JA}6My_9@QH-vBI%Eq
zhKo(Zl%W8RHT&?W<uEaX6YjaNY`0aLsFkr`<-1HfHNI@;v?xRi(SjSTronIsk2%NU
zC}459&9ur1F3cE)#citafX;ig4ir)$gZ_J=CkH{{r|o;8y|QaEA5>b;<oc0n==)A^
zDMl1RZFtngoIp@a+m4!psmLXO5M6D$mTMup@MFJGy9J*-%vl+7ee2}N6)eG#s(0rr
zKNj$PkuDGFZz-zhRd6!zdP>Wrc(wxAN4Xen=7*O^m^a&QO%(Q?2ey_NbDO@ZXdBH@
zvUozzG3YHa`gve4to*2or2_|YvXtlcP_6Lg?$LeEcHw$BkZgZF;2172?W6DR^Lo)<
zxPCi~%zaVCfzi=WBL==OSHNIrR9<TFg5RDq*i}=xtt|tV1Q!X4mirvjEL-i?IdPPs
zYjiGbM(0&CeiiyGky&mI4)ivGEk2?m(b;O7#fESW8`=lSTQVavPq6V9C3>ejT;jwU
z>cnZ%qCQF6`<dPyX&HGPC+=AGIftIq%A!=Eco2iM+2acJ*-F<lUA#V$d3X6^DbA_3
zyAJaz(~NQz#I1VD5!{K*!I#R*&ugaQ^)egGOwYd??EMldUYwd0xm4j>vrRyrYn!+F
z#M_UR`6dC3-!2~c-xh*t<QMBcm-d6<2UA0Xqh6lLsks`bRF3|>jr&HZiKm<3vr92C
z{92F{f&Qyx9L=4BQ-%4`0XfW>OTzBegRr40cRP@zvthH^o{z~B=VGZh8-i9m`f}IB
z5>9!7(|ClP)v#lm1OfF<lVaSAp4bfxo2Ebx>|sOQWOK2vyclN*r+&Np=BR6E>!rgU
zvn1Afhnij%i>vm9M=HB#Yv<e3a(l`U(hW1ejFrPDMa;6bQk|e?`hBETj6aIktN3tc
z%F(a~05+w>?ZTY^3eJ(q-OPGl<W5D%D_**D?k`bLu7+jaa{U$mb3#JO%#wrHNRE3&
zfx}}kZ~SbxiOh-!whS)GWx<`;XuK45rlRu0eZJ3?$HhG3)T(?TuznxV7K~;LSt0Lr
z(!O3*P4g{WLcw_mTyEfFN<f0a{q&`ug~SDN$GSs$d-(^6@SFBHE#(d3d9d=uxks8^
zvS{mzV%K^I0$Oxd@NfwgW(Ala+vvSqx1bp*&C6i~IE?s`{pU=SSdqI8ct+*QtC6gq
zHLhXasQQ!0FGGVZl%edt2(CaLc(uP)1<XW5V0wKy6dvjvggHd8{P5Nf0PM4NvPJ64
zrL4t*c58+Jz^VmdLz`&w*|gZEg)$I-Sl!>IIOZ5Qc|@y)CHP};3Ry4Y%YV&JdZlBW
zen=ZtZlm!7NVrTTL_%qJB<5$!*Q6ux4`pcNMftDyp4TswPVXL#g%N1CjRW;g+3{8G
z^>EDkmwR<}#1DYGs8VAS+SFouw13RG4S#XoB2*hg-u7Y8rhVcC+uynLC!sjvfNI7u
zBrqI!BV1LC4E!`LF^Hs0UKW3Peld7l{fj0dSJ=JBJk!LiQWMk;_uDRg8UfOLY{9~8
zt|6cCiOQ;Ly5275bpm#gIPJ~-y+mi$lv$&hS%B+}?y{IX*9Y}p${PdB>&ie2hD&{@
z-EE@y&0Iu3jfw~J{n4*zOOQ9EtuHYdC|^7c+3AhD<M#f#m=q!^bD4fK*B@kB>z81Y
zs)=~!PIS!Fp;I|uP3i{Jj<b_pd=|{#IkeW0qiK(Wr(wORPDpMHj+&af2T&vFxD_i-
zF~N4aJ-F3^6b;z_i2KZ2#NF!F1id7o^d>B{5Kp$WM}zivzYSh$^Jr{Q8!wsl__jJ-
z$VX~*Dbj>OdnKVkoNKbsK`TXJUL7=Qx>F(cHc%V0FTD(=Fxybk<hAqwF;zK&IW~d0
z^sBFnW9h{Hy42|CFrbQ&b;u)N2in5WPd%>^WQj=*$k%D)gvmkD+SW93ANn+_N|Ln+
zcSW9A<B_`bA7UYxcppMPkG-XQBJXbt$B@ZE603WAbkmke9lQq?c`uJn3vmQTR702X
zGyQb3gj)sl??+&jl?~o(pMwbA8}w$U?@k-D12EG(cjkHyad?grnPGR+&Y38ol_98A
zq;cAd_7<OKr*?q$kxjE<rjz@s^Qh7(Jq(ZXZnz0~o$5D2EZa<m5pzsID{HtSI|dlB
z4YNUv29&gl?1*h@*0z_^!DYT%zgf#c=+h=kiI$f*T}L^$7MnhP^H<U#pti~hBgf3&
zq4b35+~*yZ&o$l^PE8b@bhHtLB`dzp5Kh1|H@7AvsX~l&W%bVQ!QR<_x+k)^SG;td
zr!(R4y^D`%$wLf#Mq;eF*q}1elIl{b)sH)M3Vb!Kd6w1ew+AA^OpXGR?tt0?Jnx;%
z+IDB;DRc~uuRr10yxW7nTisce3BxXWY?l-#*0uIQ?1vy+zu!;3Ke7sK0D|s(fcWqT
zKY%TDZrg7`b{D2S|Jd1zMItkxF_6$C4?>dCNmJvJ(c;GZGC{Ygv&9n!v$6b==tdOY
zvjpSOY89Pm_ENFqjg<bmC}${s-LAS2UK0f+zo^)0E{MdDx3w|Y{yR?L_XnpqaVOaX
zj-8=2ZH3}oaN(GSnhZfBHHNPi{l4cK?xpQdb=*on3|4qDD^$BqF+VF`AAs`*?!22T
zYDg@$8JImMZ#bOOL|WPYBo;WX4uwT=i0uDrHZE&jxMLK^3X)xww?hTw7?w*7n}2EA
zE-mW;018gPMl<<ESmV1I{&y3hQi?l9X^HX{p=EbC!8n?es?&?P+(ScvG{c$)fN)SN
zJr<`@KzzWR&ILpFEz1D5V861l%k=RZ;ZSCJQzc@H@TH?3kV=dXAyNrAQd!C&ym!%Y
z1}bh?-A!5tSLxI5&8ti7Wbi{m*_sogc;i&QVcRc1+jz(WP+{}04uW<zz2LvaH2apq
z_+n|<mBoB-g8SODTl)(BgT}EjjYgRe9IT~tH>AU|+2{wQ&!cB@*Ke`!Ejm)$&Jtf<
zI0M>1&HMI7?L@3au3--N2m2AGU#9KrOVi0Bom#I>qkLiNZ!z(y)ZZXe>w@HM%PR1Y
zU4bl*`%P<!Xi@Tz!9f!DVyW=)gcKsu0-(?b>*=$t)Ig4)B1|wr@@FGp2M66IfCC!w
zb6%wB9L?N|-n3M(FnPQfy-=8M^}R&(wn1l_`I*6tCi~$j5k$wX@1M+m+!)^S!n{7H
zehC9>dexyYt1w?>PPnGrlN>9NdSoIGtY1-yg%|~-FPu?qSfnL}p|*O7vPlvA7}_Jw
zpDh~jURDbdEie)pR3~OjXI*;bErP^-VTdJFF0i*wi=Z+M7>U*IG(c<8JhPuS?au-I
z>Y&!-?Vacp*4ZW$A=%BL<2A=knPGjp$v;vIO~3Qv9^zU%_#FgMumN0K3oN8XXLmG0
zzU`})Gi4>93g!>dIwDQ=9xRE49m1ese6*)yd#z&ls^QNX{L!ehQ<ymMoWEG&Y1Y0`
zbhcZ@hoMNT8i*1#mCtbE^7~Q8S#A+RnB=q39+O1>gl)g<5#m8V4+qPBp=(a)$xn}u
zaEu~)JsEy;a7!FJlXUZso=XfMT?l>NI5gEA#qz_igtE3!ZGqOWJfD9$>y<AjIGmNd
z<6EJdFr1&C{ND-?aE1c3dRrN^{=gG903tzW<>bic*W)0=6*Q5@o}}5EV6y&f!?}c|
zY^d*y+&Va!dU)o_y|QnI=p7BE1BS1^Cbz#To{2z_wni6#kx)f6q^>~fl6$O`(#c#i
zQ+&@O$CgmEnAmM6fD1||ZNJOHB5KM6z?4l@z>%{aI*kr39&Eazrzjb+7rcyY(V3)+
zomLXw1fR%lzR`$dshFuW0d$hxTDFQjk_(Rnf&+x+-6G8&Vy22!m`9X07jFA*Pq~%J
zNWspug*S-<8Oq5PZq58zfqiU25W_4mbT{i+SJQ;~$#>)_ZK-f#Yu?1Vzhf(EXV%!f
zjL%QJrT19AVURZ==<sXcqrj>R!=FPmEIv)jq}2IKk|H;%A}+KkpRG>G+j}aMa5i(Z
zn6!Gw2Yxc%$G$_L!cgZ4_ck!JRR{Gx<BoF#%txODY`uf@+C62a*ow##7?F)p5t^<2
zdu%Jmj8k86AfVE*#nkxI`&Gzv=uteX;i%0DK~n!#4lv3cG+z`@;8IUMMeRN^5I>OW
z#ESi%R{?YUl*<X7{+R<#b4IHm>_~=)?&l-^-MIjr-<~Xn)63)RxLA({SXFb=zpJ_?
zcV_>hYZ88>a*&Kp8FH&8IguWbuSL9O<E$}O-rHLkEFK9qT{$I8*B@Qc^Wkp><fD2p
zo4M{oLlV1>Nv(*43il{5X2*W$j_V|Lw;g)nEC_hldH`)Q4&w@Qqsn}x*&s1BP=yvM
zdsoPFY~0~H;?tS<n=K3NMz4{<+o9*jigA);9NDVl-D#GBIjw%*!AIz4>)m0bPL&%D
zWr`^n@Q=kKoDaSbxB=NMO#3gL5(G@>tT4}g`-L=G7Qu&ef#ooyy~|y`A9JpdMqx}+
z?%5liJqYV5XXD1ECsAi(vjQ6z<2Ug{APO)mZr}8jt@i-VGpn&80CQLA?7i=zLq(AP
z!Lm=XPEQzim!py}1FoO$N_5@!IM-~%k%qfMI^>l#C$ldjQ;4pPcEzYy-ws!Bn4@;Z
zrg=e_O7=;$@{yNFgQj}%moG}h1n+s!qd)>}fWeAkv6@|!b7gZMc@rpLH7W<2+l;JS
zeSlpm)%rdTAwOO;IW2)7K1!aMtMZy9rG8_Dk7enQ4?X#z{y*5obvmZ~RQ_vxK;xIs
z6o4eCLA##PJx3a(C6vw`4_fL3ct1Z`dfj@pypX@z!1UcFd-)v+@e|jz(gW7+(jNxW
z^pg7`##aOBZZtbmSuE`l8rjVA$gO7|QyN>)T^fRj(E$~P2UvB5ae`{8VNTs(z`=ul
z{-?S(<wcZJDwzJSGu$Gj@x|Sz6wg!Sf`YykwaFon2N91!e8W5M%csXad)*i169hM&
zLc&$FN1n_!36h;1Hu)`S+A>Q9p}<mg`-MF(uYpNZNvCeSJtQI|Bp9LX=jO8^N5i63
zj?|29dGxmlxirBKNFv@bn_7#7IDW@KYb+4?=h!9U$Ta=va}i7hWWKFUFezperm2l-
ziazrPFAUljU_66Fu~v#S08;j76#0@V0KkfH?zm+l;dIp>hP5>92nn18MDx3Me%9iN
zfH!O#|I~m-Ij8y&8PHKZ4*)ul*RKgyjXc#AmaWr6L9}@WB9(s@%a5qLxc_IA0RTt~
zlo!s}{k{C&0_}HAc#F?5uMtUbLWK!Vx7x4KfYP&a1s+nil~C3Pz_0P?GlXpoqc)6x
zC%pcbj~Sq=%4Ya@XwG=N287L}x0LUzU19zU;EiA~v@WdQN2tu4hA8H!4=$o~GPXXL
z$<8bV8M~_NB_#zLvUR-*qPbrD;dSesyUJ&9<AyPl*hx)Z2-{A32Hn{Fop?Xuy!fXx
zmX1S7>4$$sjr9%6jpbLQR;jqg@3w}7GB-}!SMwso1Pg?A^Y4vu+41Q?Xv`)NGKqz@
z)+x?VJUfw$#b4Rye-2E&{(E}!uV>%MC_?OL`hXDV6r@-(m-?OB7$oSdB;GgpyewU3
z)mVeJI><yMz}LN!T6f5Z&gE$Gv6510%iUrYfzYA~Tcnv?wLkW&PDpX}c2h3iUCI85
zr;GOAz<?@|%xP6C&?k(^<!4QM0yHDH=H!N6QgeOk1p^kIqaZZuC35b2YmBa=V4{e4
z6<X|zuao4Rkts6NosLFyUk%Y!KrJot;6Z@Kzy9kNlvCX%BR#2bVLDKw+w~2o5HUe*
zHfQVP1F`QY76U7^cmQ@DnY%{6YpJ*~5lPm0K2NkPc2lkn*x~vgwEgdAw^D&4R`T}R
ziZ$@w*U;g5f!^XYcF8wC!APzgQvXZ`32!iewO7$Gn>*aMZIPWi{RAqzl?n@tR{3V6
z<Q4KAIWNdoXbUw)XBEA00=mL!bw)w8|GL1}@;e8+enKu;jAR`W)3rpZ(=8g0dpYCC
zU4vsQ0@ttaEwH~G)fs=$`$|@lKTSw9)%5b?s5r@;Tyd^>W!j}1e~=ZIIx;%9U;%=T
zdvjS{ecZ#wo4+vb1y7ZsA!gtYNNQ7IvUN@~L;MCYa6XHO9XVehTKa5V+X}4ESz(2l
zvTqD4l=3Tu?xB58#dSlwwaS)IC04AMylRn~eKK>JVWBAlUmY^-bG!zHY{v665ciAz
zS8O<40a^fA&CB<J0fky8c?a$RYTUWdOO|kF#<@Gh@u^&Skc1I?S?Mh6ycX?_aEt#j
zyOh|@?e(8?3)37`p?zj}6gcw>Ki6e@mcKiTc;ilsGht(6w{lff`lxe*3a4QXkMwrc
z^ew7#rWNP8Iu+rgkOeJD5cgQ*E7z5-1)-QE$6nN(#SEfzB9)%+6joqGr1T*6FW9N`
z^}Ml#Ec=lazB-544T<-1KvXFgz~*fe%d+lDyu-kx2rta|iAh~H!pyU`?-Q2y2&|Lc
zCO(LI*@)ga))-1U7J5DCA)cqB$~CcL7lF7mRBX;~N!8CCs(m()?cd<~&Eh}PB|@I!
z!da>Mhdcl4LjBv9v#J!_VP~trx%JUE2vUq`!nSTlL-K=|I_<V*K13sEQdy-V2?`kT
zYBwc^Lv3FD_|J6QW&$qxMU9F7{O-T~epPGo%K?9iZ-$I>8(sglO}{vo?Rr|#Or_;~
zjjnH?3#k<;h?Yg9ar_v-mU{q$sj?^13I7Gt{VOl^lYz*WQTs^$@BQB2Df^#4`QP~7
z^|3nOH9B<3%Ks0q`RDIk_~*I-@$yd`{1XR%?koSq!9Q{EPaOQK(Ed|T|G6Lh$%TL7
f;Qu!cI*H_SwQ$NU&zFx$fR85{x(_PvTfhE4;q<eU

literal 0
HcmV?d00001

diff --git a/tech_reports/GEMM_FLOPS/images/effects_of_shapes.png b/tech_reports/GEMM_FLOPS/images/effects_of_shapes.png
new file mode 100644
index 0000000000000000000000000000000000000000..73acc726c16c37300dd538460cd5d7bff7724317
GIT binary patch
literal 175342
zcmeFZ2{_d2|35sGqo}lqQcR^nLMp^yv`AVIWeKB5$d+|5jFuy%kO*ZNMI>9uIvB|=
zrpPiFWE%`KjAe}3{`b^5r_S$p&i{F?=Xw6m^M9`E>$>pqnfr5p?&ZC{-mlw3BLltl
zLJ~p{2xPtfnN!9Ph(I+2vf6FUZ{W^CdrTt)vQEe8<Vhp_lP7l>dAi?la<PX%&OCf(
zCTMQbB$jOX^ofom-|DpA2Uc&A<4gPf^9kx2)f1a{#cW%3ye{J4fl{$cr>gJghRK@q
z<xBgD)eVRX3pJOUzms=2*69u!L(K<yk#O8Z2LAaGuNFv(?_KH5_Nh~l(KYk6%E!?Q
zbNWr*#v;2`tuwl_;oaCRFWpaX-W-Co7%}NI%5OSpGnY}BU2rDX`ODCpR~cm2#f=#>
z&ocFmPzWS%Jgn9qvhj5VVL>xW^rO)G+Cw@I#Lsz@?=(B-@pdOQicnkkZkO9C9sQ7x
zn=)3V1Z<}*1iEQ_HJS<Sx!Aa64WCi!$?2r%BhLgqglfA!pI)7R2@|z)&tPTns7a`b
zdg8*hgP2FzvJ7EQT|(k>u7d^#+0+!fj`8mI%ll65+$o)O<WpZxcIHdtY<>^SA(cIw
zZ7)24ukkyKdT{aV{k_K!ZjlnVyetkm+<L}0)i|;odcp<bk#Y#O=lq9^^tJqijUOfN
zwrw2EAKBFv;-?H(+huyKP+wE!^fzJG^IHnzg*ZF)4$jLjF6unpioZ00Ya1%ux)Oi$
z_-U&>wW1GN=LObo+L(1>`bomp9dF-lH+%9SY+J#mbGB)72`6`c6#tFUw3yF2>T&gP
zP1Rl*q+?Z2`Z_+?i97T$`&xmP`Flkk>IKW+tlqT*?mmJ(6N-UpyErOpRt4V<q?n7?
zY=dTfX6$O7e-ss2C--VvHbl#<8NcBvEqx=5G~RHaet50_t&`1*b4M>CE+uLjRP2i0
zvUy*mpbcLQM0k0X#*^w7Lg{zU@0yr!p1OymCF@l4**@8@T#fj!F8oZ);+k(NVxqU<
zyNB=ffAc^WAha7pS>4x6(M5tz1p|SE2M1?Ghj(riD)?4;O7-lqX$3ybJCBE;1M2TQ
zuT>D<on>;h;I(Ln_$O;k4n5r+>0Pw@oGs)FX50C7dwYjgO4TS2LqB9GWg8q@g@nFn
zK%#0)PRM-Q^iAXCqg$x36eE^N%(oQbDZvAtQ7d+ex6ZwpyvzKCFp@klqEV{-t|93q
zbU$U47FH+r^#jWnTUQKwgA+c8WW9VR4SRZ#wx&RdutJJ@Tl`k><AHW7ZamaKJM$ys
z#(Fwy8nbwE$^YGYHes{n<C%swTbca%o=w?nKj)8_AL)vad^Y@zxOl1<d%vQtak%NO
z#@*oM9ak)z_IpW^ccLV5jZE6|ku<7f^@{^9<iye(M!!6teN>%M7-AKA(OX7O#sQKx
zD9#|0FT9_|-kEqGOcLYp-R_?I@ZrP9!LlbKkZGTLk=&QmM3q%&jTpXT$OrP9NG<AC
z0lTY9ewPSrU%0*L(5kLm+6L8I{rGJ*LQd%HeXvUZ>fy)Qt0-Q|@f&X36G+(>b9d|2
z^>0@1_#pOf)%0&noh`@s4elNp+EcnN_a62Z-TR(+3bbMsX4C53yD$BIe`0t20|DF6
zW4qVY?pUi+^JLrJwfe`~H|^eY{D!n}lyTa+%u}k;MU9Bjjh>s;Bfa;OM1`JFqDe4*
zw|yh@Hbml8?1W>?ox@*@j&4Z$Jw3JKGynTF*7r?QR3;SPZ_;{*IF*-bJt6E3Srvac
z{oeDdOWRi2U5he59DDy^ewq1N(}>Qii{`>sq%3psuF#;`hsV}uj4a&}6daj5CXTs}
zuZtUgw!EQYll#ZKVUamBYsa0lhj#UB(p=+G<99;in#Rj`BZHpP$IcvEk9qA@1KShz
z=H&Z5<sTfBAI9q^>RO$(GIZIMyvuhpbc^7by-%f@B1Uz?_IjN0PqS^;Xjg3aXy0nA
zHhJ%9!PESw(Z(sp1y7AKjWVsAGTN12%b&%^Xh(N8iCq`*-1}TFSs!6AaMo?N%^qYl
z+~laNsi@M~o%^B-ZW|xZ%t%b{Pp!CUYN~qK*DS(ROcrmJYc!A=Xux<$)7yGstL4_%
z=2KfAHXnXAb3o>jTUNzO<ILDgHa8EeM3z6ka!yM(NQa$r*2KriK8tpVlxdo&`WA69
zDD$*wp^@F&3#Rf$@@9Q`zWO<aN!f0hZN{WG_{(|5@HDhZkj}ky<<ru}(t388^#<Yd
z5?U|zUO3kZxsZG#YFw<<<VB>@w${h>uRNTDoh+R)oQPLWDab1no2Hv)cDdzj$U(ba
zaCMkjJ+r;{a<5(y*87m;Imu#!;x^BuAPqHd?eIf8KkWPf&A)z%{XkxNfE@94v2%<Z
zJE1y(=w_$sbqjxPX}8SmPD9#2yGuGFyD+)9<J|Y?!818g$<k%g=T1|0_%}dQ-l;rL
zj<<hTmqNT+{kic_qtW%!w%M3#n5c-Tyo>2yZl>CuuB<<5=Wcgr?&ioyJ$|y+d%so!
z;<$S%Jz-n?>Vz}rRTI49167L-hpQ<Zbv;z+ep=Zs{`K=5Rc95lnwqMJ8pajjfNr?!
z^5I?MJ9B4`uHnkp&+K1~?P=NHA|-DvUwrw_<)lt6=eI?Xi$}>4WM|b=JMLeIZ4F3h
zDSngl$o0PKvzdLcuWg+MS4#$Ry+p6YFlE2Jd2vx~=JNFOCB~A^l8B`D&YGP<k}q~5
zB2=MCngL@Ig{e68@?$q(Zax*T(_ZHr<7<=^><}=5hDXxd#N6=L5$kut++F5-%-Ve0
z0!i!JN~|0ezbQVlx?){bUS1YvO}0i6KM{@l-xA#pzu$K8@$1K4l+{Q$-Jhb@4-Y5}
z81<i?UvB9bIMU*k;ebDu<-zi>Y|vUdyg2OVx->;gM`ifA(<n6Mxo^u~mo;fB^J5IR
z`Jnpu*KQm}D%-56uRLULSgCaIe>eWlVA+myoU@N3$@hY9j_(<t2>*pu9jnOv%zIXM
zZwMr<*}ppUx4d;#O{&+u2u0JJjU`Xice}nk@G}49vzL)IE+1>JDSUJEJG1<D7PU!i
zQ_X|)GaH*CthV~Yn-AQ)^7Wget@kDG%L^Ux%F2ayZT0Q88MTzhLE<Fw5>1L*g-^xa
zDwX33*KAL?)-X?TYYS>~=TaN++k=($;rdU$CZ0cQaAqJ@^U&_AH80i@w^;8NGptZ<
zy4n6%pCm&zY)cOG4(k<di($+C9#b2WBxiFhDp8_M=K!ooqX^S7oI6%E#GskZ2QUz<
z!<@pSdG8C}Cwu2DT%N4luDyM1dk}wYit+4$Bgee##+liT*)taR2oDMC)z7NSsfYgl
zqUPYWzDuc@N78-6rcc$q%5{?J=sHgp4-G3B+fx?pnK1u;C^8URdxiFB^jZ}XeVG$F
zxweCj{~W(N@r!nc@A%W_cp-jabV3Mzxb0?=>tWP~=}@nDFIBwmczT;_l7niVBZAIK
zVZg+U#I7qWSANcampwaEQyZe7dtZ0{kra3H!a3{woB5^7<9qPBJH$sG2|R+uKDI7!
zYbx1{XbLfIXs5L+ohv)1#L^h=4zm%Ad#yk=r<4bN6dD%#7Pdn^$dn@6AuBJ7syGmM
zdF1BG)YDG)?(-I2RsCgwBUhTQH1+0&y$c)n87G@&GmW2L^sj8rd;79CB|v|XxmdAi
zrn>C*X3()T-+LtE!rcocs)DX=+4<RWgD3i*;>ISse$R0Twqu!MF9cgo7U%Va^kIE#
zrya9=vT}${@4WmaugGMQi7Fo5V?+)%fjk4WapGtUg%7nCl}x=rK-t+$RCTc*vlj&q
zZny+ZlRW5jO2R>PNzMMKkBW>6Qc6huYws|@bN1cGZ!>!5BQ8mHYL~ONjhTPR|1cuZ
zAZYNwU?J%|oKfm?mP|AmF|>Am<VHRw8L-m-y!uE++hM#Eoa94k^pBhmRXn74)Y`kU
zr-#yxo<+tDiujiL9z}R!zXlT6p65uVZFXjXxrr51!Hb_%mWm#AOXng79^gF4<FyIa
zC4MCVZVrQJ9~o{N2|G{SM0wV4hO^;R4;^oXss<nRF7uTPn&7@;Uz|?GMwUg|Xf_4W
zUSlWZ7M&PJMjVQ?47BtDR-u&OnAr+u$V45fY8bA<$woV|Y1KAW!A0~)DiysCt+E_2
zxua68Z%_m0D)~UdZ&r~+B+>Ul7qq9B8<+i-OolTYFd4zSgEE(I&KHeYPL!bTso%S|
zCn8>&xwZPLZbv)BuTm0nb2H?qpLXi`Cm8N^fhdfH{Ti0cLAZ2tm^@^0maA1R!Xfsr
zzKS>XW8Gc3xrOh+EB=k74WIMvLdl;kF8Jogi0#o7bP40Uco8kcxe$d>4}QQTiJiHe
z^Z`GEgD@ilZx}j#z6DZ>Fne?T8y7A>4uI=5kkxz=5CL$-2fnrWcKmgHnokz8>id0u
z2qe@AvigrU2H-dE>k0VgJ@e!Ds;BoMg5a;s;M*^S|IgL})hVm~yk6}FK7$-LIjOG?
zew)~N+S|K%Il6lbw0@ZZH`d-dW8npXi0$Kj^XVJ!odx~(I9)RLHotIQ!_M7R@!EBF
zTYE)6SKb^T5KTV~aOrCAeQlSYtBaeLhM(4+?=3XIHSf04o?YLYcspzDF~4B6>!iD<
z{Vr7?PKWkr3+>vqOVjiE4GrT{r~h~y{H3+W(cAl$hLREziBv=$QFQloP*PS`S64c8
zSn2R#1<*pl%iqoWnxBH3m-LTL{_N+Jy_cP*(=Bf&ceh=<ey`cO`*>^Z*~5F$Utd4Q
zY47Ls^G$AEe@qKZP>J`6lCt6<rN8<Hk81MnY8W~B*}GVra&iT02Hv5qtbF9C=JyBw
z+ebfN`OBl`zdU+W?eMRU{_@d(ee|-Iz2`}HSMXAA?VlU=$HTvV_{W2qO1!!MvKBuU
z`ukn5(%M3rN`LK|wvg&7m3gp{JDpA$UIM>?nDM^&)xZzgAHTshpZI+>L)2vmL<gdO
z>i8u;zG<Rh|IW*0%X85>j{I>qbpjry+<xGlw{4^F1wOgso7U*AR>@a;k&?1Y_5h#m
zko)Z0Gh0^gs}T?o6_67$(`ZlFvn%N{ZO|dsVd?pNvO3Jcx2ftmdxmW_9z)V}Ai=gQ
zXg`O+ZkUosmUrET@UQvDAD<81Q5LP<;j61Ixq71xgzq1I+&O=n|Ck#8zjSb0M_NGk
zwi>O#)A*m?0A4LxT_*T1o%5}hkkjF-h>Cj_ec~VA4n~v$Y18`m37p{*8Vaw-P89vO
z0YU_BQT}OO-|yWyEo207AGIG#`_}==-u4pt$F<{)>8SB-em`j?d6|%ZTVWl(MAW~{
zOeZA@EY1#p3w61FSoj}<78>eb^{*=%E}8-vXl!ZL-~De3WCS5e{qw$p?j6%00<)(P
zrUpX)wm@L(>Hjvf|7`t#w*G$;t#1|Yv0eGJNk%<g<uJMPVqffT9QUSq-ATsOph0qp
z?S`8FqTM%2fZ!7p+R%S!)@SiMMtuGI6_qUd-cV+2lWh%6O(0aaGpqk*R1V1kTXXNJ
zgUuhxI-#GE4bd(*MI@xVv_a|2q;=AkqSbmY4&D8m$~+bWkEM>Coz?nd6ub1R#i~t&
zy7Uv`hYGZ0Z|~Uq;%^#WExRQJqTQ^x#zycDy?nd2hrcxUo$k%PE{0o!hO_K#q{$4G
zC5i0+MyX=D(LbLQDQl2IeNrfB+~YR+I?lI630GkmwAjhS5_Tre5F9W*GRc0nzQ3JQ
zD2icz4%I#?h`B(|L-)Cslnt`R)!e$WE!>zhB}n<4_r`SHQ$Ow9x)88ZFH1iq?)|Y8
zty@0x`@M0Ll$aNAdF(1a((G_FcTixKNx|YPsX^2hbgP%CGm7yWB4hC+rtr$h%yzbv
z#iguewHu$ZMrh6{Iq9OS;d2iSOAXPqXp0GTQsT%4&Dm;GtmLlwg8j@nBljQzC&9g{
z3`Qv(sR*2@OuvvYe!U8VX@Caz2T@UjUJi*K_pyxegmWf&1<h~W+S8Kyzh+`QM;aoP
z!aLUH9~-*8>HvmY!!3G;8u?wV{PO<n;Vv8Zpps5gOQ*LNk*pf=s-Xz=@%U+!WWdBr
zN|p_K>J2l3%{mch?fbmXyW>J(jHO$C2=;3IV|2QV_+JuGz4M?BU*Lu+zd)ZK`+wh}
zTIu#Vf|~sBZ$@e62X#lfhdZeJ%Sp(ANmC2^XI1FVN_uZ!QebnFg@K%r;q$|<Ggx53
zlcI<71JjFyzL}&Ow9Y35Ol7*!iAX%NQZ*M#(I^>-fR$q@<yoEw{P(av9l?8Z*@Ik3
zYiV6p<~UaG-m2&X*YfAEZ-O$9%VqF+l}lGjzTMwZ9%q9lgxlP#z>X}z`tJ%WZaa4G
z#`-3koz~u8bP4)NT(S+;wRoW1LZ*@Mb8o+HPJtjEh3ozye1{zQ6mB<E>1E4GS;}}_
z?rw7um_<CQvb7T)LRX>1stwi0Vy2rGNtj?nu|lBO702BrwZo6F4N}3MwB~A~L)0f9
z?@&`j@oSh4E#a6C_HO-c{JDb^a_pH|xlflx#)$=I=pJ3_dexq4FtA3_@<=2S%3W@-
zVTS8{*wAg|6>b^KUMzGkn0lL4n&4Khjukom!E8kS=Xlo{-R570%n>C1(26PHOLDj4
z?Bj-u1E-ICBMJ@J&$AV8<CcdXQPQFfJo74OXS0L`dy5vYZ0637K)J`1ZZ-FlwCU<4
z!(sBpSH8J3#?^3yO0qTe>F4RreZ{gBeQsELfex2;u3i&ohwJ+&OZUpff{Brcgsy<F
z^7&K#IH4iq_v{ZMXrtxR^4affi{mQrSCVO6c<D&ZVFNhZo6hdqLM$m65O9S<IkTZe
zJbLK}ET)9bzwCxBIoLJ03u6{sCMKf2oPwnfF#D;&To`@!qX^RWib8%Vn0S9rzMb&M
zy@|wIHERj-?$@bnx9oo(R;^Z6MoLC)w6WiXYUY+y1AR`K3Hqcw=*A{2PwP|(ChYmk
z|38-ESsT)o*$aPKo8wnoCXdK(eb=||+pTNI-Y12>cZHjhR;(d(`r1Y0^Y@Qn?8hDR
za{jK(XXSHZ=yvOI6MfDf$_E*vST2>z$wL9Hvplxm|8)a{grUY-c{bIQ&S=inZlT<R
zvQn5tD%v5m5E)8+-(e7<Jo(i>=n<=Q6dB3Rh0X5rj|?D52g@n@(?;8>Ay;VHNXBj>
z#_=<Z^9jCzS=+RC8V1YLF`R`AR;OBvBGzSTD)HKiFvMr*!Cxly^Cl4E<fr@@V0k^!
zA#54C60Zn*<~GkMcbi)!cbnRN;C4x}Ul;zW@DMwQ-zuz>oP$lqQu1cZ{2JBWurtLJ
zFgUMyJRFzoIXM?1j<|)QR+ca)4TE1a#AvOx&Vc0V)BD`v$l-@FH@kzT?U-dy&b#H>
z0B!XI2^xnyEqKY2*(85;G;?uZthue~jCMe=h5zekL~CTp;CAjjhTGr7ZGb@kisJ5k
z+X_s0MR)QMz3;)_)d(?J`P+TbQ>GQg`Nfi!Bz*XN{$)64PNam^EZ=7$>&i5AF~n#S
zXYw1QUe6KtuOt}?5ox`f+1dH^5|sNp+|HmYAFv?ln(MQn{z+k?IWrk?R7H5RQ>z-$
zyQ7PI5$)a~-b))T&Gui&*R!N5+SDi%eKmIbqATuNX|GFp!wD=V+}J@+7-a1~x8K#@
zwyMuz{L^#F9YLvEB3cVi5}b3(3hpwN=SGq$?1GFiK-?7N2U@aj49wnFyTswTeWs#5
z9~Uf>jySf5+{`82u1HN1`Dq(xK^P4kaK=BE{w|1z^mwY7k(i?35?6GR@!LfR=4${y
zYu)^6KP7?LFAGAV{@k*Pnv=<n-G)RelH*UpinuWOE{w$9-0H*q(u}R<7ap~%6s;c9
z_i1_!|6KOza7#Ipic`WXtoPsdh$V1ObP%X!S52aK8iW%S8CW}mrB=DMs&wyE#vHyi
z7(}<H-oXz@4qlPvLiu6T3a>+T(T3WSqt6s*m7_j#o{CG-A-CivsO98bH@p`8y$Pid
z!_LOOSqyuTeQPj4ZN<=%YG#9)DH2vj>kXXLc4|-Sl7DogXuuf0U)~^iXjPJP>RcQm
zAQQIKV@G@}tawYQYT@YytbMFSy=y_!{v0~a&}WBxMSdTPNcH8ebmI!t2DlS?a8H^+
z0J99ZucQh(0&Zp#<ilwTkR16b?uRr%N)SrA;+6ixEFV2v9W3ZKG5Y>VPJ_{Tf_aIG
z2K)j4vZ7W6|LeGJ^zYfMJ)Xq7MWa=m6*AFsaHT6SVJz0N`0_ejR#)V)vlq?6nvDZH
zBSp<+3=-cEH^JAupb=l2<Er`dyp$>zY-0_Re~ZIC_>3na2a~o&QOihDk_WWTeqdx5
z;ywFZvDQxKOV-N0@3UPTiG)+cEUrgwYT+!E;L2K6`#!NraPD?xaf$eqOv9o9f3yoH
zZH4F8Yq$;7aa9ZZkfTw0_hZNHLZt%Uz^31Qq*YmwN-D+_{B0!Qj+NRsnq@Ldf#)ha
z<OUN~EKOm~xCfK>&m3;jy2V{Z__{EeQCx+ms2+4~fz6FcH0mvJr_69KiWq{w$lY2X
zcU8`9+keR2=QVe_X=`UiQ|^dXS3*CW58w~%^>CYt$z;f-iw4c>rhgN!M)|*P-tLeX
zr6rd=DFSERLJr-jgC=|}JJ7Jj;MmJW=_i=RlVeFl+WR6ji<)dZ*Nx+JR?fJ(cYvx$
zPI0phe2*de+-@7;cM^LA`r~Z;39Tx=iu8pJL-H-(ME5F2qUglfGf5w9W0n|-kzLSy
zfD!?tH1;k*X+Z;E<s9;0`6KPI$dHx}LL`g1DY%(E?ErVJjJ?tA!g}tI+=Li>H5mMQ
ztC=^l7%{V%*=TdB-)|(`qOsTAx{p1R7u@o3OzA)s>sel@yQI%0?ez7^QFlvO_V>_Y
zH@9osPd8IaBz^n8uxZnUh4xgx23S9EM1mJSZ2Bw3yV|%~_O=K?faJaDduHeurqjt^
zs-2lbnxXqxMQCK`!OXg(&hS-)%2cD|R_?D$msD%L@LXdbMq}~oBnVdMjZC@lykNF7
z<;h76!>MExrHmkQHGTA5J_w;KmPCsqg2s<vJO&wU$oH_}wX+4bH>-v^?~A&s4F=BF
zz;N)60F_2$6?^93a2$^Hy6;-3B+cG<9h|c~QfTJ$SQ4hXBaR*oy;aR;P?d^a8bB!_
zX-yf9YLlhL<h2V}8--ImUt_>I{DC`N){SrrAa}~F_yaFR_66}(wrno-NSxWLKt_v=
zFDcP0;(l2qdLh*Sq4EmT6fFZ|wD7ZQ5z3w|3MHj#m6I1FrmBdCpLV~Q(94}$>T}N{
zPRn-dBi2hBK0G(aR0?LCTZ$>uWX}XQbJ~Y@3@)GM#L3X^r9^74Oy~?cQ-a-Be#Y*F
zh9E$hF1#W0XM&J=TddkOzqgoKdiiLlj?j>nCLcvSTO8qcO^wT%FL}$QvE0cW2Pkca
z9S%|hV(+>$^8~HPDo|u;cO}Kg{}fz!%J?yx=cY)pP@I)>P7w`#^W?yzN*1k!)G}?-
zd)S<WZDGE~wFlTT%y280lg%W=t-YgAc+^02c#{c<wory1RgqmnO_Wx^t&$sKGn&Rm
zaHn-g$|c$JF~fO*dfY<V0eLKzQX#)17P|HsbS7r_9XBc?cP!IY4$-0~)U$N?Z~?`d
z8emp64g6dMY-J*KzRI=Hge3?Kyrp{7H#eV<b#7EpXHnYF!N#L*>kOmIX2Y46LDWim
z*CcD%n4C1^8O$0M$2B&jdE6v#mBk7o?s$H4SMs)WDbtl=CfIO$U5ggG{4u}%qStGH
zUMD-ZKK~Kee3k{FX}7wZLJV0sMq1TjM9>&QPlgx<2sC@-n5Dc&pMOUoWo9IM(hi8Q
zcXk7{f9ua57(i|s((;8e3~q!+N{%f;Y47vUtV-{Ssf*~1#$imQirEvzwZSS6xN&^$
z=y{2p8mfeJdy5UEF;cNv{#05|HvDMj9nO|++NnZKLN-P|ucBa~bHDNL_yI%ql+EFO
zU#2U8e8$`LSt>V;2~>B|*QJub7EQr%(Ix$!En_D;(l5wYNbSe?73Eu!*Saw7oMSu&
zK{{&rWvES36i)j2*yzJ~6e6f$Nk~5X31O5kJ-<zZe9WI(8d%(8E39T$|2S|7AI$Mu
z5{GEK8`%IWKZ4gV!AeKaR-W0%Sr^Z8XazFWV@i^yRvvYl4fHB{Wio}SxoQ2+I7Hn5
zh~!RW({jxpV!DfeN$=1Y!JgS)pn&M}J!3#x+!pplc}MEZanWj-3okDD)zyJ8s_t$8
z@YLaI13mir=5XL56Qk)mbehH0I|7NLQFtNwgM$aY4n$|b+ECB{?WoUBhVw){fkT`O
zd{!|$($giPH>D?(Kgi|KiOfMRT5^g!H{FHJb^XA6dQMSJ^QA&UQ>1NkbRR8>#274V
z+V9r1FYeO{2cGu1VqC(x6l=rgGxWD*z6_N*@OzU9k>*{3KB0^D7h(LqcFWPUMa4Nx
z(ZD{=w+e2EQs4(kuTRSUTtkJ1aJ61pYN!P5eObk6UAR5HkLUC`WE|(*$Aa#cJ=6rl
z&p*L1#|n_z*4x+`Z2WE-@7xgj8q9BXu6uV}kCp5|8CVYFTrvbV-v%Rsz*v-Xci@dC
z17_bG6X=a&FL@=r4||zp$n=q+#pG3RPQiNz{Ygp0elL8^$0CIEQ>7(|r<TcxuE+|v
z>x-Rc-EOow4437wbT7R8)_m*WjAGSta1m@6OWLwkf;P3$->gWpQbE_3SvLN{x&OBv
zF0)fvb{a=o6tIbX<Ltic1GK3va>?|O`HxUm^$d3|l9YL!>(gzmMnyU|qKe0qE^<Ju
zj<*IO3nj+HzymBiZr=V}Q8FaBQIk(`OUWP_iyGN67~gQ_%b-}g{qUPE5zA&~?O5UG
z6T)SoP;}|leYz;Qffk1-U9qOH1q&Hk<Rhi3W5Jz|e}AUMr%2E0&scfS(PmDM7-&qU
zow<?z6H<9`06@5zX0h`>Al#I1{Q3N)3U+<PhMN=S?OIkqgkF*iffHGc*OUxz$8Fu%
z($)>lo0oEhhc2-*!jwvf_)9>{GUZMC5?pt`BEgcwuLJM6k_UZmix4cFOi#F>OElvO
zrEE<{o?&WD&?xhl&8$@(MIod6kQUAcS{xjg&Mh!=YwY-H4`rMaX6!!85MEEfQb_Oe
zf?6ee57d(N!a8VLE!xKY+Vp;#qJc`b9<uYYnYzERW!R8C=JS@J&GOVuanSjg$O?om
z0%pr?zkYAMs<)X95&b%xb!$AbRpeH)gj~A~lIYX!?QVf`MY6N<&Nlsamh(+~zz%Lz
z-edb{Om|TvaA=kEKp&z&;NwrXupYoU1Mh^JCx4`}hYs?;<S&htxBKm~?exm`1O{bz
zJ43XZO-InLK<F4*CDgtU6$R%Ok*m!jl2fD}suWq9ooVQ-#*nT1qDAjNVk48cJKqpT
zFZ9E>+e&y^YcrY*OV24Xtlw83JU1#AD7Uc%)ocP6l=OZA8-5^7v!~_KK6i{6KZZ|K
z-|LSwScyU{5gg|IaLT^Z@m<j!v~I`@et9kj2{vkA2~NBB49e_jpk0%@-{R~tI)b~$
zMB<o9eJ+KqG;^mj*Q}G<<M6c)w-8^)Azl>Oas6gL2oevs9E=XFr3|W7TzlRXO%}P4
zPZM9aA`~B}{EIny3DA6P(YWW)r@p5}DgOLdAdy?&*m?P!I`Jb-IB*-|u`<b!OB1bT
zS}!bOUPWx(JFjHbUb-@sRcIDEu^^E{H)?TKpEjw}FdLJlmT^8px$&V0zi%OgE!`T2
z+HK4Gg6LY_<?an!770$N8AMDdR!p2gGREQ@@^-A<XXEsGtEHhmH*;2d&L|PSA3nQ0
zX^7;!8d@ReA>I93EQPKq;aCOHP?9F_(ZVCNo16WgS|S|wP|4`|gu%odt?>!F{)>Ye
z2OcTN--d%c*kThCWXAa6QP>0)kDE`Tmg0jbUIf31$y?8#RF0~sSuiiS`>G9i+jT{-
zPono;MjyNk&u3m3;8UdX$KgTHLTXbd?X<4{B}RL|i_yN+jbHjBMtce(2@|~BrP{8Y
zoW#NIW2l{{`1*qdmv;ehW|RL)W(QG9<xwNe?ioP-0>804aGb?FVwa`6Ym<CYUg@w9
zJpx%F9cg{*gFs=%)3Ks$D|0whcKloC6_1#V<gtp;hYLI0N?$%PMvu--q(&+}_8}+w
z_Bw??<0_^LTKZuima8e?AsTL6HXYvpa^SVRT=@q4lN|L39ZsXT`G=MSHVX|McA(bT
zTN>UNL=6TJY|MMQQv=83Z9E2->(%yF%o4y{iNH+8g)3Cd));VJrS#eOHi|f*N2Q@n
z^D&kDx&v$lDkKhhr^Nvzajk7?HfL7-3|t*~i_-df{L~+d601|fUusja#C)|$BJ0)l
z?p6<hS*M1wOq2BS41)8Q?qGqlc??#@^Rdodw6ES%4t-rP4E~5A3<`ihf{1CaW}Yq|
z3&?OY`iwC>3K_zLKh5;=WHlM0hez2xSKbgEQ^ON0u$p7idE$h?`PTV;#+T4s<{*at
zdbh5foD`V^032+z%yf%34^N1u$G2omOAZ~DG3<2DuM=zkq#eBc5n2Gf{w$+vF|ir_
zX#qq~R^-qlxYTonRy2c!9*0Nt%DlzBY+e5|1ojeu#9+)gNkJ*U6c}sv<Gjz&pGNk+
z9&qHdR?cmrKfKlHwT^sNVP9yI75SyaycN81&f(?kuq@Yamh*V93|%5{0~I*S+*If5
zJlS^|xrxC;J+ktu%CY2JEG-AOT&GkHP^$uR$A@wqp(NV97c2g3azhCM<{28DQ9aYZ
zrBrHG=K7Bjdudba<$iC`tHNsQ=O}YLl6<;6f>DctyT+QP^`!i3MIJ7_j#mirWjA8W
z;45DuZ>qs^i+gWUiu>GJdTApG$N_(cYXiP4=Zz?SD@4woIts`KWzX%Vn#Y-CPG}WV
z0?6ggxmES)_&zw7fhPhO^aY*w5VG9A$WVLk=1Fw638M2=h|hLKZ19-XSmL5|aQ>@7
z{f1*dVXt+ez?pw5ZA;wuBZv&=;nakOi&wUl_}i8JW(>i6$GN`DZMcPd5ID79LZqOk
z-55{%IL?ZDKZI&8yqwnCLju@sd!Vs5XPE;<$|MCE?>?Pu%&nEtzPK&A&3TTvFHf^B
z2>1>CBl5r5c(tk+Am%o$oZ<5av5PmYwjgkHt)u^2jj{RRM@lygVQ}_B2caM`m^DHH
z8>^IN?mzZy9(Tz+VDfbmEB-ox1Rw_&j-vZRi@>=$;k?&#-TF~BHhl#P#^<n?O|1#+
zxk$6?*Ek}Py+xiBcQ>sNbYD)%_(%KhvqIAe?ws5%*hlv!yVn<&gklQYG+JgrPUP=0
zQFMs0_ZNeES{T$f7skd$e@K#JuFe~XHf`<9@R|H>V)|JYNJ?^`AKn`_N2DWfQw#Ch
z7Ww7u7&l9!0kR9GcX4d2$%Xmx{$}O?y{f@FaBg47=Mo%yhLUn`LvvUySOPO`87iu3
zq5T|DE#g>>b$fv11NNB#crkj7$$;QY@M+O%L)WcTBsIrR2^Bbb&c+iU6eYA7)n3Ps
zrujiHcmOL+*@;EA;d+AlgUz>w?PJyiTHEwjyx^6*z%S6!{&$ye<JwW-%^P5p3AFzb
z!T$>I+R5iJ!5qeXzNK4Po&FrE{_%E3{L#^srs$dZRyCVzRxq^zzf?;wzM=$ZLwG`i
z)@(J~;tm|TAmIsZLDeowqaI=^pn>egZugwvYW=wtFo1F_d#O+5AUsx!4_(bCSPF82
z1dGH#cYmDUw2~q1_g|DzLIy-U9&s^SendQ1kF9<W8OS+9bRD><pB-q7$T*xKUfq(M
z;(E@TOL}j54cYuvj<`yC6HRnBJg3{QCsMOjLHB%6K~I}PRqe3bvu9==wDKpQf^!g+
z6e=3c#y2c6<Cz0UoY|G-z#O3=<X})qA3y<!A4T9+%V%k#yo6y0B;FQu*8Gt|Xm!kR
zC7K7$OgDM>x9UwbRKbEcrXT2;T^&P9N^4nO&sdKX%io~LmV;BHYdz(7MvSXS^ewZG
z^PxIdl9_4IVTRpge*exUpKe43NP(%lyP;(V&&Nlw>>D<~rj3@*L5^s>k{h(Z#Ktuh
z@Gmp3P=#<4oyZV(04o{{M7RA}s@kY?^%!6Th>(0p*+0y5{D{DNNUn^r>4f4AYOw)n
zMmfgWK#3Oz&iFU<zO1=<Q2VI(f!ogg0OKa_rz#V)=0>+@d|i}<YxsZnF|2LE$z1`^
zl^E?R5h7)g2gqDVJoZIv@vDh%>HA7^vWm2L5hAF7QcjMouifufKABz<c?H1>g<aiI
zmQ|+^Y6YELmJX=Vd;8WizO>|_7qfVNOzBkPmT4XYckMjvC3EQUkv7?i`={*KsiGeu
z9knWh&p6{P#+6pHjxmJ!{k7VF?~2<xV~-e+R?KV22Z5%5zk8Y3!b$k;n<Gv?@Tc#T
z082m#D9lrG2>lLW>(~htCu$ci=+79vyt2D_Tw|mYEoQ<?U@=EC7dOlv>CO1MvPVRF
zHYJ*_Va8YvY5rma8N7OLJq>vywCy;W1(;_WT9WspX-zUGcN5}8t4atr5~c+Yoy;4Z
z6<_MB!3P6RSq5bd2@*^?t~_3}8!Zo7J_+?@&fqYXKAo4@lWA3&twBK49h^_VtK@uX
zrz$I;{o1+;!dKj-oMzx3TtPZRh^EupJo77t?h4bWyYiyT04&W*^BI-j*1I<fcNk~K
z$_$wxU^E~&+NdIYrx~{n6sFi(_#zPbmh0!x2U^sd<So;5@-Xtb%c&@4AJ-B~m;>3K
z2Ng}n&gb*ui&^QHjMcwbr~e)fa<&ZRANguqRNLeG#zrNwe*G&*E=r`&gNIeqT9lCV
zey_-AsBTO*=FH2>VT}O9NsKnLj;YKDcw!0?`n%EHuNs-zR2-Kx-MQDe_8GOZ_hwdE
zzFou87`ch;vIO$8gWUx!3S>$pX<(X7MAJY$OB*%z@K`f8e+-sg)OCg8lZQ%zPK_!-
zM}x;g552C|cIz+Qq?}iGJFo#%=H&a#(0eZptXG7VacI{o(Y2R^s7r+aFE5+y2u@tD
zpIre>{+}Y_zY7f&m%8?OzCy!iMqA8GAcoO<SBkbg!R;~!@X0(fvSmvrsDDj5mt2c{
zK9=dhjF_cYkhBOk*K6C+AfE8eNb<cRR#43nF3rIBYbZsuq-?7I;U02@D|f5Q!oi-p
zEpqJohQGq56aqk$6M}-&VLxC23Hgl&Kq*4uzZSb|r?)&Yt80YijTM4&t^<7YbDz`S
z;As$Tbmko8SzI4{^~xkZuve0qWOoji$uc5Y4$xv*owC$G5{wAo2-62~!=<84V}&l{
zP}o=#ifNY;a#iDJ8gz0usGrIz9+7$YS8eCeI)0iPuf|oX2Mzifp5&a3ovcqNFiU*g
z<rOFMwdjEsUUUAKzY?>GNr4TMo1M@5_Ior;S9lvELIKL2;Mm<H64Z<xd~KwN;YFtu
zJ1Em!+whl=@xRz2UiL3>U{}Z>n?4AK!Q><`{^QS6m(Gvrnada^TP>sIb4#Bxs|5$B
zu@*PBBLRk0?wnWFW$o44NqKF0#}W=IggG!EghdIGh9D2F@Z&^W82F}$o2J!}FA9C|
z{v}{L4Fa~$RyPfP1Z=$RxVmlisQv`V+Pkemsgp2?02O*SP{!C%l%eg~dsNM5>XMps
zSN7RK4}dg#%3uOLm!w~n4*|RAwZ+uBQ)_nUo<Z{7I4>LUoX-pCpK#em;N_vikMNnl
zm?0A#gveo!;o9j@fS^*duWG))UqX0z)~uB&oo|B=wgz$-LuSvQfS27UO-$2|KK%*f
z*(~RV30i8ZAJdu$-Rar*VB2rM%*O=G=N1Cl_GkHZqYlXQX?sfp>mwv#4HZJ8pmgO0
zs!`FXp*w>8Ae1vyL$j8rlg4R?`4F)yZx+HTX=j<CN5|wm(F+AJ57+(>N3bliJclu6
zPfGjYFn&Tbe_Y)e=S49bmGO1|*5D-|LTSguB}Je97p~O{CE&3c-SJ0$B%c4h)qTL^
zS~f!X8vLBz-y1nrfe3Tl#;)xLT=bWE{ncvQSx|XB=}%C1`g3A`ee`1<DGZ(>28XMk
z{Fes*S%^Qx?LQ0gpN06lg8fG#{v#3pk%<518NWvr|8bu`*5yAo;{T5}V&o3y8?Q1(
z2kD()RYP`HPkhaCwnbrosmDnF=^d_@wu$&i`3><QK?aKjl$tL@fW*2bca^JjS#{)q
ze9dj9sM{C78;*|K?S5t(zg`{M%j49Ho)DQj3Jqp?lz@zNpDS**Mi>iNv9G*az3^J3
z4cg0B>43{tlJ<_mV^B^+XiUkLa5+q@vR#<&QhV+BpG3r8uXIuYD6KZ-8BrBo2CDKz
zP!@@<GS7x^hVF@!^GGs8fEzsXbgN>Fy8)PrU^4bhY~$~LWk7%N_IQ1U^Qdm|=hQU}
zulK%pa%+x}Bg6rukI_$SQ5fXBqFwSO`^ePc<GVU~+bYp4GI1=<8j%=KR5DZCR}Siw
zN|+rm-)tVH)(;qC6UjkK7Rb7-W~Bfn+lPs(2Rw`M4eFy$YPS8$VUIsT+>txU-$24e
z<`w#;VS(36B1^f9Chk{`wnLInY@bs`5^ZrFuZCeVoHEM5s}-Tq^=egf>j9HSaNKUb
z?MNh+bALzw&8(c*$S?nBvLJs(^imVsn<Fg{0Hz=ykFCU~5=n46qj%7O$FxB>yjsiS
zRF;65IhWkxQ3xg?0PP}h8laD!Hh@$$ul*BC_?vV1Ya&VCK-GR&4B@w%4!E>82EPGl
z)|X~L(ZDz)``0T}Ef2c^WPw0at90@~j)bc{<5Z9FxSXcuXaRcvJl=lJJ&4>(0c01;
zq#TQT|GaHpZ$M_CzEIP;)`khVKDV~)Ki`OZEBZRl-1cDwro9vh8d1#BB}z#hfcnCD
z>P<4|M|jjp7+|N+qJr6EoA1?c<}U!_ABy}vf%r3j7Yy*lvBs2$0FHYS6v|_o5resG
z2HhVJz6&x+NH3go0}pl`11uV%44l!VnbzDfswe3a!=%OG67v|K^e{d8>|o%pUH?o_
z0E7v0um&HgMhhw5o@*4qC~sYgr6ZvpwVMd2GIvMzuCbhwffrk=$3o@0&ArysDWHhG
z)a#trZuVlCx`1B(7D@!{!BmpZ2LbU?&{QLmQKy9dw*SJ57`oH18uN1zbqaZBhnOoH
z7N$oh-Tj5Ew1Fm;BrG-`q|@Y;;?NoE)p|cCEq~lX1n<$>a|7_Ck=%JWH#$6W`;q!e
zK=jS+>lpcyIy`UHfF_yK0Y;%g?f4h5yTMB&mlh=_2ss+pF4H4noTvrK@nlpYvjdYW
zE1J9Sg8^)26p%2o4|eZ)bZ}TIm^GK{Ppz0YC}}>BHR{15%M0h7YLYp9ZKR+@QJ!sD
zc%${EmYh-t0ZuFJx`n!aKI_ZPCU_slG*GH9x#PKp-ckdGX~-F!CE+>Dd%@gU>floF
z!8r_^MN;Cj?{PV=xs%nw+)-RGIoNEN%bDe(QaN&&0i#?F_>0D+<~`gZV_F&X;3VXZ
z8CWha`HKDg02O&`>G(1vkri5m2ILRj5>V99V8kr~8%lL68xsT6k#+P`DErOElCCSx
zWzKn(UEw>_4C2aNONd>d%HeNegKjiNe=g`F0ZngN`f|*FU(4>$ov(-&gTC_n<;HET
zT*RGfHgo5SH%((^ZlJXhYS5B!i@aQ=8}GTHXJJ=7?V(F_1ir(xjWc-%Wxe&_&N+8^
z53Y}!aG8t-H`Ehl6;mJ*%vqlA=HLPr?mJ5M>DHTZtL8gB*oOBj0Kb6><*=pcB3#Z4
zYi?)|$08Zd8J=Pez9vZ41aoK@aKoKs6U!RJC_MTyKZx7!rt!sU2RJBUqzmxXt@~`B
z3gnj44wQVUPl6`KyrKh6^o*EI;M}G%MI4JZP3)&3+}S(PwcjY!t2TVi?c<R%07vIJ
zkH3zlPZ#3Hz3jt5)vi0|U5yZ9;e#M_x@xH?Z!AdSSI_a&Jb!ip*?ntO)hZa4SM<`q
z#%z+ocms}kSTxeFPo|u$<O_<pr6TCrdyH8iKO>-wRbGJu*1rs#Ffg+S{uUzcYp6MQ
zO53cf6cieWfJ;^%cqMGmcs1!??8-+TV0J8bey2w)HJ}^IV~aDVv2B1wXMjjO>^t3)
z?LNq-4TQ5C%&Cg3&Sq1Cku;61?43SeFH#=auwMD9|E!Sxs)9Nnd5Xv4$LnVx8$X1W
z=0Q1&Ecrp;soZLQUuzLe8*QR|H8E@@r9l)!W_w7Wh?UxRJ^BLe32zKM$Mh>(>!(74
ztD`^QZ|Y7T-;NAJB;W=*aFXzFZeR&;hS%BmHfS_T0-M4}tdP=0L~mv$2GW`+KvEs0
z_E2|~<XJSyIQ}})pMC#%&~O7MjWZV+TnYv<=w?dp?f|4i!gMe;D9c@naRFIvwxRLk
zsE;f_P@THqlWEcYK)@65>`@uG2Oxr54v45|FJ>C5`%hWIFNcjwwEQf(|FnkSYCI6^
zw--lc=QD?Qw6i>hT$Mc24V+3zh0T9Df<@!CmU=$wU$cY*Cg2-Om;AZ{3Xf<>Z$nm@
zWF(4~-~cgxoNaUx5xghwcog2st948YHLWZIPigvDD*4+R@1NuWY*r)!I@Wk0)-0mY
z0g$(S=mV%cOHj5yhid@EpJ`J|R~7nbwBZfqmV?;GSZ)<_ei_iNniR@LpHg_3o9a?|
zx55Ol0psm5ElO3Ro9W3&9LxspF?<RM9L4y*$~SMmt+@JC>L1;MnSB)CE&{9QiqMrO
zuo)5@&qUj`r|DBbjifOY&LrS9ny$fl$5fbQP5Hn-V>=At#QWE0#BBn<<Ww&8QpOY9
z+emxkMo0fmwxxk=`{gdWXm#)~b@>x;P|aaf(eA~9Dp)qJZZV$^DBXQCg>-Go(?RQ}
zkqU*M4(DWG-M7!xF57Tu;w5wS?nt12o>Hiu?yB)B)@!9Q0Orc<i0)22pQO>rqt&+9
z0j9#k>}`sA+W)N8f3mIdOGmWkvkUv%U^AC`kkdW+ZXj;*mOqHQi=|hg`hmrK#x_d-
zEqwct{V1Pi9!>OQ$cT8=tN`+pa)W4n+B<f-T4ht^a?K+ORgFW`0;S&&Mu!2cbq08e
z5anM`@Yc<|qiQv4!<`myY(4$f$ChmCg<gs=$Ep@9sycX`f$i(c4bOShO4vCa8QffH
zOHJx2?@4U^z*Zv-1mlu4^Md6jc_?oXFIE&k{WNCnPM^^zp1=0O;{&S>?uFW13)$Ka
zG}ew+C>pavkMQtgPz;M>^8B%3_(bFmwGv)XFkICYa5nL0tpd1X7QbIJp||FW1#F+q
zO&3D4ucnH_*$QGtcG6QNm0}k|Sn`V_`vxc`T?S2Y5|+gol7_9%H@nm2oHItxg)8)R
zL}ZMge^|ndDKeqGu8*B@YB2Fiuk8zJ?v-O0+s!14R+}t7(1YB03*zytvzyE;c{NNA
z9+7mZiUMe{FQ4coud(rYcDbKf+*8A|6K*d<#14Fl^J(J(K8v`e;l05Ca!!a$<32Q{
ztf6eN+XjCjLA8~~&z3*;;n-h@xX!)REdEll%M0<IZ;4&r32~K2Pe)GMSf?adl=!pC
zxC>HW>=v<kc4o!tXAQ!?*jeO$_A5~#j_=E;pHO~p$fMa4$58|f;1FECwv%sl065Mu
zkhO~}FI)i%V>xEY{m&~0nE>$i$;#vGF9Dprase^(6weLtS>T9T=XnRB2w6t^&&Lnd
zhwmVTw*NQ=!qa!hceiaIhU%RM1d?I`GxMiys6Hy0ZT54^BQ_)-eS5me)G@jZiIWOR
z4xn$0gz#SmrrPY;o}cv`(8=i>P{yZ-$Fi!lwkLI@CS~<4c`<?#%t#B4q^V-I6?$;I
zQ(ddKtlSkIH(28DV-V?^>(+-Kqq`#h7c`IHFTMOS9Kz4$oklji;Q8xe<Z7MO#V+hM
z<J0IclSitaHN{$3$aHdXE2mJe`bFQ;G@;TDpC3$;vt>C%-mWCMVB?5#x9P7MuB_hp
z0C4*wClCE7qyFIsynff2SUdY!y!%ABl%4X+GDL&7jn;iy-S~+)IlC8@kuQt#%Zvh>
z<Hx(~99>-$MKOod%NDm?*^wYzm+{!iP3wMF{r|0|skm7FQl%46zSS%HKtaLg>FDua
zZ1|l!fRxiOX6gc4sPjfdzT75Rs9d)M2zI0-2mB+ixeQ|!)5vnO2}ipy*VdTuV}JfH
zfbSt9_me2sWtNoZ<?zOc^u{z``qN_<LcA>&lrmpJwA+(6t=>2h2@(y?v8MvRCOf~$
zrU=X~o4tFz1AbVQ)X+84u=Jp{lZdww?~_&~kuv)tXEr5dVKF$xVm+5Oyh#ZZ+&4qQ
zgRNa6dlE4jZg6lvmxAV$_ekZd!wrdOaNml^X8Gi60n?OR&hC+#Q5PEWakT(jK?G?6
za*M6huOm2J$)mYHtSe@3dC<zpmkUbhE@tlpeW|{7pYJWkfMGPGFSsIrkk*c~dSa?t
zo%_%cJ&DkS_7Z>BKKv%Y5;a*-&2wd{gBQA-9UL!mg@dt1UYP8wD4rqxA9WhjdJ#f@
zJt2Kt2M0z+KlfDgS7U$Z(47PDyB4P2a?EzQxy!a$wLR3Qn{D<LpIyg9t~RcCTWk7o
zavH&kItP=wY1U-DuYTO%VcurTefQZgemMm10dHUEC$=76KZ37!&92Xez)`Zl?)`Z(
z6v*bfQ#u$(cdN-Gt@7OIZO!5m9gF$u;ri<LX(9DT9b63Tj3ZtKzEp1Ap3o5=V<&6l
zc;2gi{OJC%?@bf`pJ@6yir?={h%yio9uo>J`}AiK6iDcA2ZqF}PulD^jhQ2{(6TmD
z<_9z2B)SP*sII|t#S0COQSvz$+ku&9W}-rd?bO-oIB0X6hFHrap27ZFd15X<5zr1r
z#?DDT8e0H8Od(z_n;QhTQA263BkI2Q;FD(*_`TDU4C4k{b)N`+zqbeGK)ik##Os&k
zhJVdFuYMH*2b#r97Ft$<wu^*21x}mWCqYN(1St2j)+Q{~JGWT_UQhMndT>K1YbzUU
z8xD|J+i*}USN3+p!gG)cA$U9e-Aw$<^tNr~FNPo%g~OemeQ+q^WHe`FKah6LvfoxF
z%pAe9&1MWiLUNjeI(>fB<ia~iXRf*gmwgg!>+XNwR&Y`qrLc~$<xO-}n*=t)wY%xE
zLpZUcm^}Knu%6=7=i}LFVMl*Iqfs`Mosa7EO|tUo>d|t=8sus?13PCeW0s`iYuK0j
z509Uwr$KTz@7$!5G6>uuQ{k-kuVumb8GcIx%%s*@Is5pw!l*g~J+B)TsC+WNfkRv!
zKtFIpcBy<OnG^Onit$LgPKt@#GC|CWf7wGWhIQkRdA9d$^ZJ4NDqDX9h-?B7i2;^5
zu+cv?BUPtbn=jWU<K`FL;eDpsNl*Q;P^Lo(i#YD;w&d<S<W3Fq!}hT|T?cNEI3t`b
zcaC!z3CjKmlw~`w$hxv0*gyA}y}$n3jyuQKeg((=#MZQgowWCFuJ3%)S5e=jEM}={
zrkd-IZFSzs9D!oZn#Y^#7ptaJ^E~>4KYDTJR+m<i0f9yb4m=&*B--ld7}xANw2^lx
zIqasrSJK=~xN!GFsQ+7^aSi7lTr=L*w_=tqFxBJTbqffJ57(lrjXgXVR4F#qPzuid
zJ6tzD4TSEFHaJT{UA0H<ZvyxZ5Rd7&5QVOu;#x*fFgQ+G8=EulwH$<PbI+*lE%Hrr
zJ@<I9gv|0in{DmfO(?GYfA^#9BWrJ690I{}9`68t_lakTe_Itxf$7MrD<NBPn5DY6
zImH)?6;O<zCBsc`(j8J=b_9b=0DY#6`F4t+<&wb6dwZ4n)(B#Vg*`kCxiiI!y(~BV
zBw>JHGW8OW1&Vme6nYH&^8=;W!hq`sAK4sxn_LvIm)Wp7ETRsv=#$^LY~|5lt5jS7
zJ{6|he97;5)O0!WvHf;~V4<No)Lo0!8{hEC>{XKTzt-um#;iHRr)lbUAYs0Njc%T-
z>xyk<jG!^>EaBSe=5dw>&L#+nyJ}%&5!(&^i$dMOIfRQVArVfYkHEwn?3`P^H|gI!
z@F+*&dl!KX!!Ccccm%6O<rmp|aY_dxspg5{-&@Gt7QZmL`?e03XCv@W&;M=ZcJVP+
z`{8hRZPM}?c3!HvalSlvr-bS);<-CjJ*64o2Fnmj@15;Xwc{=kh+f|P%4w(Wdf-<H
zJ1gJw?gtP$a>IdNEmRM1;`!Uff(+{&z^^)ZU2-*n<Q>r6$-gEIoQXBuD*bm?nWO^<
z!k4RD@<MiE2a2<%6_kO!==ZI3?Ta_Zwn-%5b4y%zY;o?ku$w0=7Q5Qn&S*HhXoIG%
z=d-OH!do4>`-;OK7Ll{_8PfrM#mf1<Nh+!sYxh3Y{av|*_2Y$P*S`ANUeIaaf9sSi
zAvC1CZyfM8#DNQ|oX8jZn->qc4eUWkOix^aa<R3QiFT^T%Lkw1>&aj0dy3miBFBzM
zh=;8ScdC$bee6mO1U#Gn-5Pu{ftc-!0Orn}=b^>+L4UD!Yd-k`L&Dq<>7-?;m9+@Z
ztJ8a=7O-yS17{gnAd=Do^**9uVGtIuZY-gL1H6K!_tx#e&!Bn6BXp1Y-;75JKN<+M
z=+u%f$|9=_)%2z1%96YAf^ARB71o^i9R%eiqN{C^4i&gndG~)}-_d*()52#w^tG3s
ze{~bcaGRfb+{tO;xXQ3#^m8T5tiC`Q6S^23O2@2u+yxI7t;TY<@8{=RJp}^eJW~CE
zU){wvU2v$x4|~hJGSE@lwGVy)_-5xoRE3Las)pVUr@o{IzAeDJOAJaWC3L=N`*OP1
zSLPLwMq*Xfmo&88P<iY1KB}1b(6NN2j?0^ynJcgepW5k~=8D|YtMLD2S22y-qe(OC
zKJ(nniYzcLk@0DNb2%S?ajCG(eBD>UZDmRhQUfyw<JgZ3t5(ox+#rJ87Pk^az>wWB
ztR*TGH%Mi^KC7J!q37`jfk-Ki7b)pK{Tm+h+x{UqAL4b-=9Pkc9}vTu0sFthQXFV{
zTGD3c?)t8}ahDBC4e&8UKHb2@+NEuLh?C$&g$oUFlFt8oUa%}M8>3_^>})^7OZx0l
zy>nj^<>q`b1-@sizV;Mo&G_AHi}Ax6{$EPGi%`Z8!%m*1c?)dL59{+2M0<RN4+qKB
z-Kv!yIOpbE-r9nYNYz`)Q~cbT-KWXv4-SZ&$X_9L=gV7~@Y3J>gJzBueJ|YRpPJY$
zZ0pT%$>#l0Ic*4}_3fbu2!ADb2leuk#lM-=Ti_kZw2-%=-o_yFF9`0%J~s)%){OuR
z1DlT}8fMP-2PZKj`TeGdCpG!kTmtzt;&Rxrzlm@W?_89PIgPn=-TQxS#`=GzccCHE
z5LNy)3xM|K2UltOAMCw%T$5+_KMsfwDy_IEib_;!sUn~v4j=@zNb3M6$c%~*Wh){h
z2?4F*Lh1lyB`O7Egvj0r6#<nInPCWF2w{g1LU!`IZfq;~v_4%=pU?OA>R;Z5<X+cx
zopavrbI$wvNq=JMsb?*0J}=n6t&q^&@HiyqsT);W2E>M$(&2?q`*Psui#i=%!*stJ
zOQkL9Czw0hMjd^AvbU#z_jS;(ngNkltX|+RXE;%oeK4-#`2I6#v`y`np^q^CT_igV
zANP~uWkzql2E0rFuq@1o&;4dtyAvQ(%Dtl8HXpkwmgex~o(k9f>F@;3TR%n%yW3Wj
zbTp(fZd6&n^}(rVF^eIoN9dLzmW%j9Z<eiZ*pVEjW}vHXvo)c4lu5yT85o*{k_Rgp
zuzy8}+oxxMXW4mYWDO<Ct^?(vw*ZXv7DA5Ko6Dw^0SC+X8=Qn3@8BqdbQ3sUHoM`E
z09`adC3ZGVAUN0td_?=V58m@KAW}S}^KiDO&8c)Zsr(-TqJI|MI(jWB!LAbk52gco
zu)CkX=jI@cx&Ps-&=cXxcCW}6_F}@C*Ke9E{QkM+b>gB@$;zN5r1j~?PU#n_;NI5V
z4sJbm-V&%+AL&j3==P-aw($*1=m)Xw3SQDfGP#z4zdo~HYm|7i8%~9$X&ON2^#b<q
zK(Es<F;MhSJACpJFiAlAqX>56?R$U+hA3zru%xdet}8nNNA)KmDge2J0?RLrrf}~g
zOp?PC$Te@*TmqZIy$8t+>Br4t0TY+J_t$M!j!atF5^+WVvV6CLD$Kybic!JC0ta!y
z{Q0OsY)VE=C`<ST_XhU}y^ERNoIV9?D@K98*o6H^D5e!?Dc<E^?KoydlqQX@)9ilH
z>bz{v=rbr>IVP+xB~b22@e3F8k1lqtXvN(w$4L)@VZHY2!}=76IVtX*lpp-_!1Y7i
zTMJ(om?Ccq94aKM7I5>f()75}sjwux9_4SoYwxlIw(IG1ns|t#hO9a`oQ~TT)KPq6
z*^OfN!bHj%htm?!lr$aP)^M0pkXDnMC}Zzl`Ux<`5a`^7uy|kQyehEd%LPx(1BYly
z))(&hUJo-*Jl!!`A!*UV<@!ZOi`!IrZG~$(Mla&LqjOvGrFqh?PQ1oRoc7~s9pPC3
zrUa28o40)J^%*wrpq%%-&ZODTGP)01;WN(ZL16X#t+Mdyun?CHceZMUgjX>dmXV)v
zu9#E+a4nZU_6BDt>(2E5bt=JFu`~U#b&D;ksFnK6yW14+9GhRGI!PMOe6UhSgbmW)
z8mk9Qly+QrwikuxgCt!MY#PnI$5<A5ACP~>50xOAAn@yXGn{5B;NHX@CvWQIXq^k7
zBVB~+(jA9HD+1_g?6!637m0SLwnaVM5q%02y|jAgXl?pi|AZ{}m9t#}6i7>4z;YI4
zA6%<3BPzF$6{<f7I9bL_#HKr^xZQWFv<JxWh<x%u%a?8C1R<3IJ3LBG_P?mq)+b3>
z-%MV=q@;e->@i?X3czF=wD@XzhO^{LWiS_nv!4_o^B*KO-|S6QTUObvayuDCRC_7e
z6!h33-4biJVY89t8vtI3&|ja4EiY`CJaxWN-NEW=Jzz{k_!9sZ4T3@el1Ihf8Si?n
z++onIjGw}+g!BGC0#B55P;OIU#tz&YrX{^y=lExq;k_JybT{oQQn9hw$_N#TdAG}2
znp>y7qT+Lnd+Hsgh0)WuFaPly-Sd9|_0H=wIpPnz0Rr*{nV&D7{^maU8bJ0ACaF{9
ztgECsCS)jnCBUDwO8iAPh}W&I-Dd#tx)Jb^R*2V)j{Chp_gLFhjs|ePNnqX`DFb<%
z_scrykj4VEf^V2ko6G8owA_*P=zaSE5Y7E<K=jX|+k&fdzWcB500sxD1<>(NHJNd1
zs~?&Nx~yJ184oEG((Y5w*nYWYO9IP3Yf}!9J>tN>Reoj7E(cYT>2ilQeY#Qj|Fth@
zv6qw_yAIkrCZJq8#pLCT6iB8WFeH+42WzSY)mcn+VbykrsO?5SMSxIV#8(s-KB8?B
zb8oBd=J#bmUO~yuq#PIz7-T#ieLa81gnj~2QK8CL3b3{IP0pkC=O;44#cguMOD8iv
z!--5DMxdWWWP1q!^=(U9_3QQ>oJ2<`ci8rBDE5nNn;(+eFa{3UxC~s|TxpHCZbUM;
zmHx7!xJ+Cj){u+iP+_yjMsZiK1Jmmd?rA7LeTMq%o;x75)uFw@LI>9Rb=#yl#7X}@
zhgyFDORqg&{?^Z6Ru*tGK}HaQQvV`phRfLqj0@Akd0;CO#}26A8XyO`j?(UXBr<`{
z%(bMj;{%YB3_+U)MJ%jfa)kj&cSAKw#3{xf!JOuSmg{o9CA&f}l^XUldOZsfoDjH;
zzAR>$=UWaJFP;LpdD6Z&0HWpLH0pmH3jpS$_h$3ol_8!n^W!Xl;HJwVlhAd@DS(<0
zQQm(Cu!t9Qhqi_yxP%UM|I`4k_wcKxqc4*-`ERIHR%PZSv=zN=SPH*7xo-Io_zy-$
zctl30aQ!X7H<Q5hLo1(#&oHaD;2MdTM?Ym}36t)9N9pb$J4+aMA9e@XS;POIWoKiI
z>Z?Z+w`ZSG*KlpiR4f5WjbQ{@J~iKsif_NkT_N<ruPMh(&S^&1<e*z(f3&wj4}kZ5
zFyCti31B+sTfB@C_MBEj|EA9tM-6%J=Qe_5*MA`oI}N=~%kBzX)&&aJOn}$`30b?f
z->A*7b`PL5<;k*~-N^kviXI(t1$>}ihu>t;b`{+EV-B1r=qtAYR&~NbH3HQAEb6~-
z5`CYm^Sp06$5+%;y$BCkmm?H1Ve>lGN0#lB36encV5qv)jFlV+iJIVM+Z3?W9Y8+w
zmlCS!`2|A$&?x|DMTUO_=+@%nWbA3--XJ(Q0DQ!@)qOJ<_Xb6fhYmmP+J*@F>(Z>h
z1KqZ)nC*$YVFWzbKFEW8{&GeY(5%Hz#?>D<nnYyi#7-q*Z;7?vWoDn^Ojnhq>~#%$
zP%~Nv%g^e0bTRDBu~V+2_jwSyZKkMy*L@6RZug_OKfD2i=!o*eU#~lunCsbYnB5#V
z^lB>iY6ttT<zA<u*Y_FSxhvNN1|MDeQ4V>+@s!E3k7ZGR8$<<XNE}tZ5jvHc{k7!o
z5yqE+iOGw8wW1_YE6`<=2_=;t!2+y*#qDEF{7oPfXDPKWP8XUnl2h3ZA|pUXBm42M
z(HkzNP669=h|=3n`1Mc>NQoVzynbM@3}Z2bOUAsqu{|Tl`s$s+sc@xHGBL*RcuPS&
z|H`OQzAb%{5XYh;pkaOd>%;0QTSR;I9X#j{WL!T4t{*0BFPP8kTn{NF3bX*uq(6SU
zdwN{iqW!dYBC_M&cZaVI7%sOO_Llk=SE%SNK@ZapJbagNZ5tgRy;vGa*?xEiy7dyH
zrxLcW#6YjOZ6jC=#SrMM4sD!aKIh2~FYXk0Cx2%5x&5i$`gM`H!gSolO1fJ|pOZA_
zUy{kRYq^E(NRXhtIEolWEL=K4pa6C}J9n)cuyYSRAe*=6n>90R-hE?Wv7LjSrfxqP
zp6F^D0>Em_3Wsz@FUOv!T#%hjM@Z7q@vdpEw#*a=*NV3pa$)9yZH4$>rxJj&O6n7f
zt+ZvN$xP2!-kn5iR9fa$af)bzQ-vZ9`SA7u)Thuy;SwB2-sni$X)ccfZV5-;`tA9`
z5s-fi`Y!|DVYKQlNM*dYoBz@T+?&*<`BaG8z8E@})*7(awM8Eh{6EaW&PWAEY{JW^
z%m!tIW7fdt<Ft;xoRLAzJXRu^lYl(V5}rk-wmL6&Y|DtCqohlMoc*`H$^o<}n;Cq+
zkd%yXFU`r9CeO<u&6_=zRe0|V$eC(`$yT6zXU1IEI~&Y}0NLbGOYOdOlc#p1n3iYo
zuEY&mrI=>m_CM@DK70(Ra`P5d*$04EP3W(lQstF5iyzo>UK|hann;vUqi8%ZkqHt&
zF?L(}qUrB?&-FmSLSIRo0;GvCKc!n+C<Vq<X;V`6jXqq(@Uk#RKQYM8RxM)wn6pKY
z9nNjd$(i~}*~;$XA2Dr(&xV@vcUfIZ-tK=#y)kmWwfEJ4HvsyWzS0dZXSBb}S_B|F
zSE$J|QH@3bX~z!}suOj$0pC01XbvhQK~Yd&|L+C5jaz4rB_DncINxM2?~ZQjp6M#*
zKJSpV@2#A)evv-pR`{x8(Gielb^8-(*58h9bJ8Bks2w>4O8fkwYR;Q#E;DZJ=$w9_
zbp7>p>^cBc%k=*5CtyF!5?30|_B_@Nw2j4|fyeVWG(IEk;i~dlE|0=`(3IJjX)R@2
zW3qhXDqgD%gHRrZlkAeZ_j~V`7ROZ_B<~o3yuxnFJ0C4?axi)1j?9Y4jw*~;QJm^1
z%`w)wTAW%2!!7LZRU331i3tGIH%;@!uiJM}BUMQbToUotf$+_Ug(twRHOEH?!}N>K
zC|;Bcg_MG7pUvJL=RmQ^^^!AUwyA89kZ^JhFuiy%vHaarXCx*MeGX(cautIcD}%Sz
z<%sJ++0_5>T<b5O)?df~Uys@3*HI&*1-YD1$mLx7W@aY(7BDV_Vb%rJLV6LM^)3TT
z;k`{^GPuH##&-$Ye97cX52=JD6qAw3Z$Yh#bp1#2Pe2sr*bS{mI#3>#?G^tMdfnX$
z2u`=D9PA04=*Xxtn)!SxJv&O-!Ii|xjM86z1Pg#7T>hGzrxrh#GE1ey#frBsS{+L(
zl<YZEd->48xpk}54n`k1y8kO<vrA8&xNX_-#It1ifybYJacYh8S6kN4JI{J%WK*<i
zp-qWF=g65{*IWhCoEtZGxQB&gE&NK}W7We?pW&PvPc`_{o@}a!@=9yu921L=GTXgw
z?dV|LK4xB>oE8;uG}}L?-`i}RJIkdp+-Wc%1$EP&aliXBiiO+RgwfpJ_{YKh+9fsv
zkAh|7o#%SW+V7jUQht?;>>OhmxdnkT@~b@E=kgRi3y-E}_x?chZyM~`P~ZGY<S*w^
z)igN95Vb5z&z4M75ATKR;S<uc-ztxgIH`@M=Jn00)$2xx;5fI6Ay+hU=p;;TLG#`~
z#WAA%0Jzq)OUySbK?dd;9<07%b`zNr&Ud3k=Z7())Ffjb{Pt!SMpAYSKbi_t=_Dp9
zz~xsR+y_o#eYk(x8CJ;zdF@+OAj4JJXGHTUcg}GpZkx0>vP7n|BxiRtqxrU2qEkwf
zzXP+^i<2Eiv>pm6j-WQu{;Dsks&wwXvJ^buJsFiwblr2scdzcpx43;y(2#qc<MtuT
zbFJ~!y!kE|3z-WE<yDB~a<6QAG9$@`UR4C7$tTjx)eli-JzDkoGW1=c_H&F^sh7<5
z-4m=j{bjB4Ts`M2l1!-D8*OLT>Me)dhVa$ddgGPOcgT*^BDWC#OzZDGT68^Z;yO}4
z8?c$IYHND3EyY9-4rk}+4OairuVHmmI`MOM$gg^;BCB1(U2|P&=1pysyC|EcqN!@C
z<m7Fm9x{^Tv<#{5-^&-ZvK_T>$CA&wWn0xIM>$qucwUlWFDHKf4-7hH^R3rKs+u;8
zR|8+`Rr=CN2-`q{6LmhgiV}2{u4{@juVM_Yf=67kZ})YyV7GP{M&D`lXYk3cdo0*S
z#eQ4WQ#dtjY|E|GKCC|*d^i-#5N0g1suoW4fe)Jus9Z<gj419vb>=cFq7R-~>2z9;
zSs#tkO#lN}>^g8+M!so|r!0BTHT{{_9i7`eJI_A-%rHGDwJ%#MU*J3p&xoSsyd6wQ
z=e}^ZZ@Zg9D(_HF@pIdkMpi|K{@E|Z9qo~Ob8ih;CX;JQX7lVnX9dk%F?$~NfrPHM
z-Cm{k$E4b(GZR+U6MC(xpR!SMT1C!TdQpCY@le~rfF(3v+L?wz6J#8*b|dbK*UwV+
z*u+I9!a!l1Y#L1V)Ewgz=5n@y%x@O!{Jh|yS=+fveptD4YY``-La^86t;&cM&O-M<
zr7GV9-51%ozdmD6KrP<SNv}RD^$s!%g}18Sq6K{|67{FP_LZhe=R)O4u$<x0I4GDH
z&Rpb|7mcZO)|Q@**i7zYzaV+*uqX5gxivQ?A`?k`!*Q4PN<(ZlgP6Gn+c)516e_aj
zZZ=1<{f1J%Cin@WsOpra;;MQYrbAz|4u${KQxMB%`)VGWSP*Ee1!kekH;=%)n06mE
zTC&A5xs^-M#|fdrTKqs}b^_G}Q-k4|qShGrSfmWFRS8~_?YP>=r@G-pJ*<5Xdl<ZK
zd~M>tWaqf92IKje|HQ5zt?N4z!MGG?A4oHL9Utd1R%t42C<@a)SKgr?Tz}+&60Qc>
zj#47v5-Bye_Lt}CU%F=@#i~&xo?rl1e{29J<S;{J-;rm*?cs3!BRy&)yjpD1xll{<
z(3*z4ij+iCl=GXO#`-v2y;0WSKy9Qd*OxQVzdr<B<nIs6)4%_$`mNlXub+ZDM=$P_
z!+1RA?VRxtD6`Ij<?iQ5->x8??4#|`&g4|Ir6Kh<==PBoHrKceJgsU<$d%r>RWGTG
zbu1U@$+h`utqgBzy<jAnI<ksHsB}7RJqkwOO+HVCULNW`b55>U@Y*u4t*61h;FjQ!
z0lC_;?+9LuuEr75I;sd~N|5FzYNS|Fc0mF3_SJ)S<0S3A;zs`s*H?qLJiG^ZjrN15
z<llQX@ac*h#?;xqL$gl~oPYB6Nb9yeF5dMaE(uzVdxd?47TZ>`y9lvihlyDp3}>Rd
zehuA?A!5z*gma!R2rR)vH+k<1`zbH^ZXCg<JLfdd_Pv;7kz6hG#(n1AL<q1%J9{ay
zT_hX&suU%7W2NZ)V0H!HMw*$*_G2Y_44H`IyAp<&o~`#+LeFck0K8m<YsY``zwgc*
zy!M1jr)1qk#-5%-3@>_$-_4u`1{{-yHFB?YCQY6-bpBh9jYe7X#_q^@fCnb-1TWU+
z@_PKt_Yk@0HR95Mjs>P;@`QJLa-DBXuUB90f+ul_>qEMzd^Y1z??hu={W4WAiSt}D
zn6h?Bo0*3594I1I>Exsy{9u9H4<yUIK^k+6#_Ld#j(E<isxst?#=<7eNW#M7=WU%h
z*vQQ!Ee-9=)J*%xd{0UEiU_McY2fh9R=$@ko-j8VEN28*&cc@3xih9f%woA7Fk-Bu
z$$Ts!w+L<Jk<`b<)_@3SS>fSorm!fWfb!J#VmL-jrbcM{g>%B41LzF?b4R@)tws^}
zZ1MPpcSr2&mCj8z&}nou-depoVl0kmOwB#Y7TN~tMBW7pMz|0RFCu*V%(bJi<{?Rj
zE?g*D8zMmIHM(5~9?Gq9|423PP@LvPDBX<K@W|Ha>KD#~>1%78PG@WBW^V)Dufbto
z4EWH-$=L&Vzod%r?cl?ls)i2W{f>mTKKL<E3bFu3K6lY>rI}A|AlJf_A^9~ERd%$J
z&Q%W|s;`ghXeFgPXXGWCinEXoji=9<hYHS8%9CGq<JCCz0U^ikmBHbIiDK21FK(j!
z8_b1z76(4#PCOw@x}-*J7#%o-7@-~4vwBk>7clcg=^Rg4{GrDmGl9i&Z?q3*1RoA5
zuf(4(yKrYst8Otdxwd(*-a74p_TjJ43AxAgRBf=<mCE%dH_lKea+3Erq?o(ppDOrz
z;YehMMdKM)&RH$X{VMAo!|U6URYQnI#NHbNYfQ3G{x-+r(i;=AZ8zC{e{FKVM~d^7
z96?sNX2Q)Sz=XRU3n{lCH2v0#_N3R$&O3DK3UJR~0CV2)73%{J5ff+^DEuMfw!ICO
z-%qgVsn+&z4RP8Clf)p5q>o9T+Hi-5ZWHg&O@xBd*}fBpPAh@f@iG)U9(+D)X6*Q-
zEKU}0H;)MK54UG`bf?Eh@D~SZeUdxz3?@+z(n6it$239UYy;a&zf%S$iH1(HxD1>R
zirs&-O545UBp?Kr>FKTGC6K<cEA+HZF~99Cb&@=vD`ERt86aIaEob4~qH~fxatsbT
zRB?<$J-Gy|7#Or-@@i+#oHy!nwX=B_&K<{LpX?107Xnwi4a}D0C};a)NvzZdYmn^x
zfkg4)#-r@yy&<TwCHyGr0mMi|azd2lF=s=~gcmf7JD62R^%{$k5BFGYG9mYQ)td|B
zlau<6g{e|+tQ*%OL<h9$96O*#N^bDA*nfw_e=|`ZkmQ`~cPzd<^VAJYH6wvAx$oGK
z>Xh|*c;zE+fBP-a4*B(PEUC-Cg5J=@&%yZJ-k5&r{mTNM&fk5aNCsKxY?UI^pxaP#
zkwfgwIQ}b3DwTm|x3-Zuev#A)B$7Wu4W)98bo2s><Umg0#cG+LVkm4;`3CdTo1Yql
zpvw!#NHU~+*XwFqhbo;rQcO~ct&m`SZUVO3qbZ`X0W|)5?9lYTm_bfWP>Z45L`(8y
zxEhfwz<}O_2K3Hzg_#4o6wHc_@m2om7B_4}*4goq1@ys|$Fv>sd1a(yHc?4JB3{cs
z_Q>!3V~TFRj7NO1n%sgXYXS7oT6bakGzu)qU)!luGIAJjdJ4!PtCY~p{SKP!p}DD>
zY*I=IY!vEwlc0WErY5JsH>$kpu4NXbDboT5Kj7f9B{3@(9Md-w6vygJuV6vksR^c5
z?xODjDx5x3w@*Eov7U;)RshkuFIVi|XF+!H=ao`=eH=)!iB0+p4<j($bMa~)3DAQ2
zfwS1ax1}%pn*iW6NNF3Wsr<39EUmCByek!1b~a*bk3lNSO7?Z6s;<;^&X-;xR42iB
z)h1kk@+J}WY9M|`NbY`y97YlD4`$XaFb|fkbe{gQ0!yx}T2L%!=<iwhBwermt85~A
zE3N5;&YwqnAH|NGV%tFUnzz99+d~o1&CiWy-qa5u0?M;bJ+lJ-pwTU4OHN{xBW9pK
zjKOxHMr4bQB_(7xI+>(6hi_toI3SZ6*~|DB0mpv1aTnV;)iqW#QN3|P4;G4V1ESyt
z;3`JYRf5&EXI@1XKr&y+9DnbgFso`8^zVXu8P-fgdIa#B9f{khz?pXVk#&IIEHd}N
zS3hvJ4diCT0|R;-pl_b)>cN@U&DS`g(&=CJI31_(-$a1#vE<<pMNjh^A29_%QJ_h+
z;KDlig_~r6?rKqIYiVa(wp0=3W{o8SY}hwYi8!AmJY?Hr7k7yH88#$mPpK%<Sdpaa
z7UF~tc)+fhofjZ~4a_k(;D5*(2Z}zhUJIUruzRejQf)|+2%$akUxfBPiX9gdZ39h@
z0;ecHxeAPf+7i|@GXdG?y?`__Rk~;Qiuap=-S5YSS%4TI9<ALiBbN<kA!+TE`7_N`
zuKWZDT8<2##2Ss7q7$f1QAQ-z4t5tT_WJM}E8-L|<oAe7P|s?P=QK_Dg-|oW>-OD4
z0G!U#D_}|6^w%oRbgqhW{<8K$<A~d7Kj^^3N7xnx&zGTxR+yQeG!pH=c@y9mNvuR)
zck?p(;2bk+G1pH7Vu-weZ$CDGuL0|(vKy!+{Rf2in}Olhdy1awKg<KL>HvuTi?4Sp
z%zTI)5aU%=yy$$<)kx&>{Hi9HM_~X!fDb5pP8g8lOE=p@cStlhn+bJ%!k4jWb6gqO
z7mho*)bn{BQ%8;x+rWsHgMjtOf#T)mhyn=1{S1kMyo`;JvhJZr&;y$XO-FcJk&)(p
zlT1Bnc7eYx?y)QmuIyKIk=!&mSOeaEi2Qsp-v>DTq4dwM0!ROJIdDBbyRR?&IY5G@
zmgkppa4^^^-^N!yx@&%xzLeN_U|U*b&QZ;gXr^`hg{olOMT@=Jb>!HT?1F*qxZGHF
zg=Pt-VKJB-=tbZr&7*uj2<le@Q_&J{owSun6I<n3;RJRgN!9Qw5<l~St_}|^2Y2mL
ztAQ~*#MF!$Mj3%jRN+KijZUMKo;`9AT7$#J;N`AdbxL;HqXWzu4n>!VuPaqUOcN)f
zynoUHh^1mleOj%D3EZ*#Nq!y(GFlT^PgsBFF+UHy64QA_;63F23%q2HPm_IT0y)%P
zW9gg(%5j!o+vP^@l)XLY!l{w;T@P>Lleb(&FsOv!@OsOA=BP7O{FJrZu}AY{JPcx=
z0Ndmb5DYDI*9X|`h@4=aCr-9e_+lTyG2E|7Q&$Cd?kLlmbUI+qiI?Xh8@Z0JigIG|
zsxoT)9s8c~Z2(kUmJbfkx4i|xKj4ce;P%8|X){r#8Z)PZ*_>No^vaGvXs}~^)Uh}|
ziq72uqMg&jd-W(xwqQW=?0k#WI7euNEciFLXM{9+kvRqYjHnvgDepifIp#amyCb~0
ztwm-BfR|pf<kNcrC>k~eLoGhu{Q*634ZvOu({wCi%OIigXzp)^ep=@M9(0RLDh9-7
zf|^Y?z=Lg@kDvjpJOFog_=(lB1_r-l?<as%pSHR@N9UZT%qr?TnQQ;{45eHz)AWFO
zsK3i!0Rn#kO*R1lwCDQA%;Drb&%&=?s_oTN)j_}dA0xky!_5kPmCilaKL*1t%8n`k
zUfhk-8;5fb*;ZJTt{3$KM2Yi6H_2OF+cQZ9IeP+BMuwCC&uMxG4&r1bh@aiLQ0cw(
z1J<DPp}Yu?7VEMmAKLg=qi<G(+xMBv=WKjar-<W)%ICy9(kfOF1J=<Uw$lQf!~;6X
zfhCBUC#eFNow2%Ew<7lSM3cYp9(i__XtbkA9L-;>!J{%9M=-=jR3hgZV5^qTjiFHy
z+9fnZ@F8G@vCxX?*`qRZ-rSz^l}cxQ*-0_y(R#f}X2M?x+WrRIXxbY>L&7~4KsLPj
z4sgu|&z{el>FqxWV##4fk{yYo1dNBcBY;S)6oEae;0OY<+Qoy?#Bel%;jq2A2!s(`
zSk%A+RH5|Hs*kYvB^0@$zKpg0&8a#mex%QclTKK$KFcB74FN=@J^*`C`&(WLGNSxF
zCEBBWkQN}EJ`V;|2O3b5MX;FzdIFjim6!X=tp8+AaAr1<So2n{C``>Cuyf(y#bkf~
zdZwdega=Y&a`!3Q*r;KMD56o#<`7~PN#kj`D{(%qh7h~ZX2_ggmDX^i=d7yDKKojU
zHPsqBu_c698&lT-9amzsOZLH(CxM+OLec)-?*e8%Sm)G(&1$sm&*Eh}N75$?BW?I;
z16@keaC%7bMZEP;422J-dxyXO>yHF$j}<*}>2JNTsMP;vHgFmke4mH}`Ie&}3(@jM
zui9MA3{XzAYrVyMT8^4+jB>6LfsA04o>BBbrEZ0Yf9&}fNE!O~ZRyw?t2m}L(GLiy
z&0rp&E<Bkz9x9iNC+3ay|1k2nylMa9#+=a>TN`!CUzra41%TMdJuf?Y{W~Zcf<iW(
zyS$&nwW;t$K^;Ux{^^g;O`NdE)vIg9JBPF4<1F!15**nyZW$ldWD%`QOtvbo3I=h2
z5h|Rp93!dLD4kQClAht67IXV(<}I!m#Jaq*lf&TDLWn1a-Ol^qDxZU^@YVYpLoA*2
zNF)E2>H8S;d*NKn#vp;;^dtORYSAGXOs>jsBe3*Bz!Grz_6c~<X~>X)2SM48^38I|
z%Kt(>@I4y*IkUJ|PrC%Z?qjCFSu)3Ipe}ymwUz)Ny*-5Z53_(e(iicgrsqO@LiQBf
ze3(8wSpyR8f`zL<z&r_&WcV$t4@k0ICHAuJr8YX~R8kg{4&<Qz>U7{Q0mQ8HvTBD;
zew;5=gYUn%uIG;@0RJ;i1SCQ3VG}^7H-N~$D13?jOmp>S(Q8mLu|GYS!_R|_L>k$+
z2#U%X06|>%BEt)nJp~dOp=_1dxdsj`*7XcMBP#T|mtg_FQ}P*D(jJdYb!IwO?K%Ck
zeMe^5=@<Nqf|)R{wi#H28YX&ehhO1?DvU<#<QU&aFjV#8G$uuT`&ka1!GdB4!NN5k
z89-y5l3e>w1`J;Rt~B6BKw>i}B?*z&1Q2T*i2k`>+<?t|h(3A1etpK>`+}wF6yWxD
zvkk6jL;%pBgG4oGq$qrTrK_92ko02v2<45HQx#JEX)LYoWF+wbqlYqe6b5oR_D;KJ
z0}l<Q^V~%J5T1J<yVl6cVrS>U%|_xYpTM$v+Axkry)6`)h&DJ_ExP_M5$EVzRx95+
zCB*=|{cVf3PcZ1f=@TpNg4&J15_ib;e7bhev}FxGeKcnR47UA(aP8o|M)6pOEQ@XJ
zc}}fRQjiR?g_~fK7OQn2UwARCX)vKj)PAuBMv|c&G5-Vv_1}P-grDu6NuRIy7MKcD
zgmZjr5t57QauN^tWG8OrZlv^4Pt_E4GUrY_jYMz9#ZWkiN_~O~PLN~YfK+uakjzg!
z&;r!~Je>vM*T8GDr$72C&*PvtVwA8;$7PF}w-L8ED)-DjY)2`hmC*wN3ui)kT>0O6
z3=+;Vuytbq+q_W)FP5VBr27N#woX>8{y=i7jNztaK_jjYsjxCa1t<pT`zO~e1(_0@
znzb2H3m*Gu6N41qIdP~<u4v@KV=&-@P?-{S-rAXuZ#K7IvqU`mC{tYr8*bHSMXItD
zl^rT}DZa~J7PPj)(Sjbaj@^2`dvm4!P*|iQ!vnxW$G3xFk(F-uK;oGn`4G?<6Ckr)
zL|7R+a|-NOz0p1prD6^tK)=euh<7PK5aE26t_h_8hZ2Q~m5!*FG^P+UaQ8!qC&cVn
zP0fJ7p3snxm+CYHB>Cre;om_@KcjP2)&BeRDAd`!yX%&L;f0beT!-t6XFlI50CmTT
zVLu@2p5QEww9m}+eGKm9JO>ys$1!2j2^7EhyUs{0J_*VHk<~f_5&oK+0I5E0MU~{M
ze<z+l78Ts?e<x0?N~==S1KB^~@6Z1I0`U9yG@=bKSj*Oa%mg-TmW-@fi~h3lOwsVV
z|9+nD<7n@r$wjpx+rZ^(KL))3Ne%<Ncy?n%s3c#nV|cv0>CC?%O1y`yBWQi<O(l;u
ztAc%*p`s{Ln8|-AifM^|-Fn%5K#!@VX5$};SlIl`bDxf=3$dvA{MDQZ`w58T#mZZO
z0J|6>dAA)BO*5OI;zCygWOnu`ZM4@8@eo8cWGg0S3*H{*di0scbRq~5+3ai!LTr;d
z#8!RoX^)WR$snaG_5fmP0t9<Sq08-N&YK-;7T5)vs|0gs!O0EHl<2=R!T0B};>xgX
z_Bc+_PKb;-VG1FwIg2%Cdiag<<%(n3DdE`#xbb2RT9|F5!;T<fI3f&r8>v`qO^D!Q
zM4T|XM8v}|*@t@oFD5n(`Us0JE1fqMRT<uYGLxLzS8Sct8qIOk<1B@2_Y^360H99h
zW0VF+AP)Y(FU(_v0xWa)B{ZPFmUC3;)KYq^;_@eRf*)iP_3tT;S@`#Z>so^sL%X}{
z1LoHTq?+GoA9Ho{8_ULL3$|Bo$=OW7m#AiD>jV1w;vQEb0;^ki*gPchtgDuviO>b^
z!l|v_lC#HaFB&@5V&}`~V2?Hf)SLscs+v0mGpXOpi$M*TmeNtLNA=ybWXFIurjvVb
zVL~uMWD~G$venO-=q2fcNcN$DogWF%u4$Ku%kEM+vFX1E*Z!Mzqvf;n4zGS6mg!uh
zL7U`u=}5YDuF{J7^hych7Q&|<zNO29Lj_L?Kjo6C#lbEJNo&_5C2YUvKpl_pu-?PN
zzdrgZ*wK_zaiy}fRvIhnm&Tsafoa@Ks5C`e5V}w1O|}Z>!z5iW^6Tu+Nxo&8SGi<v
zv1!~;A6+t;3sakTsE~cjcv|+s^M^UMKBZbcKEWt~6Ua^;451FL{vCy?jCrZWU<=IU
z<cWQh3%-ph_Qx<xBi3TXIbM#3FsDnAGNx#*O*$Z&Mdb%|xe&&(!!HsMTrb2n!|^+z
z!-L`o96zE9sGxftVFqJf1b^azZflG3UoZ`5*=~@l)hvnQmi$j^U;Sh;0fI!FKkpSu
z^s38_>2KLsZ`8Zad<S`sCCdxXj_I0f?t95yuL2?TLKqkh-U)RcUA{a(n2Qtgln1q{
zK{)w(*ZcRb{e^6y4DF_JNF4XoV9p-Zwj<S>$y>xai|IPHGl4Neg1q`n`k-Xo)l-ro
zJ$5ZULr+kY32)rsU?iOsi2yldw&H#8?QbTQNSM+9US6m$uWKdKx8mhud$6%_sg(6y
z@Fy^ImNbEat{>BsQc0gcyVsiyTH1M^%^OHS!a(VDFku+Z=vVGEZ|2P#izps;RHQCu
z-{@4pjqNC<VX804;wqGidOSnX9S*hL1N}HmvQ$>>rcd#ZU8qit%C1H4&1I104G{1$
znD_1N{QkRTyZ!T$5ToZp!G5t_SMPEAYC{7i#vBln4y6j%*9x)2#q3z-3i6~2<)W{I
zO~7Sv0~HKUu!ato(_s=k*eLaTwb=p#3bZ(slrWvMf}=gi{NsXU1lF=rU)g}dwsvi2
zyLPmw2Zk_Y`cVy<uq_eTma%6lWY26M1oyK)EeSI((JtZKFmW(n^_Nuy{{jQ>bNczq
zMo=yC{fnPn!Aq}UHdQL3mfDSOmejr+z{yOWRgz?NaRa2>XBuicxzLuspMt%NVK5qr
zEEqglw3M0uR86N6#=;q$;`Ji=y$&RF`;dJpOk4|F39qz!)}j~P1Ixq!veR#axPvL~
z=o+F(hFn@-Ph($fyX1eQ9KKDMq8j=$v-(}K^i=z(^47kMASKP7=!NAsh`&r@rPV(1
znaWn5l<a?a#S)~Xb<Gh~fZnu4$i1(v?AyWd=T{<)42MrD8Tb`I8#C}?sQj3FZxrbK
zSo#hOw4`26y4bU$PKrt&5IyJuB3<8b<gB@YyYQhw2dlqM%nb(n4oiqzH3XFrI!ZnT
z^|7~g-d0r8c_H+rMr>UK`+KwZ_;HnI+u9Q(nQd~!2zZmFwL0YtRgf8V)*2L5wCYk*
zoub0^#&1qm>83;`IEIKG!`W5N31_XKuNnTSuYF;iFBAIYDR8C3P&Fm|OT1Soj28sk
zE@}LtBGhl7Zj5P)lV{IDjLo`fPE?`Aof&y%xY?g|6KM$9PQ!KL26J)4*n<kgzR#I+
z?YRZCPhrAz7`hH@nNIfRjVZy9Ej7!hO;NX4z}SZR{>_Edb|>1UVx)9vYyxH2!^4$c
z$Sv$}<K07u?@bi*$91IRT_!sIwb%QyN!XF@ZvOWN=XCJOx6S7kGqL89&OV;7g2TO`
z(rS^56x2n`M^2a^FQgXsMHMPRzS7)Z0yg;Z${S2hxh<8x{9^iwbh`@btJ@rJ6tj(B
z+&FkW%H;ro#*5*Ph1Ps^aWvHeVqh4c`t7mqNz{gSWK9d0jD~UMp8~337Z?()iL{aq
z7A@XT)TIO~SL|htfQ^M#N<>s?(ZDG`QO|t@{~p5c4T4;UDKe&1z(>!;c<^aQ@Z<Np
z49GnOx3!FzY$GO*mV%+yGG%eq1j)cytCT~zjk}gV*@FnkV~R(8CX6smV|ha`+Qb_h
zd<bX&`H5l-Dg}hnSgS_<g^87Kd%4osm~4FA+M4EV;`ms-;Ck;qP<>^|Z7F5Y2S~c=
z_gKW+o!>X&2o(@^7&5)~zB8q$#2(tCBN!%l1XIQHZ=vQ(1vxl)fO$D0kiU;Iv2TE>
z$YqXe+OuwStybb^b@kJ{VH9l7wt$|oR200jvS3_C^h{?(K$l8DR~~nkQRl@9-3}1G
zd{(T+D^ZwF#g2Lfn+v`-U+?_o&v!37$95U0lYpwx7Y+QjQuB{+Og~nryM6(3ZI|BX
z+KP<>wsrXSjA_xwwMwD=rMnJ*2&wkUxCm!J7V6PH(b*w@cl4}1KhoALEm1H!!z)E#
z8qZY*2lNNe2fK6)1uF@kDy3w9?cKwas^OG;E87&_NG3~}(hlaQO1>qttXt=&iHUT9
zmfuxY&nfzz7dhYd1dsO93aK97R32Rv^lkl+Lc^IX+c|y976DQy1|j}-`wekot*{a!
z2|4rD<;J%xTGSQeRn*l;98XY6Bk<Wf5yhvu+l#a?V!!fgrSqV2p0YHQ$)Cuna?LGr
zr|GN=xAIFnmNOJFzp^UfK5dFB)1+p<x+n&$Sj+wK0y_$*n&{29Xql?Bk3MXl3w<}$
zkhZ>!voF}u>By_0sqcm{tR1K7FI@NfwL{;{9Dm~>d4Slyeq4-X1z+dJq5F5ktgn6z
zn6)_oAF7?ocXKrYo3*IndCxVjGKazL*v%(J4iC8bRO~bVEdC{xq7zGn!y^OcIKkNW
z8t|8O2Zjvj{F*K^9|7IW-pVKnxDT!>`RzXPY2JjnbfOD#uw#}uj%6uH+)7$W8MFz;
zKGVQF8ymEw_~(gh9eE~x24tCBo0U>Jt_|Ubx-AL5XBw4q+g)bQz^;I!c^HhYC%?b$
ztSCp_k4+BO`6N`}CVhT`ufAw1ecCfb?XK*tRrwcLVulzU2QFQR9@pBvgvZp}fW+)-
z#7Z~W2MTismI&In=Y}?HFPL<LItRQSAZi0C_#1}ypa|C68!`fW<bow4be*qk2a7|m
zVe&Eu-MWONV55op#95UT;ANcw@Ue_582`3qHjmc<6yhvDMM!$KebNIfvqht+=ngN;
zYB>2W1_MHd(XAp5Qp)kHFz7f(%ubeo_(`D@Y~j|qc!Y;zVP5XD-~6Um+h<2hW~Wa8
z!{@3geDF|dG+pG>rZ2&EnFoyDGe0jk%@Anb43x}Uf~GcU%g_e<bU#v`5<?kIz3&{4
zHLyGsiwrt-;W!i@Okw|8#kX9NR6r`Fh}$Ti_P~-crOP`uie3^VFUPVH1Gt{-Y7)Xv
zr`$5T!?*rGDGE1<A`F}S0O6DQp}x=6&Gyno)pSD)*9XV_hOFm1WJA}#MY^Fz;9{9*
z`j|43r97Zn@ZLsmp1cRFV=#C}2&0#Ce~y!6<kL0)%2LfPxfyN!Q@cboyJ)hZPpyw8
zvZ{)tRkc6Hx4xN$<H3qAHP1hCGL~9-cBi@9XqTw{f_SCbZbzsrwq!oU+sbx_oo0>t
zP=q|%15p)KRQ0lu^IfpvX<j>B+<wn+UU?;xwdhTUWSA;(U5ewpSWQ=(T&A?#ULqxn
zcniz-Nm#TewqWz1V9e)E(-6m-z4F6yZ`!C4`x235NqTe!`+q{FPA75yomjQ^r1})~
ztSkk)cu9MZCr>e7RfDa*M4DzBXdK{Z2v-Yn4eh};9oS0NBrczhTP6`ng<fGRK9@`Z
zvqJJvp_vl}>|wUD9P$0j>2X4sDb&-m!>{*U$qpL%`+3^IUF$vad&9f*NPg9|^h?_X
z>eoyuHK&VYBu|0GZRGYw<0lT3X5#s&i`zXUeIE0b(9NZ0=RXY?Q^0xhqDcIxd&ESr
z@m6UrrZP9?ZScH+ZjFFbc@n{pdASq{8pdWL$7XBnnC|bxc7yV;QJshW^vZbZ=1N0A
z_}-*!Pxc{NZbqm-eVbP`DBSF5Z)0ZbWQ8i;?A0aG+M%6!%N88wyNxsC3=xZug4GfL
z);Tw`a6v4Dj;<DVaMCoW0=lFPw;VtAtwzk-_)aKX=?+Co-kOuo@b2pH>kdki2!f<i
z2zBEd?7>EpN-*{&nActA!XgM6OIR=|i?>J$%~Q{6Jz8q3QO`2!(sH_qyN3>rUm45_
zMU4Z-{3qAx*}HJ5PD$p?#HmJ<D@Wh;l;~3M#^q_Cr^JXv7}ePj5xW)%>XG92eMl%X
z*}dNyRzlP=la1-lh<Zm`SI+&ghs8*xb6>(~u#f_vg%sMG?_B_plA)-QBC2Guv5l74
z#<4hrATEU}m`FrmeaG(+Fptpy3+%Y1(k^im^+W>O4mZy+t$xxjbSpY2ji&IU9olq^
z1U=yhX}1dL_EPD=K1C$&+2VFyiuIn)p!%?%_(EGh`3dDtG{r}9caJzTYVc!h^>1&4
zF?Fl@v3%Vasx!B_Zf7h`#I{}zwk+yax=j$N8+EH+Wi>Xy##h7ck~MlX<8cs{IPS%1
z^jX>=nZzw0^$f1Z-QhzXD;NS(2$tZg2k&YZn#@3(jTc{ntf89y_}z#VZP@-=lBZu!
zZJ7|V?kXeDR*y<B+(?Cz2d=FxRIewc9PHS)?K94jX|_*$YtXPyAgu^xqjNqB*%Q?g
zmyl>$oc)*f-2BYCpBm}{!R%M>gIVpbnb4CdA;#4a=f|^pNXE->%6kV!tf>A?V+ZIH
z2Q+#W3_E2@Q|vj(_iQht)Xow<Z)Jd8;!1nexZA^0%cWyV(v3yKjyeW>crV}$8X@rE
zwA2zUBr}ENlSQ_<xuys~Z7Fd5<~IJ$*oHU>sf&S=Fw7<iAf%WdrvQ4#e$F&lej$sV
zooDy`X=c3h7W>Kjf2=L%Ptf(hrxz&H8zeawP2*YSi_0yI=*$4Ffxj|$!nvQd5XOID
zIv*h_MHo&wcfW3mu#mEx%(0<(`~IM4zp;pkWg4_dTL975btu%o;hVG{pa{#Mctk8=
zQcn4fU51@L+o&&bcb6(`l}>s{FXlmzq7EUkXPJ|w;^16q8zpu)gk*@D%p}0yFpRi6
zS_=7n(ZM9aE0U_D!b1{zuecB<CXZ2g1OSTaV@EG!02$8rmc?=5Q$<gyz1<%vMq|nO
zLJY`qH7Tz`2R~64zpu2IDW)iKiz-Y37VBAlm35a7KpS%x+Z)OCF6H8+T%E}+{QKto
z`(2HIczD^=)-jpWh$Nj~6M#GDC2O=7De5L53i6Ql&qGOzD74~HJ<LQcbG5W>n`op~
z=*ebjzH}?+8QrGn_bv?e{njmgT3M-W&`-s1qaMFK15)cX=))iH?OJE<_iC&PW#q*R
zmGVLx6N@ffNEhV|kzjb?8^(Otm@I6;V1?!NP*QRLt+2z}meMy$TY>yOxchYGq#Pz|
zDM1PVUR!8Ku+5^1q={NUnUq2&BKUjroq-bPPo`WZNYNBjtVeVNvCf~x)YHCCv)fK=
z)Tk)H>EgQOrTn5)(7GxZ&Qd!!)x63U@GEBX1dNUIK+#nws5rsC(RQWrG<V^79P<?c
zVnEVK#B`~+LAXiss8WY_NoOfu(2zKvW{)08DsV625It5JfyF(3dZ>%Qu_~>U2yuv)
zE|*!{n~1R>CEFKbl5uVK@#VN<mAn+I4gfw~26)`7bO7bli~uL=<J($~FDJpRUax-N
zdTyx=8f!2G+fyk<N8`)!z&HP8U0HuVY4aZ&!+xQ_ujp%w9?fSJH{a604z3juFLEj3
zkzl}<g57<K+kj%$i1zcs&60v})-EXsG<1S-fc8}#Mbm}z31f2!8{e>Q`xGspQg=_A
zjKiis_l_mD{L?%3v&Z|VckDOFk^Iv;_D}EFpH~lgpDXyMckG|uv448U{^=e28EsGR
z9s8$W?4N$IpQS=RRzUbqzt|sE3i&uC@lU_lKmB6=^o#w|FZN4%l>hXL{nIb@`$Q(i
z;_om1qy_MCRmxxL9qX)`+pJU&;5o2-ePiPpe~w<q2ZdXab-z})MaiJT5dDLuQNEgx
z7RgHnQlhz9RPnB^D~|elN}=)rV)2=-V?c(os-83~l?({2ZrDr}g$XuH^`~8BDo&^y
z&up5BSqGFBspLnmoZ(6<AnD22|DN>ZPrYVt4P%$n?wo4B92kJyC?LjCnj#_16Y}4q
zd4fSM_^`Qc`y5&6W%tV6r|I{Bj+RZ|{VYiAvgmC~#+0VzU-iM@OQnLfdxyCTp76=W
zR05&P#fU4w8$tq`+$uI4y5<`+=k3(BQF!J{NNBSoMS}|c3uj}@A;C?qJ=_oaml?@G
zL&BT++`*T+kN_vukbVym;4HQ@$NO1_Fctk@)Xf6TbD)K@=;|U>(9dqWsGm>JZ`)=m
zm~Vo%)+w8k&8#(O)vRt1|E#Gp7mkY0`$<!U1=<~SZ;91_T*@e=gBL?WV8qvJzN*fj
zCcxP#Vo^Rx*)i)3=x{KCvbpEtfHd!R1$mKDL65rKC?;*6DeZNHW?`Ji@rq(r2!C;K
z6Asi;RWOiXBUR0*Hhdyds}?0cA-uFdQBNW)#o>5NsvDbv=%L9PNvU)Og09k!do;7P
zH%NEwLH6a*rkiK6!T!G{r}>|2oTjUAMyUn7KKAOl&D?(OY&{0)qCF7uG#~JT&i(+l
zxc_8J{bF;1{W^HlU|N!fN}u@(>jwu`CQ92&GZGmSZ_g$I!B0j}6+?Kb&h*ku`K|3s
zWyL!!VW&h19{f+4=TVVoQ7b&AijNGB_)HZP?bj${P1O;V>3aK2)e&7d{3pJ`LU`X!
z%52-$IYHWEgU@r!F?v^=G*cW#|96SQ1kHjf<gz`In+65$w^~_le%Fmg!X(9g6bnma
zpZS(fQsSCWS`E$Gu6+k+t-DiVg_W=_rv*$d^jQ0luJS+yDIG)*_yR<1RHi_HLN!?M
zN^zfi+8}VC6(k$G7m__)&>RL5F@@dFKLK-SpPUw=KTRxCWI^L<)`x>a8@63aw7adY
zTarcVHad`q0UaAZRp5X=!E=(dBB%)m@7_(6w<ha6wZe##D4vza!@y5@=JHLvrHSXN
zv*NAK(lNBb1P3EgX6E7Ku*nfy6K}H4M*UQfpYMS3^U+@hfWydzOL4D5$mcQ?`hlbj
zP#Q%wwKe$tBrjvAONuNsfoJC{NXC%T%rk7i^ds9y;Qx#qAT_bm$4K|9HlUxy6XjoM
z%{&^-^)Mjd_6Bq55+zxTh|ffTDowPqK*ZTaxv~EosJF@A@&>B6iOv9v*7$@1SV>Ho
zg^FO=Zj>KZXm1^0Z?gpDF^{tKD4!Ai&;ISLhCf%w0RDU}c)feUl8mhPjbX5dmsw{G
z#$i=;VG@a9e?IHnO$&6kxdUipq7oF~+kO;JwbjRAkPj#RZ4G<6NFSmFu`GH{ti>&Z
z<ho{B_oyqau*~|{*(y<Q#TN+?BmPn87ROXU!H2}~xyF+$8>l{pI>hf$Xs@e?2F6KX
zx>4sDpM-;n(TjbO_x2nbVK+Z|_q9kR98PVohE@k@$Rey9sQ!6Nc;r=*4JTB<Ofv&D
zrz4-7gM_ZWdU!!LEwqOs+ho13Xu`@V(j3&Ou;_VBzd`-fwnfC>xlP)9#7!N{@-aS&
zQWqEU$WmvAwl1l$$Uey~u?G~$7!dJ3gJE9gOSC}e<oCJ7{_)<=hk(v%*~)r%s3s;u
zD!fNwEUfTP=DZTE3pc9VFqCTPq#exXv9UWPn;y+*oGb~N+pkn0Ih5fcf^>4c5bBjv
zN;g8oXokx>^_x-DX}@=+EGRZc>z#P2_|^vMyHXWbJn3C$WIo|e?~igh)$HtdVme&i
z48wOn$n+!ssKpwvCmdBwE>|j8_Kh=9zMQEjT6QE9s+h5bH+8@J88SMF^X|w}n!Wwq
z3208v@OMXzIIWs@*UpZ#es>pM9OTV+w~cX2XnJ>KE2jPG>5S+qsfQBggMK)uAv|IQ
z7Oc@9fV_&Q^wS0+D-i0yq-CDg%}`}?SAVyu##y+ZOL}fquo*|NG|)~0#Yg8XYzih&
zkW4w^TuUU-rrQ_c2p(yl+Ud4AJf>n!zE)wTF1|5bTWkds3OMGr7ePB`Yh647h?r+f
zu2<Fog`a;rgJc7cwG(J)8$fch19b!-e{L;(?uY_s(6SrcOxAhQ^DWRgIimr3KMi#C
zftU2!^>6~1MfHSpBMv~;HTOTsy8c{Z^2hXHxAZC4#qTT%GoCakaVc`CaqR<brYN}e
z(7-wlBu5Q2%O8ri;FbLv>f>(D6qNl<SGG~2tDL?M$z#|+U2<2{iov+-oZAnYA?v?a
zS~7ALd+S&n>#*hjEq&SPtWnb;FQ8mz^J-pDwfwN~QY+hsFJ4?JIA5l=_5Y9}?({nb
zGQ5UO`7+z~{+r8s3cFAdG+BL*Si9R+|FE5rfYIcenGbqDAeD0%0ar`AN<>e3S2Vp~
z76pjA6x#da%ItMy#bjf6%@Ci48V(s@cQm~S<`S3)p5!tU@m@+7IYfhzb*_Dx@xF(m
z8+50x_||wp!A`odWt*e@5L>IbFT2cPpAK6?&St6mz!ikZRL-X6b$G|k^S~?8j1H%w
z7mU$WT=^?n>s*+XpM`1_R}tp8p3I9^k&MCSy6)I*<F6y&_Xy>Lb230cFii&?wWmb-
zuub1r)OQ{fcYK?QEzP$etQe92*@49(20H^1qsy%KfBuskT?NV5N@{Cz$o%1?P>w7)
zuwO~cr;7NlBrz9DG%_HX#^7qCUw0M-NY2TUB|C1($t~!Ic7uRzD}Yw1@d?pRn$)<{
z3tjcU($W2M6&tp_3U+bN3lu;>iJ<|mc0W8x6;1qqte#}8G?*2s08|8@>OdX)z1KnI
ztmjqv$FK`-qSjn%7jJgDHzX4^8IzO7CpA`AG(nxhEnck!AS&Gkv*9)|M^MzWLs3%e
zc8qH~C>9~W6NxU8U;N=9h#e0(wO=-#z%AvH{UsQ!SJ3Yn1V2#5-Z4l^MoGrQa`6Nz
zw=a82u$T}^A&<Wy5H^9B|Jpn>uzPAy_;`7Z8e{_JoCe|`#zeZS4J1IEN^km%$hqYL
zX%RbASGec_Ra8CB2{8F4Qnyf-qVza{puM){20j)DRVA5LgZReI{1!%0R*KzOY1Nqj
zF*RdE4YJazHnWsFd++7<jVRC-Dcg_8Nfg@QSBJK_#QTQsSQ}cmzg>qjnojcC%Tn{&
zs|Hk!k%R3HNU)_1yzjo@UR`fWLtzEEW?dP&YMtMGu;ERTPbe0s9CIHW8UGRh#luWB
zp?=~L<|b1(Z>Qzi1s-dk_5#2YNfQ&P5uBlz!!aBWVwlsQq-mKI1)Ro8+GGtj%JGQ3
z^ZWkTCA@TNi5`fA6~VaYzK#O{+x9PmSPFLHt4R|Z!7e!8-<*WD%83I#&LMsGC!4@=
zRWJqUVC_R`wL;3t&<+Kl7n;Mk1-k%WIIaZ>*z#fxY;21D=ngv&uKPe%8Q?nU<xw@I
z!|YGnq1RdEA#A_4_I9ywELhsgOc;jS=wWlq*cD0zdK$A1Xa#A}Ro;AtklxpX;{P5B
zPO$7;&L*JD%Ye~UVn$jVROQ|Gff+>6!e;p)S9@Mrc!s}|hI1J3GI7we$!=PX)*+;j
z8eJQ$J%)=hX=IDcV=l$%-0^&Yb+rYNQ{GX@7<Bn7zE~FnyMRurQ(1V8tn-?^b&_X`
z{rly-SlFQHJ=~BsQqBE9$wL?VmJp?fEGzhTH4P8=sNKeqE|{(L5vbdIc8YH&Zmax6
z)miXCZY(a~H9RG>&ds65+mxa@0tBIx30|rM<#Vw~o7h18c{7dYn&2T66jtaC3R78X
z9eQeD6OX74_!jUFyvW56kOO+f=riCSn<75>#0M(TCHY@GROy5(In$rCsQR7G{+gbF
z>d4tsY^7C~I~<uyJmg_<b3zN<=jPH7-M9u@iT7($<v9J>uT?7*Xf?b}TlhpwE~z09
z@BCLT@{6}DuqW$`ef8q!9#}|%9f(?NlsrtK8`|l=(ynx&guhdt)<dI!lvt#al%E}3
znfU`h`elq{c(X+g-lEC?$d6$pcF(DZfY-6qt+f|He+pUDT0%pTCE?WYSY?~aSK}!}
zzT}=%V*gw!xs7VCDJC+yL;wd(9(p))a|4C7ClLLU@UJF`_7+3Y7Yj5&EN*7N838E3
znx0i}2V(t;tw|mlP>7xoJNgR5@%3g{0|)|a8@XT!V)WRHgKaOt$K<n#g<v0}k*p(h
zGbs2!V9>5d?Nn&5ztB9OagdAs0V6Rpf90Im_*;J+2Jl5<tl%OzDbb^KAPry#gOP^Z
zd*EYiy_pYy39V-n$+52GM|*^qj502O_<0B{eI)8z5Ql6}ly3@n-RP0NWs+$IwqXjl
z#p+F}0TFJS^PI+J^l3ek+sj5v2PP2I&RJ`T3@JAKvtRl_sQ^ER|C!t*HVBBK7yJC#
z^ZlkfJCqF8xP}RUbi*b9j2C|`kq+D0od4YPu8nv&#?9lAO-+YcO4jk?s}7E;kyhLt
zyjs3rBD2C?KJG^3j-$EW!+|S#=M_BTQf_7{uJ0pyn(;MTHCHg1^M`ra&+NJ;M#PAP
zz4qaxGbGRBxL^wS$QS2xqJpG6Ixo>fnoT8mT;Lmwh?;`JDDw48iXimcTILV8k(zIm
z=Tok(l+5$F(j#sUT1m0U(~KdDX%c<~5*5yzI`^1MOBc!0aywbOlNsn`a6+NuLWxod
z{X)I<#gh4$q*)@FqJoFXPw%O$7%+jKv0S>{U{l>q|JSJ-Juja6UWN0}>SX8e^RIOV
zcV<2-X}NyKzTvXdm7vVU>cvkKs^`r@Y3%Voq!gw6uNt#k6OYxlXnqnCyA(Q+#V?#l
zsbX7!=PTBy3cP~O+UQtHdn|N1_dj~N-*jJQOR#ne^1JaXpU(2w-l@>(k$2`(@Dj*n
zvc<LSehV8-B2QT}hhOG}C7iNGMHz_q9@#|nnLPal`d7C)HXuIfL6ZS`vpe)}WS<$O
z?&$uZSopGB=->TbiO93-6F4>aTQ=Zd?}Yq%(iY`<f0TK-;{kfVlXgC?yX2L(ECJE}
z=t0vfM3Ux#vMi&PL|y1&DNWzlK|g+J6q^KoWSwm`2wl^Ba$rAntz_@RmQgp(Vl&K9
z>%p%qi1!MhOWwok9|6D0c8+Zb`(h(fC1(53GVnMSE5>GxDMwVL&$`%Ys_!{Y(C_KK
zL3#M3!Z(~$wa#?N)L_ujAfK{1aRsrME-^)2qFPP;!G(YzAi$k6H*(EafVU&?i_1pf
zMEu3E*TcMWp|4{l7|;pb#b!u1yY_zaz(HZSByv`OOh8|BA~dOG=l6$<vhPatF?KkS
zc&UI?Hqd{GGXR(R;@Bs-^hr8{%A{nQhayKnY*FHa(5fb_=p4Fqqv8Qh!efNuBqBTl
zho{O?q__?toi<yv_*FhMRQk|R6`a+AhN>YbY<YW|LcM5q?3viAF|!);EHfux4Tgrb
zhHlY4_G03_WgRd1*PC8`!h9?GYUuP;pYs8ay*HN{>_F7&M|Q==5I*>srY^bGF(Q_(
zUqYL$$?))|gWv9-^fv>iEka!Ed|r7k(ibf}FrZ+A-rs(-h{Au`V-?L4aj!>c=n|rB
z7~mRxiNjp)os+Si^!d#}HVPscQPWOpo2<c@+x)Q*ZOS#qV~?WFl?D!F_s#`SVany}
zJ6^V<B~Lb56s@aa^5dQzFEhwTLw~rjt&9)H9bKB@tMUIa_w4~qmhJx-U!%exD@6pP
z;w?%PR8-EaGNJN7rhsyqiZT^s9E1UbO)QUqyk?3hFq0CM!%*Z58-jqGHW4`qY#f9E
z0|tySc6#=^?qR-_)tlA(zQ4a1d!FZho;`bZ-`Dl|e6G($Qf-*HYRTlqzv8_>@OTVu
zFyXw(6U(>k%IHn*EJYhCFZIGrTN-;#xs}%zLUJ;gk*79Ry<y=ZT&~)$ggQ`GiicA)
z<O!7X4QNttLdH+tlgX%pR=AKtB}FZT?0pxm-v(Vj3+IHcKTZptr1tgA^LS=`J@(32
z?M8PqnRp}hSWcGM!eRtRJ7rBvrt7kX?Wy+Ecy+u86T;_}C8=)a{Yf5IDr=`HM;}Hu
z^9)(<Us2>M<JFlmiA+pWmb;fXYPG>RZomVC-AG=T15c}oL0e&Ca+exA04rMRe{65m
zAS+-zWqY9cGX6T!v9EZFX{NP_?s-vQmXBqaJ(bCk2-O4ZyI4dx&a}*wie=ye(o1Ty
zGl<Gd>gLq$rZ-G}9HoSdxf0&3xMk0B{#sU~HfH$<&Yg$OotN5z&RwS;=6@(v)41gD
z8yV~jMXqIXe5PH>&ZsJxE9RCm<|;O5f4PM)!c@?dx%(6)-48`!M?{HSo?POPPtzrc
zwYukoQO%+b_-q#_?FG2dYCH~$SEDKwLM~28U>@b*%x1QJ-*3E~Ch22hVzv@{Go)H|
z2acetICAoQHe3Z+zbtNSc$yL@lDO-~HcW*UWK;$x;q)`ZHwz6+*^~M_ZAagfrbviS
zY^x;|jTd>VxjmE1*nwEU?E$L>Qh{yV-?e+8;o+POaBJ^nY0lIKVk1pO5wF^KwDef9
zVAkM=cL%3Rv=Rv6r-RY7VW%DG|Kv(il5u#QY%*)~ut8YU9Gn`pX_Id|-0UB;<Y~l1
zIOd{KKiM8z1E0XG?Y5pF&dUc@#RLlsnHD$6&aPdth4sCiP)KQ~J@ImFk#AFduRJXU
zO-fuMnv|ITC!_^ntxN~Sf!@!20<rAe(+h5mJzAY(DW=Hiyti%k2T~7S+>s=cD{@#g
zx@Ipax)@frP8*m(#7o@n!1GNEdW)>aGIKOLtlLrJn0)ibp_asRr@RWX(zDD<nL#_J
z0yA28OMT+v^r~-M^dPt_wWp@Wdpld@vof|(4l1Kk-Km1OF)NrYD9WDrsW#yk?Xdj^
zkF3^2TTx=XwYR8dEIGu7cJkI1LdHAx5&VrC?*=~9O%J;g(wca#maY({VYNnvt)8BQ
z7W!`3a3Cmo$Ms0I7$!WC`0h2B@OWA?b7l%jDYxN)xMa|+=Na`u^HKb<{dnK5EzQk-
z@PdDUwp;b$Z+$0YH={8#X>}gIEZFlhA$HBGjWV~Ks%3N^b9liHUFFr_b74ByN3mn}
zMat-&l8X&1MtvKkIkPY9I%>k!T-x=h^`eLnu6hxXcXr~y+KSusHGKA@>V?S9xNr8-
zTWJ001$=JvggBUP0)`J89;R9Itk68)H780fOXmyaNuBMZ*Ru1yHsq$3mnaQ1md2Zm
zB#%ljS*G3)Gld<eQ`5bj!##GT)`mO7e!{rEJ+Pz0xto)5V}r-jZ8WBex;Gz8%3_5b
zja$ldhHWg1(`TDczoO^NN<ZLuG?}PxoNlmS?TsW?c+-Y!x!J{Lsoyp$`VE5xp^U(!
zjL>au*Mhv9ZI+Hi_F3+LQ*7CrIT>>2qLU9i)7b*JVgY&JWw>I>3HA0kQ~ZfQhV+(`
z&-P0^E5hb;wEFBN>|j>XQe5ILyA)oMl`~~kMT9NeD)@$c&oNP0O7OroNzFX|ge|ZB
z`s7*NmcDRS;?{m+^Nea?l6hJ!3>5$C10~5E61oB?{0~Kh!w@MZwSRt`XGQl}WRqAk
zfR@A(rcD_Z$k7c6T~%@FPn^)Zs&($U4OOtL7X(6xNbD45z>~=xR>P@PuYUtY6EkPc
zj|<Nv7<I3Yy{2LRPOzm0XZPvDIxDX%nK@D4qaPMy^@7*k6A|4;tBwbEFCTa*Y`)QO
zbo@PiR&@S_|Ik79NmQl8?Az0!-H+yY=wzfYnHbfZ#hzc%c4EG$%yEwvugtxE{`fl%
z57iCNSS6_wS2~u(EZU+S`N=?lR!OD)9JAe@*p1Imtgsqi82o=F|Dl7hZi<>#f<Brb
zJJ9^tv4xN3$GuwJ4&ogrVvO$&u9r_(@aasYm^Pz9o}X&KAu6LVITmBD=TDyL{3v#c
zGR%u79!eFQC#PwRhs^PnKNPa*6RWFbOIALZXch3ak&}p#)URN)Cs<?UgARp!BIKjT
zSI96Imh`|g%j~(b21RTOHEzM|vJwK{pj<}c(U!QXh77TVhQDn|Vy<#p8Q48Aud5#0
z%7T<*A~`6sVos@PLAZ8TeXnm}V?m+_8z_rAZjm$1#c*1kN#9*+r%ENlkU5O6PNNfz
zq7#i?6QL8W(wK4}ktTuzCGra98>@=DlX^A<j&eEN$XiN9Nj<A|p5Sv~J&(;31a=p5
zshH6h0|ICB7K81jrsF)kaH^QA<Z#*3BRtvcj$9sGTpB|uSBu1!tgV1~-5kX&^%hR(
zR;r`5Ty*S1-;(+k_7~X^d*0(EH?FF@{UTFj{$|FK#F}uz?Tsrl?+_L9=B7LP1aWki
zZ{6rS?v7h`XZTEF63L<N&8Efj|JtDrQK^~<`rLrGBGkN+nfr`s+gRt)FSK;$Q;r>J
zHZr#o#Ge1)>YABM+IKre_cpR@Qum(P|0;z~cz?Uys<T4YKFC_W;!1U2afz>gTw?Ba
z^*?^7{YIr>Uzaukean3GE%Vcw(YO4u#uYu!44UVGhJeAh7!6c+ZTcI5S8)~JeKoUI
zdC;z8^W8*j-f^wo4tc(4+<HDiYOZg6f+*y+RPaAZ8e%_t5KSak(>JwY^Z>XWn{G<O
zRbG)E#5y9R7vb1QS7L%d(MROmpt><fTRS3(>8&z_g2%OO6$&3ROiy?Ovf;t7K<SW$
z$L(Wi{_6cuq?UxgKUTTcC%y2M3fo>S9doCQfu*r)e5`BxiylE}-J=kRx0Lo4O%{}I
zufRH#$vNeOw2SF0TDDtJ8I8`>9$d@It#fS%=fp0-3#t}7F)I4o4`ji<vQZ^#M^Dby
z2Iob-GjfT1G--jo4a0P&MG}gouc9dL>ZO)ju&8e^xUbh`ab8&IcAk0DxanSK5bjb`
zDw}*u9%q;6kat$rKB5=JIonShsabt1xs&a~9TcW4qk21CJu~{4vR39m6Yr7ekypj}
z8H;n;Eu<yJb=<a!#gNEYK<ph5?D@~+@G&lfaUey!CwvVx2GTuhaq2~$9jx5k-R+yl
zxMSA_!kx~$3iG$a+45~}gu~gI<;%~HKE_5*KXVwv^jgv?AISFN;6qUQQV1}2&A4c!
z=y@`B)_TpeJfUkg%=Yn_Nqt#s_ibZ-gO5*vxPzKB|L#b}dPRHvJh4P5snGrCVeo*S
zXUJVqoVrv^pQjb*NM37RLG$c>D?(YMZqD{8P>9iP9=NEkVn3htZ|L9MX!473XBsGY
zoju$IVz`2W5VSBEOqHN7cHl5)kTLPGi|HVz?d<+AC5IMwK@#>TUpWmXHCt0sGS3xq
zCm>cQ_3ycmGhEgXaz`+OdE*r#8IMNACTx0rgPBUHjNLjHPo6Gm(2{RPL6~5wYr6G-
z24$s2C`>5jU;5^~xab}4w88`X8;Z(a)3s@E-r(CZ_t?F*{aJ2qIi@Q<soEUe{NQ3p
z^xLaKFQ%Vw$<Um4=4O*e?rk5>ohM?G2X6eZhX2C4JtM1KmuK$K`OTp`WDeqo&g}JC
zpR~@6<fQ0k?$G$negDSOqM;ykH~E#&9r9Syizzx~64%nVI5mZ0YMlCr6%U^iWgE=l
zQe&zycq4MaQh9Nd#N%2N;YFTOyJ<O_OI`iQRk_fF$+J{0tqwQlag=McvUy3=hUsE9
z*TTz)+K|NjuJp<s8!LQb)K`$DQ`eE#e?<yntdhNW98&kxQT`D<v8}Jh$cOzf(YTH+
zLb;lw4Se%XUhkVee9b8Cm6#bDyM0CuISUp}kRt9o#QK;l>lI90j$72=vi&Z1=p((A
z7c;X@Fq^(wyL+|!uKCn7je~dh<(0pKZPtEtqS5c@dU^7a+w)V~e`}yoy)J{+`edu@
z2+za9<BNv=o>71A@;*rxg+yETK7!dRAI)C*Nw-hKLyjLS3r?-maiYpS73-O@HM>Q`
zACAOqk6Kx;8!X6IDoEPZIX%MGWJx6Hj>%mo2f+$Ly#kXNGnA339qhauvc|Ytq}*?u
zN5@fxIY)DjTrd}M^QJ{<R1ABAirEtkcuOXXlc;!|ZQQxuC~ra_nZ;Om3(A{9)GUe!
zW><<Hrf26Iq-}+8S+>oZzYQW|zYg9uh^+0;v8+)L-7#&2VsWP*ayFxQJ1=T83ZJ(Q
zWo(6af8|hak6xD(snd$IbF}C(jmh4$V7kWs?-=;$H64=_b=U7ep}`6gJUP52iMwDj
z(+=4gnp&5zJ&B*NbyscI^&JVp&DU1+k3Gz&D?G66LJ;XdOTWjOr5g)v*6@R}7B9Hk
znS3$C$~Eqltz(-TijvYDj($K&^YLUkr<Q+Pyfw#^cVo+M4-=W1e!74Ws0Kgi0ZZ4>
zKQ+elT*{eU4AU>tJkZK>9<4m*&$b{uu+E??F3Xf<dhJ4fxm7=cm?2Tf?rR0mxG7((
z{N!8m4#t(z@?DB&ab$Z~{rUyBh3*<I*y|gapo)CmfvI6b$pk!EzBqIS@aNNFl18Z9
zttTr$I*pg{45`UG7^<-==zCQ__Ds=Xruwf<FY4pNM{bmF_6k5Kv*+K3GCvCo+v#%$
z%Ri{LK**ybF%%0WA<(+$kAc?D4)@2<?=4dhexLf4@Ef9H-ui7i5YUz8827mV$eX->
zYzjfcoc6PU=r3zENqP<-;y-~)C#;D|y;S%ZJE?90j~#aC_-;27<@Z?tFiZE)Sli%a
znQq1*2w*mr9l8pLvTw8XbHmbQk)8p)83YLbwxdMR)ngT)TG56>Wi`~{*ccP+$dx61
zI~WK(HXOZ{0P#g_Qqb*bip((iWqZQx1_Ucdrd}$o=)?uWB+uu?wP5lIp3|ZCTv%PI
z-64L6VSUY}DeJP&neRk%AptGViNq8XPH9+PwkTq6kn+bv6cb|aLFU4=RLzz&vAj>w
zaJ-A!L8alC$`*S&N-mTmsBsmyS2KeS(>0ea@x~;5G#n-ok}-L>y@ni9h6`Z!8+Ur2
zheF#S6xt45%|)T@ml`hqhlqw7g*u0R7f$}eD0F81j6;HFD21<v*U3m?Hb)@@yPa<Z
zDD?!^7X5~qlTYvHsKpP4exoZxwoYIJ6{iUYdxczMuax>ofB$4ax)W))06b|Yl}7>o
z;BNp6g}a6w5oWf0t1^os`8z2wjEN(pk=O-JReuszb}l(vtiqAQwkC0fjz@2vA_a*N
z-n@W#3T_%8$Cy*HcWakjNVhQFLC>jkdqaLF+HVp|R2Ng?Uh!Vj!B9FLiX4tl{^@&T
z0;LL-?#E<v!p7p}!}X7$h0aE4dl0VQ!Wy0=f{wF!5;OL=Om1%f39)p9Yp&2`6>+$o
z*~-YJ!;zs3AIo$ZgxPjlk<BtTeBQBO)y@2{jz;AR2vjV8tI(N7wM;#s%1`BEJOLz@
zn#7kK;cO#i>F5DU4|S`Dr<HzaC(nTts%~2$;om3#fcZO{iaX@pl?~Y$+FiO@DdG<0
zaK@hPE!Bk=d%7iZMFfwnT_!nX!wlKUD^e?#OLDNX%*mw{w-^edR=qsh$tmN<QG^s3
zKKb0dI;o}W{4xFu1%3PA+#TrL9jDz;dSVvbUDz0Bnl%2x=4|_%>DJyu_Ic9)E$a&K
z65sG5d>3gCV8%Bgj>TH9h=fPBw)O!IO`!>SdR-Ksq%NDo8r_Q0sq2ZmN|mw_Wt#z2
zF@t+St`M`SN;yrjFhW_<?R4fmZD-U#yiy8Sv~24`1JxbSL|38to?AIwg&+`C_w)`~
zWiyW$xMFQnN6)Q={|Qj}k3iNGHI}IrGvuve({A%>NsbeLc+E(_j^n&pj5nGcH(=wD
z(r?8=)5etTcwyr#8Xg{sMS7NvS>?lwunB<yn=CT-!p%;w9;Y!H74qFvw?@xS9J)|h
zxO?JIvvcl+x@D|Vs+@Co%-J)<9HG!MqEZB6?f3avo6KW`G>}rIUW#IN!K9>oy`EYw
z!7CSw(oY&ER!S;btX=M@;OS1;C^O~w$oG|s-2Fa*V?r_8nhL2WckUPv3;_rq<dXNz
zh5OB@i5{6_$<!}Z^YJq_45_;DU<ZK95j{m&MXV{THoCF;rW-B&!kV@I`m>Pmj6BZ%
zv~z!_b<UTPT!lE!Ur}mN#Dm#lMsXbXz?;2pZ@m5Kb3uh(gvo~l`{CGT<6=@vV$^%J
z4#_u9%?xImubf{E5OnX!7Ed@4bDB+)n#4zXkV2AoFi*jT8#N(5KTO(jVZwXJeIpen
zyd(sZ^ElxT;DI299wr_V{jg1kqI)$eGsMx^H3z*&k2>Y7dhxfua|$?=pa*SP>g<r{
zl$9;_5T?BNuVBh}e*5OC^!9m8jFt>s-Rz5;<k;EIu!DMD^#ZR`@3Gb&o{;M(s~t|_
zp?N-3V`@%;*M|yYubmJ+aw3*PL@UHd`qyl;vPpS-VsGpXj*>;RIh_UHP2e-2a#gKG
zQL16qnErkK5u4n=BI~~A!j}6>ZUEpcsS3OWNW?EO?cCbdxxV&^=$U4~6*B~h#^{RQ
z*4krH6!~_QWVp^gao54W6=i-Ft_~Q}^9-$!WU4pQ=0Kzv<YQux!YjGBYh=%Rmk`4`
zfsLujs>2Bn+iT{TOg=ZP?R;7nvJX>Tl;u?cILZuU=fIO0nGVtfeVt8m6iqm>^jVHx
z|6{26XQ1{>0H)wEzp#E3?W+;snH_;)Qe~VK&T)LV(f<}|{u!t(`ARm&(jy45N(a&W
zc+fGEfaXW5!+KqWw@p)COVydE=4LQ4j7#k`99bV58pZ@fMkq6T3M9*}({PN&IEMRJ
zGUCfKCbqJ1%&I1R0r~Ey)p#bVwCmx=59ajb9cLTd{gfycR=3VQ6ElWD*<&aTITq7`
zXij(idw{1@Lf~85EcIFFX0g~Jhr`t`m>BnI0m!X7`jYt}bD0wY+$u>(Bmy+ffZZYU
z7<#`$pHGYIlDfqbPW>aQOlcVqe9YfRIm83xW=6~USymU2wysKcWxr5cx%uA6k>Fz%
zmAO<hRak?Gg{y4v<|QT#=D=VwtD^-Jr6m5tM5b%QcG6B!Q5lWe&;;+1skn|DnqYQE
zDh)1<CdGv6;}UY)Jeb#6C~h(5+M=JOb^xs9&3Z3nPOSQ0Vad;fU@sEInR)=KA3ZoH
zECjof9Qmb{Yu0}oOMV^%duD}#8cV_@p~oo;Jx*CP7c^678l3PybdwT#ea)YTntz5*
zN49{lpkQR?n0ljfWmZun_GZj*zJ$mh@=Y0Hk9(GSGQ{!B@N{gyA`egGk21UjVlfC}
zJ=pB6vZA+wD(&FG-~>Pr%MEB;jOMQx^=9<|%16aI9TzeOmd+ENu<@w3y@^oe3q$WG
z@PUcEc}(2ObrhZA*Y>^Q{VIeaB-SgmWCSdG_XBR}Y7iXNci2m3B*~fMju2*B#J&TD
z?foi1KQ@G;?9FRj0lXISg!#cPGb4~`cLkuu?GlReF8RCFp;$BIL*gOsc9xFs4y-C=
zqftkV!f!gxUQjb>F}-Io;KGC);uPS*&jm_r`$8gvs7L@+KMEZvq{^iK22TFNDD+Ix
z<)GRNknLHZ4-rRB$whOA0gyYABOj1gcE9CQat(JWu_yjL%M*<$85ZyosIowZ*K%?e
ziU{WBoj@pQ6aH@tJCS&(d48*f)}VSolFeO%QNX0Bq0wIrD)?IUwwHQ9nFPB9DyBjN
zw-Og@3V6*t^OxgV$f7*|8RgoEqRSSvFooO~qA&K6dPJ(MN^?iyWir6?Jqj{)?e2lT
z{a^+SCa#;pBl27ZDAg!dKX-N)>xQ~T>s)4^EIvxqDgfA7QY|<^Z%|--0S>PdIod8<
zYcp<R1wk0IiPV8lH<+o^2SH4;=7!An#m|Eu{|+X+s@-?5nwri#pMCPtw%>)N0|EBj
z>T0KB^_r*7csSYTwvWNzbP2zES_G^^7@uYNY#0wYZNa_>FEt)sbOaP%|ALYy>cgqZ
z<x6CMEsrPm$$j&<$yDwjrI|D&ZWH3D%Eo<og?olPrB9x&&r41sD_bqgQi8EH9RhnQ
zH!M!RCKx+5ed<NQ<H)_!uWEsA?A6f)uY!uKc0^Af{>0_cHT`G#?{hHecoll1oY8O;
zsBm2PcLiUpi(S3+??s%S#fI~Uhe#ggq=B;GkU1>F-QWKi9Q)Z(e-{uBX=$B1e8Lya
zWI<>q3wmIU9{56aHpm=o6%!<pj$*9!L^Jc;E2g_em4G#aE=bI^(`O%J<4FU;uDSHr
zaJr=xNh`%N0c{4PVg9%d)|f$=`uIf79U2ppH?SA7UfIp6r*=GQrC}18lUyQ83P~d0
z1&vRX=NC~ru`tmG^r0N8fVTtXP|b5X2UAK<41Y=g1Gw^2`1cP$(ox4jn$#7@>x|Cj
zAE99O7S3(Pvfl!%pJ(8wF!|yE{t<J69SRdVVg8_z_aH(N_b?i%Ms^2g;Ws}Iocugc
z@KaQJJTA++S6T(q2>6+J<eL0F$oB7C3gGS^CI=xz#~n@J?wj2ZHfTlytf9!V`0{Sf
zE4*)+05?O@P^J<vZ{~88Z)_RcD>*g&%5f3tSxgyL!USd$sM5AeHmp!!>B>(RDVEIY
zrpJBTRN7_^*Pv~}_h@nXPzQCCMwN&7NS=tJ&(DI|{1-5-Uz=XE%hEM(ykZl62MLiw
z|C2)G>43;o%`{;A0fk&r+Aitn(gm6H;IGM~|19GCtZ@62MhL}CekBx#2sv42i|4l=
z{Hl046X0U&rp_TGK>iaLwMWZTod3W#GGna_c<dxi1^_zCpOr5Iz)c%zN<^Y(XZ~|T
z)Gr7_?wrg?0fJ*^CH%JVfi?OtTTs4NH&$LZH_e*SJQWd3kRT+Pay!)v7;lr{+Qvt+
zQRuooleAa0VHzN2n4zY%f|YmYP}H($7H%?OH4WF8xZR4R+8tsEGwV0(shC-X3+u2I
z%!Ns4sU|SOYteGh4Ft5k*$};=RaDK5|4!+ablP+~jU$W~TNs8aK{yBk9(UQ)OtU=|
zL@VN^2lCoEz^jMFN|5q-2BgsvAy1+|E>rSKl~JB$xbJ>NHn6?0u{3H$1OEPV2%LWh
zk<MM!2ibf(Wb=`p*+H0#7-_2a(2=IOO6IMxxx_Z{WPU#PTS|6r^}|h-vP;@FDY+-v
zR9aa0Dq&$^2ogzCYE_K<%g-|MYTUuem@rdml_uw=xr?ptuPc}TJ;Xe*d5nWF<=#!d
zA)l7uLQZ><#1Es{xWxx;1HIR9gOHlKRz}e3l&x7B+w3R93>zn|<3fM>UNPva5JG)U
zCeY|Ev~U0{r63p<I)Pkx8)jziAgu2+&ut9>*s3LCLiWsSrJ`F@_7uUAA3+VSz=YSY
z3EL$Xdoso#<Q7|wI}1RjI$6wC-AuLRR4kK8Q%Ja*8(gEXp4fL(x$1Z^jfoU$CGR5j
zSGGDGDck9f_OC>1+k@z2y{`6G$LI0r>AgPNTaJ&-(-53oU;&!tLj=n(2yD}HzS*_I
zkaqx-&_Xdk8HZEjHK96?Wc~{ylRUOmJ-;>`kmhNo)e518z1__pM`6R09^$fU8NbhC
zeF0sw`*bRtn~1<ZqRj!6sxa!Kc@9vHQLbruM_T^~AJZghdxT)-5^6uae~1#oJ(CPd
zXozVg_?Q;9=4Q`$q+^akx-eN{V5mTC$ziehL7LAo5hf5~-XyuGPx;cg7|>>gkUH%?
zs1P2(t#_k<U>b+<0hbmyZ6U$^&Q>==471sVeMmvcsD{-&%U_`5N@G^uY1>FVUg!3o
z7CHYG9t}uI@ng8PBSnYV`aHIMW_oLO#a@GyJ{8`pVQ+C!5D<PIAZeI3nuKLMhbQXn
zcBFqR=5@l1V1Wo3i>}+};bY>5%cxQZu`s5wF3+i|Y1cyDsmdm2*XG@(pj^f#$Zr(8
zhBP-IM0%8|XcfLjyPQ$HwqY5II;@(M6rnIFVH;K`U*=I2z7Of|rR-VA({|@?e_I7r
zBl16D$*r>s#jRHpD+}3N9A7RedW)h{0J@2?Qixe8g6|d}tnfXJTN>T+ps5!w+)R!H
z?eXUX&>jyS<rfWGH_1#i91N=uL}l%6X)6evg@=2E7E()>bHV)j&-+`aLD(S@a|Tmj
zw&)fcVxPv_X`i!$?X~?8$u(PPl56HJv=Pn@!gS*MdLdOydsFjScR$ewjrA#GHxisk
z{s>1MrrS;ib~T6iD3$_Fed9f(i55q~g!cv2DHcKIV-W7q$h^<c1JOQ*(b$~urRU6E
z&>goJID4uD$XU}LxTs$It?w)y(Ce!G%)402wsmeq)pjGKS@x~}4bAc&!k04%HoCLg
z=^X60MAT89D}vh#;0Z}X0n+v06a;5<7~I$Bj+i5&g+)X7Nap8)$f91x4ZIAJG){XK
z^Q8HW2&D~k)UwFh<y`eM0JE+Mcknc&peXo5y5`u!s-fXn*I#Owe-~#K-T?5Kr&nDA
zsKhTt(=AJZt(5FR&$J9(F@unT@Vw)e|Lhu^Ji3UNZYJAvEI0hm>zbcMoSzj)R}>Nt
zwZF>C*V;yXapu%?ZDQu3uoU5eV>izx`eCZIt5|)+ke9rL07B|}|3*mtD<`z_iS+!U
za1YAz^bk;5WyeOiVkT6>1bD2TK+y!wD%jF6{J*bo{u%82Gc9#*g#^lKJ|5bqk!4U8
za%Wf~<0~x&<ob7F;{NLj=bypOKi5)gk7;xV)IjM^bUvCN^9^!QH6DII&HoVXXhGP=
zv0ir#sX3TbMpd4qX6vkc)T5ZK8+!^W3(_dqjDv;LzmPNxY*==clbuq&*eSR~Fy9Es
zPX;zFI!?!3VIS*QVo!R_sI-f1h}ZeVv<P9{dgz(eGj&6PVPfE0#f{B6TOsgu<zL#d
zuWMgtJ*@2ARTY<Z0Q_dC;m^9?FoHwin+_KVMu5_}B);FUjB#Es?$QncQL8g7%-mNw
zs_ZLk&@D<-q|pGy>9!esmf;1|jiiIqNnqpTHgD%>vUE9bwqm1-24fkQT{jZhFrKxV
z4H5`!i3UTa;Nf1_i+Qc_?uu^azd*&-I5A}kfkNTOrZCvEJ4RuK8kAufaL_!#E_0aI
zEm7RUjud_iw-Q$dYq=}&rQ20?e*b^Sn|~LQ_I?O)M-3qSS2VhtB{89spY?kb{*UF&
zzY9tCRZJ;Yaufkb4qJ>Kr(%j*13XS<hu$h`tdzF@(DGag=bypOKbJ-CfG@b`R-c~M
zC(7!A;YrEXWjDA3%*-@;I7q5VqEHD3hYe&((?lJV(#(8WL@5{a4GqC|I42NI-diAs
z2P3Qk(jjWadS6s&w_Ix*s21jqdii<h0rFgLG#0Q@cvpomUrqJMf-;z@#T-1<u-PQb
zB;F);4b-}EtxO_AZve;~cMdR2&yaf1?nZcK2fKt55!yWQ7?^m#vt!pld7S{sotr%X
z$Y#^nT%?5V<G&DE@oWVr6x&95Tj4YIJ|kg1yMK>OA+3SY2ukJW1E@24p3ss2u(l`i
zuu*>c^9Y=O2a)dMAF2J|SurFKJ^h+jM=B40bz$NP=I3(^cd`2|2l6cryd*Of)w&<m
zv)mri8tMa3d0f)`PwWUF9(P3IaqY6evDZkq0iTw)&(*pjrrWF#`W-_mB~VO@np#Wi
zd|X!F`KbQ(FURGo%1k{2pDi<mx?4T7J^e?<KtFy=EKszcNQ_U5f?13}UE4p@?v^Ms
zKxN~un~zzgYjt|)$WIG!)6IA$K+rrxrV(SUSO(=Lk)GqZ1a`+rncy)5Vam<igVfB>
zZk=Ed$GDm;%k}X+{MTg7zXMDE3lpx)B_49}6nQ-U@Q`O2Z#1#-V#t5vv`CnZLigU;
z(0vA{rlzJ;hODbu^P_X_NU3jzzvqs3XO_JATIH7$<=WY~uYG;D{J^G-XZATxysnq(
zIe$UiDayI&FRK>4vCn(;{CCfMasK_qaSp5FdgH&mv*GssPZvLOC$-BbzBE{H?r`gY
zd0ty~U;es@kX+doTt8ti+!4&mWsn5HEX%J2g|QQ{ye@AxNs!x;&y4c#VhDMUaez6r
zMU(GGD}kpC7u_Is)r?oh3Qfu@lAsXUa3I%p5=x-|hJVQ|jOp(j#rL6v+*8Yz&kC87
zHMi(Rt+yr;d2+QO)0a6l;jQCy{sGwdoClt-!#T5h)IWtk>faat1CV*o3XOllAB|5*
zpTTt%AJcwoVlzwopJU&2Cnu3!HT<wQIZ1G4I^@ohSz-4;*>ap3dt@b=Q8$IclgBRE
z>~ZaFhsC-JCmdCRZu4cm@hU;L&-S~9DnWNc_6ghrB<Q{bg6{8;pnGk_JJ_cxLHDus
z@-r$y_q>=HBb74zw+8C4C^E`QEbjkMFpH}HrNKaE{s*q=U#R+NgsR8SqUs)<(ftky
zW-0z%*ZrH=l;-V{mnQ~~9*J2Lc;9<-GR!AtIP!Mdt2AcsJkDqDmYdu?l0>M#4iyVy
zm16i^v)KhULR5gP$i0K7Dn$8FR|8yL>!?5L5qv(lpYKFfmg+E-rYe2+dr05Cho;hZ
zk08aScotjFLfAU1^u)-QW)%Z@$xeH1R*iV<&pJDLFT3y{Z)^MMc{Z*W`Fm_u`T2K9
zw&fXBu+9xw!@HL{LhaANPQgDEZiaV%<<Px=t+TrV37lvK|40zVve?^ZMgBVmetJzV
z^{sP*SrG__L&}E84kHv56?~`V2U*Fx3whF>Jr{x;RdViC8w-t8a&FzLoo7^X?s;3s
zK2^!Nk9|P;9?7|lD!#S(63MyG?5Z_TslxBA=s&Geh5z<2uW|=(7-fkq5A$EJ4^B!C
z{Y!(z{ef%R_f+5=@GZ*lznv9$_d*q+t4=-$yjv?0`^EdjAn>+a(d(+6e|&3R0up#<
zmN*t4uaFwam-uT}%VN}xD53ze<zATr^mM6pwEaA?!JrCUH(?f~Rtx4s6K*?wis!TY
zD-`iKmW#b=Y!G;V&8-;i0ooa-5_nrafXD;)Kvnu3utj5LPFUzi=XRN!St36X)`|cJ
zm7hrBuI^ZspNM%yneMEg$gybi*HwNZ^VA9dQ2B}IzR3Di<tJi3FX@cRPsC@DrIE^*
zr2bb1T6mgxsOQ_h`A`^n=SqGE<}fSpe$G*z^K$+r@YYB5m4>tRl?4k?+2iB&dyv4}
z?p9i51Ays8KUh6r!{3NKPBE8|Q^<zk?`g>V<6g=_izcx3I6v?y4*fl2x7`0Kh_jbB
zoS~q<Mr60njR26lUo=vnzmC(l3o?fEy3TJHV_V)2hSk!E^#0Y{Ny0GNN&xQ=bkvPC
zTL}Q&+vPYBR;j#060OnTiBZ;PRo)>D4-)}=X9M_-j&k<B%0k!XLu2hx>BG%KuD%Sv
zE^}5y75uuEz^|*M`rTmk>pBBU*ECi^L9h1v!z($p<biKfMOO?&ZZ?Hn^PgCYm<STL
zo^C+~5sb!zAn~TDB;KuhnF>)go_fW+F?cKj4h35qHvZ6ik(eBROq4VSo#6t?YIw6*
zjYSzmjh<c)Mr&F__$=?ij^w7sx%>z2SeSCSVh}gVWYf6RD7@WRJ{?C<vzInMkk<pe
zeh=VvSv*rYBV&IhQ4l27-C|_}St)}ugmo&R=%*;Z|8!O;y53;0u(3<F^CIrDZe}8~
z5s7W72OE({2VWns5wTBQV-5vMw8o3bMufeu0lAM5Zu=nj5py?l<UaC?!_o;=7DXes
zgr)E(e#DmWyDVUcrU*N<H^<OsMPGmY#*!$-XG3%O<c*~_&__8z;NO+t2qj60R@{<M
zAl-fZ5x#!g{*5sx<;ZS@9~GB>nTM+1)@p$60)IEC(G@@5l3dhVW65RH7YrkF5ry3C
zyDX4w3wW)sgdpz_3MKOkS8y8<3w@6kfrgueW4c^JW+F04?#G^BKT=G?n><9KZq=dS
zF4Bd@LVhHwLuXaq$syfx*VJa?tgO8J5j1qa!d+`bobGZhJ#(~=d8N;TIri|Jm}r&F
zrNaf}<;jFh)vHHH=G~!Z1o6uV`clOb-hROMvKoPDVT5eSJt{nYGavycS~r#3D&?KW
z7J|;3S+q+j+4fk{B6(E4qYKCvVqo`|KVs}&;2NanhDlqMl<8Kb=%L3j|B1m!jJmc|
zLT*?WU-iUbB;XpF9k0zAjC`XN+d1gprAF+L2kz}$*fPA1q^UtXshfW^S9-sz{0^wK
zY02l{{XUO_Na@{0eWLW{^2_dp%n6n7os{bN_35xjhq5_(IarLmZtkz?EBdbZD6%KP
z*_bTPXs#|i6@b?%GeAOfsDy@9KJ;M(e0Gm|SmjkP`-MBT8@c$Htut+Iz4Kh4Sf5Al
z|A4Z9d!}LMUIep`f?al!PEyh9gzQzq?j&Fq&YSo7G1bG=yf_<p7nf&D877Ghi~HgY
zmG6K~jj9xjm6^1U&G2S3>Gv+OckUf%8OqhR8}@i2^`3(g)j6|LZ$6TGmv+=uHcOlW
zSLFR*QNDM1N?&BpzJ}n7@hVUoZ|NpX8Zc6ctaVPe*rrKdLPj6Lqp54y`|Zp7cl>R;
zE7X!agBY#RE#Qk}#Y%{BBhYt!U1x1uT?qA~Do+tZmAeVzSkib%`aOG9?nv%8l7bwp
zP9q1a_*n-lkB8C6p>$II6~OGlRS#xi_8mXM?9$*jiW7bTv%BOI!7haR5Giz8cpVB|
zjTy5$B|0psvih3gCfk~2ES<cTn;)llSO6NY@;%v?e7k^){)@uKGKD<;kr~hKECh!l
zI7>0>G2$C^8I>8I_EIxn`%SBL$hn~@bMpnI0i&*?gb3=uhojz}BKF>T{d9{K=e6zZ
z`fZWe0Ym=M;4E6=^-eA(bFff$>KOzBJ1oa4=u?ZpV<a~|1UyD$iMAmrFklGFG@0U)
zc=DE8S{8VWD8OUnX3O>Tu-&86=H};QGMN*0#?gMd_z}Avzdm!TYx^FY`PRY|Hj}P7
z$v(LSZrcF5+;D;Ieh9kP*nqS~`sFQ6=0cU$+l7#`92`FgvN!8|+*Qc?j^u_s5M?Ki
zZe4ciAozQ@k@keF?I^!Z56N@7>6dpDS(BV;Zuip!!F9WR<*oML83+OG^`CjCYAsU%
z?WySCmbTg))iL>@t5Q^_YLL2ztIi;5*<3ZdZ3oqat-2o0qna?)wl#cW|LjTKZFFb1
z`6x1NRhJmyYuq=xErWSZ^??|sZ9yumJqP_7GG?*%M^CW!fRZvl$78U3-7?S}P*hv~
zGdyWmwO=dk?sex<<po-3hG%Aaec!xv=Ye$CuYs6hoMWKtlK$`<@sKahV-#WS)7P7u
z+I894ZhINEp?lioRS~N?&g{yqjYQ$GBu=yk!s7$yBun8<p&q{mhG||ZbVf0=Max-;
zWq&#LlrzJ0P*}4JzO5?sZ3VJzQQ6|FftN#|3Uqgpm@$&5+cE%-A!qx|XB|U~>j+OA
zL(a3GSdT2DJ+U6yVQ)TbJ#s4&csgV~vPV)sYdumk2?HVPk!J_e;;E|21tD+A47%+e
z*%8(@^@Ou>bgO)P+=Qxy46naaX=xT~dns|(dL%x7&hxAUfTjLj1hD)f8=<J3j(Lyr
z(U$m$cL`d&HT1ZDd5g>}?)ZtF2>s^&Vv{Csjkew5A5sn4M%A)3BiP(gvA>_N<0UV@
zr=};qen0U!VN|XCi66>;Z<BDY7^JL^5h&e=p8LkxvZENJ`rbRM`VQo4@u!oOcKSaD
z+<hxJOBiEsIYd~9_{eR9eV&}7avSMHZX<zfXWd4spSq1a%FaJdw>fc!GuZks#C<+G
z=log3-4{K%>1OX0zrQuBh3xZPuA?_QLrm8inFs*Kt$bySI+=AprJbyT+-V<F8zIQu
zPVk*w&dm1deIwvE(j0BKWDHr8Py=_ebsw3N=_<_KT!p#+{Nr7QLX4P%n7V##<;Ht0
zb9oCHOQ#i0GFRFN)ymAB=c{S<c}F5bDtpU9m`b_H7kS)tpDCOBWY2CoH9f6Wfr+u8
zynF_e*t34WjZh+%;Pnfx3xIUzQnW9jA7T;&7WO=q^<BXntz{NP5$i6m@fvtU{BGwt
zu<6kBZa^*~x!jNMECo{$ovMq-#KV_)^oa92=2m90YmntgD@Ai|`X@d?UW<H@Z3kVk
z@D-Pn;Nr2O1^JWUWy{~pcpdCYc4Z)s59YM{``h0Ibh>=IIhdJHNAthP{v5nbj^6YE
z#2kCh6URy&bWE+5fVGAiGTPp3e?7^>^*?>mzxruCE?_!+l`7{EedIi1IO{x8a9JH{
z=vs)L2M+e!|H$?t_4X1k<6a0PwG8g2N*wt;A@BGP5ua~U1`n93_KBA?u26rJfTN*?
zd^r!e<&Gi;2?wuyjNtc1ib1A@6ySGI^wt3UzTqkSZds0!;PUmphpfh1@)@Ejcc#ca
zjf;(P?G+q>f-@8tPwpJr??isorKC<8QzwVE#7AsPU=ih2#3YVljKu4-;qDM%d(HCw
zlb1*s0&+x^dCD1K9Otao9>DU|7ninG`K?XZb-_`;XSdJEc8_B>tInSeDrAN3S>zi=
z#lpAI3s|<S6)oBDlX;vm-d;1XDt1i-(rq-aY$c=L*O{DJa!%nq@3?m%)Iwu{0m@JI
zMYNOyDA5goY12cM+fNhwqgrwiUMU(%XAGxrW{ztzU+di<{LCrh!BC|cWn##R9lrT&
zjv7^#PbG`*gn$0}I9?t@4V225O7c)qiJ?Bo&W!BeV{G^BcO2Qj|8{5)a}D_un)vpl
z(e&-rD2u#kcZE%MlguZ(EzV5tBsMGoS|C$?!ftt7%jU6spLp;@aiQEzIq%?WEOyUv
z1mU^~Zub*4I>eNAt#dQY*@{xS*(Es6_7XQHH#CSUIPI4L=kTMLwLGd(l-k@1=QP#k
zNJ_WEAmXumKpL2^2gJMKgxq4IXAYbxdMj8ZNGGGJ54*AO(57fS){nQ3k|ab)hQMx0
z8FIGxwG_n&f#xN;;hOB<C8-%5WYe)>dW!vMll}4|7wvFrM7LnJdL8IKSLE8i74VvS
z$u;JhoTFwc2EnZ~GjTbnOmM81cJskL1^oxv&rf9R@U>}lTar!~J#sg9pSh)}|A|0A
z+F6&gyc3hjdt%?3rZIXweXlx#f6956hun+p<vJi~PqoYBwA3$T5FiG0qp1v3+R!gZ
z07CXn8rY<na&qe1G~50uZRNPOYf`oS^v$&X*5t@uV@hw=@m{(N=S7mnFlS;+B0uw*
z{3anOMJeaG+P#UJ`3e^%Jw}{4R^0EsL`X@Cm)}$RAN@v99#56`IWebwnJ>OvExQN#
zyIt?P4W}Q>pS-k~gPSg%Z}F<ZKia4=rl|M>jrfPJDL%3Vh`_-5o%e{A*}S6ek0O;f
zld&0vg3VRV7zDO=3Ye1^2>tf5Q}WxRHEIyE^-uc_9c()j<1?|Tb-1Z@uI>rPIrOMe
zUnfJX$B@!YuD0O>M%>Z#3Xr1TTRO+cP;#%BX(X?4qD(q#*R)19v?fue9H~?Ol1`qz
zc<PjE$qWDNQy(n57nkL={XoO?oOOHr*8b?$lJleS%cZBrFMdB7zxe!U{Id4akH#<U
zKN`O@{Am30>qEu(atB#O!-e;v2SIMsu_|>9U+uqeT4Y@_02Ey%qUaov6wR?{f2N5B
zXYzc0Qi}AZlbs@;iV;cVI>C$=_CkGs>zPlejK>=okEfb&iYC|lb$D#DX`Qy+A!nl<
z*s3~<lAc#dbtA@=cSt2n1F$XhSiRhEB`+c9FezwEkq~&;jwfH$!8hkA`qRQjlza72
zdGFplfsK;USo)AKbf-<2VFQlm4{(*W0xOnTzop=Dr_8KRjm$#3$)x1FzK`P*g+{l@
zs{wPd%W({VKaTsv`v8a``^0y{AMrj}(av~Pp~zbZX)q_nZn7D#b!FU=YA$CAWDLPn
z9Cs#eQ>g`0(vTJ}xe#Aio<LiIr*yrn3}=%$!Qwg6$*p~Lv{vR=NSc!}mc8?oP@^7j
z3VY#qeILKN^1;2og?acrvH*SrfeoF@H6rMm0oZFsUH_KG_`Siv;O5`+A=h%iEW{C+
zg*?TQuj_{mZNNuFaD*wOW?3;)R@|f!bzj4?dCTP6hLdln9nF_Z9y>|CZBmN(Os_`H
z)K!ZvMRM;x#^^z!O;p?Qs70SwYux_@tATC3-u;eG=&Cf7N+;0lGEk{N;D8*J3b1v5
zs7SJgQUR|>C6ogv9VEz5sesh?D<&r86J>rq8E@q!j?$1{4O7X9;ZhTFP6x+_y}^PB
z?H2n8(ojw#H6giE=+_j@vlB?t5*UXRr5r=)cZLztbfz@@J<=Sss?zMc)$O~dSR{tf
zR1lmZvPyGR3g}rrlrbJ<Y-%okd@hOX6xREO6BQ#+LyszjIHho?CDT1k98MD#4qFu}
z!9``P)A6F`@ZPr0JmGan<&sSrLLhZqn?D+f5<K4L!O193JZM8hxnSV~4`X;YxPz4j
zJB4PgZmofLpFQ-($avgXq0Lym&3%&R7c8#i>O30)`)?Td=`~fKk`(P^sl0HqkwG7k
zyr=4Gb(}O67aAc-SLPQhT_h(BC1=tMXQB;1b*2c!6v0%Wp*YYrN!fpiF*MbsoWOZC
zZ<s9CVT?~1a3>5%WSY>5797zS<8??nCcws;lrlV4Hxx%5*B}`l#)+=OM3PPw1*WlQ
zV%gS;VygK5(ERwYyPgbXAk{h>l3DQJN!R<E37Ka(lnRTY;S+p2t!|a3^G5P>ushEV
z_D4t}w+nR5haa>;YO9aTs|@?U4+*szUx1%jkVcc&&`KVJRhoDOMvMj1$j)XplTF+@
zr)V=Ohj&Imwl3vOchU5j9WP-M#FPo*W;5BDgk*cH4`=^jSCJvO1ZPo}WrBUYD8NSo
zX}P|@Oe=P#85o#vNBfa|yosQ3tS1|LiFv%EVgl<eJAt?BT(Y^H0xAQLDH9$^yfh+I
zHa?U1ms4q9Jz;rX{j)63Cadun=2T1?Rfbb^q_<b&u!%HWf$)tksZ9lK@}?m<qRCLv
zxPWdP&b865RSJ~~niFm6Gup#-7Q7|FaB~Pc+jQ3o-*yBqo=5*ls-~@c0?amoY|DWi
z*gloVf|VEf*<1X5{L`;6N5-d~x)B<oz|qL#cGYcl9DJJ;=_9)0QByHTlw}e}n>k6d
zT#lR1qH-oqlalPmZ^r+?<cuGZ4#s1HO$n5tgp?FXJt4(@l11MU-lxY6i)irm3XW*6
zqdY!z3Cp9U;S@t@TzLc6wp1YFNUl$iAFD!7hWMPR;XSgMp+EPeV3WW>x_yjNK(Zg~
z7hmpUcYPMi-T&n)pF7@tm>i}qX3i{U56&#w{Ypmc7nQe9`#6vhKNLT9Y6ksvj7Tk2
zaZjZ6a`e}zw9yN(OsU21s5q)KoPB19UTXB5ui~&iCEiXz+}2>$3B+-Qd~fl{yMZW4
zP&#?beW=PiF<xH%L5&laqm-rB4XJEoj~}Uj0<u|7$^~>>O6di}nJr#S-i=N}p5DC{
zhWFh!auQvQpgpD?UB!1djeSsg3c<|B95}9HRnH#$@`yXuPwd<=!5My$L+$Njf&~pt
zdYyJEp%<W$57Qu*=j{J>f3@|vlXA)_jVL)!w2D%SsnmDnd-2nINz?_ea6dKu8pSwi
z1lv^G5&3N!{5W2egCo0PM>Z*h5Q6g4c(y#NG)VL^UGGoI$|2cfif%B5Y9`h6^D)xd
ze%C~<!c?zx(!}TgUMxL<kI3heji}8#zR@dE;yT0C7$qem+NM{_8Y6}r?Z$K3p3-RV
z*pSM8LXrhk-BNASQ3RzH+d6s~{)4N@rMeKay!QJn*fpOyC7%!|<xs(FM|3lzUAX2*
z%l3Knbg(PogD(yK$jLP(UiNO;h`76uP;3l~ndo=#d$m;Q5%&SN_>zP3hj-0N6K8Lf
z%fSEydLYYm#^@Pc4V!Uai0#wp(i?PZ#TmKvS-Px<Zlmbd#&*u=xT?Z{<-tU%tV4it
z1ax^V-PUgMOuQfC7ELJuzi?SQn^CTmIn8|CMC_7$5TGn%>US$2h8JeLVapO;H>dh{
z6~4;ovg#^2;XWOqjFr`GIss+`e6n~(LBc`?yf7&bb|3S-nzkFrptpRIND4(Ym#R>-
za;a?Qsw7gRA}|q*aCMSscBt0C64^=MsvOe~S&qjmXX4X{*Z|R9F%IcY7j3fq9^|Dm
znkun54}mZynIcf6LkZOW1i|MFuB?>1U$SN?ELxNFZxlRA;AQX&5`6Xc^npLi4gQ=`
z98QE;sdyxk4$}Jj!B~X20qDisUTh#BW5v~4^<h@#ZxHd+FT?Lhmu#gk<uIiydfTl7
zF}AK5!G~o*PX^{O@e*=_%4B~nLs%;E#uQr*N=!Aplv1UT<rJl4r=b}qYerd-7Fjz)
zjgr(kK^Am29h+a7AJvhc%n;fzg`W?j{LjfLymo8W0b?;k9W()k4gVdIal;C*k^+)(
z2M*gzRnqIzxq%_h^DA~EvNAnjZ*wQ&ByYGaSg2wxkD8<psL09Y8`*6t=F?-x8Wq8L
z!{unDYD-V;jY33hI_EB3HjSvyS6n#<rN5*ihqN@jrf(%AV~SRVtZ7vIt>ZVU)Rimp
z<c5gNiXU>tU2nIqhZLvb_>zdhQ!<nVr?F)v?BW~AoifxTAv5O2@}#cq1$DHb-L2L#
z6@~fe@24=gYIsQ>z#<?n;W>|*F@-vwSXnYxq_|@!zvGNEK|y#jCZ#K-ru{EFg_RGT
zbc)>Vz>F!KsZ{o`@R5?`t|fTMIlPsGxPfu2kMXI4|Dk;U!}$Sgm-XdkyBHR^TZV0@
zF8~Jf_}D^~>(xlcZFrw|KvM&pnK2tGSTm({K<MC*rI}rar%DYA_Il-JyKphOmY+nO
zcK$-3`}TD0uI2i+P?FNVWB;zqpY~e_xQ(dT+o>(ZK|8~{<@E%6;SEZ?%x+-n6fLrH
zNE1LLdPHf>*_5g8o7^D_EuPwooufzg4-C3e`dyQ}CQi{xdY5b8Hxw6Cdp2(#i_$o+
z*ELxFvSB-Ls8Y-=qrAEWF{XVAJL+=RiS(J%elMDsgZmeB&DqxvEgQ~ReswNUL)Ee&
zNq08CnDnx4_1pD>vRh@HC7;omgUMwwCR0gNycr>hxPxEc5($IkZ>C2LZsA=S10n5B
z4J~BtpuM8;b)HrUBpOyx2qYOIQFx@WBX#(XB^#Ji8`9YFdbX`G>CpM(9Gnc6iG)J=
z;~AQwotC^r)>ggNm@93`Pf3y9PD|lnVj{CHy!?%nFe&63Iqz}HN{2uYPsT?T7Fa7k
z(5lLta!@sbFjF;xn6GLCp{Z&F@wKWEL^)~%F-w&)RE;1URgED2p512~L5QvI7pV-n
zZaMKs*2MnYkZYEe{g)w^>Z-y~71H6ZVB|t^2GIa#{N4Dq(T1{{hBi?mSE|TaVT7B!
z)zlif={o%|>+!p0;Wa~$VqD5<!%x18kCZwohtjeRY+}s74{pjFCp32*?QhM7Rtb@8
zXD|{B=<Z=O+L1mp>tN|_IKCsvWT6wm(>VrpkdX1&n?}4^oGrYezzRHo@$7YxN8Gv?
zV9`a{ij=VKG@_WGkNQb?St=EVsg%_d|H!e#x4BinlIhR=v7f|uArE>fs{bU#`9J9=
zF@jt~#pHcQ%c9wePPSdCw3*I`S%$l=K#6i%we7&lIXSoexs0fC{0RvrP-1D?Gl#Vc
zUf{~lThQ!#@JfDDhqA|z9#MziCn<&HCCwNosc|jsZ~cCmHsH$AR=nsuR~JKzQku5B
zOuGcKPMrQeJ2hVXX6Rg@SL2&pRC`F8m<XLKrqid*OL4)Mr*CFSd8>R*H|z12UK%0z
zK<5fzjn7%N0>L~64)Up&sAF-bpLDKpjff4uHu9*t=u6X9#l{qCHVkM@NZ5?assCgL
zFvjx@ODXTY*Snqn(AoB8s4OwDR-sTBEAJT@$~;F21N19EH~ZZJzS@J$gJnm;Q+2Ko
zqOZAxl(JEO=0yC=iKY(Dbl%rz*0HIW(5cQTN=S1p9d=GLlU_0FZBgZbL#dE@IvI+#
z7=}m@sMK_cD<5c@e9M4(USqh9zF8t?D<f~~3yeVQ;)=@{DW^Xd*Ze0*Yw#np2!7CZ
zO}$R#N45`jv{;ijsUo`hnaQ!U{CE!LGRsw$=ANCHCBa*`e~rhcYv#>bl4T%niJ!!~
zs0E3lyr%2;gH0c6yk@6{uVV0v8T_eHAQ934zev8cv~Jx*0#n}KjK5oGwWr*!|FFGU
ztLC-ieP1DnVGI3Tz#TZ$SoOqe-O&0_#b-%w&9ZpO{_01~MYXo9Uy{|Idjj?{UD0e0
zj!QdfZg$DZ6VLF{^{z<v$=&YcW3wv5C6SI`59ZC3`p`YwGgsk3H|j_6wo3%g`xiv`
z=lFj95qnxaSa$A276^vo`Zx5nO2$IyHc>5vsFoK^j^oFV=p=c~l<OS(zJHEP(`)=-
z8Vb_wOZlFH8<zS0cD)FrmQ1z7JesCcI=CLoH9lt}#y1v$8h<5=Y;fU;1KM+v(9gq~
zKXE|w(zwN)!%6K5_Av(sG}}wN{Cuvi!GXZ5Pf<B%I$l_QFDFsg<?*kQ>YIO$o!2)9
z-I|BWZXL<(A_2`Tb<qGEPTZ+DXH1Oj({rK<EvUf}kD5|KBcx3<hNQTPE?Q5PeD|b4
z?FcmI_y(GDTx@mtS9^=dxPw3GXwlmCKiG8lsPN+PC2<cLwMIh02)UG}&0I}yAR2|;
zeA*i1f5YWNyON(t3UoT5^5z><CGCi+q#b9gq)kzgTEdeewM?6g4iY8~kl*pTyYD!t
zm5|3Qbe$%XXNtaBG}QHwjsv4Kd-t&hl~G!t$|$XU)+o*JsZrWfiaixijfB>8gQytJ
z5uuolv#sf}Dx+KCvKSXeu52uH6F`TKC2@8+9Hw=4b%h=3yJ4m^VzrL!BT$v+L|)a8
z4?seclH=3$z25DqHwi5=SB>eg0s?YXtBkfI>|>9VQotwkF#Gbe9}gT1PMdm}nkL}n
z=5M>}B)QMrXcIdL@KLvv{Uv_M^yMY*A0!|pXN)?~<3jp}?G*aOMggejqnTJ{ikDc%
zmDRO#q!{C<&JUyYJT8wK*nL&iq(f%shwM&*(fW`y>)QLGWp?M_@T<`+VTu3$kmA43
zPp=%-^DKBA_O*JW`uK=^y^ZAipe&ES&cy%zN&l(@AaIuyAFHD>O$$KF3MFIKH0_Ai
zCl*E9nIYG6kFL|0*oQSnge9MFm%3XXdM$pyF!Olp$$}r4&$lTj#qe;&UeIlvz$%|q
ztf8TxVr>Ie)38~upG;Mp%^#+<ILbn9a|Zb$3KSq2vqkb{xpd8UTsu+m^GisP`;ty3
z0vmEI{|OO~&gzJpaqWJl6)WN`%PZ?&SFzR04f=zUNAIm@8OP+pYhHoLpx4US1;v>6
zpz!Q$!`?-{Q%E*>!y7dD(5}M>+I9E@>sBq^0QyBVma1LH(|u99j<}z;>j?NB-Kp<q
z+jU$)eGTW=Np@$RGrXDhLn9E!EhF=zfH;nCYkIH`T5|YKpRgC~e_kT{Nzs~@#<Yjd
zZI-QA%wYT#a{A}2Hj-7T>ehZPt5PvC7u?KV)W5sXY?#>K*At^MPjeyq`e;HEwRrO*
zZPvb9sOZ0+ckNgL&Zw3wgWeO<h(;cHL8DP7GiaOi)67@eQ!R~7DAMR*trv$H|4-xQ
z4+}*3K6&G)gU3Fn^nBp91?>X<krqN_6k=%*)kvQ0+tG^pb}X6rIKp)BG~CN^>?Sv^
zZ89*A2;XFj;A)bcyIrt$FET-3*Q`iWk^0)b$Um)GAfRFx_@~tkP?YDy(Cf`q)wjd;
zQ#8eVHPt>{eL!Q(Tmw?vr+qvAL-g&F79hX`L*EWk%hSFc=b!ZL_>Z6Vr+qt~$}^C4
z7Svrh=;&;!)9jXiX?Eg&%EF?{Ni~2k-rVd>odYM%IQ)B7a#6!yD)*X>8$01U;BN(r
zE>+u(zcd(h75Vo?Xg_J&k&0mc)Y-NjI;5RYEE<#MPP)-Cm4jb%Ub$^)!HISaM0bKa
zIAuE9UeBIdnJ6UblzePU_*N@;7_<)L2?c19gYuF`Nov|<eEO@vg3_6zsBK4^PagFV
zvP{FsW)!6-%bI1^yU9`sVWYh!>e$wXEYk$D&bc1x>IIF-<oWGCcs(~8`II#LRU)%i
zi#wQ7+{Y?Wk7}LUFHa`cW(QCIF`D!D2Y$+sW&nRSkR(D??H%Y>o#*HS@JqltgEk2F
zH!Nd`Dr)|HVb-6c?w*#3sfu>N9sNn)jz3VC^#_W@e&?DR%OgE6Z!XM(mGFJE625Po
ziQ4Q|s`_^Ldw#K!e%!ncaeHAPfxJtH8m=@yGv&$m#ctv8q1wkqjx)gXofhk8ILL>4
zrUPoHW#EzTkbrAq0e~Y+*eO*cC_i7M(4Qp>$9)&|h@tw6gV$d)kq%Vh2OR>KSml9v
zlTR4I<+F?+G~?A(*@^xL<RClI7;At*ax`zuyY$2}?Jw7E{R(&XV-Z;1tY=z0@=RM;
znklp>GDoFk4Zl(W_B*3IUnyDH$t%ARmw>v|$m|^uvi`9$Eksu3uXdEGsI-y&frzE@
z-+y`v{aw=D)Qi}wt_S)<s!A=W6GI)N^E`s6RnaL>Pe?6Vw=(k_?yuHy0e@yLJM<pn
z&zdh@H{Z0AVR|93#T?jzDU@;8nI@tqE$-0H0bEE`<pS!5AiZ|sAl^u4^ja$fJt<cG
z`$nP2#rlm%mRmR*_3@Z>P2(F3V6ouP?ZM>i8>QngDYT4E32sG)B24f}%jgW=Y|H4M
z>t-Ren*7G!iIL?_TC$EihFsGUIFr>fS3?KoewAw)Up5@bj8aUfk2liK4-il>Rh4%=
z7g|gM-?OA%fU9eTmeE9xs%5l+s%7+#Y9`b&+I09scH9^TT1L-7tjfPD+WI*$^Q*O9
zcC#&`1tIV#MzHV#24Y`Y_)w~@YTbc2nch*p)8}T}x!%LhmFT0`x*M~UR#5O&FB!-+
z2b3Uq>$I%svM5p2GrAPbZeFv!J91}xcbt!5NRQ-?Jw&}bR9ptQppBuLE9m$fsUk$6
zN3@FV09!PMisXP^9V%i3nnwS@O0Y@luzvB_ScW=}xj554FeJBhuc|Oizzy2T#6xA+
z!Gng%1qM86NLp42wJusgX?`UnH@AzXRK04R=4zL9a26roJsQNz47RFmbbzXDw2BhJ
z#U3q|UpKr$Fa3YyoqJr<b^HG@@}{yNGezZ8aT}EaDk5i=sc<JMnJIFdi82-BAm<I3
zT8arsrh*)&lA>~$h@2TiQBZ+RL^jR?5e5txFt)Mt?|KjI*0j=2caQJyFG05V2K#*8
zhu8JGo-YQbXrjoU0T%AJB=pP==tVT<oT=RC7}Z2PIDQgYkuXI8RQ&EZ6Y`Pssd92=
z-<Md0zuin=Vqvwz?$TM_y7*<C@1StW=KU*COT^0Is|XLA7_{}FT`1>YVKdnA3;(K8
z#>xK#Ogv@*Jin}gjxXDg5xn8xERV{mnndc8H3`SRqb31=9k+N`^oKh<jB;tjBj4m1
zQf3-?k7=|2wu*u1iFyR^eujR47Ram080m_kMoYq5`cKD5E4#}A5hvq`mak5fqp?+p
zAc#LZj9Irs4u)Q=M`4;tf_lEW)1q`*>7_Y=Kt2;PNh!^g>@44EH?II+=%P!Ho_w{S
zG!n%~pGPs$g|;l{m8~Sc-beA5OE7k_ohU}S0>wyARR>07s8+y%I1f}S7~zHbp)~6o
z+*d-u`rNFH`xV}O1O17QtL8jLsU0f^F??+%h~fPQy{Wq~jS>?z=;yjbH1h1*Ldna$
zay!cF`~#koWQ$W$psa3fuN<M&MC{J~Q&T9lsB<-^qVMEJW-sjB+d+SLk%tBh41un(
z+O;Q|EQ4o{HE?j-J~&#i-u>n*?;>TF7>hU_Z#|yJYKrobf3h2@0Usifo*x0^-0z9&
z5lU?_db{)~lv?f-O6>>t$pYx(%CWj)sce$6Y|GrD?4&9fxivT?Z~Kt8O|?yx>Ss;$
zUFAm}*`aI*;~dSVDI2h8f{zoFU0S~|(B>tO*u8$fR`Avmqq`>6^HDche)XTOmNTB|
z`Vt0TOQ@}ekLnbvf8)}4sW3Y*M;=S!(?PKwy1@JabOSD?dO!4YS$UPo|GhqEP*x+o
z>N&jIeTOdY=OXsG+=vWUN%w9NBHuXXq|<0&ztOn$;sW_O@_6tHk}^wrF#_MkPi0B_
z9#+kPY|XcO%7r_ik83%Kkp6>xT)!qcn+w5w&<*B;7Oo0xEiLks)ailg^^2FAG$x~V
zXYh}qOpmW00ybTl>G3Q5V$XXWl<Bcundz}L9xW3!N}ihklRPzJ;Hi<r|K|!d%7Ol`
zQ0@kAslVYPo#9Gfi1oNjko!_YJyTeAPsDm)J++<)jW#WOB3$}!{E=}n=xz(1Z8rv@
zhtZ6Hv2W*64!eeKDdI}G-IL!?4mYSnb>5a3rC5#C5(q|vAtAc#A_&kKT9JkADQH)-
zrRRJ)F)A+KqEmSI*ilRrij%Hl>NPMAQEVbQAb{GADhJce-UdVuJ&LN~@l!zuZZYV}
zja<@#5~Vv$X5`q+CwNha!CT{_30}%|ygBfYpssTv8xp%F*FyUd$}|Yq7*4ubbLAJ>
z!DLWIRw1o;&aX#l&~Yz!b2B1N;yx=hGA|n4l*c83IcB?}HoV0W>l(J9m4+Hv=BVWY
zR}H)Wip;|_gSEdCxN0(28_I}~Doo6$@MV!x&0tiz;z8xQNQG$Hq+lxCqYoY9EvCXf
z-tvcI{8YHd|FCQ8rL^Kla5ZRQQtN7Y0<QM5tzs|rqJOkU?ZNd4>RjU}bF`b<f{wsj
zFgXYfG<_=MgY%{L42Y0!kY=C=X|$>Ae^0j_<AS25L1liaAK{=EC%X2iz4_a+n1H5U
zUgnms^k67QEDu`8Y%iJJ5z55|eodI1cmZc8rGd$jf!OSr&Jj892BctaF+vJH-Qf5h
zL4{_#b7GrTKJ;RJtsC%dOwt0S?(7Qj(w0z)h3+a58{(xcdFG%&LsjM&#7i5b)GlW9
zKZ=*;20#hNj`j(+9`$M^M1Yq9^>rPB5W1fpI|}D6EyQMns03dn%lv7O1?sIr@zT%`
z<>-)6a9onpqAxDb{DboK7I07x9fG~wP>In_NSNkEMz&ak?A;>u$cEP-gl^}-Yv6c<
zJZRA3b{wmR2CejZ?oDXsqK4Wphi0zu<y1{?ux@nfJo7{d3QF4e9QA6dO$vof8+!b_
znF)4ldbn5ff*EJ!ph+W0K(xt$3+%c_M(?S#kD0^JLKUGTtqH9z_qNROg;S-!-P;mJ
z>{za8Bg(9HSom`#oRzQi(j9ryOBW7vU2eUykLloq+g%+f1$4_isakoxJw7P!BZsxl
zf>4O>)4hRc0_n%UUQbpf<j&B&A=Hx$#<N%osVtr~GZFGW7NWclH^ESNicZ+~Xo^uA
z$RCbxQg6p0KE>d{a}1xiAWQlWv=seUlb(XEJ%6ZT&`7hh(qps_!{)r)(8MKQ{qJ~L
zl!gs^d#C5@?Ugyx36L{=&g1PTyMLHCnxxiotXWZOUWq%(tMy@p$|Exf7wPxD$zwZ*
zMwzyrBOW=)3QWb`2|)SO*y?G5RP-<*UwD{#OV8wIeM0efM;d@eCalxF1~3<H!xJ<z
z*oNVzVy5j-%rx#%%=BO0g#&pjGnsWAdru2XRoEI!?!5p^ETiR*y(>!&=at|a1+Yi9
zM<{!Hi(NWMFd3=bi(;mWM!qT<z)^QEQe6DXe`dbMA&X$R;i%G~uOpX7$d8z?xkh0J
z!)PDr^xZFE5rq#>2xqU4RU4v#GqGAyJ`+o9!_B}=&#U)QTrD7DWHO<LR9>cx_((w!
zAKypLK@lI9YrqY{nu?V#-mK=IVI}<FiUZ{Z&|*Ikz+Qk#c2Wnq#cI;R)x^0o?ue{d
zC%$G6{Sve6Wd{C<l(uCFL<(APK8C+xAyOyy#MdD>@}b8)E}ZD&e|gV|GH1H82ap7l
zbpg4pe|b|)#zE2JjE@aS#o92H@eyOSPs`oCz4Y`p4-H%bE2HDdCKL}jWPB8i>kjLx
zbK*w{fERnBdqoIk$2cS6`800uX}0mT35l`d<YKYKlsw(dTzs7ivi1{9GZsv&lRZAY
zQy4R;i=9#cC(oEP=|U(+ee^--Pf^%4gVDXMY&1E55M#r?7curC0<%vy)ZW&^1{WB`
zi}0>&zaSjSwodsAh-d$OJF`~*!oCCu#P%MmLN)+o;CUFam({0#&M~V^2Dn$rLlA+X
z>-tgBH1S!I*v}Q{n^)6YGELBnXED~Mp?;4#+qB!z@8MG5^(@2;a-kOYD3hl95x0;q
zl{Bq2P+2#XG`$b9A#=)c>n&W2TQ`tlLt^tp%16PtQoV*P0wVob)gjW)wBjgALo1dj
z<r`SevSH_<r$V(vx*B(#O;_yLLZ7!7<<(dq7Z%M7#mD=rqPF$k1s&-T?nNWkhoC`0
z$NjPqr8izkwvClHLIbu)#uPU7L`lRwYF!yv!+JUj8jr0xg&<@=BC|flA|{YkqZF)i
zAv7*dTAg?f#3)^>gQ9U>q?-KVtg&B{t?^!9;y1Elp{VJJRBC4=;<n^4kqq4<%TqP+
zpw-=IW0FpIT&}3OpLGQNDq)4|Qx$>LwMj|5dT$&wIrN=J@L7&YcLl5#1awnT)bXA0
z*YOVA=NBYey+|{XKi#k6;==UUUtoy+2?BPn;=V;lM~AZ-jcDo;`CxE2p;oDlcT#&R
zqo}(NmbSLq$`@MF_?-3b;fe%RiR{ws!QI|v7=`kyOT$#XeMOWd+hE&ni#}8mF&!1!
z=A*i_tL7$GbK=|?p`wkLRaim}5dkBzIkGG*0m*T7x*yn|QkzgUV~&~;AHuARsFGA=
z1>kfqnJ9z2X&KhB{#?bQtPiUB=xGv_X+5#?an?sPay>*(WqlY8K!5oN<E_y-G(D&4
zJ>ZfLD)JwFu6w=-jQDgGw7f`FQXPw6?;Y!=xit~m-7)^P<-5PmsgPm8grR0@xa23@
z8<Ilu^Y2}EiA>FUO&C0_Jpd(s?nEc4ol}XQH?_C;24&1JDBm|PyKX{OV16(@X8{e!
zV-kYgCH;ZZm<}@3BmA@6yWvjwwTlY_zyhy9LpBTx6PmzkpITQhB`CM$`WnTxa7CV#
zd~(eA6{$!F?NSPs_~VIj{sHQdY!M9No8}?8n?YOM0W|_>gY1F{Sa0ju1&AN03k!W)
z8O6;lP4TXprz;*r;RhD9zm_-l3!{VGuVn#nFLy%JT~bd(OJ=ZOJ{az+0#gCK%2dXC
zOnL5+`3<i-*}ij%f>s{<-^l5w4`5-%z&ypwrTAFk7U_5e<ZiJ}3*KaiLZ-Q7j9DJR
z*{LHI8P(-&fH@m`eRr?~k~>7}Y=D5UC$gdgAc+t;P3V>iiLv5_8^`M&M11Y7v+$}?
z%ASgxW_%taLBe)Bo2!({A4N`IW}&^*RNP1LhiWcB3F+;hzSvVg98BFw>j4ASFoPM@
z6m?#RQ@?M^*raf)uFJ!Fy6-p+!t_lx8BF9$+2AX`BO9jr1@Y*Vh9%HmRmsIj*TgT;
zIOz@=_MP5R((R3GG$4EE-T*Ek>7%URL5TZc(jo2xeB?(A7`})%23-^n^}-)|QnO@r
z(oOe)gSHOg$v7e@gk&mG=R%s})=qf4Bq8lMlGO8|*(l3+LD{JHA4H73i0wOjTSY^C
z>ayj7m3!S!<6=!+0CeUR_lxi_7r)`{Q(&=Ve$gLtf`bvtFfk6w8jBHrgKo*TK<CPi
zX^|?qnqO$t%m5$tKi;+tpH}<ZYA(v$>2Pjs`u|DSH3}PbK}$pN&Ig+B^5<gr_W678
zTL0(dv8UY1#qgHyW^91y=?-XYKP{AYL~*N^Q{%dB+QnW_v;82@T<w2F9t-Sxl5&Ri
z2d&Kk)ovmC3RZ#P3w*Q+oPtWF`<w?4USEQrSu}FlmnC9Vjj0i4X6V2&hBDJ)DwYoo
zcXQOY{TR9tL}v*8@F_7vJDJPX>rn6q?osfE^l|XVPaf>JDA`!>iic+j=ry0F^1_uX
z-gYHsY42_dZjK9KnV=4@7_8LQ5TZ<JD&S|{98RJ*AxzR(66zJF7&Y^BTTNo(cf?nx
zJ9A;l?deWODIuC_f>hv>3B@S)$@ZJ){CfBdtJ1KNVmJ9H{aYV*f8Xw`Z7d9h$iFR5
z<Xs7_C>%X|HBAJYPD6D!p@dF-Wru{a+m5PAVNREei}3gkE|F`DBXA^=IUXkpO-K+x
zeRF?`GJtwmczGWDYIW%79ypakr_}{V_bjtlU3zxp{*5THKcWxUTmW424}fc41zsYa
z=J3PgcS`m`$M-)MoCd$`je`TkB_2{`2uL0}_U286fXuLbyQ6=bjB$?fN62GO@yh7<
zC>8ZYop}HE#F{;qYT1r?7i@uVmR`|kC^L-s%I^eM#&_(zM0C*`W;Wn6$V>?c%p_|j
zFaQ2C&;ew@XFm8;0Bq8`{Wq6;fs?w*`oU|OCJuE-Yc32rQ5y_AO7KSW=o$E%!F>Us
zI|G<cAH_=}G+09*Fv7ErmUN;h>R5=PPC-uVMTifJT-hj!`lrXDC~C94PexHc?HvOP
zmHi@iOu@4ZuRkJLUp}PFh9c-R2fE+2<h8RT;+I2wJ?)}?nhh3Dp0PCZIf_*R6l=Hc
zrp)~ZpuAv0a6_5Ygc~ykN={pW{9h%hi89+(ThmejpQI}_Wv{@8UuSa|72s?Ps#2Bi
zbL+z>4@3_(Y$6Hdff%JoWKNynzbGD3W>UY0rWlQTxHi2konpH7+$S=re*<mxVmdSu
zgb&9)e<+kg?fg>?wd<oC>gPWeg-5?V?@xns{ggxf^xl4oY-PNU=RkSyNv)YoPZcJT
z9^@$ibp@e;i7gdeDbR?&B%M?9r00)FcVBf-yt|OfHm9EGDL)>_rN*ibStuXI;m#x<
z!+6o2R;BgMbYxt!vfw7!902Y(D+92CK%8U1nY#`)&GdF<U`E_jV8$2!zXxVap{4ZR
z1n;zDm?&zg68lU_XX9ur_t7`TQa4yWk(N>05>m*TX01Y|E}sNTwY*RnL!Hth-{lQ0
z?Gi2p2on|@bKzmLZgfzQESd|Y$e%eEy6i$nR&P(l|54!6&z#VoSVd$IMvg+5$Ar7)
z30`Q$$5phJ-$j!8*}y+RsPeXf>obtC5#|XW)Z5mko1}ya2XEy-07;%XQ^Vl*#9#c1
zqxsns4QS=n382(G&Y?E>iy}Gx`<`a-g7;hYw!)qBv@c%;a;S@gSKKVur7faDDh9M}
z?pOMrQ?F2@24~y7dl6cRq4%su8dOAg`C5?FKBZ$13<$CEg<nquPQ`3g0m2+bVLV~b
zrozNJR?~9FUg^o6EQnQ#-dN31T;-GOy(zaU#Lm)<E-dxwoBsa|3~6d#g4z$u4@0Np
zRJ6q()uex(t&wONnbEfQHVD6XD2_-6)NfwhEdOoQ`Q@TB_5)q5+L;b*3U^TxhOl`1
zQP^{t-P@m}xIa(RNjhz{O|`4iI_&{T)W94?=$!&6?f0)8wSdbFDb{Q{TAOo1Pf1Kr
zL$3^<Q2+-Yj$_b=c$)iT*L4i-TRKTmdpX@?D|&^e8!KUJRqIe{BP#Xn2!AuOEYPxa
zt2@3{4>~#L$})kX!;_G?Sv4{Nzec0dp6ZfJLQsHt@k@w`F`*t&5x$tJScFxR%BkU8
zB3z}RAF<I9Y5*PE2F@3tW7`E)8?#b^-ddj8GEk9UE*9gtQZ>I0(k$FVDwx3K0w)AT
zg0e4M$VhrdQPf`LsSrg?#(YfK8X14lX*T**#(dJu73|nV6BBj{lLn}qx^+`sX4jwX
zGE-(zZ!r=N&gd-N=mc5RFBp9B@}l(}#l^s((z1N@RK@PA;u}p(f&^uz#uNVONU2j$
zDu<a?IxaU<6G%2Kyx+tKA<Bj5t4(c0;KLF;Z@^>~iB1-_3hO<fSDT_<iz1^-h5quz
zb4JE9qYhUHTomnqB6em<oXmzWq$AcWusoS(p$Y~ZCI_K?$URDEp;Y}ptp%n8i!K*a
z(~}({WmWT5UquY<t`!&}vbcpgn1f#wxF*OosZr3l4ecWD?LVf~k)zdRn2O1bZ38*E
z-?zK#`7$%`UPx34S1OujPVxmEb#94Kr4dm!K<r8gZ-Qq^pkcXJBf@3dPg*zR1>$h$
zAa%psY32y$mExXf`Q*?Y3<b2XX(n5e1|;4AZ|J!?q+YudD-=o#tZ$MZSX}|}8sTGQ
z%(EwX2R1sPAPwDARNmR^m8xu-swE7#<sUpL|3(=}J=TMLb&|&g7>At7-`L;FnNFPT
z%!5JNEh&(8mKIYA?GM0-s)WrIrgD`7YBd5_pj}ZY;r|+iWvY2+)E^qSh>P(pA+#-)
zbr0O~!YnzLT<UhrVM7X}?>6C;+|qf-RSm}myzYZp_Xh>*Ok#K`csK&Ki6hD|Jt~R?
zr-au<T*X{|#&-swUtecfZr-VsvZwMpav+jgDQqj~on_U737eYE27a^egVXQnMtyPa
z^ucZSFQmS@<cEFnoQ1X@;KyE1_?Kz*+`x}wHP0R{u~k1(8Fyht{fhU;v{&5!w0QNZ
zh<)Eh&(#>zxS{sSCjZsP^le91d>hGh%V?dyXM!xWWzP*{6zP`j`HXMxP2u|$m{t_@
zr55bolSZu#mIt|aQD@vgsk(P-nJQkTqGRR9x29S6uSc4LsJD5>)@uCt=FFU>X?)PY
zJ&JRk-0An_b^94Lg)W%HC4t!;&6`^u;=RtF<uS$0o09@LppQ2Ru*k#hv6BQI?$biO
zsB`wD=!6dVc|t88|DmH!U(R}0oh#lrn|xZBb5`SKT|Eka?b^w_rd<n(XM>m^gl8#<
zDmHq|veNV<>JfMcS8l^Q=#8*dlp5W_7<|^dPkV0Yry7+tl*S*mzgJ}s!8F#I2qx~;
zr-s{Fk!5@NbsrX^#WrR~{!A}PVIlH$$1QFv3NECBi~HM_ikxBO+19TU>`5EW-?Cim
z>q0;8GGy$2EkDu~Cs~wGZ}%%h6`b-7GBG&Fw_J?XYz7}UdJV6*bJoh&;NL^R9B7<;
z`IqU+aqzoP<_3CMXX3|n<;zm80vkp7F0-j8aLRY-f#N%q?}UK`TnZY&7>VoXjhk|e
zp*<T$$eoekHpfWqVdVw*z-$Lo`yTGoHV;)BQuQZH3l3F=bqp)RI?l@^%CHWJPZ8$3
z#!I^uwwU4O($>wuJ7HXkMW_-jR`Ys<D%rGaVMl*q)P>f*eW2%^XROEJtgBp3j-jn`
zk5(HB<S6*02L*Cen-!u!4)0CXDC{H7&j*El{Cu=|O7KZVUK#?QWL?R<7ktf<bmIrr
zy{B2zY*e_ywX^nqs_yo-+F7NOJ7TyNcG`QU0y3694&vDUIEdrc;~<V>kApb$A18X)
zKTh-jH;Jy&39k23qQ}n%^3R1B)Rm&mh)q&qN2pJ#N6FYehw_dk?TlLGz1>cdg_5yP
z9>c-#C>h(J_GO0OZs8T={(Z^6*Q(#zZ>ZisZE}uq*+Te?O3>mI&~U@}7H5eqyRN&u
z_vUM1x>cP0@hGA?ajt$>_?zS@G1_BW>^}CAa^1<pAY+Gpx8#-dno)e0KFYOTQ2iG@
z$%HL%<$m+!26Z2V9r32K_+`thrE{;v@aKAHL?F+1`Hi{L=$>IVqcfALEJok)eAPO>
zn_p?aU7cO&5Ib|!`Vdo9@0dlZs__!LO5R`eB$Hgo8~A(!{AnpVccHwlb21(7dHX;(
zr0_m-vwk0M{Gj!9j`+4><S3>&rhoWul!(mB6q1AmUwdm{L*KTc#EAL!W~H4t2MRQP
zm+mt)8Wgda=E*&g@%;n+JxqRdE+$CU9l58uwTh{;%8@MO#OKbi)@(Ws^P+~@gi3^l
z=DGLL$7v2KW~=6L_Y=;!IYo7!dB;>II1d}E`;7V5W=j(l`Kqq8y9q6|d*wkHG4vA;
zbk&=oro}yD2ss^vJAIOxfHylwqp0o<QG@}<<(qqwC_`*V=Jd%oKe;o_;&Kt1f{QZl
z!OM2-RBzVm=)B3h*(;4Eez<+$K;Y}fM>z6}vYQ4XEWP%g$mM$0y}jponLINq|4uGO
z_Qr``;ib78oh=0S(`4e!MqZS{n`qPKz+xqiUb>n(AmvEu>TdUiB;9?+)aaYv!>?8c
zx1%0!-9~gfs;Kp8xf51y^)9cw7yrfO-H@Jfa>LS?mif|u=(oZRK)73Q(?)w#cOQ;T
zZ@X=9GO)JbRL(VH*=y6Y>>xR0-h%vp*tgY57eO7Deeoe&OBHCEsEAs#4z6o`%Pypj
z-#f>62XZ+mA_B_gFd8t1T#oh>Y7vU;_(A@1KceIDppMTL`d3Ocdp0N1NjJ;!e)3Md
zz|<JVrQj(M!+ykZh8V}8$XZ>=LjG>5twMU7Es)j~z_ssm|JiF_i5$64c#IrruDV&L
zJr$6lQ#)nhJ_8DS!((|I?gyL4@_1gKbij^jc^)65Dkwe8&K~bXq!CXVq1aj}l<%ZU
zxyZGB1!?7Jm^5lSn7{2I$?VWxi}%z7L9o;dgEqRrg{s{&jgV1Rv$tYP%(#tU)%=F_
zBlh_Oh`2bx%`>=nIAkv-xgI2#dfbIk7QaY@6FZe-`IUc%6K%g&~?$9oAO9OEOF
zr{j+^fljil%$_7%CDJ8qS&0nX<2mX+B}Rt<I8vkpj;sP#_UstQ#^9iAjNkiRKSX1!
zIR!J8t2WGDh9kQ+!zUgXytM-5WcNc(_S~^@`MtV#Y`qVB(?9$j-oeD7SWruZ$m{;>
zaM`(}1#WCdlycFKZqm{3V*iz2K&7opP07lk29IRt>umR~_jW7rY6;e^*L$fnJ=$vz
z#Enjr7%#@o1K-Tur5qrF@v2%w+bn*9SHh^FB-s7!$j$c?;r`D;^G(x=`dZlX`J(y8
z#^YA<j8gPxZDk3|X6ozNOUAKsW0~)rhWrAr5j$&QX@h&<(i_g4$^^~<tua|m#b6Ip
zh>2A6gx&}Uq$ma@Fq_rb5L!P}^ZhqWoKicn56APQs#D$mbeLYg-sN5Ruel$1W|TBt
zfmmsXa#&X6QmE9p<N7w&GO}m9c`UD?eRoT8@c#FgE>Ctfbj>#c<F#F2)(*&IICQId
zr*DCeQp?t=&q>H}*8Ii6V!S*0kCtAtpikqX=m=|1Z^pEjdHx@JGmQ|Kebx;=XB)I)
z*>s#pKr2=-7{US?mQht`@%#gj(9lZMR%;HV#phFO0>N!}#9ppfGYvv=^>C6x9$Bly
zGcv(9)&?6fp4D(vwlQ0p-Xq;|l((K%pfFdz%vL^(OeK`t$xm6ZRuba=1&N&XL%Q0K
z&#Bw$jgla|y@V+Hp=c+kbZZPD8MgWF1;VXl68i0J_{h(o6288YI+AX8C|sHj+0c}h
zH+hpbkPVH`pVv1=fRO14FO#|+A%sl#*L$6+;X=rCLrVSPcvj;#%g>sne635(RNJ%4
zZ+Vq<lp};pcl!L&K#Vxwc<%>B7MBIc#of^Od&YRlHxsg%Ih`@H31*F<-hI0JxV=9H
zq0MrsDwx!;*6vmpb0Jc?V9QSmd8?bGmndtg68Q8>H{sNk3i<aNtm<~bm2K1`aWpIC
z`z&l5QPJI3gw>khJ7I}Eh&nRYq=!_<;Ad7SJgT`=o^wr!efeu_EOUAbZpE|rJ?vUM
z0g!ON_Hy2y=#9uu{ndPBdbCelH%gCIKUs~^qqW*X%uzhv%5!-rJ-WJ3XFal0f3YhY
z1y4WcsU2suI#!0>pBK8dBY1@kF*;@G(DybBggR_4tU^HyI-kTT^AZY&{n5rgdW|Ov
zVu;&gm5Ma*GxxTjAO_Dib!8C4g=Nn^b$Vm)wNv@Yu#w-|0y_t+;6HD|<ZN_7)dB1-
z-@$R%e`(!b0)$Do@7{i@SAcS%n{F+x9Y#sfS;xLkXxj+c(VO+F78PYenskocA!lDl
z$d`7Fdo#a19TKOD=Fjsgv4IR~U(;V1=!}*-=kTE6qp*U_R)27~7kXba&vRbkoY;+)
zqP=J-+WVy?T8cWLjj;7*!}QrahQFQ2`zq7_D%J9C4|D#={Pi6k3h;8T7R?_v)n;(J
z$=$i#JBh)kYsjYUrm3Z}-6RYoaN}IyM8c3RIE7dAQs1eLQpZbOs@$u(nRlzO@Na{B
z@Y8s#cd@s5u%ro?PI|Z#!pVX|WrwQB=c1|bZwBWT`I{f1soW9mQXS3CEr6ZH26SJy
zOy6`Uu6(N_owLHKs&e4F^mOJoUWfKK&a$x~tJS}$nrVQ?duSxj``3EMiHUOEWe_2q
z-aim-vlAkuZ5kSR_`47xjX#^o=AVWv>WNioAjqPAW}p-cXzJg1gkTOrX@2Lu?c0Bd
zRZ;m=<?~f5zJgre0T{9WB}aV8w-&a6IX!*hAWUeNqtSX69di%%wq0)8m!XV{NF=Zc
zBnVcuuWj5`cyt*7NFz6WI`R#GEHWoQ_{Ww;`Z7Z=PP)xZv&O5Za`fxdqt5SI;wl{a
zt{6Q~XO{p_Brz?eIL90H=z;&q&m`?~sbkm>J}~kgv@_fLxooBy!%cHkly9}j%ez(&
z@F6iR`0XTcYa4obXPJJ9gV*8Tvb8#np0?ThM;aRDSY*hD==E|K|4BptuP+%|i(c0?
z=a?n>e|teo5_-dhF8UBNy%sQSwB2^FoS33b<NU-M$KW^o%yRKK$+G9c5zKq!ny?e(
zq<n2xdaH*q@JLJp6Fj|#6BV6FU4e6Yz55o`Ff56duz1IHemxj(4rkI~rj{#*6JH?@
z!)>cCaW%nN6f#7OA|CBq8cAMz*%bGwHeBu1XveaIwG3TV)BP(u`)S4l;x0~U$oO2b
z_|7iwf#8vP;#KP8U5TUe$i*rp>pBf{;&t~4qYB=|pq$pFx}IWr1{Wii2YN;+lBnwa
zGU8Y#$T(fxFYbVh)7%JC39E74F0>+jrN)31;>gAIdUp>qZlQ!}IGUq`>9eLwK}y$a
zn2!>s<9Eg(Sc&~rJCrc3@1l<qrf23HL&4NPds=}P#0n2xfsGwxo+tB)LRj#CwG((d
zc<xnJ=++yx$L0|UqR$8Bxe8B}Uf4+-J5!UtG+>fG)7aii-7Jv?53gI@LgpOkH#t**
z_HbxD&r#p)f%b4y2Jbq}$l>XJZMznxPw<ANo6qD(AtajSH@o$SnMf28m34QseEo<o
zyXcKQH&dh$R@6;97ZMFZi+HXF3m>eDsz_sfa`hzjcv+h$=QG!_4>2tC)H|%(9m=u7
zj+6Y*Q_m^xw}iTPY*14NgE!$p>+bMc<<PCvhm|2zmxF~at(}U6mg@Yr^{%@VKd83J
zcF%6zy{pVtq5SF-0al~Z)9+gl_b%6?KX@PMJpA#^?ZN9md2e;oJ2buOx-`XAjRpr}
zF&oYrw9-~=`}c~7PsSq#A`H8;bEgiP*+P<Ukf<XwA`7%V6~joGt0IcQRP5b-l+V||
zy}LnPt7zG;B?GS=Cvr65ixaA7?HxpG@9xYSXze|Z$|DYDl@8`aw|QpQ&)Nnh`j7UV
z3eaB!iPNvEIl=-QviXBW5HoGq-8CR}r|>e>x*|$X`a#c2sY=-dA16aG)33#d$H9&E
zP$MGQvpVkb1&xjc+SYKvw>W=y`9gePZ+EHDKC2_>#fQ<04_}x-FP;nwe{N{^x0;I&
zG$jQS4cGU2ddKN+No$as5)B>sS>mHUuV$H`9WP+f`G7@lGKU9hvEl4els<1qn=*FE
z!}5OY<yLAyCvoVgZIm=dB~s3pVOfgAs5dB4lrHiI3-qoH))fpi6?IC!_(})vqKB30
z=(bh=aLs3M)$mZtw099RCaa3{V5`sP<mg%#>l#sAo+-VhJX#4Y{~Nz9(cPEtaHq)f
z?J0wICk_zi6H0_RGfLz~S0!ns2$j*-Oqd<=QP6ZfoY`svQ|*y%F_ix7@oaw?SUX0)
z$xGZ59a~bhh-TOa@bRoZq?ZedNLnmPws*$_IK18DwM2rhbk@7UOr)O2;6XFdimYhT
zQ#!nB{pCzztRQ0yspoZt1?0Dt4-16PSYW-%pq@)k@tVW7BqnmC1ErNj%?n6v3qgxu
zS@f4^{!i6Dth<j_o2OYB%YO)h^A1bUrjJnk-N~it7iCGRIq`c6q7a6pEN@C&Ud<-+
z^KzE58AyK~rurg__g!0%i#}CaylpiM%i!mW5;=T;g4emG6<Tg*((qnsV^=_)wzkD{
zl1`0!f|@N~HINI4aVLocqNWM9xuL-}CdAq39UMpMxF|v%j5s9Ug5U#W3tb>vWPT18
z2@P>b3XsP;2GrgHWoR_gi}NGr)GGDjJt$ckiNhQp668oDakx##IZY`qTejiyrnKY`
zd&sCpYBWvYyhte=|I$$2OF95W@&*SEqDGg&=KTRk;q~(!!lz#29AbGZRc+ayuWGw%
zE-G1sPDdNi2X^109?ktD>dk6hf>#Dd!uG@Vw~bS<-D>Gti7?<j=m!_H_!8gA+|(qG
zjB3pbF34FLZ<4*~X_@)n=F)~evM&nirxDyQuggh8Fp}RdS}*lLwC)H`D@5zwyu<w*
z=RmOScd!h8N`*Z}CpYOsm`{q|L!j7*_p3#%i8f2CTCM38fmb5_Y~^~p=#d28?e<8c
z*iKcAh!Z56qzsH&vs>Qu)Hi96!a5Zza8hf=&=%^`?Yy=eV_V?XC%k|#{?qWc_tqh$
z;`3)uEKuF<Ir|;<d!}auEvYKJHs){AeP2jn#?gk0I@Qjd)ovbw)=ow`l@q!rR<z`R
zCg*R~(tlO8$GIVH_F33K5UfrE<JIaD8@T6>B5pPtakD2pZWPR7@Q*7xbRcTFq3w)p
z9Lsj(Vz}N_3=}nu$7@2=v~LiFo`BC#MoqgZqo$WZ)O15*Lk*du;ZE!qS4mng7oe!=
zbPq05&s{Uj=#l}<i@Ip}(KTAL5^Y)n-YbQ92BN0(-CX^En>{DaFuxV6-Ta_C!C7Ri
zz^N6cf?l223yYp6>0{7WFEgjRcCK{w8q4!(XkMBHP9Q^F-=vd@abn2U+Ig)@2`zL|
z?u^lSU%_g>)eqg#e)i|k%Qm1bI3<Q1Q!7GI)Ay#c-|={tDcdNCaU7?nC!XUKeCu?s
zcP*$}ej#1uETVY!N44BdOBhxl5qyeO9LZ$M<V-ofFtLJa+`Va+MWcu*B7wt#i-E&u
zltv6v<WIrvXxo=^9^H;A>cBS;9I6Cv_Bp*XTbm$idSS-)Ir*lO|Ilx_8E|%TfU{GV
zjS8JcDmw1?2|K}iiR(95@ys_i$JIS7U%JNYpZ2ZEM#Z*r@flR<ibR{?NZE;GByI&&
zmNncIpL57rplx}Wz!Ng6IIG<5VXvS0rr3<=XH&lZ#sb;P{fNY$P|-G8IMTgOXk49R
zM8d>`^E;&!{*bo^j7#9-MXfCP!yG=HL68&_d2l&RQIUw6JuFJ{hHF3f!gH>DL?O8#
zd`uz9i;hXMn4(`>CQLcJS0aZu{|E;VSe|de{Xl?-=QO2QT`y{k%MD)3pQAPuyn^oB
zOHqbOr^5DVIii$6)rl1drta+srj1TpNc=vUJEQrnUbGJ#K>N^v^H<S6^pdK;E;PP3
zB5%%b>Czv>?KBW(wAhHt4ekh=XYE>LRialv0&&k9Dr(aIqYp>DogwkI-0wgNzkJ9y
zsO7C*SDog)?r#Hw#81f}`3?e|7tR5&2m^sde(!e;!7KVsEQT4Y&>Utj*Y?J2_{7<R
zmC!w_xwau@n%a^FKNp()VW$S{>jr0@7zx@m5|XCR+TdXFIeMY<!M;e9qk+-Q8I)!D
z-_pw}SKOEL)fa^qep^dS!+`S5&m`kD>$3rwz2x3F8elT}b@j7F27`w6i@IF}oYMEi
z*%yrA{@;w|o2+k=K?CV(NAnG#{CiW)ASCx+rE){{?QD+<dZ|*ZxZ^|y&bz)nE)2Gr
zWqsQZ=QH*jGN+p%WOh>xnM)KSWOh#<UeP3k+3YAmaCMz}`+v~H88^|wsQE-K_Y^w1
z2(I6oLXi9pN&00a<vGmK2A$wdf$6H95dT*cUd_l>{nPaE-+VJ5d4;SlK`WLeTCqw$
zHbg7dAQ&wkoZS^QEme;*ee4M-nn38gN|nDGFU1^Hjp#GS&;)zf-~*`<FY1^}sF-}{
zl%XTZ+#<565#?#K(^9<^0oq&X7U*GEqA)B@x+Jh7S}^}DIKn5l*qFnC-sUP3#TPT{
zMi7X^uYnPKZK|mLX&>B5xRKXTmGm<NBC%J3NNnQ%D&_ho)j3D2Lzj8Oeec%c;Au3y
zvzafUe*ZMaOUmfO*|{^a^)Dfzq!yjqYd;@A2Av>Ot*r<v4f)bNlO)@nC(Zow+Uh#n
zxqVkl1hoore)Mq5T~QWIyP~XX_Jj8`LQXZpmF-@QPVbzS&4nFc^7f-H@8^&!<kbj7
zvc#(zvnPiGS$LFK7Ll#Y51vV*=A!)IXYYI1wbTGJ;bP*i%Ej}RzpT<+3RSr1kqagx
zm8F~mc)7aSj^_6Xzod8nE~)&D^PB@qaHdGlx&k|gilJk3VRCl+LM;Jgjdx-KCEb6a
za{ZIk48JCnC0w;D;kLG*<%Ik`A^fM|UPw{@07OK_$Iq#S6gL#aWI$eSPIv4$b0_&p
zm6{?4t9Q(}X{)z+HtShM8t}^vz4WUfu4RiH7W3gezaUnoJ@H7PJ~O3I%bND<QW^eC
z|3Lw`y5cdkkGNyrZ-u@Ujdjo<!oN`DmCpMD`I*wuWSj20#sPK~8IJ+lTH)7_avky)
zDAzyf%*z9z=RfmZAp&T7QO@z0WsOMx27vYp=-)q$@{&UO*x6sGvTyYZA|N0MZ2VqC
zh4w|Vv3;_Q49UeKEwg~29n)fYZwQFlpl{RD52qk~`Iwg@2I<Nmgu5~B$N4_|L9DNy
zU1HS5!-a0CV<cey1eqG%NBmGeKud1=>?yDtTs!Mj?hL!srdU`)ypfJ<YV@5xe9Q*{
zN&E<-?GZor5f(|$=>-%IsMRZl>Q-X~#pD`$(H&-;^U)k|C?Z{mQgt76A-#QVWV1pB
zQxBNev>VBRMP9VMJ~^OFveRBq!^jf9_NMJKhx<9!0#+=~id}2qN}AENkJ6Bay~n@C
z3?6>CWP*RPv2!}7Jw<`EULi?&%%TQkZE{<gdSFhI?fy8b4|-^~TUSrW>{-MK8M|64
zHzDfWl_(xi6v6J5tTQK|UVb?fO&^&l_n;cqbpigA4ISdG$(Ym6m!V%;y{?ai&D;*y
z%mE_Bl<hZM0plk1^J78z-bv+F9!}IZkqgF;-!my3SrWXVLH=N6JZ{H>QIfvL(K>Bk
zqcrsJm@80%Y`}dMiS})14R_VKUyk-|eM-;wDPwW#jLyQw&DH~KrS>ecDk(!^^lZQJ
zKTXG8qqvyQEiKJZ*Ou?TD!5V5G@hW8wXYIy?;nmTcw<k&Rf@tXNPDm-t?-_Ffm)%Y
z>65EX_d+7&Md+(dLsZ>;HJz_PxnnEz?x|GNW7Z+F!ci10dM@VDf>cawl9ghY-q}Qt
z2|AoFQiBVq#qTRbUp*8dhm^DX)Zuaq7ggKJ9HH7aS*qe978l87*<?lUeQgd$kM_}7
zrFF|pEGm;RBwdDQK2n2SdUF$6Z=2SChs3G1YIsPv^4*u{FE3u|`|aw;exvTL`XxPf
zTkc1XyrZ5Di|$bNsx9|xN?fjAYZR5?*^XeK4O++o#Cq_wkO2-|tmd2KG;<EW7t#rC
z>rF1JiPidggI=kFc^F*q`T6g_H|#rA!R{)d@1n)N4ZV2Vh5_{A%g|!az5crKR#2xo
za+lOQHH`W8mn}17-asW8bmHr@ytWzl5O%phB^d-NNmc+n(2NG>6<-^ZJB5f!!lwjX
znD=!7u?MAjMxr!Nv6LgBRw#@i%~OWbJlEKQ*6rPx<wp^oxq!->xAyCx+g4}oiEq(W
z(*vtM)0eF26pgugt@#HJjl_bmqr4<f@AUj|jYM4d-}-f+jS4RDgkSJ`Q^}j<bViGJ
zfA<x2dO)shoRzL-Yqp%*J-!IOh1mi3gp3_egWY|z&2x@qp~AS4ZZ%KiHuZUdkeTUP
zN1~N(u4e<7WY`a5RzYxPmJT2h8?F_B^`yZ^gSkk7*5rduVkVBr!aP8x@5F8?SuuCm
zzQ=KC3@>SqH}?Ihhrwq&9DaCxG7D=~zlatrHv&05c)!Bv3n%z952J-IMB^N)6j)e5
zmLWUQ6QV3Rw?SmS69jtnY(jUbN3hLCpl7@2we6N6GD(Q%l%RZshULJQv>c>4w-Gz~
z`$&W{ksuN6?<7v1)Q!}234xzG8!<Ua*1@NWEcXQ8$^|#KWzP<fq{Aa77$!@)Ye2!4
zes|9u;xKasLXuQp7~#ep!~;It1>V7j$dSzxN#W3?NPP`i#oOWH)Dx~h4i`x(LX&u7
z*MxLyzMO;l_nNU8+e(2>@-Bk|(5tQVx!$Y5rJZb=Fg_)JJ6O$|a#z=j|9mC8)M(3I
z=VG`QGSCNhAut)8K-zUKLp62!{f@C$zzSW+^ajVpDFL_VQ)m$%&hG$*cGe}C1Q^<!
z-L8nCo%OV|d@d&ldIrwyP61!s))W^j6f5~M?&_yU+fk0J9Fx@{OTo&KbEk9-b2=yD
z9kii}f^%{?%VZroJgm?@47=UWVtEC|gzbk4!qWr${;L9y|1qESvjbI?Jklj@MX61K
z8v?hyu{)fWKd;)(>#PVZm&0WB%fwVCyY#ra%OCuPjr-Z*kN2KJ>ckJvIi+A!YF5vp
zLUDXD)g+pip!q*GTR$Zb(^A{~C>m>4TU>5|ZqIBRB9q%Xm*@GaG2?%1wth-<rx)u2
zK6^E6AOs=E_Y(3I9`1SVdhor_5I(z~a@gA-n`59TvyyK$K3lX>0v1G4C$W=xC<o!N
zCnk&rnR*@lc#<LC4)!vyFp4j*N!Xs9M};<LGc)3`OubV1U4Ha`o!XsbOsUbR+gFZo
z&Dx;l$Hwi%A+-EF_%sOSnYFMlE-CC>IwQw$zN=ROlz?wm8RkR9magt&0UEV&Ei8IQ
zw#bjN6(VEHqYF{o<Jr6&n<X)-uGZ~K7b<!cLzM|8S9b`mkTC9=T_zV`wXf_%cXa2`
zHRxqCEYKEQldhS_(~~ECe{KeCLHS3yl1zan=^@d^*n#c+I(bcP5-3`%p~`ToVq)PJ
z39PJb*`Xi{?5~i|cBD&WWEtPbIFg8+@@SKJ7>FEW5n%%()t!S>_$5fTsxw=MmiKAe
zr~HD3-`bxuw$uUh*|kf@lHg{z4Q7(jf9SU?`{0ZjiEv)3B&ejPVf+k8U(pPi+Y0|h
zozQ<bdw$aG@Es@W{%POh&dYCx@mD6nwIo5CVTt%-QzU5h?+eX{;X5R{I6%Cku#c;Z
zcl?q1?Mk%(WS55IM+foC{m7OxFwNX;b9>b^E;iQH-Ih3qH;63VEzEKlS2)NYFQCe9
zyK+c5LV>p*rIVQ@6C-$b8_$G^uKf+qx%Ls5<O}X&Fv&IzO_M++m?R+x3ew=>%R3}s
zlk7ROnb6!*Js<7|A<Ar23e?HGOmNJd0>>QmBBbZvAt)`|7O*|?LEsXoG*Q{vqb;`)
z=LF7Lay$s$I_{d6wVvb78HyXxg|tV*-_oLA!j}E@TOIvZw7`LIxwrH^ge4IpEXgcD
zXK!8v@;2mX{QmDUP}1YqFk|@y!0c62oaqdoIIU?vWa=27I;*LwzU_umoqkeWhMih~
z-P;-Yef&hP1b?uigB-nzK!eFgX9MNtG>M8fTRqR7u*>UpQt19~F36zaV%+ETM1>_v
zdgM4_`9{gkh@AanFB)Lfd6xDnKgav7;M6V}Vg|36aNdB0`#%TGH=ET=k%V1><{M$y
z``$+V0+9H35?Nfk*a+?6TB3+LQTDMtosnFkH3r+v(E8hN<8oA8b7%M>a`vToaGgvb
za&}EUU!KK<*-TqFv@1os_!ycvwFdHY;y0<gEkQ?@;e+RLTIK^v<j=NQKTk%^4bHF{
zywqo{Rt^zee!SQmXSH9nX+Q7zzuWVT1_POE=*Y9K6fjrZT-_*r#T>(!@!(1MSx~o&
zDEnAf1XgU|0!O6|x6#1+5zlunT_T7WyG^#U5;*3K6OQscEofF2OnHxaWqKfbScQCK
zY`(~|FG6G3bUx_SruhVau%i)nk`X_foYe6RVw0TBfd-%{%RlP2p|{UOy&ne=n<Ty*
zu}Sdr|74%_vr4lo7$I}ND|=Yk%6TOfv&zxzH@(`=j`5PF?^b7b%`Ns{A)zD@o!b+S
z-$RzgepI227z-Nyv2;B~(zH$Fc<w;VsmR6Pu!eFGAE(ile}?CjW)Ts3ZQ+fTcEgJ^
z;mR(HMW=U0evVI>YM4rA)SkOcY@{qs$tJ0bCAPXSNn$x+9#uiZ>9BRjK1e0HYi=h^
zfA+qIUCU9*;h`^$U%*ZMGU{@0v~Q3`#|B*c{$S>GzEsEe&gGSY-*i*Ij6D7>T-Bbm
z-AK>cfp!ip`b$v3`?UHZkTn*}VGAJd@z3^IKPxXofiXw_Qjz&SxUB<IU58)Fw*BmI
zFXaDT1tMbh#^=;O{4=~Y?KQuubFG@!OK$LZ(ko4nIj$<v>_vX;E!1aK=tvM0Ye~C6
zT-yS%kAuqCM~OxMBUO6klqwDFpf*uCVA{Sv3{EaRV(NxYrLS9^nxHEOe>W6%CrwU2
zr^DkBM|=DL@>v5%`_J-OKRb|5AHo29Jc2mdb$AChIW!ZwwSlAk%WmywM|nxxchBly
zsH}rISb!KMu<?5l75e4v*x47gI&LlC+iPzLbgyL7M=qbrF(m#DH}%hDYLLx!TX49g
zAe*=Dj=P`q;bM2qSo5rTuvK87YPYxBJN9tj%>Xj-Bbs)(ZqJ4aBt6@B6DUxhR|?ey
zSUCZAfbKly!v<F((uMHR7eN=os1#xSd~(PbvA8GgJ5#oAeQIyB3n-Ixt67#rv2hh6
zby@)2&vQ|4PEADP3D~1*RzjPb74=8XH)KxhZO?wr!c58xMv0jMX}w&`)G>=wfp>%S
z05GIUn!c3Q0U+d7SnPl#zCzJ2p~s12IJxC%q2eY%!71HGUHS2|eF9qoGs^hamJvVw
zRTK4#tJlf%U^7<>n>lC(@TRd^Bhk1GoO$5eZir&-N2Y4&aIe8RoKJ`6;MudXo6)-T
zJ2$#9_T}117VU19=;4|>pzL^-{;e48+k(-)&GJGx+PBri8s4k91UpfnS-vMTC+r?w
z>7gDE-5aVjRd+u8^K@+aMjR_R*j~q6+O%`{JMXMsL#0IigtMBUAorW!_p{n_PUfKQ
zBh1>gGwa;2Sn8+zs+nk`5>J`=0W9^)(R`gkOiqC1Pa8F6IB0GH^YdJjP<f;6Dq~7N
z37s!EyNLlS<-Cixc*qr%UpjTT%<DuIwosDNzCP{h1m904amFfYFIq5|nA8ocly)uO
z30|lqM|jQkGw6_p^;X%`mzDMo5~oh7Pe9OBhl+8xG+;+Nn5OT#U%!^}JjZmwx8G#)
zLisf44(1JPu4qE>szdjfB)&%bQZo3lYasyY2h8ZMqd%FpuVMjIvH%?1w3E@CQk9X9
zp6y_U#l8~-K7u;U0GRg4_#D%tTZ8Q+OaPTMkq51=%*>|Uf?ckZdA5lMppqVVpl>Ri
zS32-sC-x}Qoe2Y9m|Wkjs75etF@kANh?qiRqyh^tZ83srpNIghn|a=eh3Br^5Da(G
z_o`OtwzWF-(d+1{sex5Lx5L9YR~-dDE}-Zt10PLl7}hwRt`U7)oBelwjaQ)=eGEl`
zAa@kj5>Cp=hgP2*I_W*oc}7JUk3OAMz0UA0njM~yv31j6cYm4ioFm!Dh*n3HMzu$w
z&eA@}d@MSJNej+NrUR>_l@;Y10j!eXMkHdxwK5z&>iETdccefY52E70glIGI#x%LZ
zV&f_iR<WctzM8as4%?)SQSsT-!(dWt<Gr1>j7s%Qc24C;-9b+0yrOVFoeO{FHst;q
z)?Kp#X2-z2@4>HTT;ri)!Z;2h^T~m1(4z&{gDHdf^8%duRolWQaY<~F=Wz9upgcQ`
z3J{Sysjf(KE>wV~ixGY%fkc2;D2maAHe&0mL-6w}Zn;d8@WI7?!L5VeyIXORf~#vm
zk{+Mz&%tfG9uEq(Xx6#1^rVY!!(5EWeneE<*~AB;_EmTX>yQhZ%@e_)OKvgq6Sx(N
zVJB)klJ+5Le852fJhS4&&~;<P33+kX^zSubXWtCUNU``hn+Y7O%Je0S!wBi&y=V&Q
zk&_s*bcz#_%uoE&mF!^m%MY*k!@XdKKClh!Wyls+Vzm{jsWUE=^`PiSAF?v~ahO$u
zq95CEWwOsxH*b;%MZ0PkO@()?Y2bl=N?Kmy3M~=*l}*dR3wLdKL5QVQ&u?HrepRRr
zL!FLeal_kT+wF2P_#_y4MgK-Evobg*hf#6K$kBfQ!5794&SiUJ1-IVx=2^4rZvW|O
z_3_?Ec#-J9N4muKGWAh#K+t^7SMgGDCb4mKt9WRgYQ^eoRhW}wt+kTAUi~}hB}<Sx
zvEVtUlqi*>F&{>)fa16XPMvLN{MUgsp3-_>2;q@xQN+UQoj9_0Yrbl+(H4R{msPN&
zujVz6?7tpX@|1r5t5_6LfDI6}pVR(R+>oXTMC}H`Oy>G6Oh<WWsPvl0D!*uGw@<@R
z6y@Z%TnUq<-}Dc--v=GKfp6<N7>D*U*#R5q7;1HcHkYu<(O!>=a3N<n8vk`?Q?NuJ
zDV1D6U=jjaeh9^3w@qzF_iL<WjT^F-%G#*1wuvq^Oqlp0NqjLkzTcgTXHHa`RRoAL
zF!-K{^JcPviPLH}JMj<o^}@vpx_pS9J`%|#gbo-UBPt+2A{lF<D2bAZgZ9X21o7Mx
z$99#nZMYVZ3|!>qwy%O0prX#OTH;@eCQ1B)dy*W{JJE}k_hyo%eJOtFBgH*OH|gCZ
zbn?;%K6VVhGr^(CIqxfaY)5*|>&Y7Q($Xhy7#vE~?cyrrTt~gj!aD1&X&R^G-Fosf
zxK-^A55S)>9NF?WfKG<*tgXjb$PQvB4>o#x@RA$j2hOMXCXbZ!XgA}zMNyoh^M2B5
zzf`%HtiT_$nw38Kt-)IA_(JO5@r|v;4_!<{KE-!%Jj`rH0^+Uuac?0xr=-0Z>YskE
z`NYLv{s*hyPiXRyUzwHqe;%NrR*s@Cc_HJU_qOkaR@HxMwE>kx0=x#Xu}+v1P%gPl
zMtTyNl(=S?rI4{Q4KSkHjSg7xy?kaQuih+8`f8Y5Me3&z_+`5Kmh2E`62+PMC6W$q
zRHKx$Ud|&%-JhU^%88AB6#}!&(2AZk++?aX+X7?T5z^L}?BO?dK(l<SIF~EVZOqds
z<3?ttIO;YtF$UDHhgxs@6{jeImq(H!Z>`i_TCYLlvj;5z-SQygNBw=9XC;mviuiJ<
zo~6X85GBWr85mdYFtZa+Av;O<03VlLVb?GSBuz%6sz0X?B>6DKw)Vj>QT{0>E<BB1
z(FY!uawl>kSqA;G8!8md`J5&KiTR}G-5cD`m8F+lVAn-KIxi)){NI+mm7F4~Y`Bpu
zA0)4*PB>7P$y!J<XWkvA^o<i-DUF*Nh5iygBOBuJ_a_FOsCaRIyu3fZQ_?A{7)YW=
z^i8z0ZH3vPWI_k$Vd%Hifr)OBbH0hF&`-h`U`LIx9W&D>el%ET95ohWHG36xjrw*l
zxb(OM=G}~Vs7&<y+r9LC(;3OcO}DG`<fFUf7r8_9d#a;F75tn=gNW+}YZYU9ihxge
zqsLi5DIreSH_Es~qLYAAA@I&rjMY*j5WZb-b|8f+V&s>ah1YrNnozpm@-x?wz%$1N
zb~&~QX7y4eH>q2u$S6A4?A1Vuc?n1UuU0{_fn%Tqk`=mXuEM2DT(MZl0X=J-!aA}V
zQbB`JD(LV14p3wk3!Xrc88r-9u|%|8a#^PXF+q19b!dmNxBRnOHbx<83?&Lfqp$E(
zc_QQMH%Ow)8aCE*={Qav*()q#$ufzeL}A)dMW3I%Zz7x|4A=iMx#R<-jC<|f$Sqy(
z*;$jR$itu9^&XyZwi7ru;J|_G(IuANmFkiXl}9W94<b}dP!DO<ycJj3Yabb4!8#Ez
z@;2crs+5FlnjdbZcJ5z5Pw<Cka>y$!mQQTro=q}^q3v9h1CUIwt(Xw3qMS=w5K$ZD
zuDOadn~&U5sKeMVc=U%lI%b-Do&ojJkJ1A}WgZjmtdc%#%($^)+&HuT9Q9RRIGr#u
zaX+&&M6iJ>q;S)0AL!dY$QJyd21|8gU5dh5kQblkp%60(S8sO08i!pUA9h2Z7RHSd
zmSw78Ca>WNe$b*L0#UT;6ZM+vNA(&Ouk3Ssy7RJ7{dAD5?J`#Y8~{Gwb_9{%q6coP
z0R)jhn!=Gev52Kc<|8eQ%Psu%kzLX39?oiV3rT!Gb3Gl_()A7JNGjCf!ZunIStuP1
zk9sXlCW8Hgd57?!an=p8hv)}m(GLTg$oNgm-`U3F8Y(W4#g{fp&uB#UPaF*`?}@ci
ztVtlW_vmISFy5b(j3XtWb%uj$zB7Q}e+!=TXH;d}ICi1;H`Rx$he7aNXWu+mWrXVA
z&^CdvK%g2o&wj$>)wiH&x9&bx5HF}EE%aNx+jLnwJ<YmOPIMgQG)gZ^4T$3gD@Wd}
zBM$VUFG&FvY(fQ2oTn$r%P%z$UDK0Z8*rxM83VVZ@5!QxlhK(X;XcJ}lJxcfwl93R
zwP0WtW^9(DP^L$MTgr@BTeLRNk(iqc=BSvM^P-Gi=@va3?n5_8e2)r&CJHDl!mXEZ
z9LH`cpq>Ou0OaI`$b&No+4OsHdF&WRh;7ZAifz@aMX{{{00Tj6D?CiV4~<vGw*IWp
z^fdox&K`G7p8&^v(9pJ{BUB&ltplbSm5#c*SpoJ^m2&0}Q|qS3_z(Gcn$*dgjmKen
zN5ASNGXDBR7FFz>sjoNwzTV#PHn&_yy~#?o&%UEe>*%Q-lh-xZlLWg+bN0UK=8NzF
zx9kthFkR)S|Cd*lICe!frU8lCK)Bn-i|!xzLpY)rD{)z60r=(c6X2KecQ~V}qbcj*
zVk&7=9d#;yFuTzy;-*uo1@HT4>+ND5M!_4HrV&=@Og1R#omsTjPgGkp5xZ7c%A}lU
zNhquoU0J_<XnmTQC^SkKN{>1cUSWPp;f4{h3L1ScIuZ;N?{vM35q&Nc8KhU*-28&%
zpG)eRomH1yKdU!$IXB=POxU<0i9WfyU6J7zS=Yj`4lgkf4OK}m)#^pO8F*9V?mugv
z{_@iwL(2!$vV-NLA-h;wBFBT@#Ju*F@zO^HHEJeCN=%PRW{1*th~*{<he?q%O`7Rd
zFT7I{ETHh-ASx&q^c0XQsn~`3L>y(bpxur<jj)5w`_6A&pTgmAcSyxnMEqFzs%50{
z^V{LScw{<J_DN%UoSaNmFmDH6%U+^LitH_GZcMLGq>FjQHJGTvl|+!}`u#!HwW=$<
z^i`Bf#e`KQ6F)kGIy$4Vh$ty?<PsGUx?R+mCNUtH_(8-&x^jym7nUgo%vRK4hQ8KF
zsgQH1QF!TXO+~!uRD4pWw9m}3h!rK7$j3<XGch{qotaUCnd{*;C5DL?Yg1x$iNc4|
z!YBtTIGg8p|2w_i&x$zmLEZ70YLy@JS3MNp&KPmdKKg@Ea_H)BkB4m#^w=eO1le1r
zm=nL~Btch^J_Ui2hQ__F=*;We6*mF@EWc8lf}DPtruS;Wa(Bzf??bTMr8E9x4FBkt
zl_$Y7!d-s;#KEJPn4yQ$Cf$}3N5qG|@msGsDsklP9kX;yo?PDc;MLTqMFx7K3l#FU
zsEEmkG(`<{r1W$;RnT)P>rJce@qY~8^s|EFwL8?gHD>bTh6cl<1kPJhnJ^%p(B7Q+
z>BhZX|3Yprzb;2x+n^|wsK#|>>q0xj4Ni|tk`-cdqe=SM#DIvdASSo?O&;>I7H#^-
z*f!kXlIY1%@D7=ke~qv6yyM8PYvgH46RFbPM*GP7_Bw|taz2@a4uk9A=vzj+ED|2X
z^vt81XPtC=ODzEu_lg@2+M_03gaP$Ci-|w=_S_{7`Y7!j0PacZY2S_e>u^>7eXG}l
z+_$EEX$az*2XcRStoPWa1J{4AK_wPuhjnmt-gurm8AILKK6#Wod30d9aoDw>ctV{b
zfY(e4rAj&)vla1*$@omFP?AzHkaCaIFE7AK3I<+x47=WP#ZfUqKh-%|tq9({uw*{q
zxtUSRegRF@DKy#tGg?(Horp+Nbq#Swq-BpvqvImie6q5BT1C$Kc^x$*QZb+`ncB;p
z)TXdi;KSr&pD<X3f811PtD;~N<?I0hRY=GriIPaWqWHH!)?f`-(8FoV04N@hAxTO|
zRx$yc*#-tRWOp^v`mh;Sdf%6ZN4(jZgc|(vXIy%-)@<?uls6Zr{gvg-dTn5eqMmnX
zyKwd!kn%?m8!PDdvaw#Q+FE{r^9p@7ALhm8D&+dFy6KRc*E?;}8!KsTB#EQi+BzA<
zqs=w8A^3(Ek7XTuYYo>aPTdrOD&&ZnScwTL+$*19t~YkrKDB9$5fyrii`e>ijP5M@
zeORmCY$&Qt29A{5m&6<UO;M!rkUA7j$L$AE|It&0ROkb;xk9IHhd!i&run~vi}ecd
z<__m4qvnq|bm*@>w~#h3QpHxW<2cVTlsNj00rA)accSk2_t9(VQG$F&#g*Cymhi2d
zoB+9lnIDDEFiBD_(20>PiD}BejlSZdiBaxkh7ccXFG{wk&Esbe^MMPh>_TEhiMVO6
z+v28*yQr@SDpCD9=jDf0r7G=Fl)Qopc|RvU^g~8!nUb)D++p0c{n7#Io34TXLQt+p
zNZsKz>(FjX{!X3MW}v&#BS!)=+kDG%#=5mCy0z95nL_)hhxW^5Hb;uedkq^|vJO_-
z8k>nTaOqRC0|)5wa;C!4rf0wrlTlAq?;VRLN}Y2aj$46Ij2*vIZdGM3ce8>3i^$%q
zq1P;heVA5kM87Ok3I{BSUx!nCmVj>$InY#>BaZ_tA{7lny+}pL5~`vOt2QC0hH=3r
z0|6G#ZjvKlldKc^quy<#8>}VqEJ%UXCEV!9=7|_M{ZBMuWlcB=<#;qk+H<~$GMtln
zMZvzEo>(!oBW7W`^r3;HarkWqI5NRm>%5vx{|Y!V)p9G7Au>VZe9O6ic7s&;D?CS;
zv13}gP|8k=`nQ>}^wmj_jdb}J?AH;cHN=QJB*os_P8H)P8_~4J%2{8h!NkhP6G<as
zU8;Iv;_u~fQp6VXl=AL(q>0ZPF%?2eW@@zU#Q6aoZn4S38O5pUqmbkNDsMHGaIGz*
z^@xG&_fo-nxerH_7_jo&zfcSxzyu52okH*LufoPL(u)gSI<F{!V*+D~%N=BZl_eAk
z6-SP)ltCld6D+Y@x<W}6J0fGMjy7w(k81JAolqnSU2vWd^@<ez_L+V%hM$e37>kYs
zEWsXD$maNe6M1Gt6InF~sX;P)_;$DmIs?U}WJ&3OuCTZ!soha}!*Q)6p|z1VNS2k7
z)8rCknJ~ZgLoCk}YeJbgXKdq5*XzkXgK?^z2CHMx1}KM{{ME(s#Er3fa5@}c_{GTD
zsOfhSqXrV!lkrVi>oIpl2A5f&&adM+?kCFvS(46B@u-Gk^pG#{+?_XR)I+^}$C(E(
zM7|TzG*XO<5KlNNCLMRK<9UaJAZ~^xYa9isQkfe4FrPEi`M#E-v~E9fBPMMGcDf|~
z3E2HSZY~R1^thv}Q`_Wmo7pa^lula%J!h#|069yeW`)WpP~<hZ0wz%8b%i|p|FQSp
zaZP61ySRuBIx4nNRGP{tqX>vdFR_AS0Tn6Iq98=1*H9C%k2D>W4pA9EsUp1wrAbQ!
zgwTT^2@oJaNJ2=v`vvFx&Kbw~&7AX{bMO6J{tCQh@BQww_FB(cdp&4CC$i=01vpPc
zD3TMT@7`1dPMSR{1wMW53`|1?{NtLXX--(_qWe!-)XQ&!tdpONpK9{6;6ZL}=cLzH
zQbW)}bLe?}Nv(2r->`g=80$P4t4ae~wVq-LNnlQM>U9K>&8?Q!&WS0n%eQZ<l3+}Y
zT1zr=+Ipzo?AZ>iFQrTWXMDB2^c*MMpaNj$uVV~cm7oas@-@=_9w6D;WhP<pl6FA{
zNUgSg0!|~$T*9VN-|4(kmja>rGw@&|{2DzAC@WjBD-JrY9&=&gxOz4<`FQiHbgn3w
zTQ@l}REJ&lo($q&>0I<^>U#=d4R8O&84ylj9%mUqDPMoG9SCEPbz=YkL)0sSHc<nZ
zWf=6B`uv%LL>@>(rs;s{Oj*w?=t=aQSysk1{N5fA4hLhn;|P6#g({{pv3zcD_K-D<
zVVx>cK60`d-dU{r{o7<WD0ds&qr6C+vro7-a@MyfqsBcRFJ?xI|C|@?zxg*sK(+K2
zkczKm;T$<Rpa5rie0`Lsci3W@w5j5p#glpN&_?A4*tt6w6U_O5IB$!}$$t(Al7HC&
z6bi*u$@yn|_G~+B=23V&Z}t=6jyD>3N(g7+Q!C*EN7MaaQ-ZLAT9nK98*}I>A@o75
z`Ic}eUxxA1gR0g<W&ly8d^QTM&SkUt3D4SyqC5yoZ4(J-5jh8&k%AqvL0_;{d@*=Y
z-33V;k%+bW-+0;%kev8W-I9za5!dlZDp;R+BA>0*CMC)Gu`SiVA@wYcx}AU#tqK|u
zP({x&mPh9#mbKfZpG9aX8svy%Q49%+`7rmVASGM}-lj>+UZucLxfnzujBxU`3k*rU
zfE2|opGydJ;uVW@lR=2y2=^X*w1xc4rck@!$*@~_V!gpVACNEZ-gx(Ae1@WB$5`Hz
zI$dY2&6`mUp4|<YANnB1lN)G$J<JVXN6*Sakg}hH7C5+p7N}wHAWCWm|I>Nk*RO^A
z`mWT&`+#Yorcnu!lq6phLH!-HC%<!OPc}uguj$9D<-$#bm5h)a9VlnrW$XWt<K#~T
zM1Z86d!p`u5aWXo{2Q*Gss_dT^nnolUdtnW;_?T9CGJ-q2@G*NzP9i+2}GxM&xHP9
zURpO6udh*%1xCsBixhF)rvbs3WLkk5bbRP#ep}ae{BL6)8vZp};m@<AWTbmoyuwN6
zKnMs#UIV5Zbh1{YI%otONT^nYvtI+wk+{E}x$Q4SLBs<(!@lz%!fPBE2m)u_Z5726
z$(nIj9j@(-g%YfSq{GYDbv`zqy56monfdEM7Gc{#__f?>h&~_~(%NgZnn7fkOBTe~
zW`1~j&5Umd$djk)?DJn?IQf%71A|S);OO^LkislZywdYcHx0?n3RQsVOj&9DHyBR-
zWFQ7B1W0`Ii(lqhs|=ki+aRKUzdyLh>um{Y23tC`p0V3k1H|ZoNK`KQAIv7y=O}JV
zXcn{UqVoq3E_x*tq}^mGg^|38Qoic*bDmICg9X|fE?U5t@)T-mqj2Xzj%obKf3i+b
z+CgVX<)GAujGAjAHj(xe+1&TtUpZqJ5Bwm5ipfRyp!2Pbe%BihXh(kK=pGv!i3UvJ
zzZ13rEL!BWzpEPmBWCsGk^%F#!Gk5}8|djDc?8;PkBZ1eev?s7vv53<tq+*OS>^j{
zeT6zKMMfy_>De+OzZ;Hcv@)4{%J$15)cNXws6~X%1({_VsaK&$`hShQ1?PhZ{Tn)W
z$mcG3^#87-h`-~}zX(OF-WT>(DKWu%yv+X(PT=wW<=kFwA~?iq13;ZAUQJQ^Fb$!Q
z-494r5(An+<gItMr97P7gDK1Bb@@7D86A{>^6qRrjQ^=v3+A5r8)W9!xexiiSJEvV
ztf9<rO;QXO1;x6B9C1I^xfSEQu5Giwj^_E~S-K|p#9z(W#UDoFFMrI9fGEK;)X-20
zw3i?SS~=izOfOS`lE+0uv8W4)=m2K_kCb!AK@?~<bT|(vc%{$2JZu1B?k98N<vac&
z*~uTduq8U)hsu>c14<=0X7*eW&&|x1j|1uE5}TcC08T<Ik@^q*1Ke%#o1pVgTcM!n
zMM8=Jb8%oCux{@2Rro_^GVuSB_y}tE&!2RD?w!Tsx)@sn8oPSzP6h|9T<(Fay_I12
z^Ly(K-ZHVoI}e(6tF^aoo&-GWz>WNZ4C0W-?o&1k%?haE#0MWR+L|__CYB4eu*u@y
zGvliZ`P$IoJz8qEUsQ7wpiPOkJIPIrk><35^mE-2^hDdlsR%9zuCI`p>;c?|W8m->
zMt#Ib<BCv4lR$Y(z|fs0^Y`UB(rr_`e3+;L&9%^(Kf!OqoGbP#uqE-TA?n4Lh^x2^
zL21HDG?a3VPM~7YJQhefhbN)<bS5TyjkX`7tXY`zBSdY9neAf`ZU~_ig^Zj$=x;AB
zvZj<4-&o}F?rApsOdsx)Woo*>kmBXu-nu}VQV^FOxyYl^XP&=EA3i}!OI~DKQ8AWX
z<V^YfqRiPkNP;2$r0@yIWt+8q@!;O?Kl?veEmRa-<dqC=B2e7B4UkNt+s@f{3200K
zvMwDFk#mGK0~`bS`$)G*F2*%bi_B5%sZ%6#xrN^2e1zD5VjT-nfJs0TM^Iy>vxspP
z5raP5or(^ALUfNuH$d)FCb|}KpW+>0Y)-(C(hQ-ACEBeBo!X1(Qh}1qspKIbz7)k=
zXXp%{ba#cEsw+O%t5Wz97!L2@Fa>C=^8pO#PrfsFY@I^ui&yutt+$k|4x3%wh)x!A
ztiE3Gb~o=dU_fbcxO0;81W>6Yb$uH=Cj+QdbaJ<Boi+wkDj~Kq2L`hNmCCkeBa>=4
zpi)tY&HP$H6)hI7&Ark>w%Kc3cpIep{-eVFRI6U(R>+;#H3w6CDpC-&j29Wvkt66e
zFeD5T5BpEg67XX}+)2XycI=aH<*ev62N6x>L>`l<C5FF?ha*Y6$(zRakvddxoy%iy
zA)+f<CJ;Sc#o*AMHcEMpM!;ZMU2iWRZHi$zo3Q}^f=1V1ump%Mg(FNR5>~b&-$aYQ
z5|oBkz)yaO`JnDI;y|_O4}Kv>-|yhu6Oh(@;VLq@TrWTAEu)|>HBEermvJ}clEz=Y
zIWJ5c>F;wz5QB;-H&MLFxc%d}{(q;SuQlOng<R(%nYboICXQbu6IbyZ7+oY2mrs5B
zxb!06LrJ@EwZQ^nNNIm@a5xL%Ou2C>+RYfSsF*A5KU*vV=uonDY)R>T0tix!L%%hc
zJPjyQ(sl0aLnZ?f6~m;j7qdVn9xw>9B7=jiS>69u8GofIQdNNNQ?IxH44bSFXBkIU
zIur25R@Q+b%#y>&y@*z+>Kevh$GenaHC_!8)*|n=6*BoJ9{XL^PSUEn%H6^Pmxj1~
zL0ho4MT+qiT&6adLc`QNnt_llZ_`;Ue+;%s2P-6lYIZ@Wa&{QaDfVs;V<Lr&II$mb
zF7pf`n=FY=xLmPNc2%~`9avSuM?m+775qKPBH-<L)bOf?ylQK(@B6HSE=E?rh|r&2
zzB-UmeM)}pXzts}%RfD`{BX<mPEhV9sqz;>&EE<l_UQOtSRcFP8{FxT+2<06W!A<&
z**1Do>$=f4RT%}Spp4%sNG{CT<KyEYQHX;gnd!kZLPDjK{KZoKVskFcdLI4szz>M9
zvX5ZD!hU?N1UP)>HVMt@Acs$060vz;SN>Xh$SC%KU4mWKtjxKZJj<>KSLhda*m(X)
zij`^hczqG{i-?uso;Fa(G-h1ug?<&ApP-UoMt8{g9dd$xV@Rj5t(VSHo@i}{sKn!n
zKk>o@6(Qe{$2}`k8FXsa;Vo^x2n_BLs4UY|!xMT*gYA(5Ub1iRehJmt-<7He)s^0%
zXJNNLg^trscnRJz&G#CEszfqI&q8md<r^5sg3Ld!hB6S~bD~{=cMxHIs19&KJC9c4
z77ys%d@eC`Sz9qxEK`2sMHI+=+Z6AX$3U?qA?MEGjR2lMAnT1Quj(a2Y2_&7&<-AF
znhco4ZE<}IkF#j3Uzx;X7MaBTAi0$2ocJ0{a8cE#DkX$7^Es>%-aKpDeBCDxk2k*b
zEOZ%`^=jUkO{I~U#NO08DuXp7G?0#oBpHLzKU!nm1VeLZbucuq2?HaL1BrYhl}iOI
zKC6d&!8fRggWwXCzPv~v1|>bOkaU={;B?+m4O0z>;zT?%0irkw>%c+GD7y9tz>HFn
zln;@N6I;3=W)$l635Xd5ndb#Dqx|-@rVwY4RF0eq&BG>ETwfu0H=;ZdDwY~9-Z|}t
z*xTQaGMIl$k}y!&LUq>8@b1_<$?pd@NEOANUZB*9Mij`6M_kPWTeT|0iLEa_TLu~n
zU(6|2BsX8Xrawo~2=IuLITx;G3?}qyk;A{Wu8AAu#KZQ77?wqpF5K`oh{w=$@8mo{
zx-p>}Tbv)z+<7k!8UbGMMCiugIad@;a!c+0{`$U!8)f@&cvjL|6Bf)f%mRrd1hFMU
zl@p6{eEZ#?=Ni};F;%AhrETe{6BP7`It%o8$P*?E@Cpz>sro%!E^Og$FJKYJmn&ES
z$rJ-NITQ(#3ch9T7!=9WcCvjl4qjVR&f6~eywH+CmF7yvdzNF1uY!znA3*S;@7v3L
zfP|jvHef)75kW>dJnQ8G5ek~ujKy$SM7`!zZN7`n#tAGJD>m1LC!%>gmHtg(U>SA>
z8AGFMJdp=&au|;*!#d2-Z{}8$v(pD{O*-yPZWiv}BI9(|CSXMS<A4vAn~ggS{)tbQ
zhB(BDzjBDD3THr|KMYNhEHbpTG3hAeHwQ_h2iku3-BnCafnL`<Fk>Adk?g3OiGW11
zSdkM*c`02MD4v0D0w?nl!ZWoduLRVx5g?)5q%l*xOSl6uj^(03Lb-X6P>$j%L{UWJ
zShP?lm?91b@u2W5OYCh;sC+dl%&QI#<ApCn^SLx4jE_OBDMwpQrJia;zS5=<uslF4
zUi_It-1`lr-Mv|$5T8;4F$zd8KWtG6f2)wSzORgV$DkTcKCDACmG$jHLb>21CA?k5
zTj)R>XYhKndu;Xs6$w_=lon5tF;5)+pL)D^eP!1kurRIxp+!=~GVCnM?{1N!Oo@^E
zo%wXlsp1y@I}kHVF&=}*K*#bvw*(|@&j#7#$dHsBg{16gN<JiIUkLJNLnpbxEsX^l
zLt_Vn-D?I%)6#D!L|u;e<Ik1<)R$f|W()C&Gl)Ej&wc&#9SKk}IU=3I(otcfO0f$J
z;@Gz=41WhbeStx|1~7=fD{*gY8ti3HU=#0+|5C#(wzGAeq%rLiU~rLY>T)EsD3M@M
z;<fT_LW}a#5h!m=7&w$y@N>Ajwc%)!qRf_ZmjvSkgfDuya%9M)8E@FMLsTWqP)QA+
zamk;jBUzW`=9GB(C8wXq)+yxmi&{V=P8?p9<Y*4**=t?Ny3@txcyJ~!5ftc&VB$1$
z@h7@oC>m{)Ece;FcTjaK#89)Pgw}L=n8{-qpobrY)3R`i6k`Q&W1*4?5}Qi7Iqzrn
z0W66CFf09u8v5Np=d1y4(X5KcV{hfF^0M<MUyR*Q27h?w>*p-|@jrUSNP#$Fq_@M&
zbD%`-J>c)&o)VCGd1zm@t>Rg>DQx0eS#0yc3H#=GeIKQ`tKEcoK;PZ&aMuoQ-+^vO
z?^@ed(V?%geevmBo`EG`Mr7op0DJes)3J2S%tDjtZF;qfPiOES8|Q*b;3AsE4v?g`
zfIgg{hKJ0YVc`)$m-4`QY0E45NpJgoW!uBPrSXtrFEq1WAqyzZgCS}Ce4MjJSXP+f
zu<M=B{cJ<<978260z4>cSlggdk|ddh6MIj&shICtqCT`mIc?k+-0LB#>KOs$_)=Fk
z&{GSruJIXOmpWg(h8u#5=poh16E9v}u)(H*i@aLb*Fd>WYCsu+i`>wmYoH39C&dkf
zmG0UxHb7{U0BKn<86yqN@4dp;7Ku;B8~Z<VcN<nHE^>FL`6Cv&yDzbDi`?BWXA%~<
zyA6gl7P-4q8<3y5yPr@$b9e72e&+7}vLKg95Gr2<ByBrJ0H})YwlV;*E{nbDfY=)c
zZG6+bNeHn<eCWb0BG!Mg!m#adkxO#hbAgns2J#8EX!-fS1m>QY$T<$AP8j3FaJrUZ
zfg%deKNEBZt1Y*zzW<q^Tb8uSd!y9YBCB^{wA8@KkQa-r-u+g8(@Wm2JQ@>wLA^W3
zTFAxXpdSy#L0aSR8_PQHZ~RQqZPv7LYPZgZMS^Z;1>MTvZ5KYXde?{v$XlKG%<8?>
zeMQ;d^b+bL3a}81L56(~B>H<UP@oyFAmozdCtF>!*=lw-Z@zwhnug|A1Mco^6H}-K
z-3dP1<<eb625FR-7BI9wAciXJ?ePSX$YiKA5seBhRqJQoe-wsA!O?T`RRqZvM3voo
zjK=*({bS_t>@~YZ`k>KV2km+5L^7{}=^Lf`uy156_@!LK<8*rcfjqs#dbZ_QHJ8DU
z^%>wKNR*&7V&odbSPhsQHOPkPbwaZx1D~`!4h3fqpBTE<!HyG@mPl;sMzhgsE^c@A
zY|(TYnX=qpNc_uh0~)sHw5O{Nw+g#?#^Vj^wn|Mg+7C^u=P_wB9u<jGS+nsh4vS+U
z>`!Oo(JLBge(fPXkfG0E5j44DOe~X`%blfhS5r^pWAIo!f4CAEz-I}}(fm5s0wjVV
z1{4V`q9(K~Ra^#2TG~aIgY0o;u;XvWK=wGKPUClNJ2)Nq^RIjXjdw!Jp_;sJuyn=m
zG9<t`Fht&2h0Hh3+_fvyb#2}Hx62xwwV(R0y}FGaDrfHI54lynJ$cU){4c5*XIYuv
zBF=CB)+oH`Q<<V^N_59SA?ohUb}2<QYDnb16s5UL%(E+WK<A>71k9FoC<@{S6RkrL
zG~D%=zx`Ej1ui-S35$oOZ5D)zWMPb;xpfP2fSB+-UeW<jJ?+Trfl|fk&+CIMazqjq
z$|5)S3;(a(%1x96Ky{sp*nM8OD*&ofO)g=Z(PsK^S_E%%MOtA4lQ*s6+z!hmGANGa
z?F0&!$D=tXQdq?|;36loiXhca6f#mo<xH!<9wYDi6grfDoet<u+Rg7=m2kZi93x}o
zzNL1DzIwxO36S?0AGvhqh4R6YnWDEQRlFYl6TrKj8k-IfX=o)F)|q2epAuHeAf7N6
z-ZKW;Sl5j#@GN_R-vBUegJ#2R_H$q|XF>_*fJaFs9>R}Opq@&J>ws(}kS#TIQWQ!X
z*GTM)u)C&FjA^a_6egE>wTWk?dg{3jj8JWA{fr9Z+Kw*0NkpzQVDpZ|0yy;@VDpX(
z0Bqi`IiXrHIt3Bzu5r<T{)l1c@vG~IAZ;8@UtG0V?j0cP)`!EDz;k7>u$tEiy|BIu
z&9uY=*(CiO2Wh50@)E5~x-p$)Z~)ZSCf=?IYP-SvuF=bl@1gtK*CfDZp!YkQ0ReCZ
zAGDGGMyxtSgpL7_@R`kG)z|eq{9=9H0vv28WT!1cb_~|6`B59afG7Ln{FBndinLvi
z0RF7rU0K(mdjvU`4dv+mAZdJ}3y^dGo>oQAmms^ELiVcOk1_gy2BpcLbJ>ujb(v~M
zS`F(0y3%;*U<FX;QlX)Jc`LCH^l78md(l1>%XdvH`$YW?t)s%(xf>NFk!+w(!nj?J
zOA+?Allq!q1QXP@lbY0B+$^S~!D&KcG?xdCA)+~9a}JYCIGWEY8OTN8d&Vm<pMd(x
z|B>j3&h#2kaR<hUvHXF2Tl>IWhyD{p`A?z5HTljSNDI38Cnzljc_OwY*7LtnErf~F
zfMaFwOUpYD;_3D%UxtG%@O1a{w?jPLu-pJm_kJQexQ++#WRL<2ZOiUrfwa-76=%@3
ziF__gHz{DgaDn6`6(TwD1MoB;IpH@ka`8kJODS}7JocGZ(i;smOiTcj9u9rnPv;h(
zH&tYRa}Vi;dOkt9xtXbHFEoB+7z~H;<Tgmsm_Z@X$OVoQsnEY21)dP?+d^mxaJ#ZV
z)2-0@RebtSLCSvy=)SK`p)B{T&IeQ6Sq)&N&T4kzV3X6Htn>u*Li%WK2I@T(6>7pY
zdpWS%2)@?67Fa-NeqYtJ!yLyGF-ev4zT=IU6$aO-Soky-I9pH!2?<cv^C8477@Et9
z!^p;Osg94~10G2jm&P>au>2A{<Nd0sF1!GX^8q3uK)SmOf-c7tkuO6~{|AsYX0rVL
z*v0Js5NQ4lK1`hoo$M1wGQ^`x<wnTVikELE$NvZ5?0?j<gG9Nhh0uZ&gIq`ik3ZTD
zY*T*&p~OXg6Rz8ThAICEJov7&NBc&C^k;tVG>g9r-u_2Z4uQy{vB&mc*!tbTN+G(n
zszcx03x;rPaQx;2AH{eJUI=4t9lKO;5jzg><AV%wQNxPeieeq`k9Ytl_Y=iUrtR-)
zRE_qa0eB~B!PC=#0R&F|%`7{cCv|n!ZxOPt%D%b)D|@4nS8B4iPp(aM=9i;+(}bAe
z&$zVdA}*c1W=><^cO(l~)*_d-6wq!$kn+U83n~8@FnD|rc+d3IDxmyu=DvVr&l3S5
z+-83R!u|g>XY<Lgs3J$4J&Y|+cwPY7-S6PWg%Eyx);$>xVZRNDA!gv$Go`pS8$yn4
z^lLN0uX>yLs4+m0e-F04+@?|zb7qpNdJswur;q$0C4UC_C2|a2&ecck2Y9nlW<m*6
zXF`Cw0jMkg#>58*ex{DM#Xxwoq5YSj^=dHBG;qGPK(pEoS?{%b+#y>yQh1z`iYqD_
zl>cE&A#qsIy`g2kHOv=iGWe!_y7tsUehXmXj$F9W4ZuUZN=cVS6IUDnjzy^c>l1iM
zH17n$iqzW-J&u84up?)ILWNx$rzS}16wr7@26BnYBJG3ZQjnl;l3)2Rl5yFTY)tqw
z^jSS!kql$%d>uP(eLBAYII>av<^W%g69xvX^?5rx)G!$ROPaL{pfiU?#&cNi0m5@!
zKDVD=Avb_I|2wV`TPF{F$+I+(yuW}g{{bZ3+%%^XW#NSygYp!{W4i(%T)K<WwP3k-
z28qifZ3ibNRFdT#0GEluG>=sE0O2^uqe3?(<IE5M=q8npGd+(o5?qL&^9>-Y?!!?$
zq-WYdq4e1Ml;_T61L7LiH4$F{jcYWD{UDV1fl3Wa4GlnIZ)xNLnmnDN3WEUZEg+H}
zx*^m5-R~pUPxuDpPERCKyold&eds6Abh7LMkX^>F4|2!q+{y9nuHfl<^wR5aCO-TK
zK{6`zTWfk=ACbRsw}HzPoaz;o0+J~n+T;*CpKeG+_W*+nv$&-7kz-@tOTkR<@}=1D
zQLX5=mkI1RQ3gi;B#7P3mX2?SJQi|7+H@Wbd@ws61Er;TKSL&R*j*U@#0UQ|8sOh<
z0UCT`5RA*=lBwt+*^k$Szzp?nhb-@Qy<o_M`4NgJz&HhBTzDn>e+8Q@a(+di6PZ2y
zv5adb9d*UNZ{N^ZY%UHX2{OWg(W`6#L?$d6f)`{qYzTm9Hc`xbMo9a2-+kZ$7pWB2
zg+#I!B$B;8kZ2Lszvcq)g~W4H5Cu@PyS!b~`DVVU7arWk>g*l@sB;;bls!C(@+3Z*
z0Dv_39x`@oCPpN_L;`}7$?*VoXDAMh_X=n(qf(PgZST@~7#hzS$)OMNS4}Yi0L`WG
zM+@Jx$t%$h^86v(noHsHc$m@`8lc|JAQ8xT7CD=D8$w7SQWHcWGdP{??0S&#OW|7^
z*}ofW{u+|aT^4WQh8jEKwU8Rl9@EioZ?eE>GRbAYAqJCX;|1bvHZZnEeFof<K7-`r
z99BtSpSu&-^5Y;2A<oIP5qxI2`jkHKabvfYJCrJBfy{8m2NJVN(zZerCd_bRTxWS&
zx>K#Wt*a9sf|KV~*z`egGS32!arVfi7P2*jr{FhPGVy#?_62OL_s^H(;SQPBcQQ1f
z{BgkGq4>PhGwsDdxV1JK+rgrF9_7fn5J1)L&;so}E(3(u)X9ZBA;Tjp4_eEx08IP@
z0LB+v))0K!eNvdl!XesW?Hu!(j2zb?6T=9A8rP7WXI(S)lSwg88mJRkXAcUsyhRDH
z_EZ8E?ne^#68rtY{l|@T&4|=Vs6eC_wMmhE_Hn4X^_J<DIE2;iuq2#G&E`xE0BEDA
z7#Q7Hf~86w)67L}0XG&Z8Mdzg!tVuES_gnPoe*NqET{bf&4+P7jWl-Vr5p!PPn%N0
z(y|r+VPp1p=?j1Sk6zgVyeY%K<QNKNQI>02XFLKpbF5P0*O22(jMt$>oY|I}x(H8m
z^CG4faAqY6Y<K~lrlcl>Ea1$Um-$}<vzeZ6A{uL<4~P7B$OKwE^x>0%+RF%QUZ4k8
z0>F4YlW=k3#X%~%E~sJXTR%i8Z!hk}7?;W^7#bNfF_bX=0z=O5PtutGI0mAj<Wvnr
zQ_~eKzkrgtQ1Av@w66Y!?iUa?BOP3va2AtJ7ykmgj!6fn(8&>(!Q#z;LMbSTK5xM!
za~?e61<V7hczne;fR{-=xR`$vUj8#UwJ^D&#YP24-Sgb-13+y=6tn^oll1MiUbe4U
z3l3B&L}G%}NtpOdV)6%g_U}~~J?(jP_-820NDq=vi&kJ|lsXt9*OqEXwLlFwef0k{
z{9gx}--+RZz5;2BkYQ-PXmXOcs@F2Y%3LzF4y{7sh`k98{jUSf!ew8Dv_&R`23+b}
zz?><B9I%Dr`H5i0OS*%jJcMKBn!9x52eUC5k}z<bmUFB8ev2?r*&sdYBxYTzmjC2V
z9bY1yR;N<Zf^00e0Fo$Bse-~H6N=)70>ZbA&QX~3d)Y95?#2M+c*a!mt!<;3DBfJj
zPPY~<X#aD8_CJ@F9nhZllVBI{a7B+jKU-{yX*x#vePH<sWPbRkiV<CoWUtZdKQ5g+
zXZjVi8iuZrt3<y4^4lP7TLdl=MSyWMcIk`6>jQr~yO61Z8Arq9^~sb)WEq1e4p$mu
z`5fJ+nporJXe2QDD>3NiwrmvN4-M<lhY>N0U^SZq7xpJGfi3{U-fMm(=ye4TlnNrP
z*#P=3L7^;Bi}GFXLB#a`87T7~V%y;th$C<JKrpLBpfOp#yI%T<`|$q%y(se^W7{Ha
z{5o%DRScN6RCOQ%sq=P9U~XxT08X4>x_*@De}X#y3s$X7VSy6{UJyRd-)0QBOCFr0
z_izg7vr4d0*vE9=bbqQOnr6gpn|H+acbEl~_fx4bKB+hnlg{``yiuCa0OzKWe1U=)
zfQm^Qjf<j-t(1(fW=#K*WS#}&eeI5ZI9jq@CQBw<CPWDY9l+IO{JqkOzqJmHfmVWN
z6a6PaUyE06Mn2Bad#=O4^PfrDAEH!7<YhysGr+cU?gGy|5dx%#0BvzqG3Y60sHdDu
z>|~*y8Vl?nsenSIzeJMj4+Fp$Z%Z5jfHB%yKjqQHCzLW<2gAI^0?=wGz>jmgQ~>|>
z7vQx-2e3U#KXN_<MF5Pp$(=t-H~b4Y^B=*}&}Q)X%gw+A<Ud_$z_6-3+T?wK58z?!
z;WtM^7}v@{urk2JV7OIQULDVQAiQ}g^m!@tc`5Yy&j(~Lg+BlF80vq3&cBJiUJ89)
z3Vr@HhI%RVc`5XHDfD?M^m!@tc`5XHDfD?M^m!@tc`5Yyp8LRm7B9UN`n(kSycGJp
z6#Bdr`n(kSycGJpVf(+A4rD3x`L}5~mO`I_t86Ls`QOjVu@w5ekdk95^!Z;;VJ(F|
zgP7>0(C4Ml=l?fDpO?a&e=l*zQn>R{xbsrD^HR9;Qn>TiBbO`xoeUmJ;m%9p&i|YR
zWGUSF>!~@G!kw4GotH8_FJ*dO%JjUH>G|KyC%qKzycF)d6z=?2GCkLOvITz41+bL$
zc`57jQr72R6LBnMef9~XEoFUP%KE&N^?51l^HSF5FY`+;g+4EZJ}-qn{~s6n%qRB8
z2=!ff99$Z4Byhtcao(!91Dk4s5A3!V5P5ZbW=HahYpde|FWx-0VZ*Kq8}6Lix$*Mp
zs*_t+a)O>8IF_^KlAXYA^E-i;cCF}IdGz6{?>`~u#fa|xquHg16vRvb0>>1iCKFMF
zQN(=w=xDsNUZTa!DYHU!AM2B9a&sdF5vAGMF#$(vtLfR%X2@N8!t*@YgarLCgYGhp
zk5|2vV;|3x95M7*SsuOkDi&AwP0{(3%v*s}$=bd$UtX~?9P7j{p%vG~N5Sxx%J9qR
zF$C6!GULwnq@`)VyM~gr$#68SHvv|FFR9dBDZXOoc_9JiO3Kqy!mEy4IkBo*xsopM
z#KPf>!vQ(l2yeT_xzizz)~*V1xJOZS^meXEX!JSqQxr$DB*Oz%t7_Y(Hn5(5$o(Wf
z!Nj~N#r?Y&bni$>^@#DVfvhzRO@<aKTSan5#f;>J(ybRtIT|n*6(i$2Sv_c_oN$X(
z$B|Jqp6$JA@zieT@ny?BH(e1d(~JFdbJ>c!PXhgt%yeDLP1e+Vepo%cs{O<Kc?WS{
z3+e9(7o;CcT{55?&+#1bOn7Lnn{Ynt{JG5aH670gp2yt8J&&m!Q_9!=&g`BgVfB|U
z>yig_jN6W6oqpO-vhy%GzhZ2<W)PGk7My)!4X7ne$!fYp?^IZgasHhfHv-ys<amQh
z>ju-iL8%otLQTrDHveorZRq$c))bzva{#I&XD+P(G$GqmYHK#-mE8SDZ@&>(b!k`S
zH{M%(tB);PE_E8PJdUk27`ald+x-xuj}eJL6spzen!*zv%E|A=>jlg;iak<3Oz@vC
zi`LYjg|npHPnaSTU6UxAp26<~mI}zx^tulf)2wF33nJ%Eo#76yn28?l`iSR5sO@%~
z?jl%=JlOZMf&VY-hG#~hr<7CYPEpb~w+`2Q>Pr8Bu-Kq3GqP)0P}+*lW!a}6dV|5P
zUR9)0;wg6O(Mk)b%U-}IyYBA_d+9q@1*3L7e(9cms`q+47|yLR+vmR9bKb_{;0_`>
zR%VDiUq{!+O5HUkaS49>9Jy`%#kxwXLiR3q2n<ijZ8q6;@XCq?Fbme4Da>xUf6mnL
z%#%KICpdXaTwZtN{?g*!3Y?6xUB0ZLmwRzQg0VyE4YRayk{eq+yYrX!f=gY^!wgT$
zoFCI2U%rlOoS>9hMHuy5v{8xbA^6nn%`8Z6aFQ#`rakz>f=9JgN?u<2iMDYnbIq)O
zt^%lj<A1UG-+q^g$o2j2HUu6?fO<b-Z}HA$L09)Yb@s4nxwigDW0mKTl9b4oFwGT*
z-DTJ7)5qhJP*gL`DM^XtzBoG{?=<5$e(#7d;qne~PcLV!9gaoVnlz%jsf66%zr1$?
zgY^_klE{suytDZJUe?Ru-8r6fBeAAgQpiVQ;K|_Xj*m(9H;PX?b*wZT*|<hvRrRV@
z0uo8L)-B&KV!zy1P;>Z*&9l9k^14kVntW13Z|l63wK4|1J>gSihlpq{i_<vrePLcj
zXLK_et|MKXOs}bR(B>afub0h~SGJ|GqFR1*vJ1#jh{Iz;^xV!+f<wFfc{UzDo0-BL
z{-8(|l@gm5ALDeNj+7iBVe%@<{%QgLX`4!NGS-;yo(0RJ1ayd{$d5BemMyQ+gO+Sf
zTeOjL&itft$5y^k+T3YT(GW&`!-solYU#bM;x`=1qwVvPn~Yk8=_7V|`bJ<$R5K-2
zO3I5vFC_#z1R`LfbDUwCie}-ptW%<lwdkH$x-@~f*AhKp9_1tN2x~%jtjx&`1S9a`
zSm3&;{j>M)EL)-TIIwplMf`-rOOf1UL(v^u(l_R=Ykr*aQ~Uj{*0gn@55VtT+tY2;
zH>w@%+VASrfZyYa$#zysRt0%CjweCC&th5+=j>20GtGZ?0sQv7KWi5Ygtu&F_xl%?
z>zq99wH}Sod)is@RE2-n=t0+A-_$?-Iw;2RddlH*89GgoMvm7nfnVjr2C;jMq-@Wm
zD<9quenn&Vo!gw=l9MBMO6tsaD|SBA2OsCJ;|=;X;Pg%^NK2oSH!dt~>mJZJ>uD)l
zs%E83C_D=%x4({)`g<=HwRryYKKs~_j+J0-bv29U7FXEJt7k{{i7CWU%u=#xH@ccP
z{=FA>iBgtFeHXCm$Wu8WGYKbu*4z+S`D*h+QHw&C>qnEh59WTE7pb?XzBjM6+{Sy$
zwCrG8dOv&fh0HbeC*4G*TUd>ABH-e2u0+tGvIlcpr$-GX$60luS=w#y3|D{0+IY4f
zJ+1q;i2QVI_U?+4{<8{?r{xFD{-r$aP6A8|_t5;V@kZiq?Rk@ig(9m;j$j0?frjZp
z4f`>|9BSC6_o4$iD-D|>-6|{mwG%#tbycUHr8JV;e#Awe^KZyoxES)cT+CRnQgZA;
zf$(x)iz~F@X6$RF0G7Xcy=d-F`Y|0*_|%Aag}AxLb_*A`|2-GOvu!K}l5Prv1$t|l
z=z!<>?}b+EJh*#BuV~*Pp^avbi<JxEDSGczBHQwdx#w|*=1uenBu3+h)vQ{#66He4
zhRZsVjicSYs2KHK#D#mu)fz{v)hyzA<PD@hi7wpx0kJ*cK&rhv+h**x|H8fX!v~VW
zhdmsfuxAq<X5;EZo$ha%|8Dt7Fl~9oO6GArNUp(rPJCALMCu23Yb&TYq<ix0!n^t}
z$+gu`<y6w?(K_f4%td_YPf%&Z<|nNS#ln&kMK?orSH#IAFE;SBmdak}D*{xEPs1V&
z95U8ie78wZVAXe!8nf~A9JF?S+&QeDVB?xB^r_9x@08po@_bQ1Tu&;bHtlxJ3hznP
z<^;GO!{<IU{~kwoWjDb{Ko~wlZUasC73}4fxw8FvQ>>x5%_m6~k5mbD6N>z3bcNa*
z&X9c7gC+rgurVG6Q!mw3>RYfU2`--u&C>d%=BGNPl>Bw&MOX5i-`wzIoa74Md(FQC
zqlGi!Ir`y8*_^9_W#g%PcnV;iY@oF%XTi+TMZ6FkC7dfZz$K!oK*-L2duOe{st-Q`
zIizm)^P1%wwizuy>Ew~_be1BJ{OR;hElT<EKg;aVA$)K<o%MK|g+b%K;)_-u=VVj=
z(#_opy(2doz%m^1Jo<=boM#CNi+(@-3n=7S1?I@NjgPmX={S|-%*;+@Pw6#u4Y7uU
zloM;Y&fre;vnLv{kInZfa4%v!kB*#udT-f^G)N=fe{>M+O&g_v#y~&Xc@pmRCa(RO
zpL(mjF%xh5j=@pNFDOAPA1_FiVGWP*y4H3jVpBSd7VdfM!J7BM;&%1W2p_r^?`3_8
zN@pV!uley}P0beWwLRgudGa0ZI^f|3_KWvQeuigFVwxXLOsC>dW7iE<uMIeC>Q!sw
z$1+S?c=-cf>U@GRs5HvBoFk=vmxRCZT5F8F@dZu>)Yh1eV`VS=wT6Ze)NJ}@4KOr9
z!TM>R+qh0(Rme_(a)HFc7Y(_Tt0r)EgVS@Q$1^@@7@tci`#^mkh)vw$s^*Lu?(*~^
zNz(9}wq-$^>U`fl^Cy9pBB$iE`46xd7Khpt`*9v9!24x6ADvf+Fj&a8H;wPiZf;!;
zbgfKC*V-ao52n<|@6VHTRH)ym;<M_u(AISw^R16Vh`@Sk{f`$sw45Pz)r8Mag*QKW
zDEmv4&l7NmL>y>_0ZV9H2UrPBB}Z;dvkd&JWNY_dTQmBRxa(n^u4<PuN%Zxd*`HPL
zawwn<IAq;+*j3&ngGulVdjAaU;bVKjs_8pXFC?&Pt*C%s62(llUf`EbUxgaf)>N{*
zp`YV}Op)W<-LS^Cq4puUi{L*&^c<Ogz|^_oT?Qq8Ux8)Q2xLt$NNX>8E`Kf1?<c>m
zdHZYnY!aE9I(iVrx(A3&2<R-n5Ahf^z;Z<YQCPecr6xo{MN$)W{o>&bfy-9h*a;f3
zZo`iPpv%_^6t~ElmKPT7n3olHY-}{HH72*Znigm~!Rw0Mw!_8Khw>Z(dMm=*hYwY#
z8`ar$fwe78Zm{w9%m{0H4OX){?n&d^n2C6*4P}Vo87~tx-JYYa*Vs`vK~Xh^EIiNn
z$jtB_f@_v}t#RF$s;rr0VfK)DaY%Mp`yW*lv(Dw*>f-%D?@Ny0)nt2oonT&TL9G&?
zqOmGs!g}BSqR;JX;#l*FX-Dy*zgBmg7Va68a_@_keP&(<IdY}frsw?n^cG7~FGt=;
zbVoCZ*=}i?f@1W}jn?GJox^4`S`TqPQgK53wcpwWpR|{kWl}gpIw~F&EvXyc!)|un
zX2JbKCa-fIb)RYM=$T+E-p;Feo*xzYay=TUecLt`4nJV(XBWdFWU%T=D*19pfKGi$
z8uYp6-D{xxb?zUzE@&<pR=TXC;E<cLet}4%L9b$DjYZK`ylHR5yu$L!VO^HfMbU`f
z$js=8n%%*EwXYBIUyJ3IZz~>5%sFtY86(;^S6pWr2B!TKFGAP%vGs^ALe17MD|=Fe
z`QgIxVTVKq=NCwQgWg<`p@`Q$ueAX4zyrV2x3SHndIo2sxT7oHB=SuKuUdC<$Rz%T
zD}L_6HQF`5oOe+9QtrIVBGvfSMyrnOOaQBMbo~Bp(Cp~IiWblHdkUu3beWg;AnmK?
zb2=Y3x0iBFv<O>+*Zr**O$fE2P9fhHyk0S0Qkh__uKSKO)>nahBwc59T-lh)pxwCP
zcR=}?I~|yG_z3HqU2*A1wvST2)>hHi#lFBWB-ej#Y*}o##A0I!EZQd%2dE&F<vGXj
zn!65J{Sy?9kK1BFu<VqA&!v0vG}RUg>R9RXI0qmSFM$-%-=o`mf3r8E3(?Hi;-E?L
z;hdVLI2HVX^4V6~h;zzze108_=bGDkS|)T(!EM=@NMNr|$~}!dK?kaGW|lRix4(d|
z585^qotZc`sp9(j@=G|rP(1e%80evB*|<IBB0$AYHg3|X1*&}4fi4M^lFJgaX&YSt
zpZaT?Bx`p~+eX_TsgR;Sx+V7!q}VH;@s)=(?(FE$5K#W>%dLLUW#1ZyYmj2z{BwUW
zWG=|qN?8MQz#($h61r@uAi5n?Wt00IB}eXu+ST$v+0z67P@|%h)$P>>OkP!<#c206
zB`VgHVXr@8Uf-)1f87=rZx^OHb4t-Rr2GJF$BE_7K%U?YZEhF7*{7gh7)E%__^BTJ
zh538`YB{7*68nvt;qn|rAkdDl;c90_yk+aG@?`hy{SE`R?z=!;j>Sa!Nx1IYql5S`
zZQ7x&pteVc-1e9Lx@=`4QvJi#TVU(m0Jh$5wXE(XRJ!A}5MJGO(zNQ$`roCMH_DYp
z$2pa}<W5W-6Hrxp5ZJ#g>STmOpnmFvQHQoEZvKw!AelBbS`9ayNV3U6cE38w@t4p}
zm^+gmkavKgCt7eHgRe^6jJC@UkF4pptJ*qE^O^G)l+R?eyzbO+?A(pgUXRh^B)4mu
zZuLuDXI70Te{A~+o7b(|XhqGt8a&6+^jDvbGGbr9Jf5KJun~s2r!UQGy&)+ZT;6u~
z&sLs^KPMzFWnsGN(%zkvo6l*^)9C$oxm8=$O#ZFn{@dT;FyRomW{)YDz)q00a_+JJ
zna7on_hA$X7R)%O&>ZX9SlNQ!w+`j>j#`u=r{9mN6t2ZEAvoPJ(AR2jidK+HN>bdm
z;*$;`OlPZp>%5L?%5Z)W9lK}x=aSa>8|Ni?O|X7+1^#ZDI;;0lM^rW(hAwKMC&+rd
zTrpd6ggioEWrr&mMl#FuDl|hdbRo+64#|Ide4M<VvRv|{2hc^tAzkFwZne92DwPQ4
zj*M=5s#E50qy^60r~Qd-!{dDx30o@oTxv{&zIJlDC=6A`qG9+B%H7wKvV^Z5Ty@*Y
zqpziAt4&sRPxdC6;<QgMHl5)L2kl>*Uj0^*7kT7~+NC`O`)J{!7HS50o*RJFFzi2W
zXk~N2(CyMs$*6lrw%j$e`nVo_=4pdi+NPdauv;C$?UV&NnF<8{?mb<xN9PLYoa->$
ziElZBBG`j|)V`h#BIS<ysvlRlcU(qz8m7j(Jzz~Du{{$~Yrhd->J*N@7$0n{m_;6&
z*&6S|(5toLgPJyT{Ahb1&CM@nX3wJTHp-L?KT?m*9=s5Lq{UMJgBDMb;_^~a&Q4?t
zB5l0<ENc@D!*X6dOJRqvo8mAJN`M~Q0ov78v{9*PMZYgaHg~2$CJx!}lWO5J^QoTn
zNLOMtuX4Ci9_s*_mcX#S;82+o=X68DcjVbs?HdiUcFP$$gz{Z3Y`3-I0q^!(mK*uq
zC?>Z&0(O%KKTP}DmTb#(t^8iv$SGhz1*3H(Uq3L;Zcj9w^zrx}SZ_7hr1h6;(g`>8
zWtp1lKLfjLkl1oBW3b&mKNZLq@p@n+O;bmX8%>YpcN;Wng@=6thSVH#c2~ML-E`vR
zaH6iRo1+t)qcQ)`u*^4;@xcwBBn2!*dEUR-9Hv=u%a+q%Xr~+l>qwaFl*E^&UID=9
z`@i#Z7JAoIpLeyaPH;7_tfPSLL7(})XVY#MTC+-AACg!e$DM7{ch(|(`s75E9xo;*
zlDh4)Ct@xAr%&e2kAXdZRkQR!Y0Hxf!?O7^<3H<mmvAXK>AHQ6mORbUPmc;9H%Ptf
z={VpAk@NW_vj;or?-t(*nHXmz>?n+E4UAZ7o)3%md#|Ds!q>SxJ8LE+K!k;q`p`M<
zjuj}#`Qjd~27E8&4i&F?AMhwz$1s^eAsHeoUe}!oz+ri$Vhywd#xp4VTXi6ngdwSP
z^pV(^_)5Um=p3d#_QEN_%(=XMyo8d~SD~-!PCDz}s)FrO;_{eSXTFK8anO73<vHtI
zw2EE_JN^_}Q{%8n-f=tpryNJ%jA>JwnJKC$kp#Q<sMv>ajBeDE$+~TI4(dXjk8s<p
zk>bt4!Cf9<yCEqMwT-)fMX%@4nBp`_E#zsbg4B4(rPHSN;-CFB2Z8x*Lwb;?k+J6R
zx_<_K&V&TA&r$)&WPe66DYnH|rOzWG2fr?gU}$V%12iGY(N^JQkQ4~pHCkhODl19v
z2i+Gb9rvs#?<wlLio?{Oi2?VB<b%xP*_7L#)W>eS-3Gpt9}0xaf%Am&VspdBw3qZ}
zo$9l|)AHrZ>O!CQdc%&D&(yN%hT-zSuX6j7iNB#4sKq#QV=(0DIZ%-%3;aOo9rtTM
zrD|K@?OQ>qhW6F}f9{-no<~EzQwC%7Ef6#5@1?vtSDj5zZ|967VD<!2pyymJa(T90
z!^<I$C%9hoiuUwR`W^wc_(aDF9z7e$=Kdgtn)^tSN3+hrBuwEFZ9-Uo<7dvs_0Lat
zAG%c^T%X%xIw&Pe58nLFIudaYM$;$V&#OVe!!3SDIi)xKNcnMNDjs?&eo!hiTklj9
z@CM0W<kKao-|Y{VSf0GP(=!NFFZRT!XXX=d2bDI<YqyfxKQJKI5dEV(@C<2T7w&v^
z6m^%WNqcpyJhmI#cP-*vQ*W_5O?(>IAKhTvLF_(!v(nOU!X^uhBO@oPXz&3E!{;C-
zU16+G5w=`9fjXbyTDzSTH}>cUi{p~3dmM5@Syhs}_qw{(&j98F*^xV!39S0{&-eN(
z3i^HRttDv<07dyh2LLKo@l!jlE;mRyt3a?oTCjgGGILa-wVsh5mS|DLYg%`J>a&P1
zQxk94Hbu8gFq43R1WVi!23G!W%Z7m#Q!(yu9vz0jwq{lRB;F6qem`e-db@F<+JVB}
z6|@dBKF;P4n&+R3Q>S=vQ}i)TdKwdXPFTr1RqO4wmJe@~2x<){-?au##D8u#_=H)(
zpmW;YzAG9|YA#Qf#I3A*zZR@fl_Aa0`6Ny+SsQ_(#S!z=GG4v?mP9AO@isF+--`}a
z?tVc*s9-1dsse5K%MOA1=86YU7X3Zq$+h7Z(SrUlHjxU>SgzM(^mzD>sBvpa%6xOY
zT3fu!+tq#A%YWOfU&}n$3BF!(oThOwNn>u{cvEteWve*;=E^lquN&on^?BQmT22^$
zRV8fE2IQfAnD+dLlcc~Go%ngIwmE=rThCyLN4F%nHS8n#5!seV&Go$LaNFEw6)Gd(
zEtn_G8+FB6l(=KY*h64akV^wreDw=ot$rn--AP7X(LtblKi;qMi|Yb>a4|9Jo;N)F
z>%`nca)|Shgj?&iuPgUBD2}!Qujg-vcd=!HdSJ>3z~0<CR8~r@=ov2>3hVpW**u}<
zPV!)DztQGZYkyNyQ5!0XuneFZTBDCdh<vrj)PcW7TOH><q+9Ir@mOy6jr16TrC+92
z<gm1yvqT&-I_9w5&q#;DKCieg$HKmnf&{Pb`d%M$!XFL895f6Zcg=%Ry=XqSd5f`m
zi#T4JJ#L*<GM459zYQiN-l1eP#kRl9=lLN7CZHfj+0nEx{By_uW=?~vYqf!x5#o!j
zZ(2nyrE2F+6!b)&4DP3fjpnuKDM}}LKi>{R`I{jm-DmQ%1DZJ;GK@N(0V<nlcIlb<
zavp!4Fgoi&vAXzTiTjTv(41-8j*QDOIh~;s^%2r>NMZm%bLO2Csab{C)R|p2bDCBo
zk5WiV0)XzX^X*^T`;SI`SS516<chG??0IUEes55inXer!GeYlBYJzW?{qyx-dp~43
zm~Y`XBQ)<HFXP>%G_S71Tmj8;&GnuhY}!x<hO?mihKzf^4><<3Nz?l4EeVl;N=kIs
zdMFh@$t-NL4%f1^^cmhls}*&Q4pr&zbSV0G*LJj{5bJaFYm+P~4#<xrHb>F-E1wHG
zLGTQih(TKUYuA2drULKB2!QeCMzr~$O}{^z!84TrqxHX{FDnR)#@|SsU%UEX3AB1j
z@USF+>3;2*SJ1rcG-#~+$^&4$LLgHoe3FI-zWlEZ#M%SUY-4;lbnxq+{+GL_a^(A0
z*IRbGk372<TS!FpK(mv?q2ZlZ+!wL*exo`2{y0e=Js;sS+oIdtke=bN*SRc*HFZx#
zxyP|E0h-=YwI4O!K}$!B-k#KCpN+NMdp7#LwsgVp%Ed1+*|eh9er$hwMr%nTs?(vj
zBujZX_D0<m!U`|P_m(0VWp3;_Y5hcpf<CXF+Ke@y%r)l*Zb56%c9<Vhl^8rxNbH9o
znKM@3KHR)GS`Q)D#WzCY3mtJ?J&2NxedEu_<+n-Hn%ZaDknk8H;kfVc6XT|<_rz^c
zTnbz$oOWDt{-N4Xh+-447{zBt5moil7zfBYL$058MziBCBl|r6At-@q_sz#x^;Ood
z$gkh2c9=P9xszfL%3c8mb@6+ErwCg8&@#PuPtkB;Sg&kbb-=Q$(|^7Vqg>eQcBa%6
z>l{dQtrI6zzBFjg#cLVg$0hnr+JWtQha%W7kMXzz9@KJXtT?i_rHs#G_f-LJNAU7R
zxqoB#mF$-WW`qyq9scJgLOup|Y)h0sVS~q``^{;Ma7JIrQ^hbrvmx(-PSqVn75fe!
zj9+NQQ)eJJ`&CkJ3I;**F=+pa{s5+hd!>LubGtDg>lBOXUxpq{j%xab;(Vpwn-)4b
ztrxGF!%gjqbFT1Vf5R*2aZ<!<$CdlUE7?b?O%@H6C~yyyP1lOrww+T^@~Se!b}l0v
zez-78pY9K0D0x<ODAD1QN}gu9ERtDX_De8}N?B|oKXG5XKH-waUAjtVWm#8=S3_gg
z8Zg8fxiyiQo^NvTF+HZ%-UZ=xSHd-r0W=PcRAA=pI6JR{TMz74(7lK6L7IY^vDm(a
ze^pnEe?tKVzD-s+!_>WIi*a<FBwBN*1ur@P2m4&}N)hw-T9&1w0g$25Ze*63&Sqly
zj-aQn;?&6TSvYN(Htpl?kX|%4Z>aj|;qj(DQRas}O@Xy}bH&PqIxg)z&xutp_dJ)-
zmw7GwC4<#1nx~woJ8yVxt#dg&GDf>EY!_gLj#4nWd$M+;DRH(<RmWHRyyU{99NPot
zWVvQN2oM6&q3TE_&nGuPj-f#<+nm{vEjr-UhPt&*`Ec-7SqDk1crf=_`W7RTHyVjt
zMjsx63oLqSMBok}U<QUob@^Uu6k&8X`rUU3^vih)uv;*eE0!~2U$^K;Dz6c#AcXPG
zQ;J*HYjIw&DuuKMnhU5ROeJIPU|LqDWY$ZswW3Z%7p6Vmpg>nY;TFohqQtE;<43=G
zMZfilk#A`qKy#t!YThDS5j%>D(y+LbNUNO-GOhB+Mj-T_>Pg>%CeT6kRh@M=&&D3!
zhkRv!?gn=iNs-HxiZ+G^c#?!Lq^20_Ff~&2fcJQ4PT%)YQCzQfumwx^RoG*kxjwWy
z9tGPZ!=JkcH0}%*F_=#fb8~U7N?bQpp<yGo0s5xeP21egaj#Nh=kuqO=wLxMakd!3
z%nj~^>_S)^^jNE#-?0+5W7^#$K#W^nvE8*&Y*Ea9zxj%w=63O*1=)S@A~ZK_;~YEW
zoYCuF(|e-&q5=*);BLOkj?L>ZCB?Alj)6q5<&)09)ju3S5x0t2??qjCUYt)CJHrma
z9m?*?Ma0>)rFb5<M04o_h1eRq<R?alvcXI%E#)$LV*DnHjn5``3rToain(34SM28P
z9X7GvD~D0Od;S9R-l+*%!vyJOe<g5<omj86Fqa<h8L%{%$e+0-$+>bkv?9d`=qk0m
zEACxU<%tshQ*RO^pE}5%*jrHH>V&?nG3!V+;iXx3Rw&)|+f;icygtiOIG=K}CUa$b
zXGga!YdE6rs$}zXbm}j&^S1ZIo3xNb@#A+ubP1%CJUjvRyYC%VENq%bu$x|qHZpWt
zE5=YO8Z3}~HeO(BZhDVF&D-SyfC}N^Bkt$^1D<Mb^Te<TW=eb<R=zc#_qfyP5|)bt
zLS2i|S=H-36q@t!X|u{*RwnxUR~cj8UCt?z0kbU=mn2#5bekmkb9IdH@KHo$u7IVG
zo!i3H3EcD0?VLqLe!Wcig`NOrUo#M@J*@{RWr<@7U|ZQw8jc#G++-5b#Iw-{pUrF=
ze;V~c^+A^Nn5sy!zJ$QyP>w;0-<Y17;9Y3?b*{MUC#Y#dn|od?5+mwV_AFrccr7g)
z^&LOghMg4UrFhx0PLjWlc1tpTPv1qy1eFq}tA6ojPC3Os_HTM@dyA5rBzaSQxx<lW
zJDB?UUCWU5EejiU(52<>WZVQVX{y^k(-FsQDQ>yMxVIHH-xBT>5C-GT0ih<;hq~|#
zM;SdSV(~%Z7FQ;1H|1tY?UlMQQQZdEWRLmlitTOlGQ~gIpKGuneozxO!+h3fcS7T_
zVf*aD7rj5Vye&n%Xa1YHct+0pI+aQT0LtIgnyM6R9*%<duPYsY-PrWur9mn8&@s^d
zl1X3;ARt4GHgIcwL3>-OzDjC>m#LYppn1eGs8f~&J^J3|JSEom@;bsLBHcr^V?|_N
zkdXK_Ip@rkpA4BLS2}Ai49)A9y#K7J`vdiZdEqE<Sb|{+-Sd$3jW^}qAu^11*3sH$
zx<;hA3xVmwA+X8qzNRUxj(X`hpc&^B>~MNc$tuO2jX9K@Zt32v1gUF@Z2vZ=K&f}z
z)b$mp+pzRZXG4iw&A`e;<ZVWk(ft{0YAOCe{)dYb+M)xvAD;rZ8K_h0*gy#-Tg5<|
z-Q_f)M7VBYAgN!P(m>CP%h>b5RjZxlKl9M@%?|s5+9{Knl7TxSme*mC=Q>svXcgb}
z45D`C<HYNBWV%m#4;H`odGuL9(gq3>`^D1*)eMLcRYs>B@Mr9yt)2RK*R6g}R5Y;S
zWk>*V8jY*~x<e>e#~W5YSxlD5ZPU}(&OX~ZJychSVVBqMjq_OlXx)_Dw~Hpywp~EX
z&OgcaPiFTz_-*5Qp9}psnz5#$*Home$J4%h+j!9W2GtU2fyIq_J`wCv0q6&+M;3IE
zACB*&t()qg<xgOyl_&DAyjkwv`lECVn1uIYjzc1-C;ZrU#aa}z3}(rnCt#Z(cr(#=
z>?K1W!G{z>0(NPWn3(9mb}^67TE*(ySuch1vR*PS{J`}%x9;ld&}Wi_mCqDR0$jv|
z#NMNRIkOpe(!sxTar;LDb+jK#zALrZFDsUJf!0W!ClQ#d9^bCoC+^1VL&jAJYcy$2
zS8tB#lnU2ujR*Lv?RaO-cJ3*pu=d;lLuF*#)ghOzOiJ{l+HseWikhzG2qeBaDzqSN
z?*0^5o+1IkB-v=LeTo&<(m+v&g~3r#%u>cSf67f79){g0I_LGnguAN=#ay$c*Zrdg
zY-F*0+reCPuJsxGVNrqD83ZO(0e#hDT=*5e8v%lxn7jUh9AHK`pxx)+u8}{OSr?3i
zZ{B<MU@k7vCXCbY$if1<3T!|5U0|8?+_Mg+J-a7y2yAvAA68il`mfsb-NK@KwPLkL
z_Dd1->st$X(%C>sF`~qF&+87O(D`*!&CaoF<X=&4+J?P36zT5z=8njY@=Ckb;t^)?
z+v!Ca3fXnE?H_@A9ZcVKyB~7D@m7=wa$`>*^D5}v<VoujE0_$8@hd3b=^9(Gt8XwQ
z=E-9@uVXZQUi$#Gv%Ag%OtsjgaP4kC|9{%T_Z%5u^f){7_dL;zJ#%{eeMyRCZz|!E
znvNZ{Xh@irEbr5?65K}tC6X~*awN>6@R@r~KvnIty4%1N4%*WEJy0?f@uIpQ*aG}3
zu>4%lGJd^2s+G4lmMie1Pi9W-4qLD<Pe_)I6UbNl!Hly_&n)=)xKkm|+A)Bc#jH^p
zCi;f?Q1}(Dy@!DZAosDv#?RY`&XvjD;t9!;4}=d&1e3OSHChM|gHLrd<$fubyUuMV
zu!Gx6t~4t(H+48WbV|Nq!0p?-W$S;S&f5a<5QB)f6IOq*oow~v%r2FMJ#ei&q#;MC
zv@g!()tf=YS;XFRhwz$hSo|`KmR~_mT{@g0lsTSTVQlVtGILU+t56`(?ICflR>f3w
z0FnH^)&g3n1oWuD3!#iprlD+xzy^6TusK(J2H3JzKb*DQH}ZZ8-z+|ebv&RB(z6gF
z0kD=0EMM4uM6FG4IRE}~dc%ES%jDVq|G)R|R?j_DBn%vciOt&gp;f=6^smmH`uNw<
z59Xh#1Fn><*kAmr{LJ?B8f(=l5@2&09A-OQUca(T;GM*@*fPuF$n4k+z`+R9S30{N
zN95g1J(~(F$7g)?U7S$70i5~*W&q2C*1(4xV5j>k7`z3RP2c9+eCz!7I`0S9Zn2rV
z+io1P`SaoM+s856N{cIijndh`ntCf|$L-@gfa4vXU#(s*W_54*{5q@8J4AlJ{kZjd
z+-}+3pvjV@)1p=d$Uz$W3`}~A)xLL&PV3&bKa!K{yJy+2YpmvX3Yu@GPLI9w@LTDA
z+rDc%D|UQceB)c{c~{}%7q3sSv;h0bLkXCUgv^>CxiX&fMSg?-tSw=$1vNfrTVDb7
z{O(k}UVFFZo8_F}Z@13}4Vi|AyX<(esQb*rcKLgye?Ruu=k!@T>d342<kH^<te=5Z
z>@L+e8-e2>`<5QR;2YrZz}<k`;Sei0ZB63nxndf+Zh80r&(7kHkM(A+=M0*6(KaY~
z!-222j`bbwQ!b4)nDp3VmJOs4b6^FqgZE>F<y9ULp#B3d6c(5tc>Ry<2dp{6<MF#g
zG<x}&*T(aI_Ur`qPHrm-tNWSs+x^P;9UYX%SFQYN_r!SyiA|ApKcB{aclx{QSip)K
z^>c18=sR=AgF^sVQoo-iwQHJJ@0IMVxz{hBuX$D<^}0#=R(r~I)_$8$9g%rEpFR;N
zzJ0~;#*ux6H}>C|wExN}-E}fG&y#?|fFIYThD3y~G3v_`f)1@Ufch(P8Y>_{x|3y@
zY;_JWJd1BJzW~Pi)$r)Md;auKpR9OWe%dV4?69Cc@7>H(h4=du0^0)@4vB-q>6Ac3
z?N?x)d&+hFO6J<^S2v%oUs)RG4=V7kN&E{;yCHeME>b+iPF(bwxugQ5ieO;k115V}
zg%U`SdXwdg`2qE{rYCKT|IfU|7IOUTy1!vpfin})FK7IE>bCub0x%G~xa^^U@Blb`
zYXlsOiJLh=ao@`8SFEnFd`oy0DejZ~_wbLK;it9~Ffg$3c)B=-I3EjhxfuW0<3Ia9
zrR1ZK%D@ej!QJ2QOn(AO_-vC7e0h2K?91iz@5$W;j_Kq)J3Bl4w#D0i^SrHB!)Df`
z?zhdIulR4Jf34Q!#W~M!-~YGm=ehEI!DegT*59xFKGVPM)8v^qHYJ~65q|+(uFW8%
z2gy#e9AvLwku*-50qp*5EqM!c;<gvSLBF&zU~Bc#*HVX95;=VLErNinW+y#&+Ux*s
zBr$UQ0JbFGffsmU4%~rC6riXSX!Y{3`A%#&$6SC4ksBJ2vIyP0#$&+V$exeOCon?I
z1t$Tt5v+!OVDhljg4Ba(a*#fa!-9{X+VPKO3Io{R*d6si`2gDv@I(}LLx7?gpoJgO
z^<0urbFnHM4W7~9!J0lsQx9mC7i*pYiH_{x`S737Kh?T@P40(@3_#%N>gTe~DWM4f
DoZNh{

literal 0
HcmV?d00001


From cf1c75e4a0106ef0081fb390f867f29a2616a21a Mon Sep 17 00:00:00 2001
From: Naif Tarafdar <135640067+ntarafdar@users.noreply.github.com>
Date: Wed, 13 Nov 2024 18:18:43 -0800
Subject: [PATCH 58/69] #12184: Alignment fix for BH in I2S and S2I

### Ticket
[Link to Github
Issue](https://github.com/tenstorrent/tt-metal/issues/12184#event-15046053642)

### Problem description
Alignment issue for BH when going from DRAM to L1 sharded in i2s and
s2i. BH NoC is 64B aligned, while L1 still 16B.

### What's changed
Added extra logic to handle alignment on i2s side and s2i side.
There is still an alignment problem in BH when going straight from
sharded to host, for that path currently added an intermediate
sharded_to_interleaved. Will address the direct sharded read in
subsequent PR.

### Checklist
- [x] Post commit CI passes
(https://github.com/tenstorrent/tt-metal/actions/runs/11828337532)
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 .../unit_testing/misc/test_sharded.py         | 14 ++-
 tests/ttnn/unit_tests/operations/test_core.py | 94 +++++++++++++++++++
 .../unit_tests/operations/test_maxpool2d.py   |  3 +
 ttnn/cpp/ttnn/operations/core/core.cpp        | 37 +++++++-
 .../data_movement/common/kernels/debug.hpp    | 20 ++++
 ...ut_sharded_blocks_interleaved_start_id.cpp | 18 +++-
 ...ut_sharded_blocks_interleaved_start_id.cpp | 12 +++
 .../device/interleaved_to_sharded_op.cpp      | 15 ++-
 ...interleaved_to_sharded_program_factory.cpp | 43 ++++++---
 .../device/sharded_to_interleaved_op.cpp      |  3 +-
 ...sharded_to_interleaved_program_factory.cpp | 13 ++-
 11 files changed, 243 insertions(+), 29 deletions(-)
 create mode 100644 ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp

diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_sharded.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_sharded.py
index b3e41058c67..5df2d752340 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_sharded.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_sharded.py
@@ -101,6 +101,7 @@ def test_sharded_tile(
 
 
 # TODO (7735): Switch to new interleaved_to_sharded with sharded_mem_config input and re-enable BLOCK sharded tests
+@skip_for_blackhole("WIP")
 @pytest.mark.parametrize(
     "input_shape, shard_scheme, shard_size, num_cores",
     [
@@ -180,7 +181,7 @@ def test_sharded_rm(
     assert passing
 
 
-@skip_for_blackhole("Mismatching on BH, see #12349")
+@skip_for_blackhole("BH LLK issue with untilize, #14594")
 @pytest.mark.parametrize("H, num_cores", [[100352, 98], [25088, 98]])
 @pytest.mark.parametrize("in_sharded", [True, False])
 @pytest.mark.parametrize("out_sharded", [True, False])
@@ -256,7 +257,7 @@ def test_sharded_untilize(H, num_cores, in_sharded, out_sharded, dtype, device,
     assert passing
 
 
-@skip_for_blackhole("Mismatching on BH, see #12349")
+@skip_for_blackhole("Mismatching on BH, see #14609")
 @pytest.mark.parametrize("H, num_cores", [[25088, 98]])
 @pytest.mark.parametrize("output_dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
 def test_sharded_tilize(H, num_cores, output_dtype, device, function_level_defaults):
@@ -895,6 +896,7 @@ def test_partial_sharded_op_binary(
     assert passing
 
 
+@pytest.mark.skipif(is_blackhole(), reason="BH ND hang, see issue #14745")
 @pytest.mark.parametrize("in0_sharded", [True, False], ids=["in0_sharded", "in0_unsharded"])
 @pytest.mark.parametrize("in1_sharded", [True, False], ids=["in1_sharded", "in1_unsharded"])
 @pytest.mark.parametrize("out_sharded", [True, False], ids=["out_sharded", "out_unsharded"])
@@ -1335,6 +1337,7 @@ def test_sharded_matmul_2d_transposed(
     assert passing
 
 
+@pytest.mark.skipif(is_blackhole(), reason="BH ND hang, see issue #14745")
 def test_resharded_binary_to_matmul(device, function_level_defaults):
     grid_size_binary = device.compute_with_storage_grid_size()
     num_cores_binary = 98
@@ -1426,6 +1429,7 @@ def test_resharded_binary_to_matmul(device, function_level_defaults):
     assert passing
 
 
+@pytest.mark.skipif(is_blackhole(), reason="BH ND hang, see issue #14745")
 @pytest.mark.parametrize("in_sharded", [True, False], ids=["in0_sharded", "in0_unsharded"])
 @pytest.mark.parametrize("out_sharded", [False], ids=["out_unsharded"])
 @pytest.mark.parametrize("dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
@@ -1501,6 +1505,7 @@ def test_sharded_untilize_padded_shard(in_sharded, out_sharded, dtype, device, f
     assert passing
 
 
+@pytest.mark.skipif(is_blackhole(), reason="BH ND hang, see issue #14745")
 @pytest.mark.parametrize("in_sharded", [True, False], ids=["in0_sharded", "in0_unsharded"])
 @pytest.mark.parametrize("out_sharded", [False], ids=["out_unsharded"])
 @pytest.mark.parametrize("activations_dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
@@ -1691,6 +1696,7 @@ def test_block_sharded_untilize_with_unpadding(in_sharded, out_sharded, dtype, d
         "unbatched_16_shape_out_interleaved",
     ],
 )
+@skip_for_blackhole("BH Issue with untilize LLK, see #14594")
 @pytest.mark.parametrize("dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
 def test_width_sharded_untilize_with_unpadding(
     shape, output_H, in_sharded, out_sharded, dtype, device, function_level_defaults
@@ -1761,7 +1767,7 @@ def test_width_sharded_untilize_with_unpadding(
     assert passing
 
 
-@skip_for_blackhole("Mismatching on BH, see #12349")
+@skip_for_blackhole("BH LLK Issue with tilize, #14609")
 @pytest.mark.parametrize("input_shape", [[8, 1, 49, 2048], [1, 1, 8, 2048], [16, 1, 49, 2048], [1, 1, 16, 2048]])
 @pytest.mark.parametrize("sharding_config", [(True, True), (False, False)], ids=["both_sharded", "both_interleaved"])
 @pytest.mark.parametrize("output_dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
@@ -1833,7 +1839,6 @@ def test_sharded_tilize_with_val_padding(input_shape, sharding_config, output_dt
     assert passing
 
 
-@skip_for_blackhole("Mismatching on BH, see #12349")
 @pytest.mark.parametrize("N", [8, 16])
 @pytest.mark.parametrize("in_sharded", [True], ids=["in0_sharded"])
 @pytest.mark.parametrize("out_sharded", [True], ids=["out_sharded"])
@@ -2064,6 +2069,7 @@ def test_sharded_matmul_1d_in1_wormhole(device, function_level_defaults):
     assert passing
 
 
+@pytest.mark.skipif(is_blackhole(), reason="BH ND hang, see issue #14745")
 @pytest.mark.parametrize("in0_sharded", [True, False], ids=["in0_sharded", "in0_unsharded"])
 @pytest.mark.parametrize("in1_sharded", [True, False], ids=["in1_sharded", "in1_unsharded"])
 @pytest.mark.parametrize("out_sharded", [True, False], ids=["out_sharded", "out_unsharded"])
diff --git a/tests/ttnn/unit_tests/operations/test_core.py b/tests/ttnn/unit_tests/operations/test_core.py
index 23b9d1f8459..c39154379df 100644
--- a/tests/ttnn/unit_tests/operations/test_core.py
+++ b/tests/ttnn/unit_tests/operations/test_core.py
@@ -439,3 +439,97 @@ def test_create_sharded_memory_config(device, shape, strategy, orientation, core
 
     passing = torch.equal(input_data, output_data)
     assert passing
+
+
+@pytest.mark.parametrize(
+    "shape, shard_shape, strategy, orientation, core_grid",
+    [
+        ([1, 1, 2, 16], None, ttnn.ShardStrategy.WIDTH, ttnn.ShardOrientation.ROW_MAJOR, ttnn.CoreGrid(y=1, x=1)),
+        ([1, 1, 2, 16], None, ttnn.ShardStrategy.WIDTH, ttnn.ShardOrientation.ROW_MAJOR, ttnn.CoreGrid(y=2, x=1)),
+        ([1, 1, 32, 16], None, ttnn.ShardStrategy.HEIGHT, ttnn.ShardOrientation.ROW_MAJOR, ttnn.CoreGrid(y=2, x=1)),
+        ([1, 1, 64, 16], None, ttnn.ShardStrategy.HEIGHT, ttnn.ShardOrientation.ROW_MAJOR, ttnn.CoreGrid(y=2, x=1)),
+        (
+            [1, 1, 2, 16],
+            [2, 16],
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ShardOrientation.ROW_MAJOR,
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 0)),
+                }
+            ),
+        ),
+        (
+            [1, 1, 5280, 16],
+            [5280, 16],
+            ttnn.ShardStrategy.HEIGHT,
+            ttnn.ShardOrientation.ROW_MAJOR,
+            ttnn.CoreRangeSet(
+                {
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 0)),
+                }
+            ),
+        ),
+        # TODO: Add this test back by checking for core grid size and skipping if we can't do it
+        #        (
+        #            [1, 1, 675840, 16],
+        #            [5280, 16],
+        #            ttnn.ShardStrategy.HEIGHT,
+        #            ttnn.ShardOrientation.ROW_MAJOR,
+        #            ttnn.CoreRangeSet(
+        #                {
+        #                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(11, 9)),  # 120
+        #                    ttnn.CoreRange(ttnn.CoreCoord(12, 0), ttnn.CoreCoord(12, 7)),  # 8
+        #                }
+        #            ),
+        #        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "input_buffer_type",
+    [
+        ttnn.L1_MEMORY_CONFIG,
+        ttnn.DRAM_MEMORY_CONFIG,
+    ],
+)
+@pytest.mark.parametrize(
+    "output_buffer_type",
+    [
+        ttnn.L1_MEMORY_CONFIG,
+        ttnn.DRAM_MEMORY_CONFIG,
+    ],
+)
+def test_bh_alignment_i2s(
+    device, shape, shard_shape, strategy, orientation, core_grid, input_buffer_type, output_buffer_type
+):
+    torch.manual_seed(0)
+    input_data = torch.randn(shape, dtype=torch.bfloat16)
+    if shard_shape == None:
+        shard_config = ttnn.create_sharded_memory_config(
+            shape=shape,
+            core_grid=core_grid,
+            strategy=strategy,
+            orientation=orientation,
+            use_height_and_width_as_shard_shape=False,
+        )
+    else:
+        shard_config = ttnn.create_sharded_memory_config(
+            shape=shard_shape,
+            core_grid=core_grid,
+            strategy=strategy,
+            orientation=orientation,
+            use_height_and_width_as_shard_shape=True,
+        )
+    x_t = ttnn.from_torch(
+        input_data,
+        device=device,
+        layout=ttnn.ROW_MAJOR_LAYOUT,
+        memory_config=input_buffer_type,
+        dtype=ttnn.bfloat16,
+    )
+    x_t_sharded = ttnn.to_memory_config(x_t, shard_config)
+    x_t = ttnn.to_memory_config(x_t_sharded, output_buffer_type)
+    output_data = ttnn.from_device(x_t)
+    output_data = ttnn.to_torch(output_data)
+    passing = torch.equal(input_data, output_data)
+    assert passing
diff --git a/tests/ttnn/unit_tests/operations/test_maxpool2d.py b/tests/ttnn/unit_tests/operations/test_maxpool2d.py
index 43fa209acb0..04903485f40 100644
--- a/tests/ttnn/unit_tests/operations/test_maxpool2d.py
+++ b/tests/ttnn/unit_tests/operations/test_maxpool2d.py
@@ -183,6 +183,9 @@ def run_max_pool(
     output_host = output.cpu()
     output_pytorch_padded = torch.Tensor(ttnn.to_torch(output_host))
     output_pytorch = output_pytorch_padded[:, :, :, :in_c]
+    torch.set_printoptions(profile="full")
+    print("output_pytorch" + str(output_pytorch))
+    torch.set_printoptions(profile="default")  # reset
 
     ## reference
     golden_pytorch = torch.nn.MaxPool2d(
diff --git a/ttnn/cpp/ttnn/operations/core/core.cpp b/ttnn/cpp/ttnn/operations/core/core.cpp
index dba2edf328b..b61567ab540 100644
--- a/ttnn/cpp/ttnn/operations/core/core.cpp
+++ b/ttnn/cpp/ttnn/operations/core/core.cpp
@@ -11,6 +11,8 @@
 #include "ttnn/cpp/ttnn/operations/data_movement/reshape_view/reshape.hpp"
 #include "ttnn/operations/data_movement/data_transfer/data_transfer.hpp"
 #include "ttnn/distributed/types.hpp"
+#include "ttnn/operations/data_movement/sharded/sharded_to_interleaved/sharded_to_interleaved.hpp"
+#include "ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.hpp"
 
 namespace ttnn::operations::core {
 
@@ -54,12 +56,29 @@ ttnn::Tensor squeeze_from_4D(const ttnn::Tensor& tensor, const int rank) {
 }
 
 ttnn::Tensor to_device(const ttnn::Tensor& tensor, Device* device, const std::optional<MemoryConfig>& memory_config) {
-    return tensor.to(device, memory_config.value_or(ttnn::DRAM_MEMORY_CONFIG));
+    auto mem_config = memory_config.value_or(ttnn::DRAM_MEMORY_CONFIG);
+    if(mem_config.is_sharded ()  and (device->arch() == tt::ARCH::BLACKHOLE)) {
+        auto interleaved_tensor =  tensor.to(device, ttnn::DRAM_MEMORY_CONFIG);
+        return ttnn::interleaved_to_sharded(ttnn::DefaultQueueId, interleaved_tensor, mem_config, std::nullopt);
+    }
+    else {
+        return tensor.to(device, memory_config.value_or(ttnn::DRAM_MEMORY_CONFIG));
+    }
 }
 
 ttnn::Tensor to_device(
     const ttnn::Tensor& tensor, MeshDevice* mesh_device, const std::optional<MemoryConfig>& memory_config) {
-    return tensor.to(mesh_device, memory_config.value_or(ttnn::DRAM_MEMORY_CONFIG));
+
+    auto mem_config = memory_config.value_or(ttnn::DRAM_MEMORY_CONFIG);
+    // Currently no direct sharded write support in BLACKHOLE due to alignment issue
+    if(mem_config.is_sharded ()  and (mesh_device->arch() == tt::ARCH::BLACKHOLE)) {
+        auto interleaved_tensor =  tensor.to(mesh_device, ttnn::DRAM_MEMORY_CONFIG);
+        return ttnn::interleaved_to_sharded(ttnn::DefaultQueueId, interleaved_tensor, mem_config, std::nullopt);
+    }
+    else {
+        return tensor.to(mesh_device, mem_config);
+    }
+
 }
 
 ttnn::Tensor allocate_tensor_on_device(
@@ -86,7 +105,19 @@ void copy_host_to_device_tensor(ttnn::Tensor host_tensor, ttnn::Tensor device_te
     tt::tt_metal::write_tensor(host_tensor, device_tensor, cq_id);
 }
 
-ttnn::Tensor from_device(const ttnn::Tensor& tensor, bool blocking, uint8_t cq_id) { return tensor.cpu(blocking, cq_id); }
+ttnn::Tensor from_device(const ttnn::Tensor& tensor, bool blocking, uint8_t cq_id) {
+
+    // Currently no direct sharded read support in BLACKHOLE due to alignment issue
+    if(tensor.is_sharded ()  and (tensor.device()->arch() == tt::ARCH::BLACKHOLE)) {
+        auto interleaved_tensor = ttnn::sharded_to_interleaved(cq_id, tensor, ttnn::DRAM_MEMORY_CONFIG, std::nullopt);
+        return interleaved_tensor.cpu(blocking, cq_id);
+    }
+    else {
+        return tensor.cpu(blocking, cq_id);
+
+    }
+
+}
 
 void deallocate(Tensor& tensor, bool force) { tensor.deallocate(force); }
 
diff --git a/ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp b/ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp
new file mode 100644
index 00000000000..25c95ab1888
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// This file contains common kernel functions used for debugging
+#pragma once
+#include "debug/dprint.h"
+namespace tt::data_movement::common {
+
+inline void print_pages(uint32_t l1_addr, uint32_t pagelen, uint32_t npages, uint32_t start = 0) {
+    volatile tt_l1_ptr uint16_t* ptr = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(l1_addr) + start * pagelen;
+    for (uint32_t page = 0; page < npages; ++ page) {
+        DPRINT << start + page << ": ";
+        for (uint32_t j = 0; j < pagelen; ++ j, ++ ptr) {
+            DPRINT << BF16(*ptr) << " ";
+        }
+        DPRINT << ENDL();
+    }
+}
+}
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/reader_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/reader_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
index c132e643ad5..16b8820e61a 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/reader_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/reader_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
@@ -5,6 +5,12 @@
 #include <stdint.h>
 #include "dataflow_api.h"
 
+//#define DEBUG
+
+#ifdef DEBUG
+#include "ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp"
+#endif
+
 void kernel_main() {
 
     const uint32_t src_addr                 = get_arg_val<uint32_t>(0);
@@ -38,15 +44,20 @@ void kernel_main() {
     uint32_t stick_id = start_id;
     cb_reserve_back(cb_id_in0, block_height);
     uint32_t l1_write_addr = get_write_ptr(cb_id_in0);
+    uint32_t l1_write_addr_base = l1_write_addr;
     if (aligned) {
         for (uint32_t h = 0; h < block_height; ++h) {
             uint64_t src_noc_addr = get_noc_addr(stick_id, s0);
             noc_async_read(src_noc_addr, l1_write_addr, block_width_bytes);
             stick_id++;
+#ifdef DEBUG
+            noc_async_read_barrier();
+            tt::data_movement::common::print_pages(l1_write_addr, block_width_bytes >> 1, 1);
+#endif
             l1_write_addr += padded_block_width_bytes;
         }
     } else {
-        cb_reserve_back(cb_id_in1, 1);
+        cb_reserve_back(cb_id_in1, 4);
         uint32_t scratch_l1_write_addr = get_write_ptr(cb_id_in1);
         uint64_t scratch_l1_noc_read_addr = get_noc_addr(scratch_l1_write_addr + aligned_offset);
         for (uint32_t h = 0; h < block_height; ++h) {
@@ -54,10 +65,15 @@ void kernel_main() {
             noc_async_read(src_noc_addr, scratch_l1_write_addr, aligned_block_width_bytes);
             noc_async_read_barrier();
             noc_async_read(scratch_l1_noc_read_addr, l1_write_addr, block_width_bytes);
+#ifdef DEBUG
+            noc_async_read_barrier();
+            tt::data_movement::common::print_pages(l1_write_addr, block_width_bytes >> 1, 1);
+#endif
             stick_id++;
             l1_write_addr += padded_block_width_bytes;
         }
     }
+
     noc_async_read_barrier();
     cb_push_back(cb_id_in0, block_height);
 }
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/writer_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/writer_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
index aed1d42e19f..03820991b77 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/writer_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/device/kernels/dataflow/writer_unary_stick_layout_sharded_blocks_interleaved_start_id.cpp
@@ -5,6 +5,12 @@
 #include <stdint.h>
 #include "dataflow_api.h"
 
+//#define DEBUG
+
+#ifdef DEBUG
+#include "ttnn/cpp/ttnn/operations/data_movement/common/kernels/debug.hpp"
+#endif
+
 void kernel_main() {
 
     const uint32_t dst_addr                 = get_arg_val<uint32_t>(0);
@@ -34,9 +40,15 @@ void kernel_main() {
     uint32_t stick_id = start_id;
     cb_wait_front(cb_id_out0, block_height);
     uint32_t l1_read_addr = get_read_ptr(cb_id_out0);
+
+
     for (uint32_t h = 0; h < block_height; ++h) {
         uint64_t dst_noc_addr = get_noc_addr(stick_id, s0);
         noc_async_write(l1_read_addr, dst_noc_addr, block_width_bytes);
+#ifdef DEBUG
+        noc_async_read_barrier();
+        tt::data_movement::common::print_pages(l1_read_addr, block_width_bytes >> 1, 1);
+#endif
         stick_id++;
         l1_read_addr += padded_block_width_bytes;
         noc_async_write_barrier();
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_op.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_op.cpp
index b899760c02a..2bbcb4f4574 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_op.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_op.cpp
@@ -36,8 +36,19 @@ std::vector<tt::tt_metal::LegacyShape> InterleavedToShardedDeviceOperation::comp
 
 std::vector<Tensor> InterleavedToShardedDeviceOperation::create_output_tensors(const std::vector<Tensor> &input_tensors) const {
     const auto& input_tensor = input_tensors.at(0);
-    return operation::generic_create_output_tensors(
-        *this, input_tensors, this->output_dtype, input_tensor.get_layout(), this->output_mem_config);
+    //return operation::generic_create_output_tensors(
+    //    *this, input_tensors, this->output_dtype, input_tensor.get_layout(), this->output_mem_config);
+
+
+    auto mem_config = this->output_mem_config;
+
+    return {create_device_tensor(
+        this->compute_output_shapes(input_tensors).at(0),
+        input_tensor.get_dtype(),
+        input_tensor.get_layout(),
+        input_tensor.device(),
+        mem_config
+        )};
 }
 
 operation::ProgramWithCallbacks InterleavedToShardedDeviceOperation::create_program(const std::vector<Tensor>& input_tensors, std::vector<Tensor> &output_tensors) const {
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_program_factory.cpp
index d41cadcf1d1..e51e67fc92a 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/device/interleaved_to_sharded_program_factory.cpp
@@ -32,6 +32,14 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
     bool rm_orientation = shard_spec.orientation == ShardOrientation::ROW_MAJOR;
 
     CoreCoord end_core = (*shard_spec.grid.ranges().rbegin()).end_coord;
+
+    bool convert_df = input_cb_data_format != output_cb_data_format;
+    auto src_buffer = input.buffer();
+    auto dst_buffer = output.buffer();
+    bool src_is_dram = src_buffer->buffer_type() == tt::tt_metal::BufferType::DRAM ? 1 : 0;
+    bool is_blackhole = (input.device()->arch() == tt::ARCH::BLACKHOLE);
+    bool is_blackhole_and_dram = (input.device()->arch() == tt::ARCH::BLACKHOLE) and src_is_dram;
+
     if (input.get_layout() == Layout::TILE) {
         num_units = input.volume() / TILE_HW;
         input_unit_size = tt::tt_metal::detail::TileSize(input_cb_data_format);
@@ -66,13 +74,6 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
         padded_offset_bytes = align(input_unit_size, input.buffer()->alignment());
     }
 
-    bool convert_df = input_cb_data_format != output_cb_data_format;
-
-    auto src_buffer = input.buffer();
-
-    auto dst_buffer = output.buffer();
-
-    bool src_is_dram = src_buffer->buffer_type() == tt::tt_metal::BufferType::DRAM ? 1 : 0;
 
     auto all_cores = shard_spec.grid;
     uint32_t input_cb_index = tt::CB::c_in0;
@@ -94,10 +95,17 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
             .set_globally_allocated_address(*output.buffer());
     auto cb_output = tt::tt_metal::CreateCircularBuffer(program, all_cores, output_cb_out_config);
     uint32_t dram_alignment = hal.get_alignment(HalMemType::DRAM);
-    if (src_is_dram && input_unit_size % dram_alignment != 0) {
-        uint32_t scratch_cb_page_size = align(input_unit_size, dram_alignment);
+    if (src_is_dram && input_unit_size % dram_alignment != 0 or is_blackhole_and_dram) {
+        uint32_t scratch_cb_page_size;
+        //scratchpad going to be used to align DRAM (64B) to L1 (16B)
+        if (is_blackhole_and_dram) {
+            scratch_cb_page_size = align(input_unit_size, hal.get_alignment(HalMemType::L1));
+        }
+        else {
+            scratch_cb_page_size = align(input_unit_size, dram_alignment);
+        }
         tt::tt_metal::CircularBufferConfig scratch_cb_out_config =
-            tt::tt_metal::CircularBufferConfig(1 * scratch_cb_page_size, {{scratch_cb_index, input_cb_data_format}})
+            tt::tt_metal::CircularBufferConfig(4 * scratch_cb_page_size, {{scratch_cb_index, input_cb_data_format}})
                 .set_page_size(scratch_cb_index, scratch_cb_page_size);
         auto cb_scratch = tt::tt_metal::CreateCircularBuffer(program, all_cores, scratch_cb_out_config);
     }
@@ -236,10 +244,17 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
             }
 
             uint32_t dram_alignment = hal.get_alignment(HalMemType::DRAM);
-            bool aligned = src_is_dram ? curr_idx_w % dram_alignment == 0 : true;
+            uint32_t l1_alignment = hal.get_alignment(HalMemType::L1);
+            bool aligned = (src_is_dram ? curr_idx_w % dram_alignment == 0 : true);
+            aligned = aligned and !(is_blackhole_and_dram);
             uint32_t aligned_width_offset, aligned_shard_width, aligned_offset;
             if (!aligned) {
-                aligned_width_offset = tt::round_down(curr_idx_w, dram_alignment);
+                if(src_is_dram) {
+                    aligned_width_offset = tt::round_down(curr_idx_w, dram_alignment);
+                }
+                else {
+                    aligned_width_offset = tt::round_down(curr_idx_w, l1_alignment);
+                }
                 aligned_offset = curr_idx_w - aligned_width_offset;
                 aligned_shard_width = aligned_offset + shard_width;
             } else {
@@ -256,7 +271,7 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
                  num_units_per_row,
                  shard_height,
                  shard_width,
-                 padded_offset_bytes,
+                 (is_blackhole) ? shard_width : padded_offset_bytes,
                  static_cast<uint32_t>(aligned),
                  aligned_width_offset,
                  aligned_shard_width,
@@ -305,6 +320,4 @@ operation::ProgramWithCallbacks interleaved_to_sharded_multi_core(
     return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_arguments_callback};
 }
 
-
-
 }
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_op.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_op.cpp
index 55b32e3c00a..f736258f7d6 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_op.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_op.cpp
@@ -20,9 +20,8 @@ void ShardedToInterleavedDeviceOperation::validate(const std::vector<Tensor>& in
     TT_FATAL(input_tensor.memory_config().buffer_type == BufferType::L1, "Input tensor must be in L1");
     TT_FATAL(this->output_mem_config.memory_layout == TensorMemoryLayout::INTERLEAVED, "Output memory config must be Interleaved");
     if (input_tensor.get_layout() == Layout::ROW_MAJOR) {
-        uint32_t dram_alignment = hal.get_alignment(HalMemType::DRAM);
         uint32_t l1_alignment = hal.get_alignment(HalMemType::L1);
-        TT_FATAL((*input_tensor.memory_config().shard_spec).shape[1] * input_tensor.element_size() % (this->output_mem_config.buffer_type == BufferType::DRAM ? dram_alignment : l1_alignment) == 0, "Shard page size must be aligned to {}B for L1 Tensor, or {}B for DRAM tensor", l1_alignment, dram_alignment);
+        TT_FATAL((*input_tensor.memory_config().shard_spec).shape[1] * input_tensor.element_size() % (l1_alignment) == 0, "Shard page size must be aligned to {}B for L1 Tensor", l1_alignment);
     }
     if (input_tensor.get_dtype() != this->output_dtype) {
         TT_FATAL(input_tensor.get_layout() == Layout::TILE, "If diff output type, tensor must be TILED");
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp
index 6d585e65a13..2cb58883bf1 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp
@@ -98,6 +98,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(
         tt_metal::ReaderDataMovementConfig(reader_compile_time_args));
 
     bool dst_is_dram = dst_buffer->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
+    bool is_blackhole = (input.device()->arch() == tt::ARCH::BLACKHOLE);
 
     tt_metal::KernelHandle unary_writer_kernel_id;
     if (input.get_layout() == Layout::TILE) {
@@ -141,7 +142,8 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(
     uint32_t curr_idx_w = 0;
 
     const auto cores = corerange_to_cores(all_cores, std::nullopt, rm_orientation);
-    uint32_t padded_shard_width = align(output_unit_size, dst_buffer->alignment());
+    uint32_t padded_offset_bytes;
+
     for (const auto& core : cores) {
         if (input.get_layout() == Layout::TILE) {
             uint32_t shard_height = num_units_per_shard_height;
@@ -217,6 +219,13 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(
                     }
                 }
             }
+            uint32_t dram_alignment = hal.get_alignment(HalMemType::DRAM);
+            uint32_t l1_alignment = hal.get_alignment(HalMemType::L1);
+            uint32_t padded_shard_width = align(output_unit_size, dst_buffer->alignment());
+            if(is_blackhole) {
+                if(!dst_is_dram)
+                    padded_shard_width = align(output_unit_size, l1_alignment);
+            }
             tt_metal::SetRuntimeArgs(
                 program,
                 unary_writer_kernel_id,
@@ -225,7 +234,7 @@ operation::ProgramWithCallbacks sharded_to_interleaved_multi_core(
                  num_units_per_row,
                  shard_height,
                  shard_width,
-                 padded_shard_width,
+                 (is_blackhole) ? shard_width : padded_shard_width,
                  curr_idx_w,
                  curr_idx_h});
             curr_idx_w += output_unit_size;

From 462c8f61dc805e46858d33e9a412955ad6c0850f Mon Sep 17 00:00:00 2001
From: Mohamed Bahnas <116673264+mbahnasTT@users.noreply.github.com>
Date: Thu, 14 Nov 2024 05:06:52 +0200
Subject: [PATCH 59/69] [skip ci] Update README.md (MM FLOPS) (#15029)

### Ticket
Link to Github Issue

### Problem description
Provide context for the problem.

### What's changed
Describe the approach used to solve the problem.
Summarize the changes made and its impact.

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 916150e52d5..39996f902d9 100644
--- a/README.md
+++ b/README.md
@@ -50,8 +50,8 @@
 | [ResNet-50 (224x224) (DP=8)](./models/demos/t3000/resnet50)                 | 128   | [QuietBox](https://tenstorrent.com/hardware/tt-quietbox) | 32,250  | 56,000     |             |
 | [ResNet-50 (224x224) (DP=32)](./models/demos/tg/resnet50)                   | 512   | [Galaxy](https://tenstorrent.com/hardware/galaxy)        | 95,900  | 224,000    |             |
 | [ResNet-50 (224x224) (DP=64)](./models/demos/tgg/resnet50)                  | 1024  | [Two Galaxies](https://tenstorrent.com/hardware/galaxy)  | 145,000 | 448,000    |             |
-| [ViT](./models/demos/grayskull/vit)                                         | 9     | [e150](https://tenstorrent.com/hardware/grayskull)       | 1,360   | 2,000      |             |
-| [ViT](./models/demos/wormhole/vit)                                          | 8     | [n150](https://tenstorrent.com/hardware/wormhole)        | 912     | 1,600      |             |
+| [ViT (224x224)](./models/demos/grayskull/vit)                                         | 9     | [e150](https://tenstorrent.com/hardware/grayskull)       | 1,360   | 2,000      |             |
+| [ViT (224x224)](./models/demos/wormhole/vit)                                          | 8     | [n150](https://tenstorrent.com/hardware/wormhole)        | 912     | 1,600      |             |
 | [Stable Diffusion 1.4 (512x512)](./models/demos/wormhole/stable_diffusion)  | 1     | [n150](https://tenstorrent.com/hardware/wormhole)        | 0.167   | 0.3        |             |
 | [Yolo V4 (320x320)](./models/experimental/yolov4)                           | 1     | [n150](https://tenstorrent.com/hardware/wormhole)        | 95      | 300        |             |
 
@@ -73,6 +73,10 @@ For the latest model updates and features, please see [MODEL_UPDATES.md](models/
 - [ViT Implementation in TT-NN on GS](./tech_reports/ViT-TTNN/vit.md)  (updated Sept 22nd)
 - [LLMs Bring up in TT-NN](./tech_reports/LLMs/llms.md)  (updated Oct 29th)
 - [YOLOv4 Implementation in TT-NN on WH](./tech_reports/YoloV4-TTNN/yolov4.md)  (updated November 8th)
+
+## Benchmarks
+- [Matrix Multiply FLOPS on WH](./tech_reports/GEMM_FLOPS/GEMM_FLOPS.md)  (updated November 13th)
+  
 ---
 
 <div align="center">

From 375f35c38fe143d5636ac03650220a989ad61d40 Mon Sep 17 00:00:00 2001
From: Austin Ho <aho@tenstorrent.com>
Date: Wed, 6 Nov 2024 16:18:21 +0000
Subject: [PATCH 60/69] #0: Split memcpy_to_device related functionality into
 new memcpy.hpp file

---
 .../impl/dispatch/command_queue_interface.hpp | 65 ++--------------
 tt_metal/impl/dispatch/device_command.hpp     |  7 +-
 tt_metal/impl/dispatch/memcpy.hpp             | 78 +++++++++++++++++++
 3 files changed, 87 insertions(+), 63 deletions(-)
 create mode 100644 tt_metal/impl/dispatch/memcpy.hpp

diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp
index 85c756c2efc..346cbcbc784 100644
--- a/tt_metal/impl/dispatch/command_queue_interface.hpp
+++ b/tt_metal/impl/dispatch/command_queue_interface.hpp
@@ -10,18 +10,19 @@
 #include "tt_metal/common/math.hpp"
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
 #include "tt_metal/impl/dispatch/dispatch_core_manager.hpp"
+#include "tt_metal/impl/dispatch/memcpy.hpp"
 #include "tt_metal/llrt/hal.hpp"
 #include "tt_metal/llrt/llrt.hpp"
 
 using namespace tt::tt_metal;
 
+namespace tt::tt_metal {
+
 // todo consider moving these to dispatch_addr_map
 static constexpr uint32_t MAX_HUGEPAGE_SIZE = 1 << 30; // 1GB;
 static constexpr uint32_t MAX_DEV_CHANNEL_SIZE = 1 << 28; // 256 MB;
 static constexpr uint32_t DEVICES_PER_UMD_CHANNEL = MAX_HUGEPAGE_SIZE / MAX_DEV_CHANNEL_SIZE; // 256 MB;
 
-static constexpr uint32_t MEMCPY_ALIGNMENT = sizeof(__m128i);
-
 enum class CommandQueueDeviceAddrType : uint8_t {
     PREFETCH_Q_RD = 0,
     // Used to notify host of how far device has gotten, doesn't need L1 alignment because it's only written locally by
@@ -308,64 +309,6 @@ inline uint32_t get_cq_completion_rd_ptr(chip_id_t chip_id, uint8_t cq_id, uint3
     return recv;
 }
 
-// Ideally would work by cachelines, but the min size is less than that
-// TODO: Revisit this w/ regard to possibly eliminating min sizes and orphan writes at the end
-// TODO: ditto alignment isues
-template <bool debug_sync = false>
-static inline void memcpy_to_device(void *__restrict dst, const void *__restrict src, size_t n) {
-    TT_ASSERT((uintptr_t)dst % MEMCPY_ALIGNMENT == 0);
-    TT_ASSERT(n % sizeof(uint32_t) == 0);
-
-    static constexpr uint32_t inner_loop = 8;
-    static constexpr uint32_t inner_blk_size = inner_loop * sizeof(__m256i);
-
-    uint8_t *src8 = (uint8_t *)src;
-    uint8_t *dst8 = (uint8_t *)dst;
-
-    if (size_t num_lines = n / inner_blk_size) {
-        for (size_t i = 0; i < num_lines; ++i) {
-            for (size_t j = 0; j < inner_loop; ++j) {
-                __m256i blk = _mm256_loadu_si256((const __m256i *)src8);
-                _mm256_stream_si256((__m256i *)dst8, blk);
-                src8 += sizeof(__m256i);
-                dst8 += sizeof(__m256i);
-            }
-            n -= inner_blk_size;
-        }
-    }
-
-    if (n > 0) {
-        if (size_t num_lines = n / sizeof(__m256i)) {
-            for (size_t i = 0; i < num_lines; ++i) {
-                __m256i blk = _mm256_loadu_si256((const __m256i *)src8);
-                _mm256_stream_si256((__m256i *)dst8, blk);
-                src8 += sizeof(__m256i);
-                dst8 += sizeof(__m256i);
-            }
-            n -= num_lines * sizeof(__m256i);
-        }
-        if (size_t num_lines = n / sizeof(__m128i)) {
-            for (size_t i = 0; i < num_lines; ++i) {
-                __m128i blk = _mm_loadu_si128((const __m128i *)src8);
-                _mm_stream_si128((__m128i *)dst8, blk);
-                src8 += sizeof(__m128i);
-                dst8 += sizeof(__m128i);
-            }
-            n -= n / sizeof(__m128i) * sizeof(__m128i);
-        }
-        if (n > 0) {
-            for (size_t i = 0; i < n / sizeof(int32_t); ++i) {
-                _mm_stream_si32((int32_t *)dst8, *(int32_t *)src8);
-                src8 += sizeof(int32_t);
-                dst8 += sizeof(int32_t);
-            }
-        }
-    }
-    if constexpr (debug_sync) {
-        tt_driver_atomics::sfence();
-    }
-}
-
 struct SystemMemoryCQInterface {
     // CQ is split into issue and completion regions
     // Host writes commands and data for H2D transfers in the issue region, device reads from the issue region
@@ -869,3 +812,5 @@ struct LaunchMessageRingBufferState {
     uint32_t multicast_cores_launch_message_wptr = 0;
     uint32_t unicast_cores_launch_message_wptr = 0;
 };
+
+}  // namespace tt::tt_metal
diff --git a/tt_metal/impl/dispatch/device_command.hpp b/tt_metal/impl/dispatch/device_command.hpp
index 12f4caf3af5..2e0decaae05 100644
--- a/tt_metal/impl/dispatch/device_command.hpp
+++ b/tt_metal/impl/dispatch/device_command.hpp
@@ -11,12 +11,11 @@
 #include "common/env_lib.hpp"
 #include "tt_metal/impl/dispatch/command_queue_interface.hpp"
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
+#include "tt_metal/impl/dispatch/memcpy.hpp"
 #include "tt_metal/tt_stl/aligned_allocator.hpp"
 #include "tt_metal/llrt/hal.hpp"
 
-template <typename T>
-using vector_memcpy_aligned = std::vector<T, tt::stl::aligned_allocator<T, MEMCPY_ALIGNMENT>>;
-
+namespace tt::tt_metal {
 template <bool hugepage_write = false>
 class DeviceCommand {
    public:
@@ -762,3 +761,5 @@ bool DeviceCommand<hugepage_write>::zero_init_enable = tt::parse_env<bool>("TT_M
 
 using HugepageDeviceCommand = DeviceCommand<true>;
 using HostMemDeviceCommand = DeviceCommand<false>;
+
+}  // namespace tt::tt_metal
diff --git a/tt_metal/impl/dispatch/memcpy.hpp b/tt_metal/impl/dispatch/memcpy.hpp
new file mode 100644
index 00000000000..c8e5c730532
--- /dev/null
+++ b/tt_metal/impl/dispatch/memcpy.hpp
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <emmintrin.h>
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/tt_stl/aligned_allocator.hpp"
+#include "tt_metal/third_party/umd/device/device_api_metal.h"
+
+namespace tt::tt_metal {
+
+static constexpr uint32_t MEMCPY_ALIGNMENT = sizeof(__m128i);
+
+template <typename T>
+using vector_memcpy_aligned = std::vector<T, tt::stl::aligned_allocator<T, MEMCPY_ALIGNMENT>>;
+
+// Ideally would work by cachelines, but the min size is less than that
+// TODO: Revisit this w/ regard to possibly eliminating min sizes and orphan writes at the end
+// TODO: ditto alignment isues
+template <bool debug_sync = false>
+static inline void memcpy_to_device(void *__restrict dst, const void *__restrict src, size_t n) {
+    TT_ASSERT((uintptr_t)dst % MEMCPY_ALIGNMENT == 0);
+    TT_ASSERT(n % sizeof(uint32_t) == 0);
+
+    static constexpr uint32_t inner_loop = 8;
+    static constexpr uint32_t inner_blk_size = inner_loop * sizeof(__m256i);
+
+    uint8_t *src8 = (uint8_t *)src;
+    uint8_t *dst8 = (uint8_t *)dst;
+
+    if (size_t num_lines = n / inner_blk_size) {
+        for (size_t i = 0; i < num_lines; ++i) {
+            for (size_t j = 0; j < inner_loop; ++j) {
+                __m256i blk = _mm256_loadu_si256((const __m256i *)src8);
+                _mm256_stream_si256((__m256i *)dst8, blk);
+                src8 += sizeof(__m256i);
+                dst8 += sizeof(__m256i);
+            }
+            n -= inner_blk_size;
+        }
+    }
+
+    if (n > 0) {
+        if (size_t num_lines = n / sizeof(__m256i)) {
+            for (size_t i = 0; i < num_lines; ++i) {
+                __m256i blk = _mm256_loadu_si256((const __m256i *)src8);
+                _mm256_stream_si256((__m256i *)dst8, blk);
+                src8 += sizeof(__m256i);
+                dst8 += sizeof(__m256i);
+            }
+            n -= num_lines * sizeof(__m256i);
+        }
+        if (size_t num_lines = n / sizeof(__m128i)) {
+            for (size_t i = 0; i < num_lines; ++i) {
+                __m128i blk = _mm_loadu_si128((const __m128i *)src8);
+                _mm_stream_si128((__m128i *)dst8, blk);
+                src8 += sizeof(__m128i);
+                dst8 += sizeof(__m128i);
+            }
+            n -= n / sizeof(__m128i) * sizeof(__m128i);
+        }
+        if (n > 0) {
+            for (size_t i = 0; i < n / sizeof(int32_t); ++i) {
+                _mm_stream_si32((int32_t *)dst8, *(int32_t *)src8);
+                src8 += sizeof(int32_t);
+                dst8 += sizeof(int32_t);
+            }
+        }
+    }
+    if constexpr (debug_sync) {
+        tt_driver_atomics::sfence();
+    }
+}
+
+} // namespace tt::tt_metal

From ec673d37fa558176386f30b8af31c0d8bc596b6c Mon Sep 17 00:00:00 2001
From: Austin Ho <aho@tenstorrent.com>
Date: Tue, 22 Oct 2024 17:33:55 +0000
Subject: [PATCH 61/69] #13655: Initial FD refactor to support sub devices
 Support multiple dispatch entries for worker->dispatch sync Update dispatch
 d/s to have a semaphore per dispatch entry to enable syncing on specific
 worker counts Update LaunchMessageRingBufferState and WorkerConfigBufferMgr
 to be tracked per sub_device Update various FD commands to support syncing on
 multiple sub devices: - ERB, EWB, ERE takes in a list of sub devices for
 blocking/issuing waits on. Will wait on all sub-devices if none are provided
 - Trace will track only specific sub devices used - EP currently only
 supports one sub-device - Remove compile time mcast grid and unicast cores
 from dispatch kernels   CQDispatchGoSignalMcastCmd now expects noc txn data
 to follow the cmd for sending go signal to cores

---
 .../command_queue/EnqueueReadBuffer.rst       |   4 +-
 .../command_queue/EnqueueWriteBuffer.rst      |   4 +-
 .../3_pcie_transfer/test_pull_from_pcie.cpp   |   1 +
 .../dispatch/test_dispatcher.cpp              |   2 +-
 .../dispatch/test_prefetcher.cpp              |   8 +-
 .../test_kernels/dataflow/dram_copy.cpp       |   2 +-
 .../test_kernels/misc/watcher_asserts.cpp     |   2 +-
 tt_metal/host_api.hpp                         |  96 +--
 tt_metal/hw/firmware/src/brisc.cc             |  12 +-
 tt_metal/hw/firmware/src/erisc.cc             |   6 +-
 tt_metal/hw/firmware/src/idle_erisc.cc        |   2 +-
 tt_metal/hw/inc/dev_msgs.h                    |   2 +-
 tt_metal/impl/device/device.cpp               | 201 ++++--
 tt_metal/impl/device/device.hpp               |  38 +-
 tt_metal/impl/dispatch/command_queue.cpp      | 626 ++++++++++++------
 tt_metal/impl/dispatch/command_queue.hpp      |  78 ++-
 .../impl/dispatch/command_queue_interface.hpp |  24 +-
 tt_metal/impl/dispatch/cq_commands.hpp        |  14 +-
 tt_metal/impl/dispatch/debug_tools.cpp        |   5 +-
 tt_metal/impl/dispatch/device_command.hpp     |  54 +-
 .../impl/dispatch/dispatch_core_manager.hpp   |   1 -
 .../impl/dispatch/kernels/cq_dispatch.cpp     |  68 +-
 .../dispatch/kernels/cq_dispatch_slave.cpp    | 121 ++--
 .../impl/dispatch/kernels/cq_prefetch.cpp     |   4 +-
 tt_metal/impl/program/program.cpp             |  65 +-
 tt_metal/impl/program/program.hpp             |   6 +-
 tt_metal/impl/trace/trace_buffer.hpp          |  14 +-
 tt_metal/llrt/hal.hpp                         |   2 +
 tt_metal/tt_metal.cpp                         |   6 +-
 29 files changed, 947 insertions(+), 521 deletions(-)

diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
index 6f7b9929086..037f50995d5 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
@@ -1,5 +1,5 @@
 EnqueueReadBuffer
 ==================
 
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& dst, bool blocking)
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, void * dst, bool blocking)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, void * dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
index 3b48f8b7b4a..85d61986364 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
@@ -1,5 +1,5 @@
 EnqueueWriteBuffer
 ==================
 
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& src, bool blocking)
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, HostDataType src, bool blocking)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/3_pcie_transfer/test_pull_from_pcie.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/3_pcie_transfer/test_pull_from_pcie.cpp
index fdbb5ccc2c1..4e0f88a4267 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/3_pcie_transfer/test_pull_from_pcie.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/3_pcie_transfer/test_pull_from_pcie.cpp
@@ -13,6 +13,7 @@
 #include "tt_metal/host_api.hpp"
 #include "tt_metal/impl/dispatch/command_queue.hpp"
 #include "tt_metal/impl/dispatch/command_queue_interface.hpp"
+#include "tt_metal/impl/dispatch/memcpy.hpp"
 #include "tt_metal/tt_metal/perf_microbenchmark/common/util.hpp"
 
 using namespace tt;
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
index ae6c2cf33a3..be7d28a6afd 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
@@ -477,7 +477,7 @@ int main(int argc, char **argv) {
              0,    // prefetch_downstream_buffer_pages
              num_compute_cores, // max_write_packed_cores
              0,
-             0,
+             dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
              0,
              0,
              0,
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
index a272a402425..15494c37bce 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
@@ -1920,7 +1920,7 @@ void configure_for_single_chip(Device *device,
          prefetch_downstream_buffer_pages,
          num_compute_cores, // max_write_packed_cores
          0,
-         0,
+         dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
          0,
          0,
          0,
@@ -1940,6 +1940,7 @@ void configure_for_single_chip(Device *device,
         dispatch_compile_args[12] = dispatch_downstream_cb_sem;
         dispatch_compile_args[13] = dispatch_h_cb_sem;
         dispatch_compile_args[14] = dispatch_d_preamble_size;
+        dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
         CoreCoord phys_dispatch_d_downstream_core =
             packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
         configure_kernel_variant<true, false>(program,
@@ -1960,6 +1961,7 @@ void configure_for_single_chip(Device *device,
         dispatch_compile_args[12] = dispatch_h_cb_sem;
         dispatch_compile_args[13] = dispatch_downstream_cb_sem;
         dispatch_compile_args[14] = 0; // preamble size
+        dispatch_compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
         CoreCoord phys_dispatch_h_upstream_core =
             packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
         configure_kernel_variant<false, true>(program,
@@ -2663,7 +2665,7 @@ void configure_for_multi_chip(Device *device,
          prefetch_downstream_buffer_pages,
          num_compute_cores,
          0,
-         0,
+         dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
          0,
          0,
          0,
@@ -2683,6 +2685,7 @@ void configure_for_multi_chip(Device *device,
         dispatch_compile_args[12] = dispatch_downstream_cb_sem;
         dispatch_compile_args[13] = dispatch_h_cb_sem;
         dispatch_compile_args[14] = dispatch_d_preamble_size;
+        dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
         CoreCoord phys_dispatch_d_downstream_core =
             packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
         configure_kernel_variant<true, false>(program_r,
@@ -2702,6 +2705,7 @@ void configure_for_multi_chip(Device *device,
         dispatch_compile_args[12] = dispatch_h_cb_sem;
         dispatch_compile_args[13] = dispatch_downstream_cb_sem;
         dispatch_compile_args[14] = 0; // preamble size
+        dispatch_compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
         CoreCoord phys_dispatch_h_upstream_core =
             packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
         configure_kernel_variant<false, true>(program,
diff --git a/tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp b/tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp
index 78a989fdab7..13c5c4c40c5 100644
--- a/tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp
+++ b/tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp
@@ -34,7 +34,7 @@ void kernel_main() {
         tt_l1_ptr mailboxes_t* const mailboxes = (tt_l1_ptr mailboxes_t*)(MEM_MAILBOX_BASE);
 #endif
         uint64_t dispatch_addr = NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                                             NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                                             NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
         noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31, false);
 #endif
 
diff --git a/tests/tt_metal/tt_metal/test_kernels/misc/watcher_asserts.cpp b/tests/tt_metal/tt_metal/test_kernels/misc/watcher_asserts.cpp
index 6c623db7eb3..13406c2423b 100644
--- a/tests/tt_metal/tt_metal/test_kernels/misc/watcher_asserts.cpp
+++ b/tests/tt_metal/tt_metal/test_kernels/misc/watcher_asserts.cpp
@@ -41,7 +41,7 @@ void MAIN {
 #endif
         uint64_t dispatch_addr =
             NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                        NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                        NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
         noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/);
     }
 #else
diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp
index 88ea38e80a2..5d0fffba0e1 100644
--- a/tt_metal/host_api.hpp
+++ b/tt_metal/host_api.hpp
@@ -485,72 +485,81 @@ RuntimeArgsData &GetCommonRuntimeArgs(const Program &program, KernelHandle kerne
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                                | Valid Range                            | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------------|----------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                      |                                        | Yes      |
- * | buffer       | The device buffer we are reading from                                  | Buffer & or std::shared_ptr<Buffer> |                                        | Yes      |
- * | dst          | The vector where the results that are read will be stored              | vector<uint32_t> &                  |                                        | Yes      |
- * | blocking     | Whether or not this is a blocking operation                            | bool                                | Only blocking mode supported currently | Yes      |
+ * | Argument       | Description                                                                       | Type                                | Valid Range                            | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------------|----------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                      |                                        | Yes      |
+ * | buffer         | The device buffer we are reading from                                             | Buffer & or std::shared_ptr<Buffer> |                                        | Yes      |
+ * | dst            | The vector where the results that are read will be stored                         | vector<uint32_t> &                  |                                        | Yes      |
+ * | blocking       | Whether or not this is a blocking operation                                       | bool                                | Only blocking mode supported currently | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t>       |                                    | No       |
  */
 void EnqueueReadBuffer(
     CommandQueue &cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t> &dst,
-    bool blocking);
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Reads a buffer from the device
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                                | Valid Range                            | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------------|----------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                      |                                        | Yes      |
- * | buffer       | The device buffer we are reading from                                  | Buffer & or std::shared_ptr<Buffer> |                                        | Yes      |
- * | dst          | The memory where the result will be stored                             | void*                               |                                        | Yes      |
- * | blocking     | Whether or not this is a blocking operation                            | bool                                | Only blocking mode supported currently | Yes      |
+ * | Argument       | Description                                                                       | Type                                | Valid Range                            | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------------|----------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                      |                                        | Yes      |
+ * | buffer         | The device buffer we are reading from                                             | Buffer & or std::shared_ptr<Buffer> |                                        | Yes      |
+ * | dst            | The memory where the result will be stored                                        | void*                               |                                        | Yes      |
+ * | blocking       | Whether or not this is a blocking operation                                       | bool                                | Only blocking mode supported currently | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t>       |                                        | No       |
  */
 void EnqueueReadBuffer(
     CommandQueue &cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void *dst,
-    bool blocking);
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Writes a buffer to the device
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                                | Valid Range                        | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------------|------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                      |                                    | Yes      |
- * | buffer       | The device buffer we are writing to                                    | Buffer & or std::shared_ptr<Buffer> |                                    | Yes      |
- * | src          | The vector we are writing to the device                                | vector<uint32_t> &                  |                                    | Yes      |
- * | blocking     | Whether or not this is a blocking operation                            | bool                                |                                    | Yes      |
+ * | Argument       | Description                                                                       | Type                                | Valid Range                        | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------------|------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                      |                                    | Yes      |
+ * | buffer         | The device buffer we are writing to                                               | Buffer & or std::shared_ptr<Buffer> |                                    | Yes      |
+ * | src            | The vector we are writing to the device                                           | vector<uint32_t> &                  |                                    | Yes      |
+ * | blocking       | Whether or not this is a blocking operation                                       | bool                                |                                    | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t>       |                                    | No       |
+
  */
 void EnqueueWriteBuffer(
     CommandQueue &cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t> &src,
-    bool blocking);
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Writes a buffer to the device
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                                | Valid Range                        | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------------|------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                      |                                    | Yes      |
- * | buffer       | The device buffer we are writing to                                    | Buffer & or std::shared_ptr<Buffer> |                                    | Yes      |
- * | src          | The memory we are writing to the device                                | HostDataType                        |                                    | Yes      |
- * | blocking     | Whether or not this is a blocking operation                            | bool                                |                                    | Yes      |
+ * | Argument       | Description                                                                       | Type                                | Valid Range                        | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------------|------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                      |                                    | Yes      |
+ * | buffer         | The device buffer we are writing to                                               | Buffer & or std::shared_ptr<Buffer> |                                    | Yes      |
+ * | src            | The memory we are writing to the device                                           | HostDataType                        |                                    | Yes      |
+ * | blocking       | Whether or not this is a blocking operation                                       | bool                                |                                    | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t>       |                                    | No       |
  */
 void EnqueueWriteBuffer(
     CommandQueue &cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
-    bool blocking);
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Writes a program to the device and launches it
@@ -570,11 +579,12 @@ void EnqueueProgram(CommandQueue& cq, Program& program, bool blocking);
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                          | Valid Range                        | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                |                                    | Yes      |
+ * | Argument       | Description                                                                       | Type                          | Valid Range                        | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                |                                    | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void Finish(CommandQueue &cq);
+void Finish(CommandQueue &cq, tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Begins capture on a trace, when the trace is in capture mode all programs pushed into the trace queue will have their execution delayed until the trace is instantiated and enqueued.
@@ -665,12 +675,13 @@ void DumpDeviceProfileResults(Device *device, const Program &program);
 /**
  * Enqueues a command to record an Event on the device for a given CQ, and updates the Event object for the user.
  * Return value: void
- * | Argument     | Description                                                            | Type                          | Valid Range                        | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
- * | cq           | The command queue object which dispatches the command to the hardware  | CommandQueue &                |                                    | Yes      |
- * | event        | An event that will be populated by this function, and inserted in CQ   | std::shared_ptr<Event>        |                                    | Yes      |
+ * | Argument       | Description                                                                       | Type                          | Valid Range                        | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
+ * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                |                                    | Yes      |
+ * | event          | An event that will be populated by this function, and inserted in CQ              | std::shared_ptr<Event>        |                                    | Yes      |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void EnqueueRecordEvent(CommandQueue &cq, const std::shared_ptr<Event> &event);
+void EnqueueRecordEvent(CommandQueue &cq, const std::shared_ptr<Event> &event, tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 /**
  * Enqueues a command on the device for a given CQ (non-blocking). The command on device will block and wait for completion of the specified event (which may be in another CQ).
@@ -708,12 +719,13 @@ bool EventQuery(const std::shared_ptr<Event> &event);
  *
  * Return value: void
  *
- * | Argument     | Description                                                            | Type                          | Valid Range                        | Required |
- * |--------------|------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
- * | device       | The device to synchronize.                                             | Device *                      |                                    | Yes      |
- * | cq_id        | The specific command queue id to synchronize  .                        | uint8_t                       |                                    | No       |
+ * | Argument       | Description                                                                       | Type                          | Valid Range                        | Required |
+ * |----------------|-----------------------------------------------------------------------------------|-------------------------------|------------------------------------|----------|
+ * | device         | The device to synchronize.                                                        | Device *                      |                                    | Yes      |
+ * | cq_id          | The specific command queue id to synchronize  .                                   | uint8_t                       |                                    | No       |
+ * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void Synchronize(Device *device, const std::optional<uint8_t> cq_id = std::nullopt);
+void Synchronize(Device *device, const std::optional<uint8_t> cq_id = std::nullopt, tt::stl::Span<const uint32_t> sub_device_ids = {});
 
 }  // namespace v0
 }  // namespace tt_metal
diff --git a/tt_metal/hw/firmware/src/brisc.cc b/tt_metal/hw/firmware/src/brisc.cc
index 51c2bcc4338..0702555d237 100644
--- a/tt_metal/hw/firmware/src/brisc.cc
+++ b/tt_metal/hw/firmware/src/brisc.cc
@@ -360,8 +360,14 @@ int main() {
 
     mailboxes->go_message.signal = RUN_MSG_DONE;
 
+    // Initialize the NoCs to a safe state
+    // This ensures if we send any noc txns without running a kernel setup are valid
+    // ex. Immediately after starting, we send a RUN_MSG_RESET_READ_PTR signal
     uint8_t noc_mode;
-    uint8_t prev_noc_mode = DM_INVALID_NOC;
+    noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);
+    noc_local_state_init(noc_index);
+    uint8_t prev_noc_mode = DM_DEDICATED_NOC;
+
     while (1) {
         init_sync_registers();
         reset_ncrisc_with_iram();
@@ -379,7 +385,7 @@ int main() {
                 // For future proofing, the noc_index value is initialized to 0, to ensure an invalid NOC txn is not issued.
                 uint64_t dispatch_addr =
                     NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                    NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                    NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
                 mailboxes->go_message.signal = RUN_MSG_DONE;
                 // Notify dispatcher that this has been done
                 DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
@@ -465,7 +471,7 @@ int main() {
                 launch_msg_address->kernel_config.enables = 0;
                 uint64_t dispatch_addr =
                     NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                        NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                        NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
                 DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
                 // Only executed if watcher is enabled. Ensures that we don't report stale data due to invalid launch
                 // messages in the ring buffer. Must be executed before the atomic increment, as after that the launch
diff --git a/tt_metal/hw/firmware/src/erisc.cc b/tt_metal/hw/firmware/src/erisc.cc
index 664afdc89f9..4e690d1532a 100644
--- a/tt_metal/hw/firmware/src/erisc.cc
+++ b/tt_metal/hw/firmware/src/erisc.cc
@@ -88,7 +88,7 @@ void __attribute__((noinline)) Application(void) {
                 launch_msg_address->kernel_config.enables = 0;
                 uint64_t dispatch_addr =
                     NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                                NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                                NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
                 CLEAR_PREVIOUS_LAUNCH_MESSAGE_ENTRY_FOR_WATCHER();
                 internal_::notify_dispatch_core_done(dispatch_addr);
                 mailboxes->launch_msg_rd_ptr = (launch_msg_rd_ptr + 1) & (launch_msg_buffer_num_entries - 1);
@@ -99,9 +99,9 @@ void __attribute__((noinline)) Application(void) {
         } else if (go_message_signal == RUN_MSG_RESET_READ_PTR) {
             // Reset the launch message buffer read ptr
             mailboxes->launch_msg_rd_ptr = 0;
-            int64_t dispatch_addr =
+            uint64_t dispatch_addr =
                 NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                            NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
+                            NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
             mailboxes->go_message.signal = RUN_MSG_DONE;
             internal_::notify_dispatch_core_done(dispatch_addr);
         } else {
diff --git a/tt_metal/hw/firmware/src/idle_erisc.cc b/tt_metal/hw/firmware/src/idle_erisc.cc
index 554dd5952ab..307faa8e0dc 100644
--- a/tt_metal/hw/firmware/src/idle_erisc.cc
+++ b/tt_metal/hw/firmware/src/idle_erisc.cc
@@ -164,7 +164,7 @@ int main() {
                 launch_msg_address->kernel_config.enables = 0;
                 uint64_t dispatch_addr =
                     NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
-                        NOC_Y(mailboxes->go_message.master_x), DISPATCH_MESSAGE_ADDR);
+                        NOC_Y(mailboxes->go_message.master_x), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
                 DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
                 CLEAR_PREVIOUS_LAUNCH_MESSAGE_ENTRY_FOR_WATCHER();
                 noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/);
diff --git a/tt_metal/hw/inc/dev_msgs.h b/tt_metal/hw/inc/dev_msgs.h
index 4c3f883b61b..ce9ab22bac7 100644
--- a/tt_metal/hw/inc/dev_msgs.h
+++ b/tt_metal/hw/inc/dev_msgs.h
@@ -112,7 +112,7 @@ struct kernel_config_msg_t {
 } __attribute__((packed));
 
 struct go_msg_t {
-    volatile uint8_t pad;
+    volatile uint8_t dispatch_message_offset;
     volatile uint8_t master_x;
     volatile uint8_t master_y;
     volatile uint8_t signal; // INIT, GO, DONE, RESET_RD_PTR
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 33dbe2b4547..78776f56369 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -56,12 +56,9 @@ bool Device::is_inactive_ethernet_core(CoreCoord logical_core) const {
     return inactive_ethernet_cores.find(logical_core) != inactive_ethernet_cores.end();
 }
 
-uint32_t Device::num_eth_worker_cores() const {
-    return this->num_eth_worker_cores_;
-}
-
-uint32_t Device::num_worker_cores() const {
-    return this->num_worker_cores_;
+uint32_t Device::num_worker_cores(HalProgrammableCoreType core_type, uint32_t sub_device_id) const {
+    TT_FATAL(sub_device_id == 0, "Invalid sub_device index: {}", sub_device_id);
+    return this->num_worker_cores_[static_cast<uint32_t>(core_type)];
 }
 
 std::vector<uint32_t> Device::get_noc_encoding_for_active_eth_cores(NOC noc_index) {
@@ -202,8 +199,9 @@ void Device::initialize_cluster() {
         this->clear_l1_state();
     }
     int ai_clk = tt::Cluster::instance().get_device_aiclk(this->id_);
-    this->num_worker_cores_ = this->compute_with_storage_grid_size().x * this->compute_with_storage_grid_size().y;
-    this->num_eth_worker_cores_ = this->get_active_ethernet_cores(true).size();
+    const auto& compute_grid_size = this->compute_with_storage_grid_size();
+    this->num_worker_cores_[static_cast<uint32_t>(HalProgrammableCoreType::TENSIX)] = compute_grid_size.x * compute_grid_size.y;
+    this->num_worker_cores_[static_cast<uint32_t>(HalProgrammableCoreType::ACTIVE_ETH)] = this->get_active_ethernet_cores(true).size();
     log_info(tt::LogMetal, "AI CLK for device {} is:   {} MHz", this->id_, ai_clk);
 }
 
@@ -480,8 +478,8 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
     uint32_t go_addr = this->get_dev_addr(phys_core, HalL1MemAddrType::GO_MSG);
     tt::Cluster::instance().write_core(go_msg, sizeof(go_msg_t), tt_cxy_pair(this->id(), phys_core), go_addr);
     uint64_t launch_msg_buffer_read_ptr_addr = this->get_dev_addr(phys_core, HalL1MemAddrType::LAUNCH_MSG_BUFFER_RD_PTR);
-    std::vector<uint32_t> zero = {0};
-    tt::Cluster::instance().write_core(zero.data(), sizeof(uint32_t), tt_cxy_pair(this->id(), phys_core), launch_msg_buffer_read_ptr_addr);
+    uint32_t zero = 0;
+    tt::Cluster::instance().write_core(&zero, sizeof(uint32_t), tt_cxy_pair(this->id(), phys_core), launch_msg_buffer_read_ptr_addr);
 }
 
 void Device::reset_cores() {
@@ -1266,7 +1264,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         compile_args[18] = dispatch_constants::get(dispatch_core_type).mux_buffer_pages(num_hw_cqs), // XXXX should this be mux pages?
                         compile_args[19] = settings.num_compute_cores;
                         compile_args[20] = 0; // unused: dispatch_d only
-                        compile_args[21] = 0; // unused: dispatch_d only
+                        compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
                         compile_args[22] = 0; // unused: dispatch_d only
                         compile_args[23] = 0; // unused: dispatch_d only
                         compile_args[24] = 0;
@@ -1320,7 +1318,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                             compile_args[18] = mux_settings.cb_pages,
                             compile_args[19] = settings.num_compute_cores;
                             compile_args[20] = 0; // unused: dispatch_d only
-                            compile_args[21] = 0; // unused: dispatch_d only
+                            compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
                             compile_args[22] = 0; // unused: dispatch_d only
                             compile_args[23] = 0; // unused: dispatch_d only
                             compile_args[24] = 0;
@@ -1610,6 +1608,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                 uint32_t mux_sem = mux_d_settings.consumer_semaphore_id;
                 uint32_t tensix_worker_go_signal_addr = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::GO_MSG);
                 uint32_t eth_worker_go_signal_addr = hal.get_dev_addr(HalProgrammableCoreType::ACTIVE_ETH, HalL1MemAddrType::GO_MSG);
+                uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
                 for (auto&[core, dispatch_d_settings] : device_worker_variants[DispatchWorkerType::DISPATCH_D]) {
                     auto prefetch_d_settings = std::get<1>(device_worker_variants[DispatchWorkerType::PREFETCH_D][dispatch_d_idx]); // 1 to 1 mapping bw prefetch_d and dispatch_d
                     auto dispatch_s_settings = std::get<1>(device_worker_variants[DispatchWorkerType::DISPATCH_S][dispatch_d_idx]); // 1 to 1 mapping bw dispatch_s and dispatch_d
@@ -1617,6 +1616,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                     uint32_t host_completion_queue_wr_ptr = dispatch_constants::get(dispatch_core_type).get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
                     uint32_t dev_completion_queue_wr_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
                     uint32_t dev_completion_queue_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
+                    uint32_t dispatch_s_sync_sem_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM);
                     dispatch_d_settings.upstream_cores.push_back(prefetch_d_settings.worker_physical_core);
                     dispatch_d_settings.downstream_cores.push_back(mux_d_settings.worker_physical_core);
                     dispatch_d_settings.downstream_cores.push_back(dispatch_s_settings.worker_physical_core);
@@ -1642,8 +1642,8 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                     compile_args[17] = 1; //prefetch_downstream_cb_sem,
                     compile_args[18] = dispatch_constants::get(dispatch_core_type).mux_buffer_pages(num_hw_cqs), // mux buffer size is a function of num_cqs
                     compile_args[19] = dispatch_d_settings.num_compute_cores;
-                    compile_args[20] = dispatch_s_settings.consumer_semaphore_id;
-                    compile_args[21] = dispatch_d_settings.compute_core_mcast_noc_coords;
+                    compile_args[20] = dispatch_s_sync_sem_base_addr;
+                    compile_args[21] = max_dispatch_message_entries;
                     compile_args[22] = tensix_worker_go_signal_addr;
                     compile_args[23] = eth_worker_go_signal_addr;
                     compile_args[24] = (dispatch_core_type == CoreType::ETH);
@@ -1668,21 +1668,22 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         dispatch_s_settings.upstream_cores.push_back(prefetch_d_settings.worker_physical_core);
                         dispatch_s_settings.downstream_cores.push_back(dispatch_d_settings.worker_physical_core);
                         auto dispatch_core_type = dispatch_s_settings.dispatch_core_type;
-                        uint32_t dispatch_message_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-                        dispatch_s_settings.compile_args.resize(14);
+                        uint32_t dispatch_message_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+                        uint32_t dispatch_s_sync_sem_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM);
+                        uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+                        dispatch_s_settings.compile_args.resize(11);
                         auto& compile_args = dispatch_s_settings.compile_args;
                         compile_args[0] = dispatch_s_settings.cb_start_address;
                         compile_args[1] = dispatch_s_settings.cb_log_page_size;
                         compile_args[2] = dispatch_constants::get(dispatch_core_type).dispatch_s_buffer_size();
                         compile_args[3] = dispatch_s_settings.producer_semaphore_id;
                         compile_args[4] = prefetch_d_settings.consumer_slave_semaphore_id;
-                        compile_args[5] = dispatch_s_settings.consumer_semaphore_id;
-                        compile_args[6] = dispatch_s_settings.compute_core_mcast_noc_coords;
-                        compile_args[7] = dispatch_s_settings.num_compute_cores;
-                        compile_args[8] = tensix_worker_go_signal_addr;
-                        compile_args[9] = eth_worker_go_signal_addr;
-                        compile_args[10] = (dispatch_core_type == CoreType::ETH);
-                        compile_args[11] = dispatch_message_addr;
+                        compile_args[5] = dispatch_s_sync_sem_base_addr;
+                        compile_args[6] = tensix_worker_go_signal_addr;
+                        compile_args[7] = eth_worker_go_signal_addr;
+                        compile_args[8] = (dispatch_core_type == CoreType::ETH);
+                        compile_args[9] = dispatch_message_base_addr;
+                        compile_args[10] = max_dispatch_message_entries;
                         dispatch_s_idx++;
                     }
                 }
@@ -1797,7 +1798,6 @@ void Device::setup_tunnel_for_remote_devices() {
             uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(device_id);
             CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(mmio_device_id);
             uint32_t cq_start = dispatch_constants::get(dispatch_core_type).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);
-            auto [tensix_num_worker_cores, tensix_worker_physical_grid] = get_physical_worker_grid_config(device_id, num_hw_cqs, dispatch_core_type);
 
             dispatch_worker_build_settings_t settings = {};
             //allocations below are on mmio chip.
@@ -1861,7 +1861,6 @@ void Device::setup_tunnel_for_remote_devices() {
             uint32_t vc_count = 1 + (tunnel.size() - 1) * num_hw_cqs; // 1 return vc. outgoing vc count depends on tunnel size and cq size.
             uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(device_id);
             CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(mmio_device_id);
-            auto [tensix_num_worker_cores, tensix_worker_physical_grid] = get_physical_worker_grid_config(device_id, num_hw_cqs, dispatch_core_type);
 
             dispatch_worker_build_settings_t settings = {};
             //allocations below are on mmio chip.
@@ -2027,7 +2026,6 @@ void Device::setup_tunnel_for_remote_devices() {
                 settings.cb_log_page_size = dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE;
                 settings.cb_pages = dispatch_constants::get(dispatch_core_type).dispatch_buffer_pages();
                 settings.cb_size_bytes = (1 << settings.cb_log_page_size) * settings.cb_pages;
-                settings.compute_core_mcast_noc_coords = this->get_noc_multicast_encoding(dispatch_d_noc_index, tensix_worker_physical_grid);
                 CoreCoord compute_grid_size = this->compute_with_storage_grid_size();
                 settings.num_compute_cores = uint32_t(compute_grid_size.x * compute_grid_size.y);
                 tt_cxy_pair dispatch_d_location = dispatch_core_manager::instance().dispatcher_d_core(device_id, channel, cq_id);
@@ -2043,20 +2041,16 @@ void Device::setup_tunnel_for_remote_devices() {
                     // Initialize dispatch_s settings as invalid values. To be populated if dispatch_s is enabled.
                     settings.cb_log_page_size = dispatch_constants::DISPATCH_S_BUFFER_LOG_PAGE_SIZE;
                     settings.semaphores.push_back(0); // used by dispatch_s to sync with prefetch_d
-                    settings.semaphores.push_back(0); // dispatch_s waits on this until dispatch_d increments it
                     uint32_t dispatch_buffer_base = dispatch_constants::get(dispatch_core_type).dispatch_buffer_base();
                     if (dispatch_core_type == CoreType::WORKER) {
                         // dispatch_s is on the same Tensix core as dispatch_d. Shared resources. Offset CB start and sem idx.
                         settings.cb_start_address = dispatch_buffer_base + (1 << dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE) *  dispatch_constants::get(dispatch_core_type).dispatch_buffer_pages();
                         settings.producer_semaphore_id = 2; // sync with producer (prefetcher)
-                        settings.consumer_semaphore_id = 3; // sync with dispatch_d (this is the "consumer" of dispatch_s)
                     } else {
                         // dispatch_d and dispatch_s are on different cores. No shared resources: dispatch_s CB and semaphores start at base.
                         settings.cb_start_address = dispatch_buffer_base;
                         settings.producer_semaphore_id = 0; // sync with producer (prefetcher)
-                        settings.consumer_semaphore_id = 1; // sync with dispatch_d (this is the "consumer" of dispatch_s)
                     }
-                    settings.compute_core_mcast_noc_coords = this->get_noc_multicast_encoding(dispatch_s_noc_index, tensix_worker_physical_grid);
                     tt_cxy_pair dispatch_s_location = dispatch_core_manager::instance().dispatcher_s_core(device_id, channel, cq_id);
                     settings.worker_physical_core = tt_cxy_pair(dispatch_s_location.chip, get_physical_core_coordinate(dispatch_s_location, dispatch_core_type));
                     settings.kernel_file = "tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp";
@@ -2189,6 +2183,7 @@ void Device::compile_command_queue_programs() {
             uint32_t dev_completion_queue_wr_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
             uint32_t dev_completion_queue_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
             uint32_t dispatch_message_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+            uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
 
             const uint32_t prefetch_sync_sem = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, prefetch_core, 0, dispatch_core_type);
             const uint32_t prefetch_sem = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, prefetch_core, dispatch_constants::get(dispatch_core_type).dispatch_buffer_pages(), dispatch_core_type);
@@ -2200,7 +2195,7 @@ void Device::compile_command_queue_programs() {
             CoreCoord dispatch_s_physical_core = {0xff, 0xff};
             uint32_t dispatch_s_buffer_base = 0xff;
             uint32_t dispatch_s_sem = 0xff; // used by dispatch_s to sync with prefetch
-            uint32_t dispatch_s_sync_sem_id = 0xff; // used by dispatch_d to signal that dispatch_s can send go signal
+            uint32_t dispatch_s_sync_sem_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM);; // used by dispatch_d to signal that dispatch_s can send go signal
             if (this->dispatch_s_enabled()) {
                 // Skip allocating dispatch_s for multi-CQ configurations with ethernet dispatch
                 dispatch_s_core = dispatch_core_manager::instance().dispatcher_s_core(device_id, channel, cq_id);
@@ -2215,7 +2210,6 @@ void Device::compile_command_queue_programs() {
                     dispatch_s_buffer_base = dispatch_buffer_base;
                 }
                 dispatch_s_sem = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, dispatch_s_core, 0, dispatch_core_type); // used by dispatch_s to sync with prefetch
-                dispatch_s_sync_sem_id = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, dispatch_s_core, 0, dispatch_core_type); // used by dispatch_d to signal that dispatch_s can send go signal
             }
 
             log_debug(LogDevice, "Dispatching out of {} cores",  magic_enum::enum_name(dispatch_core_type));
@@ -2274,7 +2268,6 @@ void Device::compile_command_queue_programs() {
                 tt::llrt::OptionsG.get_watcher_enabled() && (not tt::llrt::OptionsG.get_watcher_noinline())
             );
 
-            auto [tensix_num_worker_cores, tensix_worker_physical_grid] = get_physical_worker_grid_config(this->id(), num_hw_cqs, dispatch_core_type);
             uint32_t tensix_worker_go_signal_addr = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::GO_MSG);
             uint32_t eth_worker_go_signal_addr = 0;
             if (hal.get_programmable_core_type_index(HalProgrammableCoreType::ACTIVE_ETH) != -1) {
@@ -2301,8 +2294,8 @@ void Device::compile_command_queue_programs() {
                 0,      // unused prefetch_local_downstream_sem_addr
                 0,      // unused prefetch_downstream_buffer_pages
                 num_compute_cores, // max_write_packed_cores
-                dispatch_s_sync_sem_id, // used to notify dispatch_s that its safe to send a go signal
-                this->get_noc_multicast_encoding(my_noc_index, tensix_worker_physical_grid), // used by dispatch_d to mcast go signals when dispatch_s is not enabled
+                dispatch_s_sync_sem_base_addr, // used to notify dispatch_s that its safe to send a go signal
+                max_dispatch_message_entries,
                 tensix_worker_go_signal_addr, // used by dispatch_d to mcast go signals when dispatch_s is not enabled
                 eth_worker_go_signal_addr, // used by dispatch_d to mcast go signals when dispatch_s is not enabled
                 dispatch_core_type == CoreType::ETH,
@@ -2335,13 +2328,12 @@ void Device::compile_command_queue_programs() {
                     dispatch_constants::get(dispatch_core_type).dispatch_s_buffer_size(),
                     dispatch_s_sem,
                     prefetch_dispatch_s_sync_sem,
-                    dispatch_s_sync_sem_id,
-                    this->get_noc_multicast_encoding(NOC::NOC_1, tensix_worker_physical_grid),
-                    tensix_num_worker_cores,
+                    dispatch_s_sync_sem_base_addr,
                     tensix_worker_go_signal_addr,
                     eth_worker_go_signal_addr,
                     dispatch_core_type == CoreType::ETH,
-                    dispatch_message_addr
+                    dispatch_message_addr,
+                    max_dispatch_message_entries,
                 };
                 configure_kernel_variant(
                     *command_queue_program_ptr,
@@ -2737,6 +2729,7 @@ void Device::configure_command_queue_programs() {
     }
 
     uint32_t prefetch_q_base = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::UNRESERVED);
+    uint32_t dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
     for (uint8_t cq_id = 0; cq_id < num_hw_cqs; cq_id++) {
         tt_cxy_pair prefetch_location = dispatch_core_manager::instance().prefetcher_core(device_id, channel, cq_id);
         tt_cxy_pair completion_q_writer_location = dispatch_core_manager::instance().completion_queue_writer_core(device_id, channel, cq_id);
@@ -2758,7 +2751,8 @@ void Device::configure_command_queue_programs() {
         uint32_t prefetch_q_pcie_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::PREFETCH_Q_PCIE_RD);
         uint32_t completion_q_wr_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
         uint32_t completion_q_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
-        uint32_t dispatch_message_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+        uint32_t dispatch_s_sync_sem_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM);
+        uint32_t dispatch_message_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
         uint32_t completion_q0_last_event_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q0_LAST_EVENT);
         uint32_t completion_q1_last_event_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q1_LAST_EVENT);
         std::vector<uint32_t> prefetch_q_pcie_rd_ptr_addr_data = {get_absolute_cq_offset(channel, cq_id, cq_size) + cq_start};
@@ -2781,16 +2775,24 @@ void Device::configure_command_queue_programs() {
         detail::WriteToDeviceL1(mmio_device, completion_q_writer_location, completion_q1_last_event_ptr, zero, dispatch_core_type);
 
         // Initialize address where workers signal completion to dispatch core(s).
-        if (this->distributed_dispatcher()) {
-            // Ethernet dispatch with a single CQ. dispatch_s and dispatch_d are on different cores. Initialize counter for both to zero.
-            tt_cxy_pair dispatch_s_location = dispatch_core_manager::instance().dispatcher_s_core(device_id, channel, cq_id);
-            detail::WriteToDeviceL1(this, dispatch_s_location, dispatch_message_addr, zero, dispatch_core_type);
-        }
-        detail::WriteToDeviceL1(mmio_device, dispatch_location, dispatch_message_addr, zero, dispatch_core_type);
-        if (device_id != mmio_device_id) {
-            tt_cxy_pair dispatch_d_location = dispatch_core_manager::instance().dispatcher_d_core(device_id, channel, cq_id);
-            dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device_id);
-            detail::WriteToDeviceL1(this, dispatch_d_location, dispatch_message_addr, zero, dispatch_core_type);
+        // TODO: Should only initialize dispatch_s_sync_sem if this->dispatch_s_enabled()?
+        for (uint32_t i = 0; i < dispatch_message_entries; i++) {
+            uint32_t dispatch_s_sync_sem_addr = dispatch_s_sync_sem_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+            uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+            if (this->distributed_dispatcher()) {
+                // Ethernet dispatch with a single CQ. dispatch_s and dispatch_d are on different cores. Initialize counter for both to zero.
+                tt_cxy_pair dispatch_s_location = dispatch_core_manager::instance().dispatcher_s_core(device_id, channel, cq_id);
+                detail::WriteToDeviceL1(this, dispatch_s_location, dispatch_s_sync_sem_addr, zero, dispatch_core_type);
+                detail::WriteToDeviceL1(this, dispatch_s_location, dispatch_message_addr, zero, dispatch_core_type);
+            }
+            detail::WriteToDeviceL1(mmio_device, dispatch_location, dispatch_s_sync_sem_addr, zero, dispatch_core_type);
+            detail::WriteToDeviceL1(mmio_device, dispatch_location, dispatch_message_addr, zero, dispatch_core_type);
+            if (device_id != mmio_device_id) {
+                tt_cxy_pair dispatch_d_location = dispatch_core_manager::instance().dispatcher_d_core(device_id, channel, cq_id);
+                CoreType remote_dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device_id);
+                detail::WriteToDeviceL1(this, dispatch_d_location, dispatch_s_sync_sem_addr, zero, remote_dispatch_core_type);
+                detail::WriteToDeviceL1(this, dispatch_d_location, dispatch_message_addr, zero, remote_dispatch_core_type);
+            }
         }
     }
 
@@ -2881,9 +2883,20 @@ void Device::init_command_queue_device() {
             }
         }
     }
+    auto dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->id());
+    auto dispatch_go_signal_noc = this->dispatch_go_signal_noc();
+    const auto& [tensix_num_worker_cores, tensix_worker_physical_grid] = get_physical_worker_grid_config(this->id(), this->num_hw_cqs(), dispatch_core_type);
+    this->noc_mcast_data_ = {this->get_noc_multicast_encoding(dispatch_go_signal_noc, tensix_worker_physical_grid), tensix_num_worker_cores};
+    // TODO: avoid copying?
+    const auto& noc_unicast_data = this->get_noc_encoding_for_active_eth_cores(dispatch_go_signal_noc);
+    this->noc_unicast_data_ = vector_memcpy_aligned<uint32_t>(noc_unicast_data.begin(), noc_unicast_data.end());
+    this->noc_mcast_unicast_data_.clear();
+    this->noc_mcast_unicast_data_.reserve(this->noc_mcast_data_.size() + this->noc_unicast_data_.size());
+    this->noc_mcast_unicast_data_.insert(this->noc_mcast_unicast_data_.end(), this->noc_mcast_data_.begin(), this->noc_mcast_data_.end());
+    this->noc_mcast_unicast_data_.insert(this->noc_mcast_unicast_data_.end(), this->noc_unicast_data_.begin(), this->noc_unicast_data_.end());
     // TODO: Move this inside the command queue
     for (auto& hw_cq : this->hw_command_queues_) {
-        hw_cq->set_unicast_only_cores_on_dispatch(this->get_noc_encoding_for_active_eth_cores(this->dispatch_s_enabled() ? NOC::NOC_1 : NOC::NOC_0));
+        hw_cq->set_num_worker_sems_on_dispatch(this->num_sub_devices());
     }
     // Added this for safety while debugging hangs with FD v1.3 tunnel to R, should experiment with removing it
     // tt::Cluster::instance().l1_barrier(this->id());
@@ -2912,7 +2925,7 @@ bool Device::initialize(const uint8_t num_hw_cqs, size_t l1_small_size, size_t t
     this->initialize_allocator(l1_small_size, trace_region_size, l1_bank_remap);
     this->initialize_build();
     // Reset the launch_message ring buffer state seen on host, since its reset on device, each time FW is initialized
-    this->worker_launch_message_buffer_state.reset();
+    std::for_each(this->worker_launch_message_buffer_state.begin(), this->worker_launch_message_buffer_state.end(), std::mem_fn(&LaunchMessageRingBufferState::reset));
     // For minimal setup, don't initialize FW, watcher, dprint. They won't work if we're attaching to a hung chip.
     if (minimal)
         return true;
@@ -3141,6 +3154,29 @@ void Device::check_allocator_is_initialized() const {
     }
 }
 
+void Device::reset_num_sub_devices(uint32_t num_sub_devices) {
+    TT_FATAL((num_sub_devices >=1 && num_sub_devices <= Device::MAX_NUM_SUB_DEVICES), "Illegal number of sub devices specified");
+    // Finish all running programs
+    Synchronize(this);
+
+    // Set new number of worker sems on dispatch_s
+    for (auto& hw_cq : this->hw_command_queues_) {
+        // Only need to reset launch messages once, so reset on cq 0
+        TT_FATAL(!hw_cq->manager.get_bypass_mode(), "Cannot reset worker state during trace capture");
+        hw_cq->reset_worker_state(hw_cq->id == 0);
+        hw_cq->set_num_worker_sems_on_dispatch(num_sub_devices);
+        // Reset the config buffer mgr (is this needed?)
+        hw_cq->reset_config_buffer_mgr(num_sub_devices);
+    }
+    // Reset the launch_message ring buffer state seen on host
+    std::for_each(this->worker_launch_message_buffer_state.begin(), this->worker_launch_message_buffer_state.begin() + num_sub_devices, std::mem_fn(&LaunchMessageRingBufferState::reset));
+}
+
+uint32_t Device::num_sub_devices() const {
+    // TODO: This will query the active sub-device manager
+    return Device::DEFAULT_NUM_SUB_DEVICES;
+}
+
 uint32_t Device::num_banks(const BufferType &buffer_type) const {
     this->check_allocator_is_initialized();
     return allocator::num_banks(*this->allocator_, buffer_type);
@@ -3220,6 +3256,16 @@ void Device::deallocate_buffers(){
     allocator::deallocate_buffers(*allocator_);
 }
 
+std::optional<DeviceAddr> Device::lowest_occupied_l1_address(uint32_t bank_id, tt::stl::Span<const uint32_t> sub_device_ids) const {
+    this->check_allocator_is_initialized();
+    // TODO: This will query the active sub-device manager
+    TT_FATAL(sub_device_ids.size() <= 1, "Invalid number of sub-devices {}", sub_device_ids.size());
+    if (sub_device_ids.size() == 1) {
+        TT_FATAL(sub_device_ids[0] == 0, "Invalid sub-device id {}", sub_device_ids[0]);
+    }
+    return allocator::lowest_occupied_l1_address(*this->allocator_, bank_id);
+}
+
 float Device::sfpu_eps() const {
     switch (arch()) {
         case tt::ARCH::GRAYSKULL: return tt::tt_metal::EPS_GS;
@@ -3355,15 +3401,6 @@ void Device::end_trace(const uint8_t cq_id, const uint32_t tid) {
     TT_FATAL(this->hw_command_queues_[cq_id]->tid == tid, "CQ {} is not being used for tracing tid {}", (uint32_t)cq_id, tid);
     TT_FATAL(this->trace_buffer_pool_.count(tid) > 0, "Trace instance {} must exist on device", tid);
     this->hw_command_queues_[cq_id]->record_end();
-    auto &trace_data = this->trace_buffer_pool_[tid]->desc->data;
-    trace_data = std::move(this->sysmem_manager().get_bypass_data());
-    // Add command to terminate the trace buffer
-    uint32_t cq_prefetch_cmd_bare_min_size = hal.get_alignment(HalMemType::HOST);
-    DeviceCommand command_sequence(cq_prefetch_cmd_bare_min_size);
-    command_sequence.add_prefetch_exec_buf_end();
-    for (int i = 0; i < command_sequence.size_bytes() / sizeof(uint32_t); i++) {
-        trace_data.push_back(((uint32_t*)command_sequence.data())[i]);
-    }
     Trace::initialize_buffer(this->command_queue(cq_id), this->trace_buffer_pool_[tid]);
     this->MarkAllocationsUnsafe();
 }
@@ -3446,6 +3483,48 @@ size_t Device::get_device_kernel_defines_hash() {
     return tt::utils::DefinesHash{}(this->device_kernel_defines_);
 }
 
+const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_data(uint32_t sub_device_id) const {
+    // TODO: This will query the active sub-device manager
+    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
+    return this->noc_mcast_data_;
+}
+const vector_memcpy_aligned<uint32_t>& Device::noc_unicast_data(uint32_t sub_device_id) const {
+    // TODO: This will query the active sub-device manager
+    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
+    return this->noc_unicast_data_;
+}
+
+const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_unicast_data(uint32_t sub_device_id, bool mcast_data, bool unicast_data) const {
+    // TODO: This will query the active sub-device manager
+    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
+    if (mcast_data && unicast_data) {
+        return this->noc_mcast_unicast_data_;
+    } else if (mcast_data) {
+        return this->noc_mcast_data_;
+    } else if (unicast_data) {
+        return this->noc_unicast_data_;
+    } else {
+        // Needed for compatibility with tests that create programs with no kernels
+        static const vector_memcpy_aligned<uint32_t> empty = {};
+        return empty;
+    }
+}
+
+uint32_t Device::num_noc_mcast_txns(uint32_t sub_device_id) const {
+    return this->noc_mcast_data(sub_device_id).size() / 2;
+}
+uint32_t Device::num_noc_unicast_txns(uint32_t sub_device_id) const {
+    return this->noc_unicast_data(sub_device_id).size();
+}
+
+uint32_t Device::num_noc_mcast_unicast_txns(uint32_t sub_device_id, bool mcast_data, bool unicast_data) const {
+    return (mcast_data ? this->num_noc_mcast_txns(sub_device_id) : 0) + (unicast_data ? this->num_noc_unicast_txns(sub_device_id) : 0);
+}
+
+NOC Device::dispatch_go_signal_noc() const {
+    return this->dispatch_s_enabled() ? NOC::NOC_1 : NOC::NOC_0;
+}
+
 }  // namespace tt_metal
 
 }  // namespace tt
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index fb5214c71be..c1f4fe72188 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -63,6 +63,9 @@ inline namespace v0 {
 
 // A physical PCIexpress Tenstorrent device
 class Device {
+   private:
+    static constexpr uint32_t MAX_NUM_SUB_DEVICES = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+    static constexpr uint32_t DEFAULT_NUM_SUB_DEVICES = 1;
    public:
     // friend void tt_gdb(Device* device, int chip_id, const vector<CoreCoord> cores, vector<string> ops);
     Device () = delete;
@@ -137,9 +140,7 @@ class Device {
 
     bool is_inactive_ethernet_core(CoreCoord logical_core) const;
 
-    uint32_t num_eth_worker_cores() const;
-
-    uint32_t num_worker_cores() const;
+    uint32_t num_worker_cores(HalProgrammableCoreType core_type, uint32_t sub_device_id) const;
 
     std::tuple<chip_id_t, CoreCoord> get_connected_ethernet_core(CoreCoord eth_core) const {
         return tt::Cluster::instance().get_connected_ethernet_core(std::make_tuple(this->id_, eth_core));
@@ -157,6 +158,8 @@ class Device {
 
     void update_workers_build_settings(std::vector<std::vector<std::tuple<tt_cxy_pair, dispatch_worker_build_settings_t>>> &device_worker_variants);
 
+    uint32_t num_sub_devices() const;
+
     uint32_t num_banks(const BufferType &buffer_type) const;
     uint32_t bank_size(const BufferType &buffer_type) const;
 
@@ -200,6 +203,8 @@ class Device {
 
     void deallocate_buffers();
 
+    std::optional<DeviceAddr> lowest_occupied_l1_address(uint32_t bank_id, tt::stl::Span<const uint32_t> sub_device_ids) const;
+
     // machine epsilon
     float sfpu_eps() const;
 
@@ -301,14 +306,13 @@ class Device {
     uint32_t worker_thread_core;
     uint32_t completion_queue_reader_core;
     std::unique_ptr<SystemMemoryManager> sysmem_manager_;
-    LaunchMessageRingBufferState worker_launch_message_buffer_state;
+    std::array<LaunchMessageRingBufferState, Device::MAX_NUM_SUB_DEVICES> worker_launch_message_buffer_state;
     uint8_t num_hw_cqs_;
 
     std::vector<std::unique_ptr<Program>> command_queue_programs;
     bool using_fast_dispatch;
     program_cache::detail::ProgramCache program_cache;
-    uint32_t num_worker_cores_;
-    uint32_t num_eth_worker_cores_;
+
     // Program cache interface. Syncrhonize with worker worker threads before querying or
     // modifying this structure, since worker threads use this for compiling ops
     void enable_program_cache() {
@@ -329,8 +333,8 @@ class Device {
         return program_cache.num_entries();
     }
 
-   uint32_t trace_buffers_size = 0;
-   void update_dispatch_cores_for_multi_cq_eth_dispatch();
+    uint32_t trace_buffers_size = 0;
+    void update_dispatch_cores_for_multi_cq_eth_dispatch();
 
     HalProgrammableCoreType get_programmable_core_type(CoreCoord phys_core) const;
     template <typename T = DeviceAddr>
@@ -345,11 +349,29 @@ class Device {
     bool distributed_dispatcher() const;
     size_t get_device_kernel_defines_hash();
 
+    const vector_memcpy_aligned<uint32_t>& noc_mcast_data(uint32_t sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t>& noc_unicast_data(uint32_t sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t>& noc_mcast_unicast_data(uint32_t sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
+    uint32_t num_noc_mcast_txns(uint32_t sub_device_id) const;
+    uint32_t num_noc_unicast_txns(uint32_t sub_device_id) const;
+    uint32_t num_noc_mcast_unicast_txns(uint32_t sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
+
    private:
+    void reset_num_sub_devices(uint32_t num_sub_devices);
+    NOC dispatch_go_signal_noc() const;
+
     void MarkAllocationsUnsafe();
     void MarkAllocationsSafe();
     std::unordered_map<uint32_t, std::shared_ptr<TraceBuffer>> trace_buffer_pool_;
     std::map<std::string, std::string> device_kernel_defines_;
+
+    // Data structures queried when no SubDeviceManager is active
+    // Otherwise this data comes from the SubDeviceManager
+    // TODO: Encapsulate the default case in a SubDeviceManager as well?
+    std::array<uint32_t, NumHalProgrammableCoreTypes> num_worker_cores_{};
+    vector_memcpy_aligned<uint32_t> noc_mcast_data_;
+    vector_memcpy_aligned<uint32_t> noc_unicast_data_;
+    vector_memcpy_aligned<uint32_t> noc_mcast_unicast_data_;
 };
 
 }  // namespace v0
diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index c23ea335737..00231c7d710 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -45,9 +45,6 @@ using std::set;
 using std::shared_ptr;
 using std::unique_ptr;
 
-std::mutex finish_mutex;
-std::condition_variable finish_cv;
-
 namespace tt::tt_metal {
 
 namespace detail {
@@ -79,7 +76,7 @@ EnqueueReadBufferCommand::EnqueueReadBufferCommand(
     Buffer& buffer,
     void* dst,
     SystemMemoryManager& manager,
-    uint32_t expected_num_workers_completed,
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
     uint32_t src_page_index,
     std::optional<uint32_t> pages_to_read) :
     command_queue_id(command_queue_id),
@@ -113,9 +110,10 @@ void EnqueueReadShardedBufferCommand::add_prefetch_relay(HugepageDeviceCommand&
 }
 
 void EnqueueReadBufferCommand::process() {
+    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
     // accounts for padding
     uint32_t cmd_sequence_sizeB =
-        CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
+        CQ_PREFETCH_CMD_BARE_MIN_SIZE * num_worker_counters +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
         CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // CQ_PREFETCH_CMD_STALL
         CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // CQ_PREFETCH_CMD_RELAY_INLINE_NOFLUSH + CQ_DISPATCH_CMD_WRITE_LINEAR_HOST
         CQ_PREFETCH_CMD_BARE_MIN_SIZE;   // CQ_PREFETCH_CMD_RELAY_LINEAR or CQ_PREFETCH_CMD_RELAY_PAGED
@@ -124,10 +122,20 @@ void EnqueueReadBufferCommand::process() {
 
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
 
-    uint32_t dispatch_message_addr = dispatch_constants::get(
-        this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+    uint32_t dispatch_message_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+    uint32_t last_index = num_worker_counters - 1;
+    // We only need the write barrier + prefetch stall for the last wait cmd
+    for (uint32_t i = 0; i < last_index; ++i) {
+        auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
+        command_sequence.add_dispatch_wait(
+            false, dispatch_message_addr, workers_completed);
+
+    }
+    auto [offset_index, workers_completed] = this->expected_num_workers_completed[last_index];
+    uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
     command_sequence.add_dispatch_wait_with_prefetch_stall(
-        true, dispatch_message_addr, this->expected_num_workers_completed);
+        true, dispatch_message_addr, workers_completed);
 
     uint32_t padded_page_size = this->buffer.aligned_page_size();
     bool flush_prefetch = false;
@@ -152,7 +160,7 @@ EnqueueWriteBufferCommand::EnqueueWriteBufferCommand(
     const void* src,
     SystemMemoryManager& manager,
     bool issue_wait,
-    uint32_t expected_num_workers_completed,
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
     uint32_t bank_base_address,
     uint32_t padded_page_size,
     uint32_t dst_page_index,
@@ -276,6 +284,7 @@ void EnqueueWriteShardedBufferCommand::add_buffer_data(HugepageDeviceCommand& co
 }
 
 void EnqueueWriteBufferCommand::process() {
+    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
     uint32_t data_size_bytes = this->pages_to_write * this->padded_page_size;
 
     uint32_t cmd_sequence_sizeB =
@@ -283,7 +292,7 @@ void EnqueueWriteBufferCommand::process() {
                                          // CQ_DISPATCH_CMD_WRITE_LINEAR)
         data_size_bytes;
     if (this->issue_wait) {
-        cmd_sequence_sizeB += CQ_PREFETCH_CMD_BARE_MIN_SIZE;  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
+        cmd_sequence_sizeB += CQ_PREFETCH_CMD_BARE_MIN_SIZE * num_worker_counters;  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
     }
 
     void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id);
@@ -291,9 +300,13 @@ void EnqueueWriteBufferCommand::process() {
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
 
     if (this->issue_wait) {
-        uint32_t dispatch_message_addr = dispatch_constants::get(
+        uint32_t dispatch_message_base_addr = dispatch_constants::get(
             this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-        command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed);
+        for (uint32_t i = 0; i < num_worker_counters; ++i) {
+            auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+            uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
+            command_sequence.add_dispatch_wait(false, dispatch_message_addr, workers_completed);
+        }
     }
 
     this->add_dispatch_write(command_sequence);
@@ -311,7 +324,7 @@ void EnqueueWriteBufferCommand::process() {
 }
 
 inline uint32_t get_packed_write_max_unicast_sub_cmds(Device* device) {
-    return device->num_worker_cores();
+    return device->compute_with_storage_grid_size().x * device->compute_with_storage_grid_size().y;
 }
 
 // EnqueueProgramCommand Section
@@ -326,21 +339,24 @@ EnqueueProgramCommand::EnqueueProgramCommand(
     WorkerConfigBufferMgr& config_buffer_mgr,
     uint32_t expected_num_workers_completed,
     uint32_t multicast_cores_launch_message_wptr,
-    uint32_t unicast_cores_launch_message_wptr) :
+    uint32_t unicast_cores_launch_message_wptr,
+    uint32_t sub_device_id) :
     command_queue_id(command_queue_id),
     noc_index(noc_index),
     manager(manager),
     config_buffer_mgr(config_buffer_mgr),
     expected_num_workers_completed(expected_num_workers_completed),
     program(program),
-    dispatch_core(dispatch_core) {
+    dispatch_core(dispatch_core),
+    multicast_cores_launch_message_wptr(multicast_cores_launch_message_wptr),
+    unicast_cores_launch_message_wptr(unicast_cores_launch_message_wptr),
+    sub_device_id(sub_device_id) {
     this->device = device;
     this->dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
     this->packed_write_max_unicast_sub_cmds = get_packed_write_max_unicast_sub_cmds(this->device);
     this->dispatch_message_addr = dispatch_constants::get(
-        this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-    this->multicast_cores_launch_message_wptr = multicast_cores_launch_message_wptr;
-    this->unicast_cores_launch_message_wptr = unicast_cores_launch_message_wptr;
+        this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE) +
+        dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
 }
 
 void EnqueueProgramCommand::assemble_preamble_commands(
@@ -1085,7 +1101,13 @@ void EnqueueProgramCommand::assemble_device_commands(
     cmd_sequence_sizeB += (this->device->dispatch_s_enabled() || program_transfer_info.num_active_cores > 0) * CQ_PREFETCH_CMD_BARE_MIN_SIZE;
 
     // either dispatch_s or dispatch_d will send the go signal (go_signal_mcast command)
-    cmd_sequence_sizeB += CQ_PREFETCH_CMD_BARE_MIN_SIZE;
+    const auto& noc_mcast_unicast_data = device->noc_mcast_unicast_data(this->sub_device_id, multicast_go_signal_sub_cmds.size() > 0,  unicast_go_signal_sub_cmds.size() > 0);
+    const auto& num_noc_mcast_txns = multicast_go_signal_sub_cmds.size() > 0 ? device->num_noc_mcast_txns(this->sub_device_id) : 0;
+    const auto& num_noc_unicast_txns = unicast_go_signal_sub_cmds.size() > 0 ? device->num_noc_unicast_txns(this->sub_device_id) : 0;
+    cmd_sequence_sizeB += align(
+        sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd) +
+        noc_mcast_unicast_data.size() * sizeof(uint32_t),
+        pcie_alignment);
 
     program_command_sequence.device_command_sequence = HostMemDeviceCommand(cmd_sequence_sizeB);
 
@@ -1208,9 +1230,7 @@ void EnqueueProgramCommand::assemble_device_commands(
     // Get the address for the slot this launch_message will be written to
     uint32_t multicast_launch_msg_addr = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::LAUNCH) + this->multicast_cores_launch_message_wptr * sizeof(launch_msg_t);
 
-    uint8_t go_signal_mcast_flag = 0x0;
     if (multicast_go_signal_sub_cmds.size() > 0) {
-        go_signal_mcast_flag |= (uint8_t)GoSignalMcastSettings::SEND_MCAST;
         uint32_t curr_sub_cmd_idx = 0;
         for (const auto& [num_sub_cmds_in_cmd, multicast_go_signal_payload_sizeB] : multicast_go_signals_payload) {
             uint32_t write_offset_bytes = device_command_sequence.write_offset_bytes();
@@ -1239,7 +1259,6 @@ void EnqueueProgramCommand::assemble_device_commands(
 
     if (unicast_go_signal_sub_cmds.size() > 0) {
         uint32_t unicast_launch_msg_addr = hal.get_dev_addr(HalProgrammableCoreType::ACTIVE_ETH, HalL1MemAddrType::LAUNCH) + this->unicast_cores_launch_message_wptr * sizeof(launch_msg_t);
-        go_signal_mcast_flag |= (uint8_t)GoSignalMcastSettings::SEND_UNICAST;
         uint32_t curr_sub_cmd_idx = 0;
         for (const auto& [num_sub_cmds_in_cmd, unicast_go_signal_payload_sizeB] : unicast_go_signals_payload) {
             uint32_t write_offset_bytes = device_command_sequence.write_offset_bytes();
@@ -1269,7 +1288,9 @@ void EnqueueProgramCommand::assemble_device_commands(
     DispatcherSelect dispatcher_for_go_signal = DispatcherSelect::DISPATCH_MASTER;
     if (this->device->dispatch_s_enabled()) {
         // dispatch_d signals dispatch_s to send the go signal, use a barrier if there are cores active
-        device_command_sequence.add_notify_dispatch_s_go_signal_cmd(program_transfer_info.num_active_cores > 0);
+        uint16_t index_bitmask = 0;
+        index_bitmask |= 1 << this->sub_device_id;
+        device_command_sequence.add_notify_dispatch_s_go_signal_cmd(program_transfer_info.num_active_cores > 0, index_bitmask);
         dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
     } else {
         // Wait Noc Write Barrier, wait for binaries/configs and launch_msg to be written to worker cores
@@ -1281,8 +1302,9 @@ void EnqueueProgramCommand::assemble_device_commands(
     run_program_go_signal.signal = RUN_MSG_GO;
     run_program_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     run_program_go_signal.master_y = (uint8_t)this->dispatch_core.y;
+    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
     uint32_t write_offset_bytes = device_command_sequence.write_offset_bytes();
-    device_command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, go_signal_mcast_flag, *reinterpret_cast<uint32_t*>(&run_program_go_signal), this->dispatch_message_addr, dispatcher_for_go_signal);
+    device_command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, *reinterpret_cast<uint32_t*>(&run_program_go_signal), this->dispatch_message_addr, num_noc_mcast_txns, num_noc_unicast_txns, noc_mcast_unicast_data, dispatcher_for_go_signal);
     program_command_sequence.mcast_go_signal_cmd_ptr = &((CQDispatchCmd*) ((uint32_t*)device_command_sequence.data() + (write_offset_bytes + sizeof(CQPrefetchCmd)) / sizeof(uint32_t)))->mcast;
 }
 
@@ -1331,6 +1353,7 @@ void EnqueueProgramCommand::update_device_commands(
     run_program_go_signal.signal = RUN_MSG_GO;
     run_program_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     run_program_go_signal.master_y = (uint8_t)this->dispatch_core.y;
+    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
     cached_program_command_sequence.mcast_go_signal_cmd_ptr->go_signal =  *reinterpret_cast<uint32_t*>(&run_program_go_signal);
     cached_program_command_sequence.mcast_go_signal_cmd_ptr->wait_count = this->expected_num_workers_completed;
 }
@@ -1357,8 +1380,7 @@ void EnqueueProgramCommand::write_program_command_sequence(
     uint32_t total_fetch_size_bytes =
         stall_fetch_size_bytes + preamble_fetch_size_bytes + runtime_args_fetch_size_bytes + program_fetch_size_bytes;
 
-    CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->device->id());
-    if (total_fetch_size_bytes <= dispatch_constants::get(dispatch_core_type).max_prefetch_command_size()) {
+    if (total_fetch_size_bytes <= dispatch_constants::get(this->dispatch_core_type).max_prefetch_command_size()) {
         this->manager.issue_queue_reserve(total_fetch_size_bytes, this->command_queue_id);
         uint32_t write_ptr = this->manager.get_issue_queue_write_ptr(this->command_queue_id);
 
@@ -1510,10 +1532,10 @@ void EnqueueProgramCommand::process() {
     }
     uint32_t num_workers = 0;
     if (program.runs_on_noc_multicast_only_cores()) {
-        num_workers += device->num_worker_cores();
+        num_workers += device->num_worker_cores(HalProgrammableCoreType::TENSIX, this->sub_device_id);
     }
     if (program.runs_on_noc_unicast_only_cores()) {
-        num_workers += device->num_eth_worker_cores();
+        num_workers += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, this->sub_device_id);
     }
     this->config_buffer_mgr.alloc(this->expected_num_workers_completed + num_workers);
     std::vector<ConfigBufferEntry>& kernel_config_addrs_raw = reservation.second;
@@ -1579,7 +1601,7 @@ EnqueueRecordEventCommand::EnqueueRecordEventCommand(
     NOC noc_index,
     SystemMemoryManager& manager,
     uint32_t event_id,
-    uint32_t expected_num_workers_completed,
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
     bool clear_count,
     bool write_barrier) :
     command_queue_id(command_queue_id),
@@ -1603,9 +1625,10 @@ void EnqueueRecordEventCommand::process() {
         align(sizeof(CQDispatchCmd) + num_hw_cqs * sizeof(CQDispatchWritePackedUnicastSubCmd), l1_alignment) +
         (align(dispatch_constants::EVENT_PADDED_SIZE, l1_alignment) * num_hw_cqs);
     uint32_t packed_write_sizeB = align(sizeof(CQPrefetchCmd) + packed_event_payload_sizeB, pcie_alignment);
+    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
 
     uint32_t cmd_sequence_sizeB =
-        CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
+        CQ_PREFETCH_CMD_BARE_MIN_SIZE * num_worker_counters +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
         packed_write_sizeB +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WRITE_PACKED + unicast subcmds + event
                               // payload
         align(
@@ -1617,11 +1640,22 @@ void EnqueueRecordEventCommand::process() {
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
 
     CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->device->id());
-    uint32_t dispatch_message_addr = dispatch_constants::get(
+    uint32_t dispatch_message_base_addr = dispatch_constants::get(
         dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
 
+    uint32_t last_index = num_worker_counters - 1;
+    // We only need the write barrier for the last wait cmd
+    for (uint32_t i = 0; i < last_index; ++i) {
+        auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
+        command_sequence.add_dispatch_wait(
+            false, dispatch_message_addr, workers_completed, this->clear_count);
+
+    }
+    auto [offset_index, workers_completed] = this->expected_num_workers_completed[last_index];
+    uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
     command_sequence.add_dispatch_wait(
-        this->write_barrier, dispatch_message_addr, this->expected_num_workers_completed, this->clear_count);
+            this->write_barrier, dispatch_message_addr, workers_completed, this->clear_count);
 
     CoreType core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->device->id());
     uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(this->device->id());
@@ -1708,75 +1742,96 @@ EnqueueTraceCommand::EnqueueTraceCommand(
     uint32_t command_queue_id,
     Device* device,
     SystemMemoryManager& manager,
-    std::shared_ptr<detail::TraceDescriptor>& desc,
+    std::shared_ptr<detail::TraceDescriptor>& descriptor,
     Buffer& buffer,
-    uint32_t& expected_num_workers_completed,
+    std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES> & expected_num_workers_completed,
     NOC noc_index,
     CoreCoord dispatch_core) :
     command_queue_id(command_queue_id),
     buffer(buffer),
     device(device),
     manager(manager),
-    desc(desc),
+    descriptor(descriptor),
     expected_num_workers_completed(expected_num_workers_completed),
     clear_count(true),
     noc_index(noc_index),
     dispatch_core(dispatch_core) {}
 
 void EnqueueTraceCommand::process() {
+    uint32_t num_sub_devices = descriptor->descriptors.size();
+    uint32_t go_signals_cmd_size = 0;
+    uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
+    for (const auto& [index, desc] : descriptor->descriptors) {
+        uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
+        go_signal_cmd_size += desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(index) * sizeof(uint32_t) : 0;
+        go_signal_cmd_size += desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(index) * sizeof(uint32_t) : 0;
+        go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
+    }
     uint32_t cmd_sequence_sizeB =
         this->device->dispatch_s_enabled() * CQ_PREFETCH_CMD_BARE_MIN_SIZE + // dispatch_d -> dispatch_s sem update (send only if dispatch_s is running)
-        CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // go signal cmd
-        CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // wait to ensure that reset go signal was processed (dispatch_d)
+        go_signals_cmd_size +  // go signal cmd
+        (CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // wait to ensure that reset go signal was processed (dispatch_d)
         // when dispatch_s and dispatch_d are running on 2 cores, workers update dispatch_s. dispatch_s is responsible for resetting worker count
         // and giving dispatch_d the latest worker state. This is encapsulated in the dispatch_s wait command (only to be sent when dispatch is distributed
         // on 2 cores)
-        (this->device->distributed_dispatcher()) * CQ_PREFETCH_CMD_BARE_MIN_SIZE +
+        (this->device->distributed_dispatcher()) * CQ_PREFETCH_CMD_BARE_MIN_SIZE) * num_sub_devices +
         CQ_PREFETCH_CMD_BARE_MIN_SIZE;  // CQ_PREFETCH_CMD_EXEC_BUF
 
-    uint8_t go_signal_mcast_flag = 0;
-    if (desc->num_traced_programs_needing_go_signal_multicast) {
-        go_signal_mcast_flag |= (uint8_t)GoSignalMcastSettings::SEND_MCAST;
-    }
-    if (desc->num_traced_programs_needing_go_signal_unicast) {
-        go_signal_mcast_flag |= (uint8_t)GoSignalMcastSettings::SEND_UNICAST;
-    }
     void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id);
 
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
 
     DispatcherSelect dispatcher_for_go_signal = DispatcherSelect::DISPATCH_MASTER;
     if (this->device->dispatch_s_enabled()) {
-        command_sequence.add_notify_dispatch_s_go_signal_cmd(false);
+        uint16_t index_bitmask = 0;
+        for (const auto &i : descriptor->sub_device_ids) {
+            index_bitmask |= 1 << i;
+        }
+        command_sequence.add_notify_dispatch_s_go_signal_cmd(false, index_bitmask);
         dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
     }
+    CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
+    uint32_t dispatch_message_base_addr = dispatch_constants::get(
+        dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
     go_msg_t reset_launch_message_read_ptr_go_signal;
     reset_launch_message_read_ptr_go_signal.signal = RUN_MSG_RESET_READ_PTR;
     reset_launch_message_read_ptr_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     reset_launch_message_read_ptr_go_signal.master_y = (uint8_t)this->dispatch_core.y;
-    CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
-    uint32_t dispatch_message_addr = dispatch_constants::get(
-        dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-    // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
-    command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, go_signal_mcast_flag, *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, dispatcher_for_go_signal);
-    if (desc->num_traced_programs_needing_go_signal_multicast) {
-        this->expected_num_workers_completed += device->num_worker_cores();
-    }
-    if (desc->num_traced_programs_needing_go_signal_unicast) {
-        this->expected_num_workers_completed += device->num_eth_worker_cores();
+    for (const auto& [index, desc] : descriptor->descriptors) {
+        const auto& num_noc_mcast_txns = desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(index) : 0;
+        const auto& num_noc_unicast_txns = desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(index) : 0;
+        reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
+        // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
+        command_sequence.add_dispatch_go_signal_mcast(
+            this->expected_num_workers_completed[index],
+            *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal),
+            dispatch_message_addr,
+            num_noc_mcast_txns,
+            num_noc_unicast_txns,
+            device->noc_mcast_unicast_data(index, desc.num_traced_programs_needing_go_signal_multicast, desc.num_traced_programs_needing_go_signal_unicast),
+            dispatcher_for_go_signal);
+        if (desc.num_traced_programs_needing_go_signal_multicast) {
+            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, index);
+        }
+        if (desc.num_traced_programs_needing_go_signal_unicast) {
+            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, index);
+        }
     }
     // Wait to ensure that all workers have reset their read_ptr. dispatch_d will stall until all workers have completed this step, before sending kernel config data to workers
     // or notifying dispatch_s that its safe to send the go_signal.
     // Clear the dispatch <--> worker semaphore, since trace starts at 0.
-    if (this->device->distributed_dispatcher()) {
+    for (const auto &index : descriptor->sub_device_ids) {
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
+        if (this->device->distributed_dispatcher()) {
+            command_sequence.add_dispatch_wait(
+                false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count, false, true, 1);
+        }
         command_sequence.add_dispatch_wait(
-            false, dispatch_message_addr, this->expected_num_workers_completed, this->clear_count, false, true, 1);
-    }
-    command_sequence.add_dispatch_wait(
-        false, dispatch_message_addr, this->expected_num_workers_completed, this->clear_count);
-
-    if (this->clear_count) {
-        this->expected_num_workers_completed = 0;
+            false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count);
+        if (this->clear_count) {
+            this->expected_num_workers_completed[index] = 0;
+        }
     }
 
     uint32_t page_size = buffer.page_size();
@@ -1870,29 +1925,103 @@ HWCommandQueue::HWCommandQueue(Device* device, uint32_t id, NOC noc_index) :
     this->completion_queue_thread = std::move(completion_queue_thread);
     // Set the affinity of the completion queue reader.
     set_device_thread_affinity(this->completion_queue_thread, device->completion_queue_reader_core);
-    this->expected_num_workers_completed = 0;
 
-    for (uint32_t index = 0; index < tt::tt_metal::hal.get_programmable_core_type_count(); index++) {
-        this->config_buffer_mgr.init_add_buffer(
-            tt::tt_metal::hal.get_dev_addr(
-                tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG),
-            tt::tt_metal::hal.get_dev_size(
-                tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG));
+    for (uint32_t i = 0; i < dispatch_constants::DISPATCH_MESSAGE_ENTRIES; i++) {
+        this->expected_num_workers_completed[i] = 0;
+        for (uint32_t index = 0; index < tt::tt_metal::hal.get_programmable_core_type_count(); index++) {
+            this->config_buffer_mgr[i].init_add_buffer(
+                tt::tt_metal::hal.get_dev_addr(
+                    tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG),
+                tt::tt_metal::hal.get_dev_size(
+                    tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG));
+        }
+        // Subtract 1 from the number of entries, so the watcher can read information (e.g. fired asserts) from the previous
+        // launch message.
+        this->config_buffer_mgr[i].init_add_buffer(0, launch_msg_buffer_num_entries - 1);
+    }
+}
+
+void HWCommandQueue::set_num_worker_sems_on_dispatch(uint32_t num_worker_sems) {
+    // Not needed for regular dispatch kernel
+    if (!this->device->dispatch_s_enabled()) {
+        return;
     }
-    // Subtract 1 from the number of entries, so the watcher can read information (e.g. fired asserts) from the previous
-    // launch message.
-    this->config_buffer_mgr.init_add_buffer(0, launch_msg_buffer_num_entries - 1);
+    uint32_t cmd_sequence_sizeB = CQ_PREFETCH_CMD_BARE_MIN_SIZE;
+    void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->id);
+    HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
+    command_sequence.add_dispatch_set_num_worker_sems(num_worker_sems, DispatcherSelect::DISPATCH_SLAVE);
+    this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->id);
+    this->manager.fetch_queue_reserve_back(this->id);
+    this->manager.fetch_queue_write(cmd_sequence_sizeB, this->id);
 }
 
-void HWCommandQueue::set_unicast_only_cores_on_dispatch(const std::vector<uint32_t>& unicast_only_noc_encodings) {
-    uint32_t cmd_sequence_sizeB = align(CQ_PREFETCH_CMD_BARE_MIN_SIZE + unicast_only_noc_encodings.size() * sizeof(uint32_t), PCIE_ALIGNMENT);
+void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
+    uint32_t num_sub_devices = device->num_sub_devices();
+    uint32_t go_signals_cmd_size = 0;
+    if (reset_launch_msg_state) {
+        uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
+        for (uint32_t i = 0; i < num_sub_devices; ++i) {
+            uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
+            go_signal_cmd_size += device->num_noc_mcast_txns(i) * sizeof(uint32_t) + device->num_noc_unicast_txns(i) * sizeof(uint32_t);
+            go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
+        }
+    }
+    uint32_t cmd_sequence_sizeB =
+        reset_launch_msg_state * this->device->dispatch_s_enabled() * CQ_PREFETCH_CMD_BARE_MIN_SIZE + // dispatch_d -> dispatch_s sem update (send only if dispatch_s is running)
+        go_signals_cmd_size +  // go signal cmd
+        (CQ_PREFETCH_CMD_BARE_MIN_SIZE +  // wait to ensure that reset go signal was processed (dispatch_d)
+        // when dispatch_s and dispatch_d are running on 2 cores, workers update dispatch_s. dispatch_s is responsible for resetting worker count
+        // and giving dispatch_d the latest worker state. This is encapsulated in the dispatch_s wait command (only to be sent when dispatch is distributed
+        // on 2 cores)
+        this->device->distributed_dispatcher() * CQ_PREFETCH_CMD_BARE_MIN_SIZE) * num_sub_devices;
     void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->id);
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
-    DispatcherSelect dispatcher_for_cmd = this->device->dispatch_s_enabled() ? DispatcherSelect::DISPATCH_SLAVE : DispatcherSelect::DISPATCH_MASTER;
-    command_sequence.add_dispatch_set_unicast_only_cores(unicast_only_noc_encodings, dispatcher_for_cmd);
+    bool clear_count = true;
+    DispatcherSelect dispatcher_for_go_signal = DispatcherSelect::DISPATCH_MASTER;
+    CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
+    uint32_t dispatch_message_base_addr = dispatch_constants::get(
+        dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
+    if (reset_launch_msg_state) {
+        if (device->dispatch_s_enabled()) {
+            uint16_t index_bitmask = 0;
+            for (uint32_t i = 0; i < num_sub_devices; ++i) {
+                index_bitmask |= 1 << i;
+            }
+            command_sequence.add_notify_dispatch_s_go_signal_cmd(false, index_bitmask);
+            dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
+        }
+        go_msg_t reset_launch_message_read_ptr_go_signal;
+        reset_launch_message_read_ptr_go_signal.signal = RUN_MSG_RESET_READ_PTR;
+        reset_launch_message_read_ptr_go_signal.master_x = (uint8_t)this->physical_enqueue_program_dispatch_core.x;
+        reset_launch_message_read_ptr_go_signal.master_y = (uint8_t)this->physical_enqueue_program_dispatch_core.y;
+        for (uint32_t i = 0; i < num_sub_devices; ++i) {
+            reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+            uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+            // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
+            command_sequence.add_dispatch_go_signal_mcast(expected_num_workers_completed[i], *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, device->num_noc_mcast_txns(i), device->num_noc_unicast_txns(i), device->noc_mcast_unicast_data(i), dispatcher_for_go_signal);
+            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, i);
+            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, i);
+        }
+    }
+    // Wait to ensure that all workers have reset their read_ptr. dispatch_d will stall until all workers have completed this step, before sending kernel config data to workers
+    // or notifying dispatch_s that its safe to send the go_signal.
+    // Clear the dispatch <--> worker semaphore, since trace starts at 0.
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+        if (device->distributed_dispatcher()) {
+            command_sequence.add_dispatch_wait(
+                false, dispatch_message_addr, expected_num_workers_completed[i], clear_count, false, true, 1);
+        }
+        command_sequence.add_dispatch_wait(
+            false, dispatch_message_addr, expected_num_workers_completed[i], clear_count);
+    }
     this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->id);
     this->manager.fetch_queue_reserve_back(this->id);
     this->manager.fetch_queue_write(cmd_sequence_sizeB, this->id);
+
+    if (clear_count) {
+        std::fill(expected_num_workers_completed.begin(), expected_num_workers_completed.begin() + num_sub_devices, 0);
+    }
 }
 
 HWCommandQueue::~HWCommandQueue() {
@@ -1932,20 +2061,20 @@ void HWCommandQueue::set_exit_condition() {
 }
 
 template <typename T>
-void HWCommandQueue::enqueue_command(T& command, bool blocking) {
+void HWCommandQueue::enqueue_command(T& command, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
     command.process();
     if (blocking) {
-        this->finish();
+        this->finish(sub_device_ids);
     }
 }
 
-void HWCommandQueue::enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking) {
-    this->enqueue_read_buffer(*buffer, dst, blocking);
+void HWCommandQueue::enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+    this->enqueue_read_buffer(*buffer, dst, blocking, sub_device_ids);
 }
 
 // Read buffer command is enqueued in the issue region and device writes requested buffer data into the completion
 // region
-void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking) {
+void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_read_buffer");
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Read Buffer cannot be used with tracing");
 
@@ -1958,6 +2087,8 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
     uint32_t unpadded_dst_offset = 0;
     uint32_t src_page_index = 0;
 
+    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+
     if (is_sharded(buffer.buffer_layout())) {
         const bool width_split = buffer.shard_spec().shape_in_pages()[1] != buffer.shard_spec().tensor2d_shape[1];
         const auto& buffer_page_mapping = width_split ? buffer.get_buffer_page_mapping() : nullptr;
@@ -2001,7 +2132,7 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
                     buffer,
                     dst,
                     this->manager,
-                    this->expected_num_workers_completed,
+                    expected_workers_completed,
                     cores[core_id],
                     bank_base_address,
                     src_page_index,
@@ -2019,12 +2150,12 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
                     buffer_page_mapping));
 
                 src_page_index += num_pages_to_read;
-                this->enqueue_command(command, false);
+                this->enqueue_command(command, false, sub_device_ids);
                 this->increment_num_entries_in_completion_q();
             }
         }
         if (blocking) {
-            this->finish();
+            this->finish(sub_device_ids);
         }
     } else {
         // this is a streaming command so we don't need to break down to multiple
@@ -2035,7 +2166,7 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
             buffer,
             dst,
             this->manager,
-            this->expected_num_workers_completed,
+            expected_workers_completed,
             src_page_index,
             pages_to_read);
 
@@ -2048,45 +2179,39 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
             unpadded_dst_offset,
             pages_to_read,
             src_page_index));
-        this->enqueue_command(command, blocking);
+        this->enqueue_command(command, blocking, sub_device_ids);
         this->increment_num_entries_in_completion_q();
     }
 }
 
 void HWCommandQueue::enqueue_write_buffer(
-    std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking) {
+    std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
     // Top level API to accept different variants for buffer and src
     // For shared pointer variants, object lifetime is guaranteed at least till the end of this function
-    std::visit(
-        [this, &buffer, &blocking](auto&& data) {
-            using T = std::decay_t<decltype(data)>;
-            std::visit(
-                [this, &buffer, &blocking, &data](auto&& b) {
-                    using type_buf = std::decay_t<decltype(b)>;
-                    if constexpr (std::is_same_v<T, const void*>) {
-                        if constexpr (std::is_same_v<type_buf, std::shared_ptr<Buffer>>) {
-                            this->enqueue_write_buffer(*b, data, blocking);
-                        } else if constexpr (std::is_same_v<type_buf, std::reference_wrapper<Buffer>>) {
-                            this->enqueue_write_buffer(b.get(), data, blocking);
-                        }
-                    } else {
-                        if constexpr (std::is_same_v<type_buf, std::shared_ptr<Buffer>>) {
-                            this->enqueue_write_buffer(*b, data.get()->data(), blocking);
-                        } else if constexpr (std::is_same_v<type_buf, std::reference_wrapper<Buffer>>) {
-                            this->enqueue_write_buffer(b.get(), data.get()->data(), blocking);
-                        }
-                    }
-                },
-                buffer);
-        },
-        src);
+    auto data = std::visit([&](auto&& data) -> const void* {
+        using T = std::decay_t<decltype(data)>;
+        if constexpr (std::is_same_v<T, const void*>) {
+            return data;
+        } else {
+            return data->data();
+        }
+    }, src);
+    auto& b = std::visit([&](auto&& b) -> Buffer& {
+        using type_buf = std::decay_t<decltype(b)>;
+        if constexpr (std::is_same_v<type_buf, std::shared_ptr<Buffer>>) {
+            return *b;
+        } else {
+            return b.get();
+        }
+    }, buffer);
+    this->enqueue_write_buffer(b, data, blocking, sub_device_ids);
 }
 
 CoreType HWCommandQueue::get_dispatch_core_type() {
     return dispatch_core_manager::instance().get_dispatch_core_type(device->id());
 }
 
-void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking) {
+void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_write_buffer");
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Write Buffer cannot be used with tracing");
 
@@ -2100,6 +2225,8 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
 
     uint32_t dst_page_index = 0;
 
+    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+
     if (is_sharded(buffer.buffer_layout())) {
         const bool width_split = buffer.shard_spec().shape_in_pages()[1] != buffer.shard_spec().tensor2d_shape[1];
         const auto& buffer_page_mapping = width_split ? buffer.get_buffer_page_mapping() : nullptr;
@@ -2167,7 +2294,7 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
                         src,
                         this->manager,
                         issue_wait,
-                        this->expected_num_workers_completed,
+                        expected_workers_completed,
                         address,
                         buffer_page_mapping,
                         cores[core_id],
@@ -2175,7 +2302,7 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
                         dst_page_index,
                         pages_to_write);
 
-                    this->enqueue_command(command, false);
+                    this->enqueue_command(command, false, sub_device_ids);
                     curr_page_idx_in_shard += pages_to_write;
                     num_pages -= pages_to_write;
                     dst_page_index += pages_to_write;
@@ -2258,13 +2385,13 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
                 src,
                 this->manager,
                 issue_wait,
-                this->expected_num_workers_completed,
+                expected_workers_completed,
                 bank_base_address,
                 page_size_to_write,
                 dst_page_index,
                 num_pages_to_write);
             this->enqueue_command(
-                command, false);  // don't block until the entire src data is enqueued in the issue queue
+                command, false, sub_device_ids);  // don't block until the entire src data is enqueued in the issue queue
 
             total_pages_to_write -= num_pages_to_write;
             dst_page_index += num_pages_to_write;
@@ -2272,18 +2399,21 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
     }
 
     if (blocking) {
-        this->finish();
+        this->finish(sub_device_ids);
     }
 }
 
 void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
     ZoneScopedN("HWCommandQueue_enqueue_program");
+    std::vector<uint32_t> sub_device_ids = {program.determine_sub_device_ids(device)};
+    TT_FATAL(sub_device_ids.size() == 1, "Programs must be executed on a single sub-device");
     if (not program.is_finalized()) {
         program.finalize(device);
         TT_FATAL(!this->manager.get_bypass_mode(), "Tracing should only be used when programs have been cached");
         if (const auto &kernels_buffer = program.get_kernels_buffer()) {
+            // Only stall for used sub-devices
             this->enqueue_write_buffer(
-                *kernels_buffer, program.get_program_transfer_info().binary_data.data(), false);
+                *kernels_buffer, program.get_program_transfer_info().binary_data.data(), false, sub_device_ids);
         }
     }
 
@@ -2294,32 +2424,33 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         TT_FATAL(!this->manager.get_bypass_mode(), "Tracing cannot be used while validating program binaries");
         if (const auto &buffer = program.get_kernels_buffer()) {
             std::vector<uint32_t> read_data(buffer->page_size() * buffer->num_pages() / sizeof(uint32_t));
-            this->enqueue_read_buffer(*buffer, read_data.data(), true);
+            this->enqueue_read_buffer(*buffer, read_data.data(), true, sub_device_ids);
             TT_FATAL(
                 program.get_program_transfer_info().binary_data == read_data,
                 "Binary for program to be executed is corrupted. Another program likely corrupted this binary");
         }
     }
 #endif
+    auto sub_device_id = sub_device_ids[0];
 
     // Snapshot of expected workers from previous programs, used for dispatch_wait cmd generation.
-    uint32_t expected_workers_completed = this->manager.get_bypass_mode() ? this->trace_ctx->num_completion_worker_cores
-                                                                          : this->expected_num_workers_completed;
+    uint32_t expected_workers_completed = this->manager.get_bypass_mode() ? this->trace_ctx->descriptors[sub_device_id].num_completion_worker_cores
+                                                                          : this->expected_num_workers_completed[sub_device_id];
     if (this->manager.get_bypass_mode()) {
         if (program.runs_on_noc_multicast_only_cores()) {
-            this->trace_ctx->num_traced_programs_needing_go_signal_multicast++;
-            this->trace_ctx->num_completion_worker_cores += device->num_worker_cores();
+            this->trace_ctx->descriptors[sub_device_id].num_traced_programs_needing_go_signal_multicast++;
+            this->trace_ctx->descriptors[sub_device_id].num_completion_worker_cores += device->num_worker_cores(HalProgrammableCoreType::TENSIX, sub_device_id);
         }
         if (program.runs_on_noc_unicast_only_cores()) {
-            this->trace_ctx->num_traced_programs_needing_go_signal_unicast++;
-            this->trace_ctx->num_completion_worker_cores += device->num_eth_worker_cores();
+            this->trace_ctx->descriptors[sub_device_id].num_traced_programs_needing_go_signal_unicast++;
+            this->trace_ctx->descriptors[sub_device_id].num_completion_worker_cores += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
         }
     } else {
         if (program.runs_on_noc_multicast_only_cores()) {
-            this->expected_num_workers_completed += device->num_worker_cores();
+            this->expected_num_workers_completed[sub_device_id] += device->num_worker_cores(HalProgrammableCoreType::TENSIX,sub_device_id);
         }
         if (program.runs_on_noc_unicast_only_cores()) {
-            this->expected_num_workers_completed += device->num_eth_worker_cores();
+            this->expected_num_workers_completed[sub_device_id] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
         }
     }
 
@@ -2330,26 +2461,27 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         program,
         this->physical_enqueue_program_dispatch_core,
         this->manager,
-        this->config_buffer_mgr,
+        this->config_buffer_mgr[sub_device_id],
         expected_workers_completed,
         // The assembled program command will encode the location of the launch messages in the ring buffer
-        this->device->worker_launch_message_buffer_state.get_mcast_wptr(),
-        this->device->worker_launch_message_buffer_state.get_unicast_wptr());
+        this->device->worker_launch_message_buffer_state[sub_device_id].get_mcast_wptr(),
+        this->device->worker_launch_message_buffer_state[sub_device_id].get_unicast_wptr(),
+        sub_device_id);
     // Update wptrs for tensix and eth launch message in the device class
     if (program.runs_on_noc_multicast_only_cores()) {
-        this->device->worker_launch_message_buffer_state.inc_mcast_wptr(1);
+        this->device->worker_launch_message_buffer_state[sub_device_id].inc_mcast_wptr(1);
     }
     if (program.runs_on_noc_unicast_only_cores()) {
-        this->device->worker_launch_message_buffer_state.inc_unicast_wptr(1);
+        this->device->worker_launch_message_buffer_state[sub_device_id].inc_unicast_wptr(1);
     }
-    this->enqueue_command(command, blocking);
+    this->enqueue_command(command, blocking, sub_device_ids);
 
 #ifdef DEBUG
     if (tt::llrt::OptionsG.get_validate_kernel_binaries()) {
         TT_FATAL(!this->manager.get_bypass_mode(), "Tracing cannot be used while validating program binaries");
         if (const auto& buffer = program.get_kernels_buffer()) {
             std::vector<uint32_t> read_data(buffer->page_size() * buffer->num_pages() / sizeof(uint32_t));
-            this->enqueue_read_buffer(*buffer, read_data.data(), true);
+            this->enqueue_read_buffer(*buffer, read_data.data(), true, sub_device_ids);
             TT_FATAL(
                 program.get_program_transfer_info().binary_data == read_data,
                 "Binary for program that executed is corrupted. This program likely corrupted its own binary.");
@@ -2365,7 +2497,7 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         expected_workers_completed);
 }
 
-void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count) {
+void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count, tt::stl::Span<const uint32_t> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_enqueue_record_event");
 
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Record Event cannot be used with tracing");
@@ -2378,19 +2510,23 @@ void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, b
     event->device = this->device;
     event->ready = true;  // what does this mean???
 
+    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+
     auto command = EnqueueRecordEventCommand(
         this->id,
         this->device,
         this->noc_index,
         this->manager,
         event->event_id,
-        this->expected_num_workers_completed,
+        expected_workers_completed,
         clear_count,
         true);
-    this->enqueue_command(command, false);
+    this->enqueue_command(command, false, sub_device_ids);
 
     if (clear_count) {
-        this->expected_num_workers_completed = 0;
+        for (const auto&[id, _] : expected_workers_completed) {
+            this->expected_num_workers_completed[id] = 0;
+        }
     }
     this->issued_completion_q_reads.push(
         std::make_shared<detail::CompletionReaderVariant>(std::in_place_type<detail::ReadEventDescriptor>, event->event_id));
@@ -2401,7 +2537,7 @@ void HWCommandQueue::enqueue_wait_for_event(const std::shared_ptr<Event>& sync_e
     ZoneScopedN("HWCommandQueue_enqueue_wait_for_event");
 
     auto command = EnqueueWaitForEventCommand(this->id, this->device, this->manager, *sync_event, clear_count);
-    this->enqueue_command(command, false);
+    this->enqueue_command(command, false, {});
 
     if (clear_count) {
         this->manager.reset_event_id(this->id);
@@ -2415,29 +2551,28 @@ void HWCommandQueue::enqueue_trace(const uint32_t trace_id, bool blocking) {
     auto command = EnqueueTraceCommand(
         this->id, this->device, this->manager, trace_inst->desc, *trace_inst->buffer, this->expected_num_workers_completed, this->noc_index, this->physical_enqueue_program_dispatch_core);
 
-    this->enqueue_command(command, false);
-
-    // Increment the expected worker cores counter due to trace programs completion
-    this->expected_num_workers_completed += trace_inst->desc->num_completion_worker_cores;
-    // After trace runs, the rdptr on each worker will be incremented by the number of programs in the trace
-    // Update the wptr on host to match state. If the trace doesn't execute on a
-    // class of worker (unicast or multicast), it doesn't reset or modify the
-    // state for those workers.
-    if (trace_inst->desc->num_traced_programs_needing_go_signal_multicast) {
-        this->device->worker_launch_message_buffer_state.set_mcast_wptr(
-            trace_inst->desc->num_traced_programs_needing_go_signal_multicast);
-    }
-    if (trace_inst->desc->num_traced_programs_needing_go_signal_unicast) {
-        this->device->worker_launch_message_buffer_state.set_unicast_wptr(
-            trace_inst->desc->num_traced_programs_needing_go_signal_unicast);
+    this->enqueue_command(command, false, {});
+
+    for (const auto& [index, desc]: trace_inst->desc->descriptors) {
+         // Increment the expected worker cores counter due to trace programs completion
+        this->expected_num_workers_completed[index] += desc.num_completion_worker_cores;
+        // After trace runs, the rdptr on each worker will be incremented by the number of programs in the trace
+        // Update the wptr on host to match state. If the trace doesn't execute on a
+        // class of worker (unicast or multicast), it doesn't reset or modify the
+        // state for those workers.
+        if (desc.num_traced_programs_needing_go_signal_multicast) {
+            this->device->worker_launch_message_buffer_state[index].set_mcast_wptr(desc.num_traced_programs_needing_go_signal_multicast);
+        }
+        if (desc.num_traced_programs_needing_go_signal_unicast) {
+            this->device->worker_launch_message_buffer_state[index].set_unicast_wptr(desc.num_traced_programs_needing_go_signal_unicast);
+        }
+        // The config buffer manager is unaware of what memory is used inside the trace, so mark all memory as used so that
+        // it will force a stall and avoid stomping on in-use state.
+        // TODO(jbauman): Reuse old state from the trace.
+        this->config_buffer_mgr[index].mark_completely_full(this->expected_num_workers_completed[index]);
     }
-    // The config buffer manager is unaware of what memory is used inside the trace, so mark all memory as used so that
-    // it will force a stall and avoid stomping on in-use state.
-    // TODO(jbauman): Reuse old state from the trace.
-    this->config_buffer_mgr.mark_completely_full(this->expected_num_workers_completed);
-
     if (blocking) {
-        this->finish();
+        this->finish(trace_inst->desc->sub_device_ids);
     }
 }
 
@@ -2698,11 +2833,11 @@ void HWCommandQueue::read_completion_queue() {
     }
 }
 
-void HWCommandQueue::finish() {
+void HWCommandQueue::finish(tt::stl::Span<const uint32_t> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_finish");
     tt::log_debug(tt::LogDispatch, "Finish for command queue {}", this->id);
     std::shared_ptr<Event> event = std::make_shared<Event>();
-    this->enqueue_record_event(event);
+    this->enqueue_record_event(event, false, sub_device_ids);
     if (tt::llrt::OptionsG.get_test_mode_enabled()) {
         while (this->num_entries_in_completion_q > this->num_completed_completion_q_reads) {
             if (DPrintServerHangDetected()) {
@@ -2729,55 +2864,84 @@ volatile bool HWCommandQueue::is_dprint_server_hung() { return dprint_server_han
 volatile bool HWCommandQueue::is_noc_hung() { return illegal_noc_txn_hang; }
 
 void HWCommandQueue::record_begin(const uint32_t tid, std::shared_ptr<detail::TraceDescriptor> ctx) {
+    uint32_t num_sub_devices = this->device->num_sub_devices();
     // Issue event as a barrier and a counter reset
     uint32_t cmd_sequence_sizeB = CQ_PREFETCH_CMD_BARE_MIN_SIZE;
     if (this->device->distributed_dispatcher()) {
         // wait on dispatch_s before issuing counter reset
         cmd_sequence_sizeB += CQ_PREFETCH_CMD_BARE_MIN_SIZE;
     }
+    cmd_sequence_sizeB *= num_sub_devices;
     void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->id);
     HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
 
     CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->device->id());
-    uint32_t dispatch_message_addr = dispatch_constants::get(
+    uint32_t dispatch_message_base_addr = dispatch_constants::get(
         dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-    if (this->device->distributed_dispatcher()) {
-        // wait on dispatch_s before issuing counter reset
-        command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed, true, false, true, 1);
+
+    // Currently Trace will track all sub_devices
+    // Potentially support tracking only used sub_devices in the future
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
+        if (this->device->distributed_dispatcher()) {
+            // wait on dispatch_s before issuing counter reset
+            command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed[i], true, false, true, 1);
+        }
+        // dispatch_d waits for latest non-zero counter from dispatch_s and then clears its local counter
+        command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed[i], true);
     }
-    // dispatch_d waits for latest non-zero counter from dispatch_s and then clears its local counter
-    command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed, true);
 
     this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->id);
     this->manager.fetch_queue_reserve_back(this->id);
     this->manager.fetch_queue_write(cmd_sequence_sizeB, this->id);
-    this->expected_num_workers_completed = 0;
+    std::fill(this->expected_num_workers_completed.begin(), this->expected_num_workers_completed.begin() + num_sub_devices, 0);
     // Record commands using bypass mode
     this->tid = tid;
     this->trace_ctx = ctx;
     // Record original value of launch msg wptr
-    this->multicast_cores_launch_message_wptr_reset = this->device->worker_launch_message_buffer_state.get_mcast_wptr();
-    this->unicast_cores_launch_message_wptr_reset = this->device->worker_launch_message_buffer_state.get_unicast_wptr();
-    // Set launch msg wptr to 0. Every time trace runs on device, it will ensure that the workers
-    // reset their rptr to be in sync with device.
-    this->device->worker_launch_message_buffer_state.reset();
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        this->multicast_cores_launch_message_wptr_reset[i] = this->device->worker_launch_message_buffer_state[i].get_mcast_wptr();
+        this->unicast_cores_launch_message_wptr_reset[i] = this->device->worker_launch_message_buffer_state[i].get_unicast_wptr();
+        // Set launch msg wptr to 0. Every time trace runs on device, it will ensure that the workers
+        // reset their rptr to be in sync with device.
+        this->device->worker_launch_message_buffer_state[i].reset();
+    }
     this->manager.set_bypass_mode(true, true);  // start
-    // Sync values in the trace need to match up with the counter starting at 0 again.
-    this->config_buffer_mgr.mark_completely_full(this->expected_num_workers_completed);
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        // Sync values in the trace need to match up with the counter starting at 0 again.
+        this->config_buffer_mgr[i].mark_completely_full(this->expected_num_workers_completed[i]);
+    }
 }
 
 void HWCommandQueue::record_end() {
-    this->tid = std::nullopt;
-    this->trace_ctx = nullptr;
+    auto &trace_data = this->trace_ctx->data;
+    trace_data = std::move(this->manager.get_bypass_data());
+    // Add command to terminate the trace buffer
+    DeviceCommand command_sequence(CQ_PREFETCH_CMD_BARE_MIN_SIZE);
+    command_sequence.add_prefetch_exec_buf_end();
+    for (int i = 0; i < command_sequence.size_bytes() / sizeof(uint32_t); i++) {
+        trace_data.push_back(((uint32_t*)command_sequence.data())[i]);
+    }
+    // Currently Trace will track all sub_devices
+    uint32_t num_sub_devices = this->device->num_sub_devices();
     // Reset the launch msg wptrs to their original value, so device can run programs after a trace
     // was captured. This is needed since trace capture modifies the wptr state on host, even though device
     // doesn't run any programs.
-    this->device->worker_launch_message_buffer_state.set_mcast_wptr(this->multicast_cores_launch_message_wptr_reset);
-    this->device->worker_launch_message_buffer_state.set_unicast_wptr(this->unicast_cores_launch_message_wptr_reset);
-    this->manager.set_bypass_mode(false, false);  // stop
-    // config_buffer_mgr reflects the state inside the trace, not on the current device, so reset it.
-    // TODO(jbauman): Use a temporary WorkingBufferSetMgr when recording a trace.
-    this->config_buffer_mgr.mark_completely_full(this->expected_num_workers_completed);
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        this->device->worker_launch_message_buffer_state[i].set_mcast_wptr(this->multicast_cores_launch_message_wptr_reset[i]);
+        this->device->worker_launch_message_buffer_state[i].set_unicast_wptr(this->unicast_cores_launch_message_wptr_reset[i]);
+    }
+    // Copy the desc keys into a separate vector. When enqueuing traces, we sometimes need to pass sub-device ids separately
+    this->trace_ctx->sub_device_ids.reserve(this->trace_ctx->descriptors.size());
+    for (const auto& [index, _]: this->trace_ctx->descriptors) {
+        this->trace_ctx->sub_device_ids.push_back(index);
+        // config_buffer_mgr reflects the state inside the trace, not on the current device, so reset it.
+        // TODO(jbauman): Use a temporary WorkingBufferSetMgr when recording a trace.
+        this->config_buffer_mgr[index].mark_completely_full(this->expected_num_workers_completed[index]);
+    }
+    this->tid = std::nullopt;
+    this->trace_ctx = nullptr;
+    this->manager.set_bypass_mode(false, true);  // stop
 }
 
 void HWCommandQueue::terminate() {
@@ -2785,9 +2949,46 @@ void HWCommandQueue::terminate() {
     TT_FATAL(!this->manager.get_bypass_mode(), "Terminate cannot be used with tracing");
     tt::log_debug(tt::LogDispatch, "Terminating dispatch kernels for command queue {}", this->id);
     auto command = EnqueueTerminateCommand(this->id, this->device, this->manager);
-    this->enqueue_command(command, false);
+    this->enqueue_command(command, false, {});
+}
+
+WorkerConfigBufferMgr& HWCommandQueue::get_config_buffer_mgr(uint32_t index) { return config_buffer_mgr[index]; }
+
+void HWCommandQueue::reset_config_buffer_mgr(const uint32_t max_index) {
+    for (uint32_t i = 0; i < max_index; ++i) {
+        this->config_buffer_mgr[i] = WorkerConfigBufferMgr();
+        for (uint32_t index = 0; index < tt::tt_metal::hal.get_programmable_core_type_count(); index++) {
+            this->config_buffer_mgr[i].init_add_buffer(
+                tt::tt_metal::hal.get_dev_addr(
+                    tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG),
+                tt::tt_metal::hal.get_dev_size(
+                    tt::tt_metal::hal.get_programmable_core_type(index), tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG));
+        }
+        // Subtract 1 from the number of entries, so the watcher can read information (e.g. fired asserts) from the previous
+        // launch message.
+        this->config_buffer_mgr[i].init_add_buffer(0, launch_msg_buffer_num_entries - 1);
+    }
+}
+
+std::vector<std::pair<uint32_t, uint32_t>> HWCommandQueue::get_expected_workers_completed(tt::stl::Span<const uint32_t> sub_device_ids) const {
+    std::vector<std::pair<uint32_t, uint32_t>> expected_workers_completed;
+    if (sub_device_ids.empty()) {
+        expected_workers_completed.reserve(this->device->num_sub_devices());
+        for (uint32_t i = 0; i < this->device->num_sub_devices(); ++i) {
+            expected_workers_completed.emplace_back(i, this->expected_num_workers_completed[i]);
+        }
+    } else {
+        expected_workers_completed.reserve(sub_device_ids.size());
+        for (uint32_t i = 0; i < sub_device_ids.size(); ++i) {
+            auto sub_device_id = sub_device_ids[i];
+            TT_FATAL(sub_device_id < this->device->num_sub_devices(), "Invalid sub_device_id: {}", sub_device_id);
+            expected_workers_completed.emplace_back(sub_device_id, this->expected_num_workers_completed[sub_device_id]);
+        }
+    }
+    return expected_workers_completed;
 }
 
+
 void EnqueueAddBufferToProgramImpl(
     const std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     Program& program) {
@@ -2861,7 +3062,8 @@ void EnqueueReadBuffer(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t>& dst,
-    bool blocking) {
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
     // TODO(agrebenisan): Move to deprecated
     ZoneScoped;
     tt_metal::detail::DispatchStateCheck(true);
@@ -2884,36 +3086,39 @@ void EnqueueReadBuffer(
         buffer);
 
     // TODO(agrebenisan): Move to deprecated
-    EnqueueReadBuffer(cq, buffer, dst.data(), blocking);
+    EnqueueReadBuffer(cq, buffer, dst.data(), blocking, sub_device_ids);
 }
 
 void EnqueueWriteBuffer(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t>& src,
-    bool blocking) {
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
     // TODO(agrebenisan): Move to deprecated
-    EnqueueWriteBuffer(cq, buffer, src.data(), blocking);
+    EnqueueWriteBuffer(cq, buffer, src.data(), blocking, sub_device_ids);
 }
 
 void EnqueueReadBuffer(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void* dst,
-    bool blocking) {
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
-        .type = EnqueueCommandType::ENQUEUE_READ_BUFFER, .blocking = blocking, .buffer = buffer, .dst = dst});
+        .type = EnqueueCommandType::ENQUEUE_READ_BUFFER, .blocking = blocking, .buffer = buffer, .dst = dst, .sub_device_ids = sub_device_ids});
 }
 
 void EnqueueWriteBuffer(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
-    bool blocking) {
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
-        .type = EnqueueCommandType::ENQUEUE_WRITE_BUFFER, .blocking = blocking, .buffer = buffer, .src = src});
+        .type = EnqueueCommandType::ENQUEUE_WRITE_BUFFER, .blocking = blocking, .buffer = buffer, .src = src, .sub_device_ids = sub_device_ids});
 }
 
 void EnqueueProgram(
@@ -2923,12 +3128,13 @@ void EnqueueProgram(
         CommandInterface{.type = EnqueueCommandType::ENQUEUE_PROGRAM, .blocking = blocking, .program = &program});
 }
 
-void EnqueueRecordEvent(CommandQueue& cq, const std::shared_ptr<Event>& event) {
+void EnqueueRecordEvent(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const uint32_t> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
         .type = EnqueueCommandType::ENQUEUE_RECORD_EVENT,
         .blocking = false,
         .event = event,
+        .sub_device_ids = sub_device_ids
     });
 }
 
@@ -2977,9 +3183,9 @@ bool EventQuery(const std::shared_ptr<Event>& event) {
     return event_completed;
 }
 
-void Finish(CommandQueue& cq) {
+void Finish(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids) {
     detail::DispatchStateCheck(true);
-    cq.run_command(CommandInterface{.type = EnqueueCommandType::FINISH, .blocking = true});
+    cq.run_command(CommandInterface{.type = EnqueueCommandType::FINISH, .blocking = true, .sub_device_ids = sub_device_ids});
     TT_ASSERT(
         !(cq.device()->hw_command_queue(cq.id()).is_dprint_server_hung()),
         "Command Queue could not finish: device hang due to unanswered DPRINT WAIT.");
@@ -3002,13 +3208,14 @@ void EnqueueReadBufferImpl(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void* dst,
-    bool blocking) {
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
     std::visit(
-        [&cq, dst, blocking](auto&& b) {
+        [&](auto&& b) {
             using T = std::decay_t<decltype(b)>;
             if constexpr (
                 std::is_same_v<T, std::reference_wrapper<Buffer>> || std::is_same_v<T, std::shared_ptr<Buffer>>) {
-                cq.hw_command_queue().enqueue_read_buffer(b, dst, blocking);
+                cq.hw_command_queue().enqueue_read_buffer(b, dst, blocking, sub_device_ids);
             }
         },
         buffer);
@@ -3018,8 +3225,9 @@ void EnqueueWriteBufferImpl(
     CommandQueue& cq,
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
-    bool blocking) {
-    cq.hw_command_queue().enqueue_write_buffer(buffer, src, blocking);
+    bool blocking,
+    tt::stl::Span<const uint32_t> sub_device_ids) {
+    cq.hw_command_queue().enqueue_write_buffer(buffer, src, blocking, sub_device_ids);
 }
 
 void EnqueueProgramImpl(
@@ -3037,8 +3245,8 @@ void EnqueueProgramImpl(
 
 }
 
-void EnqueueRecordEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event) {
-    cq.hw_command_queue().enqueue_record_event(event);
+void EnqueueRecordEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const SubDeviceId> sub_device_ids) {
+    cq.hw_command_queue().enqueue_record_event(event, false, sub_device_ids);
 }
 
 void EnqueueWaitForEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event) {
@@ -3054,7 +3262,7 @@ void EnqueueWaitForEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& eve
     cq.hw_command_queue().enqueue_wait_for_event(event);
 }
 
-void FinishImpl(CommandQueue& cq) { cq.hw_command_queue().finish(); }
+void FinishImpl(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids) { cq.hw_command_queue().finish(sub_device_ids); }
 
 void EnqueueTraceImpl(CommandQueue& cq, uint32_t trace_id, bool blocking) {
     cq.hw_command_queue().enqueue_trace(trace_id, blocking);
@@ -3218,13 +3426,13 @@ void CommandQueue::run_command_impl(const CommandInterface& command) {
             TT_ASSERT(command.dst.has_value(), "Must provide a dst!");
             TT_ASSERT(command.buffer.has_value(), "Must provide a buffer!");
             TT_ASSERT(command.blocking.has_value(), "Must specify blocking value!");
-            EnqueueReadBufferImpl(*this, command.buffer.value(), command.dst.value(), command.blocking.value());
+            EnqueueReadBufferImpl(*this, command.buffer.value(), command.dst.value(), command.blocking.value(), command.sub_device_ids);
             break;
         case EnqueueCommandType::ENQUEUE_WRITE_BUFFER:
             TT_ASSERT(command.src.has_value(), "Must provide a src!");
             TT_ASSERT(command.buffer.has_value(), "Must provide a buffer!");
             TT_ASSERT(command.blocking.has_value(), "Must specify blocking value!");
-            EnqueueWriteBufferImpl(*this, command.buffer.value(), command.src.value(), command.blocking.value());
+            EnqueueWriteBufferImpl(*this, command.buffer.value(), command.src.value(), command.blocking.value(), command.sub_device_ids);
             break;
         case EnqueueCommandType::GET_BUF_ADDR:
             TT_ASSERT(command.dst.has_value(), "Must provide a dst address!");
@@ -3250,13 +3458,13 @@ void CommandQueue::run_command_impl(const CommandInterface& command) {
             break;
         case EnqueueCommandType::ENQUEUE_RECORD_EVENT:
             TT_ASSERT(command.event.has_value(), "Must provide an event!");
-            EnqueueRecordEventImpl(*this, command.event.value());
+            EnqueueRecordEventImpl(*this, command.event.value(), command.sub_device_ids);
             break;
         case EnqueueCommandType::ENQUEUE_WAIT_FOR_EVENT:
             TT_ASSERT(command.event.has_value(), "Must provide an event!");
             EnqueueWaitForEventImpl(*this, command.event.value());
             break;
-        case EnqueueCommandType::FINISH: FinishImpl(*this); break;
+        case EnqueueCommandType::FINISH: FinishImpl(*this, command.sub_device_ids); break;
         case EnqueueCommandType::FLUSH:
             // Used by CQ to push prior commands
             break;
diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp
index 7ba939df748..4db17e205ad 100644
--- a/tt_metal/impl/dispatch/command_queue.hpp
+++ b/tt_metal/impl/dispatch/command_queue.hpp
@@ -79,7 +79,7 @@ class EnqueueReadBufferCommand : public Command {
     Device* device;
     uint32_t command_queue_id;
     NOC noc_index;
-    uint32_t expected_num_workers_completed;
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
     uint32_t src_page_index;
     uint32_t pages_to_read;
 
@@ -92,7 +92,7 @@ class EnqueueReadBufferCommand : public Command {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         uint32_t src_page_index = 0,
         std::optional<uint32_t> pages_to_read = std::nullopt);
 
@@ -115,7 +115,7 @@ class EnqueueReadInterleavedBufferCommand : public EnqueueReadBufferCommand {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         uint32_t src_page_index = 0,
         std::optional<uint32_t> pages_to_read = std::nullopt) :
         EnqueueReadBufferCommand(
@@ -144,7 +144,7 @@ class EnqueueReadShardedBufferCommand : public EnqueueReadBufferCommand {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         const CoreCoord& core,
         uint32_t bank_base_address,
         uint32_t src_page_index = 0,
@@ -179,7 +179,7 @@ class EnqueueWriteBufferCommand : public Command {
     NOC noc_index;
     const void* src;
     const Buffer& buffer;
-    uint32_t expected_num_workers_completed;
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
     uint32_t bank_base_address;
     uint32_t padded_page_size;
     uint32_t dst_page_index;
@@ -195,7 +195,7 @@ class EnqueueWriteBufferCommand : public Command {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         uint32_t bank_base_address,
         uint32_t padded_page_size,
         uint32_t dst_page_index = 0,
@@ -222,7 +222,7 @@ class EnqueueWriteInterleavedBufferCommand : public EnqueueWriteBufferCommand {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         uint32_t bank_base_address,
         uint32_t padded_page_size,
         uint32_t dst_page_index = 0,
@@ -261,7 +261,7 @@ class EnqueueWriteShardedBufferCommand : public EnqueueWriteBufferCommand {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         uint32_t bank_base_address,
         const std::shared_ptr<const BufferPageMapping>& buffer_page_mapping,
         const CoreCoord& core,
@@ -302,6 +302,8 @@ class EnqueueProgramCommand : public Command {
     uint32_t dispatch_message_addr;
     uint32_t multicast_cores_launch_message_wptr = 0;
     uint32_t unicast_cores_launch_message_wptr = 0;
+    // TODO: There will be multiple ids once programs support spanning multiple sub_devices
+    uint32_t sub_device_id = 0;
 
    public:
     EnqueueProgramCommand(
@@ -314,7 +316,8 @@ class EnqueueProgramCommand : public Command {
         WorkerConfigBufferMgr& config_buffer_mgr,
         uint32_t expected_num_workers_completed,
         uint32_t multicast_cores_launch_message_wptr,
-        uint32_t unicast_cores_launch_message_wptr);
+        uint32_t unicast_cores_launch_message_wptr,
+        uint32_t sub_device_id);
 
     void assemble_preamble_commands(
         ProgramCommandSequence& program_command_sequence, const tt::stl::Span<ConfigBufferEntry> kernel_config_addrs);
@@ -343,7 +346,7 @@ class EnqueueRecordEventCommand : public Command {
     NOC noc_index;
     SystemMemoryManager& manager;
     uint32_t event_id;
-    uint32_t expected_num_workers_completed;
+    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
     bool clear_count;
     bool write_barrier;
 
@@ -354,7 +357,7 @@ class EnqueueRecordEventCommand : public Command {
         NOC noc_index,
         SystemMemoryManager& manager,
         uint32_t event_id,
-        uint32_t expected_num_workers_completed,
+        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
         bool clear_count = false,
         bool write_barrier = true);
 
@@ -395,8 +398,8 @@ class EnqueueTraceCommand : public Command {
     Buffer& buffer;
     Device* device;
     SystemMemoryManager& manager;
-    std::shared_ptr<detail::TraceDescriptor>& desc;
-    uint32_t& expected_num_workers_completed;
+    std::shared_ptr<detail::TraceDescriptor>& descriptor;
+    std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES>& expected_num_workers_completed;
     bool clear_count;
     NOC noc_index;
     CoreCoord dispatch_core;
@@ -405,9 +408,9 @@ class EnqueueTraceCommand : public Command {
         uint32_t command_queue_id,
         Device* device,
         SystemMemoryManager& manager,
-        std::shared_ptr<detail::TraceDescriptor>& desc,
+        std::shared_ptr<detail::TraceDescriptor>& descriptor,
         Buffer& buffer,
-        uint32_t& expected_num_workers_completed,
+        std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES>& expected_num_workers_completed,
         NOC noc_index,
         CoreCoord dispatch_core);
 
@@ -507,7 +510,9 @@ class HWCommandQueue {
 
     void record_begin(const uint32_t tid, std::shared_ptr<detail::TraceDescriptor> ctx);
     void record_end();
-    void set_unicast_only_cores_on_dispatch(const std::vector<uint32_t>& unicast_only_noc_encodings);
+    void set_num_worker_sems_on_dispatch(uint32_t num_worker_sems);
+    void reset_worker_state(bool reset_launch_msg_state);
+
    private:
     uint32_t id;
     uint32_t size_B;
@@ -515,11 +520,11 @@ class HWCommandQueue {
     std::shared_ptr<detail::TraceDescriptor> trace_ctx;
     std::thread completion_queue_thread;
     SystemMemoryManager& manager;
-    WorkerConfigBufferMgr config_buffer_mgr;
+    std::array<tt::tt_metal::WorkerConfigBufferMgr, dispatch_constants::DISPATCH_MESSAGE_ENTRIES> config_buffer_mgr;
     // Expected value of DISPATCH_MESSAGE_ADDR in dispatch core L1
     //  Value in L1 incremented by worker to signal completion to dispatch. Value on host is set on each enqueue program
     //  call
-    uint32_t expected_num_workers_completed;
+    std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES> expected_num_workers_completed;
 
     volatile bool exit_condition;
     volatile bool dprint_server_hang = false;
@@ -533,8 +538,8 @@ class HWCommandQueue {
     // Trace capture is a fully host side operation, but it modifies the state of the wptrs above
     // To ensure that host and device are not out of sync, we reset the wptrs to their original values
     // post trace capture.
-    uint32_t multicast_cores_launch_message_wptr_reset = 0;
-    uint32_t unicast_cores_launch_message_wptr_reset = 0;
+    std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES> multicast_cores_launch_message_wptr_reset;
+    std::array<uint32_t, dispatch_constants::DISPATCH_MESSAGE_ENTRIES> unicast_cores_launch_message_wptr_reset;
     Device* device;
 
     std::condition_variable reader_thread_cv;
@@ -548,23 +553,28 @@ class HWCommandQueue {
         const detail::ReadBufferDescriptor& read_buffer_descriptor, chip_id_t mmio_device_id, uint16_t channel);
     void read_completion_queue();
 
+    // sub_device_ids only needs to be passed when blocking and there are specific sub_devices to wait on
     template <typename T>
-    void enqueue_command(T& command, bool blocking);
+    void enqueue_command(T& command, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
 
-    void enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking);
-    void enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking);
+    void enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
+    void enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
     void enqueue_write_buffer(
-        std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking);
-    void enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking);
+        std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
+    void enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
     void enqueue_program(Program& program, bool blocking);
-    void enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count = false);
+    void enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count = false, tt::stl::Span<const uint32_t> sub_device_ids = {});
     void enqueue_wait_for_event(const std::shared_ptr<Event>& sync_event, bool clear_count = false);
     void enqueue_trace(const uint32_t trace_id, bool blocking);
-    void finish();
+    void finish(tt::stl::Span<const uint32_t> sub_device_ids);
     void terminate();
     void increment_num_entries_in_completion_q();
     void set_exit_condition();
-    WorkerConfigBufferMgr& get_config_buffer_mgr() { return this->config_buffer_mgr; }
+
+    WorkerConfigBufferMgr& get_config_buffer_mgr(uint32_t index);
+    void reset_config_buffer_mgr(const uint32_t max_index);
+    std::vector<std::pair<uint32_t, uint32_t>> get_expected_workers_completed(tt::stl::Span<const uint32_t> sub_device_ids) const;
+
     friend void EnqueueTraceImpl(CommandQueue& cq, uint32_t trace_id, bool blocking);
     friend void EnqueueProgramImpl(
         CommandQueue& cq,
@@ -574,17 +584,18 @@ class HWCommandQueue {
         CommandQueue& cq,
         std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
         void* dst,
-        bool blocking);
+        bool blocking,
+        tt::stl::Span<const uint32_t> sub_device_ids);
     friend void EnqueueWriteBufferImpl(
         CommandQueue& cq,
         std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
         HostDataType src,
-        bool blocking);
+        bool blocking,
+        tt::stl::Span<const uint32_t> sub_device_ids);
     friend void EnqueueGetBufferAddrImpl(void* dst_buf_addr, const Buffer* buffer);
-    friend void EnqueueRecordEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event);
+    friend void EnqueueRecordEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const SubDeviceId> sub_device_ids);
     friend void EnqueueWaitForEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event);
-    friend void FinishImpl(CommandQueue& cq);
-    friend void EnqueueRecordEvent(CommandQueue& cq, const std::shared_ptr<Event>& event);
+    friend void FinishImpl(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids);
     friend CommandQueue;
     friend Device;
     friend detail::Program_;
@@ -602,6 +613,7 @@ struct CommandInterface {
     std::optional<void*> dst;
     std::optional<std::shared_ptr<Event>> event;
     std::optional<uint32_t> trace_id;
+    tt::stl::Span<const uint32_t> sub_device_ids;
 };
 
 inline namespace v0 {
diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp
index 346cbcbc784..40ad90a523b 100644
--- a/tt_metal/impl/dispatch/command_queue_interface.hpp
+++ b/tt_metal/impl/dispatch/command_queue_interface.hpp
@@ -33,8 +33,9 @@ enum class CommandQueueDeviceAddrType : uint8_t {
     // Max of 2 CQs. COMPLETION_Q*_LAST_EVENT_PTR track the last completed event in the respective CQs
     COMPLETION_Q0_LAST_EVENT = 4,
     COMPLETION_Q1_LAST_EVENT = 5,
-    DISPATCH_MESSAGE = 6,
-    UNRESERVED = 7
+    DISPATCH_S_SYNC_SEM = 6,
+    DISPATCH_MESSAGE = 7,
+    UNRESERVED = 8
 };
 
 enum class CommandQueueHostAddrType : uint8_t {
@@ -63,8 +64,12 @@ struct dispatch_constants {
         return *inst;
     }
 
+    using prefetch_q_entry_type = uint16_t;
+
     static constexpr uint8_t MAX_NUM_HW_CQS = 2;
-    typedef uint16_t prefetch_q_entry_type;
+    static constexpr uint32_t DISPATCH_MESSAGE_ENTRIES = 16;
+    static constexpr uint32_t DISPATCH_MESSAGES_MAX_OFFSET = std::numeric_limits<decltype(go_msg_t::dispatch_message_offset)>::max();
+
     static constexpr uint32_t PREFETCH_Q_LOG_MINSIZE = 4;
 
     static constexpr uint32_t LOG_TRANSFER_PAGE_SIZE = 12;
@@ -127,6 +132,12 @@ struct dispatch_constants {
         return tt::utils::underlying_type<CommandQueueHostAddrType>(host_addr) * tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST);
     }
 
+    uint32_t get_dispatch_message_offset(uint32_t index) const {
+        TT_ASSERT(index < DISPATCH_MESSAGE_ENTRIES);
+        uint32_t offset = index * hal.get_alignment(HalMemType::L1);
+        return offset;
+    }
+
    private:
     dispatch_constants(const CoreType &core_type, const uint32_t num_hw_cqs) {
         TT_ASSERT(core_type == CoreType::WORKER or core_type == CoreType::ETH);
@@ -159,6 +170,7 @@ struct dispatch_constants {
         TT_ASSERT(cmddat_q_size_ >= 2 * max_prefetch_command_size_);
         TT_ASSERT(scratch_db_size_ % 2 == 0);
         TT_ASSERT((dispatch_buffer_block_size & (dispatch_buffer_block_size - 1)) == 0);
+        TT_ASSERT(DISPATCH_MESSAGE_ENTRIES <= DISPATCH_MESSAGES_MAX_OFFSET / L1_ALIGNMENT + 1, "Number of dispatch message entries exceeds max representable offset");
 
         uint32_t pcie_alignment = tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST);
         uint32_t l1_alignment = tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::L1);
@@ -170,8 +182,10 @@ struct dispatch_constants {
                 device_cq_addr_sizes_[dev_addr_idx] = sizeof(uint32_t);
             } else if (dev_addr_type == CommandQueueDeviceAddrType::PREFETCH_Q_PCIE_RD) {
                 device_cq_addr_sizes_[dev_addr_idx] = l1_alignment - sizeof(uint32_t);
+            } else if (dev_addr_type == CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM) {
+                device_cq_addr_sizes_[dev_addr_idx] = DISPATCH_MESSAGE_ENTRIES * l1_alignment;
             } else if (dev_addr_type == CommandQueueDeviceAddrType::DISPATCH_MESSAGE) {
-                device_cq_addr_sizes_[dev_addr_idx] = 32; // Should this be 2x l1_alignment?
+                device_cq_addr_sizes_[dev_addr_idx] = DISPATCH_MESSAGE_ENTRIES * l1_alignment;
             } else {
                 device_cq_addr_sizes_[dev_addr_idx] = l1_alignment;
             }
@@ -531,7 +545,7 @@ class SystemMemoryManager {
 
     bool get_bypass_mode() { return this->bypass_enable; }
 
-    std::vector<uint32_t> get_bypass_data() { return std::move(this->bypass_buffer); }
+    std::vector<uint32_t>& get_bypass_data() { return this->bypass_buffer; }
 
     uint32_t get_issue_queue_size(const uint8_t cq_id) const { return this->cq_interfaces[cq_id].issue_fifo_size << 4; }
 
diff --git a/tt_metal/impl/dispatch/cq_commands.hpp b/tt_metal/impl/dispatch/cq_commands.hpp
index 675be8dd774..aa30a0ba85e 100644
--- a/tt_metal/impl/dispatch/cq_commands.hpp
+++ b/tt_metal/impl/dispatch/cq_commands.hpp
@@ -49,7 +49,7 @@ enum CQDispatchCmdId : uint8_t {
     CQ_DISPATCH_CMD_TERMINATE = 14,         // quit
     CQ_DISPATCH_CMD_SEND_GO_SIGNAL = 15,
     CQ_DISPATCH_NOTIFY_SLAVE_GO_SIGNAL = 16,
-    CQ_DISPATCH_SET_UNICAST_ONLY_CORES = 17,
+    CQ_DISPATCH_SET_NUM_WORKER_SEMS = 17,
     CQ_DISPATCH_CMD_MAX_COUNT,              // for checking legal IDs
 };
 
@@ -259,7 +259,8 @@ struct CQDispatchSetUnicastOnlyCoresCmd {
 
 struct CQDispatchGoSignalMcastCmd {
     uint32_t go_signal;
-    uint8_t mcast_flag; // mcast or unicast or both
+    uint8_t num_mcast_txns; // Cmd expects noc_mcast_coords and num_mcast_dests follow the cmd
+    uint8_t num_unicast_txns; // Cmd expects noc_unicast_coords to follow the mcast data
     uint32_t wait_count;
     uint32_t wait_addr;
 } __attribute__((packed));
@@ -267,10 +268,16 @@ struct CQDispatchGoSignalMcastCmd {
 struct CQDispatchNotifySlaveGoSignalCmd {
     // sends a counter update to dispatch_s when it sees this cmd
     uint8_t wait; // if true, issue a write barrier before sending signal to dispatch_s
-    uint16_t pad2;
+    uint16_t index_bitmask;
     uint32_t pad3;
 } __attribute__((packed));
 
+struct CQDispatchSetNumWorkerSemsCmd {
+    uint8_t pad1;
+    uint16_t pad2;
+    uint32_t num_worker_sems;
+} __attribute__ ((packed));
+
 struct CQDispatchCmd {
     CQDispatchBaseCmd base;
 
@@ -287,6 +294,7 @@ struct CQDispatchCmd {
         CQDispatchGoSignalMcastCmd mcast;
         CQDispatchSetUnicastOnlyCoresCmd set_unicast_only_cores;
         CQDispatchNotifySlaveGoSignalCmd notify_dispatch_s_go_signal;
+        CQDispatchSetNumWorkerSemsCmd set_num_worker_sems;
     } __attribute__((packed));
 };
 
diff --git a/tt_metal/impl/dispatch/debug_tools.cpp b/tt_metal/impl/dispatch/debug_tools.cpp
index ea5141443b6..66ca865673d 100644
--- a/tt_metal/impl/dispatch/debug_tools.cpp
+++ b/tt_metal/impl/dispatch/debug_tools.cpp
@@ -178,6 +178,10 @@ uint32_t dump_dispatch_cmd(CQDispatchCmd *cmd, uint32_t cmd_addr, std::ofstream
                     val(cmd->debug.stride));
                 break;
             case CQ_DISPATCH_CMD_DELAY: cq_file << fmt::format(" (delay={})", val(cmd->delay.delay)); break;
+            case CQ_DISPATCH_SET_NUM_WORKER_SEMS:
+                cq_file << fmt::format(
+                    " (num_worker_sems={})", val(cmd->set_num_worker_sems.num_worker_sems));
+                break;
             // These commands don't have any additional data to dump.
             case CQ_DISPATCH_CMD_ILLEGAL: break;
             case CQ_DISPATCH_CMD_GO: break;
@@ -185,7 +189,6 @@ uint32_t dump_dispatch_cmd(CQDispatchCmd *cmd, uint32_t cmd_addr, std::ofstream
             case CQ_DISPATCH_CMD_EXEC_BUF_END: break;
             case CQ_DISPATCH_CMD_SEND_GO_SIGNAL: break;
             case CQ_DISPATCH_NOTIFY_SLAVE_GO_SIGNAL: break;
-            case CQ_DISPATCH_SET_UNICAST_ONLY_CORES: break;
             case CQ_DISPATCH_CMD_TERMINATE: break;
             case CQ_DISPATCH_CMD_SET_WRITE_OFFSET: break;
             default: TT_THROW("Unrecognized dispatch command: {}", cmd_id); break;
diff --git a/tt_metal/impl/dispatch/device_command.hpp b/tt_metal/impl/dispatch/device_command.hpp
index 2e0decaae05..e070e7b4c12 100644
--- a/tt_metal/impl/dispatch/device_command.hpp
+++ b/tt_metal/impl/dispatch/device_command.hpp
@@ -93,7 +93,7 @@ class DeviceCommand {
             relay_wait->base.cmd_id = CQ_PREFETCH_CMD_RELAY_INLINE;
             relay_wait->relay_inline.dispatcher_type = dispatcher_type;
             relay_wait->relay_inline.length = sizeof(CQDispatchCmd);
-            relay_wait->relay_inline.stride = this->pcie_alignment;
+            relay_wait->relay_inline.stride = align(sizeof(CQDispatchCmd) + sizeof(CQPrefetchCmd), this->pcie_alignment);
 
             wait_cmd->base.cmd_id = CQ_DISPATCH_CMD_WAIT;
             wait_cmd->wait.barrier = barrier;
@@ -250,14 +250,27 @@ class DeviceCommand {
         }
     }
 
-    void add_dispatch_go_signal_mcast(uint32_t wait_count, uint8_t mcast_flag, uint32_t go_signal, uint32_t wait_addr, DispatcherSelect dispatcher_type) {
-        this->add_prefetch_relay_inline(true, sizeof(CQDispatchCmd), dispatcher_type);
+    void add_dispatch_go_signal_mcast(
+        uint32_t wait_count,
+        uint32_t go_signal,
+        uint32_t wait_addr,
+        uint32_t num_mcast_txns,
+        uint32_t num_unicast_txns,
+        const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data,
+        DispatcherSelect dispatcher_type) {
+        TT_ASSERT(num_mcast_txns <= std::numeric_limits<uint8_t>::max(), "Number of mcast destinations {} exceeds maximum {}", num_mcast_txns, std::numeric_limits<uint8_t>::max());
+        TT_ASSERT(num_unicast_txns <= std::numeric_limits<uint8_t>::max(), "Number of unicast destinations {} exceeds maximum {}", num_unicast_txns, std::numeric_limits<uint8_t>::max());
+        uint32_t total_data_size = noc_mcast_unicast_data.size() * sizeof(uint32_t);
+        uint32_t lengthB = sizeof(CQDispatchCmd) + total_data_size;
+        TT_ASSERT(lengthB <= (1 << dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE), "Data for go signal mcast must fit within one page");
+        this->add_prefetch_relay_inline(true, lengthB, dispatcher_type);
         auto initialize_mcast_cmd = [&](CQDispatchCmd *mcast_cmd) {
             *mcast_cmd = {};
             mcast_cmd->base.cmd_id = CQ_DISPATCH_CMD_SEND_GO_SIGNAL;
             mcast_cmd->mcast.go_signal = go_signal;
             mcast_cmd->mcast.wait_count = wait_count;
-            mcast_cmd->mcast.mcast_flag = mcast_flag;
+            mcast_cmd->mcast.num_mcast_txns = num_mcast_txns;
+            mcast_cmd->mcast.num_unicast_txns = num_unicast_txns;
             mcast_cmd->mcast.wait_addr = wait_addr;
         };
         CQDispatchCmd *mcast_cmd_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
@@ -269,16 +282,19 @@ class DeviceCommand {
         } else {
             initialize_mcast_cmd(mcast_cmd_dst);
         }
+        uint8_t * noc_coord_dst = this->reserve_space<uint8_t *>(total_data_size);
+        this->memcpy(noc_coord_dst, noc_mcast_unicast_data.data(), total_data_size);
         this->cmd_write_offsetB = align(this->cmd_write_offsetB, this->pcie_alignment);
     }
 
-    void add_notify_dispatch_s_go_signal_cmd(uint8_t wait) {
+    void add_notify_dispatch_s_go_signal_cmd(uint8_t wait, uint16_t index_bitmask) {
         // Command to have dispatch_master send a notification to dispatch_slave
         this->add_prefetch_relay_inline(true, sizeof(CQDispatchCmd), DispatcherSelect::DISPATCH_MASTER);
         auto initialize_sem_update_cmd = [&](CQDispatchCmd *sem_update_cmd) {
             *sem_update_cmd = {};
             sem_update_cmd->base.cmd_id = CQ_DISPATCH_NOTIFY_SLAVE_GO_SIGNAL;
             sem_update_cmd->notify_dispatch_s_go_signal.wait = wait;
+            sem_update_cmd->notify_dispatch_s_go_signal.index_bitmask = index_bitmask;
         };
         CQDispatchCmd *dispatch_s_sem_update_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
         if constexpr (hugepage_write) {
@@ -375,26 +391,22 @@ class DeviceCommand {
             initialize_exec_buf_cmd(exec_buf_cmd_dst);
         }
     }
-    void add_dispatch_set_unicast_only_cores(const std::vector<uint32_t>& noc_encodings, DispatcherSelect dispatcher_type) {
-        // noc_encodings are only populated if the device has active ethernet links. For devices such as Grayskull and N150, which
-        // don't have active ethernet links, this is essentially a NOP (command with empty payload).
-        this->add_prefetch_relay_inline(true, sizeof(CQDispatchCmd) + noc_encodings.size() * sizeof(uint32_t), dispatcher_type);
-        auto initialize_set_unicast_only_cores_cmd = [&] (CQDispatchCmd *set_unicast_only_cores_cmd) {
-            *set_unicast_only_cores_cmd = {};
-            set_unicast_only_cores_cmd->base.cmd_id = CQ_DISPATCH_SET_UNICAST_ONLY_CORES;
-            set_unicast_only_cores_cmd->set_unicast_only_cores.num_unicast_only_cores = noc_encodings.size();
+
+    void add_dispatch_set_num_worker_sems(const uint32_t num_worker_sems, DispatcherSelect dispatcher_type) {
+        this->add_prefetch_relay_inline(true, sizeof(CQDispatchCmd), dispatcher_type);
+        auto initialize_set_num_worker_sems_cmd = [&] (CQDispatchCmd *set_num_worker_sems_cmd) {
+            set_num_worker_sems_cmd->base.cmd_id = CQ_DISPATCH_SET_NUM_WORKER_SEMS;
+            set_num_worker_sems_cmd->set_num_worker_sems.num_worker_sems = num_worker_sems;
         };
-        CQDispatchCmd *set_unicast_only_cores_cmd_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
+        CQDispatchCmd *set_num_worker_sems_cmd_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
         if constexpr (hugepage_write) {
-            alignas(MEMCPY_ALIGNMENT) CQDispatchCmd set_unicast_only_cores_cmd;
-            initialize_set_unicast_only_cores_cmd(&set_unicast_only_cores_cmd);
-            this->memcpy(set_unicast_only_cores_cmd_dst, &set_unicast_only_cores_cmd, sizeof(CQDispatchCmd));
+            alignas(MEMCPY_ALIGNMENT) CQDispatchCmd set_num_worker_sems_cmd;
+            initialize_set_num_worker_sems_cmd(&set_num_worker_sems_cmd);
+            this->memcpy(set_num_worker_sems_cmd_dst, &set_num_worker_sems_cmd, sizeof(CQDispatchCmd));
         } else {
-            initialize_set_unicast_only_cores_cmd(set_unicast_only_cores_cmd_dst);
+            initialize_set_num_worker_sems_cmd(set_num_worker_sems_cmd_dst);
         }
-        uint32_t data_sizeB = noc_encodings.size() * sizeof(uint32_t);
-        uint32_t increment_sizeB = align(data_sizeB, this->pcie_alignment);
-        this->add_data(noc_encodings.data(), data_sizeB, increment_sizeB);
+        this->cmd_write_offsetB = align(this->cmd_write_offsetB, this->pcie_alignment);
     }
 
     void add_dispatch_set_write_offsets(uint32_t write_offset0, uint32_t write_offset1, uint32_t write_offset2) {
diff --git a/tt_metal/impl/dispatch/dispatch_core_manager.hpp b/tt_metal/impl/dispatch/dispatch_core_manager.hpp
index bc700ac6219..5fba1392b21 100644
--- a/tt_metal/impl/dispatch/dispatch_core_manager.hpp
+++ b/tt_metal/impl/dispatch/dispatch_core_manager.hpp
@@ -70,7 +70,6 @@ struct dispatch_worker_build_settings_t{
     uint32_t cb_pages;
     uint32_t tunnel_stop;
     uint32_t num_compute_cores;
-    uint32_t compute_core_mcast_noc_coords;
     uint32_t vc_count;
 };
 
diff --git a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
index 384a1793a7d..14345084738 100644
--- a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
+++ b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
@@ -41,8 +41,8 @@ constexpr uint32_t prefetch_h_noc_xy = get_compile_time_arg_val(16);
 constexpr uint32_t prefetch_h_local_downstream_sem_addr = get_compile_time_arg_val(17);
 constexpr uint32_t prefetch_h_max_credits = get_compile_time_arg_val(18);
 constexpr uint32_t packed_write_max_unicast_sub_cmds = get_compile_time_arg_val(19); // Number of cores in compute grid
-constexpr uint32_t dispatch_s_sem_id = get_compile_time_arg_val(20);
-constexpr uint32_t worker_mcast_grid = get_compile_time_arg_val(21);
+constexpr uint32_t dispatch_s_sync_sem_base_addr = get_compile_time_arg_val(20);
+constexpr uint32_t max_num_worker_sems = get_compile_time_arg_val(21); // maximum number of worker semaphores
 constexpr uint32_t mcast_go_signal_addr = get_compile_time_arg_val(22);
 constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(23);
 constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(24);
@@ -110,9 +110,6 @@ typedef struct GoSignalState {
 static GoSignalState go_signal_state_ring_buf[4];
 static uint8_t go_signal_state_wr_ptr = 0;
 static uint8_t go_signal_state_rd_ptr = 0;
-// Used when dispatch_s is moved into main dispatcher and needs to unicast + multicast go signals
-static uint32_t unicast_only_cores[16];
-static int num_unicast_cores = -1; // Initialize to -1: Number of cores we need to unicast go signals to. Host will set this during init.
 
 FORCE_INLINE volatile uint32_t *get_cq_completion_read_ptr() {
     return reinterpret_cast<volatile uint32_t *>(dev_completion_q_rd_ptr);
@@ -822,30 +819,16 @@ void process_go_signal_mcast_cmd() {
     *aligned_go_signal_storage = cmd->mcast.go_signal;
 
     while (*worker_sem_addr < cmd->mcast.wait_count);
-    if (cmd->mcast.mcast_flag & GoSignalMcastSettings::SEND_MCAST) {
-        uint64_t dst = get_noc_addr_helper(worker_mcast_grid, mcast_go_signal_addr);
-        // packed_write_max_unicast_sub_cmds is the total number of compute cores (num_mcast_dests for this txn)
-        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), packed_write_max_unicast_sub_cmds);
+    volatile uint32_t tt_l1_ptr *data_ptr = reinterpret_cast<volatile uint32_t tt_l1_ptr *>(cmd_ptr + sizeof(CQDispatchCmd));
+    for (uint32_t i = 0, num_mcasts = cmd->mcast.num_mcast_txns; i < num_mcasts; ++i) {
+        uint64_t dst = get_noc_addr_helper(*(data_ptr++), mcast_go_signal_addr);
+        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), *(data_ptr++));
     }
-    if (cmd->mcast.mcast_flag & GoSignalMcastSettings::SEND_UNICAST) {
-        for (int core_idx = 0; core_idx < num_unicast_cores; core_idx++) {
-            uint64_t dst = get_noc_addr_helper(unicast_only_cores[core_idx], unicast_go_signal_addr);
-            noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
-        }
+    for (uint32_t i = 0, num_unicasts = cmd->mcast.num_unicast_txns; i < num_unicasts; ++i) {
+        uint64_t dst = get_noc_addr_helper(*(data_ptr++), unicast_go_signal_addr);
+        noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
     }
-    cmd_ptr += sizeof(CQDispatchCmd);
-}
-
-FORCE_INLINE
-void process_set_unicast_only_cores() {
-    volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
-    num_unicast_cores = (int)(cmd->set_unicast_only_cores.num_unicast_only_cores);
-    uint32_t data_ptr = cmd_ptr + sizeof(CQDispatchCmd);;
-    for (int core_idx = 0; core_idx < num_unicast_cores; core_idx++) {
-        unicast_only_cores[core_idx] = *((uint32_t tt_l1_ptr*)data_ptr);
-        data_ptr += sizeof(uint32_t);
-    }
-    cmd_ptr += sizeof(CQDispatchCmd) + num_unicast_cores * sizeof(uint32_t);
+    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
 }
 
 FORCE_INLINE
@@ -858,14 +841,22 @@ void process_notify_dispatch_s_go_signal_cmd() {
         DPRINT << " DISPATCH_S_NOTIFY BARRIER\n";
         noc_async_write_barrier();
     }
-    if constexpr (distributed_dispatcher) {
-        uint64_t dispatch_s_notify_addr = get_noc_addr_helper(dispatch_s_noc_xy, get_semaphore<fd_core_type>(dispatch_s_sem_id));
-        static uint32_t num_go_signals_safe_to_send = 1;
-        noc_inline_dw_write(dispatch_s_notify_addr, num_go_signals_safe_to_send);
-        num_go_signals_safe_to_send++;
-    } else {
-        tt_l1_ptr uint32_t* notify_ptr = (uint32_t tt_l1_ptr*)(get_semaphore<fd_core_type>(dispatch_s_sem_id));
-        *notify_ptr = (*notify_ptr) + 1;
+    uint16_t index_bitmask = cmd->notify_dispatch_s_go_signal.index_bitmask;
+
+    while(index_bitmask != 0) {
+        uint32_t set_index = __builtin_ctz(index_bitmask);
+        uint32_t dispatch_s_sync_sem_addr = dispatch_s_sync_sem_base_addr + set_index * L1_ALIGNMENT;
+        if constexpr (distributed_dispatcher) {
+            static uint32_t num_go_signals_safe_to_send[max_num_worker_sems] = {0};
+            uint64_t dispatch_s_notify_addr = get_noc_addr_helper(dispatch_s_noc_xy, dispatch_s_sync_sem_addr);
+            num_go_signals_safe_to_send[set_index]++;
+            noc_inline_dw_write(dispatch_s_notify_addr, num_go_signals_safe_to_send[set_index]);
+        } else {
+            tt_l1_ptr uint32_t* notify_ptr = (uint32_t tt_l1_ptr*)(dispatch_s_sync_sem_addr);
+            *notify_ptr = (*notify_ptr) + 1;
+        }
+        // Unset the bit
+        index_bitmask &= index_bitmask - 1;
     }
     cmd_ptr += sizeof(CQDispatchCmd);
 }
@@ -969,9 +960,10 @@ static inline bool process_cmd_d(uint32_t &cmd_ptr, uint32_t* l1_cache, uint32_t
             process_go_signal_mcast_cmd();
             break;
 
-        case CQ_DISPATCH_SET_UNICAST_ONLY_CORES:
-            DPRINT << "cmd_set_unicast_only_cores" << ENDL();
-            process_set_unicast_only_cores();
+        case CQ_DISPATCH_SET_NUM_WORKER_SEMS:
+            DPRINT << "cmd_set_num_worker_sems" << ENDL();
+            // This command is only used by dispatch_s
+            cmd_ptr += sizeof(CQDispatchCmd);
             break;
 
         case CQ_DISPATCH_CMD_SET_WRITE_OFFSET:
diff --git a/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp b/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
index 3ba5a9454fd..a651b7c04b2 100644
--- a/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
+++ b/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
@@ -9,7 +9,6 @@
 // - Handles the following commands:
 //  - CQ_DISPATCH_CMD_SEND_GO_SIGNAL: "multicast" go signal to all workers
 //  - CQ_DISPATCH_CMD_WAIT: Wait for workers to complete and reset wait count
-//  - CQ_DISPATCH_SET_UNICAST_ONLY_CORES: Track workers (ex: eth) that cannot be multicasted to
 //    and instead need a unicast for the go signal
 
 #include "debug/assert.h"
@@ -30,13 +29,12 @@ constexpr uint32_t cb_log_page_size = get_compile_time_arg_val(1);
 constexpr uint32_t cb_size = get_compile_time_arg_val(2);
 constexpr uint32_t my_dispatch_cb_sem_id = get_compile_time_arg_val(3);
 constexpr uint32_t upstream_dispatch_cb_sem_id = get_compile_time_arg_val(4);
-constexpr uint32_t dispatch_s_sync_sem_id = get_compile_time_arg_val(5);
-constexpr uint32_t worker_mcast_grid = get_compile_time_arg_val(6);
-constexpr uint32_t num_worker_cores_to_mcast = get_compile_time_arg_val(7);
-constexpr uint32_t mcast_go_signal_addr = get_compile_time_arg_val(8);
-constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(9);
-constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(10); // dispatch_s and dispatch_d running on different cores
-constexpr uint32_t worker_sem_addr = get_compile_time_arg_val(11); // workers update the semaphore at this location to signal completion
+constexpr uint32_t dispatch_s_sync_sem_base_addr = get_compile_time_arg_val(5);
+constexpr uint32_t mcast_go_signal_addr = get_compile_time_arg_val(6);
+constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(7);
+constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(8); // dispatch_s and dispatch_d running on different cores
+constexpr uint32_t worker_sem_base_addr = get_compile_time_arg_val(9); // workers update the semaphore at this location to signal completion
+constexpr uint32_t max_num_worker_sems = get_compile_time_arg_val(10); // maximum number of worker semaphores
 
 constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
 constexpr uint32_t dispatch_d_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X, DOWNSTREAM_NOC_Y));
@@ -45,18 +43,16 @@ constexpr uint8_t my_noc_index = NOC_INDEX;
 
 constexpr uint32_t cb_page_size = 1 << cb_log_page_size;
 constexpr uint32_t cb_end = cb_base + cb_size;
-constexpr int max_num_unicast_cores = 16;
 static uint32_t num_pages_acquired = 0;
-static uint32_t num_mcasts_sent = 0;
+static uint32_t num_mcasts_sent[max_num_worker_sems] = {0};
 static uint32_t cmd_ptr;
-static uint32_t unicast_only_cores[max_num_unicast_cores]; // TODO: Allocate this on stack
-// Initialize to -1: Number of cores we need to unicast go signals to. Host will set this during init. Assert if not set
-static int num_unicast_cores = -1;
 
 // When dispatch_d and dispatch_s run on separate cores, dispatch_s gets the go signal update from workers.
 // dispatch_s is responsible for sending the latest worker completion count to dispatch_d.
 // To minimize the number of writes from dispatch_s to dispatch_d, locally track dispatch_d's copy.
-static uint32_t worker_count_update_for_dispatch_d = 0;
+static uint32_t worker_count_update_for_dispatch_d[max_num_worker_sems] = {0};
+
+static uint32_t num_worker_sems = 1;
 
 FORCE_INLINE
 void dispatch_s_wr_reg_cmd_buf_init() {
@@ -102,7 +98,8 @@ void dispatch_s_noc_inline_dw_write(uint64_t addr, uint32_t val, uint8_t noc_id,
 
 FORCE_INLINE
 void wait_for_workers(volatile CQDispatchCmd tt_l1_ptr *cmd) {
-    volatile tt_l1_ptr uint32_t* worker_sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(worker_sem_addr);
+    uint8_t dispatch_message_offset = *((uint8_t *)&cmd->mcast.go_signal + offsetof(go_msg_t, dispatch_message_offset));
+    volatile tt_l1_ptr uint32_t* worker_sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(worker_sem_base_addr + dispatch_message_offset);
     while (wrap_gt(cmd->mcast.wait_count, *worker_sem));
 }
 
@@ -110,12 +107,18 @@ template<bool flush_write = false>
 FORCE_INLINE
 void update_worker_completion_count_on_dispatch_d() {
     if constexpr(distributed_dispatcher) {
-        uint32_t num_workers_signalling_completion = *reinterpret_cast<volatile tt_l1_ptr uint32_t*>(worker_sem_addr);
-        if (num_workers_signalling_completion != worker_count_update_for_dispatch_d) {
-            worker_count_update_for_dispatch_d = num_workers_signalling_completion;
-            uint64_t dispatch_d_dst = get_noc_addr_helper(dispatch_d_noc_xy, worker_sem_addr);
-            dispatch_s_noc_inline_dw_write(dispatch_d_dst, num_workers_signalling_completion, my_noc_index);
-            if constexpr (flush_write) {
+        bool write = false;
+        for (uint32_t i = 0, worker_sem_addr = worker_sem_base_addr; i < num_worker_sems; ++i, worker_sem_addr += L1_ALIGNMENT) {
+            uint32_t num_workers_signalling_completion = *reinterpret_cast<volatile tt_l1_ptr uint32_t*>(worker_sem_addr);
+            if (num_workers_signalling_completion != worker_count_update_for_dispatch_d[i]) {
+                worker_count_update_for_dispatch_d[i] = num_workers_signalling_completion;
+                uint64_t dispatch_d_dst = get_noc_addr_helper(dispatch_d_noc_xy, worker_sem_addr);
+                dispatch_s_noc_inline_dw_write(dispatch_d_dst, num_workers_signalling_completion, my_noc_index);
+                write = true;
+            }
+        }
+        if constexpr (flush_write) {
+            if (write) {
                 noc_async_writes_flushed();
             }
         }
@@ -151,59 +154,51 @@ void process_go_signal_mcast_cmd() {
     volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
     // Get semaphore that will be update by dispatch_d, signalling that it's safe to send a go signal
     volatile tt_l1_ptr uint32_t* sync_sem_addr =
-        reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore<fd_core_type>(dispatch_s_sync_sem_id));
-    // The location of the go signal embedded in the command does not meet NOC alignment requirements.
-    // cmd_ptr is guaranteed to meet the alignment requirements, since it is written to by prefetcher over NOC.
-    // Copy the go signal from an unaligned location to an aligned (cmd_ptr) location. This is safe as long as we
-    // can guarantee that copying the go signal does not corrupt any other command fields, which is true (see CQDispatchGoSignalMcastCmd).
-    volatile uint32_t tt_l1_ptr* aligned_go_signal_storage = (volatile uint32_t tt_l1_ptr*)cmd_ptr;
-    *aligned_go_signal_storage = cmd->mcast.go_signal;
+        reinterpret_cast<volatile tt_l1_ptr uint32_t*>(dispatch_s_sync_sem_base_addr + (cmd->mcast.wait_addr - worker_sem_base_addr));
 
     // Wait for notification from dispatch_d, signalling that it's safe to send the go signal
-    while (wrap_ge(num_mcasts_sent, *sync_sem_addr)) {
+    uint32_t& mcasts_sent = num_mcasts_sent[(cmd->mcast.wait_addr - worker_sem_base_addr) / L1_ALIGNMENT];
+    while (wrap_ge(mcasts_sent, *sync_sem_addr)) {
         // Update dispatch_d with the latest num_workers
         update_worker_completion_count_on_dispatch_d();
     }
-    num_mcasts_sent++; // Go signal sent -> update counter
+    mcasts_sent++; // Go signal sent -> update counter
     // Wait until workers have completed before sending go signal
     wait_for_workers(cmd);
+
+    // The location of the go signal embedded in the command does not meet NOC alignment requirements.
+    // cmd_ptr is guaranteed to meet the alignment requirements, since it is written to by prefetcher over NOC.
+    // Copy the go signal from an unaligned location to an aligned (cmd_ptr) location. This is safe as long as we
+    // can guarantee that copying the go signal does not corrupt any other command fields, which is true (see CQDispatchGoSignalMcastCmd).
+    volatile uint32_t tt_l1_ptr* aligned_go_signal_storage = (volatile uint32_t tt_l1_ptr*)cmd_ptr;
+    *aligned_go_signal_storage = cmd->mcast.go_signal;
+
     // send go signal update here
-    if (cmd->mcast.mcast_flag & GoSignalMcastSettings::SEND_MCAST) {
-        uint64_t dst = get_noc_addr_helper(worker_mcast_grid, mcast_go_signal_addr);
-        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), num_worker_cores_to_mcast);
+    volatile uint32_t tt_l1_ptr *data_ptr = reinterpret_cast<volatile uint32_t tt_l1_ptr *>(cmd_ptr + sizeof(CQDispatchCmd));
+    for (uint32_t i = 0, num_mcasts = cmd->mcast.num_mcast_txns; i < num_mcasts; ++i) {
+        uint64_t dst = get_noc_addr_helper(*(data_ptr++), mcast_go_signal_addr);
+        // packed_write_max_unicast_sub_cmds is the total number of compute cores (num_mcast_dests for this txn)
+        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), *(data_ptr++));
     }
-    if (cmd->mcast.mcast_flag & GoSignalMcastSettings::SEND_UNICAST) {
-        // If dispatch_s needs to unicast the go signal to specific cores, num_unicast_cores
-        // must be set using set_go_signal_unicast_only_cores
-        ASSERT(num_unicast_cores > 0);
-        for (int core_idx = 0; core_idx < num_unicast_cores; core_idx++) {
-            uint64_t dst = get_noc_addr_helper(unicast_only_cores[core_idx], unicast_go_signal_addr);
-            noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
-        }
+    for (uint32_t i = 0, num_unicasts = cmd->mcast.num_unicast_txns; i < num_unicasts; ++i) {
+        uint64_t dst = get_noc_addr_helper(*(data_ptr++), unicast_go_signal_addr);
+        noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
     }
     update_worker_completion_count_on_dispatch_d();
-    cmd_ptr += sizeof(CQDispatchCmd);
-}
-
-FORCE_INLINE
-void set_go_signal_unicast_only_cores() {
-    volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
-    num_unicast_cores = (int)(cmd->set_unicast_only_cores.num_unicast_only_cores);
-    ASSERT(num_unicast_cores <= max_num_unicast_cores);
-    uint32_t data_ptr = cmd_ptr + sizeof(CQDispatchCmd);
-    for (int core_idx = 0; core_idx < num_unicast_cores; core_idx++) {
-        unicast_only_cores[core_idx] = *((uint32_t tt_l1_ptr*)data_ptr);
-        data_ptr += sizeof(uint32_t);
-    }
-    cmd_ptr = data_ptr;
+    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
 }
 
 FORCE_INLINE
 void process_dispatch_s_wait_cmd() {
+    static constexpr uint32_t worker_sem_max_addr = worker_sem_base_addr + (max_num_worker_sems - 1) * L1_ALIGNMENT;
+
     volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
     // Limited Usage of Wait CMD: dispatch_s should get a wait command only if it's not on the
     // same core as dispatch_d and is used to clear the worker count
-    ASSERT(cmd->wait.clear_count && (cmd->wait.addr == worker_sem_addr) && distributed_dispatcher);
+    ASSERT(cmd->wait.clear_count && distributed_dispatcher);
+    uint32_t worker_sem_addr = cmd->wait.addr;
+    ASSERT(worker_sem_addr >= worker_sem_base_addr && worker_sem_addr <= worker_sem_max_addr);
+    uint32_t index = (worker_sem_addr - worker_sem_base_addr) / L1_ALIGNMENT;
     volatile tt_l1_ptr uint32_t* worker_sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(worker_sem_addr);
     // Wait for workers to complete
     while (wrap_gt(cmd->wait.count, *worker_sem));
@@ -211,7 +206,15 @@ void process_dispatch_s_wait_cmd() {
     // dispatch_d will clear it's own counter
     update_worker_completion_count_on_dispatch_d<true>();
     *worker_sem = 0;
-    worker_count_update_for_dispatch_d = 0; // Local worker count update for dispatch_d should reflect state of worker semaphore on dispatch_s
+    worker_count_update_for_dispatch_d[index] = 0; // Local worker count update for dispatch_d should reflect state of worker semaphore on dispatch_s
+    cmd_ptr += sizeof(CQDispatchCmd);
+}
+
+FORCE_INLINE
+void set_num_worker_sems() {
+    volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
+    num_worker_sems = cmd->set_num_worker_sems.num_worker_sems;
+    ASSERT(num_worker_sems <= max_num_worker_sems);
     cmd_ptr += sizeof(CQDispatchCmd);
 }
 
@@ -231,8 +234,8 @@ void kernel_main() {
             case CQ_DISPATCH_CMD_SEND_GO_SIGNAL:
                 process_go_signal_mcast_cmd();
                 break;
-            case CQ_DISPATCH_SET_UNICAST_ONLY_CORES:
-                set_go_signal_unicast_only_cores();
+            case CQ_DISPATCH_SET_NUM_WORKER_SEMS:
+                set_num_worker_sems();
                 break;
             case CQ_DISPATCH_CMD_WAIT:
                 process_dispatch_s_wait_cmd();
diff --git a/tt_metal/impl/dispatch/kernels/cq_prefetch.cpp b/tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
index dd903bcbb92..6024297317d 100644
--- a/tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
+++ b/tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
@@ -12,8 +12,8 @@
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
 #include "tt_metal/impl/dispatch/kernels/cq_common.hpp"
 #include "debug/dprint.h"
-
 #include "noc/noc_parameters.h" // PCIE_ALIGNMENT
+
 constexpr uint32_t CQ_PREFETCH_CMD_BARE_MIN_SIZE = PCIE_ALIGNMENT; // for NOC PCIe alignemnt
 struct CQPrefetchHToPrefetchDHeader_s {
     uint32_t length;
@@ -24,7 +24,7 @@ typedef union {
 } CQPrefetchHToPrefetchDHeader;
 static_assert((sizeof(CQPrefetchHToPrefetchDHeader) & (CQ_PREFETCH_CMD_BARE_MIN_SIZE - 1)) == 0);
 
-typedef uint16_t prefetch_q_entry_type;
+using prefetch_q_entry_type = uint16_t;
 
 constexpr uint32_t downstream_cb_base = get_compile_time_arg_val(0);
 constexpr uint32_t downstream_cb_log_page_size = get_compile_time_arg_val(1);
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index 9dd1b98d811..58877bc1de1 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -140,9 +140,11 @@ class Program_ {
 
     ProgramConfig& get_program_config(uint32_t programmable_core_type_index);
 
+    const std::vector<uint32_t> &determine_sub_device_ids(const Device *device);
+
     // debug/test
-    uint32_t get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const;
-    uint32_t get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const;
+    uint32_t get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type);
+    uint32_t get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type);
     uint32_t get_sem_size(Device *device, CoreCoord logical_core, CoreType core_type) const;
     uint32_t get_cb_size(Device *device, CoreCoord logical_core, CoreType core_type) const;
     void set_last_used_command_queue_for_testing(HWCommandQueue *queue);
@@ -162,6 +164,9 @@ class Program_ {
     bool finalized_;
     bool cached_;
 
+    // This will be turned into a map by SubDeviceManager handles once implemented
+    std::optional<std::vector<uint32_t>> sub_device_ids_;
+
     struct CircularBufferAllocator {
         CircularBufferAllocator(const CoreRange &core_range_) : core_range(core_range_) {}
 
@@ -235,7 +240,7 @@ class Program_ {
     void add_config_buffer(std::shared_ptr<Buffer> config_buffer);
 
     // Ensures that statically allocated circular buffers do not grow into L1 buffer space
-    void validate_circular_buffer_region(const Device *device) const;
+    void validate_circular_buffer_region(const Device *device);
 
     void set_cb_data_fmt( Device *device, const std::vector<CoreRange> & crs, JitBuildOptions& build_options) const;
 
@@ -741,14 +746,15 @@ void detail::Program_::allocate_circular_buffers(const Device *device) {
 
 void Program::allocate_circular_buffers(const Device *device) { pimpl_->allocate_circular_buffers(device); }
 
-void detail::Program_::validate_circular_buffer_region(const Device *device) const {
+void detail::Program_::validate_circular_buffer_region(const Device *device) {
     //ZoneScoped;
 
     // Banks are in lockstep so we only need to get lowest L1 address of one compute and storage core
     // Only compute with storage cores can have CBs and all compute with storage cores will have the same bank offset
+    // TODO: Circular buffer allocation and validation could be better optimized by determining usage per sub-device
     const std::vector<uint32_t> &bank_ids =
         device->bank_ids_from_logical_core(BufferType::L1, *device->compute_cores_.begin());
-    std::optional<DeviceAddr> lowest_address = allocator::lowest_occupied_l1_address(*device->allocator_, bank_ids[0]);
+    std::optional<DeviceAddr> lowest_address = device->lowest_occupied_l1_address(bank_ids[0], this->determine_sub_device_ids(device));
     uint32_t max_l1_size = device->l1_size_per_core();
 
     for (const CircularBufferAllocator &cb_allocator : this->cb_allocators_) {
@@ -1293,6 +1299,24 @@ uint32_t& detail::Program_::get_program_config_size(uint32_t programmable_core_t
     return this->program_config_sizes_[programmable_core_type_index];
 }
 
+const std::vector<uint32_t> &detail::Program_::determine_sub_device_ids(const Device *device) {
+    // We need to calculate the sub_device_id when we haven't compiled the program yet, or this is the first time we
+    // are getting the sub_device_ids after compilation
+    if (this->compiled_.empty() || !this->sub_device_ids_.has_value()) {
+        if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE") != nullptr) {
+            // No sub device manager, nothing to validate
+            this->sub_device_ids_ = {0};
+        } else {
+            // TODO: Add logic for determining which sub devices are used by the currently active configuration
+            // When program hasn't compiled, we will determine and return a value without caching the id inside program
+            // After program is compiled, the first time this is called we will compute and store the id.
+            // This makes subsequent calls faster, and is why this function is not const
+            this->sub_device_ids_ = {0};
+        }
+    }
+    return *this->sub_device_ids_;
+}
+
 void detail::Program_::finalize(Device *device) {
     // Store the number of tensix "go signals" for use by CQ
     // CQ iterates over these to update runtime addresses, needs to know when eth begins (after tensix)
@@ -1349,6 +1373,11 @@ void detail::Program_::compile(Device *device, bool fd_bootloader_mode) {
     if (compiled_.contains(device->id())) {
         return;
     }
+    // Clear the determined sub_device_ids when we compile the program for the first time
+    // This way, determine_sub_device_ids is forced to recalculate with the finalized information on the used cores
+    if (compiled_.empty()) {
+        this->sub_device_ids_ = std::nullopt;
+    }
 
     TT_FATAL(
         device->is_initialized(),
@@ -1458,39 +1487,47 @@ void detail::Program_::set_runtime_id(uint64_t id) { this->runtime_id = id; }
 
 void Program::set_runtime_id(uint64_t id) { pimpl_->set_runtime_id(id); }
 
-uint32_t detail::Program_::get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const {
+uint32_t detail::Program_::get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) {
 
     CoreCoord phys_core = device->physical_core_from_logical_core(logical_core, core_type);
     HalProgrammableCoreType programmable_core_type = device->get_programmable_core_type(phys_core);
     uint32_t index = hal.get_programmable_core_type_index(programmable_core_type);
-
+    const auto &sub_device_ids = this->determine_sub_device_ids(device);
+    // TODO: This restriction can be lifted once we have support for programs spanning multiple sub-devices
+    // Semaphores across sub-devices are expected to have the same address
+    TT_FATAL(sub_device_ids.size() == 1, "get_sem_base_addr currently only supports programs spanning a single sub-device");
+    auto sub_device_id = sub_device_ids[0];
     uint32_t base_addr = device->using_fast_dispatch
-                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr().get_last_slot_addr(
+                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_id).get_last_slot_addr(
                                    programmable_core_type)
                              : hal.get_dev_addr(programmable_core_type, HalL1MemAddrType::KERNEL_CONFIG);
 
     return base_addr + this->program_configs_[index].sem_offset;
 }
 
-uint32_t Program::get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const {
+uint32_t Program::get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) {
     return pimpl_->get_sem_base_addr(device, logical_core, core_type);
 }
 
-uint32_t detail::Program_::get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const {
+uint32_t detail::Program_::get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) {
 
     CoreCoord phys_core = device->physical_core_from_logical_core(logical_core, core_type);
     HalProgrammableCoreType programmable_core_type = device->get_programmable_core_type(phys_core);
     uint32_t index = hal.get_programmable_core_type_index(programmable_core_type);
-
+    const auto &sub_device_ids = this->determine_sub_device_ids(device);
+    // TODO: This restriction can be lifted once this function is changed to return a vector of addresses
+    // Addresses are not the same across sub-devices
+    TT_FATAL(sub_device_ids.size() == 1, "get_sem_base_addr currently only supports programs spanning a single sub-device");
+    auto sub_device_id = sub_device_ids[0];
     uint32_t base_addr = device->using_fast_dispatch
-                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr().get_last_slot_addr(
+                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_id).get_last_slot_addr(
                                    programmable_core_type)
                              : hal.get_dev_addr(programmable_core_type, HalL1MemAddrType::KERNEL_CONFIG);
 
     return base_addr + this->program_configs_[index].cb_offset;
 }
 
-uint32_t Program::get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const {
+uint32_t Program::get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) {
     return pimpl_->get_cb_base_addr(device, logical_core, core_type);
 }
 
@@ -1616,6 +1653,8 @@ bool Program::is_finalized() const { return pimpl_->is_finalized(); }
 bool Program::is_cached() const { return pimpl_->is_cached(); }
 void Program::set_cached() { pimpl_->set_cached(); }
 
+const std::vector<uint32_t> & Program::determine_sub_device_ids(const Device *device) { return pimpl_->determine_sub_device_ids(device); }
+
 const ProgramTransferInfo &Program::get_program_transfer_info() const noexcept { return pimpl_->program_transfer_info; }
 
 const std::shared_ptr<Buffer> &Program::get_kernels_buffer() const noexcept { return pimpl_->kernels_buffer; }
diff --git a/tt_metal/impl/program/program.hpp b/tt_metal/impl/program/program.hpp
index 05d1dfa54ce..5c77856572b 100644
--- a/tt_metal/impl/program/program.hpp
+++ b/tt_metal/impl/program/program.hpp
@@ -143,12 +143,14 @@ class Program {
     ProgramConfig& get_program_config(uint32_t programmable_core_type_index);
 
     // debug/test
-    uint32_t get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const;
-    uint32_t get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type) const;
+    uint32_t get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type);
+    uint32_t get_cb_base_addr(Device *device, CoreCoord logical_core, CoreType core_type);
     uint32_t get_sem_size(Device *device, CoreCoord logical_core, CoreType core_type) const;
     uint32_t get_cb_size(Device *device, CoreCoord logical_core, CoreType core_type) const;
     void set_last_used_command_queue_for_testing(HWCommandQueue *queue);
 
+    const std::vector<uint32_t> &determine_sub_device_ids(const Device *device);
+
    private:
     std::unique_ptr<detail::Program_> pimpl_;
 
diff --git a/tt_metal/impl/trace/trace_buffer.hpp b/tt_metal/impl/trace/trace_buffer.hpp
index fce464a3b8c..d2488970185 100644
--- a/tt_metal/impl/trace/trace_buffer.hpp
+++ b/tt_metal/impl/trace/trace_buffer.hpp
@@ -7,6 +7,7 @@
 #include <functional>
 #include <memory>
 #include <mutex>
+#include <unordered_map>
 #include <utility>
 #include <variant>
 
@@ -16,9 +17,16 @@ namespace tt::tt_metal {
 
 namespace detail {
 struct TraceDescriptor {
-    uint32_t num_completion_worker_cores = 0;
-    uint32_t num_traced_programs_needing_go_signal_multicast = 0;
-    uint32_t num_traced_programs_needing_go_signal_unicast = 0;
+    struct Descriptor {
+        uint32_t num_completion_worker_cores = 0;
+        uint32_t num_traced_programs_needing_go_signal_multicast = 0;
+        uint32_t num_traced_programs_needing_go_signal_unicast = 0;
+    };
+    // Mapping of sub_device_id to descriptor
+    std::unordered_map<uint32_t, Descriptor> descriptors;
+    // Store the keys of the map in a vector after descriptor has finished being populated
+    // This is an optimization since we sometimes need to only pass the keys in a container
+    std::vector<uint32_t> sub_device_ids;
     std::vector<uint32_t> data;
 };
 }  // namespace detail
diff --git a/tt_metal/llrt/hal.hpp b/tt_metal/llrt/hal.hpp
index 1ba7a104e84..13e0c96fa6a 100644
--- a/tt_metal/llrt/hal.hpp
+++ b/tt_metal/llrt/hal.hpp
@@ -31,6 +31,8 @@ enum class HalProgrammableCoreType {
     COUNT      = 3
 };
 
+static constexpr uint32_t NumHalProgrammableCoreTypes = static_cast<uint32_t>(HalProgrammableCoreType::COUNT);
+
 enum class HalProcessorClassType : uint8_t {
     DM      = 0,
     // Setting this to 2 because we currently treat brisc and ncrisc as two unique processor classes on Tensix
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index 644e882a471..071198de166 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -1279,13 +1279,13 @@ void ReplayTrace(Device *device, const uint8_t cq_id, const uint32_t tid, const
 
 void ReleaseTrace(Device *device, const uint32_t tid) { device->release_trace(tid); }
 
-void Synchronize(Device *device, const std::optional<uint8_t> cq_id) {
+void Synchronize(Device *device, const std::optional<uint8_t> cq_id, tt::stl::Span<const uint32_t> sub_device_ids) {
     if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE") == nullptr) {
         if (cq_id.has_value()) {
-            Finish(device->command_queue(cq_id.value()));
+            Finish(device->command_queue(cq_id.value()), sub_device_ids);
         } else {
             for (uint8_t cq_id = 0; cq_id < device->num_hw_cqs(); ++cq_id) {
-                Finish(device->command_queue(cq_id));
+                Finish(device->command_queue(cq_id), sub_device_ids);
             }
         }
     }

From 5536fe4c5286a7f9546312978a7fe2cf7debaa08 Mon Sep 17 00:00:00 2001
From: Austin Ho <aho@tenstorrent.com>
Date: Thu, 7 Nov 2024 17:24:35 +0000
Subject: [PATCH 62/69] #13655: Update allocator to support taking in a
 CoreRangeSet for banks, instead of assuming a rectangular grid Update device
 allocator related apis to take in a sub-device parameter

---
 .../apis/host_apis/buffers/CreateBuffer.rst   |   4 +-
 tt_metal/host_api.hpp                         |  43 ++----
 .../impl/allocator/algorithms/free_list.cpp   |   1 +
 tt_metal/impl/allocator/allocator.cpp         |  67 ++++++---
 tt_metal/impl/allocator/allocator.hpp         |   7 +-
 tt_metal/impl/allocator/allocator_types.hpp   |   5 +-
 .../impl/allocator/l1_banking_allocator.cpp   |  76 +++++-----
 tt_metal/impl/buffers/buffer.cpp              |  43 ++++--
 tt_metal/impl/buffers/buffer.hpp              |   9 +-
 tt_metal/impl/device/device.cpp               | 133 +++++++++++-------
 tt_metal/impl/device/device.hpp               |  43 +++---
 tt_metal/impl/program/program.cpp             |  12 +-
 tt_metal/tt_metal.cpp                         |  72 +++++-----
 13 files changed, 292 insertions(+), 223 deletions(-)

diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
index 2d05db10694..d8799309801 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
@@ -1,5 +1,5 @@
 CreateBuffer
 =================
 
-.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig & config);
-.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig & config);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id);
diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp
index 5d0fffba0e1..510868e6ad0 100644
--- a/tt_metal/host_api.hpp
+++ b/tt_metal/host_api.hpp
@@ -281,51 +281,32 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
 std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
     Device *device, CoreRangeSet &&cores, uint32_t initial_value, BufferType buffer_type = BufferType::L1);
 
-/**
-*  Allocates an interleaved DRAM or L1 buffer on device
-*
-*  Return value: std::shared_ptr<Buffer>
-*
-*  | Argument        | Description                             | Type                     | Valid Range | Required |
-*  |-----------------|---------------------------------------- |--------------------------|-------------|----------|
-*  | config          | Config for the buffer                   | InterleavedBufferConfig  |             | Yes      |
-*/
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config);
-
 /**
 *  Creates a pre-allocated interleaved DRAM or L1 buffer on device
 *
 *  Return value: std::shared_ptr<Buffer>
 *
-*  | Argument        | Description                             | Type                     | Valid Range | Required |
-*  |-----------------|---------------------------------------- |--------------------------|-------------|----------|
-*  | config          | Config for the buffer                   | InterleavedBufferConfig  |             | Yes      |
-*  | address         | Device address of the buffer            | DeviceAddr               |             | Yes      |
-*/
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, DeviceAddr address);
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | InterleavedBufferConfig   |             | Yes      |
+*  | address         | Device address of the buffer. Default will calculate address      | std::optional<DeviceAddr> |             | No       |
+*  | sub_device_id   | The sub-device id to allocate on. Default is the global allocator | std::optional<uint32_t>   |             | No       |
 
-/**
-*  Allocates a sharded DRAM or L1 buffer on device
-*
-*  Return value: std::shared_ptr<Buffer>
-*
-*  | Argument        | Description                             | Type                     | Valid Range | Required |
-*  |-----------------|---------------------------------------- |--------------------------|-------------|----------|
-*  | config          | Config for the buffer                   | ShardedBufferConfig      |             | Yes      |
 */
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config);
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address = std::nullopt, std::optional<uint32_t> sub_device_id = std::nullopt);
 
 /**
 *  Creates a pre-allocated sharded DRAM or L1 buffer on device
 *
 *  Return value: std::shared_ptr<Buffer>
 *
-*  | Argument        | Description                             | Type                     | Valid Range | Required |
-*  |-----------------|---------------------------------------- |--------------------------|-------------|----------|
-*  | config          | Config for the buffer                   | ShardedBufferConfig      |             | Yes      |
-*  | address         | Device address of the buffer            | DeviceAddr               |             | Yes      |
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | ShardedBufferConfig       |             | Yes      |
+*  | address         | Device address of the buffer. Default will calculate address      | std::optional<DeviceAddr> |             | No       |
+*  | sub_device_id   | The sub-device id to allocate on. Default is the global allocator | std::optional<uint32_t>   |             | No       |
 */
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, DeviceAddr address);
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address = std::nullopt, std::optional<uint32_t> sub_device_id = std::nullopt);
 
 /**
 *  Deallocates buffer from device by marking its memory as free.
diff --git a/tt_metal/impl/allocator/algorithms/free_list.cpp b/tt_metal/impl/allocator/algorithms/free_list.cpp
index 4e8af7fa361..f508af97548 100644
--- a/tt_metal/impl/allocator/algorithms/free_list.cpp
+++ b/tt_metal/impl/allocator/algorithms/free_list.cpp
@@ -21,6 +21,7 @@ FreeList::FreeList(DeviceAddr max_size_bytes, DeviceAddr offset_bytes, DeviceAdd
 }
 
 void FreeList::init() {
+    this->shrink_size_ = 0;
     auto block = boost::make_local_shared<Block>(0, this->max_size_bytes_);
     this->block_head_ = block;
     this->block_tail_ = block;
diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp
index cfc00f31d3b..03caa3c5224 100644
--- a/tt_metal/impl/allocator/allocator.cpp
+++ b/tt_metal/impl/allocator/allocator.cpp
@@ -31,14 +31,14 @@ void BankManager::init_allocator(DeviceAddr size_bytes, uint32_t alignment_bytes
         std::make_unique<FreeList>(size_bytes, offset, alignment_bytes, alignment_bytes, FreeList::SearchPolicy::FIRST);
 }
 
-void validate_num_banks(uint32_t num_banks, const BufferType &buffer_type) {
+void validate_num_banks(uint32_t num_banks, const BufferType &buffer_type, bool disable_interleaved) {
+    bool doesnt_support_interleaved = buffer_type == BufferType::L1_SMALL or disable_interleaved;
     bool is_pow2_num_banks = num_banks && (!(num_banks & (num_banks - 1)));
     // Dataflow API does not have a working implementation of generic modulo to determine bank_id for interleaved
     // address gen For non pow2 num banks, special cases need to be added to avoid falling back to generic
     // implementation. See https://github.com/tenstorrent/tt-metal/issues/3321
     std::unordered_set<uint32_t> acceptable_num_non_pow2_mem_banks = {12, 56, 70, 80, 94, 124, 130, 140};
     bool custom_mod_bank_id_calculation_exists = acceptable_num_non_pow2_mem_banks.count(num_banks) > 0;
-    bool doesnt_support_interleaved = buffer_type == BufferType::L1_SMALL;
     bool valid_num_banks = (is_pow2_num_banks or custom_mod_bank_id_calculation_exists or doesnt_support_interleaved);
     if (not valid_num_banks) {
         TT_THROW(
@@ -54,7 +54,8 @@ BankManager::BankManager(
     const std::vector<int64_t> &bank_offsets,
     DeviceAddr size_bytes,
     uint32_t alignment_bytes,
-    DeviceAddr alloc_offset) :
+    DeviceAddr alloc_offset,
+    bool disable_interleaved) :
     buffer_type_(buffer_type), alignment_bytes_(alignment_bytes) {
     unsigned int bank_id = 0;
     for (const auto bank_offset : bank_offsets) {
@@ -62,7 +63,7 @@ BankManager::BankManager(
         bank_id++;
     }
     this->interleaved_address_limit_ = 0;
-    validate_num_banks(this->bank_id_to_bank_offset_.size(), this->buffer_type_);
+    validate_num_banks(this->bank_id_to_bank_offset_.size(), this->buffer_type_, disable_interleaved);
     this->init_allocator(size_bytes, alignment_bytes, alloc_offset);
 }
 
@@ -72,12 +73,13 @@ BankManager::BankManager(
     DeviceAddr size_bytes,
     DeviceAddr interleaved_address_limit,
     uint32_t alignment_bytes,
-    DeviceAddr alloc_offset) :
+    DeviceAddr alloc_offset,
+    bool disable_interleaved) :
     buffer_type_(buffer_type),
     bank_id_to_bank_offset_(bank_id_to_bank_offset),
     interleaved_address_limit_(interleaved_address_limit),
     alignment_bytes_(alignment_bytes) {
-    validate_num_banks(this->bank_id_to_bank_offset_.size(), this->buffer_type_);
+    validate_num_banks(this->bank_id_to_bank_offset_.size(), this->buffer_type_, disable_interleaved);
     this->init_allocator(size_bytes, alignment_bytes, alloc_offset);
 }
 
@@ -110,12 +112,12 @@ uint64_t BankManager::allocate_buffer(
     DeviceAddr size,
     DeviceAddr page_size,
     bool bottom_up,
-    CoreCoord compute_grid_size,
+    const CoreRangeSet &compute_grid,
     std::optional<uint32_t> num_shards) {
     uint32_t num_banks = this->num_banks();
     bool is_sharded = false;
     if (num_shards.has_value()) {
-        auto num_compute_banks = compute_grid_size.x * compute_grid_size.y;
+        auto num_compute_banks = compute_grid.num_cores();
         is_sharded = true;
         TT_FATAL(
             num_shards.value() <= num_compute_banks,
@@ -227,7 +229,7 @@ void init_one_bank_per_channel(Allocator &allocator, const AllocatorConfig &allo
         bank_offsets.at(channel_id) = static_cast<int32_t>(alloc_config.dram_bank_offsets.at(channel_id));
     }
     allocator.dram_manager =
-        BankManager(BufferType::DRAM, bank_offsets, dram_bank_size, alloc_config.alignment, alloc_config.dram_unreserved_base);
+        BankManager(BufferType::DRAM, bank_offsets, dram_bank_size, alloc_config.alignment, alloc_config.dram_unreserved_base, alloc_config.disable_interleaved);
     for (uint32_t bank_id = 0; bank_id < alloc_config.num_dram_channels; bank_id++) {
         CoreCoord logical_core = CoreCoord{bank_id, 0};
         allocator.bank_id_to_dram_channel.insert({bank_id, bank_id});
@@ -241,7 +243,8 @@ void init_one_bank_per_channel(Allocator &allocator, const AllocatorConfig &allo
         bank_offsets,
         alloc_config.trace_region_size,
         alloc_config.alignment,
-        dram_bank_size + alloc_config.dram_unreserved_base);
+        dram_bank_size + alloc_config.dram_unreserved_base,
+        alloc_config.disable_interleaved);
     for (uint32_t bank_id = 0; bank_id < alloc_config.num_dram_channels; bank_id++) {
         CoreCoord logical_core = CoreCoord{bank_id, 0};
         allocator.bank_id_to_dram_channel.insert({bank_id, bank_id});
@@ -252,20 +255,18 @@ void init_one_bank_per_channel(Allocator &allocator, const AllocatorConfig &allo
 
 void init_one_bank_per_l1(Allocator &allocator, const AllocatorConfig &alloc_config) {
     TT_ASSERT(alloc_config.l1_small_size == 0);
-    uint32_t num_l1_banks = alloc_config.worker_grid_size.y * alloc_config.worker_grid_size.x;
+    uint32_t num_l1_banks = alloc_config.worker_grid.num_cores();
     // Space up to L1 unreserved base is reserved for risc binaries, kernel args, debug and perf monitoring tools
     DeviceAddr l1_bank_size = alloc_config.worker_l1_size - alloc_config.l1_unreserved_base;
     std::vector<int64_t> bank_offsets(num_l1_banks, 0);
-    allocator.l1_manager = BankManager(BufferType::L1, bank_offsets, l1_bank_size, alloc_config.alignment, alloc_config.l1_unreserved_base);
+    allocator.l1_manager = BankManager(BufferType::L1, bank_offsets, l1_bank_size, alloc_config.alignment, alloc_config.l1_unreserved_base, alloc_config.disable_interleaved);
 
     uint32_t bank_id = 0;
-    for (uint32_t y = 0; y < alloc_config.worker_grid_size.y; y++) {
-        for (uint32_t x = 0; x < alloc_config.worker_grid_size.x; x++) {
-            CoreCoord logical_core = CoreCoord{x, y};
-            allocator.bank_id_to_logical_core.insert({bank_id, logical_core});
-            allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, {bank_id}});
-            bank_id++;
-        }
+    const auto &cores = corerange_to_cores(alloc_config.worker_grid, std::nullopt, true);
+    for (const auto &logical_core : cores) {
+        allocator.bank_id_to_logical_core.insert({bank_id, logical_core});
+        allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, {bank_id}});
+        bank_id++;
     }
 }
 
@@ -371,7 +372,7 @@ DeviceAddr base_alloc(
     DeviceAddr page_size,
     bool bottom_up,
     std::optional<uint32_t> num_shards) {
-    return bank_manager.allocate_buffer(size, page_size, bottom_up, config.compute_grid_size, num_shards);
+    return bank_manager.allocate_buffer(size, page_size, bottom_up, config.compute_grid, num_shards);
 }
 
 void mark_allocations_unsafe(Allocator &allocator) { allocator.allocations_unsafe = true; }
@@ -416,6 +417,28 @@ void shrink_allocator_size(
     }
 }
 
+void reset_allocator_size(
+    Allocator &allocator,
+    const BufferType &buffer_type) {
+    switch (buffer_type) {
+        case BufferType::DRAM:
+            allocator.dram_manager.reset_size();
+            break;
+        case BufferType::L1:
+            allocator.l1_manager.reset_size();
+            break;
+        case BufferType::L1_SMALL:
+            allocator.l1_small_manager.reset_size();
+            break;
+        case BufferType::TRACE:
+            allocator.trace_buffer_manager.reset_size();
+            break;
+        default: {
+            TT_THROW("Unsupported buffer type!");
+        }
+    }
+}
+
 DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer) {
     DeviceAddr address = 0;
     auto page_size = buffer->page_size();
@@ -423,6 +446,9 @@ DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer
     auto bottom_up = buffer->bottom_up();
     auto num_shards = buffer->num_cores();
     verify_safe_allocation(allocator);
+    if (allocator.config.disable_interleaved) {
+        TT_FATAL(num_shards.has_value(), "Interleaved allocation is disabled, see validate_num_banks");
+    }
     switch (buffer_type) {
         case BufferType::DRAM:
             address = allocator.descriptor.dram.alloc(
@@ -470,7 +496,6 @@ void deallocate_buffers(Allocator &allocator) {
     allocator.l1_manager.deallocate_all();
     allocator.l1_small_manager.deallocate_all();
     allocator.trace_buffer_manager.deallocate_all();
-    allocator.allocated_buffers.clear();
 }
 
 void clear(Allocator &allocator) {
diff --git a/tt_metal/impl/allocator/allocator.hpp b/tt_metal/impl/allocator/allocator.hpp
index 500dd42c062..4c20eb9a01f 100644
--- a/tt_metal/impl/allocator/allocator.hpp
+++ b/tt_metal/impl/allocator/allocator.hpp
@@ -35,8 +35,8 @@ class BankManager {
    public:
     BankManager() {}
 
-    BankManager(const BufferType &buffer_type, const std::vector<int64_t> &bank_descriptors, DeviceAddr size_bytes, uint32_t alignment_bytes, DeviceAddr alloc_offset=0);
-    BankManager(const BufferType &buffer_type, const std::unordered_map<uint32_t, int64_t> &bank_id_to_descriptor, DeviceAddr size_bytes, DeviceAddr interleaved_address_limit, uint32_t alignment_bytes, DeviceAddr alloc_offset=0);
+    BankManager(const BufferType &buffer_type, const std::vector<int64_t> &bank_descriptors, DeviceAddr size_bytes, uint32_t alignment_bytes, DeviceAddr alloc_offset=0, bool disable_interleaved=false);
+    BankManager(const BufferType &buffer_type, const std::unordered_map<uint32_t, int64_t> &bank_id_to_descriptor, DeviceAddr size_bytes, DeviceAddr interleaved_address_limit, uint32_t alignment_bytes, DeviceAddr alloc_offset=0, bool disable_interleaved=false);
     BankManager&& operator=(BankManager&& that);
     ~BankManager();
     uint32_t num_banks() const;
@@ -45,7 +45,7 @@ class BankManager {
 
     int64_t bank_offset(uint32_t bank_id) const;
 
-    DeviceAddr allocate_buffer(DeviceAddr size, DeviceAddr page_size, bool bottom_up, CoreCoord compute_grid_size, std::optional<uint32_t> num_shards);
+    DeviceAddr allocate_buffer(DeviceAddr size, DeviceAddr page_size, bool bottom_up, const CoreRangeSet &compute_grid, std::optional<uint32_t> num_shards);
 
     void deallocate_buffer(DeviceAddr address);
     void deallocate_all();
@@ -109,6 +109,7 @@ std::optional<DeviceAddr> lowest_occupied_l1_address(const Allocator &allocator,
 DeviceAddr base_alloc(const AllocatorConfig & config, BankManager &bank_manager, DeviceAddr size, DeviceAddr page_size, bool bottom_up, std::optional<uint32_t> num_shards);
 
 void shrink_allocator_size(Allocator &allocator, const BufferType &buffer_type, DeviceAddr shrink_size, bool bottom_up=true);
+void reset_allocator_size(Allocator &allocator, const BufferType &buffer_type);
 
 DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer);
 
diff --git a/tt_metal/impl/allocator/allocator_types.hpp b/tt_metal/impl/allocator/allocator_types.hpp
index 04e2d6fe39c..060bdf47eb4 100644
--- a/tt_metal/impl/allocator/allocator_types.hpp
+++ b/tt_metal/impl/allocator/allocator_types.hpp
@@ -38,7 +38,7 @@ struct AllocatorConfig {
     uint32_t dram_unreserved_base = 0;
     //! worker specific configuration
     uint32_t l1_unreserved_base = 0;
-    CoreCoord worker_grid_size = {};
+    CoreRangeSet worker_grid = {};
     size_t worker_l1_size = 0;
     std::optional<uint32_t> storage_core_bank_size = 0;
     size_t l1_small_size = 0;
@@ -47,8 +47,9 @@ struct AllocatorConfig {
     std::unordered_map<int, int> worker_log_to_physical_routing_x = {};
     std::unordered_map<int, int> worker_log_to_physical_routing_y = {};
     BankMapping l1_bank_remap = {}; // for remapping which l1 bank points to which bank if we assume normal row-major assignment
-    CoreCoord compute_grid_size = {};
+    CoreRangeSet compute_grid = {};
     uint32_t alignment = 0;
+    bool disable_interleaved = false;
     void reset();
     ~AllocatorConfig() { reset(); }
 };
diff --git a/tt_metal/impl/allocator/l1_banking_allocator.cpp b/tt_metal/impl/allocator/l1_banking_allocator.cpp
index fb919e7ef16..8f0ec9fc289 100644
--- a/tt_metal/impl/allocator/l1_banking_allocator.cpp
+++ b/tt_metal/impl/allocator/l1_banking_allocator.cpp
@@ -69,8 +69,8 @@ num_banks_t compute_total_and_storage_only_num_l1_banks(const AllocatorConfig &a
 }
 
 void init_compute_and_storage_l1_bank_manager(Allocator &allocator, const AllocatorConfig &alloc_config) {
+    TT_FATAL(alloc_config.worker_grid.contains(alloc_config.compute_grid), "Compute grid must be a subset of worker grid");
     num_banks_t num_banks = compute_total_and_storage_only_num_l1_banks(alloc_config);
-
     auto logical_to_noc_coord = [&alloc_config](CoreCoord logical_core) {
         TT_ASSERT(
             alloc_config.worker_log_to_physical_routing_x.find(logical_core.x) !=
@@ -118,35 +118,33 @@ void init_compute_and_storage_l1_bank_manager(Allocator &allocator, const Alloca
     // If l1_small_size exists, then it gets the top of L1 (offset 0)
     // and the regular L1 region is offset just below it
     uint32_t bank_id = 0;
-    for (uint32_t y = 0; y < alloc_config.worker_grid_size.y; y++) {
-        for (uint32_t x = 0; x < alloc_config.worker_grid_size.x; x++) {
-            CoreCoord logical_core = CoreCoord(x, y);
-            CoreCoord noc_core = logical_to_noc_coord(logical_core);
-
-            if (alloc_config.core_type_from_noc_coord_table.at(noc_core) == AllocCoreType::ComputeAndStore) {
+    const auto &cores = corerange_to_cores(alloc_config.worker_grid, std::nullopt, true);
+    for (const auto &logical_core : cores) {
+        CoreCoord noc_core = logical_to_noc_coord(logical_core);
+
+        if (alloc_config.core_type_from_noc_coord_table.at(noc_core) == AllocCoreType::ComputeAndStore) {
+            uint32_t remapped_bank_id = shuffled_bank_id[bank_id];
+            allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, {remapped_bank_id}});
+            allocator.bank_id_to_logical_core.insert({remapped_bank_id, logical_core});
+            bank_id_to_bank_offset.insert({remapped_bank_id, 0});
+            bank_id++;
+        } else if (alloc_config.core_type_from_noc_coord_table.at(noc_core) == AllocCoreType::StorageOnly) {
+            std::vector<uint32_t> bank_ids;
+            for (int storage_bank_index = 0; storage_bank_index < num_banks.per_storage_core; storage_bank_index++) {
                 uint32_t remapped_bank_id = shuffled_bank_id[bank_id];
-                allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, {remapped_bank_id}});
+                bank_ids.push_back(remapped_bank_id);
                 allocator.bank_id_to_logical_core.insert({remapped_bank_id, logical_core});
-                bank_id_to_bank_offset.insert({remapped_bank_id, 0});
-                bank_id++;
-            } else if (alloc_config.core_type_from_noc_coord_table.at(noc_core) == AllocCoreType::StorageOnly) {
-                std::vector<uint32_t> bank_ids;
-                for (int storage_bank_index = 0; storage_bank_index < num_banks.per_storage_core; storage_bank_index++) {
-                    uint32_t remapped_bank_id = shuffled_bank_id[bank_id];
-                    bank_ids.push_back(remapped_bank_id);
-                    allocator.bank_id_to_logical_core.insert({remapped_bank_id, logical_core});
-                    int64_t bank_offset_bytes = 0;
-                    if (alloc_config.storage_core_bank_size.value() != alloc_config.worker_l1_size) {
-                        uint64_t storage_core_offset = storage_bank_index * alloc_config.storage_core_bank_size.value();
-                        bank_offset_bytes = static_cast<int64_t>(storage_core_offset) - alloc_config.storage_core_bank_size.value(); // Assuming top-down here --  Not sure if this is hacky... need to specialize based off top-down cofnig flag or not?
-                    } else if (num_banks.per_storage_core != 1) {
-                        TT_THROW("Expected 1 bank per storage core if L1 bank size equals total worker L1 size but have {} banks", num_banks.per_storage_core);
-                    }
-                    bank_id_to_bank_offset.insert({remapped_bank_id, bank_offset_bytes});
-                    bank_id++;
+                int64_t bank_offset_bytes = 0;
+                if (alloc_config.storage_core_bank_size.value() != alloc_config.worker_l1_size) {
+                    uint64_t storage_core_offset = storage_bank_index * alloc_config.storage_core_bank_size.value();
+                    bank_offset_bytes = static_cast<int64_t>(storage_core_offset) - alloc_config.storage_core_bank_size.value(); // Assuming top-down here --  Not sure if this is hacky... need to specialize based off top-down cofnig flag or not?
+                } else if (num_banks.per_storage_core != 1) {
+                    TT_THROW("Expected 1 bank per storage core if L1 bank size equals total worker L1 size but have {} banks", num_banks.per_storage_core);
                 }
-                allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, bank_ids});
+                bank_id_to_bank_offset.insert({remapped_bank_id, bank_offset_bytes});
+                bank_id++;
             }
+            allocator.logical_core_to_bank_ids[BufferType::L1].insert({logical_core, bank_ids});
         }
     }
     TT_ASSERT(bank_id == shuffled_bank_id.size());
@@ -154,20 +152,17 @@ void init_compute_and_storage_l1_bank_manager(Allocator &allocator, const Alloca
     std::unordered_map<uint32_t, int64_t> small_bank_id_to_bank_offset;
     if (alloc_config.l1_small_size > 0) {
         TT_ASSERT(num_banks.num_l1_small_banks > 0);
-        for (uint32_t y = 0; y < alloc_config.worker_grid_size.y; y++) {
-            for (uint32_t x = 0; x < alloc_config.worker_grid_size.x; x++) {
-                CoreCoord logical_core = CoreCoord(x, y);
-                CoreCoord noc_core = logical_to_noc_coord(logical_core);
-
-                if (alloc_config.core_type_from_noc_coord_table.at(noc_core) != AllocCoreType::ComputeAndStore) {
-                    continue;
-                }
+        for (const auto &logical_core : cores) {
+            CoreCoord noc_core = logical_to_noc_coord(logical_core);
 
-                allocator.logical_core_to_bank_ids[BufferType::L1_SMALL].insert({logical_core, {bank_id}});
-                allocator.bank_id_to_logical_core.insert({bank_id, logical_core});
-                small_bank_id_to_bank_offset.insert({bank_id, 0});
-                bank_id++;
+            if (alloc_config.core_type_from_noc_coord_table.at(noc_core) != AllocCoreType::ComputeAndStore) {
+                continue;
             }
+
+            allocator.logical_core_to_bank_ids[BufferType::L1_SMALL].insert({logical_core, {bank_id}});
+            allocator.bank_id_to_logical_core.insert({bank_id, logical_core});
+            small_bank_id_to_bank_offset.insert({bank_id, 0});
+            bank_id++;
         }
     }
 
@@ -193,7 +188,7 @@ void init_compute_and_storage_l1_bank_manager(Allocator &allocator, const Alloca
     uint64_t allocatable_l1_size =
         static_cast<uint64_t>(alloc_config.worker_l1_size) - alloc_config.l1_unreserved_base - alloc_config.l1_small_size;
     // Assuming top down allocation for L1 buffers so the allocatable memory space is the top l1_bank_size bytes of L1
-    allocator.l1_manager = BankManager(BufferType::L1, bank_id_to_bank_offset, allocatable_l1_size, interleaved_address_limit, alloc_config.alignment, alloc_config.l1_unreserved_base);
+    allocator.l1_manager = BankManager(BufferType::L1, bank_id_to_bank_offset, allocatable_l1_size, interleaved_address_limit, alloc_config.alignment, alloc_config.l1_unreserved_base, alloc_config.disable_interleaved);
 
     uint64_t small_interleaved_address_limit = alloc_config.worker_l1_size - alloc_config.l1_small_size;
     uint64_t small_alloc_offset = alloc_config.l1_unreserved_base + allocatable_l1_size;
@@ -206,7 +201,8 @@ void init_compute_and_storage_l1_bank_manager(Allocator &allocator, const Alloca
         alloc_config.l1_small_size,
         small_interleaved_address_limit,
         alloc_config.alignment,
-        small_alloc_offset);
+        small_alloc_offset,
+        alloc_config.disable_interleaved);
 }
 
 }   // namespace allocator
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index 4b5f7c83888..519945e7099 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -35,6 +35,10 @@ bool is_sharded(const TensorMemoryLayout &layout) {
         layout == TensorMemoryLayout::BLOCK_SHARDED);
 }
 
+bool is_l1(BufferType buffer_type) {
+    return buffer_type == BufferType::L1 or buffer_type == BufferType::L1_SMALL;
+}
+
 void validate_buffer_size_and_page_size(
     DeviceAddr size,
     DeviceAddr page_size,
@@ -201,6 +205,17 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer& buffer) {
     return buffer_page_mapping;
 }
 
+void validate_sub_device_id(std::optional<uint32_t> sub_device_id, Device *device, BufferType buffer_type, const std::optional<ShardSpecBuffer>& shard_parameters) {
+    // No need to validate if we're using the global allocator or not sharding
+    if (!sub_device_id.has_value()) {
+        return;
+    }
+    TT_FATAL(shard_parameters.has_value(), "Specifying sub-device for buffer requires buffer to be sharded");
+    TT_FATAL(is_l1(buffer_type), "Specifying sub-device for buffer requires buffer to be L1");
+    // TODO: Validate that cores used match the sub-device
+    TT_FATAL(*sub_device_id == 0, "Invalid sub-device id");
+}
+
 Buffer::Buffer(
     Device *device,
     DeviceAddr size,
@@ -209,6 +224,7 @@ Buffer::Buffer(
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
     const std::optional<bool> bottom_up,
+    const std::optional<uint32_t> sub_device_id,
     const bool owns_data,
     Private) :
     device_(device),
@@ -218,10 +234,13 @@ Buffer::Buffer(
     buffer_layout_(buffer_layout),
     shard_parameters_(shard_parameters),
     bottom_up_(bottom_up.value_or(this->is_dram())),
+    sub_device_id_(sub_device_id),
     owns_data_(owns_data),
     buffer_page_mapping_(nullptr) {
     TT_FATAL(this->device_ != nullptr && this->device_->allocator_ != nullptr, "Device and allocator need to not be null.");
-
+    if (this->sub_device_id_.has_value()) {
+        validate_sub_device_id(this->sub_device_id_, this->device_, buffer_type, shard_parameters);
+    }
     if (size != 0) {
         validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
     }
@@ -234,8 +253,9 @@ std::shared_ptr<Buffer> Buffer::create(
     const BufferType buffer_type,
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
-    const std::optional<bool> bottom_up) {
-    auto* bufferPtr = new Buffer(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, true /* owns data */, Private());
+    const std::optional<bool> bottom_up,
+    const std::optional<uint32_t> sub_device_id) {
+    auto* bufferPtr = new Buffer(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, sub_device_id, true /* owns data */, Private());
     // Using a custom deleter to properly clean up the owned datas
     auto buffer = std::shared_ptr<Buffer>(bufferPtr, deleter);
     buffer->weak_self = buffer;
@@ -274,9 +294,10 @@ std::shared_ptr<Buffer> Buffer::create(
     const BufferType buffer_type,
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
-    const std::optional<bool> bottom_up) {
+    const std::optional<bool> bottom_up,
+    const std::optional<uint32_t> sub_device_id) {
     // Not using a custom deleter, because it doesn't own any data to cleanup
-    auto buffer = std::make_shared<Buffer>(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, false /* owns data */, Private());
+    auto buffer = std::make_shared<Buffer>(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, sub_device_id, false /* owns data */, Private());
     buffer->weak_self = buffer;
 
     buffer->address_ = address;
@@ -377,7 +398,7 @@ CoreType Buffer::core_type() const {
 }
 
 bool Buffer::is_l1() const {
-    return buffer_type() == BufferType::L1 or buffer_type() == BufferType::L1_SMALL;
+    return ::is_l1(buffer_type());
 }
 bool Buffer::is_dram() const {
     return buffer_type() == BufferType::DRAM || buffer_type() == BufferType::TRACE;
@@ -389,12 +410,12 @@ bool Buffer::is_trace() const {
 
 uint32_t Buffer::dram_channel_from_bank_id(uint32_t bank_id) const {
     TT_FATAL(this->is_dram(), "Expected DRAM buffer!");
-    return this->device_->dram_channel_from_bank_id(bank_id);
+    return this->device_->dram_channel_from_bank_id(bank_id, this->sub_device_id_);
 }
 
 CoreCoord Buffer::logical_core_from_bank_id(uint32_t bank_id) const {
     TT_FATAL(this->is_l1(), "Expected L1 buffer!");
-    return this->device_->logical_core_from_bank_id(bank_id);
+    return this->device_->logical_core_from_bank_id(bank_id, this->sub_device_id_);
 }
 
 CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
@@ -419,7 +440,7 @@ CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
 CoreCoord Buffer::noc_coordinates() const { return this->noc_coordinates(0); }
 
 DeviceAddr Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
-    auto num_banks = this->device_->num_banks(this->buffer_type_);
+    auto num_banks = this->device_->num_banks(this->buffer_type_, this->sub_device_id_);
     TT_FATAL(bank_id < num_banks, "Invalid Bank ID: {} exceeds total numbers of banks ({})!", bank_id, num_banks);
     int pages_offset_within_bank = (int)page_index / num_banks;
     auto offset = (round_up(this->page_size(), this->alignment()) * pages_offset_within_bank);
@@ -427,7 +448,7 @@ DeviceAddr Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
 }
 
 uint32_t Buffer::alignment() const {
-    return this->device_->get_allocator_alignment();
+    return this->device_->get_allocator_alignment(this->sub_device_id_);
 }
 DeviceAddr Buffer::aligned_page_size() const {
     return align(page_size(), this->alignment());
@@ -463,7 +484,7 @@ std::optional<uint32_t> Buffer::num_cores() const {
 }
 
 DeviceAddr Buffer::translate_page_address(uint64_t offset, uint32_t bank_id) const {
-    DeviceAddr base_page_address = this->address() + this->device_->bank_offset(this->buffer_type_, bank_id);
+    DeviceAddr base_page_address = this->address() + this->device_->bank_offset(this->buffer_type_, bank_id, this->sub_device_id_);
     return base_page_address + offset;
 }
 
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index e36db0b85bc..b9bdd19e6ed 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -156,7 +156,8 @@ class Buffer final {
         BufferType buffer_type,
         TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED,
         const std::optional<ShardSpecBuffer>& shard_parameter = std::nullopt,
-        std::optional<bool> bottom_up = std::nullopt);
+        std::optional<bool> bottom_up = std::nullopt,
+        std::optional<uint32_t> sub_device_id = std::nullopt);
     static std::shared_ptr<Buffer> create(
         Device *device,
         DeviceAddr address,
@@ -165,7 +166,8 @@ class Buffer final {
         BufferType buffer_type,
         TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED,
         const std::optional<ShardSpecBuffer>& shard_parameter = std::nullopt,
-        std::optional<bool> bottom_up = std::nullopt);
+        std::optional<bool> bottom_up = std::nullopt,
+        std::optional<uint32_t> sub_device_id = std::nullopt);
 
     Buffer(const Buffer &other) = delete;
     Buffer &operator=(const Buffer &other) = delete;
@@ -223,6 +225,7 @@ class Buffer final {
 
     const std::shared_ptr<const BufferPageMapping>& get_buffer_page_mapping();
 
+    std::optional<uint32_t> sub_device_id() const { return sub_device_id_; }
 
     Buffer(
         Device *device,
@@ -232,6 +235,7 @@ class Buffer final {
         TensorMemoryLayout buffer_layout,
         const std::optional<ShardSpecBuffer>& shard_parameter,
         std::optional<bool> bottom_up,
+        std::optional<uint32_t> sub_device_id,
         bool owns_data,
         Private);
 
@@ -256,6 +260,7 @@ class Buffer final {
     const BufferType buffer_type_;
     const TensorMemoryLayout buffer_layout_;
     const bool bottom_up_;
+    const std::optional<uint32_t> sub_device_id_;
     const bool owns_data_;
 
     std::atomic<AllocationStatus> allocation_status_ = AllocationStatus::ALLOCATION_REQUESTED;
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 78776f56369..ee7ef233d98 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -23,6 +23,7 @@
 #include "tt_metal/detail/persistent_kernel_cache.hpp"
 #include "tt_metal/tools/profiler/tt_metal_tracy.hpp"
 #include "llrt/hal.hpp"
+#include "tt_metal/tt_stl/span.hpp"
 
 #include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
 
@@ -214,6 +215,8 @@ void Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size
     // Tensix/Eth -> PCIe/DRAM src and dst addrs must be L1_ALIGNMENT aligned
     // PCIe/DRAM -> Tensix/Eth src and dst addrs must be DRAM_ALIGNMENT aligned
     // Tensix/Eth <-> Tensix/Eth src and dst addrs must be L1_ALIGNMENT aligned
+    const auto &logical_size = this->logical_grid_size();
+    const auto &compute_size = this->compute_with_storage_grid_size();
     AllocatorConfig config(
         {.num_dram_channels = static_cast<size_t>(soc_desc.get_num_dram_channels()),
          .dram_bank_size = soc_desc.dram_bank_size,
@@ -221,7 +224,7 @@ void Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size
          .dram_unreserved_base = hal.get_dev_addr(HalDramMemAddrType::DRAM_BARRIER) + \
                                  hal.get_dev_size(HalDramMemAddrType::DRAM_BARRIER),
          .l1_unreserved_base = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::UNRESERVED),
-         .worker_grid_size = this->logical_grid_size(),
+         .worker_grid = CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(logical_size.x - 1, logical_size.y - 1))),
          .worker_l1_size = static_cast<size_t>(soc_desc.worker_l1_size),
          .storage_core_bank_size = get_storage_core_bank_size(id_, num_hw_cqs_, dispatch_core_type),
          .l1_small_size = align(l1_small_size, hal.get_alignment(HalMemType::L1)),
@@ -230,8 +233,9 @@ void Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size
          .worker_log_to_physical_routing_x = soc_desc.worker_log_to_physical_routing_x,
          .worker_log_to_physical_routing_y = soc_desc.worker_log_to_physical_routing_y,
          .l1_bank_remap = l1_bank_remap,
-         .compute_grid_size = this->compute_with_storage_grid_size(),
-         .alignment = std::max(hal.get_alignment(HalMemType::DRAM), hal.get_alignment(HalMemType::L1))});
+         .compute_grid = CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(compute_size.x - 1, compute_size.y - 1))),
+         .alignment = std::max(hal.get_alignment(HalMemType::DRAM), hal.get_alignment(HalMemType::L1)),
+         .disable_interleaved = false});
     TT_FATAL(config.l1_small_size < (config.storage_core_bank_size.has_value() ? config.storage_core_bank_size.value() : config.worker_l1_size - config.l1_unreserved_base),
             "Reserved size must be less than bank size");
     TT_FATAL(
@@ -2999,7 +3003,9 @@ bool Device::close() {
     }
 
     tt::Cluster::instance().l1_barrier(id_);
-    allocator::clear(*this->allocator_);
+    if (this->allocator_) {
+        allocator::clear(*this->allocator_);
+    }
     // After device close, no buffers on this device should be used
     for (const auto &buf : this->get_allocated_buffers()) {
         DeallocateBuffer(*buf);
@@ -3148,12 +3154,33 @@ uint32_t Device::get_noc_multicast_encoding(uint8_t noc_index, const CoreRange&
     }
 }
 
-void Device::check_allocator_is_initialized() const {
-    if (this->allocator_ == nullptr) {
-        TT_THROW("No memory allocator! Device has not been initialized, did you forget to call InitializeDevice?");
+void Device::check_allocator_is_initialized(std::optional<uint32_t> sub_device_id) const {
+    // TODO: This will query the active sub-device manager
+    if (sub_device_id.has_value()) {
+        TT_THROW("Sub-device allocator not implemented yet");
+    } else {
+        if (!this->allocator_) {
+            TT_THROW("No memory allocator! Allocator has not been initialized");
+        }
+    }
+}
+
+const std::unique_ptr<Allocator> &Device::get_initialized_allocator(std::optional<uint32_t> sub_device_id) const {
+    // TODO: This will query the active sub-device manager
+    if (sub_device_id.has_value()) {
+        TT_THROW("Sub-device allocator not implemented yet");
+    } else {
+        if (!this->allocator_) {
+            TT_THROW("No memory allocator! Allocator has not been initialized");
+        }
+        return this->allocator_;
     }
 }
 
+std::unique_ptr<Allocator> &Device::get_initialized_allocator(std::optional<uint32_t> sub_device_id) {
+    return const_cast<std::unique_ptr<Allocator>&>(const_cast<const Device*>(this)->get_initialized_allocator(sub_device_id));
+}
+
 void Device::reset_num_sub_devices(uint32_t num_sub_devices) {
     TT_FATAL((num_sub_devices >=1 && num_sub_devices <= Device::MAX_NUM_SUB_DEVICES), "Illegal number of sub devices specified");
     // Finish all running programs
@@ -3177,19 +3204,19 @@ uint32_t Device::num_sub_devices() const {
     return Device::DEFAULT_NUM_SUB_DEVICES;
 }
 
-uint32_t Device::num_banks(const BufferType &buffer_type) const {
-    this->check_allocator_is_initialized();
-    return allocator::num_banks(*this->allocator_, buffer_type);
+uint32_t Device::num_banks(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::num_banks(*allocator, buffer_type);
 }
 
-uint32_t Device::bank_size(const BufferType &buffer_type) const {
-    this->check_allocator_is_initialized();
-    return allocator::bank_size(*this->allocator_, buffer_type);
+uint32_t Device::bank_size(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::bank_size(*allocator, buffer_type);
 }
 
-uint32_t Device::dram_channel_from_bank_id(uint32_t bank_id) const {
-    this->check_allocator_is_initialized();
-    return allocator::dram_channel_from_bank_id(*this->allocator_, bank_id);
+uint32_t Device::dram_channel_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::dram_channel_from_bank_id(*allocator, bank_id);
 }
 
 CoreCoord Device::dram_core_from_dram_channel(uint32_t dram_channel) const {
@@ -3206,64 +3233,64 @@ uint32_t Device::dram_channel_from_logical_core(const CoreCoord& logical_core) c
     return tt::Cluster::instance().get_soc_desc(id_).get_dram_channel_from_logical_core(logical_core);
 }
 
-int32_t Device::bank_offset(BufferType buffer_type, uint32_t bank_id) const {
-    this->check_allocator_is_initialized();
-    return allocator::bank_offset(*this->allocator_, buffer_type, bank_id);
+int32_t Device::bank_offset(BufferType buffer_type, uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::bank_offset(*allocator, buffer_type, bank_id);
 }
 
-CoreCoord Device::logical_core_from_bank_id(uint32_t bank_id) const {
-    this->check_allocator_is_initialized();
-    return allocator::logical_core_from_bank_id(*this->allocator_, bank_id);
+CoreCoord Device::logical_core_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::logical_core_from_bank_id(*allocator, bank_id);
 }
 
-const std::vector<uint32_t> &Device::bank_ids_from_dram_channel(uint32_t dram_channel) const {
-    this->check_allocator_is_initialized();
-    return allocator::bank_ids_from_dram_channel(*this->allocator_, dram_channel);
+const std::vector<uint32_t> &Device::bank_ids_from_dram_channel(uint32_t dram_channel, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::bank_ids_from_dram_channel(*allocator, dram_channel);
 }
 
 const std::vector<uint32_t> &Device::bank_ids_from_logical_core(
-    BufferType buffer_type, const CoreCoord &logical_core) const {
-    this->check_allocator_is_initialized();
-    return allocator::bank_ids_from_logical_core(*this->allocator_, buffer_type, logical_core);
+    BufferType buffer_type, const CoreCoord &logical_core, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::bank_ids_from_logical_core(*allocator, buffer_type, logical_core);
 }
 
-allocator::Statistics Device::get_memory_allocation_statistics(const BufferType &buffer_type) const {
-    this->check_allocator_is_initialized();
-    return allocator::get_statistics(*this->allocator_, buffer_type);
+allocator::Statistics Device::get_memory_allocation_statistics(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::get_statistics(*allocator, buffer_type);
 }
 
-uint32_t Device::get_allocator_alignment() const {
-    this->check_allocator_is_initialized();
-    return this->allocator_->config.alignment;
+uint32_t Device::get_allocator_alignment(std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator->config.alignment;
 }
 
-size_t Device::get_l1_small_size() const {
-    this->check_allocator_is_initialized();
-    return this->allocator_->config.l1_small_size;
+size_t Device::get_l1_small_size(std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator->config.l1_small_size;
 }
 
-void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const {
-    this->check_allocator_is_initialized();
-    return allocator::dump_memory_blocks(*this->allocator_, buffer_type, out);
+void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::dump_memory_blocks(*allocator, buffer_type, out);
 }
 
-const std::unordered_set<Buffer *> &Device::get_allocated_buffers() const {
-    this->check_allocator_is_initialized();
-    return allocator::get_allocated_buffers(*this->allocator_);
+const std::unordered_set<Buffer *> &Device::get_allocated_buffers(std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::get_allocated_buffers(*allocator);
 }
 
-void Device::deallocate_buffers(){
-    allocator::deallocate_buffers(*allocator_);
+void Device::deallocate_buffers(std::optional<uint32_t> sub_device_id) {
+    auto& allocator = this->get_initialized_allocator(sub_device_id);
+    allocator::deallocate_buffers(*allocator);
 }
 
-std::optional<DeviceAddr> Device::lowest_occupied_l1_address(uint32_t bank_id, tt::stl::Span<const uint32_t> sub_device_ids) const {
-    this->check_allocator_is_initialized();
-    // TODO: This will query the active sub-device manager
-    TT_FATAL(sub_device_ids.size() <= 1, "Invalid number of sub-devices {}", sub_device_ids.size());
-    if (sub_device_ids.size() == 1) {
-        TT_FATAL(sub_device_ids[0] == 0, "Invalid sub-device id {}", sub_device_ids[0]);
-    }
-    return allocator::lowest_occupied_l1_address(*this->allocator_, bank_id);
+std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Span<const uint32_t> sub_device_ids) const {
+    this->check_allocator_is_initialized(std::nullopt);
+    TT_FATAL(sub_device_ids.size() == 0, "Invalid number of sub-devices {}", sub_device_ids.size());
+    // Global bank id needs to look up a bank from the compute grid (not the storage grid)
+    auto global_bank_id =
+        this->bank_ids_from_logical_core(BufferType::L1, *this->compute_cores_.begin())[0];
+    return allocator::lowest_occupied_l1_address(*this->allocator_, global_bank_id);
 }
 
 float Device::sfpu_eps() const {
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index c1f4fe72188..0725519eed2 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -18,6 +18,7 @@
 #include "llrt/tt_cluster.hpp"
 #include "llrt/hal.hpp"
 #include "tt_metal/impl/dispatch/command_queue_interface.hpp"
+#include "tt_metal/tt_stl/span.hpp"
 #include "program_cache.hpp"
 
 namespace tt {
@@ -160,31 +161,34 @@ class Device {
 
     uint32_t num_sub_devices() const;
 
-    uint32_t num_banks(const BufferType &buffer_type) const;
-    uint32_t bank_size(const BufferType &buffer_type) const;
+    uint32_t num_banks(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    uint32_t bank_size(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    uint32_t dram_channel_from_bank_id(uint32_t bank_id) const;
+    uint32_t dram_channel_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
     CoreCoord dram_core_from_dram_channel(uint32_t dram_channel) const;
     CoreCoord logical_core_from_dram_channel(uint32_t dram_channel) const;
     uint32_t dram_channel_from_logical_core(const CoreCoord& logical_core) const;
 
-    int32_t bank_offset(BufferType buffer_type, uint32_t bank_id) const;
+    const std::unique_ptr<Allocator> &get_initialized_allocator(std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    std::unique_ptr<Allocator> &get_initialized_allocator(std::optional<uint32_t> sub_device_id = std::nullopt);
 
-    CoreCoord logical_core_from_bank_id(uint32_t bank_id) const;
+    int32_t bank_offset(BufferType buffer_type, uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    const std::vector<uint32_t> &bank_ids_from_dram_channel(uint32_t dram_channel) const;
+    CoreCoord logical_core_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+
+    const std::vector<uint32_t> &bank_ids_from_dram_channel(uint32_t dram_channel, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
     const std::vector<uint32_t> &bank_ids_from_logical_core(
-        BufferType buffer_type, const CoreCoord &logical_core) const;
+        BufferType buffer_type, const CoreCoord &logical_core, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type) const;
+    allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    uint32_t get_allocator_alignment() const;
+    uint32_t get_allocator_alignment(std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    size_t get_l1_small_size() const;
+    size_t get_l1_small_size(std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const;
+    void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
     // Set of logical storage only core coordinates
     const std::set<CoreCoord> &storage_only_cores() const { return this->storage_only_cores_; }
@@ -199,11 +203,11 @@ class Device {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& physical_core) const;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& physical_cores) const;
 
-    const std::unordered_set<Buffer *> &get_allocated_buffers() const;
+    const std::unordered_set<Buffer *> &get_allocated_buffers(std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
-    void deallocate_buffers();
+    void deallocate_buffers(std::optional<uint32_t> sub_device_id = std::nullopt);
 
-    std::optional<DeviceAddr> lowest_occupied_l1_address(uint32_t bank_id, tt::stl::Span<const uint32_t> sub_device_ids) const;
+    std::optional<DeviceAddr> lowest_occupied_compute_l1_address(tt::stl::Span<const uint32_t> sub_device_ids) const;
 
     // machine epsilon
     float sfpu_eps() const;
@@ -233,7 +237,7 @@ class Device {
     std::shared_ptr<TraceBuffer> get_trace(const uint32_t tid);
 
     bool using_slow_dispatch() const;
-    void check_allocator_is_initialized() const;
+    void check_allocator_is_initialized(std::optional<uint32_t> sub_device_id) const;
 
     // Checks that the given arch is on the given pci_slot and that it's responding
     // Puts device into reset
@@ -341,12 +345,13 @@ class Device {
     T get_dev_addr(CoreCoord phys_core, HalL1MemAddrType addr_type) const;
     // Returns address where allocator starts allocating buffer
     template <typename T = DeviceAddr>
-    T get_base_allocator_addr(const HalMemType &mem_type) const;
+    T get_base_allocator_addr(const HalMemType &mem_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
 
     template <typename CoreRangeContainer>
     std::vector<std::pair<transfer_info_cores, uint32_t>> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type);
     bool dispatch_s_enabled() const;
     bool distributed_dispatcher() const;
+    NOC dispatch_go_signal_noc() const;
     size_t get_device_kernel_defines_hash();
 
     const vector_memcpy_aligned<uint32_t>& noc_mcast_data(uint32_t sub_device_id) const;
@@ -358,7 +363,6 @@ class Device {
 
    private:
     void reset_num_sub_devices(uint32_t num_sub_devices);
-    NOC dispatch_go_signal_noc() const;
 
     void MarkAllocationsUnsafe();
     void MarkAllocationsSafe();
@@ -398,8 +402,9 @@ inline T Device::get_dev_addr(CoreCoord phys_core, HalL1MemAddrType addr_type) c
 }
 
 template <typename T>
-inline T Device::get_base_allocator_addr(const HalMemType &mem_type) const {
-    return allocator::get_unreserved_base_address(*this->allocator_, mem_type);
+inline T Device::get_base_allocator_addr(const HalMemType &mem_type, std::optional<uint32_t> sub_device_id) const {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    return allocator::get_unreserved_base_address(*allocator, mem_type);
 }
 
 // TODO: Find a better home for this function
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index 58877bc1de1..c3e0d546579 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -749,12 +749,14 @@ void Program::allocate_circular_buffers(const Device *device) { pimpl_->allocate
 void detail::Program_::validate_circular_buffer_region(const Device *device) {
     //ZoneScoped;
 
-    // Banks are in lockstep so we only need to get lowest L1 address of one compute and storage core
-    // Only compute with storage cores can have CBs and all compute with storage cores will have the same bank offset
+    // Only pass sub_device_ids if sub-device manager is active
+    // Allocator is handled differently from other sub_device apis since the global allocator is always active
+    // State when there is no active manager is normally treated as having 1 sub_device, which is used to query state
+    // For allocator, we don't have a sub_device allocator when there is no active manager, only the global allocator
     // TODO: Circular buffer allocation and validation could be better optimized by determining usage per sub-device
-    const std::vector<uint32_t> &bank_ids =
-        device->bank_ids_from_logical_core(BufferType::L1, *device->compute_cores_.begin());
-    std::optional<DeviceAddr> lowest_address = device->lowest_occupied_l1_address(bank_ids[0], this->determine_sub_device_ids(device));
+    constexpr bool active_sub_device_manager = false;
+    const auto &sub_device_ids = active_sub_device_manager ? this->determine_sub_device_ids(device) : std::vector<uint32_t>();
+    std::optional<DeviceAddr> lowest_address = device->lowest_occupied_compute_l1_address(sub_device_ids);
     uint32_t max_l1_size = device->l1_size_per_core();
 
     for (const CircularBufferAllocator &cb_allocator : this->cb_allocators_) {
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index 071198de166..acd6e19ae0c 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -837,16 +837,17 @@ DeviceAddr AllocateBuffer(Buffer *buffer) {
         GraphTracker::instance().track_allocate(buffer);
         return 0;
     }
-
+    // TODO: Validate correct sub-device manager id
+    auto& allocator = buffer->device()->get_initialized_allocator(buffer->sub_device_id());
     DeviceAddr allocated_addr;
     if (is_sharded(buffer->buffer_layout())) {
         allocated_addr = allocator::allocate_buffer(
-            *(buffer->device()->allocator_),
+            *allocator,
             buffer->shard_spec().size() * buffer->num_cores().value() * buffer->page_size(),
             buffer);
     } else {
         allocated_addr = allocator::allocate_buffer(
-            *(buffer->device()->allocator_),
+            *allocator,
             buffer->size(),
             buffer);
     }
@@ -875,7 +876,9 @@ void DeallocateBuffer(Buffer *buffer) {
         TracyFreeN(reinterpret_cast<void const *>(buffer->address()), get_buffer_location_name(buffer->buffer_type(), buffer->device()->id()));
     }
 #endif
-    allocator::deallocate_buffer(*buffer->device()->allocator_, buffer);
+    // TODO: Validate correct sub-device manager id
+    auto& allocator = buffer->device()->get_initialized_allocator(buffer->sub_device_id());
+    allocator::deallocate_buffer(*allocator, buffer);
 }
 
 void SynchronizeWorkerThreads(const std::vector<Device*>& workers) {
@@ -1137,37 +1140,38 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
     return GlobalSemaphore::create(device, std::move(cores), initial_value, buffer_type);
 }
 
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config) {
-    return Buffer::create(
-        config.device, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt);
-}
-
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, DeviceAddr address) {
-    return Buffer::create(
-        config.device, address, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt);
-}
-
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config) {
-    return Buffer::create(
-        config.device,
-        config.size,
-        config.page_size,
-        config.buffer_type,
-        config.buffer_layout,
-        config.shard_parameters,
-        std::nullopt);
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id) {
+    if (address.has_value()) {
+        return Buffer::create(
+            config.device, *address, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt, sub_device_id);
+    } else {
+        return Buffer::create(
+            config.device, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt, sub_device_id);
+    }
 }
-
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, DeviceAddr address) {
-    return Buffer::create(
-        config.device,
-        address,
-        config.size,
-        config.page_size,
-        config.buffer_type,
-        config.buffer_layout,
-        config.shard_parameters,
-        std::nullopt);
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id) {
+    if (address.has_value()) {
+        return Buffer::create(
+            config.device,
+            *address,
+            config.size,
+            config.page_size,
+            config.buffer_type,
+            config.buffer_layout,
+            config.shard_parameters,
+            std::nullopt,
+            sub_device_id);
+    } else {
+        return Buffer::create(
+            config.device,
+            config.size,
+            config.page_size,
+            config.buffer_type,
+            config.buffer_layout,
+            config.shard_parameters,
+            std::nullopt,
+            sub_device_id);
+    }
 }
 
 void DeallocateBuffer(Buffer &buffer) { buffer.deallocate(); }

From 5d470b7a0551d2d832faea8749d63d352b1b6141 Mon Sep 17 00:00:00 2001
From: Austin Ho <aho@tenstorrent.com>
Date: Wed, 6 Nov 2024 18:08:56 +0000
Subject: [PATCH 63/69] #13655: Add SubDevice and SubDeviceManager
 implementations Add support for splitting a device into multiple SubDevices,
 as well and maintaining different SubDeviceManager configurations, owned by
 device Add basic tests to validate sub-device support Update device apis to
 overload rather than take in optional sub_device ids Make SubDeviceId,
 SubDeviceManagerId strong types Refactor Device/SubDeviceManager state so
 that the default state is also encapsulated in a SubDeviceManager, and access
 the active SubDeviceManager through a pointer instead of map lookup

---
 .../apis/host_apis/buffers/CreateBuffer.rst   |   8 +-
 .../command_queue/EnqueueReadBuffer.rst       |   4 +-
 .../command_queue/EnqueueWriteBuffer.rst      |   4 +-
 tests/scripts/test_moreh_microbenchmark.py    |  26 +-
 .../kernels/receiver_l1.cpp                   |  11 +-
 .../test_dram_read_remote_cb.cpp              |  78 +++-
 .../kernels/receiver_l1.cpp                   |  10 +-
 .../test_remote_cb_sync_matmul.cpp            |  90 ++--
 .../common/kernels/writer_l1.cpp              |  11 +-
 .../unit_tests_fast_dispatch/CMakeLists.txt   |   1 +
 .../sub_device/kernels/incrementer.cpp        |  17 +
 .../kernels/persistent_remote_waiter.cpp      |  36 ++
 .../sub_device/kernels/persistent_waiter.cpp  |  24 +
 .../sub_device/kernels/syncer.cpp             |  17 +
 .../sub_device/test_sub_device.cpp            | 430 ++++++++++++++++++
 tt_metal/common/core_coord.cpp                |  14 +
 tt_metal/common/core_coord.hpp                |   4 +
 tt_metal/host_api.hpp                         |  70 ++-
 tt_metal/impl/CMakeLists.txt                  |   2 +
 tt_metal/impl/allocator/allocator.cpp         |   1 -
 tt_metal/impl/buffers/buffer.cpp              |  31 +-
 tt_metal/impl/buffers/buffer.hpp              |  18 +-
 tt_metal/impl/device/device.cpp               | 364 ++++++++++-----
 tt_metal/impl/device/device.hpp               | 124 +++--
 tt_metal/impl/dispatch/command_queue.cpp      | 157 ++++---
 tt_metal/impl/dispatch/command_queue.hpp      |  32 +-
 tt_metal/impl/program/program.cpp             |  62 ++-
 tt_metal/impl/program/program.hpp             |   2 +-
 tt_metal/impl/sub_device/sub_device.cpp       |  55 +++
 tt_metal/impl/sub_device/sub_device.hpp       |  45 ++
 .../impl/sub_device/sub_device_manager.cpp    | 305 +++++++++++++
 .../impl/sub_device/sub_device_manager.hpp    |  95 ++++
 tt_metal/impl/sub_device/sub_device_types.hpp | 103 +++++
 tt_metal/impl/trace/trace.cpp                 |   5 +-
 tt_metal/impl/trace/trace_buffer.hpp          |   5 +-
 tt_metal/tt_metal.cpp                         | 120 +++--
 36 files changed, 1961 insertions(+), 420 deletions(-)
 create mode 100644 tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp
 create mode 100644 tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp
 create mode 100644 tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_waiter.cpp
 create mode 100644 tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp
 create mode 100644 tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
 create mode 100644 tt_metal/impl/sub_device/sub_device.cpp
 create mode 100644 tt_metal/impl/sub_device/sub_device.hpp
 create mode 100644 tt_metal/impl/sub_device/sub_device_manager.cpp
 create mode 100644 tt_metal/impl/sub_device/sub_device_manager.hpp
 create mode 100644 tt_metal/impl/sub_device/sub_device_types.hpp

diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
index d8799309801..a68628cf8cc 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/buffers/CreateBuffer.rst
@@ -1,5 +1,9 @@
 CreateBuffer
 =================
 
-.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id);
-.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig &config);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig &config);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig &config, DeviceAddr address);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig &config, DeviceAddr address);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const InterleavedBufferConfig &config, SubDeviceId sub_device_id);
+.. doxygenfunction:: tt::tt_metal::v0::CreateBuffer(const ShardedBufferConfig &config, SubDeviceId sub_device_id);
diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
index 037f50995d5..db9688926ec 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueReadBuffer.rst
@@ -1,5 +1,5 @@
 EnqueueReadBuffer
 ==================
 
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, void * dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueReadBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, void * dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids)
diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
index 85d61986364..e48e5f83014 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/EnqueueWriteBuffer.rst
@@ -1,5 +1,5 @@
 EnqueueWriteBuffer
 ==================
 
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
-.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<uint32_t>& src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids)
+.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, HostDataType src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids)
diff --git a/tests/scripts/test_moreh_microbenchmark.py b/tests/scripts/test_moreh_microbenchmark.py
index b77b44d007b..6076d6c034e 100755
--- a/tests/scripts/test_moreh_microbenchmark.py
+++ b/tests/scripts/test_moreh_microbenchmark.py
@@ -288,7 +288,7 @@ def run_dram_read_l1_write_cmd(k, n, num_blocks, df, num_banks, bank_start_id):
 
 
 def run_dram_read_remote_cb_sync_cmd(
-    k, n, num_blocks, cb_num_blocks, cb_padding, df, num_receivers, num_mixed_df_layers
+    k, n, num_blocks, cb_num_blocks, cb_padding, df, num_receivers, num_mixed_df_layers, use_sub_devices
 ):
     command = (
         "TT_METAL_DEVICE_PROFILER=1 ./build/test/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb "
@@ -310,12 +310,13 @@ def run_dram_read_remote_cb_sync_cmd(
         + str(num_receivers)
         + " --num-mixed-df-layers "
         + str(num_mixed_df_layers)
+        + (" --use-sub-devices " if use_sub_devices else "")
     )
     run_moreh_single_test("DRAM read remote CB sync single-core ", command)
 
 
 def run_remote_cb_sync_matmul_single_core_cmd(
-    m, k, n, num_blocks, cb_num_blocks, cb_padding, df, num_receivers, num_layers
+    m, k, n, num_blocks, cb_num_blocks, cb_padding, df, num_receivers, num_layers, use_sub_devices
 ):
     command = (
         "TT_METAL_DEVICE_PROFILER=1 ./build/test/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul "
@@ -339,6 +340,7 @@ def run_remote_cb_sync_matmul_single_core_cmd(
         + str(num_receivers)
         + " --num-layers "
         + str(num_layers)
+        + (" --use-sub-devices " if use_sub_devices else "")
     )
     run_moreh_single_test("DRAM read remote CB sync single-core ", command)
 
@@ -866,8 +868,22 @@ def test_dram_read_l1_write_core(
         ("wormhole_b0", "Matmul", np.array([32, 2048, 128]), 1, 8, 10, 256, 1, 2, 15),
     ],
 )
+@pytest.mark.parametrize(
+    "use_sub_devices",
+    [False, True],
+)
 def test_dram_read_remote_cb_sync(
-    arch, test, test_vector, num_tests, nblock, cb_nblock, cb_padding, data_format, num_receivers, num_mixed_df_layers
+    arch,
+    test,
+    test_vector,
+    num_tests,
+    nblock,
+    cb_nblock,
+    cb_padding,
+    data_format,
+    num_receivers,
+    num_mixed_df_layers,
+    use_sub_devices,
 ):
     data = []
     cycle_list = []
@@ -893,12 +909,12 @@ def test_dram_read_remote_cb_sync(
                 else:
                     input_size += k * n * 2048 // 1024
             run_dram_read_remote_cb_sync_cmd(
-                k, n, nblock, cb_nblock, cb_padding, data_format, num_receivers, num_mixed_df_layers
+                k, n, nblock, cb_nblock, cb_padding, data_format, num_receivers, num_mixed_df_layers, use_sub_devices
             )
         elif test == "Matmul":
             input_size = input_size * num_mixed_df_layers
             run_remote_cb_sync_matmul_single_core_cmd(
-                m, k, n, nblock, cb_nblock, cb_padding, data_format, num_receivers, num_mixed_df_layers
+                m, k, n, nblock, cb_nblock, cb_padding, data_format, num_receivers, num_mixed_df_layers, use_sub_devices
             )
         cycle = profile_results_kernel_duration()
         time = cycle / get_device_freq() / 1000.0 / 1000.0
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/kernels/receiver_l1.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/kernels/receiver_l1.cpp
index b5ad0fcfeb4..260deef8925 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/kernels/receiver_l1.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/kernels/receiver_l1.cpp
@@ -14,6 +14,7 @@ constexpr uint32_t cb_start_addr = get_compile_time_arg_val(0);
 constexpr uint32_t cb_rd_ptr = get_compile_time_arg_val(0);
 constexpr uint32_t cb_size = get_compile_time_arg_val(1);
 constexpr uint32_t num_layers = get_compile_time_arg_val(2);
+constexpr bool global_sems = get_compile_time_arg_val(3);
 
 uint32_t rt_args_idx = 0;
 uint32_t vc;
@@ -64,8 +65,14 @@ FORCE_INLINE void setup_remote_receiver_cb_interface() {
 
     remote_cb_interface.fifo_start_addr = cb_start_addr;
 
-    remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr));
-    remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr));
+    // Global semaphores return an actual address instead of an index
+    if constexpr (global_sems) {
+        remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_acked_semaphore_addr);
+        remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_sent_semaphore_addr);
+    } else {
+        remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr));
+        remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr));
+    }
 
     remote_cb_interface.aligned_page_size = aligned_page_size;
 }
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp
index f161368dc22..f914d3ca87b 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp
@@ -17,6 +17,7 @@
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/detail/util.hpp"
 #include "tt_metal/host_api.hpp"
+#include "tt_metal/impl/buffers/global_semaphore.hpp"
 #include "tt_metal/tt_metal/perf_microbenchmark/common/util.hpp"
 #include "tt_metal/common/work_split.hpp"
 #include "tests/tt_metal/test_utils/tilization.hpp"
@@ -77,7 +78,7 @@ void get_max_page_size_and_num_pages(uint32_t num_tiles, uint32_t num_datums_per
     num_pages = total_size / page_size;
 }
 
-std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
+std::tuple<std::vector<tt_metal::Program>, tt_metal::KernelHandle, uint32_t, std::vector<std::unique_ptr<tt_metal::GlobalSemaphore>>> create_programs(
     tt_metal::Device *device,
     const CoreRangeSet &dram_reader_core,
     const CoreRangeSet &l1_receiver_cores,
@@ -91,12 +92,19 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     uint32_t num_mixed_df_layers,
     uint32_t cb_padding,
     std::shared_ptr<tt::tt_metal::Buffer> input_buffer,
-    std::shared_ptr<tt::tt_metal::Buffer> output_buffer
+    std::shared_ptr<tt::tt_metal::Buffer> output_buffer,
+    bool use_sub_devices
     ) {
 
     log_info("created program");
+    std::vector<tt_metal::Program> programs;
+    programs.push_back(tt_metal::Program());
 
-    tt_metal::Program program = tt_metal::Program();
+    if (use_sub_devices) {
+        programs.push_back(tt_metal::Program());
+    }
+    auto& sender_program = programs[0];
+    auto& receiver_program = use_sub_devices ? programs[1] : programs[0];
 
     auto all_cores = dram_reader_core.merge(l1_receiver_cores);
 
@@ -127,7 +135,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     tt_metal::CircularBufferConfig reader_cb_config =
         tt_metal::CircularBufferConfig(reader_cb_size, {{reader_cb_index, tile_format}})
             .set_page_size(reader_cb_index, single_tile_size);
-    auto reader_cb = tt_metal::CreateCircularBuffer(program, dram_reader_core, reader_cb_config);
+    auto reader_cb = tt_metal::CreateCircularBuffer(sender_program, dram_reader_core, reader_cb_config);
 
     // mixed cb dataformat
     uint32_t next_layer_num_blocks = num_blocks * 2;
@@ -156,7 +164,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     tt_metal::CircularBufferConfig receiver_cb_config =
         tt_metal::CircularBufferConfig(receiver_cb_size, {{receiver_cb_index, tile_format}})
             .set_page_size(receiver_cb_index, receiver_page_size).set_globally_allocated_address(*output_buffer);
-    auto receiver_cb = tt_metal::CreateCircularBuffer(program, l1_receiver_cores, receiver_cb_config);
+    auto receiver_cb = tt_metal::CreateCircularBuffer(receiver_program, l1_receiver_cores, receiver_cb_config);
 
     log_info("reader_cb_size: {}", reader_cb_size);
     log_info("receiver_cb_size: {}", receiver_cb_size);
@@ -164,9 +172,21 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     // semaphore
     std::vector<uint32_t> pages_acked_semaphore_ids(num_receivers);
     std::vector<uint32_t> pages_sent_semaphore_ids(num_receivers);
-    for (uint32_t i=0; i < num_receivers; ++i) {
-        pages_acked_semaphore_ids[i] = tt_metal::CreateSemaphore(program, all_cores, INVALID);
-        pages_sent_semaphore_ids[i] = tt_metal::CreateSemaphore(program, all_cores, INVALID);
+    std::vector<std::unique_ptr<GlobalSemaphore>> global_sems;
+    // Global semaphores use an actual address instead of an index
+    if (use_sub_devices) {
+        global_sems.reserve(num_receivers * 2);
+        for (uint32_t i=0; i < num_receivers; ++i) {
+            global_sems.push_back(tt_metal::CreateGlobalSemaphore(device, all_cores, INVALID));
+            pages_acked_semaphore_ids[i] = global_sems.back()->address();
+            global_sems.push_back(tt_metal::CreateGlobalSemaphore(device, all_cores, INVALID));
+            pages_sent_semaphore_ids[i] =  global_sems.back()->address();
+        }
+    } else {
+        for (uint32_t i=0; i < num_receivers; ++i) {
+            pages_acked_semaphore_ids[i] = tt_metal::CreateSemaphore(sender_program, all_cores, INVALID);
+            pages_sent_semaphore_ids[i] = tt_metal::CreateSemaphore(sender_program, all_cores, INVALID);
+        }
     }
 
     std::vector<uint32_t> reader_compile_time_args = {
@@ -177,7 +197,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     };
 
     auto reader_kernel = tt_metal::CreateKernel(
-        program,
+        sender_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/reader_dram.cpp",
         dram_reader_core,
         tt_metal::DataMovementConfig{
@@ -191,11 +211,12 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
         (std::uint32_t) receiver_cb_addr,
         (std::uint32_t) receiver_cb_size,
         (std::uint32_t) num_receivers,
-        (std::uint32_t) num_mixed_df_layers
+        (std::uint32_t) num_mixed_df_layers,
+        (std::uint32_t) use_sub_devices
     };
 
     auto writer_kernel = tt_metal::CreateKernel(
-        program,
+        sender_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp",
         dram_reader_core,
         tt_metal::DataMovementConfig{
@@ -208,10 +229,11 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
         (std::uint32_t) reader_cb_addr,
         (std::uint32_t) receiver_cb_size,
         (std::uint32_t) num_mixed_df_layers,
+        (std::uint32_t) use_sub_devices
     };
 
     auto receiver_kernel = tt_metal::CreateKernel(
-        program,
+        receiver_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/kernels/receiver_l1.cpp",
         l1_receiver_cores,
         tt_metal::DataMovementConfig{
@@ -241,7 +263,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     for (uint32_t i = 0; i < num_mixed_df_layers; ++i) {
         reader_rt_args.push_back(i%2 == 0 ? block_num_tiles : next_layer_block_num_tiles);
     }
-    tt_metal::SetRuntimeArgs(program, reader_kernel, dram_reader_core_coord, reader_rt_args);
+    tt_metal::SetRuntimeArgs(sender_program, reader_kernel, dram_reader_core_coord, reader_rt_args);
 
     // writer rt
     std::vector<CoreCoord> l1_receiver_core_coords;
@@ -281,7 +303,7 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
     for (uint32_t i = 0; i < num_mixed_df_layers; ++i) {
         writer_rt_args.push_back(i%2 == 0 ? num_tile_rows_write : next_layer_num_tile_rows_write);
     }
-    tt_metal::SetRuntimeArgs(program, writer_kernel, dram_reader_core_coord, writer_rt_args);
+    tt_metal::SetRuntimeArgs(sender_program, writer_kernel, dram_reader_core_coord, writer_rt_args);
 
     // reciever rt
     for (uint32_t i=0; i < num_receivers; ++i) {
@@ -307,10 +329,10 @@ std::tuple<tt_metal::Program, tt_metal::KernelHandle, uint32_t> create_program(
 
         log_info("l1_receiver_core_coords: {}", l1_receiver_core_coords[i]);
 
-        tt_metal::SetRuntimeArgs(program, receiver_kernel, l1_receiver_core_coords[i], receiver_rt_args);
+        tt_metal::SetRuntimeArgs(receiver_program, receiver_kernel, l1_receiver_core_coords[i], receiver_rt_args);
     }
 
-    return {std::move(program), reader_kernel, reader_cb_addr};
+    return {std::move(programs), reader_kernel, reader_cb_addr, std::move(global_sems)};
 }
 
 float to_float(bfloat16 bfloat16_num) {
@@ -618,6 +640,7 @@ int main(int argc, char **argv) {
     uint32_t num_receivers = 1;
     uint32_t num_mixed_df_layers = 1;
     uint64_t k = 8192, n = 128;
+    bool use_sub_devices = false;
 
     try {
         ////////////////////////////////////////////////////////////////////////////
@@ -645,6 +668,8 @@ int main(int argc, char **argv) {
                 test_args::get_command_option_uint64_and_remaining_args(input_args, "--num-receivers", 1);
             std::tie(num_mixed_df_layers, input_args) =
                 test_args::get_command_option_uint64_and_remaining_args(input_args, "--num-mixed-df-layers", 1);
+            std::tie(use_sub_devices, input_args) =
+                test_args::has_command_option_and_remaining_args(input_args, "--use-sub-devices");
 
 
             test_args::validate_remaining_args(input_args);
@@ -718,7 +743,12 @@ int main(int argc, char **argv) {
             l1_receiver_core_coord_range = CoreRange{CoreCoord{1, 0}, CoreCoord{num_receivers, 0}};
         }
         CoreRangeSet l1_receiver_core{std::set<CoreRange>{l1_receiver_core_coord_range}};
-
+        if (use_sub_devices) {
+            SubDevice sender_sub_device = SubDevice(std::array{dram_reader_core});
+            SubDevice receiver_sub_device = SubDevice(std::array{l1_receiver_core});
+            SubDeviceManagerId sdm_id = device->create_sub_device_manager({sender_sub_device, receiver_sub_device}, 0);
+            device->load_sub_device_manager(sdm_id);
+        }
         ////////////////////////////////////////////////////////////////////////////
         //                      Input Setup
         ////////////////////////////////////////////////////////////////////////////
@@ -771,18 +801,24 @@ int main(int argc, char **argv) {
         ////////////////////////////////////////////////////////////////////////////
         //                      Application Setup
         ////////////////////////////////////////////////////////////////////////////
-        auto [program, kernel, output_cb_addr] = create_program(device, dram_reader_core, l1_receiver_core, single_tile_size, tile_format, k, n, num_blocks, cb_num_blocks, num_receivers, num_mixed_df_layers, cb_padding, input_buffers[0], output_buffer);
+        auto [programs, kernel, output_cb_addr, global_sems] = create_programs(device, dram_reader_core, l1_receiver_core, single_tile_size, tile_format, k, n, num_blocks, cb_num_blocks, num_receivers, num_mixed_df_layers, cb_padding, input_buffers[0], output_buffer, use_sub_devices);
 
         ////////////////////////////////////////////////////////////////////////////
         //                      Execution Application
         ////////////////////////////////////////////////////////////////////////////
-        tt_metal::detail::CompileProgram(device, program);
+        for (auto& program : programs) {
+            tt_metal::detail::CompileProgram(device, program);
+        }
 
         log_info(LogTest, "Num tests {}", num_tests);
         for (uint32_t i = 0; i < num_tests; ++i) {
-            EnqueueProgram(device->command_queue(), program, false);
+            for (auto& program : programs) {
+                EnqueueProgram(device->command_queue(), program, false);
+            }
             Finish(device->command_queue());
-            tt_metal::DumpDeviceProfileResults(device, program);
+            for (auto& program : programs) {
+                tt_metal::DumpDeviceProfileResults(device, program);
+            }
         }
 
         ////////////////////////////////////////////////////////////////////////////
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/receiver_l1.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/receiver_l1.cpp
index 76f4b805fe0..b1b62772b8b 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/receiver_l1.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/receiver_l1.cpp
@@ -13,6 +13,7 @@ constexpr uint32_t cb_start_addr = get_compile_time_arg_val(0);
 constexpr uint32_t cb_rd_ptr = get_compile_time_arg_val(0);
 constexpr uint32_t cb_size = get_compile_time_arg_val(1);
 constexpr uint32_t num_layers = get_compile_time_arg_val(2);
+constexpr bool global_sems = get_compile_time_arg_val(3);
 
 uint32_t rt_args_idx = 0;
 uint32_t vc;
@@ -63,8 +64,13 @@ FORCE_INLINE void setup_remote_receiver_cb_interface() {
 
     remote_cb_interface.fifo_start_addr = cb_start_addr;
 
-    remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr));
-    remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr));
+    if constexpr (global_sems) {
+        remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_acked_semaphore_addr);
+        remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_sent_semaphore_addr);
+    } else {
+        remote_cb_interface.pages_acked = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr));
+        remote_cb_interface.pages_sent = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr));
+    }
 
     remote_cb_interface.aligned_page_size = aligned_page_size;
 }
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp
index 324c7058096..7af8eb29d35 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp
@@ -17,6 +17,7 @@
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/detail/util.hpp"
 #include "tt_metal/host_api.hpp"
+#include "tt_metal/impl/buffers/global_semaphore.hpp"
 #include "tt_metal/tt_metal/perf_microbenchmark/common/util.hpp"
 #include "tt_metal/common/work_split.hpp"
 #include "tests/tt_metal/test_utils/tilization.hpp"
@@ -92,7 +93,7 @@ std::tuple<uint32_t, uint32_t> get_out_subblock_params(uint32_t per_core_Mt, uin
     return {1, 1};
 }
 
-tt_metal::Program create_program(
+std::tuple<std::vector<tt_metal::Program>, std::vector<std::unique_ptr<tt_metal::GlobalSemaphore>>> create_programs(
     tt_metal::Device *device,
     const CoreRangeSet &dram_reader_core,
     const CoreRangeSet &l1_receiver_cores,
@@ -109,12 +110,20 @@ tt_metal::Program create_program(
     std::shared_ptr<tt::tt_metal::Buffer> in0_buffer,
     std::shared_ptr<tt::tt_metal::Buffer> in1_buffer,
     std::shared_ptr<tt::tt_metal::Buffer> in1_l1_buffer,
-    std::shared_ptr<tt::tt_metal::Buffer> output_buffer
+    std::shared_ptr<tt::tt_metal::Buffer> output_buffer,
+    bool use_sub_devices
     ) {
 
     log_info("created program");
 
-    tt_metal::Program program = tt_metal::Program();
+    std::vector<tt_metal::Program> programs;
+    programs.push_back(tt_metal::Program());
+
+    if (use_sub_devices) {
+        programs.push_back(tt_metal::Program());
+    }
+    auto& sender_program = programs[0];
+    auto& receiver_program = use_sub_devices ? programs[1] : programs[0];
 
     auto all_cores = dram_reader_core.merge(l1_receiver_cores);
 
@@ -146,7 +155,7 @@ tt_metal::Program create_program(
     tt_metal::CircularBufferConfig in1_reader_cb_config =
         tt_metal::CircularBufferConfig(in1_reader_cb_size, {{in1_reader_cb_index, tile_format}})
             .set_page_size(in1_reader_cb_index, single_tile_size);
-    auto in1_reader_cb = tt_metal::CreateCircularBuffer(program, dram_reader_core, in1_reader_cb_config);
+    auto in1_reader_cb = tt_metal::CreateCircularBuffer(sender_program, dram_reader_core, in1_reader_cb_config);
 
     // in0 reader CB
     uint32_t in0_reader_cb_index = 0;
@@ -155,7 +164,7 @@ tt_metal::Program create_program(
     tt_metal::CircularBufferConfig in0_reader_cb_config =
         tt_metal::CircularBufferConfig(in0_reader_cb_size, {{in0_reader_cb_index, tile_format}})
             .set_page_size(in0_reader_cb_index, single_tile_size).set_globally_allocated_address(*in0_buffer);
-    auto in0_reader_cb = tt_metal::CreateCircularBuffer(program, l1_receiver_cores, in0_reader_cb_config);
+    auto in0_reader_cb = tt_metal::CreateCircularBuffer(receiver_program, l1_receiver_cores, in0_reader_cb_config);
 
     // in1 receiver CB
     uint32_t in1_receiver_cb_index = 1;
@@ -164,7 +173,7 @@ tt_metal::Program create_program(
     tt_metal::CircularBufferConfig in1_receiver_cb_config =
         tt_metal::CircularBufferConfig(in1_receiver_cb_size, {{in1_receiver_cb_index, tile_format}})
             .set_page_size(in1_receiver_cb_index, single_tile_size).set_globally_allocated_address(*in1_l1_buffer);
-    auto in1_receiver_cb = tt_metal::CreateCircularBuffer(program, l1_receiver_cores, in1_receiver_cb_config);
+    auto in1_receiver_cb = tt_metal::CreateCircularBuffer(receiver_program, l1_receiver_cores, in1_receiver_cb_config);
 
     // output CB
     uint32_t output_cb_index = 16;
@@ -173,7 +182,7 @@ tt_metal::Program create_program(
     tt_metal::CircularBufferConfig output_cb_config =
         tt_metal::CircularBufferConfig(output_cb_size, {{output_cb_index, tile_format}})
             .set_page_size(output_cb_index, single_tile_size).set_globally_allocated_address(*output_buffer);
-    auto output_cb = tt_metal::CreateCircularBuffer(program, l1_receiver_cores, output_cb_config);
+    auto output_cb = tt_metal::CreateCircularBuffer(receiver_program, l1_receiver_cores, output_cb_config);
 
     // sync CB
     uint32_t sync_cb_index = 2;
@@ -181,7 +190,7 @@ tt_metal::Program create_program(
     tt_metal::CircularBufferConfig sync_cb_config =
         tt_metal::CircularBufferConfig(sync_cb_size, {{sync_cb_index, tile_format}})
             .set_page_size(sync_cb_index, sync_cb_size);
-    auto sync_cb = tt_metal::CreateCircularBuffer(program, l1_receiver_cores, sync_cb_config);
+    auto sync_cb = tt_metal::CreateCircularBuffer(receiver_program, l1_receiver_cores, sync_cb_config);
 
     log_info("in1_reader_cb_size: {}", in1_reader_cb_size);
     log_info("in1_receiver_cb_size: {}", in1_receiver_cb_size);
@@ -189,9 +198,21 @@ tt_metal::Program create_program(
     // semaphore
     std::vector<uint32_t> pages_acked_semaphore_ids(num_receivers);
     std::vector<uint32_t> pages_sent_semaphore_ids(num_receivers);
-    for (uint32_t i=0; i < num_receivers; ++i) {
-        pages_acked_semaphore_ids[i] = tt_metal::CreateSemaphore(program, all_cores, INVALID);
-        pages_sent_semaphore_ids[i] = tt_metal::CreateSemaphore(program, all_cores, INVALID);
+    std::vector<std::unique_ptr<GlobalSemaphore>> global_sems;
+    // Global semaphores use an actual address instead of an index
+    if (use_sub_devices) {
+        global_sems.reserve(num_receivers * 2);
+        for (uint32_t i=0; i < num_receivers; ++i) {
+            global_sems.push_back(tt_metal::CreateGlobalSemaphore(device, all_cores, INVALID));
+            pages_acked_semaphore_ids[i] = global_sems.back()->address();
+            global_sems.push_back(tt_metal::CreateGlobalSemaphore(device, all_cores, INVALID));
+            pages_sent_semaphore_ids[i] =  global_sems.back()->address();
+        }
+    } else {
+        for (uint32_t i=0; i < num_receivers; ++i) {
+            pages_acked_semaphore_ids[i] = tt_metal::CreateSemaphore(sender_program, all_cores, INVALID);
+            pages_sent_semaphore_ids[i] = tt_metal::CreateSemaphore(sender_program, all_cores, INVALID);
+        }
     }
 
     // in1 reader
@@ -203,7 +224,7 @@ tt_metal::Program create_program(
     };
 
     auto in1_reader_kernel = tt_metal::CreateKernel(
-        program,
+        sender_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/reader_dram.cpp",
         dram_reader_core,
         tt_metal::DataMovementConfig{
@@ -218,11 +239,12 @@ tt_metal::Program create_program(
         (std::uint32_t) in1_receiver_cb_addr,
         (std::uint32_t) in1_receiver_cb_size,
         (std::uint32_t) num_receivers,
-        (std::uint32_t) num_layers
+        (std::uint32_t) num_layers,
+        (std::uint32_t) use_sub_devices
     };
 
     auto in1_writer_kernel = tt_metal::CreateKernel(
-        program,
+        sender_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp",
         dram_reader_core,
         tt_metal::DataMovementConfig{
@@ -237,7 +259,7 @@ tt_metal::Program create_program(
     };
 
     auto in0_reader_kernel = tt_metal::CreateKernel(
-        program,
+        receiver_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/in0_reader.cpp",
         l1_receiver_cores,
         tt_metal::DataMovementConfig{
@@ -250,10 +272,11 @@ tt_metal::Program create_program(
         (std::uint32_t) in1_receiver_cb_addr,
         (std::uint32_t) in1_receiver_cb_size + cb_padding,
         (std::uint32_t) num_layers,
+        (std::uint32_t) use_sub_devices
     };
 
     auto in1_receiver_kernel = tt_metal::CreateKernel(
-        program,
+        receiver_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/receiver_l1.cpp",
         l1_receiver_cores,
         tt_metal::DataMovementConfig{
@@ -278,7 +301,7 @@ tt_metal::Program create_program(
     };
 
     auto compute_kernel = tt_metal::CreateKernel(
-        program,
+        receiver_program,
         "tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/kernels/bmm_large_block_zm_fused_bias_activation_copy.cpp",
         l1_receiver_cores,
         tt_metal::ComputeConfig{
@@ -309,7 +332,7 @@ tt_metal::Program create_program(
     for (uint32_t i = 0; i < num_layers; ++i) {
         reader_rt_args.push_back(in1_block_num_tiles);
     }
-    tt_metal::SetRuntimeArgs(program, in1_reader_kernel, dram_reader_core_coord, reader_rt_args);
+    tt_metal::SetRuntimeArgs(sender_program, in1_reader_kernel, dram_reader_core_coord, reader_rt_args);
 
     // in1 writer rt
     std::vector<CoreCoord> l1_receiver_core_coords;
@@ -349,7 +372,7 @@ tt_metal::Program create_program(
     for (uint32_t i = 0; i < num_layers; ++i) {
         writer_rt_args.push_back(in1_num_tile_rows_write);
     }
-    tt_metal::SetRuntimeArgs(program, in1_writer_kernel, dram_reader_core_coord, writer_rt_args);
+    tt_metal::SetRuntimeArgs(sender_program, in1_writer_kernel, dram_reader_core_coord, writer_rt_args);
 
     // in1 reciever rt
     for (uint32_t i=0; i < num_receivers; ++i) {
@@ -375,7 +398,7 @@ tt_metal::Program create_program(
 
         log_info("l1_receiver_core_coords: {}", l1_receiver_core_coords[i]);
 
-        tt_metal::SetRuntimeArgs(program, in1_receiver_kernel, l1_receiver_core_coords[i], receiver_rt_args);
+        tt_metal::SetRuntimeArgs(receiver_program, in1_receiver_kernel, l1_receiver_core_coords[i], receiver_rt_args);
     }
 
     // in0 reader
@@ -390,10 +413,10 @@ tt_metal::Program create_program(
         for (uint32_t i = 0; i < num_layers; ++i) {
             in0_reader_rt_args.push_back(out_block_num_tiles);
         }
-        tt_metal::SetRuntimeArgs(program, in0_reader_kernel, l1_receiver_core_coords[i], in0_reader_rt_args);
+        tt_metal::SetRuntimeArgs(receiver_program, in0_reader_kernel, l1_receiver_core_coords[i], in0_reader_rt_args);
     }
 
-    return std::move(program);
+    return {std::move(programs), std::move(global_sems)};
 }
 
 float to_float(bfloat16 bfloat16_num) {
@@ -597,6 +620,7 @@ int main(int argc, char **argv) {
     uint32_t num_receivers = 1;
     uint32_t num_layers = 1;
     uint64_t m = 32, k = 8192, n = 128;
+    bool use_sub_devices = false;
 
     try {
         ////////////////////////////////////////////////////////////////////////////
@@ -626,6 +650,8 @@ int main(int argc, char **argv) {
                 test_args::get_command_option_uint64_and_remaining_args(input_args, "--num-receivers", 1);
             std::tie(num_layers, input_args) =
                 test_args::get_command_option_uint64_and_remaining_args(input_args, "--num-layers", 1);
+            std::tie(use_sub_devices, input_args) =
+                test_args::has_command_option_and_remaining_args(input_args, "--use-sub-devices");
 
 
             test_args::validate_remaining_args(input_args);
@@ -699,6 +725,12 @@ int main(int argc, char **argv) {
             l1_receiver_core_coord_range = CoreRange{CoreCoord{1, 0}, CoreCoord{num_receivers, 0}};
         }
         CoreRangeSet l1_receiver_core{std::set<CoreRange>{l1_receiver_core_coord_range}};
+        if (use_sub_devices) {
+            SubDevice sender_sub_device = SubDevice(std::array{dram_reader_core});
+            SubDevice receiver_sub_device = SubDevice(std::array{l1_receiver_core});
+            SubDeviceManagerId sdm_id = device->create_sub_device_manager({sender_sub_device, receiver_sub_device}, 0);
+            device->load_sub_device_manager(sdm_id);
+        }
 
         ////////////////////////////////////////////////////////////////////////////
         //                      Input Setup
@@ -765,18 +797,24 @@ int main(int argc, char **argv) {
         ////////////////////////////////////////////////////////////////////////////
         //                      Application Setup
         ////////////////////////////////////////////////////////////////////////////
-        auto program = create_program(device, dram_reader_core, l1_receiver_core, single_tile_size, tile_format, m, k, n, num_blocks, cb_num_blocks, num_receivers, num_layers, cb_padding, in0_buffer, in1_buffers[0], in1_l1_buffer, output_buffer);
+        auto [programs, global_sems] = create_programs(device, dram_reader_core, l1_receiver_core, single_tile_size, tile_format, m, k, n, num_blocks, cb_num_blocks, num_receivers, num_layers, cb_padding, in0_buffer, in1_buffers[0], in1_l1_buffer, output_buffer, use_sub_devices);
 
         ////////////////////////////////////////////////////////////////////////////
         //                      Execution Application
         ////////////////////////////////////////////////////////////////////////////
-        tt_metal::detail::CompileProgram(device, program);
+        for (auto& program : programs) {
+            tt_metal::detail::CompileProgram(device, program);
+        }
 
         log_info(LogTest, "Num tests {}", num_tests);
         for (uint32_t i = 0; i < num_tests; ++i) {
-            EnqueueProgram(device->command_queue(), program, false);
+            for (auto& program : programs) {
+                EnqueueProgram(device->command_queue(), program, false);
+            }
             Finish(device->command_queue());
-            tt_metal::DumpDeviceProfileResults(device, program);
+            for (auto& program : programs) {
+                tt_metal::DumpDeviceProfileResults(device, program);
+            }
         }
 
         ////////////////////////////////////////////////////////////////////////////
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp
index 8f07c6b5add..fd136b8979b 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/common/kernels/writer_l1.cpp
@@ -17,6 +17,7 @@ constexpr uint32_t cb_wr_ptr = get_compile_time_arg_val(1);
 constexpr uint32_t cb_size = get_compile_time_arg_val(2);
 constexpr uint32_t num_receivers = get_compile_time_arg_val(3);
 constexpr uint32_t num_layers = get_compile_time_arg_val(4);
+constexpr bool global_sems = get_compile_time_arg_val(5);
 
 tt_l1_ptr uint32_t* noc_x;
 tt_l1_ptr uint32_t* noc_y;
@@ -74,8 +75,14 @@ FORCE_INLINE void setup_remote_sender_cb_interface() {
     remote_cb_interface.num_receivers = num_receivers;
 
     for (uint32_t i=0; i < num_receivers; ++i) {
-        remote_cb_interface.pages_acked[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr[i]));
-        remote_cb_interface.pages_sent[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr[i]));
+        // Global semaphores return an actual address instead of an index
+        if constexpr (global_sems) {
+            remote_cb_interface.pages_acked[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_acked_semaphore_addr[i]);
+            remote_cb_interface.pages_sent[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(pages_sent_semaphore_addr[i]);
+        } else {
+            remote_cb_interface.pages_acked[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_acked_semaphore_addr[i]));
+            remote_cb_interface.pages_sent[i] = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore(pages_sent_semaphore_addr[i]));
+        }
     }
 
     remote_cb_interface.aligned_page_size = aligned_page_size;
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/CMakeLists.txt b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/CMakeLists.txt
index cdb9e144c43..30a552af009 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/CMakeLists.txt
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/CMakeLists.txt
@@ -12,6 +12,7 @@ set(UNIT_TESTS_FD_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/multichip/test_eth_ring_gather_EnqueueProgram.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/pipelining/basic_pipeline.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/streams/test_autonomous_relay_streams.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/sub_device/test_sub_device.cpp
 )
 
 add_executable(
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp
new file mode 100644
index 00000000000..6653d79a1ec
--- /dev/null
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <stdint.h>
+
+#include "dataflow_api.h"
+
+void kernel_main() {
+    uint32_t sem_addr = get_arg_val<uint32_t>(0);
+    uint32_t waiter_core_x = get_arg_val<uint32_t>(1);
+    uint32_t waiter_core_y = get_arg_val<uint32_t>(2);
+
+    uint64_t noc_remote_sem_addr = get_noc_addr(waiter_core_x, waiter_core_y, sem_addr);
+    noc_semaphore_inc(noc_remote_sem_addr, 1);
+    noc_async_atomic_barrier();
+}
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp
new file mode 100644
index 00000000000..e684dcdcad0
--- /dev/null
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <stdint.h>
+
+#include "dataflow_api.h"
+
+void kernel_main() {
+    uint32_t sem_addr = get_arg_val<uint32_t>(0);
+    uint32_t num_inc = get_arg_val<uint32_t>(1);
+    uint32_t send_sync_core_x = get_arg_val<uint32_t>(2);
+    uint32_t send_sync_core_y = get_arg_val<uint32_t>(3);
+    uint32_t recv_sync_core_x = get_arg_val<uint32_t>(4);
+    uint32_t recv_sync_core_y = get_arg_val<uint32_t>(5);
+    uint32_t local_read_addr = get_arg_val<uint32_t>(6);
+
+    volatile tt_l1_ptr uint32_t *local_read_ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(local_read_addr);
+
+    uint64_t noc_remote_send_sem_addr = get_noc_addr(send_sync_core_x, send_sync_core_y, sem_addr);
+    noc_semaphore_inc(noc_remote_send_sem_addr, 1);
+
+    uint64_t noc_remote_recv_sem_addr = get_noc_addr(recv_sync_core_x, recv_sync_core_y, sem_addr);
+    volatile tt_l1_ptr uint32_t* sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(sem_addr);
+    uint32_t num_read = 0;
+    do {
+        noc_async_read(noc_remote_recv_sem_addr, local_read_addr, 4);
+        noc_async_read_barrier();
+        invalidate_l1_cache();
+        num_read = *local_read_ptr;
+    } while (num_read != num_inc);
+
+    noc_semaphore_inc(noc_remote_recv_sem_addr, -num_inc);
+    noc_async_atomic_barrier();
+
+}
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_waiter.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_waiter.cpp
new file mode 100644
index 00000000000..6252d7eee23
--- /dev/null
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_waiter.cpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <stdint.h>
+
+#include "dataflow_api.h"
+
+void kernel_main() {
+    uint32_t sem_addr = get_arg_val<uint32_t>(0);
+    uint32_t num_inc = get_arg_val<uint32_t>(1);
+    uint32_t sync_core_x = get_arg_val<uint32_t>(2);
+    uint32_t sync_core_y = get_arg_val<uint32_t>(3);
+
+    uint64_t noc_remote_sem_addr = get_noc_addr(sync_core_x, sync_core_y, sem_addr);
+    noc_semaphore_inc(noc_remote_sem_addr, 1);
+
+    uint64_t noc_local_sem_addr = get_noc_addr(sem_addr);
+    volatile tt_l1_ptr uint32_t* sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(sem_addr);
+    noc_semaphore_wait(sem, num_inc);
+    noc_semaphore_inc(noc_local_sem_addr, -num_inc);
+    noc_async_atomic_barrier();
+
+}
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp
new file mode 100644
index 00000000000..f91b44768cd
--- /dev/null
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <stdint.h>
+
+#include "dataflow_api.h"
+
+void kernel_main() {
+    uint32_t sem_addr = get_arg_val<uint32_t>(0);
+
+    volatile tt_l1_ptr uint32_t* sem = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(sem_addr);
+    noc_semaphore_wait(sem, 1);
+    uint64_t noc_local_sem_addr = get_noc_addr(sem_addr);
+    noc_semaphore_inc(noc_local_sem_addr, -1);
+    noc_async_atomic_barrier();
+}
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
new file mode 100644
index 00000000000..a54df488d7d
--- /dev/null
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
@@ -0,0 +1,430 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstddef>
+#include <cstdint>
+#include <array>
+#include <tuple>
+#include <vector>
+
+#include "command_queue_fixture.hpp"
+#include "gtest/gtest.h"
+#include "tt_metal/common/core_coord.hpp"
+#include "tt_metal/impl/buffers/global_semaphore.hpp"
+#include "tt_metal/impl/device/device.hpp"
+#include "tt_metal/impl/event/event.hpp"
+#include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp"
+
+using namespace tt::tt_metal;
+
+namespace basic_tests {
+
+std::tuple<Program, CoreCoord, std::unique_ptr<GlobalSemaphore>> create_single_sync_program(Device *device, SubDevice sub_device) {
+    auto syncer_coord = sub_device.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord;
+    auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord));
+    auto global_sem = CreateGlobalSemaphore(device, sub_device.cores(HalProgrammableCoreType::TENSIX), INVALID);
+
+    Program syncer_program = CreateProgram();
+    auto syncer_kernel = CreateKernel(
+        syncer_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp",
+        syncer_core,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_0,
+            .noc = NOC::RISCV_0_default});
+    std::array<uint32_t, 1> syncer_rt_args = {global_sem->address()};
+    SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args);
+    return {std::move(syncer_program), std::move(syncer_coord), std::move(global_sem)};
+}
+
+std::tuple<Program, Program, Program, std::unique_ptr<GlobalSemaphore>> create_basic_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) {
+    auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord;
+    auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord));
+    auto waiter_core_physical = device->worker_core_from_logical_core(waiter_coord);
+    auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX);
+    auto syncer_coord = incrementer_cores.ranges().back().end_coord;
+    auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord));
+    auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord);
+    auto all_cores = waiter_core.merge(incrementer_cores).merge(syncer_core);
+    auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID);
+
+    Program waiter_program = CreateProgram();
+    auto waiter_kernel = CreateKernel(
+        waiter_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_waiter.cpp",
+        waiter_core,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_0,
+            .noc = NOC::RISCV_0_default});
+    std::array<uint32_t, 4> waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y};
+    SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args);
+
+    Program syncer_program = CreateProgram();
+    auto syncer_kernel = CreateKernel(
+        syncer_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp",
+        syncer_core,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_0,
+            .noc = NOC::RISCV_0_default});
+    std::array<uint32_t, 1> syncer_rt_args = {global_sem->address()};
+    SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args);
+
+    Program incrementer_program = CreateProgram();
+    auto incrementer_kernel = CreateKernel(
+        incrementer_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp",
+        incrementer_cores,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_1,
+            .noc = NOC::RISCV_1_default});
+    std::array<uint32_t, 3> incrementer_rt_args = {global_sem->address(), waiter_core_physical.x, waiter_core_physical.y};
+    SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args);
+    return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)};
+}
+
+std::tuple<Program, Program, Program, std::unique_ptr<GlobalSemaphore>> create_basic_eth_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) {
+    auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::ACTIVE_ETH).ranges().at(0).start_coord;
+    auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord));
+    auto waiter_core_physical = device->ethernet_core_from_logical_core(waiter_coord);
+    auto tensix_waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord;
+    auto tensix_waiter_core = CoreRangeSet(CoreRange(tensix_waiter_coord, tensix_waiter_coord));
+    auto tensix_waiter_core_physical = device->worker_core_from_logical_core(tensix_waiter_coord);
+    auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX);
+    auto syncer_coord = incrementer_cores.ranges().back().end_coord;
+    auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord));
+    auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord);
+    auto all_cores = tensix_waiter_core.merge(incrementer_cores).merge(syncer_core);
+    auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID);
+
+    Program waiter_program = CreateProgram();
+    auto waiter_kernel = CreateKernel(
+        waiter_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp",
+        waiter_core,
+        tt_metal::EthernetConfig{
+            .noc = NOC::RISCV_0_default,
+            .processor = DataMovementProcessor::RISCV_0});
+    std::array<uint32_t, 7> waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y, tensix_waiter_core_physical.x, tensix_waiter_core_physical.y, eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE};
+    SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args);
+
+    Program syncer_program = CreateProgram();
+    auto syncer_kernel = CreateKernel(
+        syncer_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/syncer.cpp",
+        syncer_core,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_0,
+            .noc = NOC::RISCV_0_default});
+    std::array<uint32_t, 1> syncer_rt_args = {global_sem->address()};
+    SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args);
+
+    Program incrementer_program = CreateProgram();
+    auto incrementer_kernel = CreateKernel(
+        incrementer_program,
+        "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/incrementer.cpp",
+        incrementer_cores,
+        DataMovementConfig{
+            .processor = DataMovementProcessor::RISCV_1,
+            .noc = NOC::RISCV_1_default});
+    std::array<uint32_t, 3> incrementer_rt_args = {global_sem->address(), tensix_waiter_core_physical.x, tensix_waiter_core_physical.y};
+    SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args);
+    return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)};
+}
+
+TEST_F(CommandQueueSingleCardFixture, TestSubDeviceAllocations) {
+    uint32_t local_l1_size = 3200;
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+    CoreRangeSet sharded_cores_1 = CoreRange({0, 0}, {2, 2});
+    CoreRangeSet sharded_cores_2 = CoreRange({4, 4}, {4, 4});
+
+    auto sharded_cores_1_vec = corerange_to_cores(sharded_cores_1, std::nullopt, true);
+    auto sharded_cores_2_vec = corerange_to_cores(sharded_cores_2, std::nullopt, true);
+
+    ShardSpecBuffer shard_spec_buffer_1 = ShardSpecBuffer(sharded_cores_1, {1, 1}, ShardOrientation::ROW_MAJOR, false, {1, 1}, {sharded_cores_1.num_cores(), 1});
+    uint32_t page_size_1 = 32;
+    ShardedBufferConfig shard_config_1 = {nullptr, sharded_cores_1.num_cores() * page_size_1, page_size_1, BufferType::L1, TensorMemoryLayout::HEIGHT_SHARDED, shard_spec_buffer_1};
+    auto input_1 = tt::test_utils::generate_uniform_random_vector<uint32_t>(0, 100, shard_config_1.size / sizeof(uint32_t));
+
+    ShardSpecBuffer shard_spec_buffer_2 = ShardSpecBuffer(sharded_cores_2, {1, 1}, ShardOrientation::ROW_MAJOR, false, {1, 1}, {sharded_cores_2.num_cores(), 1});
+    uint32_t page_size_2 = 64;
+    ShardedBufferConfig shard_config_2 = {nullptr, sharded_cores_2.num_cores() * page_size_2, page_size_2, BufferType::L1, TensorMemoryLayout::HEIGHT_SHARDED, shard_spec_buffer_2};
+    auto input_2 = tt::test_utils::generate_uniform_random_vector<uint32_t>(0, 100, shard_config_2.size / sizeof(uint32_t));
+
+    uint32_t page_size_3 = 1024;
+    InterleavedBufferConfig interleaved_config = {nullptr, page_size_3, page_size_3, BufferType::L1, TensorMemoryLayout::INTERLEAVED};
+    auto input_3 = tt::test_utils::generate_uniform_random_vector<uint32_t>(0, 100, interleaved_config.size / sizeof(uint32_t));
+
+    for (Device *device : devices_) {
+        auto sub_device_manager_1 = device->create_sub_device_manager({sub_device_1}, local_l1_size);
+        auto sub_device_manager_2 = device->create_sub_device_manager({sub_device_1, sub_device_2}, local_l1_size);
+        DeviceAddr l1_unreserved_base = device->get_base_allocator_addr(HalMemType::L1);
+        DeviceAddr max_addr = l1_unreserved_base + local_l1_size;
+
+        shard_config_1.device = device;
+        shard_config_2.device = device;
+        interleaved_config.device = device;
+
+        std::vector<CoreCoord> physical_cores_1;
+        physical_cores_1.reserve(sharded_cores_1_vec.size());
+        for (const auto& core : sharded_cores_1_vec) {
+            physical_cores_1.push_back(device->worker_core_from_logical_core(core));
+        }
+
+        std::vector<CoreCoord> physical_cores_2;
+        physical_cores_2.reserve(sharded_cores_2_vec.size());
+        for (const auto& core : sharded_cores_2_vec) {
+            physical_cores_2.push_back(device->worker_core_from_logical_core(core));
+        }
+
+        device->load_sub_device_manager(sub_device_manager_1);
+
+        auto buffer_1 = CreateBuffer(shard_config_1, SubDeviceId{0});
+        EXPECT_EQ(buffer_1->address(), max_addr - page_size_1);
+        EnqueueWriteBuffer(device->command_queue(), buffer_1, input_1, false);
+        std::vector<uint32_t> output_1;
+        EnqueueReadBuffer(device->command_queue(), buffer_1, output_1, true);
+        EXPECT_EQ(input_1, output_1);
+        auto input_1_it = input_1.begin();
+        for (const auto& physical_core : physical_cores_1) {
+            auto readback = tt::llrt::read_hex_vec_from_core(
+                device->id(), physical_core, buffer_1->address(), page_size_1);
+            EXPECT_TRUE(std::equal(input_1_it, input_1_it + page_size_1 / sizeof(uint32_t), readback.begin()));
+            input_1_it += page_size_1 / sizeof(uint32_t);
+        }
+
+        auto buffer_2 = CreateBuffer(interleaved_config);
+
+        DeallocateBuffer(*buffer_1);
+        device->clear_loaded_sub_device_manager();
+        device->load_sub_device_manager(sub_device_manager_2);
+
+        auto buffer_3 = CreateBuffer(shard_config_2, SubDeviceId{1});
+        EXPECT_EQ(buffer_3->address(), max_addr - page_size_2);
+        EnqueueWriteBuffer(device->command_queue(), buffer_3, input_2, false);
+        std::vector<uint32_t> output_2;
+        EnqueueReadBuffer(device->command_queue(), buffer_3, output_2, true);
+        EXPECT_EQ(input_2, output_2);
+        auto input_2_it = input_2.begin();
+        for (const auto& physical_core : physical_cores_2) {
+            auto readback = tt::llrt::read_hex_vec_from_core(
+                device->id(), physical_core, buffer_3->address(), page_size_2);
+            EXPECT_TRUE(std::equal(input_2_it, input_2_it + page_size_2 / sizeof(uint32_t), readback.begin()));
+            input_2_it += page_size_2 / sizeof(uint32_t);
+        }
+
+        auto buffer_4 = CreateBuffer(shard_config_1,  SubDeviceId{0});
+        EXPECT_EQ(buffer_4->address(), max_addr - page_size_1);
+        EXPECT_THROW(CreateBuffer(interleaved_config, SubDeviceId{0}), std::exception);
+    }
+}
+
+TEST_F(CommandQueueSingleCardFixture, TestSubDeviceSynchronization) {
+    uint32_t local_l1_size = 3200;
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+    CoreRangeSet sharded_cores_1 = CoreRange({0, 0}, {2, 2});
+
+    auto sharded_cores_1_vec = corerange_to_cores(sharded_cores_1, std::nullopt, true);
+
+    ShardSpecBuffer shard_spec_buffer_1 = ShardSpecBuffer(sharded_cores_1, {1, 1}, ShardOrientation::ROW_MAJOR, false, {1, 1}, {sharded_cores_1.num_cores(), 1});
+    uint32_t page_size_1 = 32;
+    ShardedBufferConfig shard_config_1 = {nullptr, sharded_cores_1.num_cores() * page_size_1, page_size_1, BufferType::L1, TensorMemoryLayout::HEIGHT_SHARDED, shard_spec_buffer_1};
+    auto input_1 = tt::test_utils::generate_uniform_random_vector<uint32_t>(0, 100, shard_config_1.size / sizeof(uint32_t));
+
+    std::array sub_device_ids_to_block = {SubDeviceId{0}};
+    for (Device *device : devices_) {
+        auto sub_device_manager = device->create_sub_device_manager({sub_device_1, sub_device_2}, local_l1_size);
+
+        shard_config_1.device = device;
+
+        std::vector<CoreCoord> physical_cores_1;
+        physical_cores_1.reserve(sharded_cores_1_vec.size());
+        for (const auto& core : sharded_cores_1_vec) {
+            physical_cores_1.push_back(device->worker_core_from_logical_core(core));
+        }
+
+        device->load_sub_device_manager(sub_device_manager);
+
+        auto [program, syncer_core, global_semaphore] = create_single_sync_program(device, sub_device_2);
+        EnqueueProgram(device->command_queue(), program, false);
+
+        auto buffer_1 = CreateBuffer(shard_config_1, sub_device_ids_to_block[0]);
+
+        // Test blocking synchronize doesn't stall
+        Synchronize(device, 0, sub_device_ids_to_block);
+
+        // Test blocking write buffer doesn't stall
+        EnqueueWriteBuffer(device->command_queue(), buffer_1, input_1, true, sub_device_ids_to_block);
+
+        // Test record event won't cause a stall
+        auto event = std::make_shared<Event>();
+        EnqueueRecordEvent(device->command_queue(), event, sub_device_ids_to_block);
+        Synchronize(device, 0, sub_device_ids_to_block);
+
+        // Test blocking read buffer doesn't stall
+        std::vector<uint32_t> output_1;
+        EnqueueReadBuffer(device->command_queue(), buffer_1, output_1, true, sub_device_ids_to_block);
+        EXPECT_EQ(input_1, output_1);
+        auto input_1_it = input_1.begin();
+        for (const auto& physical_core : physical_cores_1) {
+            auto readback = tt::llrt::read_hex_vec_from_core(
+                device->id(), physical_core, buffer_1->address(), page_size_1);
+            EXPECT_TRUE(std::equal(input_1_it, input_1_it + page_size_1 / sizeof(uint32_t), readback.begin()));
+            input_1_it += page_size_1 / sizeof(uint32_t);
+        }
+        auto sem_addr = global_semaphore->address();
+        auto physical_syncer_core = device->worker_core_from_logical_core(syncer_core);
+        tt::llrt::write_hex_vec_to_core(device->id(), physical_syncer_core, std::vector<uint32_t>{1}, sem_addr);
+
+        // Full synchronization
+        Synchronize(device);
+    }
+}
+
+TEST_F(CommandQueueSingleCardFixture, TestSubDeviceBasicPrograms) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+    uint32_t num_iters = 5;
+    for (Device *device : devices_) {
+        auto sub_device_manager = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        device->load_sub_device_manager(sub_device_manager);
+
+        auto [waiter_program, syncer_program, incrementer_program, global_sem] = create_basic_sync_program(device, sub_device_1, sub_device_2);
+
+        for (uint32_t i = 0; i < num_iters; i++) {
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program, true);
+            EnqueueProgram(device->command_queue(), incrementer_program, false);
+        }
+        Synchronize(device);
+    }
+}
+
+TEST_F(CommandQueueSingleCardFixture, TestSubDeviceBasicEthPrograms) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    uint32_t num_iters = 5;
+    for (Device *device : devices_) {
+        if (!does_device_have_active_eth_cores(device)) {
+            GTEST_SKIP() << "Skipping test because device " << device->id() << " does not have any active ethernet cores";
+        }
+        auto eth_core = *device->get_active_ethernet_cores(true).begin();
+        SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})}), CoreRangeSet(CoreRange(eth_core, eth_core))});
+        auto sub_device_manager = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        device->load_sub_device_manager(sub_device_manager);
+
+        auto [waiter_program, syncer_program, incrementer_program, global_sem] = create_basic_eth_sync_program(device, sub_device_1, sub_device_2);
+
+        for (uint32_t i = 0; i < num_iters; i++) {
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program, true);
+            EnqueueProgram(device->command_queue(), incrementer_program, false);
+        }
+        Synchronize(device);
+    }
+}
+
+TEST_F(CommandQueueSingleCardTraceFixture, TestSubDeviceTraceBasicPrograms) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+    uint32_t num_iters = 5;
+    for (Device *device : devices_) {
+        auto sub_device_manager = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        device->load_sub_device_manager(sub_device_manager);
+
+        auto [waiter_program, syncer_program, incrementer_program, global_sem] = create_basic_sync_program(device, sub_device_1, sub_device_2);
+
+        // Compile the programs
+        EnqueueProgram(device->command_queue(), waiter_program, false);
+        // Test blocking on one sub-device
+        EnqueueProgram(device->command_queue(), syncer_program, true);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        Synchronize(device);
+
+        // Capture the trace
+        auto tid_1 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), waiter_program, false);
+        EnqueueProgram(device->command_queue(), syncer_program, false);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_1);
+
+        auto tid_2 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), syncer_program, false);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_2);
+
+        for (uint32_t i = 0; i < num_iters; i++) {
+            // Regular program execution
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program, true);
+            EnqueueProgram(device->command_queue(), incrementer_program, false);
+
+            // Full trace execution
+            ReplayTrace(device, device->command_queue().id(), tid_1, false);
+
+            // Partial trace execution
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            ReplayTrace(device, device->command_queue().id(), tid_2, false);
+        }
+        Synchronize(device);
+    }
+}
+
+TEST_F(CommandQueueSingleCardTraceFixture, TestSubDeviceTraceBasicEthPrograms) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    uint32_t num_iters = 5;
+    for (Device *device : devices_) {
+        if (!does_device_have_active_eth_cores(device)) {
+            GTEST_SKIP() << "Skipping test because device " << device->id() << " does not have any active ethernet cores";
+        }
+        auto eth_core = *device->get_active_ethernet_cores(true).begin();
+        SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})}), CoreRangeSet(CoreRange(eth_core, eth_core))});
+        auto sub_device_manager = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        device->load_sub_device_manager(sub_device_manager);
+
+        auto [waiter_program, syncer_program, incrementer_program, global_sem] = create_basic_eth_sync_program(device, sub_device_1, sub_device_2);
+
+        // Compile the programs
+        EnqueueProgram(device->command_queue(), waiter_program, false);
+        // Test blocking on one sub-device
+        EnqueueProgram(device->command_queue(), syncer_program, true);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        Synchronize(device);
+
+        // Capture the trace
+        auto tid_1 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), waiter_program, false);
+        EnqueueProgram(device->command_queue(), syncer_program, false);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_1);
+
+        auto tid_2 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), syncer_program, false);
+        EnqueueProgram(device->command_queue(), incrementer_program, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_2);
+
+        for (uint32_t i = 0; i < num_iters; i++) {
+            // Regular program execution
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program, true);
+            EnqueueProgram(device->command_queue(), incrementer_program, false);
+
+            // Full trace execution
+            ReplayTrace(device, device->command_queue().id(), tid_1, false);
+
+            // Partial trace execution
+            EnqueueProgram(device->command_queue(), waiter_program, false);
+            ReplayTrace(device, device->command_queue().id(), tid_2, false);
+        }
+        Synchronize(device);
+    }
+}
+
+}  // namespace basic_tests
diff --git a/tt_metal/common/core_coord.cpp b/tt_metal/common/core_coord.cpp
index 5bb8f597921..29e33fc1916 100644
--- a/tt_metal/common/core_coord.cpp
+++ b/tt_metal/common/core_coord.cpp
@@ -203,6 +203,8 @@ CoreRangeSet::CoreRangeSet(std::vector<CoreRange> &&core_ranges) : ranges_(std::
     this->validate_no_overlap();
 }
 
+bool CoreRangeSet::empty() const { return this->ranges_.empty(); }
+
 size_t CoreRangeSet::size() const { return ranges_.size(); }
 
 template <typename T>
@@ -294,6 +296,18 @@ bool CoreRangeSet::intersects(const CoreRangeSet &other) const {
     return false;
 }
 
+CoreRangeSet CoreRangeSet::intersection(const CoreRangeSet &other) const {
+    std::vector<CoreRange> intersection;
+    for (const auto& local_cr : this->ranges_) {
+        for (const auto& other_cr : other.ranges()) {
+            if (auto intersect = local_cr.intersection(other_cr); intersect.has_value()) {
+                intersection.push_back(*intersect);
+            }
+        }
+    }
+    return CoreRangeSet(std::move(intersection));
+}
+
 bool CoreRangeSet::contains(const CoreCoord &other) const {
     for (const auto &cr : this->ranges_) {
         if (cr.contains(other)) {
diff --git a/tt_metal/common/core_coord.hpp b/tt_metal/common/core_coord.hpp
index 7e13e87dab9..253e782972c 100644
--- a/tt_metal/common/core_coord.hpp
+++ b/tt_metal/common/core_coord.hpp
@@ -140,6 +140,8 @@ class CoreRangeSet {
 
     CoreRangeSet(std::vector<CoreRange> &&core_ranges);
 
+    bool empty() const;
+
     size_t size() const;
 
     template <typename T>
@@ -151,6 +153,8 @@ class CoreRangeSet {
 
     bool intersects(const CoreRangeSet &other) const;
 
+    CoreRangeSet intersection(const CoreRangeSet &other) const;
+
     bool contains(const CoreCoord &other) const;
 
     bool contains(const CoreRange &other) const;
diff --git a/tt_metal/host_api.hpp b/tt_metal/host_api.hpp
index 510868e6ad0..268d75291bf 100644
--- a/tt_metal/host_api.hpp
+++ b/tt_metal/host_api.hpp
@@ -10,6 +10,7 @@
 #include "tt_metal/impl/kernels/runtime_args_data.hpp"
 #include "tt_metal/impl/program/program.hpp"
 #include "tt_metal/impl/device/device.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/tt_stl/span.hpp"
 
 /** @file */
@@ -281,6 +282,29 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
 std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
     Device *device, CoreRangeSet &&cores, uint32_t initial_value, BufferType buffer_type = BufferType::L1);
 
+/**
+*  Creates a pre-allocated interleaved DRAM or L1 buffer with the global allocator on device
+*
+*  Return value: std::shared_ptr<Buffer>
+*
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | InterleavedBufferConfig   |             | Yes      |
+*/
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config);
+
+/**
+*  Creates a pre-allocated interleaved DRAM or L1 buffer with the global allocator on device
+*
+*  Return value: std::shared_ptr<Buffer>
+*
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | InterleavedBufferConfig   |             | Yes      |
+*  | address         | Device address of the buffer                                      | DeviceAddr                |             | No       |
+*/
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, DeviceAddr address);
+
 /**
 *  Creates a pre-allocated interleaved DRAM or L1 buffer on device
 *
@@ -289,11 +313,32 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
 *  | Argument        | Description                                                       | Type                      | Valid Range | Required |
 *  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
 *  | config          | Config for the buffer                                             | InterleavedBufferConfig   |             | Yes      |
-*  | address         | Device address of the buffer. Default will calculate address      | std::optional<DeviceAddr> |             | No       |
-*  | sub_device_id   | The sub-device id to allocate on. Default is the global allocator | std::optional<uint32_t>   |             | No       |
+*  | sub_device_id   | The sub-device id to allocate on                                  | SubDeviceId               |             | No       |
+*/
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, SubDeviceId sub_device_id);
 
+/**
+*  Creates a pre-allocated sharded DRAM or L1 buffer with the global allocator on device
+*
+*  Return value: std::shared_ptr<Buffer>
+*
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | ShardedBufferConfig       |             | Yes      |
+*/
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config);
+
+/**
+*  Creates a pre-allocated sharded DRAM or L1 buffer with the global allocator on device
+*
+*  Return value: std::shared_ptr<Buffer>
+*
+*  | Argument        | Description                                                       | Type                      | Valid Range | Required |
+*  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
+*  | config          | Config for the buffer                                             | ShardedBufferConfig       |             | Yes      |
+*  | address         | Device address of the buffer                                      | DeviceAddr                |             | No       |
 */
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address = std::nullopt, std::optional<uint32_t> sub_device_id = std::nullopt);
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, DeviceAddr address);
 
 /**
 *  Creates a pre-allocated sharded DRAM or L1 buffer on device
@@ -303,10 +348,9 @@ std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, std:
 *  | Argument        | Description                                                       | Type                      | Valid Range | Required |
 *  |-----------------|------------------------------------------------------------------ |---------------------------|-------------|----------|
 *  | config          | Config for the buffer                                             | ShardedBufferConfig       |             | Yes      |
-*  | address         | Device address of the buffer. Default will calculate address      | std::optional<DeviceAddr> |             | No       |
-*  | sub_device_id   | The sub-device id to allocate on. Default is the global allocator | std::optional<uint32_t>   |             | No       |
+*  | sub_device_id   | The sub-device id to allocate on                                  |                           |             | No       |
 */
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address = std::nullopt, std::optional<uint32_t> sub_device_id = std::nullopt);
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, SubDeviceId sub_device_id);
 
 /**
 *  Deallocates buffer from device by marking its memory as free.
@@ -479,7 +523,7 @@ void EnqueueReadBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t> &dst,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids = {});
+    tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Reads a buffer from the device
@@ -499,7 +543,7 @@ void EnqueueReadBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void *dst,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids = {});
+    tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Writes a buffer to the device
@@ -520,7 +564,7 @@ void EnqueueWriteBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t> &src,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids = {});
+    tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Writes a buffer to the device
@@ -540,7 +584,7 @@ void EnqueueWriteBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids = {});
+    tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Writes a program to the device and launches it
@@ -565,7 +609,7 @@ void EnqueueProgram(CommandQueue& cq, Program& program, bool blocking);
  * | cq             | The command queue object which dispatches the command to the hardware             | CommandQueue &                |                                    | Yes      |
  * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void Finish(CommandQueue &cq, tt::stl::Span<const uint32_t> sub_device_ids = {});
+void Finish(CommandQueue &cq, tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Begins capture on a trace, when the trace is in capture mode all programs pushed into the trace queue will have their execution delayed until the trace is instantiated and enqueued.
@@ -662,7 +706,7 @@ void DumpDeviceProfileResults(Device *device, const Program &program);
  * | event          | An event that will be populated by this function, and inserted in CQ              | std::shared_ptr<Event>        |                                    | Yes      |
  * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void EnqueueRecordEvent(CommandQueue &cq, const std::shared_ptr<Event> &event, tt::stl::Span<const uint32_t> sub_device_ids = {});
+void EnqueueRecordEvent(CommandQueue &cq, const std::shared_ptr<Event> &event, tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 /**
  * Enqueues a command on the device for a given CQ (non-blocking). The command on device will block and wait for completion of the specified event (which may be in another CQ).
@@ -706,7 +750,7 @@ bool EventQuery(const std::shared_ptr<Event> &event);
  * | cq_id          | The specific command queue id to synchronize  .                                   | uint8_t                       |                                    | No       |
  * | sub_device_ids | The sub-device ids to wait for completion on. If empty, waits for all sub-devices | tt::stl::Span<const uint32_t> |                                    | No       |
  */
-void Synchronize(Device *device, const std::optional<uint8_t> cq_id = std::nullopt, tt::stl::Span<const uint32_t> sub_device_ids = {});
+void Synchronize(Device *device, const std::optional<uint8_t> cq_id = std::nullopt, tt::stl::Span<const SubDeviceId> sub_device_ids = {});
 
 }  // namespace v0
 }  // namespace tt_metal
diff --git a/tt_metal/impl/CMakeLists.txt b/tt_metal/impl/CMakeLists.txt
index 1897305eb79..bd156e29c36 100644
--- a/tt_metal/impl/CMakeLists.txt
+++ b/tt_metal/impl/CMakeLists.txt
@@ -1,4 +1,6 @@
 set(IMPL_SRC
+    ${CMAKE_CURRENT_SOURCE_DIR}/sub_device/sub_device.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/sub_device/sub_device_manager.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/device.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/device_handle.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/device_pool.cpp
diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp
index 03caa3c5224..2c210ec9f4a 100644
--- a/tt_metal/impl/allocator/allocator.cpp
+++ b/tt_metal/impl/allocator/allocator.cpp
@@ -503,7 +503,6 @@ void clear(Allocator &allocator) {
     allocator.l1_manager.clear();
     allocator.l1_small_manager.clear();
     allocator.trace_buffer_manager.clear();
-    allocator.allocated_buffers.clear();
 }
 
 }  // namespace allocator
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index 519945e7099..fdfa57a79a3 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -205,15 +205,16 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer& buffer) {
     return buffer_page_mapping;
 }
 
-void validate_sub_device_id(std::optional<uint32_t> sub_device_id, Device *device, BufferType buffer_type, const std::optional<ShardSpecBuffer>& shard_parameters) {
+void validate_sub_device_id(std::optional<SubDeviceId> sub_device_id, Device *device, BufferType buffer_type, const std::optional<ShardSpecBuffer>& shard_parameters) {
     // No need to validate if we're using the global allocator or not sharding
     if (!sub_device_id.has_value()) {
         return;
     }
     TT_FATAL(shard_parameters.has_value(), "Specifying sub-device for buffer requires buffer to be sharded");
     TT_FATAL(is_l1(buffer_type), "Specifying sub-device for buffer requires buffer to be L1");
-    // TODO: Validate that cores used match the sub-device
-    TT_FATAL(*sub_device_id == 0, "Invalid sub-device id");
+    const auto &sub_device_cores = device->worker_cores(HalProgrammableCoreType::TENSIX, sub_device_id.value());
+    const auto &shard_cores = shard_parameters->grid();
+    TT_FATAL(sub_device_cores.contains(shard_cores), "Shard cores specified {} do not match sub-device cores {}", shard_cores, sub_device_cores);
 }
 
 Buffer::Buffer(
@@ -224,7 +225,7 @@ Buffer::Buffer(
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
     const std::optional<bool> bottom_up,
-    const std::optional<uint32_t> sub_device_id,
+    const std::optional<SubDeviceId> sub_device_id,
     const bool owns_data,
     Private) :
     device_(device),
@@ -237,9 +238,13 @@ Buffer::Buffer(
     sub_device_id_(sub_device_id),
     owns_data_(owns_data),
     buffer_page_mapping_(nullptr) {
-    TT_FATAL(this->device_ != nullptr && this->device_->allocator_ != nullptr, "Device and allocator need to not be null.");
+    TT_FATAL(this->device_ != nullptr, "Device needs to not be null.");
     if (this->sub_device_id_.has_value()) {
         validate_sub_device_id(this->sub_device_id_, this->device_, buffer_type, shard_parameters);
+        this->sub_device_manager_id_ = this->device_->get_active_sub_device_manager_id();
+        this->allocator_ = device->get_initialized_allocator(*this->sub_device_id_).get();
+    } else {
+        this->allocator_ = device->get_initialized_allocator().get();
     }
     if (size != 0) {
         validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
@@ -254,7 +259,7 @@ std::shared_ptr<Buffer> Buffer::create(
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
     const std::optional<bool> bottom_up,
-    const std::optional<uint32_t> sub_device_id) {
+    const std::optional<SubDeviceId> sub_device_id) {
     auto* bufferPtr = new Buffer(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, sub_device_id, true /* owns data */, Private());
     // Using a custom deleter to properly clean up the owned datas
     auto buffer = std::shared_ptr<Buffer>(bufferPtr, deleter);
@@ -295,7 +300,7 @@ std::shared_ptr<Buffer> Buffer::create(
     const TensorMemoryLayout buffer_layout,
     const std::optional<ShardSpecBuffer>& shard_parameters,
     const std::optional<bool> bottom_up,
-    const std::optional<uint32_t> sub_device_id) {
+    const std::optional<SubDeviceId> sub_device_id) {
     // Not using a custom deleter, because it doesn't own any data to cleanup
     auto buffer = std::make_shared<Buffer>(device, size, page_size, buffer_type, buffer_layout, shard_parameters, bottom_up, sub_device_id, false /* owns data */, Private());
     buffer->weak_self = buffer;
@@ -410,12 +415,12 @@ bool Buffer::is_trace() const {
 
 uint32_t Buffer::dram_channel_from_bank_id(uint32_t bank_id) const {
     TT_FATAL(this->is_dram(), "Expected DRAM buffer!");
-    return this->device_->dram_channel_from_bank_id(bank_id, this->sub_device_id_);
+    return allocator::dram_channel_from_bank_id(*this->allocator_, bank_id);
 }
 
 CoreCoord Buffer::logical_core_from_bank_id(uint32_t bank_id) const {
     TT_FATAL(this->is_l1(), "Expected L1 buffer!");
-    return this->device_->logical_core_from_bank_id(bank_id, this->sub_device_id_);
+    return allocator::logical_core_from_bank_id(*this->allocator_, bank_id);
 }
 
 CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
@@ -440,7 +445,7 @@ CoreCoord Buffer::noc_coordinates(uint32_t bank_id) const {
 CoreCoord Buffer::noc_coordinates() const { return this->noc_coordinates(0); }
 
 DeviceAddr Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
-    auto num_banks = this->device_->num_banks(this->buffer_type_, this->sub_device_id_);
+    uint32_t num_banks = allocator::num_banks(*this->allocator_, this->buffer_type_);
     TT_FATAL(bank_id < num_banks, "Invalid Bank ID: {} exceeds total numbers of banks ({})!", bank_id, num_banks);
     int pages_offset_within_bank = (int)page_index / num_banks;
     auto offset = (round_up(this->page_size(), this->alignment()) * pages_offset_within_bank);
@@ -448,8 +453,9 @@ DeviceAddr Buffer::page_address(uint32_t bank_id, uint32_t page_index) const {
 }
 
 uint32_t Buffer::alignment() const {
-    return this->device_->get_allocator_alignment(this->sub_device_id_);
+    return this->allocator_->config.alignment;
 }
+
 DeviceAddr Buffer::aligned_page_size() const {
     return align(page_size(), this->alignment());
 }
@@ -484,7 +490,8 @@ std::optional<uint32_t> Buffer::num_cores() const {
 }
 
 DeviceAddr Buffer::translate_page_address(uint64_t offset, uint32_t bank_id) const {
-    DeviceAddr base_page_address = this->address() + this->device_->bank_offset(this->buffer_type_, bank_id, this->sub_device_id_);
+    allocator::bank_offset(*this->allocator_, this->buffer_type_, bank_id);
+    DeviceAddr base_page_address = this->address() + allocator::bank_offset(*this->allocator_, this->buffer_type_, bank_id);
     return base_page_address + offset;
 }
 
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index b9bdd19e6ed..31c1e3b73d2 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -20,6 +20,7 @@
 #include "common/bfloat16.hpp"
 #include "common/core_coord.hpp"
 #include "tt_metal/impl/buffers/buffer_constants.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/third_party/umd/device/tt_soc_descriptor.h"
 #include "third_party/umd/device/xy_pair.h"
 #include "tt_metal/tt_stl/concepts.hpp"
@@ -35,6 +36,8 @@ class Device;
 
 }  // namespace v0
 
+class Allocator;
+
 struct ShardSpec {
     /* The individual cores the shard grid is mapped to */
     CoreRangeSet grid;
@@ -157,7 +160,7 @@ class Buffer final {
         TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED,
         const std::optional<ShardSpecBuffer>& shard_parameter = std::nullopt,
         std::optional<bool> bottom_up = std::nullopt,
-        std::optional<uint32_t> sub_device_id = std::nullopt);
+        std::optional<SubDeviceId> sub_device_id = std::nullopt);
     static std::shared_ptr<Buffer> create(
         Device *device,
         DeviceAddr address,
@@ -167,7 +170,7 @@ class Buffer final {
         TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED,
         const std::optional<ShardSpecBuffer>& shard_parameter = std::nullopt,
         std::optional<bool> bottom_up = std::nullopt,
-        std::optional<uint32_t> sub_device_id = std::nullopt);
+        std::optional<SubDeviceId> sub_device_id = std::nullopt);
 
     Buffer(const Buffer &other) = delete;
     Buffer &operator=(const Buffer &other) = delete;
@@ -175,6 +178,7 @@ class Buffer final {
     Buffer &operator=(Buffer &&other) = delete;
 
     Device *device() const { return device_; }
+    Allocator *allocator() const { return allocator_; }
     DeviceAddr size() const { return size_; }
     bool is_allocated() const;
 
@@ -225,7 +229,8 @@ class Buffer final {
 
     const std::shared_ptr<const BufferPageMapping>& get_buffer_page_mapping();
 
-    std::optional<uint32_t> sub_device_id() const { return sub_device_id_; }
+    std::optional<SubDeviceId> sub_device_id() const { return sub_device_id_; }
+    std::optional<SubDeviceManagerId> sub_device_manager_id() const { return sub_device_manager_id_; }
 
     Buffer(
         Device *device,
@@ -235,7 +240,7 @@ class Buffer final {
         TensorMemoryLayout buffer_layout,
         const std::optional<ShardSpecBuffer>& shard_parameter,
         std::optional<bool> bottom_up,
-        std::optional<uint32_t> sub_device_id,
+        std::optional<SubDeviceId> sub_device_id,
         bool owns_data,
         Private);
 
@@ -260,9 +265,12 @@ class Buffer final {
     const BufferType buffer_type_;
     const TensorMemoryLayout buffer_layout_;
     const bool bottom_up_;
-    const std::optional<uint32_t> sub_device_id_;
+    const std::optional<SubDeviceId> sub_device_id_;
     const bool owns_data_;
 
+    std::optional<SubDeviceManagerId> sub_device_manager_id_;
+    Allocator * allocator_;
+
     std::atomic<AllocationStatus> allocation_status_ = AllocationStatus::ALLOCATION_REQUESTED;
     DeviceAddr address_ = 0;
     mutable std::mutex allocation_mutex_;
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index ee7ef233d98..3c80ccca4e5 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -4,6 +4,7 @@
 
 #include <string>
 #include <chrono>
+#include <type_traits>
 #include "tt_metal/host_api.hpp"
 #include "tt_metal/jit_build/genfiles.hpp"
 #include "tt_metal/impl/device/device.hpp"
@@ -23,6 +24,9 @@
 #include "tt_metal/detail/persistent_kernel_cache.hpp"
 #include "tt_metal/tools/profiler/tt_metal_tracy.hpp"
 #include "llrt/hal.hpp"
+#include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tt_metal/impl/sub_device/sub_device_manager.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/tt_stl/span.hpp"
 
 #include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
@@ -57,9 +61,12 @@ bool Device::is_inactive_ethernet_core(CoreCoord logical_core) const {
     return inactive_ethernet_cores.find(logical_core) != inactive_ethernet_cores.end();
 }
 
-uint32_t Device::num_worker_cores(HalProgrammableCoreType core_type, uint32_t sub_device_id) const {
-    TT_FATAL(sub_device_id == 0, "Invalid sub_device index: {}", sub_device_id);
-    return this->num_worker_cores_[static_cast<uint32_t>(core_type)];
+CoreRangeSet Device::worker_cores(HalProgrammableCoreType core_type, SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->sub_device(sub_device_id).cores(core_type);
+}
+
+uint32_t Device::num_worker_cores(HalProgrammableCoreType core_type, SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->sub_device(sub_device_id).num_cores(core_type);
 }
 
 std::vector<uint32_t> Device::get_noc_encoding_for_active_eth_cores(NOC noc_index) {
@@ -200,13 +207,22 @@ void Device::initialize_cluster() {
         this->clear_l1_state();
     }
     int ai_clk = tt::Cluster::instance().get_device_aiclk(this->id_);
-    const auto& compute_grid_size = this->compute_with_storage_grid_size();
-    this->num_worker_cores_[static_cast<uint32_t>(HalProgrammableCoreType::TENSIX)] = compute_grid_size.x * compute_grid_size.y;
-    this->num_worker_cores_[static_cast<uint32_t>(HalProgrammableCoreType::ACTIVE_ETH)] = this->get_active_ethernet_cores(true).size();
     log_info(tt::LogMetal, "AI CLK for device {} is:   {} MHz", this->id_, ai_clk);
 }
 
-void Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap) {
+void Device::initialize_default_sub_device_state(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap) {
+    // Create the default sub-device manager representing the entire chip
+    this->next_sub_device_manager_id_ = {0};
+    auto [sub_device_manager, _] = this->sub_device_managers_.insert_or_assign(this->get_next_sub_device_manager_id(), std::make_unique<detail::SubDeviceManager>(this, this->initialize_allocator(l1_small_size, trace_region_size, l1_bank_remap)));
+    this->default_sub_device_manager_id_ = sub_device_manager->first;
+    this->default_sub_device_manager_ = sub_device_manager->second.get();
+    this->active_sub_device_manager_id_ = this->default_sub_device_manager_id_;
+    this->active_sub_device_manager_ = this->default_sub_device_manager_;
+    this->allocator_ = this->get_initialized_allocator().get();
+
+}
+
+std::unique_ptr<Allocator> Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap) {
     ZoneScoped;
     const metal_SocDescriptor &soc_desc = tt::Cluster::instance().get_soc_desc(this->id_);
     CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->id_);
@@ -272,7 +288,7 @@ void Device::initialize_allocator(size_t l1_small_size, size_t trace_region_size
     // L1_BANKING scheme creates 1 bank per DRAM core and splits up L1 such that there are power 2 num L1 banks
     // This is the only allocator scheme supported because kernel APIs assume num L1 banks are power of 2
     TT_ASSERT(this->allocator_scheme_ == MemoryAllocator::L1_BANKING);
-    this->allocator_ = std::make_unique<L1BankingAllocator>(config);
+    return std::make_unique<L1BankingAllocator>(config);
 }
 
 void Device::initialize_device_kernel_defines()
@@ -2887,17 +2903,6 @@ void Device::init_command_queue_device() {
             }
         }
     }
-    auto dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->id());
-    auto dispatch_go_signal_noc = this->dispatch_go_signal_noc();
-    const auto& [tensix_num_worker_cores, tensix_worker_physical_grid] = get_physical_worker_grid_config(this->id(), this->num_hw_cqs(), dispatch_core_type);
-    this->noc_mcast_data_ = {this->get_noc_multicast_encoding(dispatch_go_signal_noc, tensix_worker_physical_grid), tensix_num_worker_cores};
-    // TODO: avoid copying?
-    const auto& noc_unicast_data = this->get_noc_encoding_for_active_eth_cores(dispatch_go_signal_noc);
-    this->noc_unicast_data_ = vector_memcpy_aligned<uint32_t>(noc_unicast_data.begin(), noc_unicast_data.end());
-    this->noc_mcast_unicast_data_.clear();
-    this->noc_mcast_unicast_data_.reserve(this->noc_mcast_data_.size() + this->noc_unicast_data_.size());
-    this->noc_mcast_unicast_data_.insert(this->noc_mcast_unicast_data_.end(), this->noc_mcast_data_.begin(), this->noc_mcast_data_.end());
-    this->noc_mcast_unicast_data_.insert(this->noc_mcast_unicast_data_.end(), this->noc_unicast_data_.begin(), this->noc_unicast_data_.end());
     // TODO: Move this inside the command queue
     for (auto& hw_cq : this->hw_command_queues_) {
         hw_cq->set_num_worker_sems_on_dispatch(this->num_sub_devices());
@@ -2926,10 +2931,9 @@ bool Device::initialize(const uint8_t num_hw_cqs, size_t l1_small_size, size_t t
     constexpr uint32_t harvesting_map_bits = 12;
     this->build_key_ = ((uint32_t)this->num_hw_cqs_ << harvesting_map_bits) | tt::Cluster::instance().get_harvesting_mask(this->id());
     this->initialize_cluster();
-    this->initialize_allocator(l1_small_size, trace_region_size, l1_bank_remap);
+    this->initialize_default_sub_device_state(l1_small_size, trace_region_size, l1_bank_remap);
     this->initialize_build();
-    // Reset the launch_message ring buffer state seen on host, since its reset on device, each time FW is initialized
-    std::for_each(this->worker_launch_message_buffer_state.begin(), this->worker_launch_message_buffer_state.end(), std::mem_fn(&LaunchMessageRingBufferState::reset));
+
     // For minimal setup, don't initialize FW, watcher, dprint. They won't work if we're attaching to a hung chip.
     if (minimal)
         return true;
@@ -2953,13 +2957,15 @@ bool Device::close() {
         }
         hw_command_queue->terminate();
     }
+
     this->work_executor.reset();
     tt_metal::detail::DumpDeviceProfileResults(this, true);
 
-    this->trace_buffer_pool_.clear();
-    this->MarkAllocationsSafe();
-
-    this->deallocate_buffers();
+    this->active_sub_device_manager_ = nullptr;
+    for (auto sub_device_manager = this->sub_device_managers_.begin(); sub_device_manager != this->sub_device_managers_.end();) {
+        this->remove_sub_device_manager((sub_device_manager++)->first);
+    }
+    this->default_sub_device_manager_ = nullptr;
 
     std::unordered_map<chip_id_t, std::unordered_set<CoreCoord>> not_done_dispatch_cores;
     std::unordered_map<chip_id_t, std::unordered_set<CoreCoord>> cores_to_skip;
@@ -3003,13 +3009,6 @@ bool Device::close() {
     }
 
     tt::Cluster::instance().l1_barrier(id_);
-    if (this->allocator_) {
-        allocator::clear(*this->allocator_);
-    }
-    // After device close, no buffers on this device should be used
-    for (const auto &buf : this->get_allocated_buffers()) {
-        DeallocateBuffer(*buf);
-    }
 
     this->compute_cores_.clear();
     this->storage_only_cores_.clear();
@@ -3019,7 +3018,6 @@ bool Device::close() {
     this->sw_command_queues_.clear();
     this->hw_command_queues_.clear();
     this->sysmem_manager_.reset();
-    this->allocator_.reset();
     this->tunnel_device_dispatch_workers_.clear();
     this->initialized_ = false;
 
@@ -3154,38 +3152,20 @@ uint32_t Device::get_noc_multicast_encoding(uint8_t noc_index, const CoreRange&
     }
 }
 
-void Device::check_allocator_is_initialized(std::optional<uint32_t> sub_device_id) const {
-    // TODO: This will query the active sub-device manager
-    if (sub_device_id.has_value()) {
-        TT_THROW("Sub-device allocator not implemented yet");
-    } else {
-        if (!this->allocator_) {
-            TT_THROW("No memory allocator! Allocator has not been initialized");
-        }
-    }
-}
-
-const std::unique_ptr<Allocator> &Device::get_initialized_allocator(std::optional<uint32_t> sub_device_id) const {
-    // TODO: This will query the active sub-device manager
-    if (sub_device_id.has_value()) {
-        TT_THROW("Sub-device allocator not implemented yet");
-    } else {
-        if (!this->allocator_) {
-            TT_THROW("No memory allocator! Allocator has not been initialized");
-        }
-        return this->allocator_;
-    }
+const std::unique_ptr<Allocator> &Device::get_initialized_allocator() const {
+    return this->default_sub_device_manager_->get_initialized_allocator(SubDeviceId{0});
 }
 
-std::unique_ptr<Allocator> &Device::get_initialized_allocator(std::optional<uint32_t> sub_device_id) {
-    return const_cast<std::unique_ptr<Allocator>&>(const_cast<const Device*>(this)->get_initialized_allocator(sub_device_id));
+const std::unique_ptr<Allocator> &Device::get_initialized_allocator(SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->get_initialized_allocator(sub_device_id);
 }
 
-void Device::reset_num_sub_devices(uint32_t num_sub_devices) {
-    TT_FATAL((num_sub_devices >=1 && num_sub_devices <= Device::MAX_NUM_SUB_DEVICES), "Illegal number of sub devices specified");
+void Device::reset_sub_devices_state(const std::unique_ptr<detail::SubDeviceManager> &sub_device_manager) {
     // Finish all running programs
     Synchronize(this);
 
+    auto num_sub_devices = sub_device_manager->num_sub_devices();
+
     // Set new number of worker sems on dispatch_s
     for (auto& hw_cq : this->hw_command_queues_) {
         // Only need to reset launch messages once, so reset on cq 0
@@ -3196,25 +3176,39 @@ void Device::reset_num_sub_devices(uint32_t num_sub_devices) {
         hw_cq->reset_config_buffer_mgr(num_sub_devices);
     }
     // Reset the launch_message ring buffer state seen on host
-    std::for_each(this->worker_launch_message_buffer_state.begin(), this->worker_launch_message_buffer_state.begin() + num_sub_devices, std::mem_fn(&LaunchMessageRingBufferState::reset));
+    sub_device_manager->reset_worker_launch_message_buffer_state();
 }
 
 uint32_t Device::num_sub_devices() const {
-    // TODO: This will query the active sub-device manager
-    return Device::DEFAULT_NUM_SUB_DEVICES;
+    return this->active_sub_device_manager_->num_sub_devices();
+}
+
+uint32_t Device::num_banks(const BufferType &buffer_type) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::num_banks(*allocator, buffer_type);
 }
 
-uint32_t Device::num_banks(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+uint32_t Device::num_banks(const BufferType &buffer_type, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::num_banks(*allocator, buffer_type);
 }
 
-uint32_t Device::bank_size(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+uint32_t Device::bank_size(const BufferType &buffer_type) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::bank_size(*allocator, buffer_type);
+}
+
+uint32_t Device::bank_size(const BufferType &buffer_type, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::bank_size(*allocator, buffer_type);
 }
 
-uint32_t Device::dram_channel_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+uint32_t Device::dram_channel_from_bank_id(uint32_t bank_id) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::dram_channel_from_bank_id(*allocator, bank_id);
+}
+
+uint32_t Device::dram_channel_from_bank_id(uint32_t bank_id, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::dram_channel_from_bank_id(*allocator, bank_id);
 }
@@ -3233,64 +3227,141 @@ uint32_t Device::dram_channel_from_logical_core(const CoreCoord& logical_core) c
     return tt::Cluster::instance().get_soc_desc(id_).get_dram_channel_from_logical_core(logical_core);
 }
 
-int32_t Device::bank_offset(BufferType buffer_type, uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+int32_t Device::bank_offset(BufferType buffer_type, uint32_t bank_id) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::bank_offset(*allocator, buffer_type, bank_id);
+}
+
+int32_t Device::bank_offset(BufferType buffer_type, uint32_t bank_id, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::bank_offset(*allocator, buffer_type, bank_id);
 }
 
-CoreCoord Device::logical_core_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id) const {
+CoreCoord Device::logical_core_from_bank_id(uint32_t bank_id) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::logical_core_from_bank_id(*allocator, bank_id);
+}
+
+CoreCoord Device::logical_core_from_bank_id(uint32_t bank_id, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::logical_core_from_bank_id(*allocator, bank_id);
 }
 
-const std::vector<uint32_t> &Device::bank_ids_from_dram_channel(uint32_t dram_channel, std::optional<uint32_t> sub_device_id) const {
+const std::vector<uint32_t> &Device::bank_ids_from_dram_channel(uint32_t dram_channel) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::bank_ids_from_dram_channel(*allocator, dram_channel);
+}
+
+const std::vector<uint32_t> &Device::bank_ids_from_dram_channel(uint32_t dram_channel, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::bank_ids_from_dram_channel(*allocator, dram_channel);
 }
 
 const std::vector<uint32_t> &Device::bank_ids_from_logical_core(
-    BufferType buffer_type, const CoreCoord &logical_core, std::optional<uint32_t> sub_device_id) const {
+    BufferType buffer_type, const CoreCoord &logical_core) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::bank_ids_from_logical_core(*allocator, buffer_type, logical_core);
+}
+
+const std::vector<uint32_t> &Device::bank_ids_from_logical_core(
+    BufferType buffer_type, const CoreCoord &logical_core, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::bank_ids_from_logical_core(*allocator, buffer_type, logical_core);
 }
 
-allocator::Statistics Device::get_memory_allocation_statistics(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id) const {
+allocator::Statistics Device::get_memory_allocation_statistics(const BufferType &buffer_type) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::get_statistics(*allocator, buffer_type);
+}
+
+allocator::Statistics Device::get_memory_allocation_statistics(const BufferType &buffer_type, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::get_statistics(*allocator, buffer_type);
 }
 
-uint32_t Device::get_allocator_alignment(std::optional<uint32_t> sub_device_id) const {
+uint32_t Device::get_allocator_alignment() const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator->config.alignment;
+}
+
+uint32_t Device::get_allocator_alignment(SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator->config.alignment;
 }
 
-size_t Device::get_l1_small_size(std::optional<uint32_t> sub_device_id) const {
+size_t Device::get_l1_small_size() const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator->config.l1_small_size;
+}
+
+size_t Device::get_l1_small_size(SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator->config.l1_small_size;
 }
 
-void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, std::optional<uint32_t> sub_device_id) const {
+void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::dump_memory_blocks(*allocator, buffer_type, out);
+}
+
+void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::dump_memory_blocks(*allocator, buffer_type, out);
 }
 
-const std::unordered_set<Buffer *> &Device::get_allocated_buffers(std::optional<uint32_t> sub_device_id) const {
+const std::unordered_set<Buffer *> &Device::get_allocated_buffers() const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::get_allocated_buffers(*allocator);
+}
+
+const std::unordered_set<Buffer *> &Device::get_allocated_buffers(SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::get_allocated_buffers(*allocator);
 }
 
-void Device::deallocate_buffers(std::optional<uint32_t> sub_device_id) {
-    auto& allocator = this->get_initialized_allocator(sub_device_id);
+void Device::deallocate_buffers() {
+    const auto& allocator = this->get_initialized_allocator();
     allocator::deallocate_buffers(*allocator);
 }
 
-std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Span<const uint32_t> sub_device_ids) const {
-    this->check_allocator_is_initialized(std::nullopt);
-    TT_FATAL(sub_device_ids.size() == 0, "Invalid number of sub-devices {}", sub_device_ids.size());
+void Device::deallocate_buffers(SubDeviceId sub_device_id) {
+    const auto& allocator = this->get_initialized_allocator(sub_device_id);
+    allocator::deallocate_buffers(*allocator);
+}
+
+std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address() const {
     // Global bank id needs to look up a bank from the compute grid (not the storage grid)
+    // Since banks are lockstep in an allocator it doesn't matter if the actual core matches or not
     auto global_bank_id =
         this->bank_ids_from_logical_core(BufferType::L1, *this->compute_cores_.begin())[0];
-    return allocator::lowest_occupied_l1_address(*this->allocator_, global_bank_id);
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::lowest_occupied_l1_address(*allocator, global_bank_id);
+}
+
+std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Span<const SubDeviceId> sub_device_ids) const {
+    // Sub-device banks are currently all compute banks
+    // Since banks are lockstep in an allocator it doesn't matter which core is used
+    uint32_t sub_device_bank_id = 0;
+    DeviceAddr lowest_addr = std::numeric_limits<DeviceAddr>::max();
+    for (const auto& sub_device_id : sub_device_ids) {
+        const auto& allocator = this->active_sub_device_manager_->sub_device_allocator(sub_device_id);
+        if (allocator) {
+            auto found_addr = allocator::lowest_occupied_l1_address(*allocator, sub_device_bank_id);
+            if (found_addr.has_value()) {
+                lowest_addr = std::min(lowest_addr, *found_addr);
+            }
+        }
+    }
+    // sub-device allocators sit below global allocator. If an address is found for a sub-device, no need to check the global allocator
+    if (lowest_addr != std::numeric_limits<DeviceAddr>::max()) {
+        return lowest_addr;
+    } else {
+        const auto &allocator = this->get_initialized_allocator();
+        // Global bank id needs to look up a bank from the compute grid (not the storage grid)
+        auto global_bank_id =
+            this->bank_ids_from_logical_core(BufferType::L1, *this->compute_cores_.begin())[0];
+        return allocator::lowest_occupied_l1_address(*allocator, global_bank_id);
+    }
 }
 
 float Device::sfpu_eps() const {
@@ -3414,21 +3485,22 @@ bool Device::using_slow_dispatch() const {
 void Device::begin_trace(const uint8_t cq_id, const uint32_t tid) {
     ZoneScoped;
     TracyTTMetalBeginTrace(this->id(), tid);
-    TT_FATAL(this->trace_buffer_pool_.count(tid) == 0, "Trace already exists for tid {} on device", tid);
     TT_FATAL(!this->hw_command_queues_[cq_id]->tid.has_value(), "CQ {} is already being used for tracing tid {}", (uint32_t)cq_id, tid);
     this->MarkAllocationsSafe();
     // Create an empty trace buffer here. This will get initialized in end_trace
-    this->trace_buffer_pool_.insert({tid, Trace::create_empty_trace_buffer()});
-    this->hw_command_queues_[cq_id]->record_begin(tid, this->trace_buffer_pool_[tid]->desc);
+    TT_FATAL(this->active_sub_device_manager_->get_trace(tid) == nullptr, "Trace already exists for tid {} on device", tid);
+    auto &trace_buffer = this->active_sub_device_manager_->create_trace(tid);
+    this->hw_command_queues_[cq_id]->record_begin(tid, trace_buffer->desc);
 }
 
 void Device::end_trace(const uint8_t cq_id, const uint32_t tid) {
     ZoneScoped;
     TracyTTMetalEndTrace(this->id(), tid);
     TT_FATAL(this->hw_command_queues_[cq_id]->tid == tid, "CQ {} is not being used for tracing tid {}", (uint32_t)cq_id, tid);
-    TT_FATAL(this->trace_buffer_pool_.count(tid) > 0, "Trace instance {} must exist on device", tid);
+    auto trace_buffer = this->active_sub_device_manager_->get_trace(tid);
+    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device", tid);
     this->hw_command_queues_[cq_id]->record_end();
-    Trace::initialize_buffer(this->command_queue(cq_id), this->trace_buffer_pool_[tid]);
+    Trace::initialize_buffer(this->command_queue(cq_id), trace_buffer);
     this->MarkAllocationsUnsafe();
 }
 
@@ -3436,41 +3508,36 @@ void Device::replay_trace(const uint8_t cq_id, const uint32_t tid, const bool bl
     ZoneScoped;
     TracyTTMetalReplayTrace(this->id(), tid);
     constexpr bool check = false;
-    TT_FATAL(this->trace_buffer_pool_.count(tid) > 0, "Trace instance {}  must exist on device" , tid);
+    const auto &trace_buffer = this->active_sub_device_manager_->get_trace(tid);
+    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device", tid);
     if constexpr (check) {
-        Trace::validate_instance(*this->trace_buffer_pool_[tid]);
+        Trace::validate_instance(*trace_buffer);
     }
-    this->command_queue(cq_id).run_command(CommandInterface{
-        .type = EnqueueCommandType::ENQUEUE_TRACE,
-        .blocking = blocking,
-        .trace_id = tid
-    });
+    EnqueueTrace(this->command_queue(cq_id), tid, blocking);
 }
 
 void Device::release_trace(const uint32_t tid) {
     ZoneScoped;
     TracyTTMetalReleaseTrace(this->id(), tid);
-    uint32_t erased = this->trace_buffer_pool_.erase(tid);
+
+    this->active_sub_device_manager_->release_trace(tid);
+
     // Only enable allocations once all captured traces are released
-    if (this->trace_buffer_pool_.empty()) {
+    if (this->trace_buffers_size == 0) {
         this->MarkAllocationsSafe();
     }
 }
 
-std::shared_ptr<TraceBuffer> Device::get_trace(const uint32_t tid) {
-    if (auto trace = this->trace_buffer_pool_.find(tid); trace != this->trace_buffer_pool_.end()) {
-        return trace->second;
-    } else {
-        return nullptr;
-    }
+std::shared_ptr<TraceBuffer> Device::get_trace(uint32_t tid) {
+    return this->active_sub_device_manager_->get_trace(tid);
 }
 
 void Device::MarkAllocationsUnsafe() {
-    tt::tt_metal::allocator::mark_allocations_unsafe(*(this->allocator_));
+    tt::tt_metal::allocator::mark_allocations_unsafe(*this->get_initialized_allocator());
 }
 
 void Device::MarkAllocationsSafe() {
-    tt::tt_metal::allocator::mark_allocations_safe(*(this->allocator_));
+    tt::tt_metal::allocator::mark_allocations_safe(*this->get_initialized_allocator());
 }
 
 void Device::generate_device_headers(const std::string &path) const
@@ -3502,7 +3569,7 @@ void Device::generate_device_headers(const std::string &path) const
         dram_offsets_per_bank,
         l1_noc_coord_per_bank,
         l1_offset_per_bank,
-        this->allocator_->config.alignment
+        this->get_allocator_alignment()
     );
 }
 
@@ -3510,48 +3577,101 @@ size_t Device::get_device_kernel_defines_hash() {
     return tt::utils::DefinesHash{}(this->device_kernel_defines_);
 }
 
-const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_data(uint32_t sub_device_id) const {
-    // TODO: This will query the active sub-device manager
-    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
-    return this->noc_mcast_data_;
+const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_data(SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->noc_mcast_data(sub_device_id);
 }
-const vector_memcpy_aligned<uint32_t>& Device::noc_unicast_data(uint32_t sub_device_id) const {
-    // TODO: This will query the active sub-device manager
-    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
-    return this->noc_unicast_data_;
+
+const vector_memcpy_aligned<uint32_t>& Device::noc_unicast_data(SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->noc_unicast_data(sub_device_id);
 }
 
-const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_unicast_data(uint32_t sub_device_id, bool mcast_data, bool unicast_data) const {
-    // TODO: This will query the active sub-device manager
-    TT_FATAL(sub_device_id < Device::DEFAULT_NUM_SUB_DEVICES, "sub_device_id {} is out of range", sub_device_id);
+const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_unicast_data(SubDeviceId sub_device_id, bool mcast_data, bool unicast_data) const {
+    // Needed for compatibility with tests that create programs with no kernels
+    static const vector_memcpy_aligned<uint32_t> empty = {};
     if (mcast_data && unicast_data) {
-        return this->noc_mcast_unicast_data_;
+        return this->active_sub_device_manager_->noc_mcast_unicast_data(sub_device_id);
     } else if (mcast_data) {
-        return this->noc_mcast_data_;
+        return this->active_sub_device_manager_->noc_mcast_data(sub_device_id);
     } else if (unicast_data) {
-        return this->noc_unicast_data_;
+        return this->active_sub_device_manager_->noc_unicast_data(sub_device_id);
     } else {
-        // Needed for compatibility with tests that create programs with no kernels
-        static const vector_memcpy_aligned<uint32_t> empty = {};
         return empty;
     }
 }
 
-uint32_t Device::num_noc_mcast_txns(uint32_t sub_device_id) const {
+uint32_t Device::num_noc_mcast_txns(SubDeviceId sub_device_id) const {
     return this->noc_mcast_data(sub_device_id).size() / 2;
 }
-uint32_t Device::num_noc_unicast_txns(uint32_t sub_device_id) const {
+uint32_t Device::num_noc_unicast_txns(SubDeviceId sub_device_id) const {
     return this->noc_unicast_data(sub_device_id).size();
 }
 
-uint32_t Device::num_noc_mcast_unicast_txns(uint32_t sub_device_id, bool mcast_data, bool unicast_data) const {
+uint32_t Device::num_noc_mcast_unicast_txns(SubDeviceId sub_device_id, bool mcast_data, bool unicast_data) const {
     return (mcast_data ? this->num_noc_mcast_txns(sub_device_id) : 0) + (unicast_data ? this->num_noc_unicast_txns(sub_device_id) : 0);
 }
 
+LaunchMessageRingBufferState& Device::get_worker_launch_message_buffer_state(SubDeviceId sub_device_id) {
+    return this->active_sub_device_manager_->get_worker_launch_message_buffer_state(sub_device_id);
+}
+
 NOC Device::dispatch_go_signal_noc() const {
     return this->dispatch_s_enabled() ? NOC::NOC_1 : NOC::NOC_0;
 }
 
+SubDeviceManagerId Device::get_next_sub_device_manager_id() {
+    return this->next_sub_device_manager_id_++;
+}
+
+SubDeviceManagerId Device::get_active_sub_device_manager_id() const {
+    return this->active_sub_device_manager_id_;
+}
+
+SubDeviceManagerId Device::get_default_sub_device_manager_id() const {
+    return this->default_sub_device_manager_id_;
+}
+
+SubDeviceManagerId Device::create_sub_device_manager(tt::stl::Span<const SubDevice> sub_devices, DeviceAddr local_l1_size) {
+    TT_FATAL(!this->using_slow_dispatch(), "Using sub device managers is unsupported with slow dispatch");
+    auto [sub_device_manager, _] = this->sub_device_managers_.insert_or_assign(this->get_next_sub_device_manager_id(), std::make_unique<detail::SubDeviceManager>(sub_devices, local_l1_size, this));
+    return sub_device_manager->first;
+}
+
+void Device::load_sub_device_manager(SubDeviceManagerId sub_device_manager_id) {
+    if (this->active_sub_device_manager_id_ == sub_device_manager_id) {
+        return;
+    }
+    auto sub_device_manager = this->sub_device_managers_.find(sub_device_manager_id);
+    TT_FATAL(sub_device_manager != this->sub_device_managers_.end(), "Sub device manager does not exist");
+    this->reset_sub_devices_state(sub_device_manager->second);
+    // Shrink the global allocator size to make room for sub-device allocators
+    auto local_l1_size = sub_device_manager->second->local_l1_size();
+    allocator::shrink_allocator_size(*this->get_initialized_allocator(), BufferType::L1, local_l1_size, true);
+    this->active_sub_device_manager_id_ = sub_device_manager_id;
+    this->active_sub_device_manager_ = sub_device_manager->second.get();
+}
+
+void Device::clear_loaded_sub_device_manager() {
+    if (this->active_sub_device_manager_id_ == this->default_sub_device_manager_id_) {
+        return;
+    }
+    TT_FATAL(!this->active_sub_device_manager_->has_allocations(), "Cannot clear active sub device manager {} since it has allocations", this->active_sub_device_manager_id_);
+    auto &default_manager = this->sub_device_managers_.at(this->default_sub_device_manager_id_);
+    this->reset_sub_devices_state(default_manager);
+    allocator::reset_allocator_size(*this->get_initialized_allocator(), BufferType::L1);
+    this->active_sub_device_manager_id_ = this->default_sub_device_manager_id_;
+    this->active_sub_device_manager_ = default_manager.get();
+}
+
+void Device::remove_sub_device_manager(SubDeviceManagerId sub_device_manager_id) {
+    if (this->active_sub_device_manager_ != nullptr) {
+        TT_FATAL(sub_device_manager_id != this->active_sub_device_manager_id_, "Cannot remove active sub device manager {}", sub_device_manager_id);
+        TT_FATAL(sub_device_manager_id != this->default_sub_device_manager_id_, "Cannot remove default sub device manager {}", sub_device_manager_id);
+    }
+    auto sub_device_manager = this->sub_device_managers_.find(sub_device_manager_id);
+    TT_FATAL(sub_device_manager != this->sub_device_managers_.end(), "Sub device manager does not exist");
+    this->sub_device_managers_.erase(sub_device_manager);
+}
+
 }  // namespace tt_metal
 
 }  // namespace tt
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 0725519eed2..36058b11f82 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -18,6 +18,8 @@
 #include "llrt/tt_cluster.hpp"
 #include "llrt/hal.hpp"
 #include "tt_metal/impl/dispatch/command_queue_interface.hpp"
+#include "tt_metal/impl/sub_device/sub_device_manager.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/tt_stl/span.hpp"
 #include "program_cache.hpp"
 
@@ -32,6 +34,7 @@ inline namespace v0 {
 class Buffer;
 class Program;
 class CommandQueue;
+class SubDevice;
 
 }  // namespace v0
 
@@ -39,12 +42,6 @@ class JitBuildEnv;
 class HWCommandQueue;
 class TraceBuffer;
 
-namespace detail {
-
-class TraceDescriptor;
-
-}
-
 using on_close_device_callback = std::function<void ()>;
 
 // TODO: These should be moved into arch specific host files that get exported here
@@ -65,7 +62,7 @@ inline namespace v0 {
 // A physical PCIexpress Tenstorrent device
 class Device {
    private:
-    static constexpr uint32_t MAX_NUM_SUB_DEVICES = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+    static_assert(detail::SubDeviceManager::MAX_NUM_SUB_DEVICES <= dispatch_constants::DISPATCH_MESSAGE_ENTRIES, "MAX_NUM_SUB_DEVICES must be less than or equal to dispatch_constants::DISPATCH_MESSAGE_ENTRIES");
     static constexpr uint32_t DEFAULT_NUM_SUB_DEVICES = 1;
    public:
     // friend void tt_gdb(Device* device, int chip_id, const vector<CoreCoord> cores, vector<string> ops);
@@ -141,7 +138,8 @@ class Device {
 
     bool is_inactive_ethernet_core(CoreCoord logical_core) const;
 
-    uint32_t num_worker_cores(HalProgrammableCoreType core_type, uint32_t sub_device_id) const;
+    CoreRangeSet worker_cores(HalProgrammableCoreType core_type, SubDeviceId sub_device_id) const;
+    uint32_t num_worker_cores(HalProgrammableCoreType core_type, SubDeviceId sub_device_id) const;
 
     std::tuple<chip_id_t, CoreCoord> get_connected_ethernet_core(CoreCoord eth_core) const {
         return tt::Cluster::instance().get_connected_ethernet_core(std::make_tuple(this->id_, eth_core));
@@ -161,34 +159,46 @@ class Device {
 
     uint32_t num_sub_devices() const;
 
-    uint32_t num_banks(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
-    uint32_t bank_size(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    uint32_t num_banks(const BufferType &buffer_type) const;
+    uint32_t num_banks(const BufferType &buffer_type, SubDeviceId sub_device_id) const;
+    uint32_t bank_size(const BufferType &buffer_type) const;
+    uint32_t bank_size(const BufferType &buffer_type, SubDeviceId sub_device_id) const;
 
-    uint32_t dram_channel_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    uint32_t dram_channel_from_bank_id(uint32_t bank_id) const;
+    uint32_t dram_channel_from_bank_id(uint32_t bank_id, SubDeviceId sub_device_id) const;
 
     CoreCoord dram_core_from_dram_channel(uint32_t dram_channel) const;
     CoreCoord logical_core_from_dram_channel(uint32_t dram_channel) const;
     uint32_t dram_channel_from_logical_core(const CoreCoord& logical_core) const;
 
-    const std::unique_ptr<Allocator> &get_initialized_allocator(std::optional<uint32_t> sub_device_id = std::nullopt) const;
-    std::unique_ptr<Allocator> &get_initialized_allocator(std::optional<uint32_t> sub_device_id = std::nullopt);
+    const std::unique_ptr<Allocator> &get_initialized_allocator() const;
+    const std::unique_ptr<Allocator> &get_initialized_allocator(SubDeviceId sub_device_id) const;
 
-    int32_t bank_offset(BufferType buffer_type, uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    int32_t bank_offset(BufferType buffer_type, uint32_t bank_id) const;
+    int32_t bank_offset(BufferType buffer_type, uint32_t bank_id, SubDeviceId sub_device_id) const;
 
-    CoreCoord logical_core_from_bank_id(uint32_t bank_id, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    CoreCoord logical_core_from_bank_id(uint32_t bank_id) const;
+    CoreCoord logical_core_from_bank_id(uint32_t bank_id, SubDeviceId sub_device_id) const;
 
-    const std::vector<uint32_t> &bank_ids_from_dram_channel(uint32_t dram_channel, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    const std::vector<uint32_t> &bank_ids_from_dram_channel(uint32_t dram_channel) const;
+    const std::vector<uint32_t> &bank_ids_from_dram_channel(uint32_t dram_channel, SubDeviceId sub_device_id) const;
 
     const std::vector<uint32_t> &bank_ids_from_logical_core(
-        BufferType buffer_type, const CoreCoord &logical_core, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+        BufferType buffer_type, const CoreCoord &logical_core) const;
+    const std::vector<uint32_t> &bank_ids_from_logical_core(
+        BufferType buffer_type, const CoreCoord &logical_core, SubDeviceId sub_device_id) const;
 
-    allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type) const;
+    allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type, SubDeviceId sub_device_id) const;
 
-    uint32_t get_allocator_alignment(std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    uint32_t get_allocator_alignment() const;
+    uint32_t get_allocator_alignment(SubDeviceId sub_device_id) const;
 
-    size_t get_l1_small_size(std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    size_t get_l1_small_size() const;
+    size_t get_l1_small_size(SubDeviceId sub_device_id) const;
 
-    void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out) const;
+    void dump_memory_blocks(const BufferType &buffer_type, std::ofstream &out, SubDeviceId sub_device_id) const;
 
     // Set of logical storage only core coordinates
     const std::set<CoreCoord> &storage_only_cores() const { return this->storage_only_cores_; }
@@ -203,11 +213,14 @@ class Device {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& physical_core) const;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& physical_cores) const;
 
-    const std::unordered_set<Buffer *> &get_allocated_buffers(std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    const std::unordered_set<Buffer *> &get_allocated_buffers() const;
+    const std::unordered_set<Buffer *> &get_allocated_buffers(SubDeviceId sub_device_id) const;
 
-    void deallocate_buffers(std::optional<uint32_t> sub_device_id = std::nullopt);
+    void deallocate_buffers();
+    void deallocate_buffers(SubDeviceId sub_device_id);
 
-    std::optional<DeviceAddr> lowest_occupied_compute_l1_address(tt::stl::Span<const uint32_t> sub_device_ids) const;
+    std::optional<DeviceAddr> lowest_occupied_compute_l1_address() const;
+    std::optional<DeviceAddr> lowest_occupied_compute_l1_address(tt::stl::Span<const SubDeviceId> sub_device_ids) const;
 
     // machine epsilon
     float sfpu_eps() const;
@@ -234,16 +247,15 @@ class Device {
     void end_trace(const uint8_t cq_id, const uint32_t tid);
     void replay_trace(const uint8_t cq_id, const uint32_t tid, const bool blocking);
     void release_trace(const uint32_t tid);
-    std::shared_ptr<TraceBuffer> get_trace(const uint32_t tid);
+    std::shared_ptr<TraceBuffer> get_trace(uint32_t tid);
 
     bool using_slow_dispatch() const;
-    void check_allocator_is_initialized(std::optional<uint32_t> sub_device_id) const;
 
     // Checks that the given arch is on the given pci_slot and that it's responding
     // Puts device into reset
     bool initialize(const uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap = {}, bool minimal = false);
     void initialize_cluster();
-    void initialize_allocator(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap = {});
+    std::unique_ptr<Allocator> initialize_allocator(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap = {});
     void initialize_build();
     void initialize_device_kernel_defines();
     void build_firmware();
@@ -287,7 +299,9 @@ class Device {
     static constexpr MemoryAllocator allocator_scheme_ = MemoryAllocator::L1_BANKING;
     chip_id_t id_;
     uint32_t build_key_;
-    std::unique_ptr<Allocator> allocator_ = nullptr;
+    // Leaving here for compatibility with current reacharounds
+    // TODO: Replace with get_initialized_allocator()
+    Allocator * allocator_ = nullptr;
     bool initialized_ = false;
     std::map<uint32_t, std::map<chip_id_t, std::vector<std::vector<std::tuple<tt_cxy_pair, dispatch_worker_build_settings_t>>>>> tunnel_device_dispatch_workers_;
     std::vector<std::vector<chip_id_t>> tunnels_from_mmio_;
@@ -310,7 +324,6 @@ class Device {
     uint32_t worker_thread_core;
     uint32_t completion_queue_reader_core;
     std::unique_ptr<SystemMemoryManager> sysmem_manager_;
-    std::array<LaunchMessageRingBufferState, Device::MAX_NUM_SUB_DEVICES> worker_launch_message_buffer_state;
     uint8_t num_hw_cqs_;
 
     std::vector<std::unique_ptr<Program>> command_queue_programs;
@@ -345,7 +358,9 @@ class Device {
     T get_dev_addr(CoreCoord phys_core, HalL1MemAddrType addr_type) const;
     // Returns address where allocator starts allocating buffer
     template <typename T = DeviceAddr>
-    T get_base_allocator_addr(const HalMemType &mem_type, std::optional<uint32_t> sub_device_id = std::nullopt) const;
+    T get_base_allocator_addr(const HalMemType &mem_type) const;
+    template <typename T = DeviceAddr>
+    T get_base_allocator_addr(const HalMemType &mem_type, SubDeviceId sub_device_id) const;
 
     template <typename CoreRangeContainer>
     std::vector<std::pair<transfer_info_cores, uint32_t>> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type);
@@ -354,28 +369,35 @@ class Device {
     NOC dispatch_go_signal_noc() const;
     size_t get_device_kernel_defines_hash();
 
-    const vector_memcpy_aligned<uint32_t>& noc_mcast_data(uint32_t sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t>& noc_unicast_data(uint32_t sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t>& noc_mcast_unicast_data(uint32_t sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
-    uint32_t num_noc_mcast_txns(uint32_t sub_device_id) const;
-    uint32_t num_noc_unicast_txns(uint32_t sub_device_id) const;
-    uint32_t num_noc_mcast_unicast_txns(uint32_t sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
-
+    const vector_memcpy_aligned<uint32_t>& noc_mcast_data(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t>& noc_unicast_data(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t>& noc_mcast_unicast_data(SubDeviceId sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
+    uint32_t num_noc_mcast_txns(SubDeviceId sub_device_id) const;
+    uint32_t num_noc_unicast_txns(SubDeviceId sub_device_id) const;
+    uint32_t num_noc_mcast_unicast_txns(SubDeviceId sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
+
+    LaunchMessageRingBufferState& get_worker_launch_message_buffer_state(SubDeviceId sub_device_id);
+
+    SubDeviceManagerId get_active_sub_device_manager_id() const;
+    SubDeviceManagerId get_default_sub_device_manager_id() const;
+    SubDeviceManagerId create_sub_device_manager(tt::stl::Span<const SubDevice> sub_devices, DeviceAddr mesh_l1_size);
+    void load_sub_device_manager(SubDeviceManagerId sub_device_manager_id);
+    void clear_loaded_sub_device_manager();
+    void remove_sub_device_manager(SubDeviceManagerId sub_device_manager_id);
    private:
-    void reset_num_sub_devices(uint32_t num_sub_devices);
-
+    void initialize_default_sub_device_state(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap);
+    SubDeviceManagerId get_next_sub_device_manager_id();
+    void reset_sub_devices_state(const std::unique_ptr<detail::SubDeviceManager>& sub_device_manager);
     void MarkAllocationsUnsafe();
     void MarkAllocationsSafe();
-    std::unordered_map<uint32_t, std::shared_ptr<TraceBuffer>> trace_buffer_pool_;
     std::map<std::string, std::string> device_kernel_defines_;
 
-    // Data structures queried when no SubDeviceManager is active
-    // Otherwise this data comes from the SubDeviceManager
-    // TODO: Encapsulate the default case in a SubDeviceManager as well?
-    std::array<uint32_t, NumHalProgrammableCoreTypes> num_worker_cores_{};
-    vector_memcpy_aligned<uint32_t> noc_mcast_data_;
-    vector_memcpy_aligned<uint32_t> noc_unicast_data_;
-    vector_memcpy_aligned<uint32_t> noc_mcast_unicast_data_;
+    std::unordered_map<SubDeviceManagerId, std::unique_ptr<detail::SubDeviceManager>> sub_device_managers_;
+    SubDeviceManagerId active_sub_device_manager_id_ = {0};
+    detail::SubDeviceManager *active_sub_device_manager_ = nullptr;
+    SubDeviceManagerId next_sub_device_manager_id_ = {0};
+    SubDeviceManagerId default_sub_device_manager_id_ = {0};
+    detail::SubDeviceManager *default_sub_device_manager_ = nullptr;
 };
 
 }  // namespace v0
@@ -402,7 +424,13 @@ inline T Device::get_dev_addr(CoreCoord phys_core, HalL1MemAddrType addr_type) c
 }
 
 template <typename T>
-inline T Device::get_base_allocator_addr(const HalMemType &mem_type, std::optional<uint32_t> sub_device_id) const {
+inline T Device::get_base_allocator_addr(const HalMemType &mem_type) const {
+    const auto& allocator = this->get_initialized_allocator();
+    return allocator::get_unreserved_base_address(*allocator, mem_type);
+}
+
+template <typename T>
+inline T Device::get_base_allocator_addr(const HalMemType &mem_type, SubDeviceId sub_device_id) const {
     const auto& allocator = this->get_initialized_allocator(sub_device_id);
     return allocator::get_unreserved_base_address(*allocator, mem_type);
 }
diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index 00231c7d710..2faa7221f58 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -340,7 +340,7 @@ EnqueueProgramCommand::EnqueueProgramCommand(
     uint32_t expected_num_workers_completed,
     uint32_t multicast_cores_launch_message_wptr,
     uint32_t unicast_cores_launch_message_wptr,
-    uint32_t sub_device_id) :
+    SubDeviceId sub_device_id) :
     command_queue_id(command_queue_id),
     noc_index(noc_index),
     manager(manager),
@@ -356,7 +356,7 @@ EnqueueProgramCommand::EnqueueProgramCommand(
     this->packed_write_max_unicast_sub_cmds = get_packed_write_max_unicast_sub_cmds(this->device);
     this->dispatch_message_addr = dispatch_constants::get(
         this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE) +
-        dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
+        dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id.to_index());
 }
 
 void EnqueueProgramCommand::assemble_preamble_commands(
@@ -1289,7 +1289,7 @@ void EnqueueProgramCommand::assemble_device_commands(
     if (this->device->dispatch_s_enabled()) {
         // dispatch_d signals dispatch_s to send the go signal, use a barrier if there are cores active
         uint16_t index_bitmask = 0;
-        index_bitmask |= 1 << this->sub_device_id;
+        index_bitmask |= 1 << this->sub_device_id.to_index();
         device_command_sequence.add_notify_dispatch_s_go_signal_cmd(program_transfer_info.num_active_cores > 0, index_bitmask);
         dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
     } else {
@@ -1302,7 +1302,7 @@ void EnqueueProgramCommand::assemble_device_commands(
     run_program_go_signal.signal = RUN_MSG_GO;
     run_program_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     run_program_go_signal.master_y = (uint8_t)this->dispatch_core.y;
-    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
+    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id.to_index());
     uint32_t write_offset_bytes = device_command_sequence.write_offset_bytes();
     device_command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, *reinterpret_cast<uint32_t*>(&run_program_go_signal), this->dispatch_message_addr, num_noc_mcast_txns, num_noc_unicast_txns, noc_mcast_unicast_data, dispatcher_for_go_signal);
     program_command_sequence.mcast_go_signal_cmd_ptr = &((CQDispatchCmd*) ((uint32_t*)device_command_sequence.data() + (write_offset_bytes + sizeof(CQPrefetchCmd)) / sizeof(uint32_t)))->mcast;
@@ -1353,7 +1353,7 @@ void EnqueueProgramCommand::update_device_commands(
     run_program_go_signal.signal = RUN_MSG_GO;
     run_program_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     run_program_go_signal.master_y = (uint8_t)this->dispatch_core.y;
-    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id);
+    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id.to_index());
     cached_program_command_sequence.mcast_go_signal_cmd_ptr->go_signal =  *reinterpret_cast<uint32_t*>(&run_program_go_signal);
     cached_program_command_sequence.mcast_go_signal_cmd_ptr->wait_count = this->expected_num_workers_completed;
 }
@@ -1763,8 +1763,11 @@ void EnqueueTraceCommand::process() {
     uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
     for (const auto& [index, desc] : descriptor->descriptors) {
         uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
-        go_signal_cmd_size += desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(index) * sizeof(uint32_t) : 0;
-        go_signal_cmd_size += desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(index) * sizeof(uint32_t) : 0;
+        go_signal_cmd_size += device->noc_mcast_unicast_data(
+                                        SubDeviceId{index},
+                                        desc.num_traced_programs_needing_go_signal_multicast,
+                                        desc.num_traced_programs_needing_go_signal_unicast)
+                                        .size() * sizeof(uint32_t);
         go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
     }
     uint32_t cmd_sequence_sizeB =
@@ -1784,8 +1787,8 @@ void EnqueueTraceCommand::process() {
     DispatcherSelect dispatcher_for_go_signal = DispatcherSelect::DISPATCH_MASTER;
     if (this->device->dispatch_s_enabled()) {
         uint16_t index_bitmask = 0;
-        for (const auto &i : descriptor->sub_device_ids) {
-            index_bitmask |= 1 << i;
+        for (const auto &id : descriptor->sub_device_ids) {
+            index_bitmask |= 1 << id.to_index();
         }
         command_sequence.add_notify_dispatch_s_go_signal_cmd(false, index_bitmask);
         dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
@@ -1797,40 +1800,40 @@ void EnqueueTraceCommand::process() {
     reset_launch_message_read_ptr_go_signal.signal = RUN_MSG_RESET_READ_PTR;
     reset_launch_message_read_ptr_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     reset_launch_message_read_ptr_go_signal.master_y = (uint8_t)this->dispatch_core.y;
-    for (const auto& [index, desc] : descriptor->descriptors) {
-        const auto& num_noc_mcast_txns = desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(index) : 0;
-        const auto& num_noc_unicast_txns = desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(index) : 0;
-        reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
-        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
+    for (const auto& [id, desc] : descriptor->descriptors) {
+        const auto& num_noc_mcast_txns = desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(id) : 0;
+        const auto& num_noc_unicast_txns = desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(id) : 0;
+        reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
         // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
         command_sequence.add_dispatch_go_signal_mcast(
-            this->expected_num_workers_completed[index],
+            this->expected_num_workers_completed[id.to_index()],
             *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal),
             dispatch_message_addr,
             num_noc_mcast_txns,
             num_noc_unicast_txns,
-            device->noc_mcast_unicast_data(index, desc.num_traced_programs_needing_go_signal_multicast, desc.num_traced_programs_needing_go_signal_unicast),
+            device->noc_mcast_unicast_data(id, desc.num_traced_programs_needing_go_signal_multicast, desc.num_traced_programs_needing_go_signal_unicast),
             dispatcher_for_go_signal);
         if (desc.num_traced_programs_needing_go_signal_multicast) {
-            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, index);
+            this->expected_num_workers_completed[id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, id);
         }
         if (desc.num_traced_programs_needing_go_signal_unicast) {
-            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, index);
+            this->expected_num_workers_completed[id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, id);
         }
     }
     // Wait to ensure that all workers have reset their read_ptr. dispatch_d will stall until all workers have completed this step, before sending kernel config data to workers
     // or notifying dispatch_s that its safe to send the go_signal.
     // Clear the dispatch <--> worker semaphore, since trace starts at 0.
-    for (const auto &index : descriptor->sub_device_ids) {
-        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
+    for (const auto &id : descriptor->sub_device_ids) {
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
         if (this->device->distributed_dispatcher()) {
             command_sequence.add_dispatch_wait(
-                false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count, false, true, 1);
+                false, dispatch_message_addr, this->expected_num_workers_completed[id.to_index()], this->clear_count, false, true, 1);
         }
         command_sequence.add_dispatch_wait(
-            false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count);
+            false, dispatch_message_addr, this->expected_num_workers_completed[id.to_index()], this->clear_count);
         if (this->clear_count) {
-            this->expected_num_workers_completed[index] = 0;
+            this->expected_num_workers_completed[id.to_index()] = 0;
         }
     }
 
@@ -1956,13 +1959,13 @@ void HWCommandQueue::set_num_worker_sems_on_dispatch(uint32_t num_worker_sems) {
 }
 
 void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
-    uint32_t num_sub_devices = device->num_sub_devices();
+    auto num_sub_devices = device->num_sub_devices();
     uint32_t go_signals_cmd_size = 0;
     if (reset_launch_msg_state) {
         uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
-        for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        for (uint8_t i = 0; i < num_sub_devices; ++i) {
             uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
-            go_signal_cmd_size += device->num_noc_mcast_txns(i) * sizeof(uint32_t) + device->num_noc_unicast_txns(i) * sizeof(uint32_t);
+            go_signal_cmd_size += device->noc_mcast_unicast_data(SubDeviceId{i}).size() * sizeof(uint32_t);
             go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
         }
     }
@@ -1984,7 +1987,7 @@ void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
     if (reset_launch_msg_state) {
         if (device->dispatch_s_enabled()) {
             uint16_t index_bitmask = 0;
-            for (uint32_t i = 0; i < num_sub_devices; ++i) {
+            for (uint8_t i = 0; i < num_sub_devices; ++i) {
                 index_bitmask |= 1 << i;
             }
             command_sequence.add_notify_dispatch_s_go_signal_cmd(false, index_bitmask);
@@ -1994,13 +1997,13 @@ void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
         reset_launch_message_read_ptr_go_signal.signal = RUN_MSG_RESET_READ_PTR;
         reset_launch_message_read_ptr_go_signal.master_x = (uint8_t)this->physical_enqueue_program_dispatch_core.x;
         reset_launch_message_read_ptr_go_signal.master_y = (uint8_t)this->physical_enqueue_program_dispatch_core.y;
-        for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        for (uint8_t i = 0; i < num_sub_devices; ++i) {
             reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
             uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
             // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
-            command_sequence.add_dispatch_go_signal_mcast(expected_num_workers_completed[i], *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, device->num_noc_mcast_txns(i), device->num_noc_unicast_txns(i), device->noc_mcast_unicast_data(i), dispatcher_for_go_signal);
-            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, i);
-            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, i);
+            command_sequence.add_dispatch_go_signal_mcast(expected_num_workers_completed[i], *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, device->num_noc_mcast_txns({i}), device->num_noc_unicast_txns({i}), device->noc_mcast_unicast_data({i}), dispatcher_for_go_signal);
+            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, {i});
+            expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, {i});
         }
     }
     // Wait to ensure that all workers have reset their read_ptr. dispatch_d will stall until all workers have completed this step, before sending kernel config data to workers
@@ -2061,20 +2064,20 @@ void HWCommandQueue::set_exit_condition() {
 }
 
 template <typename T>
-void HWCommandQueue::enqueue_command(T& command, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::enqueue_command(T& command, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     command.process();
     if (blocking) {
         this->finish(sub_device_ids);
     }
 }
 
-void HWCommandQueue::enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     this->enqueue_read_buffer(*buffer, dst, blocking, sub_device_ids);
 }
 
 // Read buffer command is enqueued in the issue region and device writes requested buffer data into the completion
 // region
-void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_read_buffer");
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Read Buffer cannot be used with tracing");
 
@@ -2185,7 +2188,7 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
 }
 
 void HWCommandQueue::enqueue_write_buffer(
-    std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+    std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     // Top level API to accept different variants for buffer and src
     // For shared pointer variants, object lifetime is guaranteed at least till the end of this function
     auto data = std::visit([&](auto&& data) -> const void* {
@@ -2211,7 +2214,7 @@ CoreType HWCommandQueue::get_dispatch_core_type() {
     return dispatch_core_manager::instance().get_dispatch_core_type(device->id());
 }
 
-void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_write_buffer");
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Write Buffer cannot be used with tracing");
 
@@ -2405,7 +2408,7 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
 
 void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
     ZoneScopedN("HWCommandQueue_enqueue_program");
-    std::vector<uint32_t> sub_device_ids = {program.determine_sub_device_ids(device)};
+    std::vector<SubDeviceId> sub_device_ids = {program.determine_sub_device_ids(device)};
     TT_FATAL(sub_device_ids.size() == 1, "Programs must be executed on a single sub-device");
     if (not program.is_finalized()) {
         program.finalize(device);
@@ -2435,7 +2438,7 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
 
     // Snapshot of expected workers from previous programs, used for dispatch_wait cmd generation.
     uint32_t expected_workers_completed = this->manager.get_bypass_mode() ? this->trace_ctx->descriptors[sub_device_id].num_completion_worker_cores
-                                                                          : this->expected_num_workers_completed[sub_device_id];
+                                                                          : this->expected_num_workers_completed[sub_device_id.to_index()];
     if (this->manager.get_bypass_mode()) {
         if (program.runs_on_noc_multicast_only_cores()) {
             this->trace_ctx->descriptors[sub_device_id].num_traced_programs_needing_go_signal_multicast++;
@@ -2447,13 +2450,14 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         }
     } else {
         if (program.runs_on_noc_multicast_only_cores()) {
-            this->expected_num_workers_completed[sub_device_id] += device->num_worker_cores(HalProgrammableCoreType::TENSIX,sub_device_id);
+            this->expected_num_workers_completed[sub_device_id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, sub_device_id);
         }
         if (program.runs_on_noc_unicast_only_cores()) {
-            this->expected_num_workers_completed[sub_device_id] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
+            this->expected_num_workers_completed[sub_device_id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
         }
     }
 
+    auto &worker_launch_message_buffer_state = this->device->get_worker_launch_message_buffer_state(sub_device_id);
     auto command = EnqueueProgramCommand(
         this->id,
         this->device,
@@ -2461,18 +2465,18 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         program,
         this->physical_enqueue_program_dispatch_core,
         this->manager,
-        this->config_buffer_mgr[sub_device_id],
+        this->get_config_buffer_mgr(sub_device_id),
         expected_workers_completed,
         // The assembled program command will encode the location of the launch messages in the ring buffer
-        this->device->worker_launch_message_buffer_state[sub_device_id].get_mcast_wptr(),
-        this->device->worker_launch_message_buffer_state[sub_device_id].get_unicast_wptr(),
+        worker_launch_message_buffer_state.get_mcast_wptr(),
+        worker_launch_message_buffer_state.get_unicast_wptr(),
         sub_device_id);
     // Update wptrs for tensix and eth launch message in the device class
     if (program.runs_on_noc_multicast_only_cores()) {
-        this->device->worker_launch_message_buffer_state[sub_device_id].inc_mcast_wptr(1);
+        worker_launch_message_buffer_state.inc_mcast_wptr(1);
     }
     if (program.runs_on_noc_unicast_only_cores()) {
-        this->device->worker_launch_message_buffer_state[sub_device_id].inc_unicast_wptr(1);
+        worker_launch_message_buffer_state.inc_unicast_wptr(1);
     }
     this->enqueue_command(command, blocking, sub_device_ids);
 
@@ -2497,7 +2501,7 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         expected_workers_completed);
 }
 
-void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count, tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_enqueue_record_event");
 
     TT_FATAL(!this->manager.get_bypass_mode(), "Enqueue Record Event cannot be used with tracing");
@@ -2553,23 +2557,24 @@ void HWCommandQueue::enqueue_trace(const uint32_t trace_id, bool blocking) {
 
     this->enqueue_command(command, false, {});
 
-    for (const auto& [index, desc]: trace_inst->desc->descriptors) {
+    for (const auto& [id, desc]: trace_inst->desc->descriptors) {
          // Increment the expected worker cores counter due to trace programs completion
-        this->expected_num_workers_completed[index] += desc.num_completion_worker_cores;
+        this->expected_num_workers_completed[id.to_index()] += desc.num_completion_worker_cores;
         // After trace runs, the rdptr on each worker will be incremented by the number of programs in the trace
         // Update the wptr on host to match state. If the trace doesn't execute on a
         // class of worker (unicast or multicast), it doesn't reset or modify the
         // state for those workers.
+        auto &worker_launch_message_buffer_state = this->device->get_worker_launch_message_buffer_state(id);
         if (desc.num_traced_programs_needing_go_signal_multicast) {
-            this->device->worker_launch_message_buffer_state[index].set_mcast_wptr(desc.num_traced_programs_needing_go_signal_multicast);
+            worker_launch_message_buffer_state.set_mcast_wptr(desc.num_traced_programs_needing_go_signal_multicast);
         }
         if (desc.num_traced_programs_needing_go_signal_unicast) {
-            this->device->worker_launch_message_buffer_state[index].set_unicast_wptr(desc.num_traced_programs_needing_go_signal_unicast);
+            worker_launch_message_buffer_state.set_unicast_wptr(desc.num_traced_programs_needing_go_signal_unicast);
         }
         // The config buffer manager is unaware of what memory is used inside the trace, so mark all memory as used so that
         // it will force a stall and avoid stomping on in-use state.
         // TODO(jbauman): Reuse old state from the trace.
-        this->config_buffer_mgr[index].mark_completely_full(this->expected_num_workers_completed[index]);
+        this->config_buffer_mgr[id.to_index()].mark_completely_full(this->expected_num_workers_completed[id.to_index()]);
     }
     if (blocking) {
         this->finish(trace_inst->desc->sub_device_ids);
@@ -2833,7 +2838,7 @@ void HWCommandQueue::read_completion_queue() {
     }
 }
 
-void HWCommandQueue::finish(tt::stl::Span<const uint32_t> sub_device_ids) {
+void HWCommandQueue::finish(tt::stl::Span<const SubDeviceId> sub_device_ids) {
     ZoneScopedN("HWCommandQueue_finish");
     tt::log_debug(tt::LogDispatch, "Finish for command queue {}", this->id);
     std::shared_ptr<Event> event = std::make_shared<Event>();
@@ -2900,11 +2905,12 @@ void HWCommandQueue::record_begin(const uint32_t tid, std::shared_ptr<detail::Tr
     this->trace_ctx = ctx;
     // Record original value of launch msg wptr
     for (uint32_t i = 0; i < num_sub_devices; ++i) {
-        this->multicast_cores_launch_message_wptr_reset[i] = this->device->worker_launch_message_buffer_state[i].get_mcast_wptr();
-        this->unicast_cores_launch_message_wptr_reset[i] = this->device->worker_launch_message_buffer_state[i].get_unicast_wptr();
+        auto &worker_launch_message_buffer_state = this->device->get_worker_launch_message_buffer_state(SubDeviceId{i});
+        this->multicast_cores_launch_message_wptr_reset[i] = worker_launch_message_buffer_state.get_mcast_wptr();
+        this->unicast_cores_launch_message_wptr_reset[i] = worker_launch_message_buffer_state.get_unicast_wptr();
         // Set launch msg wptr to 0. Every time trace runs on device, it will ensure that the workers
         // reset their rptr to be in sync with device.
-        this->device->worker_launch_message_buffer_state[i].reset();
+        worker_launch_message_buffer_state.reset();
     }
     this->manager.set_bypass_mode(true, true);  // start
     for (uint32_t i = 0; i < num_sub_devices; ++i) {
@@ -2928,16 +2934,17 @@ void HWCommandQueue::record_end() {
     // was captured. This is needed since trace capture modifies the wptr state on host, even though device
     // doesn't run any programs.
     for (uint32_t i = 0; i < num_sub_devices; ++i) {
-        this->device->worker_launch_message_buffer_state[i].set_mcast_wptr(this->multicast_cores_launch_message_wptr_reset[i]);
-        this->device->worker_launch_message_buffer_state[i].set_unicast_wptr(this->unicast_cores_launch_message_wptr_reset[i]);
+        auto &worker_launch_message_buffer_state = this->device->get_worker_launch_message_buffer_state(SubDeviceId{i});
+        worker_launch_message_buffer_state.set_mcast_wptr(this->multicast_cores_launch_message_wptr_reset[i]);
+        worker_launch_message_buffer_state.set_unicast_wptr(this->unicast_cores_launch_message_wptr_reset[i]);
     }
     // Copy the desc keys into a separate vector. When enqueuing traces, we sometimes need to pass sub-device ids separately
     this->trace_ctx->sub_device_ids.reserve(this->trace_ctx->descriptors.size());
-    for (const auto& [index, _]: this->trace_ctx->descriptors) {
-        this->trace_ctx->sub_device_ids.push_back(index);
+    for (const auto& [id, _]: this->trace_ctx->descriptors) {
+        this->trace_ctx->sub_device_ids.push_back(id);
         // config_buffer_mgr reflects the state inside the trace, not on the current device, so reset it.
         // TODO(jbauman): Use a temporary WorkingBufferSetMgr when recording a trace.
-        this->config_buffer_mgr[index].mark_completely_full(this->expected_num_workers_completed[index]);
+        this->get_config_buffer_mgr(id).mark_completely_full(this->expected_num_workers_completed[id.to_index()]);
     }
     this->tid = std::nullopt;
     this->trace_ctx = nullptr;
@@ -2952,10 +2959,10 @@ void HWCommandQueue::terminate() {
     this->enqueue_command(command, false, {});
 }
 
-WorkerConfigBufferMgr& HWCommandQueue::get_config_buffer_mgr(uint32_t index) { return config_buffer_mgr[index]; }
+WorkerConfigBufferMgr& HWCommandQueue::get_config_buffer_mgr(SubDeviceId sub_device_id) { return config_buffer_mgr[sub_device_id.to_index()]; }
 
-void HWCommandQueue::reset_config_buffer_mgr(const uint32_t max_index) {
-    for (uint32_t i = 0; i < max_index; ++i) {
+void HWCommandQueue::reset_config_buffer_mgr(const uint32_t num_entries) {
+    for (uint32_t i = 0; i < num_entries; ++i) {
         this->config_buffer_mgr[i] = WorkerConfigBufferMgr();
         for (uint32_t index = 0; index < tt::tt_metal::hal.get_programmable_core_type_count(); index++) {
             this->config_buffer_mgr[i].init_add_buffer(
@@ -2970,7 +2977,7 @@ void HWCommandQueue::reset_config_buffer_mgr(const uint32_t max_index) {
     }
 }
 
-std::vector<std::pair<uint32_t, uint32_t>> HWCommandQueue::get_expected_workers_completed(tt::stl::Span<const uint32_t> sub_device_ids) const {
+std::vector<std::pair<uint32_t, uint32_t>> HWCommandQueue::get_expected_workers_completed(tt::stl::Span<const SubDeviceId> sub_device_ids) const {
     std::vector<std::pair<uint32_t, uint32_t>> expected_workers_completed;
     if (sub_device_ids.empty()) {
         expected_workers_completed.reserve(this->device->num_sub_devices());
@@ -2981,8 +2988,8 @@ std::vector<std::pair<uint32_t, uint32_t>> HWCommandQueue::get_expected_workers_
         expected_workers_completed.reserve(sub_device_ids.size());
         for (uint32_t i = 0; i < sub_device_ids.size(); ++i) {
             auto sub_device_id = sub_device_ids[i];
-            TT_FATAL(sub_device_id < this->device->num_sub_devices(), "Invalid sub_device_id: {}", sub_device_id);
-            expected_workers_completed.emplace_back(sub_device_id, this->expected_num_workers_completed[sub_device_id]);
+            TT_FATAL(sub_device_id.to_index() < this->device->num_sub_devices(), "Invalid sub_device_id: {}", sub_device_id.to_index());
+            expected_workers_completed.emplace_back(sub_device_id.to_index(), this->expected_num_workers_completed[sub_device_id.to_index()]);
         }
     }
     return expected_workers_completed;
@@ -3063,7 +3070,7 @@ void EnqueueReadBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t>& dst,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     // TODO(agrebenisan): Move to deprecated
     ZoneScoped;
     tt_metal::detail::DispatchStateCheck(true);
@@ -3094,7 +3101,7 @@ void EnqueueWriteBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     std::vector<uint32_t>& src,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     // TODO(agrebenisan): Move to deprecated
     EnqueueWriteBuffer(cq, buffer, src.data(), blocking, sub_device_ids);
 }
@@ -3104,7 +3111,7 @@ void EnqueueReadBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void* dst,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
         .type = EnqueueCommandType::ENQUEUE_READ_BUFFER, .blocking = blocking, .buffer = buffer, .dst = dst, .sub_device_ids = sub_device_ids});
@@ -3115,7 +3122,7 @@ void EnqueueWriteBuffer(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
         .type = EnqueueCommandType::ENQUEUE_WRITE_BUFFER, .blocking = blocking, .buffer = buffer, .src = src, .sub_device_ids = sub_device_ids});
@@ -3128,7 +3135,7 @@ void EnqueueProgram(
         CommandInterface{.type = EnqueueCommandType::ENQUEUE_PROGRAM, .blocking = blocking, .program = &program});
 }
 
-void EnqueueRecordEvent(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const uint32_t> sub_device_ids) {
+void EnqueueRecordEvent(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{
         .type = EnqueueCommandType::ENQUEUE_RECORD_EVENT,
@@ -3183,7 +3190,7 @@ bool EventQuery(const std::shared_ptr<Event>& event) {
     return event_completed;
 }
 
-void Finish(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids) {
+void Finish(CommandQueue& cq, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     detail::DispatchStateCheck(true);
     cq.run_command(CommandInterface{.type = EnqueueCommandType::FINISH, .blocking = true, .sub_device_ids = sub_device_ids});
     TT_ASSERT(
@@ -3209,7 +3216,7 @@ void EnqueueReadBufferImpl(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     void* dst,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     std::visit(
         [&](auto&& b) {
             using T = std::decay_t<decltype(b)>;
@@ -3226,7 +3233,7 @@ void EnqueueWriteBufferImpl(
     std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     HostDataType src,
     bool blocking,
-    tt::stl::Span<const uint32_t> sub_device_ids) {
+    tt::stl::Span<const SubDeviceId> sub_device_ids) {
     cq.hw_command_queue().enqueue_write_buffer(buffer, src, blocking, sub_device_ids);
 }
 
@@ -3262,7 +3269,7 @@ void EnqueueWaitForEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& eve
     cq.hw_command_queue().enqueue_wait_for_event(event);
 }
 
-void FinishImpl(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids) { cq.hw_command_queue().finish(sub_device_ids); }
+void FinishImpl(CommandQueue& cq, tt::stl::Span<const SubDeviceId> sub_device_ids) { cq.hw_command_queue().finish(sub_device_ids); }
 
 void EnqueueTraceImpl(CommandQueue& cq, uint32_t trace_id, bool blocking) {
     cq.hw_command_queue().enqueue_trace(trace_id, blocking);
diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp
index 4db17e205ad..8c471a93eef 100644
--- a/tt_metal/impl/dispatch/command_queue.hpp
+++ b/tt_metal/impl/dispatch/command_queue.hpp
@@ -303,7 +303,7 @@ class EnqueueProgramCommand : public Command {
     uint32_t multicast_cores_launch_message_wptr = 0;
     uint32_t unicast_cores_launch_message_wptr = 0;
     // TODO: There will be multiple ids once programs support spanning multiple sub_devices
-    uint32_t sub_device_id = 0;
+    SubDeviceId sub_device_id = SubDeviceId{0};
 
    public:
     EnqueueProgramCommand(
@@ -317,7 +317,7 @@ class EnqueueProgramCommand : public Command {
         uint32_t expected_num_workers_completed,
         uint32_t multicast_cores_launch_message_wptr,
         uint32_t unicast_cores_launch_message_wptr,
-        uint32_t sub_device_id);
+        SubDeviceId sub_device_id);
 
     void assemble_preamble_commands(
         ProgramCommandSequence& program_command_sequence, const tt::stl::Span<ConfigBufferEntry> kernel_config_addrs);
@@ -555,25 +555,25 @@ class HWCommandQueue {
 
     // sub_device_ids only needs to be passed when blocking and there are specific sub_devices to wait on
     template <typename T>
-    void enqueue_command(T& command, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
+    void enqueue_command(T& command, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids);
 
-    void enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
-    void enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
+    void enqueue_read_buffer(std::shared_ptr<Buffer>& buffer, void* dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids);
+    void enqueue_read_buffer(Buffer& buffer, void* dst, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids);
     void enqueue_write_buffer(
-        std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
-    void enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const uint32_t> sub_device_ids);
+        std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer, HostDataType src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids);
+    void enqueue_write_buffer(Buffer& buffer, const void* src, bool blocking, tt::stl::Span<const SubDeviceId> sub_device_ids);
     void enqueue_program(Program& program, bool blocking);
-    void enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count = false, tt::stl::Span<const uint32_t> sub_device_ids = {});
+    void enqueue_record_event(const std::shared_ptr<Event>& event, bool clear_count = false, tt::stl::Span<const SubDeviceId> sub_device_ids = {});
     void enqueue_wait_for_event(const std::shared_ptr<Event>& sync_event, bool clear_count = false);
     void enqueue_trace(const uint32_t trace_id, bool blocking);
-    void finish(tt::stl::Span<const uint32_t> sub_device_ids);
+    void finish(tt::stl::Span<const SubDeviceId> sub_device_ids);
     void terminate();
     void increment_num_entries_in_completion_q();
     void set_exit_condition();
 
-    WorkerConfigBufferMgr& get_config_buffer_mgr(uint32_t index);
-    void reset_config_buffer_mgr(const uint32_t max_index);
-    std::vector<std::pair<uint32_t, uint32_t>> get_expected_workers_completed(tt::stl::Span<const uint32_t> sub_device_ids) const;
+    WorkerConfigBufferMgr& get_config_buffer_mgr(SubDeviceId sub_device_id);
+    void reset_config_buffer_mgr(const uint32_t num_entries);
+    std::vector<std::pair<uint32_t, uint32_t>> get_expected_workers_completed(tt::stl::Span<const SubDeviceId> sub_device_ids) const;
 
     friend void EnqueueTraceImpl(CommandQueue& cq, uint32_t trace_id, bool blocking);
     friend void EnqueueProgramImpl(
@@ -585,17 +585,17 @@ class HWCommandQueue {
         std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
         void* dst,
         bool blocking,
-        tt::stl::Span<const uint32_t> sub_device_ids);
+        tt::stl::Span<const SubDeviceId> sub_device_ids);
     friend void EnqueueWriteBufferImpl(
         CommandQueue& cq,
         std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
         HostDataType src,
         bool blocking,
-        tt::stl::Span<const uint32_t> sub_device_ids);
+        tt::stl::Span<const SubDeviceId> sub_device_ids);
     friend void EnqueueGetBufferAddrImpl(void* dst_buf_addr, const Buffer* buffer);
     friend void EnqueueRecordEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event, tt::stl::Span<const SubDeviceId> sub_device_ids);
     friend void EnqueueWaitForEventImpl(CommandQueue& cq, const std::shared_ptr<Event>& event);
-    friend void FinishImpl(CommandQueue& cq, tt::stl::Span<const uint32_t> sub_device_ids);
+    friend void FinishImpl(CommandQueue& cq, tt::stl::Span<const SubDeviceId> sub_device_ids);
     friend CommandQueue;
     friend Device;
     friend detail::Program_;
@@ -613,7 +613,7 @@ struct CommandInterface {
     std::optional<void*> dst;
     std::optional<std::shared_ptr<Event>> event;
     std::optional<uint32_t> trace_id;
-    tt::stl::Span<const uint32_t> sub_device_ids;
+    tt::stl::Span<const SubDeviceId> sub_device_ids;
 };
 
 inline namespace v0 {
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index c3e0d546579..23099f93548 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -140,7 +140,7 @@ class Program_ {
 
     ProgramConfig& get_program_config(uint32_t programmable_core_type_index);
 
-    const std::vector<uint32_t> &determine_sub_device_ids(const Device *device);
+    const std::vector<SubDeviceId> &determine_sub_device_ids(const Device *device);
 
     // debug/test
     uint32_t get_sem_base_addr(Device *device, CoreCoord logical_core, CoreType core_type);
@@ -164,8 +164,7 @@ class Program_ {
     bool finalized_;
     bool cached_;
 
-    // This will be turned into a map by SubDeviceManager handles once implemented
-    std::optional<std::vector<uint32_t>> sub_device_ids_;
+    std::unordered_map<SubDeviceManagerId, std::vector<SubDeviceId>> sub_device_ids_;
 
     struct CircularBufferAllocator {
         CircularBufferAllocator(const CoreRange &core_range_) : core_range(core_range_) {}
@@ -749,14 +748,8 @@ void Program::allocate_circular_buffers(const Device *device) { pimpl_->allocate
 void detail::Program_::validate_circular_buffer_region(const Device *device) {
     //ZoneScoped;
 
-    // Only pass sub_device_ids if sub-device manager is active
-    // Allocator is handled differently from other sub_device apis since the global allocator is always active
-    // State when there is no active manager is normally treated as having 1 sub_device, which is used to query state
-    // For allocator, we don't have a sub_device allocator when there is no active manager, only the global allocator
     // TODO: Circular buffer allocation and validation could be better optimized by determining usage per sub-device
-    constexpr bool active_sub_device_manager = false;
-    const auto &sub_device_ids = active_sub_device_manager ? this->determine_sub_device_ids(device) : std::vector<uint32_t>();
-    std::optional<DeviceAddr> lowest_address = device->lowest_occupied_compute_l1_address(sub_device_ids);
+    std::optional<DeviceAddr> lowest_address = device->lowest_occupied_compute_l1_address(this->determine_sub_device_ids(device));
     uint32_t max_l1_size = device->l1_size_per_core();
 
     for (const CircularBufferAllocator &cb_allocator : this->cb_allocators_) {
@@ -1301,22 +1294,47 @@ uint32_t& detail::Program_::get_program_config_size(uint32_t programmable_core_t
     return this->program_config_sizes_[programmable_core_type_index];
 }
 
-const std::vector<uint32_t> &detail::Program_::determine_sub_device_ids(const Device *device) {
+const std::vector<SubDeviceId> &detail::Program_::determine_sub_device_ids(const Device *device) {
     // We need to calculate the sub_device_id when we haven't compiled the program yet, or this is the first time we
     // are getting the sub_device_ids after compilation
-    if (this->compiled_.empty() || !this->sub_device_ids_.has_value()) {
-        if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE") != nullptr) {
+    auto sub_device_manager_id = device->get_active_sub_device_manager_id();
+    auto sub_device_ids = this->sub_device_ids_.find(sub_device_manager_id);
+    if (this->compiled_.empty() || sub_device_ids == this->sub_device_ids_.end()) {
+        if (!this->compiled_.empty()) {
+            TT_FATAL(this->sub_device_ids_.empty(), "Multiple sub device managers are not currently supported for a single program");
+        }
+        if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE") != nullptr || sub_device_manager_id == device->get_default_sub_device_manager_id()) {
             // No sub device manager, nothing to validate
-            this->sub_device_ids_ = {0};
+            auto [sub_device_ids, _] = this->sub_device_ids_.insert_or_assign(sub_device_manager_id, std::vector<SubDeviceId>{SubDeviceId{0}});
+            return sub_device_ids->second;
         } else {
-            // TODO: Add logic for determining which sub devices are used by the currently active configuration
-            // When program hasn't compiled, we will determine and return a value without caching the id inside program
-            // After program is compiled, the first time this is called we will compute and store the id.
-            // This makes subsequent calls faster, and is why this function is not const
-            this->sub_device_ids_ = {0};
+            std::unordered_set<SubDeviceId> used_sub_device_ids;
+            auto find_sub_device_ids = [&] (HalProgrammableCoreType core_type) {
+                const auto& program_kgs = this->get_kernel_groups(hal.get_programmable_core_type_index(core_type));
+                uint32_t num_intersections = 0;
+                uint32_t num_cores = 0;
+                for (const auto& kg : program_kgs) {
+                    for (uint8_t i = 0; i < device->num_sub_devices(); ++i) {
+                        const auto& sub_device_cores = device->worker_cores(core_type, SubDeviceId{i});
+                        auto intersection = sub_device_cores.intersection(kg.core_ranges);
+                        if (intersection.size() > 0) {
+                            used_sub_device_ids.insert(SubDeviceId{i});
+                            num_intersections += intersection.num_cores();
+                        }
+                    }
+                    num_cores += kg.core_ranges.num_cores();
+                }
+                TT_FATAL(num_intersections == num_cores,
+                         "Kernel group cores do not match sub device cores for programmable core type {}",
+                         magic_enum::enum_name(core_type));
+            };
+            find_sub_device_ids(HalProgrammableCoreType::TENSIX);
+            find_sub_device_ids(HalProgrammableCoreType::ACTIVE_ETH);
+            auto [sub_device_ids, _] = this->sub_device_ids_.insert_or_assign(sub_device_manager_id, std::vector<SubDeviceId>(used_sub_device_ids.begin(), used_sub_device_ids.end()));
+            return sub_device_ids->second;
         }
     }
-    return *this->sub_device_ids_;
+    return sub_device_ids->second;
 }
 
 void detail::Program_::finalize(Device *device) {
@@ -1378,7 +1396,7 @@ void detail::Program_::compile(Device *device, bool fd_bootloader_mode) {
     // Clear the determined sub_device_ids when we compile the program for the first time
     // This way, determine_sub_device_ids is forced to recalculate with the finalized information on the used cores
     if (compiled_.empty()) {
-        this->sub_device_ids_ = std::nullopt;
+        this->sub_device_ids_.erase(device->get_active_sub_device_manager_id());
     }
 
     TT_FATAL(
@@ -1655,7 +1673,7 @@ bool Program::is_finalized() const { return pimpl_->is_finalized(); }
 bool Program::is_cached() const { return pimpl_->is_cached(); }
 void Program::set_cached() { pimpl_->set_cached(); }
 
-const std::vector<uint32_t> & Program::determine_sub_device_ids(const Device *device) { return pimpl_->determine_sub_device_ids(device); }
+const std::vector<SubDeviceId> &Program::determine_sub_device_ids(const Device *device) { return pimpl_->determine_sub_device_ids(device); }
 
 const ProgramTransferInfo &Program::get_program_transfer_info() const noexcept { return pimpl_->program_transfer_info; }
 
diff --git a/tt_metal/impl/program/program.hpp b/tt_metal/impl/program/program.hpp
index 5c77856572b..53fe92d38bc 100644
--- a/tt_metal/impl/program/program.hpp
+++ b/tt_metal/impl/program/program.hpp
@@ -149,7 +149,7 @@ class Program {
     uint32_t get_cb_size(Device *device, CoreCoord logical_core, CoreType core_type) const;
     void set_last_used_command_queue_for_testing(HWCommandQueue *queue);
 
-    const std::vector<uint32_t> &determine_sub_device_ids(const Device *device);
+    const std::vector<SubDeviceId> &determine_sub_device_ids(const Device *device);
 
    private:
     std::unique_ptr<detail::Program_> pimpl_;
diff --git a/tt_metal/impl/sub_device/sub_device.cpp b/tt_metal/impl/sub_device/sub_device.cpp
new file mode 100644
index 00000000000..f61e4c99005
--- /dev/null
+++ b/tt_metal/impl/sub_device/sub_device.cpp
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/common/core_coord.hpp"
+#include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tt_metal/llrt/hal.hpp"
+#include "tt_metal/tt_stl/span.hpp"
+
+namespace tt::tt_metal {
+
+SubDevice::SubDevice(const std::array<CoreRangeSet, NumHalProgrammableCoreTypes>& cores) : cores_(cores) {
+    this->validate();
+}
+
+SubDevice::SubDevice(tt::stl::Span<const CoreRangeSet> cores) {
+    TT_FATAL(cores.size() <= this->cores_.size(), "Too many core types for SubDevice");
+    std::copy(cores.begin(), cores.end(), this->cores_.begin());
+    this->validate();
+}
+
+SubDevice::SubDevice(std::array<CoreRangeSet, NumHalProgrammableCoreTypes>&& cores) : cores_(std::move(cores)){
+    this->validate();
+}
+
+void SubDevice::validate() const {
+    auto num_core_types = hal.get_programmable_core_type_count();
+    for (uint32_t i = num_core_types; i < NumHalProgrammableCoreTypes; ++i) {
+        TT_FATAL(this->cores_[i].empty(), "CoreType {} is not allowed in SubDevice", static_cast<HalProgrammableCoreType>(i));
+    }
+    TT_FATAL(this->cores_[static_cast<uint32_t>(HalProgrammableCoreType::IDLE_ETH)].empty(), "CoreType IDLE_ETH is not allowed in SubDevice");
+}
+
+bool SubDevice::has_core_type(HalProgrammableCoreType core_type) const {
+    return !this->cores_[static_cast<uint32_t>(core_type)].empty();
+}
+
+uint32_t SubDevice::num_cores(HalProgrammableCoreType core_type) const {
+    return this->cores_[static_cast<uint32_t>(core_type)].num_cores();
+}
+
+const std::array<CoreRangeSet, NumHalProgrammableCoreTypes> &SubDevice::cores() const {
+    return this->cores_;
+}
+
+const CoreRangeSet &SubDevice::cores(HalProgrammableCoreType core_type) const {
+    return this->cores_[static_cast<uint32_t>(core_type)];
+}
+
+}  // namespace tt::tt_metal
diff --git a/tt_metal/impl/sub_device/sub_device.hpp b/tt_metal/impl/sub_device/sub_device.hpp
new file mode 100644
index 00000000000..451d3aa8d03
--- /dev/null
+++ b/tt_metal/impl/sub_device/sub_device.hpp
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+
+#include "tt_metal/common/core_coord.hpp"
+#include "tt_metal/llrt/hal.hpp"
+#include "tt_metal/tt_stl/span.hpp"
+
+namespace tt::tt_metal {
+
+inline namespace v0 {
+
+class SubDevice {
+   public:
+    SubDevice(const std::array<CoreRangeSet, NumHalProgrammableCoreTypes>& cores);
+    SubDevice(tt::stl::Span<const CoreRangeSet> cores);
+    SubDevice(std::array<CoreRangeSet, NumHalProgrammableCoreTypes>&& cores);
+
+    SubDevice(const SubDevice& sub_device) = default;
+    SubDevice& operator=(const SubDevice& sub_device) = default;
+
+    SubDevice(SubDevice&& sub_device) noexcept = default;
+    SubDevice& operator=(SubDevice&& sub_device) noexcept = default;
+
+    bool has_core_type(HalProgrammableCoreType core_type) const;
+    uint32_t num_cores(HalProgrammableCoreType core_type) const;
+    const std::array<CoreRangeSet, NumHalProgrammableCoreTypes>& cores() const;
+    const CoreRangeSet& cores(HalProgrammableCoreType core_type) const;
+
+   private:
+    void validate() const;
+
+    // These are logical coords from the original device grid
+    // There is no remapping of logical coords
+    std::array<CoreRangeSet, NumHalProgrammableCoreTypes> cores_;
+};
+
+}  // namespace v0
+
+}  // namespace tt::tt_metal
diff --git a/tt_metal/impl/sub_device/sub_device_manager.cpp b/tt_metal/impl/sub_device/sub_device_manager.cpp
new file mode 100644
index 00000000000..1e9092c26c7
--- /dev/null
+++ b/tt_metal/impl/sub_device/sub_device_manager.cpp
@@ -0,0 +1,305 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tt_metal/impl/sub_device/sub_device_manager.hpp"
+
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/host_api.hpp"
+#include "tt_metal/impl/allocator/allocator.hpp"
+#include "tt_metal/impl/device/device.hpp"
+#include "tt_metal/impl/dispatch/command_queue_interface.hpp"
+#include "tt_metal/impl/kernels/data_types.hpp"
+#include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
+#include "tt_metal/impl/trace/trace.hpp"
+#include "tt_metal/impl/trace/trace_buffer.hpp"
+#include "tt_metal/tt_stl/span.hpp"
+
+namespace tt::tt_metal {
+
+namespace detail {
+
+SubDeviceManager::SubDeviceManager(
+    tt::stl::Span<const SubDevice> sub_devices, DeviceAddr local_l1_size, Device *device) :
+    sub_devices_(sub_devices.begin(), sub_devices.end()),
+    local_l1_size_(align(local_l1_size, hal.get_alignment(HalMemType::L1))),
+    device_(device) {
+    TT_ASSERT(device != nullptr, "Device must not be null");
+    this->validate_sub_devices();
+    this->populate_num_cores();
+    this->populate_sub_allocators();
+    this->populate_noc_data();
+    this->populate_worker_launch_message_buffer_state();
+}
+
+SubDeviceManager::SubDeviceManager(Device *device, std::unique_ptr<Allocator> &&global_allocator) : device_(device) {
+    TT_ASSERT(device != nullptr, "Device must not be null");
+    this->local_l1_size_ = 0;
+    const auto& compute_grid_size = this->device_->compute_with_storage_grid_size();
+    const auto& active_eth_cores = this->device_->get_active_ethernet_cores(true);
+    std::vector<CoreRange> active_eth_core_ranges;
+    active_eth_core_ranges.reserve(active_eth_cores.size());
+    for (const auto& core : active_eth_cores) {
+        active_eth_core_ranges.emplace_back(core, core);
+    }
+
+    this->sub_devices_ = {SubDevice(std::array{
+        CoreRangeSet(CoreRange({0, 0}, {compute_grid_size.x - 1, compute_grid_size.y - 1})),
+        CoreRangeSet(std::move(active_eth_core_ranges))})};
+    // No need to validate sub-devices since this constructs a sub-device of the entire grid
+    this->populate_num_cores();
+    this->sub_device_allocators_.push_back(std::move(global_allocator));
+    this->populate_noc_data();
+    this->populate_worker_launch_message_buffer_state();
+}
+
+SubDeviceManager::~SubDeviceManager() {
+    for (const auto &allocator : this->sub_device_allocators_) {
+        if (allocator) {
+            // Clear the bank managers, this makes subsequent buffer deallocations fast
+            allocator::clear(*allocator);
+            // Deallocate all buffers
+            // This is done to set buffer object status to Deallocated
+            const auto &allocated_buffers = allocator::get_allocated_buffers(*allocator);
+            for (auto buf = allocated_buffers.begin(); buf != allocated_buffers.end();) {
+                tt::tt_metal::DeallocateBuffer(*(*(buf++)));
+            }
+        }
+    }
+}
+
+uint8_t SubDeviceManager::num_sub_devices() const { return this->sub_devices_.size(); }
+
+const SubDevice& SubDeviceManager::sub_device(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return sub_devices_[sub_device_index];
+}
+
+const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_mcast_data(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return noc_mcast_data_[sub_device_index];
+}
+
+const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_unicast_data(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return noc_unicast_data_[sub_device_index];
+}
+
+const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_mcast_unicast_data(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return noc_mcast_unicast_data_[sub_device_index];
+}
+
+const std::unique_ptr<Allocator> &SubDeviceManager::get_initialized_allocator(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    TT_FATAL(this->sub_device_allocators_[sub_device_index], "SubDevice allocator not initialized");
+    return this->sub_device_allocators_[sub_device_index];
+}
+
+std::unique_ptr<Allocator> &SubDeviceManager::sub_device_allocator(SubDeviceId sub_device_id) {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return this->sub_device_allocators_[sub_device_index];
+}
+
+std::shared_ptr<TraceBuffer> &SubDeviceManager::create_trace(uint32_t tid) {
+    auto [trace, emplaced] = this->trace_buffer_pool_.emplace(tid, Trace::create_empty_trace_buffer());
+    TT_ASSERT(emplaced, "Trace buffer with tid {} already exists", tid);
+    return trace->second;
+}
+
+void SubDeviceManager::release_trace(uint32_t tid) {
+    this->trace_buffer_pool_.erase(tid);
+}
+
+std::shared_ptr<TraceBuffer> SubDeviceManager::get_trace(uint32_t tid) {
+    auto trace = this->trace_buffer_pool_.find(tid);
+    if (trace != this->trace_buffer_pool_.end()) {
+        return trace->second;
+    }
+    return nullptr;
+}
+
+void SubDeviceManager::reset_worker_launch_message_buffer_state() {
+    std::for_each(this->worker_launch_message_buffer_state_.begin(), this->worker_launch_message_buffer_state_.end(), std::mem_fn(&LaunchMessageRingBufferState::reset));
+}
+
+LaunchMessageRingBufferState& SubDeviceManager::get_worker_launch_message_buffer_state(SubDeviceId sub_device_id) {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return this->worker_launch_message_buffer_state_[sub_device_index];
+}
+
+bool SubDeviceManager::has_allocations() const {
+    for (const auto& allocator : this->sub_device_allocators_) {
+        if (allocator && allocator->allocated_buffers.size() > 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+DeviceAddr SubDeviceManager::local_l1_size() const { return this->local_l1_size_; }
+
+uint8_t SubDeviceManager::get_sub_device_index(SubDeviceId sub_device_id) const {
+    auto sub_device_index = sub_device_id.to_index();
+    TT_FATAL(
+        sub_device_index < this->sub_devices_.size(),
+        "SubDevice index {} out of bounds {}",
+        sub_device_index,
+        this->sub_devices_.size());
+    return sub_device_index;
+}
+
+void SubDeviceManager::validate_sub_devices() const {
+    // Validate sub device cores fit inside the device grid
+    const auto& compute_grid_size = this->device_->compute_with_storage_grid_size();
+    CoreRange device_worker_cores = CoreRange({0, 0}, {compute_grid_size.x - 1, compute_grid_size.y - 1});
+    const auto& device_eth_cores = this->device_->get_active_ethernet_cores(true);
+    for (const auto& sub_device : this->sub_devices_) {
+        const auto& worker_cores = sub_device.cores(HalProgrammableCoreType::TENSIX);
+        TT_FATAL(
+            device_worker_cores.contains(worker_cores),
+            "Tensix cores {} specified in sub device must be within device grid {}",
+            worker_cores,
+            device_worker_cores);
+        const auto& eth_cores = sub_device.cores(HalProgrammableCoreType::ACTIVE_ETH);
+        uint32_t num_eth_cores = 0;
+        for (const auto& dev_eth_core : device_eth_cores) {
+            if (eth_cores.contains(dev_eth_core)) {
+                num_eth_cores++;
+            }
+        }
+        TT_FATAL(
+            num_eth_cores == eth_cores.num_cores(),
+            "Ethernet cores {} specified in sub device must be within device grid",
+            eth_cores);
+    }
+    if (this->sub_devices_.size() < 2) {
+        return;
+    }
+    // Validate no overlap of sub devices
+    for (uint32_t i = 0; i < this->sub_devices_.size(); ++i) {
+        for (uint32_t j = i + 1; j < this->sub_devices_.size(); ++j) {
+            for (uint32_t k = 0; k < NumHalProgrammableCoreTypes; ++k) {
+                TT_FATAL(
+                    !(this->sub_devices_[i].cores()[k].intersects(this->sub_devices_[j].cores()[k])),
+                    "SubDevices specified for SubDeviceManager intersect");
+            }
+        }
+    }
+}
+
+void SubDeviceManager::populate_num_cores() {
+    for (const auto& sub_device : this->sub_devices_) {
+        for (uint32_t i = 0; i < NumHalProgrammableCoreTypes; ++i) {
+            this->num_cores_[i] += sub_device.num_cores(static_cast<HalProgrammableCoreType>(i));
+        }
+    }
+}
+
+void SubDeviceManager::populate_sub_allocators() {
+    this->sub_device_allocators_.resize(this->num_sub_devices());
+    if (this->local_l1_size_ == 0) {
+        return;
+    }
+    const auto& global_allocator_config = this->device_->get_initialized_allocator()->config;
+    // Construct allocator config from soc_desc
+    // Take max alignment to satisfy NoC rd/wr constraints
+    // Tensix/Eth -> PCIe/DRAM src and dst addrs must be L1_ALIGNMENT aligned
+    // PCIe/DRAM -> Tensix/Eth src and dst addrs must be DRAM_ALIGNMENT aligned
+    // Tensix/Eth <-> Tensix/Eth src and dst addrs must be L1_ALIGNMENT aligned
+    for (uint32_t i = 0; i < this->num_sub_devices(); ++i) {
+        const auto& compute_cores = this->sub_devices_[i].cores(HalProgrammableCoreType::TENSIX);
+        if (compute_cores.empty()) {
+            continue;
+        }
+        AllocatorConfig config(
+            {.num_dram_channels = global_allocator_config.num_dram_channels,
+             .dram_bank_size = 0,
+             .dram_bank_offsets = global_allocator_config.dram_bank_offsets,
+             .dram_unreserved_base = global_allocator_config.dram_unreserved_base,
+             .l1_unreserved_base = global_allocator_config.l1_unreserved_base,
+             .worker_grid = compute_cores,
+             .worker_l1_size = global_allocator_config.l1_unreserved_base + this->local_l1_size_,
+             .storage_core_bank_size = std::nullopt,
+             .l1_small_size = 0,
+             .trace_region_size = 0,
+             .core_type_from_noc_coord_table = {},  // Populated later
+             .worker_log_to_physical_routing_x = global_allocator_config.worker_log_to_physical_routing_x,
+             .worker_log_to_physical_routing_y = global_allocator_config.worker_log_to_physical_routing_y,
+             .l1_bank_remap = {},
+             .compute_grid = compute_cores,
+             .alignment = global_allocator_config.alignment,
+             .disable_interleaved = true});
+        TT_FATAL(
+            config.l1_small_size < (config.storage_core_bank_size.has_value()
+                                        ? config.storage_core_bank_size.value()
+                                        : config.worker_l1_size - config.l1_unreserved_base),
+            "Reserved size must be less than bank size");
+        TT_FATAL(
+            config.l1_small_size % config.alignment == 0,
+            "Reserved size must be aligned to allocator alignment {}",
+            config.alignment);
+
+        // sub_devices only have compute cores for allocation
+        for (const CoreCoord& core : corerange_to_cores(compute_cores)) {
+            const auto noc_coord = this->device_->worker_core_from_logical_core(core);
+            config.core_type_from_noc_coord_table.insert({noc_coord, AllocCoreType::ComputeAndStore});
+        }
+
+        // L1_BANKING scheme creates 1 bank per DRAM core and splits up L1 such that there are power 2 num L1 banks
+        // This is the only allocator scheme supported because kernel APIs assume num L1 banks are power of 2
+        TT_ASSERT(this->device_->allocator_scheme_ == MemoryAllocator::L1_BANKING);
+        this->sub_device_allocators_[i] = std::make_unique<L1BankingAllocator>(config);
+    }
+}
+
+void SubDeviceManager::populate_noc_data() {
+    uint32_t num_sub_devices = this->num_sub_devices();
+    this->noc_mcast_data_.resize(num_sub_devices);
+    this->noc_unicast_data_.resize(num_sub_devices);
+    this->noc_mcast_unicast_data_.resize(num_sub_devices);
+
+    NOC noc_index = this->device_->dispatch_go_signal_noc();
+
+    for (uint32_t i = 0; i < num_sub_devices; ++i) {
+        const auto& tensix_cores = this->sub_devices_[i].cores(HalProgrammableCoreType::TENSIX);
+        const auto& eth_cores = this->sub_devices_[i].cores(HalProgrammableCoreType::ACTIVE_ETH);
+
+        uint32_t idx = 0;
+        auto& noc_mcast_data = this->noc_mcast_data_[i];
+        noc_mcast_data.resize(tensix_cores.size() * 2);
+        for (const auto& core_range : tensix_cores.ranges()) {
+            auto physical_start =
+                this->device_->physical_core_from_logical_core(core_range.start_coord, CoreType::WORKER);
+            auto physical_end = this->device_->physical_core_from_logical_core(core_range.end_coord, CoreType::WORKER);
+            auto physical_core_range = CoreRange(physical_start, physical_end);
+            noc_mcast_data[idx++] = this->device_->get_noc_multicast_encoding(noc_index, physical_core_range);
+            noc_mcast_data[idx++] = core_range.size();
+        }
+
+        idx = 0;
+        auto& noc_unicast_data = this->noc_unicast_data_[i];
+        for (const auto& core_range : eth_cores.ranges()) {
+            noc_unicast_data.resize(noc_unicast_data.size() + core_range.size());
+            for (const auto& core : core_range) {
+                auto physical_core = this->device_->physical_core_from_logical_core(core, CoreType::ETH);
+                noc_unicast_data[idx++] = this->device_->get_noc_unicast_encoding(noc_index, physical_core);
+            }
+        }
+        auto& noc_mcast_unicast_data = this->noc_mcast_unicast_data_[i];
+        noc_mcast_unicast_data.resize(noc_mcast_data.size() + noc_unicast_data.size());
+        std::copy(noc_mcast_data.begin(), noc_mcast_data.end(), noc_mcast_unicast_data.begin());
+        std::copy(
+            noc_unicast_data.begin(), noc_unicast_data.end(), noc_mcast_unicast_data.begin() + noc_mcast_data.size());
+    }
+}
+
+void SubDeviceManager::populate_worker_launch_message_buffer_state() {
+    this->worker_launch_message_buffer_state_.resize(this->num_sub_devices());
+    this->reset_worker_launch_message_buffer_state();
+}
+
+}  // namespace detail
+
+}  // namespace tt::tt_metal
diff --git a/tt_metal/impl/sub_device/sub_device_manager.hpp b/tt_metal/impl/sub_device/sub_device_manager.hpp
new file mode 100644
index 00000000000..c5de9e3f0f3
--- /dev/null
+++ b/tt_metal/impl/sub_device/sub_device_manager.hpp
@@ -0,0 +1,95 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "tt_metal/impl/allocator/allocator.hpp"
+#include "tt_metal/impl/dispatch/memcpy.hpp"
+#include "tt_metal/impl/kernels/data_types.hpp"
+#include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
+#include "tt_metal/tt_stl/span.hpp"
+
+namespace tt::tt_metal {
+
+class LaunchMessageRingBufferState;
+class TraceBuffer;
+
+inline namespace v0 {
+class Device;
+}  // namespace v0
+
+namespace detail {
+class SubDeviceManager {
+   public:
+    static constexpr uint32_t MAX_NUM_SUB_DEVICES = 16;
+    static_assert(MAX_NUM_SUB_DEVICES <= std::numeric_limits<SubDeviceId::Id>::max(), "MAX_NUM_SUB_DEVICES must be less than or equal to the max value of SubDeviceId::Id");
+    // Constructor used for the default/global device
+    SubDeviceManager(Device *device, std::unique_ptr<Allocator> &&global_allocator);
+    // Constructor used for regular sub-devices
+    SubDeviceManager(tt::stl::Span<const SubDevice> sub_devices, DeviceAddr local_l1_size, Device *device);
+
+    SubDeviceManager(const SubDeviceManager &other) = delete;
+    SubDeviceManager &operator=(const SubDeviceManager &other) = delete;
+
+    SubDeviceManager(SubDeviceManager &&other) noexcept = default;
+    SubDeviceManager &operator=(SubDeviceManager &&other) noexcept = default;
+
+    ~SubDeviceManager();
+
+    const SubDevice &sub_device(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t> &noc_mcast_data(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t> &noc_unicast_data(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data(SubDeviceId sub_device_id) const;
+
+    const std::unique_ptr<Allocator> &get_initialized_allocator(SubDeviceId sub_device_id) const;
+
+    std::unique_ptr<Allocator> &sub_device_allocator(SubDeviceId sub_device_id);
+
+    std::shared_ptr<TraceBuffer> &create_trace(uint32_t tid);
+    void release_trace(uint32_t tid);
+    std::shared_ptr<TraceBuffer> get_trace(uint32_t tid);
+
+    void reset_worker_launch_message_buffer_state();
+    LaunchMessageRingBufferState &get_worker_launch_message_buffer_state(SubDeviceId sub_device_id);
+
+    uint8_t num_sub_devices() const;
+    bool has_allocations() const;
+    DeviceAddr local_l1_size() const;
+
+   private:
+    void validate_sub_devices() const;
+    uint8_t get_sub_device_index(SubDeviceId sub_device_id) const;
+    void populate_num_cores();
+    void populate_sub_allocators();
+    void populate_noc_data();
+    void populate_worker_launch_message_buffer_state();
+
+    // TODO: We have a max number of sub-devices, so we can use a fixed size array
+    std::vector<SubDevice> sub_devices_;
+    Device *device_;
+
+    DeviceAddr local_l1_size_;
+    std::vector<std::unique_ptr<Allocator>> sub_device_allocators_;
+
+    std::array<uint32_t, NumHalProgrammableCoreTypes> num_cores_{};
+    std::vector<vector_memcpy_aligned<uint32_t>> noc_mcast_data_;
+    std::vector<vector_memcpy_aligned<uint32_t>> noc_unicast_data_;
+    // Concatenation of noc_mcast_data_ and noc_unicast_data_
+    // Useful for optimized copying of all coords when constructing FD commands
+    std::vector<vector_memcpy_aligned<uint32_t>> noc_mcast_unicast_data_;
+
+    std::unordered_map<uint32_t, std::shared_ptr<TraceBuffer>> trace_buffer_pool_;
+
+    std::vector<LaunchMessageRingBufferState> worker_launch_message_buffer_state_;
+};
+
+}  // namespace detail
+
+}  // namespace tt_metal
diff --git a/tt_metal/impl/sub_device/sub_device_types.hpp b/tt_metal/impl/sub_device/sub_device_types.hpp
new file mode 100644
index 00000000000..1e4229d2cfb
--- /dev/null
+++ b/tt_metal/impl/sub_device/sub_device_types.hpp
@@ -0,0 +1,103 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <type_traits>
+
+namespace tt::tt_metal {
+
+struct SubDeviceId {
+    using Id = uint8_t;
+    Id id;
+
+    Id to_index() const { return id; }
+
+    SubDeviceId& operator++() {
+        id++;
+        return *this;
+    }
+
+    SubDeviceId operator++(int) {
+        auto ret = *this;
+        this->operator++();
+        return ret;
+    }
+
+    SubDeviceId& operator+=(Id n) {
+        id += n;
+        return *this;
+    }
+
+    bool operator==(const SubDeviceId &other) const {
+        return id == other.id;
+    }
+
+    bool operator!=(const SubDeviceId &other) const {
+        return id != other.id;
+    }
+
+    static constexpr auto attribute_names = std::forward_as_tuple("id");
+    constexpr auto attribute_values() const {
+        return std::forward_as_tuple(this->id);
+    }
+};
+
+struct SubDeviceManagerId {
+    using Id = uint64_t;
+    Id id;
+
+    Id to_index() const { return id; }
+
+    SubDeviceManagerId& operator++() {
+        id++;
+        return *this;
+    }
+
+    SubDeviceManagerId operator++(int) {
+        auto ret = *this;
+        this->operator++();
+        return ret;
+    }
+
+    SubDeviceManagerId& operator+=(Id n) {
+        id += n;
+        return *this;
+    }
+
+    bool operator==(const SubDeviceManagerId &other) const {
+        return id == other.id;
+    }
+
+    bool operator!=(const SubDeviceManagerId &other) const {
+        return id != other.id;
+    }
+
+    static constexpr auto attribute_names = std::forward_as_tuple("id");
+    constexpr auto attribute_values() const {
+        return std::forward_as_tuple(this->id);
+    }
+};
+
+}  // namespace tt::tt_metal
+
+
+namespace std {
+
+template <>
+struct hash<tt::tt_metal::SubDeviceId> {
+    std::size_t operator()(tt::tt_metal::SubDeviceId const &o) const {
+        return std::hash<decltype(tt::tt_metal::SubDeviceId::id)>{}(o.to_index());
+    }
+};
+
+template <>
+struct hash<tt::tt_metal::SubDeviceManagerId> {
+    std::size_t operator()(tt::tt_metal::SubDeviceManagerId const &o) const {
+        return std::hash<decltype(tt::tt_metal::SubDeviceManagerId::id)>{}(o.to_index());
+    }
+};
+
+}  // namespace std
diff --git a/tt_metal/impl/trace/trace.cpp b/tt_metal/impl/trace/trace.cpp
index 59d16af6b8c..3ed36350c2d 100644
--- a/tt_metal/impl/trace/trace.cpp
+++ b/tt_metal/impl/trace/trace.cpp
@@ -81,9 +81,10 @@ void Trace::initialize_buffer(CommandQueue& cq, std::shared_ptr<TraceBuffer> tra
         trace_data.resize(trace_data.size() + numel_padding, 0 /*padding value*/);
     }
     cq.device()->trace_buffers_size += padded_size;
+    auto trace_region_size = cq.device()->get_initialized_allocator()->config.trace_region_size;
     TT_FATAL(
-        cq.device()->trace_buffers_size <= cq.device()->allocator_->config.trace_region_size,
-        "Creating trace buffers of size {}B on device {}, but only {}B is allocated for trace region.",  cq.device()->trace_buffers_size, cq.device()->id(),  cq.device()->allocator_->config.trace_region_size);
+        cq.device()->trace_buffers_size <= trace_region_size,
+        "Creating trace buffers of size {}B on device {}, but only {}B is allocated for trace region.",  cq.device()->trace_buffers_size, cq.device()->id(),  trace_region_size);
     // Commit trace to device DRAM
     trace_buffer->buffer = Buffer::create(
                             cq.device(), padded_size, page_size, BufferType::TRACE, TensorMemoryLayout::INTERLEAVED);
diff --git a/tt_metal/impl/trace/trace_buffer.hpp b/tt_metal/impl/trace/trace_buffer.hpp
index d2488970185..675359d3e3e 100644
--- a/tt_metal/impl/trace/trace_buffer.hpp
+++ b/tt_metal/impl/trace/trace_buffer.hpp
@@ -12,6 +12,7 @@
 #include <variant>
 
 #include "tt_metal/impl/buffers/buffer.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 
 namespace tt::tt_metal {
 
@@ -23,10 +24,10 @@ struct TraceDescriptor {
         uint32_t num_traced_programs_needing_go_signal_unicast = 0;
     };
     // Mapping of sub_device_id to descriptor
-    std::unordered_map<uint32_t, Descriptor> descriptors;
+    std::unordered_map<SubDeviceId, Descriptor> descriptors;
     // Store the keys of the map in a vector after descriptor has finished being populated
     // This is an optimization since we sometimes need to only pass the keys in a container
-    std::vector<uint32_t> sub_device_ids;
+    std::vector<SubDeviceId> sub_device_ids;
     std::vector<uint32_t> data;
 };
 }  // namespace detail
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index acd6e19ae0c..64c8ae5bb4f 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -24,6 +24,7 @@
 #include "tt_metal/impl/kernels/kernel.hpp"
 #include "tt_metal/impl/buffers/circular_buffer.hpp"
 #include "tt_metal/impl/buffers/global_semaphore.hpp"
+#include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp"
 
 #include "tt_metal/graph/graph_tracking.hpp"
@@ -837,9 +838,15 @@ DeviceAddr AllocateBuffer(Buffer *buffer) {
         GraphTracker::instance().track_allocate(buffer);
         return 0;
     }
-    // TODO: Validate correct sub-device manager id
-    auto& allocator = buffer->device()->get_initialized_allocator(buffer->sub_device_id());
+    if (buffer->sub_device_manager_id().has_value()) {
+        TT_FATAL(*(buffer->sub_device_manager_id()) == buffer->device()->get_active_sub_device_manager_id(),
+            "Sub-device manager id mismatch. Buffer sub-device manager id: {}, Device active sub-device manager id: {}",
+            *buffer->sub_device_manager_id(),
+            buffer->device()->get_active_sub_device_manager_id());
+    }
+    auto allocator = buffer->allocator();
     DeviceAddr allocated_addr;
+
     if (is_sharded(buffer->buffer_layout())) {
         allocated_addr = allocator::allocate_buffer(
             *allocator,
@@ -876,8 +883,13 @@ void DeallocateBuffer(Buffer *buffer) {
         TracyFreeN(reinterpret_cast<void const *>(buffer->address()), get_buffer_location_name(buffer->buffer_type(), buffer->device()->id()));
     }
 #endif
-    // TODO: Validate correct sub-device manager id
-    auto& allocator = buffer->device()->get_initialized_allocator(buffer->sub_device_id());
+    if (buffer->sub_device_manager_id().has_value()) {
+        TT_FATAL(*(buffer->sub_device_manager_id()) == buffer->device()->get_active_sub_device_manager_id(),
+            "Sub-device manager id mismatch. Buffer sub-device manager id: {}, Device active sub-device manager id: {}",
+            *buffer->sub_device_manager_id(),
+            buffer->device()->get_active_sub_device_manager_id());
+    }
+    auto allocator = buffer->allocator();
     allocator::deallocate_buffer(*allocator, buffer);
 }
 
@@ -1140,38 +1152,72 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
     return GlobalSemaphore::create(device, std::move(cores), initial_value, buffer_type);
 }
 
-std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id) {
-    if (address.has_value()) {
-        return Buffer::create(
-            config.device, *address, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt, sub_device_id);
-    } else {
-        return Buffer::create(
-            config.device, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt, sub_device_id);
-    }
-}
-std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, std::optional<DeviceAddr> address, std::optional<uint32_t> sub_device_id) {
-    if (address.has_value()) {
-        return Buffer::create(
-            config.device,
-            *address,
-            config.size,
-            config.page_size,
-            config.buffer_type,
-            config.buffer_layout,
-            config.shard_parameters,
-            std::nullopt,
-            sub_device_id);
-    } else {
-        return Buffer::create(
-            config.device,
-            config.size,
-            config.page_size,
-            config.buffer_type,
-            config.buffer_layout,
-            config.shard_parameters,
-            std::nullopt,
-            sub_device_id);
-    }
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config) {
+    return Buffer::create(
+        config.device,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        std::nullopt,
+        std::nullopt,
+        std::nullopt);
+}
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, DeviceAddr address) {
+    return Buffer::create(
+        config.device,
+        address,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        std::nullopt,
+        std::nullopt);
+}
+std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config, SubDeviceId sub_device_id) {
+    return Buffer::create(
+        config.device,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        std::nullopt,
+        std::nullopt,
+        sub_device_id);
+}
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config) {
+    return Buffer::create(
+        config.device,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        config.shard_parameters,
+        std::nullopt,
+        std::nullopt);
+}
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, DeviceAddr address) {
+    return Buffer::create(
+        config.device,
+        address,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        config.shard_parameters,
+        std::nullopt,
+        std::nullopt);
+}
+std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config, SubDeviceId sub_device_id) {
+    return Buffer::create(
+        config.device,
+        config.size,
+        config.page_size,
+        config.buffer_type,
+        config.buffer_layout,
+        config.shard_parameters,
+        std::nullopt,
+        sub_device_id);
 }
 
 void DeallocateBuffer(Buffer &buffer) { buffer.deallocate(); }
@@ -1283,7 +1329,7 @@ void ReplayTrace(Device *device, const uint8_t cq_id, const uint32_t tid, const
 
 void ReleaseTrace(Device *device, const uint32_t tid) { device->release_trace(tid); }
 
-void Synchronize(Device *device, const std::optional<uint8_t> cq_id, tt::stl::Span<const uint32_t> sub_device_ids) {
+void Synchronize(Device *device, const std::optional<uint8_t> cq_id, tt::stl::Span<const SubDeviceId> sub_device_ids) {
     if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE") == nullptr) {
         if (cq_id.has_value()) {
             Finish(device->command_queue(cq_id.value()), sub_device_ids);

From 14a6f6f086e88cda6ec782d363d120ea2a6f36ec Mon Sep 17 00:00:00 2001
From: Austin Ho <aho@tenstorrent.com>
Date: Mon, 11 Nov 2024 07:52:24 +0000
Subject: [PATCH 64/69] #13655: Refactor dispatching of go signal to not send
 noc txn data after go signal command Instead, we populate a static array on
 dispatcher when we change sub-device configurations with all noc txn data,
 and read from it using an offset passed in the go signal command Remove
 dynamic allocation of sub-devices/expected workers pairs, and pass them as
 separate spans Fix cmd in sweep_pgm_dispatch

---
 .../apis/host_apis/command_queue/Finish.rst   |   2 +-
 .../tools/profiler/test_device_profiler.py    |   4 +-
 .../dispatch/sweep_pgm_dispatch.sh            |   2 +-
 .../dispatch/test_dispatcher.cpp              |   1 +
 .../dispatch/test_prefetcher.cpp              |   6 +
 .../sub_device/test_sub_device.cpp            | 159 +++++++++++++++-
 tt_metal/impl/device/device.cpp               | 144 +++++++-------
 tt_metal/impl/device/device.hpp               |  10 +-
 tt_metal/impl/dispatch/command_queue.cpp      | 176 +++++++++---------
 tt_metal/impl/dispatch/command_queue.hpp      |  38 ++--
 .../impl/dispatch/command_queue_interface.hpp |   7 +
 tt_metal/impl/dispatch/cq_commands.hpp        |  13 +-
 tt_metal/impl/dispatch/debug_tools.cpp        |   4 +
 tt_metal/impl/dispatch/device_command.hpp     |  36 +++-
 .../impl/dispatch/kernels/cq_dispatch.cpp     |  48 +++--
 .../dispatch/kernels/cq_dispatch_slave.cpp    |  28 ++-
 tt_metal/impl/program/program.cpp             |  12 +-
 .../impl/sub_device/sub_device_manager.cpp    |  72 ++++---
 .../impl/sub_device/sub_device_manager.hpp    |  25 ++-
 tt_metal/impl/sub_device/sub_device_types.hpp |   3 +-
 20 files changed, 538 insertions(+), 252 deletions(-)

diff --git a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/Finish.rst b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/Finish.rst
index 521b17b0822..628838894c8 100644
--- a/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/Finish.rst
+++ b/docs/source/tt-metalium/tt_metal/apis/host_apis/command_queue/Finish.rst
@@ -3,4 +3,4 @@
 Finish
 ======
 
-.. doxygenfunction:: tt::tt_metal::v0::Finish(CommandQueue& cq)
+.. doxygenfunction:: tt::tt_metal::v0::Finish
diff --git a/tests/tt_metal/tools/profiler/test_device_profiler.py b/tests/tt_metal/tools/profiler/test_device_profiler.py
index af182dfc0ad..dfe905c7292 100644
--- a/tests/tt_metal/tools/profiler/test_device_profiler.py
+++ b/tests/tt_metal/tools/profiler/test_device_profiler.py
@@ -167,11 +167,11 @@ def test_dispatch_cores():
     REF_COUNT_DICT = {
         "grayskull": {
             "Tensix CQ Dispatch": 16,
-            "Tensix CQ Prefetch": 24,
+            "Tensix CQ Prefetch": 25,
         },
         "wormhole_b0": {
             "Tensix CQ Dispatch": 16,
-            "Tensix CQ Prefetch": 24,
+            "Tensix CQ Prefetch": 25,
         },
     }
 
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/sweep_pgm_dispatch.sh b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/sweep_pgm_dispatch.sh
index 6e8393e55ca..f143eb8cc2c 100755
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/sweep_pgm_dispatch.sh
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/sweep_pgm_dispatch.sh
@@ -182,7 +182,7 @@ build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 40
 echo "###" all procesors all cores 32 rta
 build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 256 -x $max_x -y $max_y -a 32 $trace_option
 build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 512 -x $max_x -y $max_y -a 32 $trace_option
-build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 1024 -x $max_x -y $max_y -a 32 $trace_optionv
+build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 1024 -x $max_x -y $max_y -a 32 $trace_option
 build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 2048 -x $max_x -y $max_y -a 32 $trace_option
 build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 4096 -x $max_x -y $max_y -a 32 $trace_option
 # build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 8192 -x $max_x -y $max_y -a 32 $trace_option
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
index be7d28a6afd..f2ded0bb4a4 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp
@@ -478,6 +478,7 @@ int main(int argc, char **argv) {
              num_compute_cores, // max_write_packed_cores
              0,
              dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
+             dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES,
              0,
              0,
              0,
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
index 15494c37bce..059e61b23a3 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
@@ -1921,6 +1921,7 @@ void configure_for_single_chip(Device *device,
          num_compute_cores, // max_write_packed_cores
          0,
          dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
+         dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES,
          0,
          0,
          0,
@@ -1941,6 +1942,7 @@ void configure_for_single_chip(Device *device,
         dispatch_compile_args[13] = dispatch_h_cb_sem;
         dispatch_compile_args[14] = dispatch_d_preamble_size;
         dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+        dispatch_compile_args[22] = dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES;
         CoreCoord phys_dispatch_d_downstream_core =
             packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
         configure_kernel_variant<true, false>(program,
@@ -1962,6 +1964,7 @@ void configure_for_single_chip(Device *device,
         dispatch_compile_args[13] = dispatch_downstream_cb_sem;
         dispatch_compile_args[14] = 0; // preamble size
         dispatch_compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
+        dispatch_compile_args[22] = 1; // max_num_go_signal_noc_data_entries is used for array sizing, set to 1 even if array isn't used
         CoreCoord phys_dispatch_h_upstream_core =
             packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
         configure_kernel_variant<false, true>(program,
@@ -2666,6 +2669,7 @@ void configure_for_multi_chip(Device *device,
          num_compute_cores,
          0,
          dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
+         dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES,
          0,
          0,
          0,
@@ -2686,6 +2690,7 @@ void configure_for_multi_chip(Device *device,
         dispatch_compile_args[13] = dispatch_h_cb_sem;
         dispatch_compile_args[14] = dispatch_d_preamble_size;
         dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+        dispatch_compile_args[22] = dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES;
         CoreCoord phys_dispatch_d_downstream_core =
             packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
         configure_kernel_variant<true, false>(program_r,
@@ -2706,6 +2711,7 @@ void configure_for_multi_chip(Device *device,
         dispatch_compile_args[13] = dispatch_downstream_cb_sem;
         dispatch_compile_args[14] = 0; // preamble size
         dispatch_compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
+        dispatch_compile_args[22] = 1; // max_num_go_signal_noc_data_entries is used for array sizing, set to 1 even if array isn't used
         CoreCoord phys_dispatch_h_upstream_core =
             packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
         configure_kernel_variant<false, true>(program,
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
index a54df488d7d..c5d074a8f15 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/test_sub_device.cpp
@@ -15,6 +15,7 @@
 #include "tt_metal/impl/device/device.hpp"
 #include "tt_metal/impl/event/event.hpp"
 #include "tt_metal/impl/sub_device/sub_device.hpp"
+#include "tests/tt_metal/test_utils/stimulus.hpp"
 #include "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp"
 
 using namespace tt::tt_metal;
@@ -104,7 +105,7 @@ std::tuple<Program, Program, Program, std::unique_ptr<GlobalSemaphore>> create_b
         waiter_program,
         "tests/tt_metal/tt_metal/unit_tests_fast_dispatch/sub_device/kernels/persistent_remote_waiter.cpp",
         waiter_core,
-        tt_metal::EthernetConfig{
+        EthernetConfig{
             .noc = NOC::RISCV_0_default,
             .processor = DataMovementProcessor::RISCV_0});
     std::array<uint32_t, 7> waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y, tensix_waiter_core_physical.x, tensix_waiter_core_physical.y, eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE};
@@ -197,7 +198,9 @@ TEST_F(CommandQueueSingleCardFixture, TestSubDeviceAllocations) {
         }
 
         auto buffer_2 = CreateBuffer(interleaved_config);
-
+        EXPECT_THROW(CreateBuffer(shard_config_1, SubDeviceId{1}), std::exception);
+        EXPECT_THROW(device->clear_loaded_sub_device_manager(), std::exception);
+        EXPECT_THROW(device->load_sub_device_manager(sub_device_manager_2), std::exception);
         DeallocateBuffer(*buffer_1);
         device->clear_loaded_sub_device_manager();
         device->load_sub_device_manager(sub_device_manager_2);
@@ -427,4 +430,156 @@ TEST_F(CommandQueueSingleCardTraceFixture, TestSubDeviceTraceBasicEthPrograms) {
     }
 }
 
+TEST_F(CommandQueueSingleCardTraceFixture, TestSubDeviceTraceProgramsReconfigureSubDevices) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::array{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+    SubDevice sub_device_3(std::array{CoreRangeSet(std::array{CoreRange({2, 4}, {3, 4}), CoreRange({5, 1}, {6, 3})})});
+    uint32_t num_iters = 5;
+    for (Device *device : devices_) {
+        if (!does_device_have_active_eth_cores(device)) {
+            GTEST_SKIP() << "Skipping test because device " << device->id() << " does not have any active ethernet cores";
+        }
+        auto eth_core = *device->get_active_ethernet_cores(true).begin();
+        SubDevice sub_device_4(std::array{CoreRangeSet(std::array{CoreRange({2, 1}, {2, 2}), CoreRange({1, 5}, {5, 5})}), CoreRangeSet(CoreRange(eth_core, eth_core))});
+
+        auto sub_device_manager_1 = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        auto sub_device_manager_2 = device->create_sub_device_manager({sub_device_3, sub_device_4}, 3200);
+
+        device->load_sub_device_manager(sub_device_manager_1);
+
+        auto [waiter_program_1, syncer_program_1, incrementer_program_1, global_sem_1] = create_basic_sync_program(device, sub_device_1, sub_device_2);
+
+        // Compile the programs
+        EnqueueProgram(device->command_queue(), waiter_program_1, false);
+        EnqueueProgram(device->command_queue(), syncer_program_1, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+        Synchronize(device);
+
+        // Capture the trace
+        auto tid_1 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), waiter_program_1, false);
+        EnqueueProgram(device->command_queue(), syncer_program_1, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_1);
+
+        auto tid_2 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), syncer_program_1, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_2);
+
+        device->load_sub_device_manager(sub_device_manager_2);
+
+        auto [waiter_program_2, syncer_program_2, incrementer_program_2, global_sem_2] = create_basic_eth_sync_program(device, sub_device_3, sub_device_4);
+
+        // Compile the programs
+        EnqueueProgram(device->command_queue(), waiter_program_2, false);
+        EnqueueProgram(device->command_queue(), syncer_program_2, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+        Synchronize(device);
+
+        // Capture the trace
+        auto tid_3 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), waiter_program_2, false);
+        EnqueueProgram(device->command_queue(), syncer_program_2, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_3);
+
+        auto tid_4 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), syncer_program_2, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_4);
+
+        for (uint32_t i = 0; i < num_iters; i++) {
+            device->load_sub_device_manager(sub_device_manager_1);
+            // Regular program execution
+            EnqueueProgram(device->command_queue(), waiter_program_1, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program_1, false);
+            EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+
+            // Full trace execution
+            ReplayTrace(device, device->command_queue().id(), tid_1, false);
+
+            // Partial trace execution
+            EnqueueProgram(device->command_queue(), waiter_program_1, false);
+            ReplayTrace(device, device->command_queue().id(), tid_2, false);
+
+            device->load_sub_device_manager(sub_device_manager_2);
+            // Regular program execution
+            EnqueueProgram(device->command_queue(), waiter_program_2, false);
+            // Test blocking on one sub-device
+            EnqueueProgram(device->command_queue(), syncer_program_2, false);
+            EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+
+            // Full trace execution
+            ReplayTrace(device, device->command_queue().id(), tid_3, false);
+
+            // Partial trace execution
+            EnqueueProgram(device->command_queue(), waiter_program_2, false);
+            ReplayTrace(device, device->command_queue().id(), tid_4, false);
+        }
+        Synchronize(device);
+    }
+}
+
+TEST_F(CommandQueueSingleCardTraceFixture, TestSubDeviceIllegalOperations) {
+    SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))});
+    SubDevice sub_device_2(std::array{CoreRangeSet(std::vector{CoreRange({3, 3}, {3, 3}), CoreRange({4, 4}, {4, 4})})});
+
+    // Assert no idle eth cores specified
+    EXPECT_THROW(SubDevice sub_device_3(std::array{CoreRangeSet(CoreRange({3, 3}, {3, 3})), CoreRangeSet(CoreRange({4, 4}, {4, 4})), CoreRangeSet(CoreRange({5, 5}, {5, 5}))}), std::exception);
+    for (Device *device : devices_) {
+        auto sub_device_manager_1 = device->create_sub_device_manager({sub_device_1, sub_device_2}, 3200);
+        auto sub_device_manager_2 = device->create_sub_device_manager({sub_device_2, sub_device_1}, 3200);
+        device->load_sub_device_manager(sub_device_manager_1);
+
+        auto [waiter_program_1, syncer_program_1, incrementer_program_1, global_sem_1] = create_basic_sync_program(device, sub_device_1, sub_device_2);
+
+        // Compile the programs
+        EnqueueProgram(device->command_queue(), waiter_program_1, false);
+        // Test blocking on one sub-device
+        EnqueueProgram(device->command_queue(), syncer_program_1, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+        Synchronize(device);
+
+        // Capture the trace
+        auto tid_1 = BeginTraceCapture(device, device->command_queue().id());
+        // Can not load a sub-device manager while tracing
+        EXPECT_THROW(device->load_sub_device_manager(sub_device_manager_2), std::exception);
+        EnqueueProgram(device->command_queue(), waiter_program_1, false);
+        EnqueueProgram(device->command_queue(), syncer_program_1, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_1, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_1);
+
+        device->load_sub_device_manager(sub_device_manager_2);
+        auto [waiter_program_2, syncer_program_2, incrementer_program_2, global_sem_2] = create_basic_sync_program(device, sub_device_2, sub_device_1);
+
+        EnqueueProgram(device->command_queue(), waiter_program_2, false);
+        EnqueueProgram(device->command_queue(), syncer_program_2, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+        Synchronize(device);
+
+        auto tid_2 = BeginTraceCapture(device, device->command_queue().id());
+        EnqueueProgram(device->command_queue(), waiter_program_2, false);
+        EnqueueProgram(device->command_queue(), syncer_program_2, false);
+        EnqueueProgram(device->command_queue(), incrementer_program_2, false);
+        EndTraceCapture(device, device->command_queue().id(), tid_2);
+
+        // Regular program execution
+        // Can not run a program on a different sub-device manager
+        EXPECT_THROW(EnqueueProgram(device->command_queue(), waiter_program_1, false), std::exception);
+
+        // Full trace execution
+        ReplayTrace(device, device->command_queue().id(), tid_2, false);
+
+        // Can not replay a trace on a different sub-device manager
+        EXPECT_THROW(ReplayTrace(device, device->command_queue().id(), tid_1, false), std::exception);
+
+        Synchronize(device);
+
+        device->remove_sub_device_manager(sub_device_manager_1);
+        EXPECT_THROW(device->load_sub_device_manager(sub_device_manager_1), std::exception);
+    }
+}
+
 }  // namespace basic_tests
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 3c80ccca4e5..b6cf26597b5 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -1261,7 +1261,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         uint32_t dev_completion_queue_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
                         settings.upstream_cores.push_back(demux_settings.worker_physical_core);
                         settings.downstream_cores.push_back(tt_cxy_pair(0, 0, 0));
-                        settings.compile_args.resize(30);
+                        settings.compile_args.resize(31);
                         auto& compile_args = settings.compile_args;
                         compile_args[0] = settings.cb_start_address;
                         compile_args[1] = settings.cb_log_page_size;
@@ -1285,14 +1285,15 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         compile_args[19] = settings.num_compute_cores;
                         compile_args[20] = 0; // unused: dispatch_d only
                         compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
-                        compile_args[22] = 0; // unused: dispatch_d only
+                        compile_args[22] = 1; // max_num_go_signal_noc_data_entries is used for array sizing, set to 1 even if array isn't used
                         compile_args[23] = 0; // unused: dispatch_d only
-                        compile_args[24] = 0;
-                        compile_args[25] = host_completion_queue_wr_ptr;
-                        compile_args[26] = dev_completion_queue_wr_ptr;
-                        compile_args[27] = dev_completion_queue_rd_ptr;
-                        compile_args[28] = false; // is_dram_variant
-                        compile_args[29] = true; // is_host_variant
+                        compile_args[24] = 0; // unused: dispatch_d only
+                        compile_args[25] = 0;
+                        compile_args[26] = host_completion_queue_wr_ptr;
+                        compile_args[27] = dev_completion_queue_wr_ptr;
+                        compile_args[28] = dev_completion_queue_rd_ptr;
+                        compile_args[29] = false; // is_dram_variant
+                        compile_args[30] = true; // is_host_variant
 
                         dispatch_idx++;
                     }
@@ -1315,7 +1316,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                             uint32_t dev_completion_queue_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
                             settings.upstream_cores.push_back(demux_settings.worker_physical_core);
                             settings.downstream_cores.push_back(tt_cxy_pair(0, 0, 0));
-                            settings.compile_args.resize(30);
+                            settings.compile_args.resize(31);
                             auto& compile_args = settings.compile_args;
                             compile_args[0] = settings.cb_start_address;
                             compile_args[1] = settings.cb_log_page_size;
@@ -1339,14 +1340,15 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                             compile_args[19] = settings.num_compute_cores;
                             compile_args[20] = 0; // unused: dispatch_d only
                             compile_args[21] = 1; // max_num_worker_sems is used for array sizing, set to 1 even if array isn't used
-                            compile_args[22] = 0; // unused: dispatch_d only
+                            compile_args[22] = 1; // max_num_go_signal_noc_data_entries is used for array sizing, set to 1 even if array isn't used
                             compile_args[23] = 0; // unused: dispatch_d only
-                            compile_args[24] = 0;
-                            compile_args[25] = host_completion_queue_wr_ptr;
-                            compile_args[26] = dev_completion_queue_wr_ptr;
-                            compile_args[27] = dev_completion_queue_rd_ptr;
-                            compile_args[28] = false; // is_dram_variant
-                            compile_args[29] = true; // is_host_variant
+                            compile_args[24] = 0; // unused: dispatch_d only
+                            compile_args[25] = 0;
+                            compile_args[26] = host_completion_queue_wr_ptr;
+                            compile_args[27] = dev_completion_queue_wr_ptr;
+                            compile_args[28] = dev_completion_queue_rd_ptr;
+                            compile_args[29] = false; // is_dram_variant
+                            compile_args[30] = true; // is_host_variant
                             dispatch_idx++;
                         }
                     }
@@ -1628,7 +1630,8 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                 uint32_t mux_sem = mux_d_settings.consumer_semaphore_id;
                 uint32_t tensix_worker_go_signal_addr = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::GO_MSG);
                 uint32_t eth_worker_go_signal_addr = hal.get_dev_addr(HalProgrammableCoreType::ACTIVE_ETH, HalL1MemAddrType::GO_MSG);
-                uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+                constexpr uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+                constexpr uint32_t max_num_go_signal_noc_data_entries = dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES;
                 for (auto&[core, dispatch_d_settings] : device_worker_variants[DispatchWorkerType::DISPATCH_D]) {
                     auto prefetch_d_settings = std::get<1>(device_worker_variants[DispatchWorkerType::PREFETCH_D][dispatch_d_idx]); // 1 to 1 mapping bw prefetch_d and dispatch_d
                     auto dispatch_s_settings = std::get<1>(device_worker_variants[DispatchWorkerType::DISPATCH_S][dispatch_d_idx]); // 1 to 1 mapping bw dispatch_s and dispatch_d
@@ -1640,7 +1643,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                     dispatch_d_settings.upstream_cores.push_back(prefetch_d_settings.worker_physical_core);
                     dispatch_d_settings.downstream_cores.push_back(mux_d_settings.worker_physical_core);
                     dispatch_d_settings.downstream_cores.push_back(dispatch_s_settings.worker_physical_core);
-                    dispatch_d_settings.compile_args.resize(30);
+                    dispatch_d_settings.compile_args.resize(32);
                     auto& compile_args = dispatch_d_settings.compile_args;
                     compile_args[0] = dispatch_d_settings.cb_start_address;
                     compile_args[1] = dispatch_d_settings.cb_log_page_size;
@@ -1664,14 +1667,15 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                     compile_args[19] = dispatch_d_settings.num_compute_cores;
                     compile_args[20] = dispatch_s_sync_sem_base_addr;
                     compile_args[21] = max_dispatch_message_entries;
-                    compile_args[22] = tensix_worker_go_signal_addr;
-                    compile_args[23] = eth_worker_go_signal_addr;
-                    compile_args[24] = (dispatch_core_type == CoreType::ETH);
-                    compile_args[25] = host_completion_queue_wr_ptr;
-                    compile_args[26] = dev_completion_queue_wr_ptr;
-                    compile_args[27] = dev_completion_queue_rd_ptr;
-                    compile_args[28] = true; // is_dram_variant
-                    compile_args[29] = false; // is_host_variant
+                    compile_args[22] = max_num_go_signal_noc_data_entries;
+                    compile_args[23] = tensix_worker_go_signal_addr;
+                    compile_args[24] = eth_worker_go_signal_addr;
+                    compile_args[25] = (dispatch_core_type == CoreType::ETH);
+                    compile_args[26] = host_completion_queue_wr_ptr;
+                    compile_args[27] = dev_completion_queue_wr_ptr;
+                    compile_args[28] = dev_completion_queue_rd_ptr;
+                    compile_args[29] = true; // is_dram_variant
+                    compile_args[30] = false; // is_host_variant
                     dispatch_d_idx++; // move on to next dispatcher
                 }
                 break;
@@ -1690,8 +1694,9 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         auto dispatch_core_type = dispatch_s_settings.dispatch_core_type;
                         uint32_t dispatch_message_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
                         uint32_t dispatch_s_sync_sem_base_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_S_SYNC_SEM);
-                        uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
-                        dispatch_s_settings.compile_args.resize(11);
+                        constexpr uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+                        constexpr uint32_t max_num_go_signal_noc_data_entries = dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES;
+                        dispatch_s_settings.compile_args.resize(12);
                         auto& compile_args = dispatch_s_settings.compile_args;
                         compile_args[0] = dispatch_s_settings.cb_start_address;
                         compile_args[1] = dispatch_s_settings.cb_log_page_size;
@@ -1704,6 +1709,7 @@ void Device::update_workers_build_settings(std::vector<std::vector<std::tuple<tt
                         compile_args[8] = (dispatch_core_type == CoreType::ETH);
                         compile_args[9] = dispatch_message_base_addr;
                         compile_args[10] = max_dispatch_message_entries;
+                        compile_args[11] = max_num_go_signal_noc_data_entries;
                         dispatch_s_idx++;
                     }
                 }
@@ -2203,7 +2209,8 @@ void Device::compile_command_queue_programs() {
             uint32_t dev_completion_queue_wr_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
             uint32_t dev_completion_queue_rd_ptr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
             uint32_t dispatch_message_addr = dispatch_constants::get(dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-            uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+            constexpr uint32_t max_dispatch_message_entries = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
+            constexpr uint32_t max_num_go_signal_noc_data_entries = dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES;
 
             const uint32_t prefetch_sync_sem = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, prefetch_core, 0, dispatch_core_type);
             const uint32_t prefetch_sem = tt::tt_metal::CreateSemaphore(*command_queue_program_ptr, prefetch_core, dispatch_constants::get(dispatch_core_type).dispatch_buffer_pages(), dispatch_core_type);
@@ -2316,6 +2323,7 @@ void Device::compile_command_queue_programs() {
                 num_compute_cores, // max_write_packed_cores
                 dispatch_s_sync_sem_base_addr, // used to notify dispatch_s that its safe to send a go signal
                 max_dispatch_message_entries,
+                max_num_go_signal_noc_data_entries,
                 tensix_worker_go_signal_addr, // used by dispatch_d to mcast go signals when dispatch_s is not enabled
                 eth_worker_go_signal_addr, // used by dispatch_d to mcast go signals when dispatch_s is not enabled
                 dispatch_core_type == CoreType::ETH,
@@ -2354,6 +2362,7 @@ void Device::compile_command_queue_programs() {
                     dispatch_core_type == CoreType::ETH,
                     dispatch_message_addr,
                     max_dispatch_message_entries,
+                    max_num_go_signal_noc_data_entries,
                 };
                 configure_kernel_variant(
                     *command_queue_program_ptr,
@@ -2903,12 +2912,11 @@ void Device::init_command_queue_device() {
             }
         }
     }
-    // TODO: Move this inside the command queue
+
     for (auto& hw_cq : this->hw_command_queues_) {
-        hw_cq->set_num_worker_sems_on_dispatch(this->num_sub_devices());
+        hw_cq->set_num_worker_sems_on_dispatch(this->active_sub_device_manager_->num_sub_devices());
+        hw_cq->set_go_signal_noc_data_on_dispatch(this->active_sub_device_manager_->noc_mcast_unicast_data());
     }
-    // Added this for safety while debugging hangs with FD v1.3 tunnel to R, should experiment with removing it
-    // tt::Cluster::instance().l1_barrier(this->id());
 }
 
 void Device::initialize_synchronous_sw_cmd_queue() {
@@ -3161,18 +3169,16 @@ const std::unique_ptr<Allocator> &Device::get_initialized_allocator(SubDeviceId
 }
 
 void Device::reset_sub_devices_state(const std::unique_ptr<detail::SubDeviceManager> &sub_device_manager) {
-    // Finish all running programs
-    Synchronize(this);
-
     auto num_sub_devices = sub_device_manager->num_sub_devices();
 
-    // Set new number of worker sems on dispatch_s
+    // TODO: This could be further optimized by combining all of these into a single prefetch entry
+    // Currently each one will be pushed into its own prefetch entry
     for (auto& hw_cq : this->hw_command_queues_) {
         // Only need to reset launch messages once, so reset on cq 0
         TT_FATAL(!hw_cq->manager.get_bypass_mode(), "Cannot reset worker state during trace capture");
         hw_cq->reset_worker_state(hw_cq->id == 0);
         hw_cq->set_num_worker_sems_on_dispatch(num_sub_devices);
-        // Reset the config buffer mgr (is this needed?)
+        hw_cq->set_go_signal_noc_data_on_dispatch(sub_device_manager->noc_mcast_unicast_data());
         hw_cq->reset_config_buffer_mgr(num_sub_devices);
     }
     // Reset the launch_message ring buffer state seen on host
@@ -3488,7 +3494,7 @@ void Device::begin_trace(const uint8_t cq_id, const uint32_t tid) {
     TT_FATAL(!this->hw_command_queues_[cq_id]->tid.has_value(), "CQ {} is already being used for tracing tid {}", (uint32_t)cq_id, tid);
     this->MarkAllocationsSafe();
     // Create an empty trace buffer here. This will get initialized in end_trace
-    TT_FATAL(this->active_sub_device_manager_->get_trace(tid) == nullptr, "Trace already exists for tid {} on device", tid);
+    TT_FATAL(this->active_sub_device_manager_->get_trace(tid) == nullptr, "Trace already exists for tid {} on device {}'s active sub-device manager {}", tid, this->id_, this->active_sub_device_manager_id_);
     auto &trace_buffer = this->active_sub_device_manager_->create_trace(tid);
     this->hw_command_queues_[cq_id]->record_begin(tid, trace_buffer->desc);
 }
@@ -3498,7 +3504,7 @@ void Device::end_trace(const uint8_t cq_id, const uint32_t tid) {
     TracyTTMetalEndTrace(this->id(), tid);
     TT_FATAL(this->hw_command_queues_[cq_id]->tid == tid, "CQ {} is not being used for tracing tid {}", (uint32_t)cq_id, tid);
     auto trace_buffer = this->active_sub_device_manager_->get_trace(tid);
-    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device", tid);
+    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device {}'s active sub-device manager {}", tid, this->id_, this->active_sub_device_manager_id_);
     this->hw_command_queues_[cq_id]->record_end();
     Trace::initialize_buffer(this->command_queue(cq_id), trace_buffer);
     this->MarkAllocationsUnsafe();
@@ -3509,7 +3515,7 @@ void Device::replay_trace(const uint8_t cq_id, const uint32_t tid, const bool bl
     TracyTTMetalReplayTrace(this->id(), tid);
     constexpr bool check = false;
     const auto &trace_buffer = this->active_sub_device_manager_->get_trace(tid);
-    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device", tid);
+    TT_FATAL(trace_buffer != nullptr, "Trace instance {} must exist on device {}'s active sub-device manager {}", tid, this->id_, this->active_sub_device_manager_id_);
     if constexpr (check) {
         Trace::validate_instance(*trace_buffer);
     }
@@ -3577,39 +3583,24 @@ size_t Device::get_device_kernel_defines_hash() {
     return tt::utils::DefinesHash{}(this->device_kernel_defines_);
 }
 
-const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_data(SubDeviceId sub_device_id) const {
-    return this->active_sub_device_manager_->noc_mcast_data(sub_device_id);
+uint8_t Device::num_noc_mcast_txns(SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->num_noc_mcast_txns(sub_device_id);
 }
 
-const vector_memcpy_aligned<uint32_t>& Device::noc_unicast_data(SubDeviceId sub_device_id) const {
-    return this->active_sub_device_manager_->noc_unicast_data(sub_device_id);
+uint8_t Device::num_noc_unicast_txns(SubDeviceId sub_device_id) const {
+    return this->active_sub_device_manager_->num_noc_unicast_txns(sub_device_id);
 }
 
-const vector_memcpy_aligned<uint32_t>& Device::noc_mcast_unicast_data(SubDeviceId sub_device_id, bool mcast_data, bool unicast_data) const {
-    // Needed for compatibility with tests that create programs with no kernels
-    static const vector_memcpy_aligned<uint32_t> empty = {};
-    if (mcast_data && unicast_data) {
-        return this->active_sub_device_manager_->noc_mcast_unicast_data(sub_device_id);
-    } else if (mcast_data) {
-        return this->active_sub_device_manager_->noc_mcast_data(sub_device_id);
+uint8_t Device::noc_data_start_index(SubDeviceId sub_device_id, bool mcast_data, bool unicast_data) const {
+    if (mcast_data) {
+        return this->active_sub_device_manager_->noc_mcast_data_start_index(sub_device_id);
     } else if (unicast_data) {
-        return this->active_sub_device_manager_->noc_unicast_data(sub_device_id);
+        return this->active_sub_device_manager_->noc_unicast_data_start_index(sub_device_id);
     } else {
-        return empty;
+        return 0;
     }
 }
 
-uint32_t Device::num_noc_mcast_txns(SubDeviceId sub_device_id) const {
-    return this->noc_mcast_data(sub_device_id).size() / 2;
-}
-uint32_t Device::num_noc_unicast_txns(SubDeviceId sub_device_id) const {
-    return this->noc_unicast_data(sub_device_id).size();
-}
-
-uint32_t Device::num_noc_mcast_unicast_txns(SubDeviceId sub_device_id, bool mcast_data, bool unicast_data) const {
-    return (mcast_data ? this->num_noc_mcast_txns(sub_device_id) : 0) + (unicast_data ? this->num_noc_unicast_txns(sub_device_id) : 0);
-}
-
 LaunchMessageRingBufferState& Device::get_worker_launch_message_buffer_state(SubDeviceId sub_device_id) {
     return this->active_sub_device_manager_->get_worker_launch_message_buffer_state(sub_device_id);
 }
@@ -3631,35 +3622,32 @@ SubDeviceManagerId Device::get_default_sub_device_manager_id() const {
 }
 
 SubDeviceManagerId Device::create_sub_device_manager(tt::stl::Span<const SubDevice> sub_devices, DeviceAddr local_l1_size) {
-    TT_FATAL(!this->using_slow_dispatch(), "Using sub device managers is unsupported with slow dispatch");
     auto [sub_device_manager, _] = this->sub_device_managers_.insert_or_assign(this->get_next_sub_device_manager_id(), std::make_unique<detail::SubDeviceManager>(sub_devices, local_l1_size, this));
     return sub_device_manager->first;
 }
 
 void Device::load_sub_device_manager(SubDeviceManagerId sub_device_manager_id) {
+    TT_FATAL(!this->using_slow_dispatch(), "Using sub device managers is unsupported with slow dispatch");
     if (this->active_sub_device_manager_id_ == sub_device_manager_id) {
         return;
     }
+    if (this->active_sub_device_manager_id_ != this->default_sub_device_manager_id_) {
+        TT_FATAL(!this->active_sub_device_manager_->has_allocations(), "Cannot switch sub device managers while sub devices still have local allocations");
+    }
     auto sub_device_manager = this->sub_device_managers_.find(sub_device_manager_id);
     TT_FATAL(sub_device_manager != this->sub_device_managers_.end(), "Sub device manager does not exist");
     this->reset_sub_devices_state(sub_device_manager->second);
+    const auto& global_allocator = this->get_initialized_allocator();
+    allocator::reset_allocator_size(*global_allocator, BufferType::L1);
     // Shrink the global allocator size to make room for sub-device allocators
     auto local_l1_size = sub_device_manager->second->local_l1_size();
-    allocator::shrink_allocator_size(*this->get_initialized_allocator(), BufferType::L1, local_l1_size, true);
+    allocator::shrink_allocator_size(*global_allocator, BufferType::L1, local_l1_size, true);
     this->active_sub_device_manager_id_ = sub_device_manager_id;
     this->active_sub_device_manager_ = sub_device_manager->second.get();
 }
 
 void Device::clear_loaded_sub_device_manager() {
-    if (this->active_sub_device_manager_id_ == this->default_sub_device_manager_id_) {
-        return;
-    }
-    TT_FATAL(!this->active_sub_device_manager_->has_allocations(), "Cannot clear active sub device manager {} since it has allocations", this->active_sub_device_manager_id_);
-    auto &default_manager = this->sub_device_managers_.at(this->default_sub_device_manager_id_);
-    this->reset_sub_devices_state(default_manager);
-    allocator::reset_allocator_size(*this->get_initialized_allocator(), BufferType::L1);
-    this->active_sub_device_manager_id_ = this->default_sub_device_manager_id_;
-    this->active_sub_device_manager_ = default_manager.get();
+    this->load_sub_device_manager(this->default_sub_device_manager_id_);
 }
 
 void Device::remove_sub_device_manager(SubDeviceManagerId sub_device_manager_id) {
@@ -3672,6 +3660,10 @@ void Device::remove_sub_device_manager(SubDeviceManagerId sub_device_manager_id)
     this->sub_device_managers_.erase(sub_device_manager);
 }
 
+const std::vector<SubDeviceId> &Device::get_sub_device_ids() const {
+    return this->active_sub_device_manager_->get_sub_device_ids();
+}
+
 }  // namespace tt_metal
 
 }  // namespace tt
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 36058b11f82..add6a9535f2 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -369,12 +369,9 @@ class Device {
     NOC dispatch_go_signal_noc() const;
     size_t get_device_kernel_defines_hash();
 
-    const vector_memcpy_aligned<uint32_t>& noc_mcast_data(SubDeviceId sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t>& noc_unicast_data(SubDeviceId sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t>& noc_mcast_unicast_data(SubDeviceId sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
-    uint32_t num_noc_mcast_txns(SubDeviceId sub_device_id) const;
-    uint32_t num_noc_unicast_txns(SubDeviceId sub_device_id) const;
-    uint32_t num_noc_mcast_unicast_txns(SubDeviceId sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
+    uint8_t num_noc_mcast_txns(SubDeviceId sub_device_id) const;
+    uint8_t num_noc_unicast_txns(SubDeviceId sub_device_id) const;
+    uint8_t noc_data_start_index(SubDeviceId sub_device_id, bool mcast_data=true, bool unicast_data=true) const;
 
     LaunchMessageRingBufferState& get_worker_launch_message_buffer_state(SubDeviceId sub_device_id);
 
@@ -384,6 +381,7 @@ class Device {
     void load_sub_device_manager(SubDeviceManagerId sub_device_manager_id);
     void clear_loaded_sub_device_manager();
     void remove_sub_device_manager(SubDeviceManagerId sub_device_manager_id);
+    const std::vector<SubDeviceId> &get_sub_device_ids() const;
    private:
     void initialize_default_sub_device_state(size_t l1_small_size, size_t trace_region_size, const std::vector<uint32_t> &l1_bank_remap);
     SubDeviceManagerId get_next_sub_device_manager_id();
diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index 2faa7221f58..f2b38ed31f7 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -76,7 +76,8 @@ EnqueueReadBufferCommand::EnqueueReadBufferCommand(
     Buffer& buffer,
     void* dst,
     SystemMemoryManager& manager,
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+    tt::stl::Span<const uint32_t> expected_num_workers_completed,
+    tt::stl::Span<const SubDeviceId> sub_device_ids,
     uint32_t src_page_index,
     std::optional<uint32_t> pages_to_read) :
     command_queue_id(command_queue_id),
@@ -85,6 +86,7 @@ EnqueueReadBufferCommand::EnqueueReadBufferCommand(
     manager(manager),
     buffer(buffer),
     expected_num_workers_completed(expected_num_workers_completed),
+    sub_device_ids(sub_device_ids),
     src_page_index(src_page_index),
     pages_to_read(pages_to_read.has_value() ? pages_to_read.value() : buffer.num_pages()) {
     TT_ASSERT(buffer.is_dram() or buffer.is_l1(), "Trying to read an invalid buffer");
@@ -110,7 +112,7 @@ void EnqueueReadShardedBufferCommand::add_prefetch_relay(HugepageDeviceCommand&
 }
 
 void EnqueueReadBufferCommand::process() {
-    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
+    uint32_t num_worker_counters = this->sub_device_ids.size();
     // accounts for padding
     uint32_t cmd_sequence_sizeB =
         CQ_PREFETCH_CMD_BARE_MIN_SIZE * num_worker_counters +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
@@ -126,16 +128,16 @@ void EnqueueReadBufferCommand::process() {
     uint32_t last_index = num_worker_counters - 1;
     // We only need the write barrier + prefetch stall for the last wait cmd
     for (uint32_t i = 0; i < last_index; ++i) {
-        auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+        auto offset_index = this->sub_device_ids[i].to_index();
         uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
         command_sequence.add_dispatch_wait(
-            false, dispatch_message_addr, workers_completed);
+            false, dispatch_message_addr, this->expected_num_workers_completed[offset_index ]);
 
     }
-    auto [offset_index, workers_completed] = this->expected_num_workers_completed[last_index];
+    auto offset_index = this->sub_device_ids[last_index].to_index();
     uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
     command_sequence.add_dispatch_wait_with_prefetch_stall(
-        true, dispatch_message_addr, workers_completed);
+        true, dispatch_message_addr, this->expected_num_workers_completed[offset_index]);
 
     uint32_t padded_page_size = this->buffer.aligned_page_size();
     bool flush_prefetch = false;
@@ -160,7 +162,8 @@ EnqueueWriteBufferCommand::EnqueueWriteBufferCommand(
     const void* src,
     SystemMemoryManager& manager,
     bool issue_wait,
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+    tt::stl::Span<const uint32_t> expected_num_workers_completed,
+    tt::stl::Span<const SubDeviceId> sub_device_ids,
     uint32_t bank_base_address,
     uint32_t padded_page_size,
     uint32_t dst_page_index,
@@ -172,6 +175,7 @@ EnqueueWriteBufferCommand::EnqueueWriteBufferCommand(
     src(src),
     buffer(buffer),
     expected_num_workers_completed(expected_num_workers_completed),
+    sub_device_ids(sub_device_ids),
     bank_base_address(bank_base_address),
     padded_page_size(padded_page_size),
     dst_page_index(dst_page_index),
@@ -284,7 +288,7 @@ void EnqueueWriteShardedBufferCommand::add_buffer_data(HugepageDeviceCommand& co
 }
 
 void EnqueueWriteBufferCommand::process() {
-    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
+    uint32_t num_worker_counters = this->sub_device_ids.size();
     uint32_t data_size_bytes = this->pages_to_write * this->padded_page_size;
 
     uint32_t cmd_sequence_sizeB =
@@ -302,10 +306,10 @@ void EnqueueWriteBufferCommand::process() {
     if (this->issue_wait) {
         uint32_t dispatch_message_base_addr = dispatch_constants::get(
             this->dispatch_core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::DISPATCH_MESSAGE);
-        for (uint32_t i = 0; i < num_worker_counters; ++i) {
-            auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+        for (const auto &sub_device_id : this->sub_device_ids) {
+            auto offset_index = sub_device_id.to_index();
             uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
-            command_sequence.add_dispatch_wait(false, dispatch_message_addr, workers_completed);
+            command_sequence.add_dispatch_wait(false, dispatch_message_addr, this->expected_num_workers_completed[offset_index]);
         }
     }
 
@@ -1101,13 +1105,10 @@ void EnqueueProgramCommand::assemble_device_commands(
     cmd_sequence_sizeB += (this->device->dispatch_s_enabled() || program_transfer_info.num_active_cores > 0) * CQ_PREFETCH_CMD_BARE_MIN_SIZE;
 
     // either dispatch_s or dispatch_d will send the go signal (go_signal_mcast command)
-    const auto& noc_mcast_unicast_data = device->noc_mcast_unicast_data(this->sub_device_id, multicast_go_signal_sub_cmds.size() > 0,  unicast_go_signal_sub_cmds.size() > 0);
+    const auto& noc_data_start_idx = device->noc_data_start_index(this->sub_device_id, multicast_go_signal_sub_cmds.size() > 0,  unicast_go_signal_sub_cmds.size() > 0);
     const auto& num_noc_mcast_txns = multicast_go_signal_sub_cmds.size() > 0 ? device->num_noc_mcast_txns(this->sub_device_id) : 0;
     const auto& num_noc_unicast_txns = unicast_go_signal_sub_cmds.size() > 0 ? device->num_noc_unicast_txns(this->sub_device_id) : 0;
-    cmd_sequence_sizeB += align(
-        sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd) +
-        noc_mcast_unicast_data.size() * sizeof(uint32_t),
-        pcie_alignment);
+    cmd_sequence_sizeB += align(sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd), pcie_alignment);
 
     program_command_sequence.device_command_sequence = HostMemDeviceCommand(cmd_sequence_sizeB);
 
@@ -1286,10 +1287,11 @@ void EnqueueProgramCommand::assemble_device_commands(
     }
 
     DispatcherSelect dispatcher_for_go_signal = DispatcherSelect::DISPATCH_MASTER;
+    auto sub_device_index = this->sub_device_id.to_index();
     if (this->device->dispatch_s_enabled()) {
         // dispatch_d signals dispatch_s to send the go signal, use a barrier if there are cores active
         uint16_t index_bitmask = 0;
-        index_bitmask |= 1 << this->sub_device_id.to_index();
+        index_bitmask |= 1 << sub_device_index;
         device_command_sequence.add_notify_dispatch_s_go_signal_cmd(program_transfer_info.num_active_cores > 0, index_bitmask);
         dispatcher_for_go_signal = DispatcherSelect::DISPATCH_SLAVE;
     } else {
@@ -1302,9 +1304,9 @@ void EnqueueProgramCommand::assemble_device_commands(
     run_program_go_signal.signal = RUN_MSG_GO;
     run_program_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     run_program_go_signal.master_y = (uint8_t)this->dispatch_core.y;
-    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(this->sub_device_id.to_index());
+    run_program_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(this->dispatch_core_type).get_dispatch_message_offset(sub_device_index);
     uint32_t write_offset_bytes = device_command_sequence.write_offset_bytes();
-    device_command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, *reinterpret_cast<uint32_t*>(&run_program_go_signal), this->dispatch_message_addr, num_noc_mcast_txns, num_noc_unicast_txns, noc_mcast_unicast_data, dispatcher_for_go_signal);
+    device_command_sequence.add_dispatch_go_signal_mcast(this->expected_num_workers_completed, *reinterpret_cast<uint32_t*>(&run_program_go_signal), this->dispatch_message_addr, num_noc_mcast_txns, num_noc_unicast_txns, noc_data_start_idx, dispatcher_for_go_signal);
     program_command_sequence.mcast_go_signal_cmd_ptr = &((CQDispatchCmd*) ((uint32_t*)device_command_sequence.data() + (write_offset_bytes + sizeof(CQPrefetchCmd)) / sizeof(uint32_t)))->mcast;
 }
 
@@ -1601,7 +1603,8 @@ EnqueueRecordEventCommand::EnqueueRecordEventCommand(
     NOC noc_index,
     SystemMemoryManager& manager,
     uint32_t event_id,
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+    tt::stl::Span<const uint32_t> expected_num_workers_completed,
+    tt::stl::Span<const SubDeviceId> sub_device_ids,
     bool clear_count,
     bool write_barrier) :
     command_queue_id(command_queue_id),
@@ -1610,6 +1613,7 @@ EnqueueRecordEventCommand::EnqueueRecordEventCommand(
     manager(manager),
     event_id(event_id),
     expected_num_workers_completed(expected_num_workers_completed),
+    sub_device_ids(sub_device_ids),
     clear_count(clear_count),
     write_barrier(write_barrier) {}
 
@@ -1625,7 +1629,7 @@ void EnqueueRecordEventCommand::process() {
         align(sizeof(CQDispatchCmd) + num_hw_cqs * sizeof(CQDispatchWritePackedUnicastSubCmd), l1_alignment) +
         (align(dispatch_constants::EVENT_PADDED_SIZE, l1_alignment) * num_hw_cqs);
     uint32_t packed_write_sizeB = align(sizeof(CQPrefetchCmd) + packed_event_payload_sizeB, pcie_alignment);
-    uint32_t num_worker_counters = this->expected_num_workers_completed.size();
+    uint32_t num_worker_counters = this->sub_device_ids.size();
 
     uint32_t cmd_sequence_sizeB =
         CQ_PREFETCH_CMD_BARE_MIN_SIZE * num_worker_counters +  // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_WAIT
@@ -1646,16 +1650,16 @@ void EnqueueRecordEventCommand::process() {
     uint32_t last_index = num_worker_counters - 1;
     // We only need the write barrier for the last wait cmd
     for (uint32_t i = 0; i < last_index; ++i) {
-        auto [offset_index, workers_completed] = this->expected_num_workers_completed[i];
+        auto offset_index = this->sub_device_ids[i].to_index();
         uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
         command_sequence.add_dispatch_wait(
-            false, dispatch_message_addr, workers_completed, this->clear_count);
+            false, dispatch_message_addr, this->expected_num_workers_completed[offset_index], this->clear_count);
 
     }
-    auto [offset_index, workers_completed] = this->expected_num_workers_completed[last_index];
+    auto offset_index = this->sub_device_ids[last_index].to_index();
     uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(offset_index);
     command_sequence.add_dispatch_wait(
-            this->write_barrier, dispatch_message_addr, workers_completed, this->clear_count);
+            this->write_barrier, dispatch_message_addr, this->expected_num_workers_completed[offset_index], this->clear_count);
 
     CoreType core_type = dispatch_core_manager::instance().get_dispatch_core_type(this->device->id());
     uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(this->device->id());
@@ -1759,17 +1763,9 @@ EnqueueTraceCommand::EnqueueTraceCommand(
 
 void EnqueueTraceCommand::process() {
     uint32_t num_sub_devices = descriptor->descriptors.size();
-    uint32_t go_signals_cmd_size = 0;
     uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
-    for (const auto& [index, desc] : descriptor->descriptors) {
-        uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
-        go_signal_cmd_size += device->noc_mcast_unicast_data(
-                                        SubDeviceId{index},
-                                        desc.num_traced_programs_needing_go_signal_multicast,
-                                        desc.num_traced_programs_needing_go_signal_unicast)
-                                        .size() * sizeof(uint32_t);
-        go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
-    }
+    uint32_t go_signals_cmd_size = align(sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd), pcie_alignment) * descriptor->descriptors.size();
+
     uint32_t cmd_sequence_sizeB =
         this->device->dispatch_s_enabled() * CQ_PREFETCH_CMD_BARE_MIN_SIZE + // dispatch_d -> dispatch_s sem update (send only if dispatch_s is running)
         go_signals_cmd_size +  // go signal cmd
@@ -1801,39 +1797,42 @@ void EnqueueTraceCommand::process() {
     reset_launch_message_read_ptr_go_signal.master_x = (uint8_t)this->dispatch_core.x;
     reset_launch_message_read_ptr_go_signal.master_y = (uint8_t)this->dispatch_core.y;
     for (const auto& [id, desc] : descriptor->descriptors) {
+        const auto& noc_data_start_idx = device->noc_data_start_index(id, desc.num_traced_programs_needing_go_signal_multicast, desc.num_traced_programs_needing_go_signal_unicast);
         const auto& num_noc_mcast_txns = desc.num_traced_programs_needing_go_signal_multicast ? device->num_noc_mcast_txns(id) : 0;
         const auto& num_noc_unicast_txns = desc.num_traced_programs_needing_go_signal_unicast ? device->num_noc_unicast_txns(id) : 0;
         reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
         uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
+        auto index = id.to_index();
         // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
         command_sequence.add_dispatch_go_signal_mcast(
-            this->expected_num_workers_completed[id.to_index()],
+            this->expected_num_workers_completed[index],
             *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal),
             dispatch_message_addr,
             num_noc_mcast_txns,
             num_noc_unicast_txns,
-            device->noc_mcast_unicast_data(id, desc.num_traced_programs_needing_go_signal_multicast, desc.num_traced_programs_needing_go_signal_unicast),
+            noc_data_start_idx,
             dispatcher_for_go_signal);
         if (desc.num_traced_programs_needing_go_signal_multicast) {
-            this->expected_num_workers_completed[id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, id);
+            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, id);
         }
         if (desc.num_traced_programs_needing_go_signal_unicast) {
-            this->expected_num_workers_completed[id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, id);
+            this->expected_num_workers_completed[index] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, id);
         }
     }
     // Wait to ensure that all workers have reset their read_ptr. dispatch_d will stall until all workers have completed this step, before sending kernel config data to workers
     // or notifying dispatch_s that its safe to send the go_signal.
     // Clear the dispatch <--> worker semaphore, since trace starts at 0.
     for (const auto &id : descriptor->sub_device_ids) {
-        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(id.to_index());
+        auto index = id.to_index();
+        uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(index);
         if (this->device->distributed_dispatcher()) {
             command_sequence.add_dispatch_wait(
-                false, dispatch_message_addr, this->expected_num_workers_completed[id.to_index()], this->clear_count, false, true, 1);
+                false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count, false, true, 1);
         }
         command_sequence.add_dispatch_wait(
-            false, dispatch_message_addr, this->expected_num_workers_completed[id.to_index()], this->clear_count);
+            false, dispatch_message_addr, this->expected_num_workers_completed[index], this->clear_count);
         if (this->clear_count) {
-            this->expected_num_workers_completed[id.to_index()] = 0;
+            this->expected_num_workers_completed[index] = 0;
         }
     }
 
@@ -1958,16 +1957,24 @@ void HWCommandQueue::set_num_worker_sems_on_dispatch(uint32_t num_worker_sems) {
     this->manager.fetch_queue_write(cmd_sequence_sizeB, this->id);
 }
 
+void HWCommandQueue::set_go_signal_noc_data_on_dispatch(const vector_memcpy_aligned<uint32_t>& go_signal_noc_data) {
+    uint32_t pci_alignment = hal.get_alignment(HalMemType::HOST);
+    uint32_t cmd_sequence_sizeB = align(sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd) + go_signal_noc_data.size() * sizeof(uint32_t), pci_alignment);
+    void* cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->id);
+    HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB);
+    DispatcherSelect dispatcher_for_go_signal = this->device->dispatch_s_enabled() ? DispatcherSelect::DISPATCH_SLAVE : DispatcherSelect::DISPATCH_MASTER;
+    command_sequence.add_dispatch_set_go_signal_noc_data(go_signal_noc_data, dispatcher_for_go_signal);
+    this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->id);
+    this->manager.fetch_queue_reserve_back(this->id);
+    this->manager.fetch_queue_write(cmd_sequence_sizeB, this->id);
+}
+
 void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
     auto num_sub_devices = device->num_sub_devices();
     uint32_t go_signals_cmd_size = 0;
     if (reset_launch_msg_state) {
         uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
-        for (uint8_t i = 0; i < num_sub_devices; ++i) {
-            uint32_t go_signal_cmd_size = sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd);
-            go_signal_cmd_size += device->noc_mcast_unicast_data(SubDeviceId{i}).size() * sizeof(uint32_t);
-            go_signals_cmd_size += align(go_signal_cmd_size, pcie_alignment);
-        }
+        go_signals_cmd_size = align(sizeof(CQPrefetchCmd) + sizeof(CQDispatchCmd), pcie_alignment) * num_sub_devices;
     }
     uint32_t cmd_sequence_sizeB =
         reset_launch_msg_state * this->device->dispatch_s_enabled() * CQ_PREFETCH_CMD_BARE_MIN_SIZE + // dispatch_d -> dispatch_s sem update (send only if dispatch_s is running)
@@ -2001,7 +2008,7 @@ void HWCommandQueue::reset_worker_state(bool reset_launch_msg_state) {
             reset_launch_message_read_ptr_go_signal.dispatch_message_offset = (uint8_t)dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
             uint32_t dispatch_message_addr = dispatch_message_base_addr + dispatch_constants::get(dispatch_core_type).get_dispatch_message_offset(i);
             // Wait to ensure that all kernels have completed. Then send the reset_rd_ptr go_signal.
-            command_sequence.add_dispatch_go_signal_mcast(expected_num_workers_completed[i], *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, device->num_noc_mcast_txns({i}), device->num_noc_unicast_txns({i}), device->noc_mcast_unicast_data({i}), dispatcher_for_go_signal);
+            command_sequence.add_dispatch_go_signal_mcast(expected_num_workers_completed[i], *reinterpret_cast<uint32_t*>(&reset_launch_message_read_ptr_go_signal), dispatch_message_addr, device->num_noc_mcast_txns({i}), device->num_noc_unicast_txns({i}), device->noc_data_start_index({i}), dispatcher_for_go_signal);
             expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, {i});
             expected_num_workers_completed[i] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, {i});
         }
@@ -2090,7 +2097,9 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
     uint32_t unpadded_dst_offset = 0;
     uint32_t src_page_index = 0;
 
-    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+    if (sub_device_ids.empty()) {
+        sub_device_ids = tt::stl::Span<const SubDeviceId>(this->device->get_sub_device_ids());
+    }
 
     if (is_sharded(buffer.buffer_layout())) {
         const bool width_split = buffer.shard_spec().shape_in_pages()[1] != buffer.shard_spec().tensor2d_shape[1];
@@ -2135,7 +2144,8 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
                     buffer,
                     dst,
                     this->manager,
-                    expected_workers_completed,
+                    this->expected_num_workers_completed,
+                    sub_device_ids,
                     cores[core_id],
                     bank_base_address,
                     src_page_index,
@@ -2169,7 +2179,8 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin
             buffer,
             dst,
             this->manager,
-            expected_workers_completed,
+            this->expected_num_workers_completed,
+            sub_device_ids,
             src_page_index,
             pages_to_read);
 
@@ -2228,7 +2239,9 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
 
     uint32_t dst_page_index = 0;
 
-    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+    if (sub_device_ids.empty()) {
+        sub_device_ids = tt::stl::Span<const SubDeviceId>(this->device->get_sub_device_ids());
+    }
 
     if (is_sharded(buffer.buffer_layout())) {
         const bool width_split = buffer.shard_spec().shape_in_pages()[1] != buffer.shard_spec().tensor2d_shape[1];
@@ -2297,7 +2310,8 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
                         src,
                         this->manager,
                         issue_wait,
-                        expected_workers_completed,
+                        this->expected_num_workers_completed,
+                        sub_device_ids,
                         address,
                         buffer_page_mapping,
                         cores[core_id],
@@ -2388,7 +2402,8 @@ void HWCommandQueue::enqueue_write_buffer(Buffer& buffer, const void* src, bool
                 src,
                 this->manager,
                 issue_wait,
-                expected_workers_completed,
+                this->expected_num_workers_completed,
+                sub_device_ids,
                 bank_base_address,
                 page_size_to_write,
                 dst_page_index,
@@ -2435,10 +2450,11 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
     }
 #endif
     auto sub_device_id = sub_device_ids[0];
+    auto sub_device_index = sub_device_id.to_index();
 
     // Snapshot of expected workers from previous programs, used for dispatch_wait cmd generation.
     uint32_t expected_workers_completed = this->manager.get_bypass_mode() ? this->trace_ctx->descriptors[sub_device_id].num_completion_worker_cores
-                                                                          : this->expected_num_workers_completed[sub_device_id.to_index()];
+                                                                          : this->expected_num_workers_completed[sub_device_index];
     if (this->manager.get_bypass_mode()) {
         if (program.runs_on_noc_multicast_only_cores()) {
             this->trace_ctx->descriptors[sub_device_id].num_traced_programs_needing_go_signal_multicast++;
@@ -2450,10 +2466,10 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         }
     } else {
         if (program.runs_on_noc_multicast_only_cores()) {
-            this->expected_num_workers_completed[sub_device_id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, sub_device_id);
+            this->expected_num_workers_completed[sub_device_index] += device->num_worker_cores(HalProgrammableCoreType::TENSIX, sub_device_id);
         }
         if (program.runs_on_noc_unicast_only_cores()) {
-            this->expected_num_workers_completed[sub_device_id.to_index()] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
+            this->expected_num_workers_completed[sub_device_index] += device->num_worker_cores(HalProgrammableCoreType::ACTIVE_ETH, sub_device_id);
         }
     }
 
@@ -2465,7 +2481,7 @@ void HWCommandQueue::enqueue_program(Program& program, bool blocking) {
         program,
         this->physical_enqueue_program_dispatch_core,
         this->manager,
-        this->get_config_buffer_mgr(sub_device_id),
+        this->get_config_buffer_mgr(sub_device_index),
         expected_workers_completed,
         // The assembled program command will encode the location of the launch messages in the ring buffer
         worker_launch_message_buffer_state.get_mcast_wptr(),
@@ -2514,7 +2530,9 @@ void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, b
     event->device = this->device;
     event->ready = true;  // what does this mean???
 
-    auto expected_workers_completed = this->get_expected_workers_completed(sub_device_ids);
+    if (sub_device_ids.empty()) {
+        sub_device_ids = tt::stl::Span<const SubDeviceId>(this->device->get_sub_device_ids());
+    }
 
     auto command = EnqueueRecordEventCommand(
         this->id,
@@ -2522,14 +2540,15 @@ void HWCommandQueue::enqueue_record_event(const std::shared_ptr<Event>& event, b
         this->noc_index,
         this->manager,
         event->event_id,
-        expected_workers_completed,
+        this->expected_num_workers_completed,
+        sub_device_ids,
         clear_count,
         true);
     this->enqueue_command(command, false, sub_device_ids);
 
     if (clear_count) {
-        for (const auto&[id, _] : expected_workers_completed) {
-            this->expected_num_workers_completed[id] = 0;
+        for (const auto& id : sub_device_ids) {
+            this->expected_num_workers_completed[id.to_index()] = 0;
         }
     }
     this->issued_completion_q_reads.push(
@@ -2558,8 +2577,9 @@ void HWCommandQueue::enqueue_trace(const uint32_t trace_id, bool blocking) {
     this->enqueue_command(command, false, {});
 
     for (const auto& [id, desc]: trace_inst->desc->descriptors) {
-         // Increment the expected worker cores counter due to trace programs completion
-        this->expected_num_workers_completed[id.to_index()] += desc.num_completion_worker_cores;
+        auto index = id.to_index();
+        // Increment the expected worker cores counter due to trace programs completion
+        this->expected_num_workers_completed[index] += desc.num_completion_worker_cores;
         // After trace runs, the rdptr on each worker will be incremented by the number of programs in the trace
         // Update the wptr on host to match state. If the trace doesn't execute on a
         // class of worker (unicast or multicast), it doesn't reset or modify the
@@ -2574,7 +2594,7 @@ void HWCommandQueue::enqueue_trace(const uint32_t trace_id, bool blocking) {
         // The config buffer manager is unaware of what memory is used inside the trace, so mark all memory as used so that
         // it will force a stall and avoid stomping on in-use state.
         // TODO(jbauman): Reuse old state from the trace.
-        this->config_buffer_mgr[id.to_index()].mark_completely_full(this->expected_num_workers_completed[id.to_index()]);
+        this->config_buffer_mgr[index].mark_completely_full(this->expected_num_workers_completed[index]);
     }
     if (blocking) {
         this->finish(trace_inst->desc->sub_device_ids);
@@ -2941,10 +2961,11 @@ void HWCommandQueue::record_end() {
     // Copy the desc keys into a separate vector. When enqueuing traces, we sometimes need to pass sub-device ids separately
     this->trace_ctx->sub_device_ids.reserve(this->trace_ctx->descriptors.size());
     for (const auto& [id, _]: this->trace_ctx->descriptors) {
+        auto index = id.to_index();
         this->trace_ctx->sub_device_ids.push_back(id);
         // config_buffer_mgr reflects the state inside the trace, not on the current device, so reset it.
         // TODO(jbauman): Use a temporary WorkingBufferSetMgr when recording a trace.
-        this->get_config_buffer_mgr(id).mark_completely_full(this->expected_num_workers_completed[id.to_index()]);
+        this->get_config_buffer_mgr(index).mark_completely_full(this->expected_num_workers_completed[index]);
     }
     this->tid = std::nullopt;
     this->trace_ctx = nullptr;
@@ -2959,7 +2980,7 @@ void HWCommandQueue::terminate() {
     this->enqueue_command(command, false, {});
 }
 
-WorkerConfigBufferMgr& HWCommandQueue::get_config_buffer_mgr(SubDeviceId sub_device_id) { return config_buffer_mgr[sub_device_id.to_index()]; }
+WorkerConfigBufferMgr& HWCommandQueue::get_config_buffer_mgr(uint32_t index) { return config_buffer_mgr[index]; }
 
 void HWCommandQueue::reset_config_buffer_mgr(const uint32_t num_entries) {
     for (uint32_t i = 0; i < num_entries; ++i) {
@@ -2977,25 +2998,6 @@ void HWCommandQueue::reset_config_buffer_mgr(const uint32_t num_entries) {
     }
 }
 
-std::vector<std::pair<uint32_t, uint32_t>> HWCommandQueue::get_expected_workers_completed(tt::stl::Span<const SubDeviceId> sub_device_ids) const {
-    std::vector<std::pair<uint32_t, uint32_t>> expected_workers_completed;
-    if (sub_device_ids.empty()) {
-        expected_workers_completed.reserve(this->device->num_sub_devices());
-        for (uint32_t i = 0; i < this->device->num_sub_devices(); ++i) {
-            expected_workers_completed.emplace_back(i, this->expected_num_workers_completed[i]);
-        }
-    } else {
-        expected_workers_completed.reserve(sub_device_ids.size());
-        for (uint32_t i = 0; i < sub_device_ids.size(); ++i) {
-            auto sub_device_id = sub_device_ids[i];
-            TT_FATAL(sub_device_id.to_index() < this->device->num_sub_devices(), "Invalid sub_device_id: {}", sub_device_id.to_index());
-            expected_workers_completed.emplace_back(sub_device_id.to_index(), this->expected_num_workers_completed[sub_device_id.to_index()]);
-        }
-    }
-    return expected_workers_completed;
-}
-
-
 void EnqueueAddBufferToProgramImpl(
     const std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer>> buffer,
     Program& program) {
diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp
index 8c471a93eef..2671ed0fb9c 100644
--- a/tt_metal/impl/dispatch/command_queue.hpp
+++ b/tt_metal/impl/dispatch/command_queue.hpp
@@ -79,7 +79,8 @@ class EnqueueReadBufferCommand : public Command {
     Device* device;
     uint32_t command_queue_id;
     NOC noc_index;
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
+    tt::stl::Span<const uint32_t> expected_num_workers_completed;
+    tt::stl::Span<const SubDeviceId> sub_device_ids;
     uint32_t src_page_index;
     uint32_t pages_to_read;
 
@@ -92,7 +93,8 @@ class EnqueueReadBufferCommand : public Command {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         uint32_t src_page_index = 0,
         std::optional<uint32_t> pages_to_read = std::nullopt);
 
@@ -115,7 +117,8 @@ class EnqueueReadInterleavedBufferCommand : public EnqueueReadBufferCommand {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         uint32_t src_page_index = 0,
         std::optional<uint32_t> pages_to_read = std::nullopt) :
         EnqueueReadBufferCommand(
@@ -126,6 +129,7 @@ class EnqueueReadInterleavedBufferCommand : public EnqueueReadBufferCommand {
             dst,
             manager,
             expected_num_workers_completed,
+            sub_device_ids,
             src_page_index,
             pages_to_read) {}
 };
@@ -144,7 +148,8 @@ class EnqueueReadShardedBufferCommand : public EnqueueReadBufferCommand {
         Buffer& buffer,
         void* dst,
         SystemMemoryManager& manager,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         const CoreCoord& core,
         uint32_t bank_base_address,
         uint32_t src_page_index = 0,
@@ -157,6 +162,7 @@ class EnqueueReadShardedBufferCommand : public EnqueueReadBufferCommand {
             dst,
             manager,
             expected_num_workers_completed,
+            sub_device_ids,
             src_page_index,
             pages_to_read),
         core(core),
@@ -179,7 +185,8 @@ class EnqueueWriteBufferCommand : public Command {
     NOC noc_index;
     const void* src;
     const Buffer& buffer;
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
+    tt::stl::Span<const uint32_t> expected_num_workers_completed;
+    tt::stl::Span<const SubDeviceId> sub_device_ids;
     uint32_t bank_base_address;
     uint32_t padded_page_size;
     uint32_t dst_page_index;
@@ -195,7 +202,8 @@ class EnqueueWriteBufferCommand : public Command {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         uint32_t bank_base_address,
         uint32_t padded_page_size,
         uint32_t dst_page_index = 0,
@@ -222,7 +230,8 @@ class EnqueueWriteInterleavedBufferCommand : public EnqueueWriteBufferCommand {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         uint32_t bank_base_address,
         uint32_t padded_page_size,
         uint32_t dst_page_index = 0,
@@ -236,6 +245,7 @@ class EnqueueWriteInterleavedBufferCommand : public EnqueueWriteBufferCommand {
             manager,
             issue_wait,
             expected_num_workers_completed,
+            sub_device_ids,
             bank_base_address,
             padded_page_size,
             dst_page_index,
@@ -261,7 +271,8 @@ class EnqueueWriteShardedBufferCommand : public EnqueueWriteBufferCommand {
         const void* src,
         SystemMemoryManager& manager,
         bool issue_wait,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         uint32_t bank_base_address,
         const std::shared_ptr<const BufferPageMapping>& buffer_page_mapping,
         const CoreCoord& core,
@@ -277,6 +288,7 @@ class EnqueueWriteShardedBufferCommand : public EnqueueWriteBufferCommand {
             manager,
             issue_wait,
             expected_num_workers_completed,
+            sub_device_ids,
             bank_base_address,
             padded_page_size,
             dst_page_index,
@@ -346,7 +358,8 @@ class EnqueueRecordEventCommand : public Command {
     NOC noc_index;
     SystemMemoryManager& manager;
     uint32_t event_id;
-    tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed;
+    tt::stl::Span<const uint32_t> expected_num_workers_completed;
+    tt::stl::Span<const SubDeviceId> sub_device_ids;
     bool clear_count;
     bool write_barrier;
 
@@ -357,7 +370,8 @@ class EnqueueRecordEventCommand : public Command {
         NOC noc_index,
         SystemMemoryManager& manager,
         uint32_t event_id,
-        tt::stl::Span<const std::pair<uint32_t, uint32_t>> expected_num_workers_completed,
+        tt::stl::Span<const uint32_t> expected_num_workers_completed,
+        tt::stl::Span<const SubDeviceId> sub_device_ids,
         bool clear_count = false,
         bool write_barrier = true);
 
@@ -511,6 +525,7 @@ class HWCommandQueue {
     void record_begin(const uint32_t tid, std::shared_ptr<detail::TraceDescriptor> ctx);
     void record_end();
     void set_num_worker_sems_on_dispatch(uint32_t num_worker_sems);
+    void set_go_signal_noc_data_on_dispatch(const vector_memcpy_aligned<uint32_t>& go_signal_noc_data);
     void reset_worker_state(bool reset_launch_msg_state);
 
    private:
@@ -571,9 +586,8 @@ class HWCommandQueue {
     void increment_num_entries_in_completion_q();
     void set_exit_condition();
 
-    WorkerConfigBufferMgr& get_config_buffer_mgr(SubDeviceId sub_device_id);
+    WorkerConfigBufferMgr& get_config_buffer_mgr(uint32_t index);
     void reset_config_buffer_mgr(const uint32_t num_entries);
-    std::vector<std::pair<uint32_t, uint32_t>> get_expected_workers_completed(tt::stl::Span<const SubDeviceId> sub_device_ids) const;
 
     friend void EnqueueTraceImpl(CommandQueue& cq, uint32_t trace_id, bool blocking);
     friend void EnqueueProgramImpl(
diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp
index 40ad90a523b..5be831d9f6e 100644
--- a/tt_metal/impl/dispatch/command_queue_interface.hpp
+++ b/tt_metal/impl/dispatch/command_queue_interface.hpp
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
+#include <climits>
 #include <magic_enum.hpp>
 #include <mutex>
 
@@ -67,8 +68,14 @@ struct dispatch_constants {
     using prefetch_q_entry_type = uint16_t;
 
     static constexpr uint8_t MAX_NUM_HW_CQS = 2;
+    // Currently arbitrary, can be adjusted as needed at the cost of more L1 memory
     static constexpr uint32_t DISPATCH_MESSAGE_ENTRIES = 16;
     static constexpr uint32_t DISPATCH_MESSAGES_MAX_OFFSET = std::numeric_limits<decltype(go_msg_t::dispatch_message_offset)>::max();
+    static_assert(dispatch_constants::DISPATCH_MESSAGE_ENTRIES <= sizeof(decltype(CQDispatchCmd::notify_dispatch_s_go_signal.index_bitmask)) * CHAR_BIT);
+    // Currently arbitrary, can be adjusted as needed at the cost of more static memory
+    static constexpr uint32_t DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES = 64;
+    static constexpr uint32_t GO_SIGNAL_BITS_PER_TXN_TYPE = 4;
+    static constexpr uint32_t GO_SIGNAL_MAX_TXNS_PER_TYPE = 1 << GO_SIGNAL_BITS_PER_TXN_TYPE - 1;
 
     static constexpr uint32_t PREFETCH_Q_LOG_MINSIZE = 4;
 
diff --git a/tt_metal/impl/dispatch/cq_commands.hpp b/tt_metal/impl/dispatch/cq_commands.hpp
index aa30a0ba85e..6de0c87fd2e 100644
--- a/tt_metal/impl/dispatch/cq_commands.hpp
+++ b/tt_metal/impl/dispatch/cq_commands.hpp
@@ -50,6 +50,7 @@ enum CQDispatchCmdId : uint8_t {
     CQ_DISPATCH_CMD_SEND_GO_SIGNAL = 15,
     CQ_DISPATCH_NOTIFY_SLAVE_GO_SIGNAL = 16,
     CQ_DISPATCH_SET_NUM_WORKER_SEMS = 17,
+    CQ_DISPATCH_SET_GO_SIGNAL_NOC_DATA = 18,
     CQ_DISPATCH_CMD_MAX_COUNT,              // for checking legal IDs
 };
 
@@ -259,8 +260,9 @@ struct CQDispatchSetUnicastOnlyCoresCmd {
 
 struct CQDispatchGoSignalMcastCmd {
     uint32_t go_signal;
-    uint8_t num_mcast_txns; // Cmd expects noc_mcast_coords and num_mcast_dests follow the cmd
-    uint8_t num_unicast_txns; // Cmd expects noc_unicast_coords to follow the mcast data
+    uint8_t num_mcast_txns;
+    uint8_t num_unicast_txns;
+    uint8_t noc_data_start_index;
     uint32_t wait_count;
     uint32_t wait_addr;
 } __attribute__((packed));
@@ -278,6 +280,12 @@ struct CQDispatchSetNumWorkerSemsCmd {
     uint32_t num_worker_sems;
 } __attribute__ ((packed));
 
+struct CQDispatchSetGoSignalNocDataCmd {
+    uint8_t pad1;
+    uint16_t pad2;
+    uint32_t num_words;
+} __attribute__ ((packed));
+
 struct CQDispatchCmd {
     CQDispatchBaseCmd base;
 
@@ -295,6 +303,7 @@ struct CQDispatchCmd {
         CQDispatchSetUnicastOnlyCoresCmd set_unicast_only_cores;
         CQDispatchNotifySlaveGoSignalCmd notify_dispatch_s_go_signal;
         CQDispatchSetNumWorkerSemsCmd set_num_worker_sems;
+        CQDispatchSetGoSignalNocDataCmd set_go_signal_noc_data;
     } __attribute__((packed));
 };
 
diff --git a/tt_metal/impl/dispatch/debug_tools.cpp b/tt_metal/impl/dispatch/debug_tools.cpp
index 66ca865673d..e49f63c41bc 100644
--- a/tt_metal/impl/dispatch/debug_tools.cpp
+++ b/tt_metal/impl/dispatch/debug_tools.cpp
@@ -182,6 +182,10 @@ uint32_t dump_dispatch_cmd(CQDispatchCmd *cmd, uint32_t cmd_addr, std::ofstream
                 cq_file << fmt::format(
                     " (num_worker_sems={})", val(cmd->set_num_worker_sems.num_worker_sems));
                 break;
+            case CQ_DISPATCH_SET_GO_SIGNAL_NOC_DATA:
+                cq_file << fmt::format(
+                    " (num_words={})", val(cmd->set_go_signal_noc_data.num_words));
+                break;
             // These commands don't have any additional data to dump.
             case CQ_DISPATCH_CMD_ILLEGAL: break;
             case CQ_DISPATCH_CMD_GO: break;
diff --git a/tt_metal/impl/dispatch/device_command.hpp b/tt_metal/impl/dispatch/device_command.hpp
index e070e7b4c12..71a0e156eb8 100644
--- a/tt_metal/impl/dispatch/device_command.hpp
+++ b/tt_metal/impl/dispatch/device_command.hpp
@@ -254,14 +254,13 @@ class DeviceCommand {
         uint32_t wait_count,
         uint32_t go_signal,
         uint32_t wait_addr,
-        uint32_t num_mcast_txns,
-        uint32_t num_unicast_txns,
-        const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data,
+        uint8_t num_mcast_txns,
+        uint8_t num_unicast_txns,
+        uint8_t noc_data_start_index,
         DispatcherSelect dispatcher_type) {
         TT_ASSERT(num_mcast_txns <= std::numeric_limits<uint8_t>::max(), "Number of mcast destinations {} exceeds maximum {}", num_mcast_txns, std::numeric_limits<uint8_t>::max());
         TT_ASSERT(num_unicast_txns <= std::numeric_limits<uint8_t>::max(), "Number of unicast destinations {} exceeds maximum {}", num_unicast_txns, std::numeric_limits<uint8_t>::max());
-        uint32_t total_data_size = noc_mcast_unicast_data.size() * sizeof(uint32_t);
-        uint32_t lengthB = sizeof(CQDispatchCmd) + total_data_size;
+        uint32_t lengthB = sizeof(CQDispatchCmd);
         TT_ASSERT(lengthB <= (1 << dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE), "Data for go signal mcast must fit within one page");
         this->add_prefetch_relay_inline(true, lengthB, dispatcher_type);
         auto initialize_mcast_cmd = [&](CQDispatchCmd *mcast_cmd) {
@@ -271,6 +270,7 @@ class DeviceCommand {
             mcast_cmd->mcast.wait_count = wait_count;
             mcast_cmd->mcast.num_mcast_txns = num_mcast_txns;
             mcast_cmd->mcast.num_unicast_txns = num_unicast_txns;
+            mcast_cmd->mcast.noc_data_start_index = noc_data_start_index;
             mcast_cmd->mcast.wait_addr = wait_addr;
         };
         CQDispatchCmd *mcast_cmd_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
@@ -282,8 +282,6 @@ class DeviceCommand {
         } else {
             initialize_mcast_cmd(mcast_cmd_dst);
         }
-        uint8_t * noc_coord_dst = this->reserve_space<uint8_t *>(total_data_size);
-        this->memcpy(noc_coord_dst, noc_mcast_unicast_data.data(), total_data_size);
         this->cmd_write_offsetB = align(this->cmd_write_offsetB, this->pcie_alignment);
     }
 
@@ -409,6 +407,30 @@ class DeviceCommand {
         this->cmd_write_offsetB = align(this->cmd_write_offsetB, this->pcie_alignment);
     }
 
+    void add_dispatch_set_go_signal_noc_data(const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data, DispatcherSelect dispatcher_type) {
+        TT_ASSERT(noc_mcast_unicast_data.size() <= dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES, "Number of words {} exceeds maximum {}", noc_mcast_unicast_data.size(), dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES);
+        auto data_sizeB = noc_mcast_unicast_data.size() * sizeof(uint32_t);
+        uint32_t lengthB = sizeof(CQDispatchCmd) + data_sizeB;
+        TT_ASSERT(lengthB <= (1 << dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE), "Data for go signal mcast must fit within one page");
+        this->add_prefetch_relay_inline(true, lengthB, dispatcher_type);
+        auto initialize_set_go_signal_noc_data_cmd = [&] (CQDispatchCmd *set_go_signal_noc_data_cmd) {
+            set_go_signal_noc_data_cmd->base.cmd_id = CQ_DISPATCH_SET_GO_SIGNAL_NOC_DATA;
+            set_go_signal_noc_data_cmd->set_go_signal_noc_data.num_words = noc_mcast_unicast_data.size();
+        };
+        CQDispatchCmd *set_go_signal_noc_data_cmd_dst = this->reserve_space<CQDispatchCmd *>(sizeof(CQDispatchCmd));
+        if constexpr (hugepage_write) {
+            alignas(MEMCPY_ALIGNMENT) CQDispatchCmd set_go_signal_noc_data_cmd;
+            initialize_set_go_signal_noc_data_cmd(&set_go_signal_noc_data_cmd);
+            this->memcpy(set_go_signal_noc_data_cmd_dst, &set_go_signal_noc_data_cmd, sizeof(CQDispatchCmd));
+        } else {
+            initialize_set_go_signal_noc_data_cmd(set_go_signal_noc_data_cmd_dst);
+        }
+        uint32_t * noc_mcast_unicast_data_dst = this->reserve_space<uint32_t *>(data_sizeB);
+        this->memcpy(noc_mcast_unicast_data_dst, noc_mcast_unicast_data.data(), data_sizeB);
+        this->cmd_write_offsetB = align(this->cmd_write_offsetB, this->pcie_alignment);
+    }
+
+
     void add_dispatch_set_write_offsets(uint32_t write_offset0, uint32_t write_offset1, uint32_t write_offset2) {
         this->add_prefetch_relay_inline(true, sizeof(CQDispatchCmd));
         auto initialize_write_offset_cmd = [&](CQDispatchCmd *write_offset_cmd) {
diff --git a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
index 14345084738..a11289518e5 100644
--- a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
+++ b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
@@ -43,14 +43,15 @@ constexpr uint32_t prefetch_h_max_credits = get_compile_time_arg_val(18);
 constexpr uint32_t packed_write_max_unicast_sub_cmds = get_compile_time_arg_val(19); // Number of cores in compute grid
 constexpr uint32_t dispatch_s_sync_sem_base_addr = get_compile_time_arg_val(20);
 constexpr uint32_t max_num_worker_sems = get_compile_time_arg_val(21); // maximum number of worker semaphores
-constexpr uint32_t mcast_go_signal_addr = get_compile_time_arg_val(22);
-constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(23);
-constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(24);
-constexpr uint32_t host_completion_q_wr_ptr = get_compile_time_arg_val(25);
-constexpr uint32_t dev_completion_q_wr_ptr = get_compile_time_arg_val(26);
-constexpr uint32_t dev_completion_q_rd_ptr = get_compile_time_arg_val(27);
-constexpr uint32_t is_d_variant = get_compile_time_arg_val(28);
-constexpr uint32_t is_h_variant = get_compile_time_arg_val(29);
+constexpr uint32_t max_num_go_signal_noc_data_entries = get_compile_time_arg_val(22); // maximum number of go signal data words
+constexpr uint32_t mcast_go_signal_addr = get_compile_time_arg_val(23);
+constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(24);
+constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(25);
+constexpr uint32_t host_completion_q_wr_ptr = get_compile_time_arg_val(26);
+constexpr uint32_t dev_completion_q_wr_ptr = get_compile_time_arg_val(27);
+constexpr uint32_t dev_completion_q_rd_ptr = get_compile_time_arg_val(28);
+constexpr uint32_t is_d_variant = get_compile_time_arg_val(29);
+constexpr uint32_t is_h_variant = get_compile_time_arg_val(30);
 
 constexpr uint8_t upstream_noc_index = UPSTREAM_NOC_INDEX;
 constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
@@ -111,6 +112,8 @@ static GoSignalState go_signal_state_ring_buf[4];
 static uint8_t go_signal_state_wr_ptr = 0;
 static uint8_t go_signal_state_rd_ptr = 0;
 
+static uint32_t go_signal_noc_data[max_num_go_signal_noc_data_entries] = {0};
+
 FORCE_INLINE volatile uint32_t *get_cq_completion_read_ptr() {
     return reinterpret_cast<volatile uint32_t *>(dev_completion_q_rd_ptr);
 }
@@ -819,16 +822,18 @@ void process_go_signal_mcast_cmd() {
     *aligned_go_signal_storage = cmd->mcast.go_signal;
 
     while (*worker_sem_addr < cmd->mcast.wait_count);
-    volatile uint32_t tt_l1_ptr *data_ptr = reinterpret_cast<volatile uint32_t tt_l1_ptr *>(cmd_ptr + sizeof(CQDispatchCmd));
+    uint8_t go_signal_noc_data_idx = cmd->mcast.noc_data_start_index;
+    // send go signal update here
     for (uint32_t i = 0, num_mcasts = cmd->mcast.num_mcast_txns; i < num_mcasts; ++i) {
-        uint64_t dst = get_noc_addr_helper(*(data_ptr++), mcast_go_signal_addr);
-        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), *(data_ptr++));
+        uint64_t dst = get_noc_addr_helper(go_signal_noc_data[go_signal_noc_data_idx++], mcast_go_signal_addr);
+        // packed_write_max_unicast_sub_cmds is the total number of compute cores (num_mcast_dests for this txn)
+        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), go_signal_noc_data[go_signal_noc_data_idx++]);
     }
     for (uint32_t i = 0, num_unicasts = cmd->mcast.num_unicast_txns; i < num_unicasts; ++i) {
-        uint64_t dst = get_noc_addr_helper(*(data_ptr++), unicast_go_signal_addr);
+        uint64_t dst = get_noc_addr_helper(go_signal_noc_data[go_signal_noc_data_idx++], unicast_go_signal_addr);
         noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
     }
-    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
+    cmd_ptr += sizeof(CQDispatchCmd);
 }
 
 FORCE_INLINE
@@ -861,6 +866,18 @@ void process_notify_dispatch_s_go_signal_cmd() {
     cmd_ptr += sizeof(CQDispatchCmd);
 }
 
+FORCE_INLINE
+void set_go_signal_noc_data() {
+    volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
+    uint32_t num_words = cmd->set_go_signal_noc_data.num_words;
+    ASSERT(num_words <= max_num_go_signal_noc_data_entries);
+    volatile tt_l1_ptr uint32_t *data_ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t *>(cmd_ptr + sizeof(CQDispatchCmd));
+    for (uint32_t i = 0; i < num_words; ++i) {
+        go_signal_noc_data[i] = *(data_ptr++);
+    }
+    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
+}
+
 static inline bool process_cmd_d(uint32_t &cmd_ptr, uint32_t* l1_cache, uint32_t& block_noc_writes_to_clear, uint32_t block_next_start_addr[]) {
     bool done = false;
 
@@ -963,9 +980,14 @@ static inline bool process_cmd_d(uint32_t &cmd_ptr, uint32_t* l1_cache, uint32_t
         case CQ_DISPATCH_SET_NUM_WORKER_SEMS:
             DPRINT << "cmd_set_num_worker_sems" << ENDL();
             // This command is only used by dispatch_s
+            ASSERT(0);
             cmd_ptr += sizeof(CQDispatchCmd);
             break;
 
+        case CQ_DISPATCH_SET_GO_SIGNAL_NOC_DATA:
+            set_go_signal_noc_data();
+            break;
+
         case CQ_DISPATCH_CMD_SET_WRITE_OFFSET:
             DPRINT << "write offset: " <<
                 cmd->set_write_offset.offset0 << " " <<
diff --git a/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp b/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
index a651b7c04b2..7fc0ad22e2e 100644
--- a/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
+++ b/tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
@@ -35,6 +35,7 @@ constexpr uint32_t unicast_go_signal_addr = get_compile_time_arg_val(7);
 constexpr uint32_t distributed_dispatcher = get_compile_time_arg_val(8); // dispatch_s and dispatch_d running on different cores
 constexpr uint32_t worker_sem_base_addr = get_compile_time_arg_val(9); // workers update the semaphore at this location to signal completion
 constexpr uint32_t max_num_worker_sems = get_compile_time_arg_val(10); // maximum number of worker semaphores
+constexpr uint32_t max_num_go_signal_noc_data_entries = get_compile_time_arg_val(11); // maximum number of go signal data words
 
 constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
 constexpr uint32_t dispatch_d_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X, DOWNSTREAM_NOC_Y));
@@ -52,6 +53,8 @@ static uint32_t cmd_ptr;
 // To minimize the number of writes from dispatch_s to dispatch_d, locally track dispatch_d's copy.
 static uint32_t worker_count_update_for_dispatch_d[max_num_worker_sems] = {0};
 
+static uint32_t go_signal_noc_data[max_num_go_signal_noc_data_entries] = {0};
+
 static uint32_t num_worker_sems = 1;
 
 FORCE_INLINE
@@ -173,19 +176,19 @@ void process_go_signal_mcast_cmd() {
     volatile uint32_t tt_l1_ptr* aligned_go_signal_storage = (volatile uint32_t tt_l1_ptr*)cmd_ptr;
     *aligned_go_signal_storage = cmd->mcast.go_signal;
 
+    uint8_t go_signal_noc_data_idx = cmd->mcast.noc_data_start_index;
     // send go signal update here
-    volatile uint32_t tt_l1_ptr *data_ptr = reinterpret_cast<volatile uint32_t tt_l1_ptr *>(cmd_ptr + sizeof(CQDispatchCmd));
     for (uint32_t i = 0, num_mcasts = cmd->mcast.num_mcast_txns; i < num_mcasts; ++i) {
-        uint64_t dst = get_noc_addr_helper(*(data_ptr++), mcast_go_signal_addr);
+        uint64_t dst = get_noc_addr_helper(go_signal_noc_data[go_signal_noc_data_idx++], mcast_go_signal_addr);
         // packed_write_max_unicast_sub_cmds is the total number of compute cores (num_mcast_dests for this txn)
-        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), *(data_ptr++));
+        noc_async_write_multicast_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t), go_signal_noc_data[go_signal_noc_data_idx++]);
     }
     for (uint32_t i = 0, num_unicasts = cmd->mcast.num_unicast_txns; i < num_unicasts; ++i) {
-        uint64_t dst = get_noc_addr_helper(*(data_ptr++), unicast_go_signal_addr);
+        uint64_t dst = get_noc_addr_helper(go_signal_noc_data[go_signal_noc_data_idx++], unicast_go_signal_addr);
         noc_async_write_one_packet((uint32_t)(aligned_go_signal_storage), dst, sizeof(uint32_t));
     }
     update_worker_completion_count_on_dispatch_d();
-    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
+    cmd_ptr += sizeof(CQDispatchCmd);
 }
 
 FORCE_INLINE
@@ -218,6 +221,18 @@ void set_num_worker_sems() {
     cmd_ptr += sizeof(CQDispatchCmd);
 }
 
+FORCE_INLINE
+void set_go_signal_noc_data() {
+    volatile CQDispatchCmd tt_l1_ptr *cmd = (volatile CQDispatchCmd tt_l1_ptr *)cmd_ptr;
+    uint32_t num_words = cmd->set_go_signal_noc_data.num_words;
+    ASSERT(num_words <= max_num_go_signal_noc_data_entries);
+    volatile tt_l1_ptr uint32_t *data_ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t *>(cmd_ptr + sizeof(CQDispatchCmd));
+    for (uint32_t i = 0; i < num_words; ++i) {
+        go_signal_noc_data[i] = *(data_ptr++);
+    }
+    cmd_ptr = round_up_pow2((uint32_t)data_ptr, L1_ALIGNMENT);
+}
+
 void kernel_main() {
     DPRINT << "dispatch_s : start" << ENDL();
     // Initialize customized command buffers.
@@ -237,6 +252,9 @@ void kernel_main() {
             case CQ_DISPATCH_SET_NUM_WORKER_SEMS:
                 set_num_worker_sems();
                 break;
+            case CQ_DISPATCH_SET_GO_SIGNAL_NOC_DATA:
+                set_go_signal_noc_data();
+                break;
             case CQ_DISPATCH_CMD_WAIT:
                 process_dispatch_s_wait_cmd();
                 break;
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index 23099f93548..62db5fb21fd 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -1310,6 +1310,10 @@ const std::vector<SubDeviceId> &detail::Program_::determine_sub_device_ids(const
         } else {
             std::unordered_set<SubDeviceId> used_sub_device_ids;
             auto find_sub_device_ids = [&] (HalProgrammableCoreType core_type) {
+                auto core_type_index = hal.get_programmable_core_type_index(core_type);
+                if (core_type_index == -1) {
+                    return;
+                }
                 const auto& program_kgs = this->get_kernel_groups(hal.get_programmable_core_type_index(core_type));
                 uint32_t num_intersections = 0;
                 uint32_t num_cores = 0;
@@ -1516,9 +1520,9 @@ uint32_t detail::Program_::get_sem_base_addr(Device *device, CoreCoord logical_c
     // TODO: This restriction can be lifted once we have support for programs spanning multiple sub-devices
     // Semaphores across sub-devices are expected to have the same address
     TT_FATAL(sub_device_ids.size() == 1, "get_sem_base_addr currently only supports programs spanning a single sub-device");
-    auto sub_device_id = sub_device_ids[0];
+    auto sub_device_index = sub_device_ids[0].to_index();
     uint32_t base_addr = device->using_fast_dispatch
-                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_id).get_last_slot_addr(
+                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_index).get_last_slot_addr(
                                    programmable_core_type)
                              : hal.get_dev_addr(programmable_core_type, HalL1MemAddrType::KERNEL_CONFIG);
 
@@ -1538,9 +1542,9 @@ uint32_t detail::Program_::get_cb_base_addr(Device *device, CoreCoord logical_co
     // TODO: This restriction can be lifted once this function is changed to return a vector of addresses
     // Addresses are not the same across sub-devices
     TT_FATAL(sub_device_ids.size() == 1, "get_sem_base_addr currently only supports programs spanning a single sub-device");
-    auto sub_device_id = sub_device_ids[0];
+    auto sub_device_index = sub_device_ids[0].to_index();
     uint32_t base_addr = device->using_fast_dispatch
-                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_id).get_last_slot_addr(
+                             ? this->last_used_command_queue_for_testing->get_config_buffer_mgr(sub_device_index).get_last_slot_addr(
                                    programmable_core_type)
                              : hal.get_dev_addr(programmable_core_type, HalL1MemAddrType::KERNEL_CONFIG);
 
diff --git a/tt_metal/impl/sub_device/sub_device_manager.cpp b/tt_metal/impl/sub_device/sub_device_manager.cpp
index 1e9092c26c7..5cca2f70d54 100644
--- a/tt_metal/impl/sub_device/sub_device_manager.cpp
+++ b/tt_metal/impl/sub_device/sub_device_manager.cpp
@@ -2,6 +2,8 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
+#include <vector>
+
 #include "tt_metal/impl/sub_device/sub_device_manager.hpp"
 
 #include "tt_metal/common/assert.hpp"
@@ -27,6 +29,7 @@ SubDeviceManager::SubDeviceManager(
     device_(device) {
     TT_ASSERT(device != nullptr, "Device must not be null");
     this->validate_sub_devices();
+    this->populate_sub_device_ids();
     this->populate_num_cores();
     this->populate_sub_allocators();
     this->populate_noc_data();
@@ -47,6 +50,7 @@ SubDeviceManager::SubDeviceManager(Device *device, std::unique_ptr<Allocator> &&
     this->sub_devices_ = {SubDevice(std::array{
         CoreRangeSet(CoreRange({0, 0}, {compute_grid_size.x - 1, compute_grid_size.y - 1})),
         CoreRangeSet(std::move(active_eth_core_ranges))})};
+    this->populate_sub_device_ids();
     // No need to validate sub-devices since this constructs a sub-device of the entire grid
     this->populate_num_cores();
     this->sub_device_allocators_.push_back(std::move(global_allocator));
@@ -71,24 +75,37 @@ SubDeviceManager::~SubDeviceManager() {
 
 uint8_t SubDeviceManager::num_sub_devices() const { return this->sub_devices_.size(); }
 
+const std::vector<SubDeviceId> &SubDeviceManager::get_sub_device_ids() const {
+    return this->sub_device_ids_;
+}
+
 const SubDevice& SubDeviceManager::sub_device(SubDeviceId sub_device_id) const {
     auto sub_device_index = this->get_sub_device_index(sub_device_id);
     return sub_devices_[sub_device_index];
 }
 
-const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_mcast_data(SubDeviceId sub_device_id) const {
+const vector_memcpy_aligned<uint32_t> &SubDeviceManager::noc_mcast_unicast_data() const {
+    return noc_mcast_unicast_data_;
+}
+
+uint8_t SubDeviceManager::num_noc_mcast_txns(SubDeviceId sub_device_id) const {
     auto sub_device_index = this->get_sub_device_index(sub_device_id);
-    return noc_mcast_data_[sub_device_index];
+    return this->num_noc_mcast_txns_[sub_device_index];
 }
 
-const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_unicast_data(SubDeviceId sub_device_id) const {
+uint8_t SubDeviceManager::num_noc_unicast_txns(SubDeviceId sub_device_id) const {
     auto sub_device_index = this->get_sub_device_index(sub_device_id);
-    return noc_unicast_data_[sub_device_index];
+    return this->num_noc_unicast_txns_[sub_device_index];
 }
 
-const vector_memcpy_aligned<uint32_t>& SubDeviceManager::noc_mcast_unicast_data(SubDeviceId sub_device_id) const {
+uint8_t SubDeviceManager::noc_mcast_data_start_index(SubDeviceId sub_device_id) const {
     auto sub_device_index = this->get_sub_device_index(sub_device_id);
-    return noc_mcast_unicast_data_[sub_device_index];
+    return this->noc_mcast_data_start_index_[sub_device_index];
+}
+
+uint8_t SubDeviceManager::noc_unicast_data_start_index(SubDeviceId sub_device_id) const {
+    auto sub_device_index = this->get_sub_device_index(sub_device_id);
+    return this->noc_unicast_data_start_index_[sub_device_index];
 }
 
 const std::unique_ptr<Allocator> &SubDeviceManager::get_initialized_allocator(SubDeviceId sub_device_id) const {
@@ -151,6 +168,7 @@ uint8_t SubDeviceManager::get_sub_device_index(SubDeviceId sub_device_id) const
 }
 
 void SubDeviceManager::validate_sub_devices() const {
+    TT_FATAL(this->sub_devices_.size() <= SubDeviceManager::MAX_NUM_SUB_DEVICES, "Too many sub devices specified");
     // Validate sub device cores fit inside the device grid
     const auto& compute_grid_size = this->device_->compute_with_storage_grid_size();
     CoreRange device_worker_cores = CoreRange({0, 0}, {compute_grid_size.x - 1, compute_grid_size.y - 1});
@@ -189,6 +207,13 @@ void SubDeviceManager::validate_sub_devices() const {
     }
 }
 
+void SubDeviceManager::populate_sub_device_ids() {
+    this->sub_device_ids_.resize(this->num_sub_devices());
+    for (uint8_t i = 0; i < this->num_sub_devices(); ++i) {
+        this->sub_device_ids_[i] = SubDeviceId{i};
+    }
+}
+
 void SubDeviceManager::populate_num_cores() {
     for (const auto& sub_device : this->sub_devices_) {
         for (uint32_t i = 0; i < NumHalProgrammableCoreTypes; ++i) {
@@ -256,42 +281,41 @@ void SubDeviceManager::populate_sub_allocators() {
 
 void SubDeviceManager::populate_noc_data() {
     uint32_t num_sub_devices = this->num_sub_devices();
-    this->noc_mcast_data_.resize(num_sub_devices);
-    this->noc_unicast_data_.resize(num_sub_devices);
-    this->noc_mcast_unicast_data_.resize(num_sub_devices);
+    this->num_noc_mcast_txns_.resize(num_sub_devices);
+    this->num_noc_unicast_txns_.resize(num_sub_devices);
+    this->noc_mcast_data_start_index_.resize(num_sub_devices);
+    this->noc_unicast_data_start_index_.resize(num_sub_devices);
 
     NOC noc_index = this->device_->dispatch_go_signal_noc();
-
+    uint32_t idx = 0;
     for (uint32_t i = 0; i < num_sub_devices; ++i) {
         const auto& tensix_cores = this->sub_devices_[i].cores(HalProgrammableCoreType::TENSIX);
         const auto& eth_cores = this->sub_devices_[i].cores(HalProgrammableCoreType::ACTIVE_ETH);
 
-        uint32_t idx = 0;
-        auto& noc_mcast_data = this->noc_mcast_data_[i];
-        noc_mcast_data.resize(tensix_cores.size() * 2);
+        this->noc_mcast_data_start_index_[i] = idx;
+        this->num_noc_mcast_txns_[i] = tensix_cores.size();
+        this->noc_mcast_unicast_data_.resize(idx + this->num_noc_mcast_txns_[i] * 2);
         for (const auto& core_range : tensix_cores.ranges()) {
             auto physical_start =
                 this->device_->physical_core_from_logical_core(core_range.start_coord, CoreType::WORKER);
             auto physical_end = this->device_->physical_core_from_logical_core(core_range.end_coord, CoreType::WORKER);
             auto physical_core_range = CoreRange(physical_start, physical_end);
-            noc_mcast_data[idx++] = this->device_->get_noc_multicast_encoding(noc_index, physical_core_range);
-            noc_mcast_data[idx++] = core_range.size();
+            this->noc_mcast_unicast_data_[idx++] = this->device_->get_noc_multicast_encoding(noc_index, physical_core_range);
+            this->noc_mcast_unicast_data_[idx++] = core_range.size();
         }
+        this->noc_unicast_data_start_index_[i] = idx;
 
-        idx = 0;
-        auto& noc_unicast_data = this->noc_unicast_data_[i];
+        // TODO: Precompute number of eth cores and resize once
         for (const auto& core_range : eth_cores.ranges()) {
-            noc_unicast_data.resize(noc_unicast_data.size() + core_range.size());
+            this->noc_mcast_unicast_data_.resize(idx + core_range.size());
             for (const auto& core : core_range) {
                 auto physical_core = this->device_->physical_core_from_logical_core(core, CoreType::ETH);
-                noc_unicast_data[idx++] = this->device_->get_noc_unicast_encoding(noc_index, physical_core);
+                this->noc_mcast_unicast_data_[idx++] = this->device_->get_noc_unicast_encoding(noc_index, physical_core);
             }
         }
-        auto& noc_mcast_unicast_data = this->noc_mcast_unicast_data_[i];
-        noc_mcast_unicast_data.resize(noc_mcast_data.size() + noc_unicast_data.size());
-        std::copy(noc_mcast_data.begin(), noc_mcast_data.end(), noc_mcast_unicast_data.begin());
-        std::copy(
-            noc_unicast_data.begin(), noc_unicast_data.end(), noc_mcast_unicast_data.begin() + noc_mcast_data.size());
+        this->num_noc_unicast_txns_[i] = idx - this->noc_unicast_data_start_index_[i];
+
+        TT_FATAL(idx <= dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES, "NOC data entries {} exceeds maximum supported size {}", idx, dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES);
     }
 }
 
diff --git a/tt_metal/impl/sub_device/sub_device_manager.hpp b/tt_metal/impl/sub_device/sub_device_manager.hpp
index c5de9e3f0f3..2be08844702 100644
--- a/tt_metal/impl/sub_device/sub_device_manager.hpp
+++ b/tt_metal/impl/sub_device/sub_device_manager.hpp
@@ -43,13 +43,16 @@ class SubDeviceManager {
 
     ~SubDeviceManager();
 
+    const std::vector<SubDeviceId> &get_sub_device_ids() const;
     const SubDevice &sub_device(SubDeviceId sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t> &noc_mcast_data(SubDeviceId sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t> &noc_unicast_data(SubDeviceId sub_device_id) const;
-    const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data(SubDeviceId sub_device_id) const;
 
-    const std::unique_ptr<Allocator> &get_initialized_allocator(SubDeviceId sub_device_id) const;
+    const vector_memcpy_aligned<uint32_t> &noc_mcast_unicast_data() const;
+    uint8_t num_noc_mcast_txns(SubDeviceId sub_device_id) const;
+    uint8_t num_noc_unicast_txns(SubDeviceId sub_device_id) const;
+    uint8_t noc_mcast_data_start_index(SubDeviceId sub_device_id) const;
+    uint8_t noc_unicast_data_start_index(SubDeviceId sub_device_id) const;
 
+    const std::unique_ptr<Allocator> &get_initialized_allocator(SubDeviceId sub_device_id) const;
     std::unique_ptr<Allocator> &sub_device_allocator(SubDeviceId sub_device_id);
 
     std::shared_ptr<TraceBuffer> &create_trace(uint32_t tid);
@@ -66,6 +69,7 @@ class SubDeviceManager {
    private:
     void validate_sub_devices() const;
     uint8_t get_sub_device_index(SubDeviceId sub_device_id) const;
+    void populate_sub_device_ids();
     void populate_num_cores();
     void populate_sub_allocators();
     void populate_noc_data();
@@ -73,17 +77,20 @@ class SubDeviceManager {
 
     // TODO: We have a max number of sub-devices, so we can use a fixed size array
     std::vector<SubDevice> sub_devices_;
+    std::vector<SubDeviceId> sub_device_ids_;
     Device *device_;
 
     DeviceAddr local_l1_size_;
     std::vector<std::unique_ptr<Allocator>> sub_device_allocators_;
 
     std::array<uint32_t, NumHalProgrammableCoreTypes> num_cores_{};
-    std::vector<vector_memcpy_aligned<uint32_t>> noc_mcast_data_;
-    std::vector<vector_memcpy_aligned<uint32_t>> noc_unicast_data_;
-    // Concatenation of noc_mcast_data_ and noc_unicast_data_
-    // Useful for optimized copying of all coords when constructing FD commands
-    std::vector<vector_memcpy_aligned<uint32_t>> noc_mcast_unicast_data_;
+
+    // mcast txn data followed by unicast txn data
+    vector_memcpy_aligned<uint32_t> noc_mcast_unicast_data_;
+    std::vector<uint8_t> num_noc_mcast_txns_;
+    std::vector<uint8_t> num_noc_unicast_txns_;
+    std::vector<uint8_t> noc_mcast_data_start_index_;
+    std::vector<uint8_t> noc_unicast_data_start_index_;
 
     std::unordered_map<uint32_t, std::shared_ptr<TraceBuffer>> trace_buffer_pool_;
 
diff --git a/tt_metal/impl/sub_device/sub_device_types.hpp b/tt_metal/impl/sub_device/sub_device_types.hpp
index 1e4229d2cfb..c31bea115cc 100644
--- a/tt_metal/impl/sub_device/sub_device_types.hpp
+++ b/tt_metal/impl/sub_device/sub_device_types.hpp
@@ -5,6 +5,8 @@
 #pragma once
 
 #include <cstdint>
+#include <functional>
+#include <tuple>
 #include <type_traits>
 
 namespace tt::tt_metal {
@@ -85,7 +87,6 @@ struct SubDeviceManagerId {
 
 
 namespace std {
-
 template <>
 struct hash<tt::tt_metal::SubDeviceId> {
     std::size_t operator()(tt::tt_metal::SubDeviceId const &o) const {

From 5f04e3114cac0347d4eab988ccc116af2c6c3804 Mon Sep 17 00:00:00 2001
From: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
Date: Wed, 13 Nov 2024 21:45:21 -0800
Subject: [PATCH 65/69] [skip ci] Provide script for installing system
 dependencies (#14405)

INSTALLING.md instructions use pinned version of deps that may go out of
sync and vary across distro.
Avoid the need to document, by introducing a script
The new script can be called from our Dockerfiles as well

### What's changed
Script provided
Documentation updated
---
 INSTALLING.md           |  10 +--
 install_dependencies.sh | 141 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 8 deletions(-)
 create mode 100755 install_dependencies.sh

diff --git a/INSTALLING.md b/INSTALLING.md
index e872542d621..a4ddd8608ab 100644
--- a/INSTALLING.md
+++ b/INSTALLING.md
@@ -29,16 +29,10 @@ Note the current compatability matrix:
 ### Step 2. System-level dependencies
 
 ```sh
-sudo apt update
-sudo apt install software-properties-common=0.99.9.12 build-essential=12.8ubuntu1.1 python3.8-venv libhwloc-dev graphviz cmake=3.16.3-1ubuntu1.20.04.1 ninja-build
-
-wget https://apt.llvm.org/llvm.sh
-chmod u+x llvm.sh
-sudo ./llvm.sh 17
-sudo apt install libc++-17-dev libc++abi-17-dev
+sudo ./install_dependencies.sh
 ```
 - Note: `CMake 3.16` is the targetted required version of `CMake` as it aligns with the default from `Ubuntu 20.04`. Some advanced build configurations like unity builds require `CMake 3.20`.
-  - To install `CMake 3.20` see: https://github.com/tenstorrent/tt-metal/blob/4d7730d3e2d22c51d62baa1bfed861b557d9a3c0/dockerfile/ubuntu-20.04-amd64.Dockerfile#L9-L14 
+  - To install `CMake 3.20` see: https://github.com/tenstorrent/tt-metal/blob/4d7730d3e2d22c51d62baa1bfed861b557d9a3c0/dockerfile/ubuntu-20.04-amd64.Dockerfile#L9-L14
 ---
 
 ### Step 3. Hugepages
diff --git a/install_dependencies.sh b/install_dependencies.sh
new file mode 100755
index 00000000000..8c65888184c
--- /dev/null
+++ b/install_dependencies.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2024 Tenstorrent, Inc. All rights reserved.
+#
+# This script is based on `xrtdeps.sh` from the Xilinx XRT project.
+# Original source: https://github.com/Xilinx/XRT/blob/master/src/runtime_src/tools/scripts/xrtdeps.sh
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FLAVOR=`grep '^ID=' /etc/os-release | awk -F= '{print $2}' | tr -d '"'`
+VERSION=`grep '^VERSION_ID=' /etc/os-release | awk -F= '{print $2}' | tr -d '"'`
+MAJOR=${VERSION%.*}
+ARCH=`uname -m`
+
+usage()
+{
+    echo "Usage: sudo ./install_dependencies.sh [options]"
+    echo
+    echo "[--help, -h]                List this help"
+    echo "[--validate, -v]            Validate that required packages are installed"
+    exit 1
+}
+
+validate=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --help|-h)
+            usage
+            ;;
+        --validate|-v)
+            validate=1
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            usage
+            ;;
+    esac
+done
+
+ub_package_list()
+{
+    UB_LIST=(\
+     git \
+     git-lfs \
+     build-essential \
+     cmake \
+     software-properties-common \
+     libhwloc-dev \
+     graphviz \
+     ninja-build \
+     libpython3-dev \
+     libcapstone-dev \
+     python3-pip \
+     python3-dev \
+     python3.8-venv \
+     libc++-17-dev \
+     libc++abi-17-dev \
+    )
+
+}
+
+update_package_list()
+{
+    if [ $FLAVOR == "ubuntu" ]; then
+        ub_package_list
+    else
+        echo "unknown OS flavor $FLAVOR"
+        exit 1
+    fi
+}
+
+validate_packages()
+{
+    if [ $FLAVOR == "ubuntu" ]; then
+        dpkg -l "${UB_LIST[@]}"
+        #dpkg -l "${UB_LIST[@]}" > /dev/null
+    else
+        echo "unknown OS flavor $FLAVOR"
+        exit 1
+    fi
+}
+
+prep_ubuntu()
+{
+    echo "Preparing ubuntu ..."
+    # Update the list of available packages
+    apt-get update
+}
+
+install_llvm() {
+    LLVM_VERSION="17"
+    echo "Checking if LLVM $LLVM_VERSION is already installed..."
+    if command -v clang-$LLVM_VERSION &> /dev/null; then
+        echo "LLVM $LLVM_VERSION is already installed. Skipping installation."
+    else
+        echo "Installing LLVM $LLVM_VERSION..."
+        TEMP_DIR=$(mktemp -d)
+        wget -P $TEMP_DIR https://apt.llvm.org/llvm.sh
+        chmod u+x $TEMP_DIR/llvm.sh
+        $TEMP_DIR/llvm.sh $LLVM_VERSION
+        rm -rf "$TEMP_DIR"
+    fi
+}
+
+install()
+{
+    if [ $FLAVOR == "ubuntu" ]; then
+        prep_ubuntu
+
+        echo "Installing packages..."
+        apt-get install -y "${UB_LIST[@]}"
+    fi
+}
+
+if [ "$EUID" -ne 0 ]; then
+    echo "This script must be run as root. Please use sudo."
+    usage
+fi
+
+install_llvm
+
+update_package_list
+
+if [ $validate == 1 ]; then
+    validate_packages
+else
+    install
+fi

From 2665f88e22057c3c9fd17dcc2bedbd40b2ae5cac Mon Sep 17 00:00:00 2001
From: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
Date: Wed, 13 Nov 2024 22:48:39 -0800
Subject: [PATCH 66/69] Build with unity in build-artifact.yaml, don't use
 unity in build.yaml (#15027)

Optimize CI throughput by using Unity for main CI build steps.
Don't use unity in the test builds.
This way we have builds that verify with or without unity.
---
 .github/workflows/build-artifact.yaml |  3 +--
 .github/workflows/build.yaml          |  5 ++---
 build_metal.sh                        | 20 +++++++++++++++++++-
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build-artifact.yaml b/.github/workflows/build-artifact.yaml
index ea0a012949f..f0dad00701a 100644
--- a/.github/workflows/build-artifact.yaml
+++ b/.github/workflows/build-artifact.yaml
@@ -137,8 +137,7 @@ jobs:
             # NOTE: may be inaccurate if we have >1 build runner on the same machine, using the same local cache
             ccache -z
 
-            # Disable Unity builds to detect any bitrot from not building each TU independently
-            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-all --enable-ccache --disable-unity-builds"
+            build_command="./build_metal.sh --build-type ${{ inputs.build-type }} --build-all --enable-ccache"
             echo "${{ inputs.tracy }}"
             if [ "${{ inputs.tracy }}" = "true" ]; then
               build_command="$build_command --enable-profiler"
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 329fddf8acc..3d654358041 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -43,9 +43,8 @@ jobs:
             -e ARCH_NAME=${{ matrix.arch }}
           docker_os_arch: ${{ matrix.build.os }}-amd64
           run_args: |
-            nice -n 19 cmake -B build -DCMAKE_BUILD_TYPE=${{ matrix.build.type }} -DCMAKE_CXX_COMPILER=${{ matrix.build.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.build.c_compiler }} -G Ninja -DTT_METAL_BUILD_TESTS=ON -DTTNN_BUILD_TESTS=ON -DTT_UMD_BUILD_TESTS=ON
-            nice -n 19 cmake --build build
-
+            build_command="./build_metal.sh --build-type ${{ matrix.build.type }} --cxx-compiler-path ${{ matrix.build.cxx_compiler }} --c-compiler-path ${{ matrix.build.c_compiler }} --build-tests --build-programming-examples --disable-unity-builds"
+            nice -n 19 $build_command
       - name: Check disk space
         run: |
           df -h
diff --git a/build_metal.sh b/build_metal.sh
index b021b6ed543..2d9aebf7780 100755
--- a/build_metal.sh
+++ b/build_metal.sh
@@ -22,12 +22,15 @@ show_help() {
     echo "  --build-umd-tests                Build umd Testcases."
     echo "  --build-programming-examples     Build programming examples."
     echo "  --build-tt-train                 Build tt-train."
+    echo "  --build-all                      Build all optional components."
     echo "  --release                        Set the build type as Release."
     echo "  --development                    Set the build type as RelWithDebInfo."
     echo "  --debug                          Set the build type as Debug."
     echo "  --clean                          Remove build workspaces."
     echo "  --build-static-libs              Build tt_metal (not ttnn) as a static lib (BUILD_SHARED_LIBS=OFF)"
     echo "  --disable-unity-builds           Disable Unity builds"
+    echo "  --cxx-compiler-path              Set path to C++ compiler."
+    echo "  --c-compiler-path                Set path to C++ compiler."
 }
 
 clean() {
@@ -54,11 +57,13 @@ build_tt_train="OFF"
 build_static_libs="OFF"
 unity_builds="ON"
 build_all="OFF"
+cxx_compiler_path=""
+c_compiler_path=""
 
 declare -a cmake_args
 
 OPTIONS=h,e,c,t,a,m,s,u,b:,p
-LONGOPTIONS=help,build-all,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-tt-train,build-static-libs,disable-unity-builds,release,development,debug,clean
+LONGOPTIONS=help,build-all,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-tt-train,build-static-libs,disable-unity-builds,release,development,debug,clean,cxx-compiler-path:,c-compiler-path:
 
 # Parse the options
 PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTIONS --name "$0" -- "$@")
@@ -112,6 +117,10 @@ while true; do
             build_all="ON";;
         --disable-unity-builds)
 	    unity_builds="OFF";;
+        --cxx-compiler-path)
+            cxx_compiler_path="$2";shift;;
+        --c-compiler-path)
+            c_compiler_path="$2";shift;;
         --release)
             build_type="Release";;
         --development)
@@ -175,6 +184,15 @@ cmake_args+=("-G" "Ninja")
 cmake_args+=("-DCMAKE_BUILD_TYPE=$build_type")
 cmake_args+=("-DCMAKE_INSTALL_PREFIX=$cmake_install_prefix")
 
+if [ "$cxx_compiler_path" != "" ]; then
+    echo "INFO: C++ compiler: $cxx_compiler_path"
+    cmake_args+=("-DCMAKE_CXX_COMPILER=$cxx_compiler_path")
+fi
+if [ "$c_compiler_path" != "" ]; then
+    echo "INFO: C compiler: $c_compiler_path"
+    cmake_args+=("-DCMAKE_C_COMPILER=$c_compiler_path")
+fi
+
 if [ "$enable_ccache" = "ON" ]; then
     cmake_args+=("-DCMAKE_DISABLE_PRECOMPILE_HEADERS=TRUE")
     cmake_args+=("-DENABLE_CCACHE=TRUE")

From 3b8fb6c4766fb8767b859f5d9d94da1b6f7b8c95 Mon Sep 17 00:00:00 2001
From: Bryan Wilder Field Lozano <blozano@tenstorrent.com>
Date: Wed, 13 Nov 2024 23:38:09 -0800
Subject: [PATCH 67/69] Move NOC_0_X/Y behind Hal (#14920)

The NOC_0_X and NOC_0_Y macros which were used for determining noc
coordinates have been collapsed into a single Hal API. The new Hal API
can later be specialized to behave differently.
---
 CMakeLists.txt                                |  3 ++
 .../perf_microbenchmark/dispatch/common.h     | 18 ++++----
 .../dispatch/test_prefetcher.cpp              |  2 -
 .../routing/test_vc_bi_tunnel_2ep.cpp         |  1 -
 .../routing/test_vc_bi_tunnel_4ep.cpp         |  1 -
 .../watcher/test_noc_sanitize.cpp             |  9 ++--
 .../common/command_queue_fixture.hpp          |  1 -
 .../common_runtime_address_map.h              |  4 --
 tt_metal/impl/debug/watcher_device_reader.cpp |  3 +-
 tt_metal/impl/device/device.cpp               | 42 +++++++++----------
 tt_metal/impl/program/program.cpp             |  3 +-
 tt_metal/jit_build/genfiles.cpp               |  9 ++--
 tt_metal/llrt/hal.hpp                         |  5 +++
 13 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34ee4355dd3..3c1661f773d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,9 @@ target_link_libraries(
         numa
 )
 
+if(NOT DEFINED ENV{ARCH_NAME})
+    message(FATAL_ERROR "Please set ARCH_NAME to grayskull, wormhole_b0, or blackhole")
+endif(NOT DEFINED ENV{ARCH_NAME})
 string(TOUPPER "$ENV{ARCH_NAME}" ARCH_NAME_DEF)
 add_compile_definitions(ARCH_${ARCH_NAME_DEF})
 add_compile_options(
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
index 3140eec26a9..e4790b61218 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
@@ -13,7 +13,7 @@
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
 #include "noc/noc_parameters.h"
 
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
+#include "tt_metal/llrt/hal.hpp"
 
 extern bool debug_g;
 extern bool use_coherent_data_g;
@@ -490,15 +490,15 @@ void configure_kernel_variant(
     const auto& grid_size = device->grid_size();
 
     std::map<string, string> defines = {
-        {"MY_NOC_X", std::to_string(NOC_0_X(my_noc_index, grid_size.x, phys_my_core.x))},
-        {"MY_NOC_Y", std::to_string(NOC_0_Y(my_noc_index, grid_size.y, phys_my_core.y))},
+        {"MY_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(my_noc_index, grid_size.x, phys_my_core.x))},
+        {"MY_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(my_noc_index, grid_size.y, phys_my_core.y))},
         {"UPSTREAM_NOC_INDEX", std::to_string(upstream_noc_index)},
-        {"UPSTREAM_NOC_X", std::to_string(NOC_0_X(upstream_noc_index, grid_size.x, phys_upstream_core.x))},
-        {"UPSTREAM_NOC_Y", std::to_string(NOC_0_Y(upstream_noc_index, grid_size.y, phys_upstream_core.y))},
-        {"DOWNSTREAM_NOC_X", std::to_string(NOC_0_X(downstream_noc_index, grid_size.x, phys_downstream_core.x))},
-        {"DOWNSTREAM_NOC_Y", std::to_string(NOC_0_Y(downstream_noc_index, grid_size.y, phys_downstream_core.y))},
-        {"DOWNSTREAM_SLAVE_NOC_X", std::to_string(NOC_0_X(downstream_noc_index, grid_size.x, 0xff))},
-        {"DOWNSTREAM_SLAVE_NOC_Y", std::to_string(NOC_0_Y(downstream_noc_index, grid_size.y, 0xff))}, // todo, add testing with dispatch_s once it processes more than go signals
+        {"UPSTREAM_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(upstream_noc_index, grid_size.x, phys_upstream_core.x))},
+        {"UPSTREAM_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(upstream_noc_index, grid_size.y, phys_upstream_core.y))},
+        {"DOWNSTREAM_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.x, phys_downstream_core.x))},
+        {"DOWNSTREAM_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.y, phys_downstream_core.y))},
+        {"DOWNSTREAM_SLAVE_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.x, 0xff))},
+        {"DOWNSTREAM_SLAVE_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.y, 0xff))}, // todo, add testing with dispatch_s once it processes more than go signals
         {"FD_CORE_TYPE", std::to_string(0)}, // todo, support dispatch on eth
     };
     compile_args.push_back(is_dram_variant);
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
index 059e61b23a3..1e5190a55c7 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
@@ -16,8 +16,6 @@
 #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"
 #include "tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_test.hpp"
 
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
-
 #include "llrt/hal.hpp"
 
 #define CQ_PREFETCH_CMD_BARE_MIN_SIZE tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST)
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep.cpp
index 04b7d157f3c..63cb0eb7bb1 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep.cpp
@@ -6,7 +6,6 @@
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/llrt/rtoptions.hpp"
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h"
 #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"
 #include "kernels/traffic_gen_test.hpp"
 #include "tt_metal/impl/device/device.hpp"
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep.cpp
index 68d35c1d6f5..51add4572f7 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep.cpp
@@ -6,7 +6,6 @@
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/llrt/rtoptions.hpp"
 #include "tt_metal/impl/dispatch/cq_commands.hpp"
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h"
 #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"
 #include "kernels/traffic_gen_test.hpp"
 #include "tt_metal/impl/device/device.hpp"
diff --git a/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize.cpp b/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize.cpp
index ecb5bc1afcb..416ffece9bd 100644
--- a/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize.cpp
@@ -8,7 +8,10 @@
 #include "tt_metal/detail/tt_metal.hpp"
 #include "tt_metal/host_api.hpp"
 #include "common/bfloat16.hpp"
-#include "hostdevcommon/common_runtime_address_map.h"
+
+// Do we really want to expose Hal like this?
+// This looks like an API level test
+#include "llrt/hal.hpp"
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // A test for checking watcher NOC sanitization.
@@ -155,8 +158,8 @@ void RunTestOnCore(WatcherFixture* fixture, Device* device, CoreCoord &core, boo
             const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(device->id());
             int noc = (use_ncrisc) ? 1 : 0;
             CoreCoord target_phys_core = {
-                NOC_0_X(noc, soc_d.grid_size.x, input_dram_noc_xy.x),
-                NOC_0_Y(noc, soc_d.grid_size.y, input_dram_noc_xy.y)
+                tt::tt_metal::hal.noc_coordinate(noc, soc_d.grid_size.x, input_dram_noc_xy.x),
+                tt::tt_metal::hal.noc_coordinate(noc, soc_d.grid_size.y, input_dram_noc_xy.y)
             };
             string risc_name = (is_eth_core) ? "erisc" : "brisc";
             if (use_ncrisc)
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp
index a7c9fb13f59..b5efa2e0729 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp
@@ -11,7 +11,6 @@
 #include "common/core_coord.hpp"
 #include "common/env_lib.hpp"
 #include "gtest/gtest.h"
-#include "hostdevcommon/common_runtime_address_map.h"
 #include "hostdevcommon/common_values.hpp"
 #include "impl/buffers/circular_buffer_types.hpp"
 #include "impl/device/device.hpp"
diff --git a/tt_metal/hostdevcommon/common_runtime_address_map.h b/tt_metal/hostdevcommon/common_runtime_address_map.h
index 4d49751e22b..5a3b10267be 100644
--- a/tt_metal/hostdevcommon/common_runtime_address_map.h
+++ b/tt_metal/hostdevcommon/common_runtime_address_map.h
@@ -17,8 +17,4 @@
 constexpr static std::uint32_t L1_KERNEL_CONFIG_BASE = MEM_MAP_END;
 constexpr static std::uint32_t L1_KERNEL_CONFIG_SIZE = 69 * 1024;
 
-// Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
-#define NOC_0_X(noc_index, noc_size_x, x) (noc_index == 0 ? (x) : (noc_size_x-1-(x)))
-#define NOC_0_Y(noc_index, noc_size_y, y) (noc_index == 0 ? (y) : (noc_size_y-1-(y)))
-
 static_assert(L1_KERNEL_CONFIG_BASE % L1_ALIGNMENT == 0);
diff --git a/tt_metal/impl/debug/watcher_device_reader.cpp b/tt_metal/impl/debug/watcher_device_reader.cpp
index a07ec3ae064..85ff63f6f12 100644
--- a/tt_metal/impl/debug/watcher_device_reader.cpp
+++ b/tt_metal/impl/debug/watcher_device_reader.cpp
@@ -16,7 +16,6 @@
 // FIXME: Avoid dependence on ARCH_NAME specific includes
 #include "dev_mem_map.h" // for MEM_BRISC_STAC...
 #include "eth_l1_address_map.h" // for address_map
-#include "hostdevcommon/common_runtime_address_map.h" // for NOC_0_X, NOC_0_Y
 #include "hw/inc/dev_msgs.h"
 
 #include "third_party/umd/device/tt_arch_types.h"
@@ -79,7 +78,7 @@ static string get_noc_target_str(Device *device, CoreDescriptor &core, int noc,
         // Get the physical coord from the noc coord
         const metal_SocDescriptor &soc_d = tt::Cluster::instance().get_soc_desc(device->id());
         CoreCoord phys_core = {
-            NOC_0_X(noc, soc_d.grid_size.x, noc_coord.x), NOC_0_Y(noc, soc_d.grid_size.y, noc_coord.y)};
+            tt::tt_metal::hal.noc_coordinate(noc, soc_d.grid_size.x, noc_coord.x), tt::tt_metal::hal.noc_coordinate(noc, soc_d.grid_size.y, noc_coord.y)};
 
         CoreType core_type;
         try {
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index b6cf26597b5..0a9b39c8304 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -29,8 +29,6 @@
 #include "tt_metal/impl/sub_device/sub_device_types.hpp"
 #include "tt_metal/tt_stl/span.hpp"
 
-#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // NOC_0_X
-
 namespace tt {
 
 namespace tt_metal {
@@ -323,8 +321,8 @@ void Device::initialize_device_kernel_defines()
     auto grid_size = this->grid_size();
     this->device_kernel_defines_.emplace("PCIE_NOC_X", std::to_string(pcie_cores[0].x));
     this->device_kernel_defines_.emplace("PCIE_NOC_Y", std::to_string(pcie_cores[0].y));
-    this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(NOC_0_X(NOC::NOC_1, grid_size.x, pcie_cores[0].x)));
-    this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(NOC_0_X(NOC::NOC_1, grid_size.x, pcie_cores[0].y)));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].x)));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].y)));
 }
 
 void Device::initialize_build() {
@@ -797,15 +795,15 @@ void Device::configure_kernel_variant(
 
     std::map<string, string> defines = {
         {"DISPATCH_KERNEL", "1"},
-        {"MY_NOC_X", std::to_string(NOC_0_X(my_noc_index, grid_size.x, kernel_physical_core.x))},
-        {"MY_NOC_Y", std::to_string(NOC_0_Y(my_noc_index, grid_size.y, kernel_physical_core.y))},
+        {"MY_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(my_noc_index, grid_size.x, kernel_physical_core.x))},
+        {"MY_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(my_noc_index, grid_size.y, kernel_physical_core.y))},
         {"UPSTREAM_NOC_INDEX", std::to_string(upstream_noc_index)},
-        {"UPSTREAM_NOC_X", std::to_string(NOC_0_X(upstream_noc_index, grid_size.x, upstream_physical_core.x))},
-        {"UPSTREAM_NOC_Y", std::to_string(NOC_0_Y(upstream_noc_index, grid_size.y, upstream_physical_core.y))},
-        {"DOWNSTREAM_NOC_X", std::to_string(NOC_0_X(downstream_noc_index, grid_size.x, downstream_physical_core.x))},
-        {"DOWNSTREAM_NOC_Y", std::to_string(NOC_0_Y(downstream_noc_index, grid_size.y, downstream_physical_core.y))},
-        {"DOWNSTREAM_SLAVE_NOC_X", std::to_string(NOC_0_X(downstream_noc_index, grid_size.x, downstream_slave_physical_core.x))},
-        {"DOWNSTREAM_SLAVE_NOC_Y", std::to_string(NOC_0_Y(downstream_noc_index, grid_size.y, downstream_slave_physical_core.y))},
+        {"UPSTREAM_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(upstream_noc_index, grid_size.x, upstream_physical_core.x))},
+        {"UPSTREAM_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(upstream_noc_index, grid_size.y, upstream_physical_core.y))},
+        {"DOWNSTREAM_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.x, downstream_physical_core.x))},
+        {"DOWNSTREAM_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.y, downstream_physical_core.y))},
+        {"DOWNSTREAM_SLAVE_NOC_X", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.x, downstream_slave_physical_core.x))},
+        {"DOWNSTREAM_SLAVE_NOC_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(downstream_noc_index, grid_size.y, downstream_slave_physical_core.y))},
         {"FD_CORE_TYPE", std::to_string(programmable_core_type_index)},
     };
     if (force_watcher_no_inline) {
@@ -3134,8 +3132,8 @@ std::vector<CoreCoord> Device::ethernet_cores_from_logical_cores(const std::vect
 uint32_t Device::get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& physical_core) const {
     const auto& grid_size = this->grid_size();
     return NOC_XY_ENCODING(
-        NOC_0_X(noc_index, grid_size.x, physical_core.x),
-        NOC_0_Y(noc_index, grid_size.y, physical_core.y)
+        tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.x, physical_core.x),
+        tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.y, physical_core.y)
     );
 }
 
@@ -3145,17 +3143,17 @@ uint32_t Device::get_noc_multicast_encoding(uint8_t noc_index, const CoreRange&
     // NOC 1 mcasts from bottom left to top right, so we need to reverse the coords
     if (noc_index == 0) {
         return NOC_MULTICAST_ENCODING(
-            NOC_0_X(noc_index, grid_size.x, physical_cores.start_coord.x),
-            NOC_0_Y(noc_index, grid_size.y, physical_cores.start_coord.y),
-            NOC_0_X(noc_index, grid_size.x, physical_cores.end_coord.x),
-            NOC_0_Y(noc_index, grid_size.y, physical_cores.end_coord.y)
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.x, physical_cores.start_coord.x),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.y, physical_cores.start_coord.y),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.x, physical_cores.end_coord.x),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.y, physical_cores.end_coord.y)
         );
     } else {
         return NOC_MULTICAST_ENCODING(
-            NOC_0_X(noc_index, grid_size.x, physical_cores.end_coord.x),
-            NOC_0_Y(noc_index, grid_size.y, physical_cores.end_coord.y),
-            NOC_0_X(noc_index, grid_size.x, physical_cores.start_coord.x),
-            NOC_0_Y(noc_index, grid_size.y, physical_cores.start_coord.y)
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.x, physical_cores.end_coord.x),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.y, physical_cores.end_coord.y),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.x, physical_cores.start_coord.x),
+            tt::tt_metal::hal.noc_coordinate(noc_index, grid_size.y, physical_cores.start_coord.y)
         );
     }
 }
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index 62db5fb21fd..973d9ea946b 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -1153,7 +1153,8 @@ uint32_t detail::Program_::finalize_rt_args(uint32_t programmable_core_type_inde
 
     // TODO: this is asserted here as the leveling above can break the limits enforced by the API
     // Once we use a ring buffer, memory space will be dynamic and this assert won't matter
-    TT_FATAL(offset <= L1_KERNEL_CONFIG_SIZE, "offset {} cannot exceed config size {}", offset, L1_KERNEL_CONFIG_SIZE);
+    std::uint32_t l1_kernel_config_size = tt::tt_metal::hal.get_dev_size(tt::tt_metal::HalProgrammableCoreType::TENSIX, tt::tt_metal::HalL1MemAddrType::KERNEL_CONFIG);
+    TT_FATAL(offset <= l1_kernel_config_size, "offset {} cannot exceed config size {}", offset, l1_kernel_config_size);
 
     return max_unique_rta_size + total_crta_size;
 }
diff --git a/tt_metal/jit_build/genfiles.cpp b/tt_metal/jit_build/genfiles.cpp
index cc8aee4e951..3bb4fd1e6b4 100644
--- a/tt_metal/jit_build/genfiles.cpp
+++ b/tt_metal/jit_build/genfiles.cpp
@@ -11,7 +11,6 @@
 
 #include "common/tt_backend_api_types.hpp"
 #include "common/utils.hpp"
-#include "hostdevcommon/common_runtime_address_map.h" // NOC_0_X
 #include "hostdevcommon/common_values.hpp"
 #include "jit_build/build.hpp"
 #include "jit_build/settings.hpp"
@@ -589,8 +588,8 @@ std::string generate_bank_to_noc_coord_descriptor_string(
         ss << "    {"
            << "\t// noc=" << noc << endl;
         for (unsigned int bank_id = 0; bank_id < dram_bank_map.size(); bank_id++) {
-            uint16_t noc_x = NOC_0_X(noc, grid_size.x, dram_bank_map[bank_id].x);
-            uint16_t noc_y = NOC_0_Y(noc, grid_size.y, dram_bank_map[bank_id].y);
+            uint16_t noc_x = tt::tt_metal::hal.noc_coordinate(noc, grid_size.x, dram_bank_map[bank_id].x);
+            uint16_t noc_y = tt::tt_metal::hal.noc_coordinate(noc, grid_size.y, dram_bank_map[bank_id].y);
             ss << "        (((" << noc_y << " << NOC_ADDR_NODE_ID_BITS) | " << noc_x << ") << NOC_COORD_REG_OFFSET),"
                << "\t// NOC_X=" << noc_x << " NOC_Y=" << noc_y << endl;
         }
@@ -610,8 +609,8 @@ std::string generate_bank_to_noc_coord_descriptor_string(
         ss << "    {"
            << "\t// noc=" << noc << endl;
         for (unsigned int bank_id = 0; bank_id < l1_bank_map.size(); bank_id++) {
-            uint16_t noc_x = NOC_0_X(noc, grid_size.x, l1_bank_map[bank_id].x);
-            uint16_t noc_y = NOC_0_Y(noc, grid_size.y, l1_bank_map[bank_id].y);
+            uint16_t noc_x = tt::tt_metal::hal.noc_coordinate(noc, grid_size.x, l1_bank_map[bank_id].x);
+            uint16_t noc_y = tt::tt_metal::hal.noc_coordinate(noc, grid_size.y, l1_bank_map[bank_id].y);
             ss << "        (((" << noc_y << " << NOC_ADDR_NODE_ID_BITS) | " << noc_x << ") << NOC_COORD_REG_OFFSET),"
                << "\t// NOC_X=" << noc_x << " NOC_Y=" << noc_y << endl;
         }
diff --git a/tt_metal/llrt/hal.hpp b/tt_metal/llrt/hal.hpp
index 13e0c96fa6a..c4753e0e455 100644
--- a/tt_metal/llrt/hal.hpp
+++ b/tt_metal/llrt/hal.hpp
@@ -139,6 +139,11 @@ class Hal {
 
     tt::ARCH get_arch() {return arch_;}
 
+    template <typename IndexType, typename SizeType, typename CoordType>
+    auto noc_coordinate(IndexType noc_index, SizeType noc_size, CoordType coord) const -> decltype(noc_size - 1 - coord) {
+        return noc_index == 0 ? coord : (noc_size - 1 - coord);
+    }
+
     uint32_t get_programmable_core_type_count() const;
     HalProgrammableCoreType get_programmable_core_type(uint32_t core_type_index) const;
     uint32_t get_programmable_core_type_index(HalProgrammableCoreType programmable_core_type_index) const;

From 4dc379c6f1485124b80233bfd9739a88afdbaa7d Mon Sep 17 00:00:00 2001
From: Aswin Zayasankaran <156493059+Aswinmcw@users.noreply.github.com>
Date: Thu, 14 Nov 2024 15:36:47 +0530
Subject: [PATCH 68/69] Add reduce_scatter t3k perf to pipeline (#14950)

### Ticket
#14406

Adds reduce_scatter t3k perf to pipeline


https://github.com/tenstorrent/tt-metal/actions/runs/11795073011/job/32854208345

### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
 .../t3000-model-perf-tests-impl.yaml          | 33 ++++++++++++++-----
 .../t3000/run_t3000_model_perf_tests.sh       | 20 +++++++++++
 .../operations/ccl/perf/perf_csv.py           |  9 ++++-
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml
index c104d01fbaa..fb3ebebbf2a 100644
--- a/.github/workflows/t3000-model-perf-tests-impl.yaml
+++ b/.github/workflows/t3000-model-perf-tests-impl.yaml
@@ -22,7 +22,7 @@ jobs:
           { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
           { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
           { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
-          { name: "t3k CCL all_gather perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
+          { name: "t3k CCL perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests && run_t3000_ccl_reduce_scatter_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
           #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
         ]
     name: ${{ matrix.test-group.name }}
@@ -80,12 +80,21 @@ jobs:
         run: |
           TODAY=$(date +%Y_%m_%d)
           PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
-          PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv"
+          PERF_REPORT_FILENAME_CCL_ALL_GATHER="CCL_all_gather_Perf_${TODAY}.csv"
+          PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER="CCL_reduce_scatter_Perf_${TODAY}.csv"
           if [ "${{ matrix.test-group.tracy }}" == "true" ]; then
-            if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
-              echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
-              echo "perf_report_filename=$PERF_REPORT_FILENAME_CCL" >> "$GITHUB_OUTPUT"
-            else
+            found_reports=false
+            if [ -f "$PERF_REPORT_FILENAME_CCL_ALL_GATHER" ]; then
+              echo "Found CCL AllGather Perf report: $PERF_REPORT_FILENAME_CCL_ALL_GATHER"
+              echo "perf_report_filename_all_gather=$PERF_REPORT_FILENAME_CCL_ALL_GATHER" >> "$GITHUB_OUTPUT"
+              found_reports=true
+            fi
+            if [ -f "$PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER" ]; then
+              echo "Found CCL ReduceScatter Perf report: $PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER"
+              echo "perf_report_filename_reduce_scatter=$PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER" >> "$GITHUB_OUTPUT"
+              found_reports=true
+            fi
+            if [ "$found_reports" = false ]; then
               echo "No CCL perf report found for today."
               exit 1
             fi
@@ -98,12 +107,20 @@ jobs:
               exit 1
             fi
           fi
-      - name: Upload perf report
-        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
+      - name: Upload Models perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' && !matrix.test-group.tracy}}
         uses: actions/upload-artifact@v4
         with:
           name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal
           path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
+      - name: Upload CCL perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' && matrix.test-group.tracy}}
+        uses: actions/upload-artifact@v4
+        with:
+          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal
+          path: |
+            ${{ steps.check-perf-report.outputs.perf_report_filename_all_gather }}
+            ${{ steps.check-perf-report.outputs.perf_report_filename_reduce_scatter }}
       - uses: ./.github/actions/slack-report
         if: ${{ failure() }}
         with:
diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
index 19a54d710b1..02ec0d8c541 100755
--- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh
+++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -161,6 +161,25 @@ run_t3000_ccl_all_gather_perf_tests() {
   fi
 }
 
+run_t3000_ccl_reduce_scatter_perf_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_ccl_reduce_scatter_perf_tests"
+
+  tests/ttnn/unit_tests/operations/ccl/perf/run_reduce_scatter_profile.sh -t t3000
+  fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_ccl_reduce_scatter_perf_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_llm_tests() {
   # Run falcon7b tests
   run_t3000_falcon7b_tests
@@ -195,6 +214,7 @@ run_t3000_cnn_tests() {
 run_t3000_ccl_tests() {
   # Run ccl performance tests
   run_t3000_ccl_all_gather_perf_tests
+  run_t3000_ccl_reduce_scatter_perf_tests
 
 }
 
diff --git a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
index 3d5cc2aaeb5..569f608c48b 100644
--- a/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
+++ b/tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
@@ -214,9 +214,16 @@ def calculate_bandwidth(row):
         averages_data.append(group_data)
 
     averages_df = pd.DataFrame(averages_data)
+    op_code = averages_df.iloc[0]["OP CODE"]
 
     today = time.strftime("%Y_%m_%d")
-    ccl_perf_file_path = f"CCL_Perf_{today}.csv"
+    if op_code == "AllGather":
+        ccl_perf_file_path = f"CCL_all_gather_Perf_{today}.csv"
+    elif op_code == "ReduceScatter":
+        ccl_perf_file_path = f"CCL_reduce_scatter_Perf_{today}.csv"
+    else:
+        ccl_perf_file_path = f"CCL_Perf_{today}.csv"
+
     os.rename(file_path, ccl_perf_file_path)
 
     averages_df.to_csv(ccl_perf_file_path, index=False)

From ce6ff4cf7cc035991215d4a7cac902747a22616c Mon Sep 17 00:00:00 2001
From: Sean Nijjar <sean.nijjar@gmail.com>
Date: Thu, 14 Nov 2024 07:59:52 -0500
Subject: [PATCH 69/69] add initial fabric erisc data mover (EDM) impl (#14923)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fabric Erisc Data Mover (EDM) is a component that can be used to
build *very* simple linear topology fabrics.
One of these EDMs can be instantiated on each ethernet link. It is built
from 3 "channels" (though the definition
of channel here is a little loose since two of the 3 will merge traffic,
so this setup could be interpreted as a
two channel setup.). This EDM implements packet based packets only -
concepts like sockets are not supported.

## EDM Structure

There are two sender channels and one receiver channel. "Sender" and
"receiver" are relative to the Ethernet link,
not the chip. Sender sends over the link and receiver receives from the
link.

Each sender channel serves a different purpose:
- Sender channel 0 : Accepts packets from a workers on the local chip
- Sender channel 1: accepts packets from an upstream EDM (i.e. an
upstream
  EDM receiver channel on the same chip but different core)

The receiver channel accepts packets from the Ethernet link and can do
one (or both) of:
- Write the packet to local chhip if it is the intended destination
(unicast or mcast)
- Forward the packet to the next chip in the line if:
  - Unicast and not the target chip
  - Multicast and this chip is in the multicast target range

Sender channels will merge traffic into the remote EDM's receiver
channel.

Below is a diagram that shows how EDMs can be connected over an ethernet
link. In this case, the two
EDM kernels are run on separate, but connected ethernet link cores.
```
 ┌───────────────────────┐           ┌───────────────────────┐
 │    Sender Channel 0   │           │    Receiver Channel   │
 │   ┌────────────────┐  │           │   ┌────────────────┐  │
 │   │                ┼──┼───┬───────┼───►                │  │
 │   │                │  │   │       │   │                │  │
 │   └────────────────┘  │   │       │   └────────────────┘  │
 │    Sender Channel 1   │   │       │    Sender Channel 1   │
 │   ┌────────────────┐  │   │       │   ┌────────────────┐  │
 │   │                ┼──┼───┘       │   │                │  │
 │   │                │  │         ┌─┼───┼                │  │
 │   └────────────────┘  │         │ │   └────────────────┘  │
 │    Receiver Channel   │         │ │    Sender Channel 0   │
 │   ┌────────────────┐  │         │ │   ┌────────────────┐  │
 │   │                │  │         │ │   │                │  │
 │   │                ◄──┼─────────┴─┼───┼                │  │
 │   └────────────────┘  │           │   └────────────────┘  │
 │                       │           │                       │
 │                       │           │                       │
 └───────────────────────┘           └───────────────────────┘
```

## Building a "Fabric"

Only linear topologies are and will be supported, and one per ethernet
link along that given line.
Below shows the intended connectivity of EDMs across chips in a
hypothetical 3-chip fabric. For longer
lines, the pattern would be extended.
```
           CHIP 0                              CHIP 1                             CHIP 2
     ┌─────────────────┐                ┌─────────────────┐                ┌─────────────────┐
     │                 │                │                 │                │                 │
┌────┴─────┐ ▲   ┌─────┴────┐      ┌────┴─────┐ ▲   ┌─────┴────┐      ┌────┴─────┐ ▲   ┌─────┴────┐
│   EDM    │ │   │   EDM    │      │   EDM    │ │   │   EDM    │      │   EDM    │ │   │   EDM    │
│ ┌──────┐ │ │   │ ┌──────┐ │      │ ┌──────┐ │ │   │ ┌──────┐ │      │ ┌──────┐ │ │   │ ┌──────┐ │
│ │ Rx   ┼─┼─┴───┼─► S1   ┼─┼─┬────┼─► Rx   ┼─┼─┴───┼─► S1   ┼─┼┬─────┼─► Rx   ┼─┼─┘   | | S1   │ │
│ └──────┘ │     │ └──────┘ │ │    │ └──────┘ │     │ └──────┘ ││     │ └──────┘ │     │ └──────┘ │
│ ┌──────┐ │     │ ┌──────┐ │ │    │ ┌──────┐ │     │ ┌──────┐ ││     │ ┌──────┐ │     │ ┌──────┐ │
│ │ S0   ◄─┼──┬──┼─► S0   ┼─┼─┘   ┌┼─┼ S0   ◄─┼──┬──┼─► S0   ┼─┼┘    ┌┼─┼ S0   ◄─┼──┬──┼─► S0   │ │
│ └──────┘ │  │  │ └──────┘ │     ││ └──────┘ │  │  │ └──────┘ │     ││ └──────┘ │  │  │ └──────┘ │
│ ┌──────┐ │  │  │ ┌──────┐ │     ││ ┌──────┐ │  │  │ ┌──────┐ │     ││ ┌──────┐ │  │  │ ┌──────┐ │
│ │ S1   | |  │ ┌┼─┼ Rx   ◄─┼─────┴┼─┼ S1   ◄─┼─┐│ ┌┼─┼ Rx   ◄─┼─────┴┼─┼ S1   ◄─┼─┐│ ┌┼─┼ Rx   │ │
│ └──────┘ │  | |│ └──────┘ │      │ └──────┘ │ └┼─┤│ └──────┘ │      │ └──────┘ │ └┼─┤│ └──────┘ │
└────┬─────┘  │ │└─────┬────┘      └────┬─────┘  │ │└─────┬────┘      └────┬─────┘  │ │└─────┬────┘
     │          ▼      │                │          ▼      │                │          ▼      │
     └─────────────────┘                └─────────────────┘                └─────────────────┘
```

## Connecting Workers to Channels

As mentioned, only one worker can push to a given EDM sender channel at
a time. In order to send to an EDM
sender channel, the worker must establish a connection. The connection
protocol is as follows and is started
by the worker (the EDM is a slave in this protocol).

*NOTE*: If multiple workers try to connect to the same EDM sender
channel at the same time, the behavior is undefined.
*NOTE*: Additionally, if a worker pushes packets to a channel it isn't
connected to, behaviour is undefined.
*NOTE*: Undefined == likely hang

The `WorkerToFabricEdmSender` from
`ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp`
provides an implementation of the connection protocol.
`WorkerToFabricEdmSender` also acts as a wrapper around that
protocol so workers can simply call `open()` to execute the connection
protocol without having to manually reimplement
for each kernel.

### Protocol
Worker:
- Read from EDM sender channel buffer_index address
- Required so that the worker knows where to write its first packet
(since the channel may already contain packets from
    a previous connection)
- Write worker core X/Y (NOC 0 based)
- Write worker flow control semaphore L1 address

EDM Sender Channel:
- Check local connection valid semaphore for new established connection
- When the connection semaphore indicates an active connection, the
channel assumes all other relevant fields were
    correctly populated by the worker:
    - Worker core_x (on NOC 0)
    - Worker core_y (on NOC 0)
    - Worker flow control semaphore L1 address


## Tearing Down Connections

Every worker is required to explicitly teardown its connection with the
EDM before terminating. To do this, the worker
must simply write a `0` to the EDM sender channel's connection semaphore
address. As long as the worker has sent all
of its packets to the EDM before this, then the EDM will guarantee to
forward the messages correctly.

At this point, it is safe for another kernel to establish a connection.

## Packet Structure

Workers are responsible for populating packet headers before sending to
the EDM. The packet header structure is defined
in
`ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp`.

## Channel structure

Each EDM channel is built from one or more buffers. Each buffer is the
same size and can hold atmost one packet.
Neighbouring packets occupy nehighouring buffers - with the exception of
the last buffer index. The next packet after a write
into the last buffer index will wrap around to the first buffer index.
Even if packets do not occupy the full buffer, subsequent
packets will always be written into the next logical buffer. A gap will
exist in memory but the EDM will not send that padded data
(unless it is more performant - which is possible in some special cases)

 Example channel with 8 buffers
 ```
┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┐
│       │       │       │       │       │       │       │       │
│       │       │       │       │       │       │       │       │
└───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
 buf 0   buf 1   buf 2   buf 3   buf 4   buf 5   buf 6   buf 7
```

Here we have an example of a channel with 4 buffers, filled with some number of packets. Each packet is a different size.
Packets 0, 2, and 3 are smaller than the full buffer size, while packet 1 is the full buffer size.

```
┌───────────────┬───────────────┬───────────────┬───────────────┐
│H|Payload| / / │H|Payload      │H|Pyld| / / / /│H|Payload  |/ /│
│ |       |/ / /│ |             │ |    |/ / / / │ |         | / │
└───────────────┴───────────────┴───────────────┴───────────────┘
  buf 0           buf 1           buf 2           buf 3
```

A detail of the channel structure is omitted from the above diagram, namely the EDM <-> EDM flow control region for each buffer.
Each buffer really looks something like this:

```
             &header->  |----------------| channel_base_address
                        |    header      |
            &payload->  |----------------|
                        |                |
                        |    payload     |
                        |                |
       &channel_sync->  |----------------|
                        |  channel_sync  |  // This is new
                        ------------------
```

The "channel_sync" is an `eth_channel_sync_t` and is internal to the EDM implementation and is used to indicate packet
transmission state between sender and receiver EDMs.

The protocol for its use is:
1) Sender updates the field indicating new data:
   - set `bytes_sent` to a non-zero value indicating new data
   - clear `receiver_ack` to 0
   - set `src_id` to the sender channel id so the receiver knows who the sender was (and where the ack should go)
2) Sender sends this channel sync to the corresponding location in the receiver channel (either in the same transmission
   as the packet or separately)
3) Receiver sees that `bytes_sent` is non-zero, indicating a new packet. It sends back an acknowledgement (first level):
   - set `receiver_ack` to non-zero
   *NOTE* IMPORTANT: To avoid a race, the receiver must be sure to send its channel_sync_t from a different address it uses
   as for the second level acknowledgement
   3b) When sender receives an ack, it understands it can overwrite its local copy of the packet with new data
4) After receiver properly writes out its packet, it sends a second level acknowledgement, indicating it can receive new
   data into this specific buffer index:
   - clear the bytes_sent and receiver_ack fields and send back the `channel_sync` to the sender


## Sending Packets
Sending a packet is done as follows:

1) Worker waits for flow control semaphore increment from EDM sender channel
  - Indicates there is space at the next buffer index for a packet
2) Worker performs a noc write of its packet to the EDM sender channel at the buffer index

*NOTE*: !!!ALL PACKETS MUST CONTAIN DESTINATION NOC X/Y AS NOC 0 COORDINATES, REGARDLESS OF THE `noc_index` OF THE SENDER!!!

## Building a Line Fabric

Building a simple fabric for testing with operations:

1) First build it:
Build a bidirectional fabric along a line of devices:
`ttnn::ccl::EdmLineFabricOpInterface(devices, program_ptrs, 1);`
where the devices and program_ptrs correspond to each other by index.
The third argument is an optional field the specifies the number of links
(wide) to make the fabric span. By default, this will choose the largest
number of links possible for the provided span of devices.

2) Next connect to your workers. For each worker, connect to the fabric like:

```
auto chip0_worker_fabric_connection =
    line_fabric.uniquely_connect_worker(
        devices[0],
        ttnn::ccl::EdmLineFabricOpInterface::FORWARD);
```

where the valid directions are FORWARD and BACKWARD. FORWARD is
in the direction of ascending device indices (from the provided device
list during the constructor call) and BACKWARD is toward the front.

Note that for the time being, if a worker wishes to broadcast in both
directions of the line, they will need to call connect twice:
once in the forward direction and once in the backward direction

3) Collect the termination info
For proper teardown of the fabric. This will only be needed temporarily until
a `create_persistent_fabric` that launches the fabric on persistent subcore
meshes is provided. A worker will be required to send terminate signals to
all the fabric endpoints to let the workload complete.

```
auto const& edm_termination_infos =
    line_fabric.generate_ordered_termination_info_farthest_to_nearest()
```

These termination infos specify the fabric locations for each endpoint,
relative to the first chip in the fabric.

4) Finally, build the EDM kernels:
`line_fabric.build_kernels();`
---
 tests/ttnn/unit_tests/gtests/CMakeLists.txt   |   5 +-
 .../erisc_datamover_sender_worker_reader.cpp  |   1 -
 ...c_erisc_datamover_sender_worker_reader.cpp |  46 +
 ...c_erisc_datamover_sender_worker_sender.cpp | 209 ++++
 .../test_erisc_data_mover_with_workers.cpp    |   2 +-
 ...erisc_data_mover_loopback_with_workers.cpp | 794 ++++++++++++++++
 .../hw/inc/blackhole/noc_nonblocking_api.h    |   1 +
 tt_metal/hw/inc/ethernet/dataflow_api.h       |  17 +
 tt_metal/hw/inc/ethernet/tunneling.h          |  15 +-
 .../hw/inc/grayskull/noc_nonblocking_api.h    |   1 +
 .../hw/inc/wormhole/noc_nonblocking_api.h     |   1 +
 ttnn/CMakeLists.txt                           |   1 +
 ttnn/cpp/ttnn/operations/ccl/ccl_common.cpp   |  53 +-
 ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp   |  12 +-
 .../ccl/ccl_host_datastructures.hpp           |   6 +-
 .../ccl/erisc_datamover_builder.cpp           | 417 ++++++++
 .../ccl/erisc_datamover_builder.hpp           | 228 +++++
 .../edm_fabric/edm_fabric_worker_adapters.hpp | 193 ++++
 .../edm_fabric/fabric_edm_packet_header.hpp   | 214 +++++
 .../fabric_edm_packet_header_validate.hpp     |  23 +
 .../fabric_edm_packet_transmission.hpp        | 226 +++++
 .../kernels/edm_fabric/fabric_edm_types.hpp   |  56 ++
 .../edm_fabric/fabric_erisc_datamover.cpp     | 897 ++++++++++++++++++
 .../fabric_erisc_datamover_channels.hpp       | 232 +++++
 .../concat/device/concat_program_factory.cpp  |  10 +-
 .../concat/device/concat_program_factory.hpp  |   1 +
 .../tilize/device/tilize_program_factory.cpp  |   2 +-
 27 files changed, 3636 insertions(+), 27 deletions(-)
 create mode 100644 tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_reader.cpp
 create mode 100644 tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp
 create mode 100644 tests/ttnn/unit_tests/gtests/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_transmission.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover.cpp
 create mode 100644 ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover_channels.hpp

diff --git a/tests/ttnn/unit_tests/gtests/CMakeLists.txt b/tests/ttnn/unit_tests/gtests/CMakeLists.txt
index 6bf76117543..865b29daefc 100644
--- a/tests/ttnn/unit_tests/gtests/CMakeLists.txt
+++ b/tests/ttnn/unit_tests/gtests/CMakeLists.txt
@@ -8,7 +8,10 @@ set(TTNN_UNIT_TESTS_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/test_to_and_from_json.cpp
 )
 
-set(TTNN_CCL_UNIT_TESTS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/ccl/test_erisc_data_mover_with_workers.cpp)
+set(TTNN_CCL_UNIT_TESTS_SRC
+    ${CMAKE_CURRENT_SOURCE_DIR}/ccl/test_erisc_data_mover_with_workers.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp
+)
 
 set(TTNN_TENSOR_UNIT_TESTS_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor/common_tensor_test_utils.cpp
diff --git a/tests/ttnn/unit_tests/gtests/ccl/kernels/erisc_datamover_sender_worker_reader.cpp b/tests/ttnn/unit_tests/gtests/ccl/kernels/erisc_datamover_sender_worker_reader.cpp
index 41d453e2793..66662d02630 100644
--- a/tests/ttnn/unit_tests/gtests/ccl/kernels/erisc_datamover_sender_worker_reader.cpp
+++ b/tests/ttnn/unit_tests/gtests/ccl/kernels/erisc_datamover_sender_worker_reader.cpp
@@ -38,7 +38,6 @@ void kernel_main() {
         }
         noc_async_read_barrier();
         cb_push_back(cb_id_in0, pages_to_read);
-        // DPRINT << "SR " << num_pages_read << "\n";
     }
     DPRINT << "SR DONE\n";
 
diff --git a/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_reader.cpp b/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_reader.cpp
new file mode 100644
index 00000000000..3437c819346
--- /dev/null
+++ b/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_reader.cpp
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstdint>
+#include "dataflow_api.h"
+#include "debug/dprint.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+
+void kernel_main() {
+    constexpr bool src_is_dram = get_compile_time_arg_val(0) == 1;
+    constexpr uint32_t num_pages_to_read_total = get_compile_time_arg_val(1);
+    constexpr uint32_t page_size = get_compile_time_arg_val(2);
+    constexpr uint32_t pages_per_edm_buffer = 1;
+    constexpr uint32_t cb_id_in0 = tt::CB::c_in0;
+
+    const uint32_t src_addr = get_arg_val<uint32_t>(0);
+
+    const InterleavedAddrGen<src_is_dram> source_address_generator = {
+        .bank_base_address = src_addr, .page_size = page_size};
+
+    DPRINT << "swr: args " <<
+        "\n\tsrc_addr="<<src_addr<<
+        "\n\tsrc_is_dram="<<(src_is_dram?"T":"F")<<
+        "\n\tnum_pages_to_read_total="<<num_pages_to_read_total<<
+        "\n\tpages_per_edm_buffer="<<pages_per_edm_buffer<<
+        "\n\tpage_size="<<page_size<<"\n";
+
+    for (uint32_t num_pages_read = 0; num_pages_read < num_pages_to_read_total; num_pages_read += pages_per_edm_buffer) {
+        // How can I read ahead into the circular buffer so I don't have to do an async read barrier for
+        // every page? I only want to block when the CB is full
+        uint32_t pages_to_read = std::min<uint32_t>(pages_per_edm_buffer, num_pages_to_read_total - num_pages_read);
+        cb_reserve_back(cb_id_in0, pages_to_read);
+        uint32_t local_l1_read_addr = get_write_ptr(cb_id_in0);
+        local_l1_read_addr += sizeof(tt::fabric::PacketHeader);
+
+        for (uint32_t p = 0; p < pages_to_read; ++p) {
+            uint64_t src_noc_addr = get_noc_addr(num_pages_read + p, source_address_generator);
+            noc_async_read(src_noc_addr, local_l1_read_addr, page_size);
+            local_l1_read_addr += page_size;
+        }
+        noc_async_read_barrier();
+        cb_push_back(cb_id_in0, pages_to_read);
+    }
+
+}
diff --git a/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp b/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp
new file mode 100644
index 00000000000..babcd41c992
--- /dev/null
+++ b/tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp
@@ -0,0 +1,209 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstdint>
+
+#include "dataflow_api.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp"
+
+struct unicast_mode {
+    uint8_t distance;
+};
+struct mcast_mode {
+    uint8_t distance;
+    uint8_t range;
+};
+
+union transmit_config {
+    unicast_mode unicast;
+    mcast_mode mcast;
+};
+
+// Worker core - Data Movement Writer -> Sends to Erisc Data Mover (sender side).
+// -> takes input from local cb and pushes to erisc L1
+void kernel_main() {
+
+    // Test doesn't support multiple pages per send yet since we are writing
+    // to interleaved which will never have subsequent pages on the same core
+    // (and hence, able to share a packet header)
+    constexpr uint32_t num_pages_per_send = 1;//get_compile_time_arg_val(0);
+    constexpr uint32_t total_pages_to_send = get_compile_time_arg_val(1);
+    constexpr uint32_t page_size = get_compile_time_arg_val(2);
+    constexpr uint32_t num_buffers_per_channel = get_compile_time_arg_val(3);
+    constexpr bool dest_is_dram = get_compile_time_arg_val(4) != 0;
+    constexpr bool mcast_mode = get_compile_time_arg_val(5) == 1;
+
+    size_t arg_idx = 0;
+    // Nearly all of the following arguments are needed to establish a connection with
+    // EDM.
+    // FUTURE WORK to make the connection info more compact. This will include:
+    // 1. packing EDM noc x/y into one RT arg
+    // 2. packing all semaphores as IDs and those IDs into the same RT arg
+    //    We should be able to comfortably fit 4 into a single arg
+    // 3. All other fields should be derivable from an EDM channel ID,
+    //    which can then be used to statically compute offsets into EDM unreserved L1
+    //    according to the static EDM L1 allocation scheme.
+    //    This should let us get away with describing the full connection in 3-4 args total
+    const uint32_t eth_l1_base_addr = get_arg_val<uint32_t>(arg_idx++);
+    // erisc l1 semaphore address
+    const uint32_t eth_sender_l1_sem_id = get_arg_val<uint32_t>(arg_idx++);
+    volatile uint32_t* const writer_send_sem_addr = reinterpret_cast<volatile uint32_t* const >(get_semaphore(get_arg_val<uint32_t>(arg_idx++)));
+    const uint32_t eth_sender_noc_x = get_arg_val<uint32_t>(arg_idx++);
+    const uint32_t eth_sender_noc_y = get_arg_val<uint32_t>(arg_idx++);
+    const uint32_t num_buffers_per_edm_channel = get_arg_val<uint32_t>(arg_idx++);
+    size_t edm_connection_handshake_addr = get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+    size_t edm_worker_location_info_addr = get_arg_val<uint32_t>(arg_idx++);
+    size_t edm_buffer_size_bytes = get_arg_val<uint32_t>(arg_idx++);
+    size_t dest_addr = get_arg_val<uint32_t>(arg_idx++);
+    volatile uint32_t* const last_message_semaphore_address = reinterpret_cast<volatile uint32_t* const >(get_semaphore(get_arg_val<uint32_t>(arg_idx++)));
+    *last_message_semaphore_address = 0;
+    auto worker_buffer_index_semaphore_addr = get_semaphore(get_arg_val<uint32_t>(arg_idx++));
+    // TODO: move to semaphore
+    auto edm_buffer_index_sem_id = get_arg_val<uint32_t>(arg_idx++);
+    ASSERT(edm_buffer_index_sem_id < 8);
+    auto edm_buffer_index_address = get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(edm_buffer_index_sem_id);
+    ASSERT(worker_buffer_index_semaphore_addr != reinterpret_cast<size_t>(writer_send_sem_addr));
+    ASSERT(worker_buffer_index_semaphore_addr != reinterpret_cast<size_t>(last_message_semaphore_address));
+
+    transmit_config config;
+    if (mcast_mode) {
+        config.mcast.distance = static_cast<uint8_t>(get_arg_val<uint32_t>(arg_idx++));
+        config.mcast.range = static_cast<uint8_t>(get_arg_val<uint32_t>(arg_idx++));
+    } else {
+        config.unicast.distance = static_cast<uint8_t>(get_arg_val<uint32_t>(arg_idx++));
+    }
+
+    const InterleavedAddrGen<dest_is_dram> dest_addr_gen = {
+        .bank_base_address = dest_addr, .page_size = page_size};
+
+
+    ASSERT(num_buffers_per_channel > 0);
+    auto sender = tt::fabric::WorkerToFabricEdmSender(
+        eth_sender_noc_x,
+        eth_sender_noc_y,
+        eth_l1_base_addr,
+        num_buffers_per_channel,
+        eth_sender_l1_sem_id,
+
+        edm_connection_handshake_addr,
+        edm_worker_location_info_addr,
+        edm_buffer_size_bytes,
+        edm_buffer_index_address,
+        writer_send_sem_addr,
+        worker_buffer_index_semaphore_addr
+        );
+
+    sender.open();
+
+    constexpr uint32_t cb_id_in0 = tt::CB::c_in0;
+
+    // We need to normalize all noc addresses to be for a consistent noc ID
+    // so the remote sender core can correctly send the packet. In the future
+    // we can decide if it's better for the noc index to be embedded in the packet
+    // header (for now we don't do that)
+    constexpr size_t NORMALIZED_NOC_INDEX = 0;
+
+    uint32_t buffer_index = 0;
+    cb_wait_front(cb_id_in0, 1);
+    auto a_packet_header_addr = get_read_ptr(cb_id_in0);
+    for (uint32_t p = 0; p < total_pages_to_send; p += num_pages_per_send) {
+        uint32_t pages_to_send = std::min<uint32_t>(num_pages_per_send, total_pages_to_send - p);
+        sender.wait_for_empty_write_slot();
+        cb_wait_front(cb_id_in0, pages_to_send);
+
+        // bit of a hack to extract X/Y
+        const auto dest_noc_address = get_noc_addr(p, dest_addr_gen, 0, NORMALIZED_NOC_INDEX);
+        const size_t dest_addr = dest_noc_address & 0xFFFFFFFF;
+        const size_t dest_noc_x = (dest_noc_address >> NOC_ADDR_LOCAL_BITS) & ((1 << NOC_ADDR_NODE_ID_BITS) - 1);
+        const size_t dest_noc_y = (dest_noc_address >> (NOC_ADDR_LOCAL_BITS + NOC_ADDR_NODE_ID_BITS)) & ((1 << NOC_ADDR_NODE_ID_BITS) - 1);
+        const size_t packet_size = page_size + sizeof(tt::fabric::PacketHeader);
+
+        auto packet_addr = get_read_ptr(cb_id_in0);
+        auto &packet_header = *reinterpret_cast<tt::fabric::PacketHeader*>(packet_addr);
+        if constexpr (mcast_mode) {
+            packet_header.to_write()
+                .to_chip_multicast(tt::fabric::MulticastRoutingCommandHeader{config.mcast.distance, config.mcast.range})
+                .to_noc_unicast(tt::fabric::NocUnicastCommandHeader{
+                    dest_addr,
+                    (pages_to_send * page_size) + sizeof(tt::fabric::PacketHeader),
+                    static_cast<uint8_t>(dest_noc_x),
+                    static_cast<uint8_t>(dest_noc_y)
+                });
+            packet_header.reserved2 = 0x1111; // debug only
+        } else {
+            packet_header.to_write()
+                .to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{config.unicast.distance})
+                .to_noc_unicast(tt::fabric::NocUnicastCommandHeader{
+                    dest_addr,
+                    (pages_to_send * page_size) + sizeof(tt::fabric::PacketHeader),
+                    static_cast<uint8_t>(dest_noc_x),
+                    static_cast<uint8_t>(dest_noc_y)
+                });
+            packet_header.reserved2 = 0x1111; // debug only
+        }
+
+        uint64_t buffer_address = sender.edm_buffer_addr + (*sender.buffer_index_ptr * (sender.buffer_size_bytes + sizeof(eth_channel_sync_t)));
+        sender.send_payload_blocking_from_address(packet_addr, packet_size);
+        noc_async_writes_flushed();
+        cb_pop_front(cb_id_in0, pages_to_send);
+    }
+
+    if constexpr (!mcast_mode) {
+        sender.wait_for_empty_write_slot();
+
+        auto &packet_header = *reinterpret_cast<tt::fabric::PacketHeader*>(a_packet_header_addr);
+        ASSERT(*last_message_semaphore_address == 0);
+        packet_header.reserved = 0xE;
+        packet_header.reserved2 = 0xFFFF;
+        packet_header.to_atomic_inc();
+        packet_header.to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{1});
+        packet_header.to_noc_unicast_atomic_inc(tt::fabric::NocUnicastAtomicIncCommandHeader(
+                reinterpret_cast<size_t>(last_message_semaphore_address),
+                1,
+                32,
+                my_x[0],
+                my_y[0]
+            ));
+
+        sender.send_payload_blocking_from_address(a_packet_header_addr, packet_header.get_payload_size_including_header());
+
+        noc_semaphore_wait(last_message_semaphore_address, 1);
+    }
+
+    bool closed = false;
+    size_t num_endpoints_to_terminate = get_arg_val<uint32_t>(arg_idx++);
+    for (size_t i = 0; i < num_endpoints_to_terminate; i++) {
+        size_t edm_noc_x = get_arg_val<uint32_t>(arg_idx++);
+        size_t edm_noc_y = get_arg_val<uint32_t>(arg_idx++);
+        size_t distance = get_arg_val<uint32_t>(arg_idx++);
+        size_t termination_addr = get_arg_val<uint32_t>(arg_idx++);
+
+        if (!closed && distance == 0) {
+            closed = true;
+            sender.close();
+        }
+        if (distance == 0) {
+            noc_inline_dw_write(get_noc_addr(edm_noc_x, edm_noc_y, termination_addr), tt::fabric::TerminationSignal::GRACEFULLY_TERMINATE);
+        } else {
+            auto &packet_header = *reinterpret_cast<tt::fabric::PacketHeader*>(a_packet_header_addr);
+            reinterpret_cast<volatile uint32_t*>(a_packet_header_addr)[sizeof(tt::fabric::PacketHeader) >> 2] = tt::fabric::TerminationSignal::GRACEFULLY_TERMINATE;
+            sender.wait_for_empty_write_slot();
+            packet_header.to_write()
+                .to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{static_cast<uint8_t>(distance - 1)})
+                .to_noc_unicast(tt::fabric::NocUnicastCommandHeader{
+                    termination_addr,
+                    sizeof(tt::fabric::PacketHeader) + sizeof(uint32_t),
+                    static_cast<uint8_t>(edm_noc_x),
+                    static_cast<uint8_t>(edm_noc_y)
+                });
+            sender.send_payload_blocking_from_address(a_packet_header_addr, packet_header.get_payload_size_including_header());
+            noc_async_writes_flushed();
+        }
+    }
+    if (!closed) {
+        sender.close();
+    }
+
+}
diff --git a/tests/ttnn/unit_tests/gtests/ccl/test_erisc_data_mover_with_workers.cpp b/tests/ttnn/unit_tests/gtests/ccl/test_erisc_data_mover_with_workers.cpp
index a62985f8bd3..b2dbf58a86e 100644
--- a/tests/ttnn/unit_tests/gtests/ccl/test_erisc_data_mover_with_workers.cpp
+++ b/tests/ttnn/unit_tests/gtests/ccl/test_erisc_data_mover_with_workers.cpp
@@ -41,7 +41,7 @@ void set_edm_runtime_args(
     ccl::EriscDatamoverBuilder const& edm_builder,
     CoreCoord const& eth_core
 ) {
-    std::vector<uint32_t> const& edm_clockwise_kernel_rt_args = edm_builder.emit_runtime_args();
+    std::vector<uint32_t> const& edm_clockwise_kernel_rt_args = edm_builder.get_runtime_args();
     tt_metal::SetRuntimeArgs(program, edm_kernel_handle, eth_core, edm_clockwise_kernel_rt_args);
 
     std::stringstream ss;
diff --git a/tests/ttnn/unit_tests/gtests/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp b/tests/ttnn/unit_tests/gtests/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp
new file mode 100644
index 00000000000..b97080b5d3c
--- /dev/null
+++ b/tests/ttnn/unit_tests/gtests/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp
@@ -0,0 +1,794 @@
+
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <algorithm>
+#include <functional>
+#include <limits>
+#include <random>
+
+#include "device/tt_arch_types.h"
+#include "gtest/gtest.h"
+// #include "tt_backend_api_types.hpp"
+#include "tt_metal/common/core_coord.hpp"
+#include "tt_metal/common/math.hpp"
+#include "tt_metal/detail/tt_metal.hpp"
+#include "tt_metal/host_api.hpp"
+#include "tt_metal/impl/kernels/kernel.hpp"
+#include "tt_metal/test_utils/comparison.hpp"
+#include "tt_metal/test_utils/df/df.hpp"
+#include "tt_metal/test_utils/env_vars.hpp"
+#include "tt_metal/test_utils/print_helpers.hpp"
+#include "tt_metal/test_utils/stimulus.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+
+using namespace tt;
+using namespace tt::test_utils;
+using namespace tt::test_utils::df;
+
+class T3000TestDevice {
+   public:
+    T3000TestDevice() : device_open(false) {
+        arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name());
+
+        num_devices_ = tt::tt_metal::GetNumAvailableDevices();
+        if (arch_ == tt::ARCH::WORMHOLE_B0 and tt::tt_metal::GetNumAvailableDevices() >= 4 and
+            tt::tt_metal::GetNumPCIeDevices() >= 1) {
+            std::vector<chip_id_t> ids(num_devices_, 0);
+            std::iota(ids.begin(), ids.end(), 0);
+            devices_ = tt::tt_metal::detail::CreateDevices(ids);
+
+        } else {
+            TT_THROW("This suite can only be run on T3000 Wormhole devices");
+        }
+        device_open = true;
+    }
+    ~T3000TestDevice() {
+        if (device_open) {
+            TearDown();
+        }
+    }
+
+    void TearDown() {
+        device_open = false;
+        for (auto [device_id, device_ptr] : devices_) {
+            tt::tt_metal::CloseDevice(device_ptr);
+        }
+    }
+
+    std::map<chip_id_t, Device*> devices_;
+    tt::ARCH arch_;
+    size_t num_devices_;
+
+   private:
+    bool device_open;
+};
+
+struct BankedConfig {
+    size_t num_pages;
+    size_t size_bytes;
+    size_t page_size_bytes;
+    BufferType input_buffer_type;   // = BufferType::L1;
+    BufferType output_buffer_type;  // = BufferType::L1;
+    tt::DataFormat l1_data_format;  // = tt::DataFormat::Float16_b;
+};
+
+struct KernelXY {
+    uint16_t x;
+    uint16_t y;
+
+    uint32_t to_uint32() const { return y << 16 | x; }
+};
+
+
+enum Correctness { Correct, Incorrect };
+
+struct EthLinkBuilder {
+    ttnn::ccl::FabricEriscDatamoverBuilder sender_edm_builder;    // chip_0_edm_builder,
+    ttnn::ccl::FabricEriscDatamoverBuilder receiver_edm_builder;  // chip_0_edm_builder,
+    tt_xy_pair sender_core;
+    tt_xy_pair receiver_core;
+    // size_t downstream_edm_buffer_index_semaphore_id;
+};
+
+Correctness run_output_check(
+    std::vector<uint32_t> const& all_zeros,
+    std::vector<uint32_t> const& inputs,
+    std::shared_ptr<Buffer> output_buffer) {
+    constexpr bool debug_mode = true;
+    std::vector<uint32_t> readback_data_vec(all_zeros.size(), 0);  // init to 0 data for easier debug
+
+    tt_metal::detail::ReadFromBuffer(output_buffer, readback_data_vec);
+    log_info(tt::LogTest, "Checking outputs");
+    if (readback_data_vec.size() != inputs.size()) {
+        log_error(tt::LogTest, "Output size mismatch: expected {} got {}", inputs.size(), readback_data_vec.size());
+        return Correctness::Incorrect;
+    }
+    bool pass = (readback_data_vec == inputs);
+    if (not pass) {
+        log_error("Output mismatch");
+        if (debug_mode) {
+            std::size_t num_printed_mismatches = 0;
+            for (size_t i = 0; i < readback_data_vec.size() && num_printed_mismatches < 64; i++) {
+                if (readback_data_vec[i] != inputs[i]) {
+                    log_error("[{}]: expected {} got {}", i, inputs[i], readback_data_vec[i]);
+                    num_printed_mismatches++;
+                }
+            }
+            log_error("... (remaining mismatches omitted)");
+        }
+    }
+    return Correctness::Correct;
+};
+
+void run_programs(std::vector<Program>& programs, std::vector<Device*> const& devices) {
+    EXPECT_EQ(programs.size(), devices.size());
+    const size_t num_programs = programs.size();
+    try {
+        for (size_t i = 0; i < num_programs; i++) {
+            tt::tt_metal::detail::CompileProgram(devices.at(i), programs.at(i));
+        }
+    } catch (std::exception& e) {
+        log_error("Failed compile: {}", e.what());
+        throw e;
+    }
+
+    log_info(tt::LogTest, "Running...");
+
+    std::vector<std::thread> threads;
+    threads.reserve(num_programs);
+    if (std::getenv("TT_METAL_SLOW_DISPATCH_MODE")) {
+        for (size_t i = 0; i < num_programs; i++) {
+            threads.emplace_back(std::thread([&] { tt_metal::detail::LaunchProgram(devices.at(i), programs.at(i)); }));
+        }
+
+        std::ranges::for_each(threads, [](std::thread& t) { t.join(); });
+    } else {
+        for (size_t i = 0; i < num_programs; i++) {
+            tt_metal::EnqueueProgram(devices.at(i)->command_queue(), programs.at(i), false);
+        }
+
+        log_debug(tt::LogTest, "Calling Finish");
+        for (size_t i = 0; i < num_programs; i++) {
+            tt_metal::Finish(devices.at(i)->command_queue());
+        }
+    }
+}
+
+std::tuple<std::shared_ptr<Buffer>, std::vector<uint32_t>> build_input_buffer(
+    Device* first_device, size_t tensor_size_bytes, BankedConfig const& test_config) {
+    auto inputs = std::vector<uint32_t>(tensor_size_bytes / sizeof(uint32_t), 0);
+    std::iota(inputs.begin(), inputs.end(), 0);
+
+    // Input buffer
+    auto local_input_buffer = CreateBuffer(InterleavedBufferConfig{
+        first_device, test_config.size_bytes, test_config.page_size_bytes, test_config.input_buffer_type});
+    tt_metal::detail::WriteToBuffer(local_input_buffer, inputs);
+    return {local_input_buffer, inputs};
+}
+
+struct EthLinkHop {
+    CoreCoord hop_src;
+    CoreCoord hop_dest;
+};
+
+struct ChipConnection {
+    std::vector<EthLinkHop> links;
+};
+
+struct unicast_send {
+    size_t distance;
+};
+struct mcast_send {
+    size_t distance;
+    size_t range;
+};
+
+
+using mode_variant_t = std::variant<mcast_send, unicast_send>;
+
+static constexpr size_t PACKET_HEADER_SIZE_BYTES = sizeof(tt::fabric::PacketHeader);
+void generate_sender_worker_kernels(
+    Program& program,
+    Device* device,
+    CoreCoord const& worker_core,
+    ttnn::ccl::SenderWorkerAdapterSpec const& worker_fabric_connection,
+    mode_variant_t const& mode,
+    std::size_t edm_buffer_size,
+    uint32_t page_plus_header_size,
+    uint32_t num_pages_total,
+    uint32_t num_pages_per_edm_buffer,
+    uint32_t local_worker_fabric_semaphore_id,
+    uint32_t local_worker_last_message_semaphore_id,
+    uint32_t dram_input_buffer_base_addr,
+    bool src_is_dram,
+    uint32_t dram_output_buffer_base_addr,
+    bool dest_is_dram,
+    uint32_t worker_buffer_index_semaphore_id,
+    // farthest to closest
+    std::vector<ttnn::ccl::edm_termination_info_t> const& edm_termination_infos) {
+
+    auto const& edm_noc_core = CoreCoord(worker_fabric_connection.edm_noc_x, worker_fabric_connection.edm_noc_y);
+    std::vector<uint32_t> sender_worker_reader_compile_args{
+        src_is_dram,      //
+        num_pages_total,  //
+        page_plus_header_size - PACKET_HEADER_SIZE_BYTES,
+        num_pages_per_edm_buffer};
+    std::vector<uint32_t> sender_worker_reader_runtime_args{dram_input_buffer_base_addr};
+
+    log_trace(tt::LogTest, "\tSenderReader CT Args");
+    for (auto const& arg : sender_worker_reader_compile_args) {
+        log_trace(tt::LogTest, "\t\t{}", arg);
+    }
+    log_trace(tt::LogTest, "\tSenderReader RT Args");
+    for (auto const& arg : sender_worker_reader_runtime_args) {
+        log_trace(tt::LogTest, "\t\t{}", arg);
+    }
+
+    std::vector<uint32_t> sender_worker_writer_compile_args{
+        num_pages_per_edm_buffer,
+        num_pages_total,
+        page_plus_header_size - PACKET_HEADER_SIZE_BYTES,
+        worker_fabric_connection.num_buffers_per_channel,
+        dest_is_dram,
+        std::holds_alternative<mcast_send>(mode) ? 1 : 0};
+    log_trace(tt::LogTest, "worker_fabric_connection.edm_l1_sem_addr: {}", worker_fabric_connection.edm_l1_sem_addr);
+    log_trace(tt::LogTest, "worker_buffer_index_semaphore_id: {}", worker_buffer_index_semaphore_id);
+    log_trace(tt::LogTest, "last_message_semaphore_address: {}", local_worker_last_message_semaphore_id);
+    log_trace(
+        tt::LogTest,
+        "Sender communicating with EDM: x={}, y={}",
+        (uint32_t)edm_noc_core.x,
+        (uint32_t)edm_noc_core.y);
+    std::vector<uint32_t> sender_worker_writer_runtime_args{
+        worker_fabric_connection.edm_buffer_base_addr,
+        worker_fabric_connection.edm_l1_sem_addr,
+        local_worker_fabric_semaphore_id,
+        (uint32_t)edm_noc_core.x,
+        (uint32_t)edm_noc_core.y,
+        worker_fabric_connection.num_buffers_per_channel,
+
+        worker_fabric_connection.edm_connection_handshake_addr,
+        worker_fabric_connection.edm_worker_location_info_addr,
+        edm_buffer_size,
+        dram_output_buffer_base_addr,
+        local_worker_last_message_semaphore_id,
+        worker_buffer_index_semaphore_id,
+        worker_fabric_connection.buffer_index_semaphore_id};
+
+    if (std::holds_alternative<mcast_send>(mode)) {
+        sender_worker_writer_runtime_args.push_back(std::get<mcast_send>(mode).distance);
+        sender_worker_writer_runtime_args.push_back(std::get<mcast_send>(mode).range);
+    } else {
+        sender_worker_writer_runtime_args.push_back(std::get<unicast_send>(mode).distance);
+    }
+
+    sender_worker_writer_runtime_args.push_back(edm_termination_infos.size());
+    for (auto const& info : edm_termination_infos) {
+        sender_worker_writer_runtime_args.push_back(info.edm_noc_x);
+        sender_worker_writer_runtime_args.push_back(info.edm_noc_y);
+        sender_worker_writer_runtime_args.push_back(info.distance);
+        sender_worker_writer_runtime_args.push_back(info.termination_addr);
+        log_trace(
+            tt::LogTest,
+            "EDM termination info: x={}, y={}, distance={}, termination_addr={}",
+            info.edm_noc_x,
+            info.edm_noc_y,
+            info.distance,
+            info.termination_addr);
+    }
+
+    uint32_t src0_cb_index = CB::c_in0;
+    log_trace(tt::LogTest, "\tSenderWriter CT Args");
+    for (auto const& arg : sender_worker_writer_compile_args) {
+        log_trace(tt::LogTest, "\t\t{}", arg);
+    }
+    log_trace(tt::LogTest, "\tSenderWriter RT Args");
+    for (auto const& arg : sender_worker_writer_runtime_args) {
+        log_trace(tt::LogTest, "\t\t{}", arg);
+    }
+
+    // Just want a dummy DF
+    tt::DataFormat df = (page_plus_header_size - PACKET_HEADER_SIZE_BYTES) == 1024   ? tt::DataFormat::Bfp8
+                        : (page_plus_header_size - PACKET_HEADER_SIZE_BYTES) == 2048 ? tt::DataFormat::Float16
+                                                                                     : tt::DataFormat::Float32;
+    tt_metal::CircularBufferConfig cb_src0_config =
+        tt_metal::CircularBufferConfig(2 * num_pages_per_edm_buffer * page_plus_header_size, {{src0_cb_index, df}})
+            .set_page_size(src0_cb_index, page_plus_header_size);
+    CBHandle sender_workers_cb = CreateCircularBuffer(program, worker_core, cb_src0_config);
+    auto sender_worker_reader_kernel = tt_metal::CreateKernel(
+        program,
+        "tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_reader.cpp",
+        worker_core,
+        tt_metal::DataMovementConfig{
+            .processor = tt_metal::DataMovementProcessor::RISCV_0,
+            .noc = tt_metal::NOC::RISCV_0_default,
+            .compile_args = sender_worker_reader_compile_args});
+    auto sender_worker_writer_kernel = tt_metal::CreateKernel(
+        program,
+        "tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp",
+        worker_core,
+        tt_metal::DataMovementConfig{
+            .processor = tt_metal::DataMovementProcessor::RISCV_1,
+            .noc = tt_metal::NOC::RISCV_1_default,
+            .compile_args = sender_worker_writer_compile_args});
+    tt_metal::SetRuntimeArgs(program, sender_worker_reader_kernel, worker_core, sender_worker_reader_runtime_args);
+    tt_metal::SetRuntimeArgs(program, sender_worker_writer_kernel, worker_core, sender_worker_writer_runtime_args);
+}
+
+bool RunLoopbackTest(
+    tt_metal::Device* sender_device,
+    tt_metal::Device* receiver_device,
+
+    const CoreCoord& eth_sender_core,
+    const CoreCoord& eth_receiver_core,
+
+    const uint32_t page_size,
+    const uint32_t num_pages_total,
+    bool src_is_dram,
+    bool dest_is_dram) {
+    std::size_t page_plus_header_size = page_size + sizeof(tt::fabric::PacketHeader);
+    std::size_t tensor_size_bytes = num_pages_total * page_size;
+
+    std::vector<Program> programs(2);
+    auto& sender_program = programs.at(0);
+    auto& receiver_program = programs.at(1);
+
+    std::vector<CoreCoord> worker_cores = {CoreCoord(0, 0)};
+
+    auto local_worker_fabric_semaphore_id = tt::tt_metal::CreateSemaphore(sender_program, worker_cores.at(0), 0);
+    auto local_worker_last_message_semaphore_id = tt::tt_metal::CreateSemaphore(sender_program, worker_cores.at(0), 0);
+    auto worker_buffer_index_semaphore_id = tt::tt_metal::CreateSemaphore(sender_program, worker_cores.at(0), 0);
+
+    // Generate inputs
+    ////////////////////////////////////////////////////////////////////////////
+    //   SETUP THE INPUT CB
+    ////////////////////////////////////////////////////////////////////////////
+
+    BankedConfig test_config = BankedConfig{
+        .num_pages = num_pages_total,
+        .size_bytes = tensor_size_bytes,
+        .page_size_bytes = page_size,
+        .input_buffer_type = src_is_dram ? BufferType::DRAM : BufferType::L1,
+        .output_buffer_type = dest_is_dram ? BufferType::DRAM : BufferType::L1,
+        .l1_data_format = tt::DataFormat::Float16_b};
+
+    auto [local_input_buffer, inputs] = build_input_buffer(sender_device, tensor_size_bytes, test_config);
+
+    std::vector<uint32_t> all_zeros(inputs.size(), 0);
+    auto local_output_buffer = CreateBuffer(InterleavedBufferConfig{
+        sender_device, test_config.size_bytes, test_config.page_size_bytes, test_config.output_buffer_type});
+
+    tt_metal::detail::WriteToBuffer(local_output_buffer, all_zeros);
+
+    auto local_input_buffer_address = local_input_buffer->address();
+    auto local_output_buffer_address = local_output_buffer->address();
+
+    ////////////////////////////////////////////////////////////////////////////
+    // EDM Builder Setup
+    ////////////////////////////////////////////////////////////////////////////
+
+    static constexpr std::size_t edm_buffer_size = 4096 + PACKET_HEADER_SIZE_BYTES;
+    const chip_id_t local_chip_id = 0;
+    const chip_id_t remote_chip_id = 1;
+    auto const& edm_config = ttnn::ccl::FabricEriscDatamoverConfig(edm_buffer_size, 1, 2);
+    auto chip_0_edm_builder = ttnn::ccl::FabricEriscDatamoverBuilder::build(
+        sender_device,
+        sender_program,
+        eth_sender_core,
+        local_chip_id,
+        remote_chip_id,
+        edm_config);
+    auto chip0_worker_fabric_connection = chip_0_edm_builder.build_connection_to_worker_channel();
+    auto chip_1_edm_builder = ttnn::ccl::FabricEriscDatamoverBuilder::build(
+        receiver_device,
+        receiver_program,
+        eth_receiver_core,
+        remote_chip_id,
+        local_chip_id,
+        edm_config);
+    // Create the loopback connection on the second device
+    chip_1_edm_builder.connect_to_downstream_edm(chip_1_edm_builder);
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Build Workers
+    ////////////////////////////////////////////////////////////////////////////
+    log_trace(tt::LogTest, "Generating local_sender -> remote_receiver workers");
+    const std::size_t pages_per_send =
+        (chip0_worker_fabric_connection.buffer_size_bytes - PACKET_HEADER_SIZE_BYTES) / page_size;
+    auto const& worker_core = worker_cores.at(0);
+    log_trace(tt::LogTest, "Worker {}. On Core x={},y={}", 0, worker_core.x, worker_core.y);
+
+    std::vector<ttnn::ccl::edm_termination_info_t> const& edm_termination_infos = {
+        {1,
+         sender_device->ethernet_core_from_logical_core(eth_receiver_core).x,
+         sender_device->ethernet_core_from_logical_core(eth_receiver_core).y,
+         ttnn::ccl::FabricEriscDatamoverConfig::termination_signal_address},
+        {0,
+         sender_device->ethernet_core_from_logical_core(eth_sender_core).x,
+         sender_device->ethernet_core_from_logical_core(eth_sender_core).y,
+         ttnn::ccl::FabricEriscDatamoverConfig::termination_signal_address}};
+
+    generate_sender_worker_kernels(
+        sender_program,
+        sender_device,
+        worker_core,
+        chip0_worker_fabric_connection,
+        unicast_send{1},
+        edm_buffer_size,
+        page_plus_header_size,
+        num_pages_total,
+        pages_per_send,
+        local_worker_fabric_semaphore_id,
+        local_worker_last_message_semaphore_id,
+        local_input_buffer_address,
+        src_is_dram,
+        local_output_buffer_address,
+        dest_is_dram,
+        worker_buffer_index_semaphore_id,
+        edm_termination_infos);
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Build EDMs
+    ////////////////////////////////////////////////////////////////////////////
+    auto local_edm_kernel =
+        ttnn::ccl::generate_edm_kernel(sender_program, sender_device, chip_0_edm_builder, eth_sender_core, NOC::NOC_0);
+
+    auto remote_edm_kernel = ttnn::ccl::generate_edm_kernel(
+        receiver_program, receiver_device, chip_1_edm_builder, eth_receiver_core, NOC::NOC_0);
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      Compile and Execute Application
+    ////////////////////////////////////////////////////////////////////////////
+    run_programs(programs, {sender_device, receiver_device});
+    log_info(tt::LogTest, "Reading back outputs");
+
+    bool pass = true;
+    constexpr bool enable_check = true;
+    if constexpr (enable_check) {
+        pass &= run_output_check(all_zeros, inputs, local_output_buffer) == Correctness::Correct;
+    }
+    return pass;
+}
+
+bool RunLineFabricTest(
+    std::vector<tt_metal::Device*> devices,
+
+    const size_t mcast_first_chip,
+    const size_t mcast_last_chip,
+
+    const uint32_t page_size,
+    const uint32_t num_pages_total,
+    bool src_is_dram,
+    bool dest_is_dram) {
+    std::size_t page_plus_header_size = page_size + sizeof(tt::fabric::PacketHeader);
+    std::size_t tensor_size_bytes = num_pages_total * page_size;
+
+    static constexpr std::size_t edm_buffer_size = 4096 + PACKET_HEADER_SIZE_BYTES;
+    const size_t local_chip_id = 0;
+    const size_t remote_chip_id = 1;
+    auto programs = std::vector<Program>(devices.size());
+    auto program_ptrs = std::vector<Program*>(devices.size());
+    std::transform(programs.begin(), programs.end(), program_ptrs.begin(), [](auto& program) { return &program; });
+
+    auto line_fabric = ttnn::ccl::EdmLineFabricOpInterface(devices, program_ptrs, 1);
+
+    std::vector<CoreCoord> worker_cores = {CoreCoord(0, 0)};
+
+    // Generate inputs
+    ////////////////////////////////////////////////////////////////////////////
+    //   SETUP THE INPUT CB
+    ////////////////////////////////////////////////////////////////////////////
+    BankedConfig test_config = BankedConfig{
+        .num_pages = num_pages_total,
+        .size_bytes = tensor_size_bytes,
+        .page_size_bytes = page_size,
+        .input_buffer_type = src_is_dram ? BufferType::DRAM : BufferType::L1,
+        .output_buffer_type = dest_is_dram ? BufferType::DRAM : BufferType::L1,
+        .l1_data_format = tt::DataFormat::Float16_b};
+
+    // Input buffer
+    auto [local_input_buffer, inputs] = build_input_buffer(devices[0], tensor_size_bytes, test_config);
+    auto local_input_buffer_address = local_input_buffer->address();
+
+    std::vector<uint32_t> all_zeros(inputs.size(), 0);
+    // output buffers
+    TT_ASSERT(mcast_first_chip <= mcast_last_chip, "mcast_first_chip must be less than or equal to mcast_last_chip");
+    TT_ASSERT(mcast_last_chip < devices.size(), "mcast_last_chip must be less than the number of devices");
+    std::vector<std::shared_ptr<Buffer>> output_buffers;
+    output_buffers.reserve(devices.size());
+    for (size_t i = 0; i < devices.size(); i++) {
+        if (i == 0) {
+            output_buffers.push_back(CreateBuffer(InterleavedBufferConfig{
+                devices.at(i), test_config.size_bytes, test_config.page_size_bytes, test_config.output_buffer_type}));
+        } else {
+            output_buffers.push_back(CreateBuffer(InterleavedBufferConfig{
+                devices.at(i), test_config.size_bytes, test_config.page_size_bytes, test_config.output_buffer_type}, output_buffers[0]->address())
+                );
+        }
+        tt_metal::detail::WriteToBuffer(output_buffers.back(), all_zeros);
+    }
+    auto local_output_buffer_address = output_buffers[0]->address();
+    bool all_same_addr = std::ranges::all_of(output_buffers, [local_output_buffer_address](auto const& buffer) {
+        return buffer->address() == local_output_buffer_address;
+    });
+    TT_ASSERT(all_same_addr, "All output buffers must have the same address");
+
+    ////////////////////////////////////////////////////////////////////////////
+    //   Setup Semaphores and Builders
+    ////////////////////////////////////////////////////////////////////////////
+
+    auto local_worker_fabric_semaphore_id = tt::tt_metal::CreateSemaphore(programs[0], worker_cores.at(0), 0);
+    auto local_worker_last_message_semaphore_id = tt::tt_metal::CreateSemaphore(programs[0], worker_cores.at(0), 0);
+    auto worker_buffer_index_semaphore_id = tt::tt_metal::CreateSemaphore(programs[0], worker_cores.at(0), 0);
+    ////////////////////////////////////////////////////////////////////////////
+    // Build Workers
+    ////////////////////////////////////////////////////////////////////////////
+    log_trace(tt::LogTest, "Generating local_sender -> remote_receiver workers");
+    auto const& worker_core = worker_cores.at(0);
+    log_trace(tt::LogTest, "Worker {}. On Core x={},y={}", 0, worker_core.x, worker_core.y);
+
+    const auto edm_termination_infos = line_fabric.generate_ordered_termination_info_farthest_to_nearest();
+
+    auto chip0_worker_fabric_connection = line_fabric.uniquely_connect_worker(devices[0], ttnn::ccl::EdmLineFabricOpInterface::FORWARD);
+
+    const std::size_t pages_per_send =
+        (chip0_worker_fabric_connection.buffer_size_bytes - PACKET_HEADER_SIZE_BYTES) / page_size;
+    generate_sender_worker_kernels(
+        programs[0],
+        devices[0],
+        worker_core,
+        chip0_worker_fabric_connection,
+        mcast_send{mcast_first_chip - 1, mcast_last_chip - mcast_first_chip},
+        edm_buffer_size,
+        page_plus_header_size,
+        num_pages_total,
+        pages_per_send,
+        local_worker_fabric_semaphore_id,
+        local_worker_last_message_semaphore_id,
+        local_input_buffer_address,
+        src_is_dram,
+        local_output_buffer_address,
+        dest_is_dram,
+        worker_buffer_index_semaphore_id,
+        edm_termination_infos);
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Build EDM Kernels
+    ////////////////////////////////////////////////////////////////////////////
+    line_fabric.build_kernels();
+
+
+    ////////////////////////////////////////////////////////////////////////////
+    //                      Compile and Execute Application
+    ////////////////////////////////////////////////////////////////////////////
+
+    run_programs(programs, devices);
+    log_info(tt::LogTest, "Reading back outputs");
+
+    bool pass = true;
+    constexpr bool enable_check = true;
+    if constexpr (enable_check) {
+
+        // Check all output buffers. Make sure only the buffers in the mcast range are
+        // non-zero. All other buffers outside the range should be zero filled
+        TT_ASSERT(
+            !std::all_of(inputs.begin(), inputs.end(), [](uint32_t x) { return x == 0; }),
+            "Input buffer expected to not be all 0");
+        for (size_t i = 0; i < output_buffers.size(); i++) {
+            bool compare_with_input = (mcast_first_chip <= i && i <= mcast_last_chip);
+            auto &golden_tensor = compare_with_input ? inputs : all_zeros;
+            pass &= run_output_check(all_zeros, golden_tensor, output_buffers.at(i)) == Correctness::Correct;
+        }
+    }
+
+    return pass;
+}
+
+// RESUME HERE AND IMPLEMENT MCAST TEST
+int TestLineFabricEntrypoint(
+    const size_t mcast_first_chip,
+    const size_t mcast_last_chip,
+    const uint32_t page_size,
+    const uint32_t num_pages_total,
+    const bool src_is_dram,
+    const bool dest_is_dram) {
+    // argv[0]: program
+    // argv[1]: buffer_size_bytes
+    // argv[2]: num_loops
+
+    auto arch = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name());
+    auto num_devices = tt::tt_metal::GetNumAvailableDevices();
+    if (num_devices < 4) {
+        log_info("This test can only be run on N300 devices");
+        return 0;
+    }
+    if (arch == tt::ARCH::GRAYSKULL) {
+        log_info("Test must be run on WH");
+        return 0;
+    }
+
+    T3000TestDevice test_fixture;
+
+    // build a line of devices
+    std::vector<Device*> devices = {
+        test_fixture.devices_.at(0),
+        test_fixture.devices_.at(1),
+        test_fixture.devices_.at(2),
+        test_fixture.devices_.at(3)};
+
+    bool success = false;
+    try {
+        success = RunLineFabricTest(
+            devices,
+            // fabric_hops,
+
+            mcast_first_chip,
+            mcast_last_chip,
+
+            page_size,
+            num_pages_total,
+            src_is_dram,
+            dest_is_dram);
+
+    } catch (std::exception& e) {
+        log_error("Caught exception: {}", e.what());
+        test_fixture.TearDown();
+        return -1;
+    }
+
+    test_fixture.TearDown();
+
+    return success ? 0 : -1;
+}
+
+int TestLoopbackEntrypoint(
+    const uint32_t page_size, const uint32_t num_pages_total, const bool src_is_dram, const bool dest_is_dram) {
+    // argv[0]: program
+    // argv[1]: buffer_size_bytes
+    // argv[2]: num_loops
+
+    auto arch = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name());
+    auto num_devices = tt::tt_metal::GetNumAvailableDevices();
+    if (num_devices < 4) {
+        log_info("This test can only be run on N300 devices");
+        return 0;
+    }
+    if (arch == tt::ARCH::GRAYSKULL) {
+        log_info("Test must be run on WH");
+        return 0;
+    }
+
+    T3000TestDevice test_fixture;
+
+    const auto& device_0 = test_fixture.devices_.at(0);
+
+    auto const& active_eth_cores = device_0->get_active_ethernet_cores(true);
+    auto eth_sender_core_iter = active_eth_cores.begin();
+    auto eth_sender_core_iter_end = active_eth_cores.end();
+    chip_id_t device_id = std::numeric_limits<chip_id_t>::max();
+    tt_xy_pair eth_receiver_core;
+    bool initialized = false;
+    tt_xy_pair eth_sender_core;
+    do {
+        TT_FATAL(eth_sender_core_iter != eth_sender_core_iter_end, "Error");
+        std::tie(device_id, eth_receiver_core) = device_0->get_connected_ethernet_core(*eth_sender_core_iter);
+        eth_sender_core = *eth_sender_core_iter;
+        eth_sender_core_iter++;
+    } while (device_id != 1);
+    TT_ASSERT(device_id == 1);
+    const auto& device_1 = test_fixture.devices_.at(device_id);
+
+    bool success = false;
+    try {
+        success = RunLoopbackTest(
+            device_0,
+            device_1,
+
+            eth_sender_core,
+            eth_receiver_core,
+
+            page_size,
+            num_pages_total,
+            src_is_dram,
+            dest_is_dram);
+    } catch (std::exception& e) {
+        log_error("Caught exception: {}", e.what());
+        test_fixture.TearDown();
+        return -1;
+    }
+
+    test_fixture.TearDown();
+
+    return success ? 0 : -1;
+}
+
+////////////////////////////////////////////////////////////////////
+///  MESSAGE COUNT TERMINATION MODE
+////////////////////////////////////////////////////////////////////
+
+TEST(WorkerFabricEdmDatapath, FabricEDMLoopback_With_Workers_SingleMessage) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 1;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+
+    auto result = TestLoopbackEntrypoint(page_size, num_pages_total, src_is_dram, dest_is_dram);
+    ASSERT_EQ(result, 0);
+}
+
+// Will wrapp sender but not receiver buffers
+TEST(WorkerFabricEdmDatapath, FabricEDMLoopback_With_Workers_2_messages) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 2;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+
+    auto result = TestLoopbackEntrypoint(page_size, num_pages_total, src_is_dram, dest_is_dram);
+    ASSERT_EQ(result, 0);
+}
+// Will wrapp sender but not receiver buffers
+TEST(WorkerFabricEdmDatapath, FabricEDMLoopback_With_Workers_10_messages) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 10;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+
+    auto result = TestLoopbackEntrypoint(page_size, num_pages_total, src_is_dram, dest_is_dram);
+    ASSERT_EQ(result, 0);
+}
+
+// Will wrapp sender and receiver buffers
+TEST(WorkerFabricEdmDatapath, FabricEDMLoopback_With_Workers_20_messages) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 20;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+
+    auto result = TestLoopbackEntrypoint(page_size, num_pages_total, src_is_dram, dest_is_dram);
+    ASSERT_EQ(result, 0);
+}
+
+TEST(WorkerFabricEdmDatapath, FabricEDMLoopback_With_Workers) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 100000;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+
+    auto result = TestLoopbackEntrypoint(page_size, num_pages_total, src_is_dram, dest_is_dram);
+    ASSERT_EQ(result, 0);
+}
+
+TEST(WorkerFabricEdmDatapath, LineFabricMcast_SingleMessage_SingleSource) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 1;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+    const size_t mcast_first_chip = 1;
+    const size_t mcast_last_chip = 3;
+
+    auto result = TestLineFabricEntrypoint(
+        mcast_first_chip, mcast_last_chip, page_size, num_pages_total, src_is_dram, dest_is_dram);
+
+    ASSERT_EQ(result, 0);
+}
+
+// Non-functional on harvested parts. Needs testing on unharvested parts.
+TEST(WorkerFabricEdmDatapath, LineFabricMcast_ManyMessages_SingleSource) {
+    const uint32_t page_size = 2048;
+    const uint32_t num_pages_total = 10000;
+    const bool src_is_dram = true;
+    const bool dest_is_dram = true;
+    const size_t mcast_first_chip = 1;
+    const size_t mcast_last_chip = 3;
+
+    auto result = TestLineFabricEntrypoint(
+        mcast_first_chip, mcast_last_chip, page_size, num_pages_total, src_is_dram, dest_is_dram);
+
+    ASSERT_EQ(result, 0);
+}
+
+// EnablePersistentKernelCache
diff --git a/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h b/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
index 6fd84212e02..2e479a77370 100644
--- a/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
@@ -428,6 +428,7 @@ inline __attribute__((always_inline)) void noc_fast_write_dw_inline(
                              (posted ? 0x0 : NOC_CMD_RESP_MARKED);
 
     uint32_t be32 = be;
+    // If we're given a misaligned address, don't write to the bytes in the word below the address
     uint32_t be_shift = (dest_addr & (NOC_WORD_BYTES - 1));
     be32 = (be32 << be_shift);
 
diff --git a/tt_metal/hw/inc/ethernet/dataflow_api.h b/tt_metal/hw/inc/ethernet/dataflow_api.h
index 8901021fac5..5b0ddafb995 100644
--- a/tt_metal/hw/inc/ethernet/dataflow_api.h
+++ b/tt_metal/hw/inc/ethernet/dataflow_api.h
@@ -203,6 +203,23 @@ void eth_send_bytes_over_channel_payload_only(
     }
 }
 
+// Calls the unsafe variant of eth_send_packet under the hood which is guaranteed not to context switch
+// We want this for code size reasons
+FORCE_INLINE
+void eth_send_bytes_over_channel_payload_only_unsafe(
+    uint32_t src_addr,
+    uint32_t dst_addr,
+    uint32_t num_bytes,
+    uint32_t num_bytes_per_send = 16,
+    uint32_t num_bytes_per_send_word_size = 1) {
+    uint32_t num_bytes_sent = 0;
+    while (num_bytes_sent < num_bytes) {
+        internal_::eth_send_packet_unsafe(
+            0, ((num_bytes_sent + src_addr) >> 4), ((num_bytes_sent + dst_addr) >> 4), num_bytes_per_send_word_size);
+        num_bytes_sent += num_bytes_per_send;
+    }
+}
+
 /*
  * Sends the write completion signal to the receiver ethernet core, for transfers where the payload was already sent.
  * The second half of a full ethernet send.
diff --git a/tt_metal/hw/inc/ethernet/tunneling.h b/tt_metal/hw/inc/ethernet/tunneling.h
index b6e4cdd0bd5..043a133eeb0 100644
--- a/tt_metal/hw/inc/ethernet/tunneling.h
+++ b/tt_metal/hw/inc/ethernet/tunneling.h
@@ -26,7 +26,11 @@ struct eth_channel_sync_t {
     // First level ack that signals to sender that the payload was received by receiver,
     // indicating that sender can reuse the sender side buffer safely.
     volatile uint32_t receiver_ack;
-    uint32_t reserved_1;
+
+    // Logical channel ID tagged by the sender. Not required when channels
+    // are connected 1:1 (single producer - single consumer)
+    volatile uint32_t src_id;
+
     uint32_t reserved_2;
 };
 
@@ -66,6 +70,15 @@ void eth_send_packet(uint32_t q_num, uint32_t src_word_addr, uint32_t dest_word_
     eth_txq_reg_write(q_num, ETH_TXQ_CMD, ETH_TXQ_CMD_START_DATA);
 }
 
+FORCE_INLINE
+void eth_send_packet_unsafe(uint32_t q_num, uint32_t src_word_addr, uint32_t dest_word_addr, uint32_t num_words) {
+    ASSERT(eth_txq_reg_read(q_num, ETH_TXQ_CMD) == 0);
+    eth_txq_reg_write(q_num, ETH_TXQ_TRANSFER_START_ADDR, src_word_addr << 4);
+    eth_txq_reg_write(q_num, ETH_TXQ_DEST_ADDR, dest_word_addr << 4);
+    eth_txq_reg_write(q_num, ETH_TXQ_TRANSFER_SIZE_BYTES, num_words << 4);
+    eth_txq_reg_write(q_num, ETH_TXQ_CMD, ETH_TXQ_CMD_START_DATA);
+}
+
 FORCE_INLINE
 void eth_write_remote_reg(uint32_t q_num, uint32_t reg_addr, uint32_t val) {
     while (eth_txq_reg_read(q_num, ETH_TXQ_CMD) != 0) {
diff --git a/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h b/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
index 2fc64b5351c..c8a1b71303c 100644
--- a/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
@@ -281,6 +281,7 @@ inline __attribute__((always_inline)) void noc_fast_write_dw_inline(uint32_t noc
     (posted ? 0x0 : NOC_CMD_RESP_MARKED);
 
   uint32_t be32 = be;
+  // If we're given a misaligned address, don't write to the bytes in the word below the address
   uint32_t be_shift = (dest_addr & (NOC_WORD_BYTES-1));
   be32 = (be32 << be_shift);
 
diff --git a/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h b/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
index 5eecc54540d..647ec3e5f89 100644
--- a/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
+++ b/tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
@@ -296,6 +296,7 @@ inline __attribute__((always_inline)) void noc_fast_write_dw_inline(uint32_t noc
 
   uint32_t be32 = be;
   uint32_t be_shift = (dest_addr & (NOC_WORD_BYTES-1));
+  // If we're given a misaligned address, don't write to the bytes in the word below the address
   be32 = (be32 << be_shift);
 
   while (!noc_cmd_buf_ready(noc, cmd_buf));
diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index 4d286fc692c..8dbf03025a2 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -10,6 +10,7 @@ set(ALL_TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/graph/graph_processor.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/graph/graph_trace_utils.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/graph/graph_pybind.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/ccl/erisc_datamover_builder.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/ccl/all_gather/all_gather.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/ccl/all_gather/all_gather_pybind.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/ccl/all_gather/device/all_gather_op.cpp
diff --git a/ttnn/cpp/ttnn/operations/ccl/ccl_common.cpp b/ttnn/cpp/ttnn/operations/ccl/ccl_common.cpp
index 92e8b46e805..6c49072b809 100644
--- a/ttnn/cpp/ttnn/operations/ccl/ccl_common.cpp
+++ b/ttnn/cpp/ttnn/operations/ccl/ccl_common.cpp
@@ -8,6 +8,7 @@
 #include <cmath>
 
 #include "ccl_host_datastructures.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp"
 
 namespace ttnn {
 namespace ccl {
@@ -171,7 +172,7 @@ void generate_edm_kernels_for_ring_or_linear_topology(
             auto eth_sender_core = topology_config.eth_sender_cores.at(i);
             log_trace(tt::LogOp, "EDM CLOCKWISE KERNEL RT ARGS: ");
             auto eth_sender_kernel =
-                ccl::generate_edm_kernel(program, device, clockwise_edm_builders.at(i), eth_sender_core, sender_noc);
+                generate_edm_kernel(program, device, clockwise_edm_builders.at(i), eth_sender_core, sender_noc);
             log_trace(
                 tt::LogOp,
                 "RingIndex: {}. Link {}. Clockwise EDM Core (x={},y={})",
@@ -185,7 +186,7 @@ void generate_edm_kernels_for_ring_or_linear_topology(
         if (is_counter_clockwise_direction_edm_enabled) {
             log_trace(tt::LogOp, "EDM COUNTER CLOCKWISE KERNEL RT ARGS: ");
             auto eth_receiver_core = topology_config.eth_receiver_cores.at(i);
-            auto eth_receiver_kernel = ccl::generate_edm_kernel(
+            auto eth_receiver_kernel = generate_edm_kernel(
                 program, device, counter_clockwise_edm_builders.at(i), eth_receiver_core, receiver_noc);
             log_trace(
                 tt::LogOp,
@@ -198,35 +199,36 @@ void generate_edm_kernels_for_ring_or_linear_topology(
     }
 }
 
-
-KernelHandle generate_edm_kernel(
-   tt::tt_metal::Program& program,
+template <typename EDMBuilder>
+KernelHandle generate_edm_kernel_impl(
+    tt::tt_metal::Program& program,
     Device const* device,
-    ccl::EriscDatamoverBuilder const& edm_builder,
+    EDMBuilder const& edm_builder,
+    std::string const& kernel_path,
     CoreCoord const& eth_core,
     NOC noc_id) {
     edm_builder.dump_to_log();
 
-    std::vector<uint32_t> const& edm_clockwise_kernel_rt_args = edm_builder.emit_runtime_args();
+    std::vector<uint32_t> const edm_kernel_rt_args = edm_builder.get_runtime_args();
     // Ethernet Kernels
-    std::vector<uint32_t> eth_sender_ct_args = edm_builder.emit_compile_time_args();
+    std::vector<uint32_t> const eth_sender_ct_args = edm_builder.get_compile_time_args();
     log_trace(tt::LogOp, "EDM core (x={},y={}):", eth_core.x, eth_core.y);
     log_trace(tt::LogOp, "CT ARGS:");
     for (auto const& s : eth_sender_ct_args) {
         log_trace(tt::LogOp, "\t{}", s);
     }
 
-    auto eth_sender_kernel =tt::tt_metal::CreateKernel(
+    auto eth_sender_kernel = tt::tt_metal::CreateKernel(
         program,
-        "ttnn/cpp/ttnn/operations/ccl/kernels/edm/erisc_datamover.cpp",
+        kernel_path,
         eth_core,
-       tt::tt_metal::EthernetConfig{.noc = noc_id, .compile_args = eth_sender_ct_args});
+        tt::tt_metal::EthernetConfig{.noc = noc_id, .compile_args = eth_sender_ct_args});
 
-   tt::tt_metal::SetRuntimeArgs(program, eth_sender_kernel, eth_core, edm_clockwise_kernel_rt_args);
+    tt::tt_metal::SetRuntimeArgs(program, eth_sender_kernel, eth_core, edm_kernel_rt_args);
 
     std::stringstream ss;
     ss << "EDM ARGS:\n";
-    for (auto const& s : edm_clockwise_kernel_rt_args) {
+    for (auto const& s : edm_kernel_rt_args) {
         ss << "\t" << s << "\n";
     }
     log_trace(tt::LogOp, "{}", ss.str());
@@ -234,6 +236,31 @@ KernelHandle generate_edm_kernel(
     return eth_sender_kernel;
 }
 
+KernelHandle generate_edm_kernel(
+    tt::tt_metal::Program& program,
+    Device const* device,
+    ccl::FabricEriscDatamoverBuilder const& edm_builder,
+    CoreCoord const& eth_core,
+    NOC noc_id) {
+    return generate_edm_kernel_impl(
+        program,
+        device,
+        edm_builder,
+        "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover.cpp",
+        eth_core,
+        noc_id);
+}
+
+KernelHandle generate_edm_kernel(
+    tt::tt_metal::Program& program,
+    Device const* device,
+    ccl::EriscDatamoverBuilder const& edm_builder,
+    CoreCoord const& eth_core,
+    NOC noc_id) {
+    return generate_edm_kernel_impl(
+        program, device, edm_builder, "ttnn/cpp/ttnn/operations/ccl/kernels/edm/erisc_datamover.cpp", eth_core, noc_id);
+}
+
 ccl::EriscDatamoverBuilder create_erisc_datamover_builder(
     std::size_t num_channels,
     uint32_t page_size,
diff --git a/ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp b/ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp
index 51228970005..3f71a810bb2 100644
--- a/ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp
+++ b/ttnn/cpp/ttnn/operations/ccl/ccl_common.hpp
@@ -18,6 +18,9 @@
 namespace ttnn {
 namespace ccl {
 
+class FabricEriscDatamoverBuilder;
+class EriscDatamoverBuilder;
+
 std::tuple<uint32_t, std::optional<chip_id_t>, std::optional<chip_id_t>> get_device_index_and_sender_receiver_ids(
     const Tensor& input_tensor,
     const std::vector<Device*>& devices,
@@ -470,7 +473,14 @@ class InterleavedRingAllGatherTensorSlicer : public LegacyCclTensorSlicer {
 KernelHandle generate_edm_kernel(
    tt::tt_metal::Program& program,
     Device const* device,
-    ccl::EriscDatamoverBuilder const& edm_builder,
+    FabricEriscDatamoverBuilder const& edm_builder,
+    CoreCoord const& eth_core,
+    NOC noc_id);
+
+KernelHandle generate_edm_kernel(
+   tt::tt_metal::Program& program,
+    Device const* device,
+    EriscDatamoverBuilder const& edm_builder,
     CoreCoord const& eth_core,
     NOC noc_id);
 
diff --git a/ttnn/cpp/ttnn/operations/ccl/ccl_host_datastructures.hpp b/ttnn/cpp/ttnn/operations/ccl/ccl_host_datastructures.hpp
index 8cfff90bd97..7a6ad6ddcf4 100644
--- a/ttnn/cpp/ttnn/operations/ccl/ccl_host_datastructures.hpp
+++ b/ttnn/cpp/ttnn/operations/ccl/ccl_host_datastructures.hpp
@@ -236,7 +236,7 @@ class EriscDatamoverBuilder {
     }
 
     [[nodiscard]]
-    std::vector<uint32_t> emit_compile_time_args() const {
+    std::vector<uint32_t> get_compile_time_args() const {
         return std::vector<uint32_t>{
             static_cast<uint32_t>(this->enable_sender ? 1 : 0),
             static_cast<uint32_t>(this->enable_receiver ? 1 : 0),
@@ -252,7 +252,7 @@ class EriscDatamoverBuilder {
     }
 
     [[nodiscard]]
-    std::vector<uint32_t> emit_runtime_args() const {
+    std::vector<uint32_t> get_runtime_args() const {
         std::vector<uint32_t> args;
         uint32_t size = 3 + active_channels.size() * 6;
         for (auto const& channel : active_channels) {
@@ -289,7 +289,7 @@ class EriscDatamoverBuilder {
     }
 
     void dump_to_log() const {
-        auto const& rt_args = this->emit_runtime_args();
+        auto const rt_args = this->get_runtime_args();
         log_trace(tt::LogOp, "EDM RT Args:");
         for (auto const& arg : rt_args) {
             log_trace(tt::LogOp, "\t{}", arg);
diff --git a/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.cpp b/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.cpp
new file mode 100644
index 00000000000..3f6c480ef48
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.cpp
@@ -0,0 +1,417 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp"
+
+#include "common/math.hpp"
+#include "erisc_datamover_builder.hpp"
+#include "eth_l1_address_map.h"
+#include "tt_metal/common/assert.hpp"
+#include "ttnn/operations/ccl/ccl_common.hpp"
+#include "ttnn/operations/math.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+
+#include "tt_metal/host_api.hpp"
+#include "tt_metal/impl/device/device.hpp"
+#include "tt_metal/impl/program/program.hpp"
+
+#include <vector>
+namespace ttnn::ccl {
+
+
+// The channel structure is as follows:
+//              &header->  |----------------| channel_base_address
+//                         |    header      |
+//             &payload->  |----------------|
+//                         |                |
+//                         |    payload     |
+//                         |                |
+//        &channel_sync->  |----------------|
+//                         |  channel_sync  |
+//                         ------------------
+//
+
+FabricEriscDatamoverConfig::FabricEriscDatamoverConfig(
+    std::size_t channel_buffer_size_bytes, std::size_t sender_ratio_size, std::size_t receiver_ratio_size) {
+    const size_t min_buffer_size = sizeof(tt::fabric::PacketHeader) + 2 * FabricEriscDatamoverConfig::eth_channel_sync_size;
+    TT_FATAL(channel_buffer_size_bytes >= min_buffer_size, "FabricEriscDatamoverConfig was constructed with `channel_buffer_size_bytes` argument set smaller than minimum size of {}", min_buffer_size);
+    const std::size_t channel_buffer_size_with_channel_sync =
+        channel_buffer_size_bytes + sizeof(tt::fabric::PacketHeader); // + 16 // sizeof(tt::fabric::PacketHeader);
+
+    this->channel_buffer_size_bytes = channel_buffer_size_bytes;
+    this->channel_buffer_size_bytes_with_channel_sync = channel_buffer_size_with_channel_sync;
+    const std::size_t total_ratio_count = 2 * sender_ratio_size + receiver_ratio_size;
+    this->sender_0_channel_size_bytes = tt::round_down(
+        (available_channel_buffering_space / total_ratio_count) * sender_ratio_size,
+        channel_buffer_size_with_channel_sync);
+    this->sender_0_num_buffers = this->sender_0_channel_size_bytes / channel_buffer_size_with_channel_sync;
+    this->sender_1_channel_size_bytes = tt::round_down(
+        (available_channel_buffering_space / total_ratio_count) * sender_ratio_size,
+        channel_buffer_size_with_channel_sync);
+    this->sender_1_num_buffers = this->sender_1_channel_size_bytes / channel_buffer_size_with_channel_sync;
+    this->receiver_channel_size_bytes = tt::round_down(
+        (available_channel_buffering_space / total_ratio_count) * receiver_ratio_size,
+        channel_buffer_size_with_channel_sync);
+    this->receiver_num_buffers = this->receiver_channel_size_bytes / channel_buffer_size_with_channel_sync;
+
+    this->sender_0_channel_base_address = buffer_region_start;
+    this->sender_1_channel_base_address = this->sender_0_channel_base_address + this->sender_0_channel_size_bytes;
+    this->receiver_channel_base_address = this->sender_1_channel_base_address + this->sender_1_channel_size_bytes;
+
+    log_trace(tt::LogOp, "Sender 0 channel_start: {}", this->sender_0_channel_base_address);
+    log_trace(tt::LogOp, "Sender 1 channel_start: {}", this->sender_1_channel_base_address);
+    log_trace(tt::LogOp, "Receiver channel_start: {}", this->receiver_channel_base_address);
+
+    static constexpr size_t total_num_channels = 3; // sender0, sender1, receiver
+    const size_t max_channel_buffer_size = (available_channel_buffering_space / total_num_channels) - FabricEriscDatamoverConfig::eth_channel_sync_size - sizeof(tt::fabric::PacketHeader);
+    TT_FATAL(channel_buffer_size_bytes <= max_channel_buffer_size, "Specified size of `channel_buffer_size_bytes` was too large. Maximum allowable size is {} B", max_channel_buffer_size);
+    TT_FATAL(this->sender_0_channel_size_bytes > 0, "Internal error when computing `sender_0_channel_size_bytes` which was computed to be size 0");
+    TT_FATAL(this->sender_1_channel_size_bytes > 0, "Internal error when computing `sender_1_channel_size_bytes` which was computed to be size 0");
+    TT_FATAL(this->receiver_channel_size_bytes > 0, "Internal error when computing `receiver_channel_size_bytes` which was computed to be size 0");
+    TT_FATAL(
+        this->sender_0_channel_size_bytes + this->sender_1_channel_size_bytes + this->receiver_channel_size_bytes <=
+        this->available_channel_buffering_space, "Internal error when computing channel sizes. Total channel size exceeds available space");
+    TT_FATAL(
+        this->receiver_channel_base_address + this->receiver_channel_size_bytes <
+        eth_l1_mem::address_map::MAX_L1_LOADING_SIZE, "Internal error - channel buffers spilled past the end of usable L1 region.");
+}
+
+FabricEriscDatamoverBuilder::FabricEriscDatamoverBuilder(
+    CoreCoord const& my_eth_core_logical,
+    size_t my_noc_x,
+    size_t my_noc_y,
+    size_t my_chip_id,
+    size_t peer_chip_id,
+
+    std::optional<size_t> receiver_channel_downstream_flow_control_semaphore_id,
+    size_t sender_channel_0_flow_control_semaphore_id,
+    size_t sender_channel_1_flow_control_semaphore_id,
+    size_t sender_channel_0_connection_semaphore_id,
+    size_t sender_channel_1_connection_semaphore_id,
+    size_t sender_channel_0_buffer_index_semaphore_id,
+    size_t sender_channel_1_buffer_index_semaphore_id,
+
+    FabricEriscDatamoverConfig const& config) :
+    my_eth_core_logical(my_eth_core_logical),
+    my_noc_x(my_noc_x),
+    my_noc_y(my_noc_y),
+    config(config),
+    my_chip_id(my_chip_id),
+    peer_chip_id(peer_chip_id),
+    handshake_address(tt::round_up(eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE, FabricEriscDatamoverConfig::eth_channel_sync_size)),
+    channel_buffer_size(config.channel_buffer_size_bytes),
+    sender_0_num_buffers(config.sender_0_num_buffers),
+    sender_1_num_buffers(config.sender_1_num_buffers),
+    receiver_num_buffers(config.receiver_num_buffers),
+
+    // this is the receiver channel's local sem for flow controlling with downstream fabric sender
+    receiver_channel_downstream_flow_control_semaphore_id(receiver_channel_downstream_flow_control_semaphore_id),
+    sender_channel_0_flow_control_semaphore_id(sender_channel_0_flow_control_semaphore_id),
+    sender_channel_1_flow_control_semaphore_id(sender_channel_1_flow_control_semaphore_id),
+    sender_channel_0_connection_semaphore_id(sender_channel_0_connection_semaphore_id),
+    sender_channel_1_connection_semaphore_id(sender_channel_1_connection_semaphore_id),
+    sender_channel_0_buffer_index_semaphore_id(sender_channel_0_buffer_index_semaphore_id),
+    sender_channel_1_buffer_index_semaphore_id(sender_channel_1_buffer_index_semaphore_id),
+
+    receiver_channel_local_buffer_index_addr(FabricEriscDatamoverConfig::receiver_channel_local_buffer_index_addr),
+
+    local_sender_channel_0_buffer_address(config.sender_0_channel_base_address),
+    local_sender_channel_0_connection_info_addr(
+        FabricEriscDatamoverConfig::sender_channel_0_worker_connection_info_address),
+    local_sender_channel_1_buffer_address(config.sender_1_channel_base_address),
+    local_sender_channel_1_connection_info_addr(
+        FabricEriscDatamoverConfig::sender_channel_1_worker_connection_info_address),
+    local_receiver_channel_buffer_address(config.receiver_channel_base_address),
+
+    termination_signal_ptr(FabricEriscDatamoverConfig::termination_signal_address) {}
+
+std::vector<uint32_t> FabricEriscDatamoverBuilder::get_compile_time_args() const {
+    const bool is_handshake_master = this->my_chip_id < this->peer_chip_id;
+    TT_ASSERT(this->my_chip_id != this->peer_chip_id);
+    TT_ASSERT(
+        this->sender_0_num_buffers == this->sender_1_num_buffers);  //, "Implementation expects sender_0_num_buffers and
+                                                                    // sender_1_num_buffers to be the same for now");
+    log_trace(tt::LogTest, "Sender 0 num buffers: {}", this->sender_0_num_buffers);
+    log_trace(tt::LogTest, "Sender 0 channel address: {}", this->local_sender_channel_0_buffer_address);
+    log_trace(tt::LogTest, "Sender 1 num buffers: {}", this->sender_1_num_buffers);
+    log_trace(tt::LogTest, "Sender 1 channel address: {}", this->local_sender_channel_1_buffer_address);
+    log_trace(tt::LogTest, "Receiver num buffers: {}", this->receiver_num_buffers);
+    log_trace(tt::LogTest, "Receiver channel address: {}", this->local_receiver_channel_buffer_address);
+    return std::vector<uint32_t>{
+        is_handshake_master,
+        this->handshake_address,
+        this->channel_buffer_size,
+
+        this->sender_0_num_buffers,
+        this->receiver_num_buffers,
+
+        config.sender_0_channel_base_address,
+        FabricEriscDatamoverConfig::sender_channel_0_worker_connection_info_address,
+        config.sender_1_channel_base_address,
+        FabricEriscDatamoverConfig::sender_channel_1_worker_connection_info_address,
+        config.receiver_channel_base_address,
+        config.receiver_channel_base_address,
+
+        config.sender_0_channel_base_address,
+        config.sender_1_channel_base_address,
+
+        this->termination_signal_ptr};
+}
+
+std::vector<uint32_t> FabricEriscDatamoverBuilder::get_runtime_args() const {
+    return std::vector<uint32_t>{
+        this->sender_channel_0_connection_semaphore_id,
+        this->sender_channel_1_connection_semaphore_id,
+        this->sender_channel_0_buffer_index_semaphore_id,
+        this->downstream_sender_channel_buffer_index_semaphore_id.value_or(-1),
+        this->downstream_edm_buffer_base_address != std::nullopt,
+        this->downstream_edm_buffer_base_address.value_or(0),
+        this->downstream_edm_noc_x.value_or(0),
+        this->downstream_edm_noc_y.value_or(0),
+        this->downstream_edm_semaphore_address.value_or(-1),
+        this->downstream_edm_worker_registration_address.value_or(0),
+        this->downstream_edm_worker_location_info_address.value_or(0),
+        this->receiver_channel_local_buffer_index_addr,
+        // this is the receiver channel's local sem for flow controlling with downstream fabric sender
+        this->receiver_channel_downstream_flow_control_semaphore_id.value_or(0),
+        this->sender_channel_0_flow_control_semaphore_id,
+        this->sender_channel_1_flow_control_semaphore_id
+    };
+}
+
+FabricEriscDatamoverBuilder FabricEriscDatamoverBuilder::build(
+    Device* device,
+    Program& program,
+    CoreCoord const& ethernet_core,
+    chip_id_t local_chip_id,
+    chip_id_t peer_chip_id,
+    FabricEriscDatamoverConfig const& config) {
+    std::optional<size_t> receiver_channel_downstream_flow_control_semaphore_id = std::nullopt;
+    auto sender_channel_0_flow_control_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+    auto sender_channel_1_flow_control_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+    auto sender_channel_0_connection_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+    auto sender_channel_1_connection_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+    auto sender_channel_0_buffer_index_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+    auto sender_channel_1_buffer_index_semaphore_id =
+        tt::tt_metal::CreateSemaphore(program, ethernet_core, 0, CoreType::ETH);
+
+    return FabricEriscDatamoverBuilder(
+        ethernet_core,
+        device->ethernet_core_from_logical_core(ethernet_core).x,
+        device->ethernet_core_from_logical_core(ethernet_core).y,
+        local_chip_id,
+        peer_chip_id,
+
+        receiver_channel_downstream_flow_control_semaphore_id,
+        sender_channel_0_flow_control_semaphore_id,
+        sender_channel_1_flow_control_semaphore_id,
+        sender_channel_0_connection_semaphore_id,
+        sender_channel_1_connection_semaphore_id,
+        sender_channel_0_buffer_index_semaphore_id,
+        sender_channel_1_buffer_index_semaphore_id,
+
+        config);
+}
+
+SenderWorkerAdapterSpec FabricEriscDatamoverBuilder::build_connection_to_worker_channel() const {
+    return SenderWorkerAdapterSpec {
+        this->my_noc_x,
+        this->my_noc_y,
+        this->local_sender_channel_0_buffer_address,
+        this->sender_0_num_buffers,
+        this->sender_channel_0_flow_control_semaphore_id,
+        this->sender_channel_0_connection_semaphore_id,
+        FabricEriscDatamoverConfig::sender_channel_0_worker_connection_info_address,
+        this->config.channel_buffer_size_bytes,
+        this->sender_channel_0_buffer_index_semaphore_id
+    };
+}
+
+
+SenderWorkerAdapterSpec FabricEriscDatamoverBuilder::build_connection_to_fabric_channel() const {
+    return SenderWorkerAdapterSpec {
+        this->my_noc_x,
+        this->my_noc_y,
+        this->local_sender_channel_1_buffer_address,
+        this->sender_1_num_buffers,
+        this->sender_channel_1_flow_control_semaphore_id,
+        this->sender_channel_1_connection_semaphore_id,
+        FabricEriscDatamoverConfig::sender_channel_1_worker_connection_info_address,
+        this->config.channel_buffer_size_bytes,
+        this->sender_channel_1_buffer_index_semaphore_id
+    };
+}
+
+void FabricEriscDatamoverBuilder::connect_to_downstream_edm(FabricEriscDatamoverBuilder const& downstream_edm) {
+    auto const adapter_spec = downstream_edm.build_connection_to_fabric_channel();
+
+    log_trace(tt::LogTest, "Connecting to downstream EDM at x={}, y={}", adapter_spec.edm_noc_x, adapter_spec.edm_noc_y);
+
+    this->downstream_edm_noc_x = adapter_spec.edm_noc_x;
+    this->downstream_edm_noc_y = adapter_spec.edm_noc_y;
+    this->downstream_edm_buffer_base_address = adapter_spec.edm_buffer_base_addr;
+    this->downstream_edm_semaphore_address = adapter_spec.edm_l1_sem_addr;
+    this->downstream_edm_worker_registration_address = adapter_spec.edm_connection_handshake_addr;
+    this->downstream_edm_worker_location_info_address = adapter_spec.edm_worker_location_info_addr;
+    this->downstream_sender_channel_buffer_index_semaphore_id = adapter_spec.buffer_index_semaphore_id;
+}
+
+
+
+EdmLineFabricOpInterface::EdmLineFabricOpInterface (std::vector<Device*> const& device_sequence, std::vector<Program*> const& program_sequence, std::optional<size_t> desired_num_links) :
+    device_sequence(device_sequence),
+    programs(program_sequence) {
+    static constexpr std::size_t edm_buffer_size = 4096 + sizeof(tt::fabric::PacketHeader);
+    auto const config = FabricEriscDatamoverConfig(edm_buffer_size, 1, 2);
+    TT_ASSERT(device_sequence.size() == program_sequence.size());
+
+    for (size_t i = 0; i < device_sequence.size(); i++) {
+        log_trace(tt::LogOp, "device[{}] id={}",  i, device_sequence[i]->id());
+    }
+
+
+    // Construct the builders
+    for (size_t hop = 0; hop < device_sequence.size() - 1; hop++) {
+        auto src_device = device_sequence[hop];
+        auto dest_device = device_sequence[hop + 1];
+
+        auto const& src_device_sockets = src_device->get_ethernet_sockets(dest_device->id());;
+        auto const& dest_device_sockets = dest_device->get_ethernet_sockets(src_device->id());;
+        std::vector<CoreCoord> local_link_cores; local_link_cores.reserve(src_device_sockets.size());
+        std::vector<CoreCoord> remote_link_cores; remote_link_cores.reserve(dest_device_sockets.size());
+        std::copy_if(src_device_sockets.begin(), src_device_sockets.end(), std::back_inserter(local_link_cores), [src_device](CoreCoord const& core) { return src_device->is_active_ethernet_core(core, true); });
+        std::copy_if(dest_device_sockets.begin(), dest_device_sockets.end(), std::back_inserter(remote_link_cores), [dest_device](CoreCoord const& core) { return dest_device->is_active_ethernet_core(core, true); });
+
+        this->num_links = std::min(desired_num_links.value_or(std::numeric_limits<std::size_t>::max()), local_link_cores.size());
+
+        TT_ASSERT(local_link_cores.size() == remote_link_cores.size());
+
+        edm_builders_forward_direction[src_device->id()].reserve(local_link_cores.size());
+        edm_builders_forward_direction[dest_device->id()].reserve(local_link_cores.size());
+        for (size_t l = 0; l < this->num_links; l++) {
+            log_trace(tt::LogOp, "Building forward direction EDM on chip {} on link {}", src_device->id(), edm_builders_forward_direction[src_device->id()].size());
+            edm_builders_forward_direction[src_device->id()].push_back(FabricEriscDatamoverBuilder::build(
+                device_sequence[hop],
+                *programs[hop],
+                local_link_cores[l],
+                src_device->id(),
+                dest_device->id(),
+                config));
+
+            log_trace(tt::LogOp, "Building backward direction EDM on chip {} on link {}", dest_device->id(), edm_builders_backward_direction[dest_device->id()].size());
+            edm_builders_backward_direction[dest_device->id()].push_back(FabricEriscDatamoverBuilder::build(
+                device_sequence[hop + 1],
+                *programs[hop + 1],
+                remote_link_cores[l],
+                dest_device->id(),
+                src_device->id(),
+                config));
+        }
+    }
+
+    // Establish local connections between EDMs on the same chips to establish the lin fabric
+    for (size_t i = 1; i < device_sequence.size() - 1; i++) {
+        const size_t num_links = edm_builders_forward_direction.at(device_sequence[i]->id()).size();
+        auto& forward_direction_edm = edm_builders_forward_direction.at(device_sequence[i]->id());
+        auto& backward_direction_edm = edm_builders_backward_direction.at(device_sequence[i]->id());
+
+        for (size_t l = 0; l < num_links; l++) {
+            forward_direction_edm.at(l).connect_to_downstream_edm(backward_direction_edm.at(l));
+            backward_direction_edm.at(l).connect_to_downstream_edm(forward_direction_edm.at(l));
+        }
+    }
+
+}
+
+
+SenderWorkerAdapterSpec EdmLineFabricOpInterface::uniquely_connect_worker(Device* device, Direction direction) {
+    TT_ASSERT((direction == FORWARD) ? edm_builders_forward_direction.find(device->id()) != edm_builders_forward_direction.end()
+                                     : edm_builders_backward_direction.find(device->id()) != edm_builders_backward_direction.end());
+    auto& edm_builders = (direction == FORWARD) ? edm_builders_forward_direction.at(device->id())
+                                                : edm_builders_backward_direction.at(device->id());
+    auto &link_count_map = (direction == FORWARD) ? next_forward_direction_edm_available : next_backward_direction_edm_available;
+    const auto next_link = link_count_map[device->id()];
+    link_count_map[device->id()] = next_link + 1;
+
+    TT_ASSERT(edm_builders.size() > 0);
+    TT_ASSERT(next_link < edm_builders.size());
+    return edm_builders.at(next_link).build_connection_to_worker_channel();
+}
+
+void EdmLineFabricOpInterface::build_kernels() const {
+    auto generate_kernels_in_direction = [this](Device *device, Program *program, Direction direction) {
+        auto &edm_builders = direction == FORWARD ? edm_builders_forward_direction : edm_builders_backward_direction;
+        if (edm_builders.find(device->id()) != edm_builders.end()) {
+            for (auto& edm_builder : edm_builders.at(device->id())) {
+                auto local_edm_kernel = ttnn::ccl::generate_edm_kernel(
+                    *program,
+                    device,
+                    edm_builder,
+                    edm_builder.my_eth_core_logical,
+                    NOC::NOC_0);
+            }
+        }
+    };
+
+    TT_ASSERT(device_sequence.size() == programs.size());
+    for (size_t i = 0; i < device_sequence.size(); i++) {
+        Program* program = programs[i];
+        Device* device = device_sequence[i];
+        generate_kernels_in_direction(device, program, Direction::FORWARD);
+        generate_kernels_in_direction(device, program, Direction::BACKWARD);
+    }
+}
+
+
+
+std::vector<edm_termination_info_t> EdmLineFabricOpInterface::generate_ordered_termination_info_farthest_to_nearest() const {
+    TT_ASSERT(device_sequence.size() > 0);
+    const size_t num_hops = device_sequence.size() - 1;
+    TT_ASSERT(num_hops > 0);
+    std::vector<edm_termination_info_t> edm_termination_infos;
+    edm_termination_infos.reserve(num_hops * 2 * this->num_links);
+    for (int i = num_hops - 1; i >= 0; i--) {
+        log_trace(tt::LogOp, "Generating termination info for hop {}", i);
+        TT_ASSERT(i + 1 != 0);
+        TT_ASSERT(i + 1 < device_sequence.size());
+        TT_ASSERT(edm_builders_backward_direction.find(device_sequence[i+1]->id()) != edm_builders_backward_direction.end(), "Device {} at index {} not found in `edm_builders_backward_direction` but it was expected there", i + 1, device_sequence[i+1]->id());
+        TT_ASSERT(edm_builders_forward_direction.find(device_sequence[i]->id()) != edm_builders_forward_direction.end(), "Device {} at index {} not found in `edm_builders_forward_direction` but it was expected there", i, device_sequence[i]->id());
+        auto &farther_edms = edm_builders_backward_direction.at(device_sequence[i+1]->id());
+        auto &nearer_edms = edm_builders_forward_direction.at(device_sequence[i]->id());
+
+        TT_ASSERT(farther_edms.size() <= this->num_links);
+        TT_ASSERT(nearer_edms.size() <= this->num_links);
+        for (size_t l = 0; l < this->num_links; l++) {
+            auto &farther_edm = farther_edms.at(l);
+            const std::size_t distance_receiver = i + 1;
+            edm_termination_infos.push_back(
+                {distance_receiver,
+                farther_edm.my_noc_x,
+                farther_edm.my_noc_y,
+             ttnn::ccl::FabricEriscDatamoverConfig::termination_signal_address});
+        }
+        for (size_t l = 0; l < this->num_links; l++) {
+            auto &nearer_edm = nearer_edms.at(l);
+            const std::size_t distance_sender = i;
+            edm_termination_infos.push_back(
+                {distance_sender,
+                nearer_edm.my_noc_x,
+                nearer_edm.my_noc_y,
+                ttnn::ccl::FabricEriscDatamoverConfig::termination_signal_address});
+        }
+    }
+    log_trace(tt::LogOp, "Done Generating termination infos");
+    return edm_termination_infos;
+}
+
+
+
+
+
+}  // namespace ttnn::ccl
diff --git a/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp b/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp
new file mode 100644
index 00000000000..efe3ce45ad5
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/erisc_datamover_builder.hpp
@@ -0,0 +1,228 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+#include "eth_l1_address_map.h"
+#include "tt_metal/third_party/umd/device/tt_cluster_descriptor_types.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/shared_with_host/hetergeneous_data_structs.hpp"
+
+
+#include "tt_metal/impl/device/device.hpp"
+#include "tt_metal/impl/program/program.hpp"
+
+#include <vector>
+#include <unordered_map>
+namespace ttnn {
+namespace ccl {
+
+
+struct FabricEriscDatamoverConfig {
+    static constexpr std::size_t field_size = 16;
+    static constexpr std::size_t buffer_alignment = 32;
+    static_assert(((buffer_alignment - 1) & buffer_alignment) == 0);
+
+    // Global
+    static constexpr std::size_t eth_channel_sync_size = 16;
+    static constexpr std::size_t handshake_addr = eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE;
+    static constexpr std::size_t edm_channel_ack_addr = handshake_addr + eth_channel_sync_size;
+    static constexpr std::size_t termination_signal_address =
+        edm_channel_ack_addr + (2 * eth_channel_sync_size);  // pad extra bytes to match old EDM so handshake logic will still work
+
+    // Sender Channel 0
+    static constexpr std::size_t sender_channel_0_buffer_index_address = termination_signal_address + field_size;
+    static constexpr std::size_t sender_channel_0_worker_connection_info_address =
+        sender_channel_0_buffer_index_address + field_size;
+    static_assert(field_size >= sizeof(tt::fabric::EDMChannelWorkerLocationInfo));
+
+    // Sender Channel 1
+    static constexpr std::size_t sender_channel_1_buffer_index_address =
+        sender_channel_0_worker_connection_info_address + field_size;
+    static constexpr std::size_t sender_channel_1_worker_connection_info_address =
+        sender_channel_1_buffer_index_address + field_size;
+
+    // Receiver Channel
+    static constexpr std::size_t receiver_channel_local_buffer_index_addr =
+        sender_channel_1_worker_connection_info_address + field_size;
+
+    // Channel Allocations
+    static constexpr std::size_t buffer_region_start =
+        (receiver_channel_local_buffer_index_addr + field_size + buffer_alignment) & ~(buffer_alignment - 1); // Align
+    static constexpr std::size_t available_channel_buffering_space =
+        eth_l1_mem::address_map::MAX_L1_LOADING_SIZE - buffer_region_start;
+
+    static_assert(sender_channel_1_buffer_index_address != sender_channel_0_buffer_index_address);
+
+    FabricEriscDatamoverConfig(
+        std::size_t channel_buffer_size_bytes, std::size_t sender_ratio_size, std::size_t receiver_ratio_size);
+
+    std::size_t channel_buffer_size_bytes = 0;
+    std::size_t channel_buffer_size_bytes_with_channel_sync = 0;
+    std::size_t sender_0_channel_size_bytes = 0;
+    std::size_t sender_0_num_buffers = 0;
+    std::size_t sender_1_channel_size_bytes = 0;
+    std::size_t sender_1_num_buffers = 0;
+    std::size_t receiver_channel_size_bytes = 0;
+    std::size_t receiver_num_buffers = 0;
+
+    std::size_t sender_0_channel_base_address = 0;
+    std::size_t sender_1_channel_base_address = 0;
+    std::size_t receiver_channel_base_address = 0;
+};
+
+struct SenderWorkerAdapterSpec {
+    size_t edm_noc_x = 0;
+    size_t edm_noc_y = 0;
+    size_t edm_buffer_base_addr = 0;
+    size_t num_buffers_per_channel = 0;
+    size_t edm_l1_sem_addr = 0;
+    size_t edm_connection_handshake_addr = 0;
+    size_t edm_worker_location_info_addr = 0;  // The EDM's location for `EDMChannelWorkerLocationInfo`
+    size_t buffer_size_bytes = 0;
+    size_t buffer_index_semaphore_id = 0; // the semaphore ID on the EDM, not the worker
+};
+class FabricEriscDatamoverBuilder {
+   public:
+    FabricEriscDatamoverBuilder(
+        CoreCoord const& my_eth_core_logical,
+        size_t my_noc_x,
+        size_t my_noc_y,
+        size_t my_chip_id,
+        size_t peer_chip_id,
+
+        std::optional<size_t> receiver_channel_downstream_flow_control_semaphore_id,
+        size_t sender_channel_0_flow_control_semaphore_id,
+        size_t sender_channel_1_flow_control_semaphore_id,
+        size_t sender_channel_0_connection_semaphore_id,
+        size_t sender_channel_1_connection_semaphore_id,
+        size_t sender_channel_0_buffer_index_semaphore_id,
+        size_t sender_channel_1_buffer_index_semaphore_id,
+
+        FabricEriscDatamoverConfig const& config);
+
+    static FabricEriscDatamoverBuilder build(
+        Device* device,
+        Program& program,
+        CoreCoord const& ethernet_core,
+        chip_id_t local_chip_id,
+        chip_id_t peer_chip_id,
+        FabricEriscDatamoverConfig const& config);
+
+    [[nodiscard]] SenderWorkerAdapterSpec build_connection_to_worker_channel() const;
+    [[nodiscard]] SenderWorkerAdapterSpec build_connection_to_fabric_channel() const;
+
+    [[nodiscard]] std::vector<uint32_t> get_compile_time_args() const;
+
+    [[nodiscard]] std::vector<uint32_t> get_runtime_args() const;
+
+    void connect_to_downstream_edm(FabricEriscDatamoverBuilder const& downstream_edm);
+
+    void dump_to_log() const {
+        // TODO
+    }
+
+   private:
+   friend class EdmLineFabricOpInterface;
+    CoreCoord my_eth_core_logical;
+    size_t my_noc_x = 0;
+    size_t my_noc_y = 0;
+
+    FabricEriscDatamoverConfig config;
+
+    size_t my_chip_id = 0;
+    size_t peer_chip_id = 0;
+    size_t handshake_address = 0;
+    size_t channel_buffer_size = 0;
+
+    size_t sender_0_num_buffers = 0;
+    size_t sender_1_num_buffers = 0;
+    size_t receiver_num_buffers = 0;
+
+    size_t local_sender_channel_0_buffer_address = 0;
+    size_t local_sender_channel_0_connection_info_addr = 0;
+    size_t local_sender_channel_1_buffer_address = 0;
+    size_t local_sender_channel_1_connection_info_addr = 0;
+    size_t local_receiver_channel_buffer_address = 0;
+
+    size_t termination_signal_ptr = 0;
+
+    // Semaphore IDs
+    // this is the receiver channel's local sem for flow controlling with downstream fabric sender
+    std::optional<size_t> receiver_channel_downstream_flow_control_semaphore_id;
+    size_t sender_channel_0_flow_control_semaphore_id = 0;
+    size_t sender_channel_1_flow_control_semaphore_id = 0;
+    size_t sender_channel_0_connection_semaphore_id = 0;
+    size_t sender_channel_1_connection_semaphore_id = 0;
+    size_t sender_channel_0_buffer_index_semaphore_id = 0;
+    size_t sender_channel_1_buffer_index_semaphore_id = 0;
+    size_t receiver_channel_local_buffer_index_addr = 0;
+
+    std::optional<size_t> downstream_edm_noc_x;
+    std::optional<size_t> downstream_edm_noc_y;
+    std::optional<size_t> downstream_edm_buffer_base_address;
+    std::optional<size_t> downstream_edm_semaphore_address;
+    std::optional<size_t> downstream_edm_worker_registration_address;
+    std::optional<size_t> downstream_edm_worker_location_info_address;
+    std::optional<size_t> downstream_sender_channel_buffer_index_semaphore_id;
+};
+
+
+struct edm_termination_info_t {
+    uint32_t distance = 0;
+    uint32_t edm_noc_x = 0;
+    uint32_t edm_noc_y = 0;
+    uint32_t termination_addr = 0;
+};
+
+struct EdmLineFabricOpInterface {
+    enum Direction {
+        // Ascending chips in the sequence
+        FORWARD,
+
+        // Descending chips in the sequence
+        BACKWARD,
+    };
+
+    // Device ID -> EDM Builders
+    std::unordered_map<size_t, std::vector<FabricEriscDatamoverBuilder>> edm_builders_forward_direction;
+    std::unordered_map<size_t, std::vector<FabricEriscDatamoverBuilder>> edm_builders_backward_direction;
+
+    // Device ID -> link index
+    std::unordered_map<size_t, size_t> next_forward_direction_edm_available;
+    std::unordered_map<size_t, size_t> next_backward_direction_edm_available;
+
+    std::vector<Device*> device_sequence;
+    std::vector<Program*> programs;
+
+    size_t num_links = 0;
+
+    //   The constructor will assemble/connect the line across the specified device sequence, for all available links.
+    EdmLineFabricOpInterface (std::vector<Device*> const& device_sequence, std::vector<Program*> const& program_sequence, std::optional<size_t> desired_num_links = std::nullopt);
+
+
+    // Will create a connection adapter for a worker which can be used to pass args to the worker kernel talking to the
+    // corresponding fabric endpoint. This interface will guarantee unique connections only so requesting more unique connections
+    // than available will result in an error.
+    SenderWorkerAdapterSpec uniquely_connect_worker(Device* device, Direction direction);
+
+    // builds the ethernet kernels for all EDMs in the "fabric"
+    void build_kernels() const;
+
+    // Generates a list of target cores (for now assumed from chip 0 in the line) from farthest
+    // to nearest for the sake of sending teardown/termination signals on workload completion.
+    // Returns: A list of termination infos which can be passed to a terminate kernel
+    // Note there is currently a small bug in that with multiple links, we don't currently know
+    // who will be sending the termination signals (and which link(s) they are connected to)
+    // and so a termination signal may be sent to our link first before the other eth core links
+    // on the chip so multi-link isn't officially supported yet
+    std::vector<edm_termination_info_t> generate_ordered_termination_info_farthest_to_nearest() const;
+};
+
+};  // namespace ccl
+};  // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp
new file mode 100644
index 00000000000..ef0f73d302b
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp
@@ -0,0 +1,193 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "dataflow_api.h"
+
+#include "tt_metal/hw/inc/ethernet/dataflow_api.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernel_common/worker_edm_utils.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp"
+#include "debug/assert.h"
+
+#include <cstdint>
+
+
+namespace tt::fabric {
+
+struct WorkerToFabricEdmSender{
+
+    static constexpr uint32_t open_connection_value = 1;
+    static constexpr uint32_t close_connection_value = 0;
+
+    WorkerToFabricEdmSender () : worker_sem_addr(nullptr) {}
+
+    WorkerToFabricEdmSender (
+        size_t edm_worker_x,
+        size_t edm_worker_y,
+        std::size_t edm_buffer_base_addr,
+        std::size_t num_buffers_per_channel,
+        std::size_t edm_l1_sem_id,
+        std::size_t edm_connection_handshake_l1_addr,
+        std::size_t edm_worker_location_info_addr, // The EDM's location for `EDMChannelWorkerLocationInfo`
+        std::size_t buffer_size_bytes,
+        std::size_t edm_buffer_index_addr,
+        volatile uint32_t * const worker_sem_addr,
+        uint32_t local_buffer_index_addr
+    ) :
+        edm_buffer_addr(get_noc_addr(edm_worker_x, edm_worker_y, edm_buffer_base_addr)),
+        edm_semaphore_addr(get_noc_addr(edm_worker_x, edm_worker_y, get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(edm_l1_sem_id))),
+        edm_connection_handshake_l1_addr(edm_connection_handshake_l1_addr),
+        edm_worker_location_info_addr(edm_worker_location_info_addr),
+        edm_buffer_index_addr(edm_buffer_index_addr),
+        worker_sem_addr(worker_sem_addr),
+        edm_buffer_base_addr(edm_buffer_base_addr),
+        num_buffers_per_channel(num_buffers_per_channel),
+        last_buffer_index(num_buffers_per_channel - 1),
+        edm_l1_sem_addr(get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(edm_l1_sem_id)),
+        buffer_size_bytes(buffer_size_bytes),
+        buffer_index_ptr(reinterpret_cast<size_t*>(local_buffer_index_addr))
+    {
+        ASSERT(buffer_size_bytes > 0);
+    }
+
+    [[nodiscard]] FORCE_INLINE bool consumer_has_space() const {
+        return *this->worker_sem_addr == 1;
+    }
+    FORCE_INLINE void clear_flow_control_semaphore() const {
+        noc_semaphore_set(this->worker_sem_addr, 0);
+    }
+    FORCE_INLINE void wait_for_empty_write_slot() const {
+        noc_semaphore_wait(this->worker_sem_addr, 1);
+    }
+
+    FORCE_INLINE void send_payload_blocking(uint32_t cb_id, uint32_t num_pages, uint32_t page_size) {
+        send_payload_impl<ttnn::ccl::EDM_IO_BLOCKING_MODE::BLOCKING>(cb_id, num_pages, page_size);
+    }
+
+    // Does not wait for CB. Assumes caller handles CB data availability
+    FORCE_INLINE void send_payload_non_blocking(uint32_t cb_id, uint32_t num_pages, uint32_t page_size) {
+        send_payload_impl<ttnn::ccl::EDM_IO_BLOCKING_MODE::NON_BLOCKING>(cb_id, num_pages, page_size);
+    }
+
+    /*
+     * No CB
+     */
+    FORCE_INLINE void send_payload_blocking_from_address(uint32_t source_address, size_t size_bytes) {
+        send_payload_from_address_impl<ttnn::ccl::EDM_IO_BLOCKING_MODE::BLOCKING>(source_address, size_bytes);
+    }
+
+    /*
+     * No CB
+     */
+    // Does not wait for CB. Assumes caller handles CB data availability
+    FORCE_INLINE void send_payload_non_blocking_from_address(uint32_t source_address, size_t size_bytes) {
+        send_payload_from_address_impl<ttnn::ccl::EDM_IO_BLOCKING_MODE::NON_BLOCKING>(source_address, size_bytes);
+    }
+
+    // Layout
+    // |-----------------------|
+    // | EDM Handshake         | 16B
+    // |-----------------------|
+    // | EDM Ack Channel Sync  | 16B
+    // |-----------------------|          -
+    // | Connection Semaphore  | 16B        |
+    // |-----------------------|            |
+    // | Buffer Index          | 16B         >- Per Sender Channel (On EDM)
+    // |-----------------------|            |
+    // | Worker Connection Info| 16B        |worker
+    // |-----------------------|          -/
+    // |-----------------------|
+    //
+    static constexpr size_t edm_sender_channel_field_stride_bytes = 16;
+
+    FORCE_INLINE void open() {
+        const auto dest_noc_addr_coord_only = this->edm_semaphore_addr & ~(uint64_t)NOC_COORDINATE_MASK;
+
+        const uint64_t remote_buffer_index_addr = dest_noc_addr_coord_only | edm_buffer_index_addr;
+        ASSERT(remote_buffer_index_addr > 0);
+        noc_async_read(remote_buffer_index_addr, reinterpret_cast<size_t>(this->buffer_index_ptr), sizeof(uint32_t));
+
+        const uint64_t dest_edm_location_info_addr = dest_noc_addr_coord_only | edm_worker_location_info_addr;
+        // TODO: Need to change byte enable to be word enable
+        noc_inline_dw_write(dest_edm_location_info_addr, reinterpret_cast<size_t>(worker_sem_addr));
+        noc_inline_dw_write(dest_edm_location_info_addr + sizeof(uint32_t), ttnn::ccl::WorkerXY(my_x[0], my_y[0]).to_uint32());
+
+        const uint64_t edm_connection_handshake_noc_addr = dest_noc_addr_coord_only | edm_connection_handshake_l1_addr;
+        noc_inline_dw_write(edm_connection_handshake_noc_addr, open_connection_value);
+        noc_async_read_barrier();
+    }
+
+    FORCE_INLINE void close() {
+        const auto dest_noc_addr_coord_only = this->edm_semaphore_addr & ~(uint64_t)NOC_COORDINATE_MASK;
+
+        const uint64_t dest_edm_connection_state_addr = dest_noc_addr_coord_only | edm_connection_handshake_l1_addr;
+        noc_inline_dw_write(dest_edm_connection_state_addr, close_connection_value);
+
+        // buffer index stored at location after handshake addr
+        const uint64_t remote_buffer_index_addr = dest_noc_addr_coord_only | edm_buffer_index_addr;
+        noc_inline_dw_write(remote_buffer_index_addr, *this->buffer_index_ptr);
+
+        noc_async_write_barrier();
+    }
+
+    uint64_t edm_buffer_addr;
+    uint64_t edm_semaphore_addr;
+    size_t edm_connection_handshake_l1_addr;
+    size_t edm_worker_location_info_addr;
+    size_t edm_buffer_index_addr;
+    volatile uint32_t * const worker_sem_addr;
+    std::size_t edm_buffer_base_addr;
+    std::size_t num_buffers_per_channel;
+    std::size_t last_buffer_index;
+    std::size_t edm_l1_sem_addr;
+    std::size_t buffer_size_bytes;
+    std::size_t *buffer_index_ptr;
+
+    private:
+    template<ttnn::ccl::EDM_IO_BLOCKING_MODE blocking_mode>
+    FORCE_INLINE void send_payload_from_address_impl(uint32_t source_address, size_t size_bytes) {
+        this->clear_flow_control_semaphore();
+        uint64_t buffer_address = this->edm_buffer_addr + (*this->buffer_index_ptr * (this->buffer_size_bytes + sizeof(eth_channel_sync_t)));
+
+        ASSERT(size_bytes <= this->buffer_size_bytes);
+
+        /*{ // For debug purposes only. Useful to permanently backup the packet somewhere we can inspect with ttx-status
+            uint32_t dram_noc_x = my_y[0] == 1 ? 0 : 0;
+            uint32_t dram_noc_y = my_y[0] == 1 ? 0 : 5;
+            // noc_inline_dw_write(get_noc_addr(dram_noc_x, dram_noc_y, storage_offset), 0x0F);
+            // noc_async_writes_flushed();
+            // noc_inline_dw_write(get_noc_addr(dram_noc_x, dram_noc_y, storage_offset  + 4), 0);
+            // auto pkthdr_size_words = sizeof(tt::fabric::PacketHeader) >> 2;
+            // for (size_t i = 0; i < pkthdr_size_words; i++) {
+            //     reinterpret_cast<volatile uint32_t*>(source_address)[pkthdr_size_words - i] =
+            //     reinterpret_cast<volatile uint32_t*>(source_address)[pkthdr_size_words - 1 - i];
+            // }
+            // reinterpret_cast<volatile uint32_t*>(source_address)[0] = 0xc0ffee;
+            // DPRINT << "NEXT STORAGE OFF: " << (uint32_t)storage_offset << "\n";
+            noc_async_write(source_address, get_noc_addr(dram_noc_x, dram_noc_y, storage_offset), size_bytes);
+            storage_offset += size_bytes;
+            storage_offset += 64;
+            storage_offset = storage_offset & (~0x1F);
+        }*/
+        ASSERT(tt::fabric::is_valid(*const_cast<tt::fabric::PacketHeader *>(reinterpret_cast<volatile tt::fabric::PacketHeader*>(source_address))));
+        send_chunk_from_address<blocking_mode>(source_address, 1, size_bytes, buffer_address);
+        noc_semaphore_inc(edm_semaphore_addr, 1);
+
+        *this->buffer_index_ptr = (*this->buffer_index_ptr == this->last_buffer_index) ? 0 : *this->buffer_index_ptr + 1;
+    }
+
+    template<ttnn::ccl::EDM_IO_BLOCKING_MODE blocking_mode>
+    FORCE_INLINE void send_payload_impl(uint32_t cb_id, uint32_t num_pages, uint32_t page_size) {
+        this->clear_flow_control_semaphore();
+        uint64_t buffer_address = this->edm_buffer_addr + (*this->buffer_index_ptr * (this->buffer_size_bytes + sizeof(eth_channel_sync_t)));
+        ASSERT(num_pages * page_size <= this->buffer_size_bytes);
+        send_chunk<blocking_mode>(cb_id, num_pages, page_size, buffer_address);
+        noc_semaphore_inc(edm_semaphore_addr, 1);
+        *this->buffer_index_ptr = (*this->buffer_index_ptr == this->last_buffer_index) ? 0 : *this->buffer_index_ptr + 1;
+    }
+};
+
+
+} // namespace tt::fabric
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp
new file mode 100644
index 00000000000..37210c2d012
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace tt::fabric {
+
+enum TerminationSignal : uint32_t {
+    KEEP_RUNNING = 0,
+
+    // Wait for messages to drain
+    GRACEFULLY_TERMINATE = 1,
+
+    // Immediately terminate - don't wait for any outstanding messages to arrive or drain out
+    IMMEDIATELY_TERMINATE = 2
+};
+
+// 2 bits
+enum CommandType : uint8_t {
+    WRITE = 0,
+    ATOMIC_INC = 1
+};
+
+// How to send the payload across the cluster
+// 1 bit
+enum ChipSendType : uint8_t {
+    CHIP_UNICAST = 0,
+    CHIP_MULTICAST = 1
+};
+enum NocSendType : uint8_t {
+    NOC_UNICAST = 0,
+    NOC_MULTICAST = 1
+};
+
+
+struct UnicastRoutingCommandHeader {
+    uint8_t distance_in_hops;
+};
+static_assert(sizeof(UnicastRoutingCommandHeader) == 1, "UnicastRoutingCommandHeader size is not 1 byte");
+struct MulticastRoutingCommandHeader {
+    uint8_t start_distance_in_hops: 4;
+    uint8_t range_hops: 4; // 0 implies unicast
+};
+static_assert(sizeof(MulticastRoutingCommandHeader) == 1, "MulticastRoutingCommandHeader size is not 1 byte");
+union RoutingFields {
+    UnicastRoutingCommandHeader chip_unicast;
+    MulticastRoutingCommandHeader chip_mcast;
+};
+static_assert(sizeof(RoutingFields) == sizeof(UnicastRoutingCommandHeader), "RoutingFields size is not 1 bytes");
+
+struct NocUnicastCommandHeader {
+    uint32_t address;
+    uint32_t size;
+    uint8_t noc_x;
+    uint8_t noc_y;
+    uint16_t reserved;
+    // ignores header size
+    inline uint32_t get_payload_only_size() const {
+        return size;
+    }
+};
+struct NocUnicastAtomicIncCommandHeader {
+    NocUnicastAtomicIncCommandHeader(uint32_t address, uint16_t val, uint16_t wrap, uint8_t noc_x, uint8_t noc_y)
+        : address(address), val(val), wrap(wrap), noc_x(noc_x), noc_y(noc_y) {}
+
+    uint32_t address;
+    uint16_t val;
+    uint16_t wrap;
+    uint8_t noc_x;
+    uint8_t noc_y;
+
+};
+struct NocMulticastCommandHeader {
+    uint32_t address;
+    uint32_t size;
+    uint8_t noc_x_start;
+    uint8_t noc_y_start;
+    uint8_t mcast_rect_size_x;
+    uint8_t mcast_rect_size_y;
+
+    // ignores header size
+    inline uint32_t get_payload_only_size() const {
+        return size;
+    }
+};
+struct NocMulticastAtomicIncCommandHeader {
+    uint32_t address;
+    uint16_t val;
+    uint16_t wrap;
+    uint8_t noc_x_start;
+    uint8_t noc_y_start;
+    uint8_t size_x;
+    uint8_t size_y;
+};
+static_assert(sizeof(NocUnicastCommandHeader) == 12, "NocUnicastCommandHeader size is not 1 byte");
+static_assert(sizeof(NocMulticastCommandHeader) == 12, "NocMulticastCommandHeader size is not 1 byte");
+static_assert(sizeof(NocUnicastAtomicIncCommandHeader) == 12, "NocUnicastCommandHeader size is not 1 byte");
+static_assert(sizeof(NocMulticastAtomicIncCommandHeader) == 12, "NocAtomicIncCommandHeader size is not 1 byte");
+union CommandFields{
+    NocUnicastCommandHeader unicast_write;
+    NocMulticastCommandHeader mcast_write;
+    NocUnicastAtomicIncCommandHeader unicast_seminc;
+    NocMulticastAtomicIncCommandHeader mcast_seminc;
+} ;
+static_assert(sizeof(CommandFields) <= 15, "CommandFields size is not 15 bytes");
+
+// TODO: wrap this in a debug version that holds type info so we can assert for field/command/
+struct PacketHeader {
+    // TODO: trim this down noc_send_type 2 bits (4 values):
+    //   -> unicast_write, mcast_write, unicast_seminc, mcast_seminc
+    // For now, kept it separate so I could do reads which would be handled differently
+    // but for our purposes we shouldn't need read so we should be able to omit the support
+    CommandType command_type : 2;
+    ChipSendType chip_send_type : 1;
+    NocSendType noc_send_type : 1;
+    uint8_t reserved : 4;
+
+    RoutingFields routing_fields;
+    uint16_t reserved2;
+    CommandFields command_fields;
+
+    // Sort of hack to work-around DRAM read alignment issues that must be 32B aligned
+    // To simplify worker kernel code, we for now decide to pad up the packet header
+    // to 32B so the user can simplify shift into their CB chunk by sizeof(tt::fabric::PacketHeader)
+    // and automatically work around the DRAM read alignment bug.
+    //
+    // Future changes will remove this padding and require the worker kernel to be aware of this bug
+    // and pad their own CBs conditionally when reading from DRAM. It'll be up to the users to
+    // manage this complexity.
+    uint32_t padding0;
+    uint32_t padding1;
+    uint32_t padding2;
+    uint32_t padding3;
+
+    inline void set_command_type(CommandType &type) { this->command_type = type; }
+    inline void set_chip_send_type(ChipSendType &type) { this->chip_send_type = type; }
+    inline void set_noc_send_type(NocSendType &type) { this->noc_send_type = type; }
+    inline void set_routing_fields(RoutingFields &fields) { this->routing_fields = fields; }
+    inline void set_command_fields(CommandFields &fields) { this->command_fields = fields; }
+
+    size_t get_payload_size_excluding_header() volatile const {
+        switch(this->command_type) {
+            case WRITE: {
+                switch(this->noc_send_type) {
+                    case NOC_UNICAST: {
+                        return this->command_fields.unicast_write.size - sizeof(PacketHeader);
+                    } break;
+                    case NOC_MULTICAST: {
+                        return this->command_fields.mcast_write.size - sizeof(PacketHeader);
+                    } break;
+                    default:
+                        return 0;
+                }
+            } break;
+            case ATOMIC_INC: {
+                return 0;
+            } break;
+            default:
+                return 0;
+        }
+    }
+    inline size_t get_payload_size_including_header() volatile const {
+        return get_payload_size_excluding_header() + sizeof(PacketHeader);
+    }
+
+    inline PacketHeader& to_write() { this->command_type = WRITE; return *this; }
+    inline PacketHeader& to_atomic_inc() { this->command_type = ATOMIC_INC; return *this; }
+
+    inline PacketHeader &to_chip_unicast(UnicastRoutingCommandHeader const &chip_unicast_command_header) {
+        this->chip_send_type = CHIP_UNICAST;
+        this->routing_fields.chip_unicast = chip_unicast_command_header;
+        return *this;
+    }
+    inline PacketHeader &to_chip_multicast(MulticastRoutingCommandHeader const &chip_multicast_command_header) {
+        this->chip_send_type = CHIP_MULTICAST;
+        this->routing_fields.chip_mcast = chip_multicast_command_header;
+        return *this;
+    }
+    inline PacketHeader &to_noc_unicast(NocUnicastCommandHeader const &noc_unicast_command_header) {
+        this->noc_send_type = NOC_UNICAST;
+        this->command_fields.unicast_write = noc_unicast_command_header;
+        return *this;
+    }
+    inline PacketHeader &to_noc_multicast(NocMulticastCommandHeader const &noc_multicast_command_header) {
+        this->noc_send_type = NOC_MULTICAST;
+        this->command_fields.mcast_write = noc_multicast_command_header;
+        return *this;
+    }
+    inline PacketHeader &to_noc_unicast_atomic_inc(
+        NocUnicastAtomicIncCommandHeader const &noc_unicast_atomic_inc_command_header) {
+        this->noc_send_type = NOC_UNICAST;
+        this->command_fields.unicast_seminc = noc_unicast_atomic_inc_command_header;
+        return *this;
+    }
+    inline PacketHeader &to_noc_multicast_atomic_inc(
+        NocMulticastAtomicIncCommandHeader const &noc_multicast_atomic_inc_command_header) {
+        this->noc_send_type = NOC_MULTICAST;
+        this->command_fields.mcast_seminc = noc_multicast_atomic_inc_command_header;
+        return *this;
+    }
+};
+
+
+// TODO: When we remove the 32B padding requirement, reduce to 16B size check
+static_assert(sizeof(PacketHeader) == 32, "sizeof(PacketHeader) is not equal to 32B");
+
+static constexpr size_t header_size_bytes = sizeof(PacketHeader);
+
+
+} // namespace tt::fabric
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp
new file mode 100644
index 00000000000..ef0bf3198e6
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+#include "debug/assert.h"
+
+namespace tt::fabric {
+
+FORCE_INLINE void validate(PacketHeader const& packet_header) {
+    ASSERT(packet_header.command_type == CommandType::WRITE || packet_header.command_type == CommandType::ATOMIC_INC);
+    ASSERT(packet_header.chip_send_type < 2);
+    ASSERT(packet_header.noc_send_type < 2);
+}
+FORCE_INLINE bool is_valid(PacketHeader const& packet_header) {
+    return (packet_header.command_type < 2) &&
+        (packet_header.chip_send_type < 2) &&
+        (packet_header.noc_send_type < 2);
+}
+
+} // namespace tt::fabric
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_transmission.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_transmission.hpp
new file mode 100644
index 00000000000..1e25898f003
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_transmission.hpp
@@ -0,0 +1,226 @@
+
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "tt_metal/hw/inc/dataflow_api.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp"
+#include <cstdint>
+
+void write_unicast_blocking(uint32_t local_address, uint64_t dest_address, uint32_t size_bytes) {
+    noc_async_write(local_address, dest_address, size_bytes);
+    noc_async_writes_flushed();
+}
+
+void print_pkt_hdr_routing_fields(volatile tt::fabric::PacketHeader *const packet_start) {
+    switch (packet_start->chip_send_type) {
+        case tt::fabric::CHIP_UNICAST: {
+            DPRINT << "C_UNI: dist:" << (uint32_t) packet_start->routing_fields.chip_unicast.distance_in_hops << "\n";
+            break;
+        }
+        case tt::fabric::CHIP_MULTICAST: {
+            DPRINT << "C_MCST: dist:" << (uint32_t) packet_start->routing_fields.chip_mcast.start_distance_in_hops <<
+                ", rng:" << (uint32_t) packet_start->routing_fields.chip_mcast.range_hops << "\n";
+            break;
+        }
+    };
+}
+
+void print_pkt_header_noc_fields(volatile tt::fabric::PacketHeader *const packet_start) {
+    switch (packet_start->noc_send_type) {
+        case tt::fabric::NocSendType::NOC_UNICAST: {
+            switch (packet_start->command_type) {
+                case tt::fabric::CommandType::WRITE: {
+                    DPRINT << "N_WR addr:"<<(uint32_t)packet_start->command_fields.unicast_write.address <<
+                        ", size:" << (uint32_t) packet_start->command_fields.unicast_write.size <<
+                        ", x:" << (uint32_t) packet_start->command_fields.unicast_write.noc_x <<
+                        ", y:" << (uint32_t) packet_start->command_fields.unicast_write.noc_y << "\n";
+                } break;
+                case tt::fabric::CommandType::ATOMIC_INC: {
+                    DPRINT << "N_WR addr:"<<(uint32_t)packet_start->command_fields.unicast_seminc.address <<
+                        ", val:" << (uint32_t) packet_start->command_fields.unicast_seminc.val <<
+                        ", x:" << (uint32_t) packet_start->command_fields.unicast_seminc.noc_x <<
+                        ", y:" << (uint32_t) packet_start->command_fields.unicast_seminc.noc_y << "\n";
+
+                } break;
+            }
+            break;
+        }
+        case tt::fabric::NocSendType::NOC_MULTICAST: {
+            break;
+        }
+    }
+}
+
+void print_pkt_header(volatile tt::fabric::PacketHeader *const packet_start) {
+    auto const& header = *packet_start;
+    DPRINT << "PKT: cmd_t:" << (uint32_t) packet_start->command_type <<
+        ", csnd_t:" << (uint32_t) packet_start->chip_send_type <<
+        ", nsnd_t:" << (uint32_t) packet_start->noc_send_type << "\n";
+    print_pkt_hdr_routing_fields(packet_start);
+    print_pkt_header_noc_fields(packet_start);
+}
+
+
+// Since we unicast to local, we must omit the packet header
+void execute_chip_unicast_to_local_chip(volatile tt::fabric::PacketHeader *const packet_start) {
+    auto const& header = *packet_start;
+    uint32_t payload_start_address = reinterpret_cast<size_t>(packet_start) + sizeof(tt::fabric::PacketHeader);
+
+    tt::fabric::CommandType command_type = packet_start->command_type;
+    tt::fabric::NocSendType noc_send_type = packet_start->noc_send_type;
+    switch (command_type) {
+        case tt::fabric::CommandType::WRITE: {
+            switch (noc_send_type) {
+                case tt::fabric::NocSendType::NOC_UNICAST: {
+                    auto const dest_address = get_noc_addr(
+                        header.command_fields.unicast_write.noc_x,
+                        header.command_fields.unicast_write.noc_y,
+                        header.command_fields.unicast_write.address);
+                    auto const size = header.command_fields.unicast_write.size - sizeof(tt::fabric::PacketHeader);
+                    write_unicast_blocking(payload_start_address, dest_address, size);
+
+                }break;
+                case tt::fabric::NocSendType::NOC_MULTICAST: {
+                    // TODO: confirm if we need to adjust dest core count if we span eth or dram cores
+                    auto const mcast_dest_address = get_noc_multicast_addr(
+                        header.command_fields.mcast_write.noc_x_start,
+                        header.command_fields.mcast_write.noc_y_start,
+                        header.command_fields.mcast_write.noc_x_start + header.command_fields.mcast_write.mcast_rect_size_x,
+                        header.command_fields.mcast_write.noc_y_start + header.command_fields.mcast_write.mcast_rect_size_y,
+                        header.command_fields.mcast_write.address);
+                    auto const num_dests = header.command_fields.mcast_write.mcast_rect_size_x * header.command_fields.mcast_write.mcast_rect_size_y;
+                    auto const size = header.command_fields.mcast_write.size - sizeof(tt::fabric::PacketHeader);
+                    noc_async_write_multicast_one_packet(payload_start_address, mcast_dest_address, size, num_dests);
+                    noc_async_writes_flushed();
+
+                }break;
+                default: {
+                    ASSERT(false);
+                }
+            }
+            break;
+        }
+        case tt::fabric::CommandType::ATOMIC_INC: {
+            switch (noc_send_type) {
+                case tt::fabric::NocSendType::NOC_UNICAST: {
+                    auto const dest_address = get_noc_addr(
+                        header.command_fields.unicast_seminc.noc_x,
+                        header.command_fields.unicast_seminc.noc_y,
+                        header.command_fields.unicast_seminc.address);
+                    auto const increment = header.command_fields.unicast_seminc.val;
+                    noc_semaphore_inc(dest_address, increment);
+
+                }break;
+                case tt::fabric::NocSendType::NOC_MULTICAST: {
+                    ASSERT(false);
+                    // noc_async_write(payload_start_address, header.dest_address, header.size_bytes);
+
+                }break;
+                default: {
+                    ASSERT(false);
+                }
+            }
+            break;
+
+        };
+
+        default: {
+            ASSERT(false);
+        }
+    };
+}
+
+
+
+void update_packet_header_for_next_hop(volatile tt::fabric::PacketHeader * packet_header) {
+    switch (packet_header->chip_send_type) {
+        case tt::fabric::CHIP_UNICAST: {
+            packet_header->routing_fields.chip_unicast.distance_in_hops--;
+        } break;
+        case tt::fabric::CHIP_MULTICAST: {
+            if (packet_header->routing_fields.chip_mcast.start_distance_in_hops == 0) {
+                packet_header->routing_fields.chip_mcast.range_hops--;
+            } else {
+                packet_header->routing_fields.chip_mcast.start_distance_in_hops--;
+            }
+        } break;
+    }
+}
+
+// This function forwards a packet to the downstream EDM channel for eventual sending
+// to the next chip in the line/ring
+//
+// Modifies the packet header (decrements hop counts) so ...
+//
+// !!!WARNING!!!
+// !!!WARNING!!! do NOT call before determining if the packet should be consumed locally or forwarded
+// !!!WARNING!!!
+tt::fabric::SendStatus forward_payload_to_downstream_edm(
+    volatile tt::fabric::PacketHeader *packet_header,
+    tt::fabric::WorkerToFabricEdmSender &downstream_edm_interface
+    ) {
+    // SHOULD BE ABLE TO ASSERT ON THIS SINCE WE CHECK FOR THIS IN THE CALLER
+    // TODO: PERF
+    bool safe_to_send = downstream_edm_interface.consumer_has_space();
+    if (!safe_to_send) {
+        return tt::fabric::SendStatus::NOT_SENT;
+    }
+
+    // print_pkt_header(packet_header);
+    update_packet_header_for_next_hop(packet_header);
+
+    downstream_edm_interface.send_payload_blocking_from_address(
+        reinterpret_cast<size_t>(packet_header),
+        packet_header->get_payload_size_including_header());
+
+    return tt::fabric::SendStatus::SENT_PAYLOAD_AND_SYNC;
+}
+
+void execute_chip_multicast_to_local_chip(volatile tt::fabric::PacketHeader *const packet_start) {
+    ASSERT(false);
+}
+
+bool packet_must_be_consumed_locally(tt::fabric::PacketHeader const& packet_header) {
+    switch (packet_header.chip_send_type) {
+        case tt::fabric::ChipSendType::CHIP_UNICAST: {
+            // TODO: does it make more sense to have 0 as the terminating distance or 1?
+            //       depends where we want to do the decrement and what the starting value
+            //       is expected to be for worker
+            //       Maybe at API level we just always decrement by 1 under the hood
+            //       so user can call `fabric_send_packet(payload_addr, size, n_hops=1)
+            return packet_header.routing_fields.chip_unicast.distance_in_hops == 0;
+        }
+        case tt::fabric::ChipSendType::CHIP_MULTICAST: {
+            return packet_header.routing_fields.chip_mcast.start_distance_in_hops == 0;
+        }
+        default: {
+            ASSERT(false);
+            return false;
+        }
+    }
+}
+
+
+bool packet_must_be_forwarded_to_next_chip(tt::fabric::PacketHeader const& packet_header) {
+    switch (packet_header.chip_send_type) {
+        case tt::fabric::ChipSendType::CHIP_UNICAST:
+            // TODO: does it make more sense to have 0 as the terminating distance or 1?
+            //       depends where we want to do the decrement and what the starting value
+            //       is expected to be for worker
+            //       Maybe at API level we just always decrement by 1 under the hood
+            //       so user can call `fabric_send_packet(payload_addr, size, n_hops=1)
+            return packet_header.routing_fields.chip_unicast.distance_in_hops != 0;
+
+        case tt::fabric::ChipSendType::CHIP_MULTICAST:
+            return packet_header.routing_fields.chip_mcast.range_hops != 0;
+
+        default:
+            ASSERT(false);
+            return false;
+    }
+}
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp
new file mode 100644
index 00000000000..2366c8758de
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "ttnn/cpp/ttnn/operations/ccl/shared_with_host/hetergeneous_data_structs.hpp"
+#include <cstdint>
+
+namespace tt::fabric {
+enum BlockingMode: uint8_t {
+    //
+    BUSY_WAIT_BLOCKING,
+
+    // will wait and allow context switching
+    CTX_SWITCH_BLOCKING,
+
+    // function will early exist if not able to send
+    NON_BLOCKING
+};
+
+enum SendStatus : uint8_t {
+    // Indicates that the sender was able to send the payload
+    // but was not able to send the channel_sync_t at the end of the
+    // buffer
+    //
+    // This enum should only ever be returned if we are sending less than
+    // a full packet/buffer of data AND when we are trying to send the
+    // channel_sync_t at the end of the buffer (which must be as a separate
+    // command) but the eth_tx_cmd_q is busy for that second message
+    //
+    // Receiving this value indicates we
+    // MUST:
+    // - Eventually send the channel_sync_t before advancing to the next buffer
+    // MUST NOT:
+    // - Advance to the next buffer index
+    // - Forward the other sender channel's data (if it has any)
+    SENT_PAYLOAD_ONLY,
+
+    // Indicates both the payload and the channel sync were sent successfully
+    SENT_PAYLOAD_AND_SYNC,
+
+    // Indicates no data was sent because the eth_tx_cmd_q was busy
+    NOT_SENT,
+
+    ERROR,
+};
+
+struct EDMChannelWorkerLocationInfo {
+    uint32_t worker_semaphore_address;
+    ttnn::ccl::WorkerXY worker_xy;
+};
+
+static_assert(sizeof(EDMChannelWorkerLocationInfo) <= 16);
+
+}  // namespace tt::fabric
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover.cpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover.cpp
new file mode 100644
index 00000000000..244b327a7ec
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover.cpp
@@ -0,0 +1,897 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+#include "dataflow_api.h"
+#include "tt_metal/hw/inc/ethernet/dataflow_api.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm/edm_handshake.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header_validate.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_transmission.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover_channels.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/shared_with_host/hetergeneous_data_structs.hpp"
+
+using ttnn::ccl::WorkerXY;
+
+/*
+
+The fabric Erisc Data Mover (EDM) is a component that can be used to build *very* simple linear topology fabrics.
+One of these EDMs can be instantiated on each ethernet link. It is built from 3 "channels" (though the definition
+of channel here is a little loose since two of the 3 will merge traffic, so this setup could be interpreted as a
+two channel setup.). This EDM implements packet based packets only - concepts like sockets are not supported.
+
+## EDM Structure
+
+There are two sender channels and one receiver channel. "Sender" and "receiver" are relative to the Ethernet link,
+not the chip. Sender sends over the link and receiver receives from the link.
+
+Each sender channel serves a different purpose:
+- Sender channel 0 : Accepts packets from a workers on the local chip
+- Sender channel 1: accepts packets from an upstream EDM (i.e. an upstream
+  EDM receiver channel on the same chip but different core)
+
+The receiver channel accepts packets from the Ethernet link and can do one (or both) of:
+- Write the packet to local chhip if it is the intended destination (unicast or mcast)
+- Forward the packet to the next chip in the line if:
+  - Unicast and not the target chip
+  - Multicast and this chip is in the multicast target range
+
+Sender channels will merge traffic into the remote EDM's receiver channel.
+
+Below is a diagram that shows how EDMs can be connected over an ethernet link. In this case, the two
+EDM kernels are run on separate, but connected ethernet link cores.
+
+ ┌───────────────────────┐           ┌───────────────────────┐
+ │    Sender Channel 0   │           │    Receiver Channel   │
+ │   ┌────────────────┐  │           │   ┌────────────────┐  │
+ │   │                ┼──┼───┬───────┼───►                │  │
+ │   │                │  │   │       │   │                │  │
+ │   └────────────────┘  │   │       │   └────────────────┘  │
+ │    Sender Channel 1   │   │       │    Sender Channel 1   │
+ │   ┌────────────────┐  │   │       │   ┌────────────────┐  │
+ │   │                ┼──┼───┘       │   │                │  │
+ │   │                │  │         ┌─┼───┼                │  │
+ │   └────────────────┘  │         │ │   └────────────────┘  │
+ │    Receiver Channel   │         │ │    Sender Channel 0   │
+ │   ┌────────────────┐  │         │ │   ┌────────────────┐  │
+ │   │                │  │         │ │   │                │  │
+ │   │                ◄──┼─────────┴─┼───┼                │  │
+ │   └────────────────┘  │           │   └────────────────┘  │
+ │                       │           │                       │
+ │                       │           │                       │
+ └───────────────────────┘           └───────────────────────┘
+
+
+## Building a "Fabric"
+
+At present, only linear topologies are supported, and one per ethernet link along that given line.
+Below shows the intended connectivity of EDMs across chips in a hypothetical 3-chip fabric. For longer
+lines, the pattern would be extended.
+
+           CHIP 0                              CHIP 1                             CHIP 2
+     ┌─────────────────┐                ┌─────────────────┐                ┌─────────────────┐
+     │                 │                │                 │                │                 │
+┌────┴─────┐ ▲   ┌─────┴────┐      ┌────┴─────┐ ▲   ┌─────┴────┐      ┌────┴─────┐ ▲   ┌─────┴────┐
+│   EDM    │ │   │   EDM    │      │   EDM    │ │   │   EDM    │      │   EDM    │ │   │   EDM    │
+│ ┌──────┐ │ │   │ ┌──────┐ │      │ ┌──────┐ │ │   │ ┌──────┐ │      │ ┌──────┐ │ │   │ ┌──────┐ │
+│ │ Rx   ┼─┼─┴───┼─► S1   ┼─┼─┬────┼─► Rx   ┼─┼─┴───┼─► S1   ┼─┼┬─────┼─► Rx   ┼─┼─┘   | | S1   │ │
+│ └──────┘ │     │ └──────┘ │ │    │ └──────┘ │     │ └──────┘ ││     │ └──────┘ │     │ └──────┘ │
+│ ┌──────┐ │     │ ┌──────┐ │ │    │ ┌──────┐ │     │ ┌──────┐ ││     │ ┌──────┐ │     │ ┌──────┐ │
+│ │ S0   ◄─┼──┬──┼─► S0   ┼─┼─┘   ┌┼─┼ S0   ◄─┼──┬──┼─► S0   ┼─┼┘    ┌┼─┼ S0   ◄─┼──┬──┼─► S0   │ │
+│ └──────┘ │  │  │ └──────┘ │     ││ └──────┘ │  │  │ └──────┘ │     ││ └──────┘ │  │  │ └──────┘ │
+│ ┌──────┐ │  │  │ ┌──────┐ │     ││ ┌──────┐ │  │  │ ┌──────┐ │     ││ ┌──────┐ │  │  │ ┌──────┐ │
+│ │ S1   | |  │ ┌┼─┼ Rx   ◄─┼─────┴┼─┼ S1   ◄─┼─┐│ ┌┼─┼ Rx   ◄─┼─────┴┼─┼ S1   ◄─┼─┐│ ┌┼─┼ Rx   │ │
+│ └──────┘ │  | |│ └──────┘ │      │ └──────┘ │ └┼─┤│ └──────┘ │      │ └──────┘ │ └┼─┤│ └──────┘ │
+└────┬─────┘  │ │└─────┬────┘      └────┬─────┘  │ │└─────┬────┘      └────┬─────┘  │ │└─────┬────┘
+     │          ▼      │                │          ▼      │                │          ▼      │
+     └─────────────────┘                └─────────────────┘                └─────────────────┘
+
+
+## Connecting Workers to Channels
+
+As mentioned, only one worker can push to a given EDM sender channel at a time. In order to send to an EDM
+sender channel, the worker must establish a connection. The connection protocol is as follows and is started
+by the worker (the EDM is a slave in this protocol).
+
+*NOTE*: If multiple workers try to connect to the same EDM sender channel at the same time, the behavior is undefined.
+*NOTE*: Additionally, if a worker pushes packets to a channel it isn't connected to, behaviour is undefined.
+*NOTE*: Undefined == likely hang
+
+The `WorkerToFabricEdmSender` from `ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp`
+provides an implementation of the connection protocol. `WorkerToFabricEdmSender` also acts as a wrapper around that
+protocol so workers can simply call `open()` to execute the connection protocol without having to manually reimplement
+for each kernel.
+
+### Protocol
+Worker:
+- Read from EDM sender channel buffer_index address
+  - Required so that the worker knows where to write its first packet (since the channel may already contain packets from
+    a previous connection)
+- Write worker core X/Y (NOC 0 based)
+- Write worker flow control semaphore L1 address
+
+EDM Sender Channel:
+- Check local connection valid semaphore for new established connection
+  - When the connection semaphore indicates an active connection, the channel assumes all other relevant fields were
+    correctly populated by the worker:
+    - Worker core_x (on NOC 0)
+    - Worker core_y (on NOC 0)
+    - Worker flow control semaphore L1 address
+
+
+## Tearing Down Connections
+
+Every worker is required to explicitly teardown its connection with the EDM before terminating. To do this, the worker
+must simply write a `0` to the EDM sender channel's connection semaphore address. As long as the worker has sent all
+of its packets to the EDM before this, then the EDM will guarantee to forward the messages correctly.
+
+At this point, it is safe for another kernel to establish a connection.
+
+## Packet Structure
+
+Workers are responsible for populating packet headers before sending to the EDM. The packet header structure is defined
+in `ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp`.
+
+## Channel structure
+
+Each EDM channel is built from one or more buffers. Each buffer is the same size and can hold atmost one packet.
+Neighbouring packets occupy nehighouring buffers - with the exception of the last buffer index. The next packet after a write
+into the last buffer index will wrap around to the first buffer index. Even if packets do not occupy the full buffer, subsequent
+packets will always be written into the next logical buffer. A gap will exist in memory but the EDM will not send that padded data
+(unless it is more performant - which is possible in some special cases)
+
+ Example channel with 8 buffers
+┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┐
+│       │       │       │       │       │       │       │       │
+│       │       │       │       │       │       │       │       │
+└───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
+ buf 0   buf 1   buf 2   buf 3   buf 4   buf 5   buf 6   buf 7
+
+
+Here we have an example of a channel with 4 buffers, filled with some number of packets. Each packet is a different size.
+Packets 0, 2, and 3 are smaller than the full buffer size, while packet 1 is the full buffer size.
+
+┌───────────────┬───────────────┬───────────────┬───────────────┐
+│H|Payload| / / │H|Payload      │H|Pyld| / / / /│H|Payload  |/ /│
+│ |       |/ / /│ |             │ |    |/ / / / │ |         | / │
+└───────────────┴───────────────┴───────────────┴───────────────┘
+  buf 0           buf 1           buf 2           buf 3
+
+
+A detail of the channel structure is omitted from the above diagram, namely the EDM <-> EDM flow control region for each buffer.
+Each buffer really looks something like this:
+
+
+             &header->  |----------------| channel_base_address
+                        |    header      |
+            &payload->  |----------------|
+                        |                |
+                        |    payload     |
+                        |                |
+       &channel_sync->  |----------------|
+                        |  channel_sync  |  // This is new
+                        ------------------
+
+The "channel_sync" is an `eth_channel_sync_t` and is internal to the EDM implementation and is used to indicate packet
+transmission state between sender and receiver EDMs.
+
+The protocol for its use is:
+1) Sender updates the field indicating new data:
+   - set `bytes_sent` to a non-zero value indicating new data
+   - clear `receiver_ack` to 0
+   - set `src_id` to the sender channel id so the receiver knows who the sender was (and where the ack should go)
+2) Sender sends this channel sync to the corresponding location in the receiver channel (either in the same transmission
+   as the packet or separately)
+3) Receiver sees that `bytes_sent` is non-zero, indicating a new packet. It sends back an acknowledgement (first level):
+   - set `receiver_ack` to non-zero
+   *NOTE* IMPORTANT: To avoid a race, the receiver must be sure to send its channel_sync_t from a different address it uses
+   as for the second level acknowledgement
+   3b) When sender receives an ack, it understands it can overwrite its local copy of the packet with new data
+4) After receiver properly writes out its packet, it sends a second level acknowledgement, indicating it can receive new
+   data into this specific buffer index:
+   - clear the bytes_sent and receiver_ack fields and send back the `channel_sync` to the sender
+
+
+
+## Sending Packets
+Sending a packet is done as follows:
+
+1) Worker waits for flow control semaphore increment from EDM sender channel
+  - Indicates there is space at the next buffer index for a packet
+2) Worker performs a noc write of its packet to the EDM sender channel at the buffer index
+
+*NOTE*: !!!ALL PACKETS MUST CONTAIN DESTINATION NOC X/Y AS NOC 0 COORDINATES, REGARDLESS OF THE `noc_index` OF THE SENDER!!!
+
+*/
+
+////////////////////////////////////////////////
+// Data structures, types, enums, and constants
+////////////////////////////////////////////////
+
+enum SenderState : uint8_t {
+    SENDER_DONE = 0,
+
+    // we are ready to tell the worker(s) that the buffer is available for writing into
+    SENDER_SIGNALING_WORKER,
+
+    // we are waiting for the payload to arrive in L1; we are checking local semaphore for worker
+    // completion
+    SENDER_WAITING_FOR_WORKER,
+
+    // this state is enterred if the sender was able to send the payload but not the channel sync
+    SENDER_SEND_CHANNEL_SYNC,
+
+    // Sender channel is not connected to a worker and is waiting for a new connection
+    SENDER_WAIT_WORKER_HANDSHAKE,
+
+    // means we are waiting for ack from receiver that payload was received
+    SENDER_WAITING_FOR_ETH,
+
+};
+
+enum ReceiverState : uint8_t {
+    RECEIVER_DONE = 0,
+
+    // Receiver is processing the packet, either writing it locally or forwarding to the next EDM
+    // (toward next chip), or both
+    RECEIVER_SENDING_PAYLOAD,
+
+    // Enter this state after performing writes of the current packet as a sort of soft barrier
+    // (for this channel only) so we can make progress on other channels while waiting for the
+    // writes to flush
+    RECEIVER_WAITING_FOR_WRITE_FLUSH,
+
+    // means we are waitinf for a payload from sender
+    RECEIVER_WAITING_FOR_ETH,
+};
+
+
+enum PacketLocalForwardType : uint8_t {
+    PACKET_FORWARD_INVALID = 0x0,
+    PACKET_FORWARD_LOCAL_ONLY = 0x1,
+    PACKET_FORWARD_REMOTE_ONLY = 0x2,
+    PACKET_FORWARD_LOCAL_AND_REMOTE = 0x3
+};
+
+static constexpr uint32_t SWITCH_INTERVAL = 4000;
+static constexpr size_t ETH_BYTES_TO_WORDS_SHIFT = 4;
+static constexpr size_t NUM_SENDER_CHANNELS = 2;
+static constexpr size_t num_workers_ctor = 1;
+static constexpr size_t num_messages_to_move_ctor_value = 1;
+// Doesn't REALLY matter but for consistency I picked the next available ID
+static constexpr size_t receiver_channel_id = NUM_SENDER_CHANNELS;
+static constexpr size_t worker_info_offset_past_connection_semaphore = 32;
+
+/////////////////////////////////////////////
+//   SENDER SIDE HELPERS
+/////////////////////////////////////////////
+
+FORCE_INLINE void sender_notify_workers_if_buffer_available_sequence(
+    tt::fabric::EdmChannelWorkerInterface &local_sender_worker_interface) {
+    local_sender_worker_interface.clear_local_semaphore();
+    local_sender_worker_interface.increment_worker_semaphore();
+}
+
+template <uint8_t SENDER_NUM_BUFFERS, uint8_t RECEIVER_NUM_BUFFERS>
+void send_channel_sync(
+    tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS> &sender_buffer_channel,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &receiver_buffer_channel) {
+
+    eth_send_bytes_over_channel_payload_only_unsafe(
+        reinterpret_cast<size_t>(sender_buffer_channel.get_current_bytes_sent_address()),
+        reinterpret_cast<size_t>(receiver_buffer_channel.get_current_bytes_sent_address()),
+        sizeof(eth_channel_sync_t),
+        sizeof(eth_channel_sync_t),
+        sizeof(eth_channel_sync_t) >> ETH_BYTES_TO_WORDS_SHIFT);
+}
+
+template <uint8_t SENDER_NUM_BUFFERS, uint8_t RECEIVER_NUM_BUFFERS>
+tt::fabric::SendStatus send_next_data(
+    tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS> &sender_buffer_channel,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &receiver_buffer_channel) {
+
+    auto status = tt::fabric::SendStatus::NOT_SENT;
+
+    ASSERT(!eth_txq_is_busy());
+
+    status = tt::fabric::SendStatus::SENT_PAYLOAD_AND_SYNC;
+    ASSERT(
+        reinterpret_cast<size_t>(sender_buffer_channel.get_current_bytes_sent_address()) ==
+        (reinterpret_cast<size_t>(sender_buffer_channel.get_current_buffer_address()) +
+         reinterpret_cast<size_t>(sender_buffer_channel.get_current_max_eth_payload_size()) -
+         (uint32_t)sizeof(eth_channel_sync_t)));
+    *sender_buffer_channel.get_current_bytes_sent_address() = sender_buffer_channel.get_current_max_eth_payload_size();
+    *sender_buffer_channel.get_current_bytes_acked_address() = 0;
+    *sender_buffer_channel.get_current_src_id_address() = sender_buffer_channel.get_id();
+    ASSERT(*sender_buffer_channel.get_current_src_id_address() < 2);
+
+    // TODO: TUNING - experiment with only conditionally breaking the transfer up into multiple packets if we are
+    //       a certain threshold less than full packet
+    //       we can precompute this value even on host and pass it in so we can get away with a single integer
+    //       compare
+    //       NOTE: if we always send full packet, then we don't need the second branch below dedicated for
+    //             channel sync
+    ASSERT(tt::fabric::is_valid(*const_cast<tt::fabric::PacketHeader *>(reinterpret_cast<volatile tt::fabric::PacketHeader *>(sender_buffer_channel.get_current_buffer_address()))));
+    const size_t payload_size = sender_buffer_channel.get_current_payload_plus_channel_sync_size();
+    eth_send_bytes_over_channel_payload_only_unsafe(
+        sender_buffer_channel.get_current_buffer_address(),
+        receiver_buffer_channel.get_current_buffer_address(),  // get_remote_eth_buffer_address(),
+        payload_size,
+        payload_size,
+        payload_size >> ETH_BYTES_TO_WORDS_SHIFT);
+
+    bool sent_payload_and_channel_sync_in_one_shot =
+        payload_size == sender_buffer_channel.get_channel_buffer_max_size_in_bytes();
+    if (!sent_payload_and_channel_sync_in_one_shot) {
+        // We weren't able to send the channel_sync_t in one shot with the payload so we need to send a second
+        // packet
+        // TODO: TUNING - consider busy waiting for a maximum amount of time
+        if (!eth_txq_is_busy()) {
+            send_channel_sync(sender_buffer_channel, receiver_buffer_channel);
+        } else {
+            status = tt::fabric::SendStatus::SENT_PAYLOAD_ONLY;
+        }
+    }
+
+    // Note: We can only advance to the next buffer index if we have fully completed the send (both the payload and sync
+    // messages)
+    if (status == tt::fabric::SendStatus::SENT_PAYLOAD_AND_SYNC) {
+        sender_buffer_channel.advance_buffer_index();
+        receiver_buffer_channel.advance_buffer_index();
+    }
+
+    return status;
+}
+
+template <uint8_t SENDER_NUM_BUFFERS, uint8_t RECEIVER_NUM_BUFFERS>
+FORCE_INLINE bool sender_noc_receive_payload_ack_check_sequence(
+    tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS> &sender_buffer_channel,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &receiver_buffer_channel) {
+    return sender_buffer_channel.is_local_semaphore_full();
+}
+
+template <uint8_t SENDER_NUM_BUFFERS>
+FORCE_INLINE void sender_eth_check_receiver_ack_sequence(
+    tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS> &sender_buffer_channel,
+    tt::fabric::EdmChannelWorkerInterface &sender_worker_interface) {
+    sender_buffer_channel.eth_clear_sender_channel_ack();
+
+    sender_notify_workers_if_buffer_available_sequence(sender_worker_interface);
+}
+
+/////////////////////////////////////////////
+//   RECEIVER SIDE HELPERS
+/////////////////////////////////////////////
+
+template <uint8_t RECEIVER_NUM_BUFFERS>
+FORCE_INLINE bool new_unacknowledged_packet_avilable_on_reciever_channel(
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_channel) {
+    return local_receiver_channel.eth_bytes_are_available_on_channel();
+}
+
+/*
+ * Acting the receiver, we are looking at our receiver channel and acking the sender who sent us the latest packet.
+ * Doesn't check to see if indeed a new message is available. It's assumed the caller has handled that separately.
+ */
+// MUST CHECK !is_eth_txq_busy() before calling
+template <size_t NUM_SENDER_CHANNELS, uint8_t SENDER_NUM_BUFFERS, uint8_t RECEIVER_NUM_BUFFERS>
+void receiver_send_received_ack(
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &remote_sender_channels,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_buffer_channel) {
+    // Set the acknowledgement bits. We have a different location than the
+
+    const auto src_id = *local_receiver_buffer_channel.get_current_src_id_address();
+    ASSERT(src_id < NUM_SENDER_CHANNELS);
+    auto &sender_buffer_channel = remote_sender_channels[src_id];
+    ASSERT(
+        reinterpret_cast<size_t>(sender_buffer_channel.get_current_bytes_sent_address()) ==
+        reinterpret_cast<size_t>(sender_buffer_channel.get_current_buffer_address()) +
+            reinterpret_cast<size_t>(sender_buffer_channel.get_current_max_eth_payload_size()) -
+            sizeof(eth_channel_sync_t));
+
+    const size_t local_ack_channel_sync_src_addr =
+        local_receiver_buffer_channel.get_eth_transaction_ack_word_addr() + (src_id * sizeof(eth_channel_sync_t));
+    reinterpret_cast<volatile eth_channel_sync_t *>(local_ack_channel_sync_src_addr)->bytes_sent =
+        *local_receiver_buffer_channel.get_current_bytes_sent_address();
+    reinterpret_cast<volatile eth_channel_sync_t *>(local_ack_channel_sync_src_addr)->receiver_ack = 1;
+    reinterpret_cast<volatile eth_channel_sync_t *>(local_ack_channel_sync_src_addr)->src_id =
+        *local_receiver_buffer_channel.get_current_src_id_address();
+
+    // Make sure we don't alias the erisc_info eth_channel_sync_t
+    ASSERT(
+        reinterpret_cast<volatile eth_channel_sync_t *>(local_receiver_buffer_channel.get_current_bytes_sent_address())
+            ->bytes_sent != 0);
+    ASSERT(
+        reinterpret_cast<volatile eth_channel_sync_t *>(local_receiver_buffer_channel.get_current_bytes_sent_address())
+            ->receiver_ack == 0);
+
+    ASSERT(!eth_txq_is_busy());
+    internal_::eth_send_packet_unsafe(
+        0,
+        local_ack_channel_sync_src_addr >> 4,
+        ((uint32_t)(sender_buffer_channel.get_current_bytes_sent_address())) >> 4,
+        1);
+}
+
+// MUST CHECK !is_eth_txq_busy() before calling
+template <size_t NUM_SENDER_CHANNELS, uint8_t SENDER_NUM_BUFFERS, uint8_t RECEIVER_NUM_BUFFERS>
+FORCE_INLINE void receiver_send_completion_ack(
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &remote_sender_channels,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_buffer_channel) {
+    volatile auto local_bytes_sent_addr = local_receiver_buffer_channel.get_current_bytes_sent_address();
+    volatile auto local_src_id_ptr = local_receiver_buffer_channel.get_current_src_id_address();
+
+    auto src_sender_channel = *local_src_id_ptr;
+    *(local_bytes_sent_addr) = 0;
+    *(local_receiver_buffer_channel.get_current_bytes_acked_address()) = 0;
+    ASSERT(src_sender_channel < NUM_SENDER_CHANNELS);
+
+    ASSERT(!eth_txq_is_busy());
+    internal_::eth_send_packet_unsafe(
+        0,
+        (uint32_t)(local_bytes_sent_addr) >> 4,
+        (uint32_t)(remote_sender_channels[src_sender_channel].get_current_bytes_sent_address()) >> 4,
+        1);
+
+    local_receiver_buffer_channel.advance_buffer_index();
+    remote_sender_channels[src_sender_channel].advance_buffer_index();
+}
+
+
+PacketLocalForwardType get_packet_local_forward_type(const tt::fabric::PacketHeader &packet_header) {
+    const bool local_chip_is_packet_destination = packet_must_be_consumed_locally(packet_header);
+    const bool packet_needs_forwarding = packet_must_be_forwarded_to_next_chip(packet_header);
+    PacketLocalForwardType forward_type =
+        static_cast<PacketLocalForwardType>(packet_needs_forwarding << 1 | local_chip_is_packet_destination);
+    return forward_type;
+}
+
+FORCE_INLINE bool can_forward_packet_completely(
+    const tt::fabric::PacketHeader &packet_header, tt::fabric::WorkerToFabricEdmSender &downstream_edm_interface) {
+    auto forward_status = get_packet_local_forward_type(packet_header);
+    bool can_send = true;
+    switch (forward_status) {
+        case PACKET_FORWARD_INVALID: return false;
+        case PACKET_FORWARD_LOCAL_ONLY: return true;
+
+        case PACKET_FORWARD_REMOTE_ONLY:
+        case PACKET_FORWARD_LOCAL_AND_REMOTE: return downstream_edm_interface.consumer_has_space();
+        default: ASSERT(false); return false;
+    };
+}
+
+// template <uint8_t NUM_BUFFERS>
+tt::fabric::SendStatus receiver_forward_packet(
+    volatile tt::fabric::PacketHeader *packet_start, tt::fabric::WorkerToFabricEdmSender &downstream_edm_interface) {
+    // Just cache the packet_header - we don't really expect (or care) if contents change during this function.
+    tt::fabric::PacketHeader const &packet_header = *const_cast<tt::fabric::PacketHeader *const>(packet_start);
+    ASSERT(tt::fabric::is_valid(packet_header));
+    auto forward_status = get_packet_local_forward_type(packet_header);
+
+    switch (forward_status) {
+        case PACKET_FORWARD_LOCAL_ONLY: {
+            execute_chip_unicast_to_local_chip(packet_start);
+            return tt::fabric::SendStatus::SENT_PAYLOAD_AND_SYNC;
+        } break;
+
+        case PACKET_FORWARD_REMOTE_ONLY: {
+            return forward_payload_to_downstream_edm(packet_start, downstream_edm_interface);
+        } break;
+
+        case PACKET_FORWARD_LOCAL_AND_REMOTE: {
+            ASSERT(packet_header.chip_send_type == tt::fabric::ChipSendType::CHIP_MULTICAST);
+            // TODO: make local chip write non-blocking
+            execute_chip_unicast_to_local_chip(packet_start);
+            return forward_payload_to_downstream_edm(packet_start, downstream_edm_interface);
+        } break;
+
+        case PACKET_FORWARD_INVALID:
+        default: ASSERT(false); return tt::fabric::SendStatus::ERROR;
+    };
+}
+
+////////////////////////////////////
+////////////////////////////////////
+//  Main Control Loop
+////////////////////////////////////
+////////////////////////////////////
+template <uint8_t RECEIVER_NUM_BUFFERS, uint8_t SENDER_NUM_BUFFERS>
+bool run_sender_channel_state_machine_step(
+    tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS> &local_sender_channel,
+    tt::fabric::EdmChannelWorkerInterface &local_sender_channel_worker_interface,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &remote_receiver_channel,
+    SenderState *const sender_state_out) {
+    bool incr_sender_channel_index = true;
+    switch (*sender_state_out) {
+        case SenderState::SENDER_WAITING_FOR_WORKER: {
+            bool able_to_send = local_sender_channel_worker_interface.has_payload() && !eth_txq_is_busy() &&
+                                local_sender_channel.eth_is_receiver_channel_send_done();
+            if (able_to_send) {
+                auto send_status = send_next_data(local_sender_channel, remote_receiver_channel);
+                // TODO: align the enums and state values so I can just do
+                // sender_states[sender_channel_index] += send_status :)
+                ASSERT(send_status != tt::fabric::SendStatus::ERROR);
+                *sender_state_out =
+                    send_status == tt::fabric::SendStatus::NOT_SENT            ? SenderState::SENDER_WAITING_FOR_WORKER
+                    : send_status == tt::fabric::SendStatus::SENT_PAYLOAD_ONLY ? SenderState::SENDER_SEND_CHANNEL_SYNC
+                                                                               : SenderState::SENDER_WAITING_FOR_ETH;
+                // Avoid any sort of starvation/bubbles so we only advance if we've sent the packet and channel sync
+                // otherwise what can happen is we could start sending another large payload from the other channel
+                // and not be able to send the channel sync for the packet we just sent, which overall negatively
+                // impact latency
+                incr_sender_channel_index = send_status != tt::fabric::SendStatus::SENT_PAYLOAD_ONLY;
+            } else {
+                if (local_sender_channel_worker_interface.has_worker_teardown_request()) {
+                    local_sender_channel_worker_interface.teardown_connection();
+                    *sender_state_out = SenderState::SENDER_WAIT_WORKER_HANDSHAKE;
+                }
+            }
+        } break;
+
+        case SenderState::SENDER_WAIT_WORKER_HANDSHAKE:
+            if (local_sender_channel_worker_interface.connection_is_live()) {
+                bool is_safe_to_receive_next_message = local_sender_channel.eth_is_receiver_channel_send_acked() ||
+                                                       local_sender_channel.eth_is_receiver_channel_send_done();
+                if (is_safe_to_receive_next_message) {
+                    sender_notify_workers_if_buffer_available_sequence(local_sender_channel_worker_interface);
+                    *sender_state_out = SenderState::SENDER_WAITING_FOR_WORKER;
+                } else {
+                    *sender_state_out = SenderState::SENDER_WAITING_FOR_ETH;
+                }
+            }
+            break;
+
+        case SenderState::SENDER_SEND_CHANNEL_SYNC: {
+            bool can_send_channel_sync_without_blocking = !eth_txq_is_busy();
+            if (can_send_channel_sync_without_blocking) {
+                send_channel_sync(local_sender_channel, remote_receiver_channel);
+                local_sender_channel.advance_buffer_index();
+                remote_receiver_channel.advance_buffer_index();
+                *sender_state_out = SenderState::SENDER_WAITING_FOR_ETH;
+            }
+        } break;
+
+        case SenderState::SENDER_WAITING_FOR_ETH: {
+            bool is_safe_to_receive_next_message = local_sender_channel.eth_is_receiver_channel_send_acked() ||
+                                                   local_sender_channel.eth_is_receiver_channel_send_done();
+            if (is_safe_to_receive_next_message) {
+                // This also notifies workers in the same call
+                sender_eth_check_receiver_ack_sequence(local_sender_channel, local_sender_channel_worker_interface);
+                *sender_state_out = SenderState::SENDER_WAITING_FOR_WORKER;
+            }
+        } break;
+
+        default: break;
+    };
+
+    return incr_sender_channel_index;
+};
+
+template <size_t RECEIVER_NUM_BUFFERS, size_t SENDER_NUM_BUFFERS, size_t NUM_SENDER_CHANNELS>
+void run_receiver_channel_state_machine_step(
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_channel,
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &remote_sender_channnels,
+    tt::fabric::WorkerToFabricEdmSender &downstream_edm_interface,
+    ReceiverState *const receiver_state_out) {
+    switch (*receiver_state_out) {
+        case ReceiverState::RECEIVER_WAITING_FOR_ETH: {
+            bool got_payload = local_receiver_channel.eth_bytes_are_available_on_channel();
+            if (got_payload) {
+                bool can_ack = !eth_txq_is_busy();
+                if (can_ack) {
+                    ASSERT(tt::fabric::is_valid(
+                        *const_cast<tt::fabric::PacketHeader *>(local_receiver_channel.get_current_packet_header())));
+                    receiver_send_received_ack(remote_sender_channnels, local_receiver_channel);
+                    // TODO: PERF Need to add feature to let use perform local noc write and defer the forward to EDM
+                    // if we are mcasting to the local chip and neighbours, but the downstream EDM isn't currently able
+                    // to accept the packet
+                    // ...
+                    // but as a starting point we can do the dumb thing and just wait for space downstream
+                    // before we do either.
+                    *receiver_state_out = ReceiverState::RECEIVER_SENDING_PAYLOAD;
+                    // TODO: PERF - SHORT CIRCUIT IF WE CAN TO NESXT STATE TO MINIMIZE LATENCY BUT CURRENTLY
+                    //       A LITTLE CODE SIZE BOUND
+                }
+            }
+        } break;
+
+        case ReceiverState::RECEIVER_SENDING_PAYLOAD: {
+            auto packet_header =
+                *const_cast<tt::fabric::PacketHeader *>(local_receiver_channel.get_current_packet_header());
+            bool can_send_to_all_local_chip_receivers =
+                can_forward_packet_completely(packet_header, downstream_edm_interface);
+            if (can_send_to_all_local_chip_receivers) {
+                receiver_forward_packet(local_receiver_channel.get_current_packet_header(), downstream_edm_interface);
+                *receiver_state_out = ReceiverState::RECEIVER_WAITING_FOR_WRITE_FLUSH;
+            }
+        } break;
+
+        case ReceiverState::RECEIVER_WAITING_FOR_WRITE_FLUSH: {
+            bool writes_flushed = ncrisc_noc_nonposted_writes_sent(noc_index);
+            if (writes_flushed) {
+                bool can_send_ack_without_blocking = !eth_txq_is_busy();
+                if (can_send_ack_without_blocking) {
+                    receiver_send_completion_ack(remote_sender_channnels, local_receiver_channel);
+                    *receiver_state_out = ReceiverState::RECEIVER_WAITING_FOR_ETH;
+                }
+            }
+        } break;
+
+        default: break;
+    };
+};
+
+
+/* Termination signal handling*/
+FORCE_INLINE bool got_immediate_termination_signal(volatile tt::fabric::TerminationSignal *termination_signal_ptr) {
+    return *termination_signal_ptr == tt::fabric::TerminationSignal::IMMEDIATELY_TERMINATE;
+}
+FORCE_INLINE bool got_graceful_termination_signal(volatile tt::fabric::TerminationSignal *termination_signal_ptr) {
+    return *termination_signal_ptr == tt::fabric::TerminationSignal::GRACEFULLY_TERMINATE;
+}
+FORCE_INLINE bool got_termination_signal(volatile tt::fabric::TerminationSignal *termination_signal_ptr) {
+    return got_immediate_termination_signal(termination_signal_ptr) ||
+           got_graceful_termination_signal(termination_signal_ptr);
+}
+
+template <size_t RECEIVER_NUM_BUFFERS, size_t SENDER_NUM_BUFFERS, size_t NUM_SENDER_CHANNELS>
+bool all_channels_drained(tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_channel,
+                          std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &local_sender_channels) {
+    // Unfortunately have to do this for now instead of only conditionally checking
+    // each undrained channel due to code size issues...
+    return local_sender_channels[0].all_buffers_drained() && local_sender_channels[1].all_buffers_drained() &&
+           local_receiver_channel.all_buffers_drained();
+}
+
+/*
+ * Main control loop for fabric EDM. Run indefinitely until a termination signal is received
+ *
+ * Every loop iteration visit a sender channel and the receiver channel. Switch between sender
+ * channels every iteration unless it is unsafe/undesirable to do so (e.g. for performance reasons).
+ */
+template <size_t RECEIVER_NUM_BUFFERS, size_t SENDER_NUM_BUFFERS, size_t NUM_SENDER_CHANNELS>
+void run_fabric_edm_main_loop(
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &local_receiver_channel,
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &local_sender_channels,
+    std::array<tt::fabric::EdmChannelWorkerInterface, NUM_SENDER_CHANNELS> &local_sender_channel_worker_interfaces,
+    tt::fabric::WorkerToFabricEdmSender &downstream_edm_noc_interface,
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> &remote_sender_channels,
+    tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS> &remote_receiver_channel,
+    volatile tt::fabric::TerminationSignal *termination_signal_ptr) {
+
+    std::array<SenderState, NUM_SENDER_CHANNELS> sender_states = {
+        SenderState::SENDER_WAIT_WORKER_HANDSHAKE, SenderState::SENDER_WAIT_WORKER_HANDSHAKE};
+    ReceiverState receiver_state = ReceiverState::RECEIVER_WAITING_FOR_ETH;
+    size_t sender_channel_index = 0;
+    size_t did_nothing_count = 0;
+    *termination_signal_ptr = tt::fabric::TerminationSignal::KEEP_RUNNING;
+
+    while (!got_immediate_termination_signal(termination_signal_ptr)) {
+        if (got_graceful_termination_signal(termination_signal_ptr)) {
+            bool all_drained = all_channels_drained<RECEIVER_NUM_BUFFERS, SENDER_NUM_BUFFERS, NUM_SENDER_CHANNELS>(
+                local_receiver_channel, local_sender_channels);
+
+            if (all_drained) {
+                return;
+            }
+        }
+
+        //     // TODO
+        auto &local_sender_channel = local_sender_channels[sender_channel_index];
+        auto &local_sender_channel_worker_interface = local_sender_channel_worker_interfaces[sender_channel_index];
+        // There are some cases, mainly for performance, where we don't want to switch between sender channels
+        // so we interoduce this to provide finer grain control over when we disable the automatic switching
+        bool incr_sender_channel_index = run_sender_channel_state_machine_step(
+            local_sender_channel,
+            local_sender_channel_worker_interface,
+            remote_receiver_channel,
+            &(sender_states[sender_channel_index]));
+        if (incr_sender_channel_index) {
+            // TODO: this can probably be optimized
+            sender_channel_index = 1 - sender_channel_index;
+        }
+
+        run_receiver_channel_state_machine_step<RECEIVER_NUM_BUFFERS, SENDER_NUM_BUFFERS, NUM_SENDER_CHANNELS>(
+            local_receiver_channel, remote_sender_channels, downstream_edm_noc_interface, &receiver_state);
+
+        if (did_nothing_count++ > SWITCH_INTERVAL) {
+            did_nothing_count = 0;
+            run_routing();
+        }
+    }
+}
+
+void kernel_main() {
+    //
+    // COMMON CT ARGS (not specific to sender or receiver)
+    //
+    static constexpr bool is_handshake_sender = get_compile_time_arg_val(0) != 0;
+    static constexpr size_t handshake_addr = get_compile_time_arg_val(1);
+    *reinterpret_cast<volatile uint32_t*>(handshake_addr) = 0;
+    auto eth_transaction_ack_word_addr = handshake_addr + sizeof(eth_channel_sync_t);
+
+    if constexpr (is_handshake_sender) {
+        erisc::datamover::handshake::sender_side_start(handshake_addr);
+    } else {
+        erisc::datamover::handshake::receiver_side_start(handshake_addr);
+    }
+
+    // the size of one of the buffers within a sender channel
+    // For example if `channel_buffer_size` = 4k, with `SENDER_NUM_BUFFERS` = 2
+    // then the total amount of buffering for that
+    static constexpr size_t channel_buffer_size = get_compile_time_arg_val(2);
+
+    static constexpr size_t SENDER_NUM_BUFFERS = get_compile_time_arg_val(3);
+    static constexpr size_t RECEIVER_NUM_BUFFERS = get_compile_time_arg_val(4);
+    static constexpr size_t local_sender_0_channel_address = get_compile_time_arg_val(5);
+    static constexpr size_t local_sender_channel_0_connection_info_addr = get_compile_time_arg_val(6);
+    static constexpr size_t local_sender_1_channel_address = get_compile_time_arg_val(7);
+    static constexpr size_t local_sender_channel_1_connection_info_addr = get_compile_time_arg_val(8);
+    static constexpr size_t local_receiver_channel_buffer_address = get_compile_time_arg_val(9);
+    static constexpr size_t remote_receiver_channel_buffer_address = get_compile_time_arg_val(10);
+    static constexpr size_t remote_sender_0_channel_address = get_compile_time_arg_val(11);
+    static constexpr size_t remote_sender_1_channel_address = get_compile_time_arg_val(12);
+
+    // TODO: CONVERT TO SEMAPHORE
+    volatile auto termination_signal_ptr =
+        reinterpret_cast<volatile tt::fabric::TerminationSignal *>(get_compile_time_arg_val(13));
+
+    static_assert(SENDER_NUM_BUFFERS > 0, "compile time argument [1]: SENDER_NUM_BUFFERS must be > 0");
+    static_assert(RECEIVER_NUM_BUFFERS > 0, "compile time argument [2]: RECEIVER_NUM_BUFFERS must be > 0");
+
+    size_t arg_idx = 0;
+    ///////////////////////
+    // Common runtime args:
+    ///////////////////////
+
+    const size_t local_sender_channel_0_connection_semaphore_addr =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+    const size_t local_sender_channel_1_connection_semaphore_addr =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+
+    // unused - can later remove
+    const size_t local_sender_channel_0_connection_buffer_index_addr =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+
+    const size_t local_sender_channel_1_connection_buffer_index_addr =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+
+
+    // downstream EDM semaphore location
+    const bool has_downstream_edm_buffer_connection = get_arg_val<uint32_t>(arg_idx++) != 0;
+    const auto downstream_edm_buffer_base_address = get_arg_val<uint32_t>(arg_idx++);
+    const auto downstream_edm_noc_x = get_arg_val<uint32_t>(arg_idx++);
+    const auto downstream_edm_noc_y = get_arg_val<uint32_t>(arg_idx++);
+
+    // remote address for flow control
+    const auto downstream_edm_semaphore_id = get_arg_val<uint32_t>(arg_idx++);  // TODO: Convert to semaphore ID
+    const auto downstream_edm_worker_registration_address =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+    const auto downstream_edm_worker_location_info_address = get_arg_val<uint32_t>(arg_idx++);
+    const auto downstream_noc_interface_buffer_index_local_addr = get_arg_val<uint32_t>(arg_idx++);
+
+    // Receiver channels local semaphore for managing flow control with the downstream EDM.
+    // The downstream EDM should be sending semaphore updates to this address any time it can
+    // accept a new message
+    const auto edm_forwarding_semaphore_address =
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++));
+
+    ////////////////////////
+    // Sender runtime args
+    ////////////////////////
+    auto sender0_worker_semaphore_ptr = reinterpret_cast<volatile uint32_t *>(
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++)));
+    auto sender1_worker_semaphore_ptr = reinterpret_cast<volatile uint32_t *>(
+        get_semaphore<ProgrammableCoreType::ACTIVE_ETH>(get_arg_val<uint32_t>(arg_idx++)));
+    *sender0_worker_semaphore_ptr = 0;
+    *sender1_worker_semaphore_ptr = 0;
+
+    //////////////////////////////
+    //////////////////////////////
+    //        Object Setup
+    //////////////////////////////
+    //////////////////////////////
+
+    auto const &local_sender_buffer_addresses =
+        std::array<size_t, NUM_SENDER_CHANNELS>{local_sender_0_channel_address, local_sender_1_channel_address};
+    auto const &remote_sender_buffer_addresses =
+        std::array<size_t, NUM_SENDER_CHANNELS>{remote_sender_0_channel_address, remote_sender_1_channel_address};
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> remote_sender_channels;
+    std::array<tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>, NUM_SENDER_CHANNELS> local_sender_channels;
+    std::array<tt::fabric::EdmChannelWorkerInterface, NUM_SENDER_CHANNELS> local_sender_channel_worker_interfaces;
+    std::array<size_t, NUM_SENDER_CHANNELS> local_sender_flow_control_semaphores = {
+        reinterpret_cast<size_t>(sender0_worker_semaphore_ptr), reinterpret_cast<size_t>(sender1_worker_semaphore_ptr)};
+    std::array<size_t, NUM_SENDER_CHANNELS> local_sender_connection_live_semaphore_addresses = {
+        local_sender_channel_0_connection_semaphore_addr, local_sender_channel_1_connection_semaphore_addr};
+    std::array<size_t, NUM_SENDER_CHANNELS> local_sender_connection_info_addresses = {
+        local_sender_channel_0_connection_info_addr, local_sender_channel_1_connection_info_addr};
+    auto downstream_edm_noc_interface =
+        has_downstream_edm_buffer_connection
+            ? tt::fabric::WorkerToFabricEdmSender(
+                  downstream_edm_noc_x,
+                  downstream_edm_noc_y,
+                  downstream_edm_buffer_base_address,
+                  SENDER_NUM_BUFFERS,
+                  downstream_edm_semaphore_id,
+                  downstream_edm_worker_registration_address,  // edm_connection_handshake_addr,
+                  downstream_edm_worker_location_info_address,
+                  channel_buffer_size,
+                  local_sender_channel_1_connection_buffer_index_addr, // our downstream is channel 1
+                  reinterpret_cast<volatile uint32_t *const>(edm_forwarding_semaphore_address),
+                  downstream_noc_interface_buffer_index_local_addr)
+            : tt::fabric::WorkerToFabricEdmSender();
+
+    auto local_receiver_channel = tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS>(
+        local_receiver_channel_buffer_address,
+        channel_buffer_size,
+        tt::fabric::header_size_bytes,
+        eth_transaction_ack_word_addr,  // Assume for receiver channel, this address points to a chunk of memory that
+                                        // can fit 2 eth_channel_syncs cfor ack
+        receiver_channel_id);
+    auto remote_receiver_channel = tt::fabric::EthChannelBuffer<RECEIVER_NUM_BUFFERS>(
+        remote_receiver_channel_buffer_address,
+        channel_buffer_size,
+        tt::fabric::header_size_bytes,
+        eth_transaction_ack_word_addr,  // Assume for receiver channel, this address points to a chunk of memory that
+                                        // can fit 2 eth_channel_syncs cfor ack
+        receiver_channel_id);
+
+    uint32_t args_offset = 0;
+
+    for (uint8_t i = 0; i < NUM_SENDER_CHANNELS; i++) {
+        new (&local_sender_channels[i]) tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>(
+            local_sender_buffer_addresses[i],
+            channel_buffer_size,
+            tt::fabric::header_size_bytes,
+            0,  // For sender channels there is no eth_transaction_ack_word_addr because they don't send acks
+            i);
+        new (&remote_sender_channels[i]) tt::fabric::EthChannelBuffer<SENDER_NUM_BUFFERS>(
+            remote_sender_buffer_addresses[i],
+            channel_buffer_size,
+            tt::fabric::header_size_bytes,
+            0,  // For sender channels there is no eth_transaction_ack_word_addr because they don't send acks
+            i);
+
+        auto connection_live_semaphore_ptr =
+            reinterpret_cast<volatile tt_l1_ptr uint32_t *const>(local_sender_connection_live_semaphore_addresses[i]);
+        auto connection_worker_info_ptr = reinterpret_cast<volatile tt::fabric::EDMChannelWorkerLocationInfo *>(
+            local_sender_connection_info_addresses[i]);
+        new (&local_sender_channel_worker_interfaces[i]) tt::fabric::EdmChannelWorkerInterface(
+            connection_worker_info_ptr,  // worker_location_info_ptr,
+            reinterpret_cast<volatile tt_l1_ptr uint32_t *const>(
+                local_sender_flow_control_semaphores[i]),  // local_semaphore_address,
+            reinterpret_cast<volatile tt_l1_ptr uint32_t *const>(connection_live_semaphore_ptr));
+    }
+
+    if (has_downstream_edm_buffer_connection) {
+        downstream_edm_noc_interface.open();
+    }
+
+    if constexpr (is_handshake_sender) {
+        erisc::datamover::handshake::sender_side_finish(handshake_addr);
+    } else {
+        erisc::datamover::handshake::receiver_side_finish(handshake_addr);
+    }
+
+    //////////////////////////////
+    //////////////////////////////
+    //        MAIN LOOP
+    //////////////////////////////
+    //////////////////////////////
+    run_fabric_edm_main_loop<RECEIVER_NUM_BUFFERS, SENDER_NUM_BUFFERS, NUM_SENDER_CHANNELS>(
+        local_receiver_channel,
+        local_sender_channels,
+        local_sender_channel_worker_interfaces,
+        downstream_edm_noc_interface,
+        remote_sender_channels,
+        remote_receiver_channel,
+        termination_signal_ptr);
+
+
+    WAYPOINT("DONE");
+}
diff --git a/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover_channels.hpp b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover_channels.hpp
new file mode 100644
index 00000000000..ae241fb8599
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_erisc_datamover_channels.hpp
@@ -0,0 +1,232 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+#include "debug/dprint.h"
+#include "tt_metal/hw/inc/dataflow_api.h"
+#include "tt_metal/hw/inc/ethernet/tunneling.h"
+#include "tt_metal/hw/inc/risc_attribs.h"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_types.hpp"
+#include "ttnn/cpp/ttnn/operations/ccl/shared_with_host/hetergeneous_data_structs.hpp"
+
+namespace tt::fabric {
+// Increments val and wraps to 0 if it reaches limit
+template <typename T, size_t LIMIT>
+auto wrap_increment(T val) -> T {
+    static_assert(LIMIT != 0, "wrap_increment called with limit of 0; it must be greater than 0");
+    if constexpr (LIMIT == 1) {
+        return val;
+    } else if constexpr (LIMIT == 2) {
+        return 1 - val;
+    } else if constexpr ((LIMIT > 0) && (LIMIT & (LIMIT - 1)) == 0) {
+        return (val + 1) & (LIMIT - 1);
+    } else {
+        return (val == LIMIT - 1) ? 0 : val + 1;
+    }
+}
+
+template <typename T>
+FORCE_INLINE auto wrap_increment(T val, size_t max) {
+    return (val == max - 1) ? 0 : val + 1;
+}
+
+template <uint8_t NUM_BUFFERS>
+class EthChannelBuffer final {
+   public:
+    // The channel structure is as follows:
+    //              &header->  |----------------| channel_base_address
+    //                         |    header      |
+    //             &payload->  |----------------|
+    //                         |                |
+    //                         |    payload     |
+    //                         |                |
+    //        &channel_sync->  |----------------|
+    //                         |  channel_sync  |
+    //                         ------------------
+    EthChannelBuffer() : buffer_size_in_bytes(0), eth_transaction_ack_word_addr(0), max_eth_payload_size_in_bytes(0) {}
+
+    /*
+     * Expected that *buffer_index_ptr is initialized outside of this object
+     */
+    EthChannelBuffer(
+        size_t channel_base_address,
+        size_t buffer_size_bytes,
+        size_t header_size_bytes,
+        size_t eth_transaction_ack_word_addr,  // Assume for receiver channel, this address points to a chunk of memory
+                                               // that can fit 2 eth_channel_syncs cfor ack
+        uint8_t channel_id) :
+        buffer_size_in_bytes(buffer_size_bytes),
+        eth_transaction_ack_word_addr(eth_transaction_ack_word_addr),
+        max_eth_payload_size_in_bytes(buffer_size_in_bytes + sizeof(eth_channel_sync_t)),
+        buff_idx(0),
+        channel_id(channel_id) {
+        for (uint8_t i = 0; i < NUM_BUFFERS; i++) {
+            this->buffer_addresses[i] =
+                channel_base_address + i * this->max_eth_payload_size_in_bytes;  //(this->buffer_size_in_bytes);
+
+            uint32_t channel_sync_addr = this->buffer_addresses[i] + buffer_size_in_bytes;
+            auto channel_sync_ptr = reinterpret_cast<eth_channel_sync_t *>(channel_sync_addr);
+
+            channel_bytes_sent_addresses[i] =
+                reinterpret_cast<volatile tt_l1_ptr size_t *>(&(channel_sync_ptr->bytes_sent));
+            channel_bytes_acked_addresses[i] =
+                reinterpret_cast<volatile tt_l1_ptr size_t *>(&(channel_sync_ptr->receiver_ack));
+            channel_src_id_addresses[i] = reinterpret_cast<volatile tt_l1_ptr size_t *>(&(channel_sync_ptr->src_id));
+
+            ASSERT((uint32_t)channel_bytes_acked_addresses[i] != (uint32_t)(channel_bytes_sent_addresses[i]));
+            *(channel_bytes_sent_addresses[i]) = 0;
+            *(channel_bytes_acked_addresses[i]) = 0;
+            // Note we don't need to overwrite the `channel_src_id_addresses` except for perhapse
+            // debug purposes where we may wish to tag this with a special value
+        }
+    }
+
+    [[nodiscard]] FORCE_INLINE size_t get_current_buffer_address() const {
+        return this->buffer_addresses[this->buffer_index()];
+    }
+
+    [[nodiscard]] FORCE_INLINE volatile PacketHeader *get_current_packet_header() const {
+        return reinterpret_cast<volatile PacketHeader *>(this->buffer_addresses[this->buffer_index()]);
+    }
+
+    [[nodiscard]] FORCE_INLINE size_t get_current_payload_size() const {
+        return get_current_packet_header()->get_payload_size_including_header();
+    }
+    [[nodiscard]] FORCE_INLINE size_t get_current_payload_plus_channel_sync_size() const {
+        return get_current_packet_header()->get_payload_size_including_header() + sizeof(eth_channel_sync_t);
+    }
+
+    // TODO: Split off into two separate functions:
+    //       volatile tt_l1_ptr size_t *get_current_bytes_sent_ptr() const
+    //       size_t get_current_bytes_sent_address() const
+    [[nodiscard]] FORCE_INLINE volatile tt_l1_ptr size_t *get_current_bytes_sent_address() const {
+        return this->channel_bytes_sent_addresses[this->buffer_index()];
+    }
+
+    [[nodiscard]] FORCE_INLINE volatile tt_l1_ptr size_t *get_current_bytes_acked_address() const {
+        return this->channel_bytes_acked_addresses[this->buffer_index()];
+    }
+
+    [[nodiscard]] FORCE_INLINE volatile tt_l1_ptr size_t *get_current_src_id_address() const {
+        return this->channel_src_id_addresses[this->buffer_index()];
+    }
+
+    [[nodiscard]] FORCE_INLINE size_t get_channel_buffer_max_size_in_bytes() const {
+        return this->buffer_size_in_bytes;
+    }
+
+    // Doesn't return the message size, only the maximum eth payload size
+    [[nodiscard]] FORCE_INLINE size_t get_current_max_eth_payload_size() const {
+        return this->max_eth_payload_size_in_bytes;
+    }
+
+    [[nodiscard]] FORCE_INLINE size_t get_id() const { return this->channel_id; }
+
+    [[nodiscard]] FORCE_INLINE bool eth_is_receiver_channel_send_done() const {
+        return *(this->get_current_bytes_sent_address()) == 0;
+    }
+    [[nodiscard]] FORCE_INLINE bool eth_bytes_are_available_on_channel() const {
+        return *(this->get_current_bytes_sent_address()) != 0;
+    }
+    [[nodiscard]] FORCE_INLINE bool eth_is_receiver_channel_send_acked() const {
+        return *(this->get_current_bytes_acked_address()) != 0;
+    }
+    FORCE_INLINE void eth_clear_sender_channel_ack() const {
+        *(this->channel_bytes_acked_addresses[this->buffer_index()]) = 0;
+    }
+
+    [[nodiscard]] FORCE_INLINE size_t get_eth_transaction_ack_word_addr() const {
+        return this->eth_transaction_ack_word_addr;
+    }
+
+    FORCE_INLINE void advance_buffer_index() {
+        this->buff_idx = wrap_increment<decltype(this->buff_idx), NUM_BUFFERS>(this->buff_idx);
+    }
+
+    [[nodiscard]] FORCE_INLINE bool all_buffers_drained() const {
+        bool drained = true;
+        for (size_t i = 0; i < NUM_BUFFERS && drained; i++) {
+            drained &= *(channel_bytes_sent_addresses[i]) == 0;
+        }
+        return drained;
+    }
+
+   private:
+    FORCE_INLINE auto buffer_index() const {
+        ASSERT(this->buff_idx < NUM_BUFFERS);
+        return buff_idx;
+    }
+
+    std::array<size_t, NUM_BUFFERS> buffer_addresses;
+    std::array<volatile tt_l1_ptr size_t *, NUM_BUFFERS> channel_bytes_sent_addresses;
+    std::array<volatile tt_l1_ptr size_t *, NUM_BUFFERS> channel_bytes_acked_addresses;
+    std::array<volatile tt_l1_ptr size_t *, NUM_BUFFERS> channel_src_id_addresses;
+
+    // header + payload regions only
+    const std::size_t buffer_size_in_bytes;
+    // Includes header + payload + channel_sync
+    const std::size_t eth_transaction_ack_word_addr;
+    const std::size_t max_eth_payload_size_in_bytes;
+    uint8_t buff_idx;
+    uint8_t channel_id;
+};
+
+struct EdmChannelWorkerInterface {
+    EdmChannelWorkerInterface() :
+        worker_location_info_ptr(nullptr), local_semaphore_address(nullptr), connection_live_semaphore(nullptr) {}
+    EdmChannelWorkerInterface(
+        // TODO: PERF: See if we can make this non-volatile and then only
+        // mark it volatile when we know we need to reload it (i.e. after we receive a
+        // "done" message from sender)
+        // Have a volatile update function that only triggers after reading the volatile
+        // completion field so that way we don't have to do a volatile read for every
+        // packet... Then we'll also be able to cache the uint64_t addr of the worker
+        // semaphore directly (saving on regenerating it each time)
+        volatile EDMChannelWorkerLocationInfo *worker_location_info_ptr,
+        volatile tt_l1_ptr uint32_t *const local_semaphore_address,
+        volatile tt_l1_ptr uint32_t *const connection_live_semaphore) :
+        worker_location_info_ptr(worker_location_info_ptr),
+        local_semaphore_address(local_semaphore_address),
+        connection_live_semaphore(connection_live_semaphore) {}
+
+    // Flow control methods
+    //
+    [[nodiscard]] FORCE_INLINE auto local_semaphore_value() const { return *local_semaphore_address; }
+
+    [[nodiscard]] FORCE_INLINE bool has_payload() { return *local_semaphore_address != 0; }
+
+    FORCE_INLINE void clear_local_semaphore() { noc_semaphore_set(local_semaphore_address, 0); }
+
+    [[nodiscard]] FORCE_INLINE uint32_t get_worker_semaphore_address() const {
+        return worker_location_info_ptr->worker_semaphore_address;
+    }
+
+    void increment_worker_semaphore() const {
+        auto const &worker_info = *worker_location_info_ptr;
+        uint64_t worker_semaphore_address = get_noc_addr(
+            (uint32_t)worker_info.worker_xy.x, (uint32_t)worker_info.worker_xy.y, worker_info.worker_semaphore_address);
+
+        noc_semaphore_inc(worker_semaphore_address, 1);
+    }
+
+    // Connection management methods
+    //
+    FORCE_INLINE void teardown_connection() const { increment_worker_semaphore(); }
+
+    [[nodiscard]] FORCE_INLINE bool has_worker_teardown_request() const { return *connection_live_semaphore == 0; }
+
+    [[nodiscard]] FORCE_INLINE bool connection_is_live() const { return *connection_live_semaphore == 1; }
+
+    volatile EDMChannelWorkerLocationInfo *worker_location_info_ptr;
+    volatile tt_l1_ptr uint32_t *const local_semaphore_address;
+    volatile tt_l1_ptr uint32_t *const connection_live_semaphore;
+};
+
+}  // namespace tt::fabric
diff --git a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.cpp
index a06f9c44e89..241b0e1f8b2 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.cpp
@@ -29,7 +29,7 @@ uint32_t find_greatest_common_page_size(std::vector<uint32_t> &stick_sizes, uint
 
 namespace ttnn::operations::data_movement::detail {
 
-operation::ProgramWithCallbacks s2s_rm_concat_two_tensors_multi_core(
+tt_metal::operation::ProgramWithCallbacks s2s_rm_concat_two_tensors_multi_core(
     const std::vector<Tensor> &input_tensors, uint32_t dim, Tensor &output, unsigned int groups) {
     TT_FATAL(dim == 3, "Sharded concat RM only supports dim=3");
     TT_FATAL(groups == 1 || dim == 3, "Sharded concat RM only supports groups > 1 when dim=3");
@@ -165,7 +165,7 @@ operation::ProgramWithCallbacks s2s_rm_concat_two_tensors_multi_core(
 // output. The memory address gap between neighbor input rows is exactly the output width. In height concat, all input
 // rows are placed at column 0 but sequential rows in the output. The address gap between neighbor input rows is still
 // the output width (which is equal to the input width).
-operation::ProgramWithCallbacks s2s_concat_multi_core(
+tt_metal::operation::ProgramWithCallbacks s2s_concat_multi_core(
     const std::vector<Tensor> &input_tensors, uint32_t dim, Tensor &output) {
     TT_FATAL(dim == 2 || dim == 3, "Sharded concat only supports dim=2 or 3");
     const bool is_height_concat = dim == 2;
@@ -286,7 +286,7 @@ operation::ProgramWithCallbacks s2s_concat_multi_core(
     return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_arguments_callback};
 }
 
-operation::ProgramWithCallbacks s2i_rm_concat_multi_core(
+tt_metal::operation::ProgramWithCallbacks s2i_rm_concat_multi_core(
     const std::vector<Tensor> &input_tensors, uint32_t dim, Tensor &output) {
     tt_metal::Program program = tt_metal::CreateProgram();
 
@@ -420,7 +420,7 @@ operation::ProgramWithCallbacks s2i_rm_concat_multi_core(
     return {.program = std::move(program), .override_runtime_arguments_callback = override_runtime_arguments_callback};
 }
 
-operation::ProgramWithCallbacks sharded_concat_multi_core(
+tt_metal::operation::ProgramWithCallbacks sharded_concat_multi_core(
     const std::vector<Tensor> &input_tensors, uint32_t dim, Tensor &output, unsigned int groups) {
     if (output.is_sharded()) {
         if (input_tensors.size() == 2) {
@@ -442,7 +442,7 @@ operation::ProgramWithCallbacks sharded_concat_multi_core(
     }
 }
 
-operation::ProgramWithCallbacks concat_multi_core(
+tt_metal::operation::ProgramWithCallbacks concat_multi_core(
     const std::vector<Tensor> &input_tensors, const uint32_t dim, const Tensor &output) {
     tt_metal::Program program = tt_metal::CreateProgram();
 
diff --git a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
index c902e407103..6e5d4ac75e8 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/concat/device/concat_program_factory.hpp
@@ -9,6 +9,7 @@
 #include "tt_metal/host_api.hpp"
 #include "ttnn/cpp/ttnn/operation.hpp"
 
+#include "ttnn/operation.hpp"
 namespace ttnn::operations::data_movement::detail {
 
 // start is inclusive, end is exclusive
diff --git a/ttnn/cpp/ttnn/operations/data_movement/tilize/device/tilize_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/tilize/device/tilize_program_factory.cpp
index 1f9acdd8e3f..803d300763a 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/tilize/device/tilize_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/tilize/device/tilize_program_factory.cpp
@@ -95,7 +95,7 @@ operation::ProgramWithCallbacks tilize_single_core(const Tensor& a, Tensor& outp
     // Reader compile-time args
     uint32_t src0_is_dram = src0_buffer->buffer_type() == tt::tt_metal::BufferType::DRAM ? 1 : 0;
     uint32_t stick_size_is_power_of_two = is_power_of_two_at_least_32(stick_size);
-    uint32_t log2_stick_size = stick_size_is_power_of_two ? (uint32_t)log2(stick_size) : 0;
+    uint32_t log2_stick_size = stick_size_is_power_of_two ? (uint32_t)std::log2<decltype(stick_size)>(stick_size) : 0;
     std::vector<uint32_t> reader_compile_time_args = {src0_is_dram, stick_size_is_power_of_two, log2_stick_size};
 
     uint32_t out_is_dram = dst_buffer->buffer_type() == tt::tt_metal::BufferType::DRAM ? 1 : 0;