diff --git a/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp b/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp index 31b399fc981a..8e2911a0a2fc 100644 --- a/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp +++ b/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp @@ -12,9 +12,7 @@ #include "tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp" #include "tt_metal/common/constants.hpp" #include "ttnn/cpp/ttnn/operations/creation.hpp" - - -#include "ttnn/operations/eltwise/binary/device/binary_op.hpp" +#include "ttnn/operations/eltwise/binary/device/binary_device_operation.hpp" namespace tt { diff --git a/tt_metal/tools/profiler/op_profiler.hpp b/tt_metal/tools/profiler/op_profiler.hpp index 79c231ea50f9..9383aa6c9f1f 100644 --- a/tt_metal/tools/profiler/op_profiler.hpp +++ b/tt_metal/tools/profiler/op_profiler.hpp @@ -385,7 +385,13 @@ inline std::string op_meta_data_serialized_json( j["optional_input_tensors"] = std::vector{}; - auto perfModel = operation_t::create_op_performance_model(operation_attributes, tensor_args, tensor_return_value); + auto perfModel = [&]() { + if constexpr (requires { operation_t::create_op_performance_model; }) { + return operation_t::create_op_performance_model(operation_attributes, tensor_args, tensor_return_value); + } else { + return operation::OpPerformanceModel{}; + } + }(); j["performance_model"]["compute_ns"] = perfModel.get_compute_ns(); j["performance_model"]["ideal_ns"] = perfModel.get_ideal_ns(); j["performance_model"]["bandwidth_ns"] = perfModel.get_bandwidth_ns(); diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt index c8262b3a3a27..d274d10412ee 100644 --- a/ttnn/CMakeLists.txt +++ b/ttnn/CMakeLists.txt @@ -5,12 +5,15 @@ set(TTNN_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/conv2d.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/matmul.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/reduction/argmax/device/argmax_op.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_op.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/unary/device/unary_op.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/example/example/device/example_device_operation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/example/example/device/single_core_program_factory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/example/example/device/multi_core_program_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/reduction/topk/device/topk_op.cpp ) diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp index e16c04ceb7ef..a63333751b97 100644 --- a/ttnn/cpp/pybind11/operations/__init__.hpp +++ b/ttnn/cpp/pybind11/operations/__init__.hpp @@ -9,6 +9,7 @@ #include "pybind11/operations/ccl.hpp" #include "pybind11/operations/conv2d.hpp" +#include "pybind11/operations/copy.hpp" #include "pybind11/operations/core.hpp" #include "pybind11/operations/creation.hpp" #include "pybind11/operations/data_movement.hpp" @@ -18,7 +19,6 @@ #include "pybind11/operations/maxpool2d.hpp" #include "pybind11/operations/normalization.hpp" #include "pybind11/operations/pool.hpp" -#include "pybind11/operations/copy.hpp" #include "pybind11/operations/ternary.hpp" #include "pybind11/operations/transformer.hpp" @@ -26,7 +26,8 @@ #include 
"ttnn/operations/eltwise/unary/unary_pybind.hpp" #include "ttnn/operations/reduction/reduction_pybind.hpp" #include "ttnn/operations/eltwise/binary_backward/binary_backward_pybind.hpp" - +#include "ttnn/operations/example/example/example_pybind.hpp" +#include "ttnn/operations/reduction/reduction_pybind.hpp" namespace py = pybind11; @@ -35,6 +36,9 @@ namespace ttnn { namespace operations { void py_module(py::module& module) { + auto m_example = module.def_submodule("example", "example operation"); + example::py_module(m_example); + auto m_unary = module.def_submodule("unary", "unary operations"); unary::py_module(m_unary); diff --git a/ttnn/cpp/ttnn/device_operation.hpp b/ttnn/cpp/ttnn/device_operation.hpp index ec9b2a93434d..0c822f6b59fe 100644 --- a/ttnn/cpp/ttnn/device_operation.hpp +++ b/ttnn/cpp/ttnn/device_operation.hpp @@ -80,7 +80,7 @@ concept DeviceOperationConcept = requires { }; template -concept DeviceOperationWithCustomProgramCacheConcept = DeviceOperationConcept and requires { +concept DeviceOperationConceptWithCustomProgramCacheConcept = DeviceOperationConcept and requires { [](auto&& program_factory, const typename device_operation_t::operation_attributes_t& operation_attributes, const typename device_operation_t::tensor_args_t& tensor_args) { @@ -91,7 +91,7 @@ concept DeviceOperationWithCustomProgramCacheConcept = DeviceOperationConcept [[nodiscard]] std::variant constexpr map_index_to_variant(std::size_t i, std::variant) { assert(i < sizeof...(Ts)); - static constexpr std::variant table[] = { Ts{ }... 
}; + static constexpr std::variant table[] = {Ts{}...}; return table[i]; } @@ -101,7 +101,7 @@ template inline auto compute_program_hash( const typename device_operation_t::operation_attributes_t& operation_attributes, const typename device_operation_t::tensor_args_t& tensor_args) { - if constexpr (DeviceOperationWithCustomProgramCacheConcept) { + if constexpr (DeviceOperationConceptWithCustomProgramCacheConcept) { ZoneScopedN("Compute custom program hash"); return device_operation_t::compute_program_hash(operation_attributes, tensor_args); } else { diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/binary.hpp b/ttnn/cpp/ttnn/operations/eltwise/binary/binary.hpp index a0cf1c9d9381..ab7f8d7e1c0e 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/binary.hpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/binary.hpp @@ -5,7 +5,7 @@ #pragma once -#include "device/binary_op.hpp" +#include "device/binary_device_operation.hpp" #include "ttnn/device_operation.hpp" #include "ttnn/operations/data_movement.hpp" @@ -28,7 +28,7 @@ constexpr bool is_associative(BinaryOpType op) { } template -struct ExecuteBinary { +struct Binary { static inline const std::array input_tensor_schemas() { return { ttnn::TensorSchema{ @@ -108,11 +108,11 @@ struct ExecuteBinary { dtype = optional_output_tensor.value().get_dtype(); } - return ttnn::device_operation::run( + return ttnn::device_operation::run( queue_id, - Binary::operation_attributes_t{ + BinaryDeviceOperation::operation_attributes_t{ binary_op_type, in_place, activations, output_memory_config, dtype, std::nullopt}, - Binary::tensor_args_t{input_tensor_a, input_tensor_b, optional_output_tensor}); + BinaryDeviceOperation::tensor_args_t{input_tensor_a, input_tensor_b, optional_output_tensor}); } template @@ -145,8 +145,14 @@ struct ExecuteBinary { const std::optional &memory_config = std::nullopt, const std::optional &optional_output_tensor = std::nullopt, std::optional activations = std::nullopt) { - - return 
ExecuteBinary::execute_on_worker_thread(DefaultQueueId, input_tensor_a, scalar, dtype, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, optional_output_tensor, activations); + return Binary::execute_on_worker_thread( + DefaultQueueId, + input_tensor_a, + scalar, + dtype, + operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + optional_output_tensor, + activations); } template @@ -172,36 +178,71 @@ struct ExecuteBinary { Layout::TILE); Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.device()); // TODO(arakhmati): #7637 pass in memory_config instead of operation::DEFAULT_OUTPUT_MEMORY_CONFIG - return ExecuteBinary::execute_on_worker_thread( - input_tensor_a, scalar_tensor_device, dtype, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, optional_output_tensor, activations); + return Binary::execute_on_worker_thread( + input_tensor_a, + scalar_tensor_device, + dtype, + operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + optional_output_tensor, + activations); } }; } // operations::binary -constexpr auto add = ttnn::register_operation>("ttnn::add"); -constexpr auto add_ = ttnn::register_operation>("ttnn::add_"); -constexpr auto subtract = ttnn::register_operation>("ttnn::subtract"); -constexpr auto subtract_ = ttnn::register_operation>("ttnn::subtract_"); -constexpr auto multiply = ttnn::register_operation>("ttnn::multiply"); -constexpr auto multiply_ = ttnn::register_operation>("ttnn::multiply_"); - -constexpr auto eq = ttnn::register_operation>("ttnn::eq"); -constexpr auto ne = ttnn::register_operation>("ttnn::ne"); -constexpr auto ge = ttnn::register_operation>("ttnn::ge"); -constexpr auto gt = ttnn::register_operation>("ttnn::gt"); -constexpr auto le = ttnn::register_operation>("ttnn::le"); -constexpr auto lt = ttnn::register_operation>("ttnn::lt"); -constexpr auto logical_and = ttnn::register_operation>("ttnn::logical_and"); -constexpr auto logical_or = ttnn::register_operation>("ttnn::logical_or"); -constexpr auto ldexp = ttnn::register_operation>("ttnn::ldexp"); - -constexpr auto 
logaddexp = ttnn::register_operation>("ttnn::logaddexp"); -constexpr auto logaddexp2 = ttnn::register_operation>("ttnn::logaddexp2"); -constexpr auto squared_difference = ttnn::register_operation>("ttnn::squared_difference"); -constexpr auto divide = ttnn::register_operation>("ttnn::divide"); -constexpr auto bias_gelu = ttnn::register_operation>("ttnn::bias_gelu"); - +constexpr auto add = + ttnn::register_operation>("ttnn::add"); +constexpr auto add_ = + ttnn::register_operation>("ttnn::add_"); +constexpr auto subtract = + ttnn::register_operation>( + "ttnn::subtract"); +constexpr auto subtract_ = + ttnn::register_operation>( + "ttnn::subtract_"); +constexpr auto multiply = + ttnn::register_operation>( + "ttnn::multiply"); +constexpr auto multiply_ = + ttnn::register_operation>( + "ttnn::multiply_"); + +constexpr auto eq = + ttnn::register_operation>("ttnn::eq"); +constexpr auto ne = + ttnn::register_operation>("ttnn::ne"); +constexpr auto ge = + ttnn::register_operation>("ttnn::ge"); +constexpr auto gt = + ttnn::register_operation>("ttnn::gt"); +constexpr auto le = + ttnn::register_operation>("ttnn::le"); +constexpr auto lt = + ttnn::register_operation>("ttnn::lt"); +constexpr auto logical_and = + ttnn::register_operation>( + "ttnn::logical_and"); +constexpr auto logical_or = + ttnn::register_operation>( + "ttnn::logical_or"); +constexpr auto ldexp = + ttnn::register_operation>("ttnn::ldexp"); + +constexpr auto logaddexp = + ttnn::register_operation>( + "ttnn::logaddexp"); +constexpr auto logaddexp2 = + ttnn::register_operation>( + "ttnn::logaddexp2"); +constexpr auto squared_difference = + ttnn::register_operation>( + "ttnn::squared_difference"); +constexpr auto divide = + ttnn::register_operation>( + "ttnn::divide"); +constexpr auto bias_gelu = + ttnn::register_operation>( + "ttnn::bias_gelu"); template ttnn::Tensor operator+(const ttnn::Tensor &input_tensor_a, InputBType scalar) { diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.cpp 
b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp similarity index 83% rename from ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.cpp rename to ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp index 501f5d75ac6e..bf8031e47e55 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp @@ -2,8 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "binary_op.hpp" - +#include "binary_device_operation.hpp" #include "tt_eager/tt_dnn/op_library/bcast/bcast_op.hpp" #include "tt_eager/tt_dnn/op_library/work_split.hpp" #include "tt_metal/common/constants.hpp" @@ -15,7 +14,10 @@ namespace utils { using namespace tt::tt_metal; std::map get_defines( - BinaryOpType op_type, const std::optional input_dtype, const std::optional output_dtype, const std::optional> fused_activations) { + BinaryOpType op_type, + const std::optional input_dtype, + const std::optional output_dtype, + const std::optional> fused_activations) { std::map defines; string op_name = "sub_tiles"; string op_binary_type = "EltwiseBinaryType::ELWSUB"; @@ -102,28 +104,29 @@ std::map get_defines( default: TT_ASSERT(false && "Undefined op type"); } - if(input_dtype.has_value() && output_dtype.has_value() && + if (input_dtype.has_value() && output_dtype.has_value() && ((input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::UINT32) || - (input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::UINT16) || - (input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::INT32) || - (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::BFLOAT16) || - (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::BFLOAT16) || - (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::BFLOAT16) || - (input_dtype.value() == 
DataType::BFLOAT16 && output_dtype.value() == DataType::FLOAT32) || - (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::UINT16) || - (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::FLOAT32) || - (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::INT32) || - (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::FLOAT32) || - (input_dtype.value() == DataType::BFLOAT8_B && output_dtype.value() == DataType::UINT16) || - (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::BFLOAT8_B) || - (input_dtype.value() == DataType::BFLOAT8_B && output_dtype.value() == DataType::INT32) || - (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::BFLOAT8_B))){ + (input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::UINT16) || + (input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::INT32) || + (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::BFLOAT16) || + (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::BFLOAT16) || + (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::BFLOAT16) || + (input_dtype.value() == DataType::BFLOAT16 && output_dtype.value() == DataType::FLOAT32) || + (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::UINT16) || + (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::FLOAT32) || + (input_dtype.value() == DataType::FLOAT32 && output_dtype.value() == DataType::INT32) || + (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::FLOAT32) || + (input_dtype.value() == DataType::BFLOAT8_B && output_dtype.value() == DataType::UINT16) || + (input_dtype.value() == DataType::UINT16 && output_dtype.value() == DataType::BFLOAT8_B) || + (input_dtype.value() == DataType::BFLOAT8_B && output_dtype.value() == 
DataType::INT32) || + (input_dtype.value() == DataType::INT32 && output_dtype.value() == DataType::BFLOAT8_B))) { TT_ASSERT(defines.count("SFPU_OP_CHAIN_0") == 0 && "SFPU_OP_CHAIN_0 already defined"); auto in_dataformat = std::to_string((uint32_t)datatype_to_dataformat_converter(input_dtype.value())); auto out_dataformat = std::to_string((uint32_t)datatype_to_dataformat_converter(output_dtype.value())); - defines.insert({"SFPU_OP_CHAIN_0", - fmt::format("typecast_tile_init(); typecast_tile<{0}u, {1}u>(i);", in_dataformat, out_dataformat)}); + defines.insert( + {"SFPU_OP_CHAIN_0", + fmt::format("typecast_tile_init(); typecast_tile<{0}u, {1}u>(i);", in_dataformat, out_dataformat)}); defines.insert({"SFPU_OP_TYPECAST_INCLUDE", "1"}); } @@ -143,9 +146,9 @@ std::map get_defines( } // namespace utils -Binary::program_factory_t Binary::select_program_factory( +BinaryDeviceOperation::program_factory_t BinaryDeviceOperation::select_program_factory( const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { - ZoneScopedN("Binary::select_program_factory"); + ZoneScopedN("BinaryDeviceOperation::select_program_factory"); const auto& input_shape_a = tensor_args.input_tensor_a.tensor_attributes->shape; const auto& input_shape_b = tensor_args.input_tensor_b.tensor_attributes->shape; @@ -166,16 +169,16 @@ Binary::program_factory_t Binary::select_program_factory( return BroadcastWidthMultiCore{}; } } - TT_THROW("ttnn::operations::binary::Binary: unsupported broadcast"); + TT_THROW("ttnn::operations::binary::BinaryDeviceOperation: unsupported broadcast"); } -void Binary::validate_on_program_cache_miss( +void BinaryDeviceOperation::validate_on_program_cache_miss( const operation_attributes_t& attributes, const tensor_args_t& tensor_args) { const auto& input_tensor_a = tensor_args.input_tensor_a; const auto& input_tensor_b = tensor_args.input_tensor_b; const auto& output_tensor = tensor_args.output_tensor; - Binary::validate_on_program_cache_hit(attributes, 
tensor_args); + BinaryDeviceOperation::validate_on_program_cache_hit(attributes, tensor_args); TT_FATAL( input_tensor_a.device() == input_tensor_b.device(), @@ -243,7 +246,8 @@ void Binary::validate_on_program_cache_miss( "ignored"); } } -void Binary::validate_on_program_cache_hit(const operation_attributes_t& attributes, const tensor_args_t& tensor_args) { +void BinaryDeviceOperation::validate_on_program_cache_hit( + const operation_attributes_t& attributes, const tensor_args_t& tensor_args) { const auto& input_tensor_a = tensor_args.input_tensor_a; const auto& input_tensor_b = tensor_args.input_tensor_b; const auto& output_tensor = tensor_args.output_tensor; @@ -265,22 +269,24 @@ void Binary::validate_on_program_cache_hit(const operation_attributes_t& attribu if (batch_size_0_a != batch_size_0_b) { TT_ASSERT( batch_size_0_a > batch_size_0_b and batch_size_0_b == 1, - "ttnn::operations::binary::Binary: batch size mismatch"); + "ttnn::operations::binary::BinaryDeviceOperation: batch size mismatch"); } if (batch_size_1_a != batch_size_1_b) { TT_ASSERT( batch_size_1_a > batch_size_1_b and batch_size_1_b == 1, - "ttnn::operations::binary::Binary: batch size mismatch"); + "ttnn::operations::binary::BinaryDeviceOperation: batch size mismatch"); } if (height_a != height_b) { - TT_ASSERT(height_a > height_b and height_b == 1, "ttnn::operations::binary::Binary: height mismatch"); + TT_ASSERT( + height_a > height_b and height_b == 1, "ttnn::operations::binary::BinaryDeviceOperation: height mismatch"); } if (width_a != width_b) { - TT_ASSERT(width_a > width_b and width_b == 1, "ttnn::operations::binary::Binary: width mismatch"); + TT_ASSERT( + width_a > width_b and width_b == 1, "ttnn::operations::binary::BinaryDeviceOperation: width mismatch"); } } -Binary::shape_return_value_t Binary::compute_output_shapes( +BinaryDeviceOperation::shape_return_value_t BinaryDeviceOperation::compute_output_shapes( const operation_attributes_t&, const tensor_args_t& tensor_args) { const auto 
input_shape_a = tensor_args.input_tensor_a.tensor_attributes->shape; const auto input_shape_b = tensor_args.input_tensor_b.tensor_attributes->shape; @@ -303,7 +309,7 @@ Binary::shape_return_value_t Binary::compute_output_shapes( return ttnn::Shape::from_vector(output_shape, output_shape_with_tile_padding); } -Binary::tensor_return_value_t Binary::create_output_tensors( +BinaryDeviceOperation::tensor_return_value_t BinaryDeviceOperation::create_output_tensors( const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { auto output_shape = compute_output_shapes(operation_attributes, tensor_args); const auto& input_tensor_a = tensor_args.input_tensor_a; @@ -361,15 +367,27 @@ Binary::tensor_return_value_t Binary::create_output_tensors( } } -tt::stl::hash::hash_t Binary::compute_program_hash( +tt::stl::hash::hash_t BinaryDeviceOperation::compute_program_hash( const operation_attributes_t& attributes, const tensor_args_t& tensor_args) { const auto& input_tensor_a = tensor_args.input_tensor_a; const auto& input_tensor_b = tensor_args.input_tensor_b; auto program_factory = select_program_factory(attributes, tensor_args); - TT_ASSERT(std::holds_alternative(input_tensor_a.get_storage()), fmt::format("Unexpected type {} in {}:{} ",tt::stl::get_active_type_name_in_variant(input_tensor_a.get_storage()),__FILE__, __LINE__)); - TT_ASSERT(std::holds_alternative(input_tensor_b.get_storage()), fmt::format("Unexpected type {} in {}:{} ",tt::stl::get_active_type_name_in_variant(input_tensor_b.get_storage()),__FILE__, __LINE__)); - operation::Hash hash = operation::hash_operation( + TT_ASSERT( + std::holds_alternative(input_tensor_a.get_storage()), + fmt::format( + "Unexpected type {} in {}:{} ", + tt::stl::get_active_type_name_in_variant(input_tensor_a.get_storage()), + __FILE__, + __LINE__)); + TT_ASSERT( + std::holds_alternative(input_tensor_b.get_storage()), + fmt::format( + "Unexpected type {} in {}:{} ", + 
tt::stl::get_active_type_name_in_variant(input_tensor_b.get_storage()), + __FILE__, + __LINE__)); + operation::Hash hash = operation::hash_operation( attributes, program_factory.index(), input_tensor_a.dtype(), @@ -379,7 +397,7 @@ tt::stl::hash::hash_t Binary::compute_program_hash( return hash; } -operation::OpPerformanceModel Binary::create_op_performance_model( +operation::OpPerformanceModel BinaryDeviceOperation::create_op_performance_model( const operation_attributes_t& attributes, const tensor_args_t& tensor_args, tensor_return_value_t& tensor_return_value) { @@ -399,7 +417,7 @@ operation::OpPerformanceModel Binary::create_op_performance_model( // TODO: update OpPerformanceModel to work on variadic arguments operation::OpPerformanceModel result({input_tensor_a, input_tensor_b}, {output_tensor}, ideal_eltwise_cycles); #if 0 - tt::log_info(tt::LogOp, "Binary PerfModel:"); + tt::log_info(tt::LogOp, "BinaryDeviceOperation PerfModel:"); tt::log_info(tt::LogOp, "\t Data (Bytes): {}", total_bytes); tt::log_info(tt::LogOp, "\t ideal_eltwise_cycles: {}", ideal_eltwise_cycles); #endif diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.hpp similarity index 99% rename from ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp rename to ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.hpp index c45bff9fde35..feb043f99c13 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.hpp @@ -57,7 +57,7 @@ std::map get_defines( } // namespace utils -struct Binary { +struct BinaryDeviceOperation { struct operation_attributes_t { BinaryOpType binary_op_type; bool in_place; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp 
b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp index e3c09d9c5453..18b3f24d435a 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp @@ -4,7 +4,7 @@ #include -#include "binary_op.hpp" +#include "binary_device_operation.hpp" #include "impl/buffers/buffer.hpp" #include "tensor/tensor.hpp" #include "tt_dnn/op_library/bcast/bcast_op.hpp" @@ -25,7 +25,8 @@ static const tt::tt_metal::BcastOpMath binary_op_type_to_bcast_op_math(const Bin } } -Binary::BroadcastHeightAndWidthMultiCore::cached_program_t Binary::BroadcastHeightAndWidthMultiCore::create( +BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::cached_program_t +BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::create( const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, tensor_return_value_t& tensor_return_value) { @@ -229,7 +230,7 @@ Binary::BroadcastHeightAndWidthMultiCore::cached_program_t Binary::BroadcastHeig cb_output}}; } -void Binary::BroadcastHeightAndWidthMultiCore::override_runtime_arguments( +void BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::override_runtime_arguments( cached_program_t& cached_program, const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp index 226091271201..5f3a99cb6ef8 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "binary_op.hpp" 
+#include "binary_device_operation.hpp" #include "tensor/tensor.hpp" #include "tt_dnn/op_library/bcast/bcast_op.hpp" #include "tt_dnn/op_library/work_split.hpp" @@ -22,10 +22,11 @@ static const tt::tt_metal::BcastOpMath binary_op_type_to_bcast_op_math(const Bin } } -Binary::BroadcastHeightMultiCore::cached_program_t Binary :: BroadcastHeightMultiCore::create( - const operation_attributes_t& operation_attributes, - const tensor_args_t& tensor_args, - tensor_return_value_t& tensor_return_value) { +BinaryDeviceOperation::BroadcastHeightMultiCore::cached_program_t +BinaryDeviceOperation ::BroadcastHeightMultiCore::create( + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) { using namespace tt; using namespace tt::tt_metal; @@ -200,11 +201,11 @@ Binary::BroadcastHeightMultiCore::cached_program_t Binary :: BroadcastHeightMult }; } -void Binary :: BroadcastHeightMultiCore::override_runtime_arguments( - cached_program_t& cached_program, - const operation_attributes_t& operation_attributes, - const tensor_args_t& tensor_args, - tensor_return_value_t& tensor_return_value) { +void BinaryDeviceOperation ::BroadcastHeightMultiCore::override_runtime_arguments( + cached_program_t& cached_program, + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) { using namespace tt; using namespace tt::tt_metal; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp index b595735161c5..3bdd6e935723 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include 
"binary_op.hpp" +#include "binary_device_operation.hpp" #include "tensor/tensor.hpp" #include "tt_dnn/op_library/bcast/bcast_op.hpp" #include "tt_dnn/op_library/work_split.hpp" @@ -22,7 +22,7 @@ static const tt::tt_metal::BcastOpMath binary_op_type_to_bcast_op_math(const Bin } } -Binary::BroadcastWidthMultiCore::cached_program_t Binary::BroadcastWidthMultiCore::create( +BinaryDeviceOperation::BroadcastWidthMultiCore::cached_program_t BinaryDeviceOperation::BroadcastWidthMultiCore::create( const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, tensor_return_value_t& tensor_return_value) { @@ -199,7 +199,7 @@ Binary::BroadcastWidthMultiCore::cached_program_t Binary::BroadcastWidthMultiCor {binary_reader_kernel_id, unary_writer_kernel_id, bcast_kernel_id, compute_with_storage_grid_size}}; } -void Binary::BroadcastWidthMultiCore::override_runtime_arguments( +void BinaryDeviceOperation::BroadcastWidthMultiCore::override_runtime_arguments( cached_program_t& cached_program, const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp index 521a83d0d8d6..5f2bfcdcad19 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp @@ -4,7 +4,7 @@ #include -#include "binary_op.hpp" +#include "binary_device_operation.hpp" #include "tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp" #include "tt_eager/tt_dnn/op_library/work_split.hpp" #include "tt_metal/common/constants.hpp" @@ -245,7 +245,7 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args( UpdateCircularBufferTotalSize(program, cb_output, num_tiles_per_core_group_1 * dst_single_tile_size); } } 
-Binary::ElementWiseMultiCore::cached_program_t Binary::ElementWiseMultiCore::create( +BinaryDeviceOperation::ElementWiseMultiCore::cached_program_t BinaryDeviceOperation::ElementWiseMultiCore::create( const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, tensor_return_value_t& tensor_return_value) { @@ -425,7 +425,7 @@ Binary::ElementWiseMultiCore::cached_program_t Binary::ElementWiseMultiCore::cre dst_single_tile_size}}; } -void Binary::ElementWiseMultiCore::override_runtime_arguments( +void BinaryDeviceOperation::ElementWiseMultiCore::override_runtime_arguments( cached_program_t& cached_program, const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args, diff --git a/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.cpp b/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.cpp new file mode 100644 index 000000000000..62d328d8c8ef --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.cpp @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "example_device_operation.hpp" + +namespace ttnn::operations::example { + +ExampleDeviceOperation::program_factory_t ExampleDeviceOperation::select_program_factory( + const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { + bool some_condition_based_on_operation_attributes_and_or_tensor_args = true; + if (some_condition_based_on_operation_attributes_and_or_tensor_args) { + return SingleCore{}; + } + return MultiCore{}; +} + +void ExampleDeviceOperation::validate_on_program_cache_miss( + const operation_attributes_t& attributes, const tensor_args_t& tensor_args) {} + +void ExampleDeviceOperation::validate_on_program_cache_hit( + const operation_attributes_t& attributes, const tensor_args_t& tensor_args) {} + +ExampleDeviceOperation::shape_return_value_t ExampleDeviceOperation::compute_output_shapes( + const operation_attributes_t&, const tensor_args_t& tensor_args) { + return tensor_args.input_tensor.tensor_attributes->shape; +} + +ExampleDeviceOperation::tensor_return_value_t ExampleDeviceOperation::create_output_tensors( + const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { + auto output_shape = compute_output_shapes(operation_attributes, tensor_args); + const auto& input_tensor = tensor_args.input_tensor; + return create_device_tensor( + output_shape, + input_tensor.tensor_attributes->dtype, + input_tensor.tensor_attributes->layout, + input_tensor.device()); +} + +} // namespace ttnn::operations::example diff --git a/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.hpp b/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.hpp new file mode 100644 index 000000000000..7359dda5ccea --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/device/example_device_operation.hpp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "tensor/tensor.hpp" +#include "ttnn/core.hpp" +#include "ttnn/device_operation.hpp" +#include "ttnn/types.hpp" + +namespace ttnn::operations::example { + +struct ExampleDeviceOperation { + struct operation_attributes_t { + bool attribute; + int some_other_attribute; + }; + struct tensor_args_t { + // An example of the tensor that can only be used as an input + const Tensor& input_tensor; + + // An example of the tensor that can be used for input/output or just for pre-allocated output + // Tensor& io_tensor; + + // An example of an optional tensor + // std::optional optional_output_tensor; + + // An example of a vector of tensors + // std::vector vector_of_tensors; + + // An example of a tuple of tensors + // std::tuple tuple_of_tensors; + + // An example of a vector of optional tensors + // std::vector> vector_of_optional_tensors; + + // An example of a tuple of tensors + // std::tuple>, std::optional> some_crazy_tuple_of_tensors; + }; + + // Can be a single ttnn::Shape, std::optional, std::vector, std::tuple etc. + using shape_return_value_t = ttnn::Shape; + + // Can be a single Tensor, std::optional, std::vector, std::tuple etc. 
+ using tensor_return_value_t = Tensor; + + struct SingleCore { + struct cached_program_attributes_t { + int some_program_attribute_to_share; + }; + using cached_program_t = ttnn::device_operation::CachedProgram; + + static cached_program_t create( + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value); + + static void override_runtime_arguments( + cached_program_t& cached_program, + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value); + }; + + struct MultiCore { + struct cached_program_attributes_t { + int some_program_attribute_to_share; + int some_other_program_attribute_to_share; + }; + using cached_program_t = ttnn::device_operation::CachedProgram; + + static cached_program_t create( + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value); + + static void override_runtime_arguments( + cached_program_t& cached_program, + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value); + }; + + using program_factory_t = std::variant; + + static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&); + + static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&); + static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&); + + static shape_return_value_t compute_output_shapes(const operation_attributes_t&, const tensor_args_t&); + + static tensor_return_value_t create_output_tensors( + const operation_attributes_t& operation_attributes, const tensor_args_t&); +}; + +} // namespace ttnn::operations::example diff --git a/ttnn/cpp/ttnn/operations/example/example/device/multi_core_program_factory.cpp 
b/ttnn/cpp/ttnn/operations/example/example/device/multi_core_program_factory.cpp new file mode 100644 index 000000000000..97e089e95449 --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/device/multi_core_program_factory.cpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "example_device_operation.hpp" + +namespace ttnn::operations::example { +ExampleDeviceOperation::MultiCore::cached_program_t ExampleDeviceOperation::MultiCore::create( + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) { + using namespace tt; + using namespace tt::tt_metal; + + tt_metal::Program program = tt_metal::CreateProgram(); + + int some_program_attribute_to_share = 0; + int some_other_program_attribute_to_share = 0; + + return {std::move(program), {some_program_attribute_to_share, some_other_program_attribute_to_share}}; +} + +void ExampleDeviceOperation::MultiCore::override_runtime_arguments( + cached_program_t& cached_program, + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) {} + +} // namespace ttnn::operations::example diff --git a/ttnn/cpp/ttnn/operations/example/example/device/single_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/example/example/device/single_core_program_factory.cpp new file mode 100644 index 000000000000..31852725f60c --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/device/single_core_program_factory.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "example_device_operation.hpp" + +namespace ttnn::operations::example { +ExampleDeviceOperation::SingleCore::cached_program_t ExampleDeviceOperation::SingleCore::create( + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) { + using namespace tt; + using namespace tt::tt_metal; + + tt_metal::Program program = tt_metal::CreateProgram(); + + int some_program_attribute_to_share = 0; + + return {std::move(program), {some_program_attribute_to_share}}; +} + +void ExampleDeviceOperation::SingleCore::override_runtime_arguments( + cached_program_t& cached_program, + const operation_attributes_t& operation_attributes, + const tensor_args_t& tensor_args, + tensor_return_value_t& tensor_return_value) {} + +} // namespace ttnn::operations::example diff --git a/ttnn/cpp/ttnn/operations/example/example/example.hpp b/ttnn/cpp/ttnn/operations/example/example/example.hpp new file mode 100644 index 000000000000..19f7744ec877 --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/example.hpp @@ -0,0 +1,35 @@ + +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "device/example_device_operation.hpp" + +namespace ttnn::operations::example { + +struct ExampleOperation { + + static Tensor execute_on_main_thread( + uint8_t queue_id, + const Tensor &input_tensor) { + return ttnn::device_operation::run( + queue_id, + ExampleDeviceOperation::operation_attributes_t{}, + ExampleDeviceOperation::tensor_args_t{input_tensor}); + } + + static Tensor execute_on_main_thread( + const Tensor &input_tensor) { + return execute_on_main_thread(0, input_tensor); + } +}; + +} // namespace ttnn::operations::example + +namespace ttnn { + +constexpr auto example = ttnn::register_operation("ttnn::example"); + +} // namespace ttnn diff --git a/ttnn/cpp/ttnn/operations/example/example/example_pybind.hpp b/ttnn/cpp/ttnn/operations/example/example/example_pybind.hpp new file mode 100644 index 000000000000..94ae3c8dd098 --- /dev/null +++ b/ttnn/cpp/ttnn/operations/example/example/example_pybind.hpp @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "ttnn/cpp/pybind11/decorators.hpp" +#include "ttnn/operations/example/example/example.hpp" +#include "ttnn/types.hpp" + +namespace py = pybind11; + +namespace ttnn::operations::example { + +void py_module(py::module& module) { + bind_registered_operation( + module, + ttnn::example, + R"doc(example(input_tensor: ttnn.Tensor) -> ttnn.Tensor)doc", + ttnn::pybind_overload_t{ + [](const decltype(ttnn::example)& self, const ttnn::Tensor& input_tensor, const uint8_t& queue_id) + -> ttnn::Tensor { return self(queue_id, input_tensor); }, + py::arg("input_tensor"), + py::kw_only(), + py::arg("queue_id") = 0}); +} + +} // namespace ttnn::operations::example