diff --git a/CMakeLists.txt b/CMakeLists.txt index 083b369517e8..ee649dc7d86b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,16 +96,6 @@ if (NOT NUMA_LIBRARY) message(FATAL_ERROR "NUMA library not found") endif() - -CPMAddPackage( - NAME reflect - GITHUB_REPOSITORY boost-ext/reflect - GIT_TAG v1.1.1 -) -add_library(reflect INTERFACE) -target_include_directories(reflect SYSTEM INTERFACE ${reflect_SOURCE_DIR}) -add_library(reflect::reflect ALIAS reflect) - ############################################################################################################################ # Constructing interface libs for common compiler flags, header directories, and libraries # These interface libs are linked with PUBLIC scope at lowest common target (tt_metal/common) and at tt_metal_libs level @@ -142,6 +132,7 @@ endif() add_library(metal_header_directories INTERFACE) target_include_directories(metal_header_directories INTERFACE ${PROJECT_SOURCE_DIR}/tt_metal/hw/inc) +target_include_directories(metal_header_directories SYSTEM INTERFACE ${reflect_SOURCE_DIR}) foreach(lib ${BoostPackages}) target_include_directories(metal_header_directories INTERFACE ${Boost${lib}_SOURCE_DIR}/include) endforeach() diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 622489e7fdd9..025edeae1b89 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -48,3 +48,13 @@ if (googletest_ADDED) target_link_libraries(gtest PRIVATE c++ c++abi) target_link_libraries(gtest_main PRIVATE c++ c++abi) endif() + +############################################################################################################################ +# boost-ext reflect : https://github.com/boost-ext/reflect +############################################################################################################################ + +CPMAddPackage( + NAME reflect + GITHUB_REPOSITORY boost-ext/reflect + GIT_TAG v1.1.1 +) diff --git 
a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py index 3732cb21f0ae..0d9717160338 100644 --- a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py +++ b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py @@ -43,10 +43,10 @@ def forward(self, x): @pytest.mark.parametrize( "generation_start_pos, expected_compile_time, expected_inference_time", ( - (32, 150, 0.058), # FIXME: Perf regression (issue #9479) - (128, 150, 0.058), # FIXME: Perf regression (issue #9479) - (1024, 150, 0.058), # FIXME: Perf regression (issue #9479) - (2048, 150, 0.058), # FIXME: Perf regression (issue #9479) + (32, 150, 0.075), + (128, 150, 0.075), + (1024, 150, 0.075), + (2048, 150, 0.075), ), ) def test_mixtral_model_perf( @@ -61,7 +61,7 @@ def test_mixtral_model_perf( # Can use dummy_weights=True correctness is not tested, but it is much slower model_args = TtModelArgs(t3k_device_mesh.get_device(0), dummy_weights=False) - model_args.n_layers = 1 + model_args.n_layers = 32 # Clear global profiler state before starting measurements profiler.clear() diff --git a/tests/scripts/set_up_end_to_end_tests_env.sh b/tests/scripts/set_up_end_to_end_tests_env.sh index 27c4d78d8f7a..9a7e1e6c3869 100755 --- a/tests/scripts/set_up_end_to_end_tests_env.sh +++ b/tests/scripts/set_up_end_to_end_tests_env.sh @@ -21,6 +21,7 @@ set_up_end_to_end_tests_env() { python -m pip install -r requirements.txt python -m pip install ../../metal_libs-*.whl + cd ../../ rm -rf tt_metal tt_eager ttnn models echo "Showing current directory" ls -hal diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh index 2cf1dc5dcc4e..6140b9efeafd 100755 --- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh +++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh @@ -22,7 +22,7 @@ run_t3000_mixtral_tests() { echo "LOG_METAL: Running run_t3000_mixtral_tests" - env pytest 
models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py::test_mixtral_model_perf[wormhole_b0-True-2048-150-0.058] -m "model_perf_t3000" + env pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py -m "model_perf_t3000" # Record the end time end_time=$(date +%s) diff --git a/tests/scripts/t3000/run_t3000_unit_tests.sh b/tests/scripts/t3000/run_t3000_unit_tests.sh index ea092261a138..a8019137642b 100755 --- a/tests/scripts/t3000/run_t3000_unit_tests.sh +++ b/tests/scripts/t3000/run_t3000_unit_tests.sh @@ -80,7 +80,6 @@ run_t3000_mixtral_tests() { pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-1-1-pcc] # Record the end time end_time=$(date +%s) @@ -111,7 +110,7 @@ main() { echo "Script is being sourced, not executing main function" return 0 fi - + if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 diff --git a/tests/ttnn/unit_tests/gtests/CMakeLists.txt b/tests/ttnn/unit_tests/gtests/CMakeLists.txt index 3bee41905b42..359a0301929b 100644 --- a/tests/ttnn/unit_tests/gtests/CMakeLists.txt +++ b/tests/ttnn/unit_tests/gtests/CMakeLists.txt @@ -9,7 +9,7 @@ set(TTNN_UNIT_TESTS_SRC add_executable(unit_tests_ttnn ${TTNN_UNIT_TESTS_SRC}) -target_link_libraries(unit_tests_ttnn PUBLIC test_common_libs ttnn_lib tt_metal tt_eager reflect::reflect) +target_link_libraries(unit_tests_ttnn PUBLIC test_common_libs ttnn_lib tt_metal tt_eager) target_include_directories(unit_tests_ttnn PRIVATE ${UMD_HOME} ${PROJECT_SOURCE_DIR} diff --git a/tt_eager/tt_dnn/op_library/CMakeLists.txt b/tt_eager/tt_dnn/op_library/CMakeLists.txt index e6be1fe00bc2..6a920b9dc29c 100644 --- a/tt_eager/tt_dnn/op_library/CMakeLists.txt +++ b/tt_eager/tt_dnn/op_library/CMakeLists.txt @@ -220,7 
+220,7 @@ set(TT_DNN_SRCS add_library(tt_dnn OBJECT ${TT_DNN_SRCS}) -target_link_libraries(tt_dnn PUBLIC metal_header_directories compiler_flags umd_device reflect::reflect) +target_link_libraries(tt_dnn PUBLIC metal_header_directories compiler_flags umd_device) target_include_directories(tt_dnn PUBLIC ${UMD_HOME} ${PROJECT_SOURCE_DIR} diff --git a/tt_metal/tools/profiler/op_profiler.hpp b/tt_metal/tools/profiler/op_profiler.hpp index 09414d8c5878..79c231ea50f9 100644 --- a/tt_metal/tools/profiler/op_profiler.hpp +++ b/tt_metal/tools/profiler/op_profiler.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include <reflect> #include #include @@ -274,17 +275,12 @@ inline json get_base_json( j["op_code"] = opName; json attributesObj; - constexpr auto& attribute_names = std::decay_t<decltype(operation_attributes)>::attribute_names; - const auto attribute_values = operation_attributes.attribute_values(); - [&attributesObj, &attribute_names, &attribute_values]<std::size_t... Ns>(std::index_sequence<Ns...>) { - ( - [&attributesObj, &attribute_names, &attribute_values] { - const auto& attribute_name = std::get<Ns>(attribute_names); - const auto& attribute = std::get<Ns>(attribute_values); - attributesObj[attribute_name] = fmt::format("{}", attribute); - }(), - ...); - }(std::make_index_sequence<std::tuple_size_v<std::decay_t<decltype(attribute_names)>>>{}); + reflect::for_each( + [&attributesObj, &operation_attributes](auto I) { + attributesObj[std::string{reflect::member_name<I>(operation_attributes)}] = + fmt::format("{}", reflect::get<I>(operation_attributes)); + }, + operation_attributes); j["attributes"] = attributesObj; std::vector input_tensors; diff --git a/tt_metal/tt_stl/reflection.hpp b/tt_metal/tt_stl/reflection.hpp index 3b225fe47d99..1deb133dfa72 100644 --- a/tt_metal/tt_stl/reflection.hpp +++ b/tt_metal/tt_stl/reflection.hpp @@ -9,6 +9,7 @@ #include #include #include +#include <reflect> #include #include #include @@ -16,7 +17,6 @@ #include #include "third_party/magic_enum/magic_enum.hpp" - #include "type_name.hpp" namespace tt { @@ -37,9 +37,7 @@ concept IsVariant = requires { typename std::variant_size<Variant>::type; };
template <typename Variant> constexpr auto get_active_type_name_in_variant(const Variant& v) { - return std::visit([](auto&& arg) -> std::string_view { - return short_type_name<std::decay_t<decltype(arg)>>; - }, v); + return std::visit([](auto&& arg) -> std::string_view { return short_type_name<std::decay_t<decltype(arg)>>; }, v); } // Forward Declare hash_object @@ -397,46 +395,100 @@ std::ostream& operator<<(std::ostream& os, const std::set<T>& set) { return os; } -template <typename to_visit_t, typename T> - requires std::same_as<std::decay_t<T>, to_visit_t> +template <typename object_t, typename T> + requires std::same_as<std::decay_t<T>, object_t> constexpr auto visit_object_of_type(auto callback, T&& value) { callback(value); } -template <typename to_visit_t, typename T> +template <typename object_t, typename T> constexpr auto visit_object_of_type(auto callback, const std::optional<T>& value) { if (value.has_value()) { - visit_object_of_type<to_visit_t>(callback, value.value()); + visit_object_of_type<object_t>(callback, value.value()); } } -template <typename to_visit_t, typename T> +template <typename object_t, typename T> constexpr auto visit_object_of_type(auto callback, const std::vector<T>& value) { for (auto& tensor : value) { - visit_object_of_type<to_visit_t>(callback, tensor); + visit_object_of_type<object_t>(callback, tensor); } } -template <typename to_visit_t, typename T, auto N> +template <typename object_t, typename T, auto N> constexpr auto visit_object_of_type(auto callback, const std::array<T, N>& value) { for (auto& tensor : value) { - visit_object_of_type<to_visit_t>(callback, tensor); + visit_object_of_type<object_t>(callback, tensor); } } -template <typename to_visit_t, typename... Ts> +template <typename object_t, typename... Ts> constexpr auto visit_object_of_type(auto callback, const std::tuple<Ts...>& value) { constexpr auto num_attributes = sizeof...(Ts); [&callback, &value]<std::size_t... Ns>(std::index_sequence<Ns...>) { - (visit_object_of_type<to_visit_t>(callback, std::get<Ns>(value)), ...); + (visit_object_of_type<object_t>(callback, std::get<Ns>(value)), ...); }(std::make_index_sequence<num_attributes>{}); } -template <typename to_visit_t, typename T> - requires(not std::same_as<std::decay_t<T>, to_visit_t>) and requires { std::decay_t<T>::attribute_names; } +template <typename object_t, typename T> + requires(not std::same_as<std::decay_t<T>, object_t>) and requires { std::decay_t<T>::attribute_names; } +constexpr auto visit_object_of_type(auto callback, T&& object) { + constexpr auto num_attributes = std::tuple_size_v<decltype(std::decay_t<T>::attribute_names)>; + visit_object_of_type<object_t>(callback, object.attribute_values()); +} + +template <typename object_t, typename T> + requires(not
std::same_as<std::decay_t<T>, object_t>) and requires { std::is_aggregate_v<std::decay_t<T>>; } constexpr auto visit_object_of_type(auto callback, T&& object) { + reflect::for_each( + [&callback, &object](auto I) { visit_object_of_type<object_t>(callback, reflect::get<I>(object)); }, object); +} + +template <typename object_t, typename T> + requires std::same_as<std::decay_t<T>, object_t> +constexpr auto get_first_object_of_type(T&& value) { + return std::cref(value); +} + +template <typename object_t, typename T> +constexpr auto get_first_object_of_type(const std::optional<T>& value) { + if (value.has_value()) { + const auto& tensor = value.value(); + return get_first_object_of_type<object_t>(tensor); + } +} + +template <typename object_t, typename T> +constexpr auto get_first_object_of_type(const std::vector<T>& value) { + for (auto& tensor : value) { + return get_first_object_of_type<object_t>(tensor); + } +} + +template <typename object_t, typename T, auto N> +constexpr auto get_first_object_of_type(const std::array<T, N>& value) { + for (auto& tensor : value) { + return get_first_object_of_type<object_t>(tensor); + } +} + +template <typename object_t, typename... Ts> +constexpr auto get_first_object_of_type(const std::tuple<Ts...>& value) { + constexpr auto num_attributes = sizeof...(Ts); + return get_first_object_of_type<object_t>(std::get<0>(value)); +} + +template <typename object_t, typename T> + requires (not std::same_as<std::decay_t<T>, object_t>) and requires { std::decay_t<T>::attribute_names; } +constexpr auto get_first_object_of_type(T&& object) { constexpr auto num_attributes = std::tuple_size_v<decltype(std::decay_t<T>::attribute_names)>; - visit_object_of_type<to_visit_t>(callback, object.attribute_values()); + return get_first_object_of_type<object_t>(object.attribute_values()); +} + +template <typename object_t, typename T> + requires (not std::same_as<std::decay_t<T>, object_t>) and requires { std::is_aggregate_v<std::decay_t<T>>; } +constexpr auto get_first_object_of_type(T&& object) { + return get_first_object_of_type<object_t>(reflect::get<0>(object)); } } // namespace reflection @@ -694,6 +746,13 @@ inline hash_t hash_object(const T& object) noexcept { } else { return 0; } + } else if constexpr (std::is_aggregate_v<T>) { + if constexpr (DEBUG_HASH_OBJECT_FUNCTION) { + fmt::print("Hashing struct {} using reflect library: {}\n", get_type_name<T>(), object); + } + std::size_t hash = 0; +
reflect::for_each([&hash, &object](auto I) { hash = hash_objects(hash, reflect::get<I>(object)); }, object); + return hash; } else { static_assert(tt::stl::concepts::always_false_v<T>, "Type doesn't support std::hash"); } diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt index 6d3b1549c428..c8262b3a3a27 100644 --- a/ttnn/CMakeLists.txt +++ b/ttnn/CMakeLists.txt @@ -17,7 +17,7 @@ set(TTNN_SRCS add_library(ttnn_lib OBJECT ${TTNN_SRCS}) target_compile_options(ttnn_lib PUBLIC -MP -Wno-int-to-pointer-cast -fno-var-tracking) target_link_libraries(ttnn_lib - PUBLIC compiler_flags metal_header_directories metal_common_libs reflect::reflect + PUBLIC compiler_flags metal_header_directories metal_common_libs ) target_include_directories(ttnn_lib PUBLIC ${UMD_HOME} diff --git a/ttnn/cpp/ttnn/device_operation.hpp b/ttnn/cpp/ttnn/device_operation.hpp index 652eb88d8d05..ec9b2a93434d 100644 --- a/ttnn/cpp/ttnn/device_operation.hpp +++ b/ttnn/cpp/ttnn/device_operation.hpp @@ -15,7 +15,6 @@ #include "tt_stl/concepts.hpp" #include "tt_stl/reflection.hpp" #include "tt_stl/unique_any.hpp" -#include namespace ttnn { @@ -96,47 +95,6 @@ template return table[i]; } -template <typename T> - requires std::same_as<std::decay_t<T>, Tensor> -constexpr auto get_first_tensor(T&& value) { - return std::cref(value); -} - -template <typename T> -constexpr auto get_first_tensor(const std::optional<T>& value) { - if (value.has_value()) { - const auto& tensor = value.value(); - return get_first_tensor(tensor); - } -} - -template <typename T> -constexpr auto get_first_tensor(const std::vector<T>& value) { - for (auto& tensor : value) { - return get_first_tensor(tensor); - } -} - -template <typename T, auto N> -constexpr auto get_first_tensor(const std::array<T, N>& value) { - for (auto& tensor : value) { - return get_first_tensor(tensor); - } -} - -template <typename... Ts> -constexpr auto get_first_tensor(const std::tuple<Ts...>& value) { - constexpr auto num_attributes = sizeof...(Ts); - return get_first_tensor(std::get<0>(value)); -} - -template <typename T> - requires requires { std::decay_t<T>::attribute_names; } and (not
std::same_as<std::decay_t<T>, Tensor>) -constexpr auto get_first_tensor(T&& object) { - constexpr auto num_attributes = std::tuple_size_v<decltype(std::decay_t<T>::attribute_names)>; - return get_first_tensor(object.attribute_values()); -} - inline const auto USE_FAST_DISPATCH = std::getenv("TT_METAL_SLOW_DISPATCH_MODE") == nullptr; template <typename device_operation_t> @@ -231,7 +189,8 @@ typename device_operation_t::tensor_return_value_t run( using tensor_return_value_t = typename device_operation_t::tensor_return_value_t; static_assert(not std::same_as<tensor_return_value_t, void>, "Operation cannot return type cannot be void"); - auto device = get_first_tensor(tensor_args).get().device(); + // TODO: support the case when tensor args are empty? Or add an overload for that case? + auto device = tt::stl::reflection::get_first_object_of_type<Tensor>(tensor_args).get().device(); auto& program_cache = device->program_cache; auto program_hash = compute_program_hash(operation_attributes, tensor_args); diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp index cc4906f4daae..c45bff9fde35 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_op.hpp @@ -65,29 +65,11 @@ struct Binary { const MemoryConfig memory_config; const DataType dtype; std::optional<const DeviceComputeKernelConfig> compute_kernel_config; - - static constexpr auto attribute_names = std::forward_as_tuple( - "binary_op_type", "in_place", "activations", "memory_config", "dtype", "compute_kernel_config"); - const auto attribute_values() const { - return std::forward_as_tuple( - this->binary_op_type, - this->in_place, - this->activations, - this->memory_config, - this->dtype, - this->compute_kernel_config); - } }; struct tensor_args_t { const Tensor& input_tensor_a; const Tensor& input_tensor_b; std::optional<Tensor> output_tensor; - - static constexpr auto attribute_names = - std::forward_as_tuple("input_tensor_a", "input_tensor_b", "output_tensor"); - const auto attribute_values() const { -
return std::forward_as_tuple(this->input_tensor_a, this->input_tensor_b, this->output_tensor); - } }; using shape_return_value_t = ttnn::Shape; using tensor_return_value_t = Tensor;