From 6fca68d996a4476136791a70f6bdb5b07276a6b8 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 28 Oct 2024 10:40:15 -0700 Subject: [PATCH 1/5] Define exp extension --- .../core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst | 62 +++++++++++++++++++ .../core/exp-2d-block-array-capabilities.yml | 36 +++++++++++ 2 files changed, 98 insertions(+) create mode 100644 scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst create mode 100644 scripts/core/exp-2d-block-array-capabilities.yml diff --git a/scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst b/scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst new file mode 100644 index 0000000000..765b07cfb3 --- /dev/null +++ b/scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst @@ -0,0 +1,62 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-2D-block-array-capabilities: + +================================================================================ +2D Block Array Capabilities +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- +Some Intel GPU devices support 2D block array operations which may be used to optimize applications on Intel GPUs. +This extension provides a device descriptor which allows querying the 2D block array capabilities of a device. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP + +* ${x}_exp_device_2d_block_array_capability_flags_t + * ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD + * ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from +the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP +device descriptor. + + +Contributors +-------------------------------------------------------------------------------- + +* Artur Gainullin `artur.gainullin@intel.com <artur.gainullin@intel.com>`_ diff --git a/scripts/core/exp-2d-block-array-capabilities.yml b/scripts/core/exp-2d-block-array-capabilities.yml new file mode 100644 index 0000000000..ec62c4bb96 --- /dev/null +++ b/scripts/core/exp-2d-block-array-capabilities.yml @@ -0,0 +1,36 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enum to $x_device_info_t to query Intel device 2D block array capabilities." +name: $x_device_info_t +etors: + - name: 2D_BLOCK_ARRAY_CAPABILITIES_EXP + value: "0x2022" + desc: "[$x_exp_device_2d_block_array_capability_flags_t] return a bit-field of Intel GPU 2D block array capabilities" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Intel GPU 2D block array capabilities" +class: $xDevice +name: $x_exp_device_2d_block_array_capability_flags_t +etors: + - name: LOAD + desc: "Load instructions are supported" + value: "$X_BIT(0)" + - name: STORE + desc: "Store instructions are supported" + value: "$X_BIT(1)" + From 42c2b660a618a86f2fc5b10096dc1fbb5129468e Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 19 Nov 2024 14:11:06 -0800 Subject: [PATCH 2/5] Generate sources --- include/ur_api.h | 25 +++++- include/ur_print.h | 8 ++ include/ur_print.hpp | 78 +++++++++++++++++++ source/loader/layers/validation/ur_valddi.cpp | 2 +- source/loader/loader.def.in | 1 + source/loader/loader.map.in | 1 + source/loader/ur_libapi.cpp | 2 +- source/loader/ur_print.cpp | 8 ++ source/ur_api.cpp | 2 +- tools/urinfo/urinfo.hpp | 3 + 10 files changed, 126 insertions(+), 4 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 3205fcb207..eb8b07221c 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1705,6 +1705,8 @@ typedef enum ur_device_info_t { UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< 
[::ur_bool_t] returns true if the device supports enqueueing of native ///< work UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events. + UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, ///< [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field + ///< of Intel GPU 2D block array capabilities /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1730,7 +1732,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` +/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -7428,6 +7430,27 @@ urEnqueueWriteHostPipe( ///< an element of the phEventWaitList array. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities +#if !defined(__GNUC__) +#pragma region 2d_block_array_capabilities_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intel GPU 2D block array capabilities +typedef uint32_t ur_exp_device_2d_block_array_capability_flags_t; +typedef enum ur_exp_device_2d_block_array_capability_flag_t { + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD = UR_BIT(0), ///< Load instructions are supported + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE = UR_BIT(1), ///< Store instructions are supported + /// @cond + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_device_2d_block_array_capability_flag_t; +/// @brief Bit Mask for validating ur_exp_device_2d_block_array_capability_flags_t +#define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 
0xfffffffc + #if !defined(__GNUC__) #pragma endregion #endif diff --git a/include/ur_print.h b/include/ur_print.h index 93597d232f..c2adb18067 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -874,6 +874,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_device_2d_block_array_capability_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpDevice_2dBlockArrayCapabilityFlags(enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_image_copy_flag_t enum /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 1d28b8eac0..8888a74f91 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -194,6 +194,9 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -328,6 +331,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t 
value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value); @@ -2665,6 +2669,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP"; break; + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: + os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4472,6 +4479,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: { + const ur_exp_device_2d_block_array_capability_flags_t *tptr = (const ur_exp_device_2d_block_array_capability_flags_t *)ptr; + if (sizeof(ur_exp_device_2d_block_array_capability_flags_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_exp_device_2d_block_array_capability_flags_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + ur::details::printFlag(os, + *tptr); + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -9455,6 +9475,64 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_device_2d_block_array_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value) { + switch (value) { + case UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD: + os << "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD"; + break; + case 
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE: + os << "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_device_2d_block_array_capability_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) == (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) { + val ^= (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD; + } + + if ((val & UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) == (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) { + val ^= (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_image_copy_flag_t type /// @returns /// std::ostream & diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 195c1d3c69..b3969de10f 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -518,7 +518,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName) { + if 
(UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index b5c3bde6ea..a336da153d 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -312,6 +312,7 @@ EXPORTS urPrintExpCommandBufferUpdateMemobjArgDesc urPrintExpCommandBufferUpdatePointerArgDesc urPrintExpCommandBufferUpdateValueArgDesc + urPrintExpDevice_2dBlockArrayCapabilityFlags urPrintExpEnqueueExtFlags urPrintExpEnqueueExtProperties urPrintExpEnqueueNativeCommandFlags diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 778a5da065..59a8a8d107 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -312,6 +312,7 @@ urPrintExpCommandBufferUpdateMemobjArgDesc; urPrintExpCommandBufferUpdatePointerArgDesc; urPrintExpCommandBufferUpdateValueArgDesc; + urPrintExpDevice_2dBlockArrayCapabilityFlags; urPrintExpEnqueueExtFlags; urPrintExpEnqueueExtProperties; urPrintExpEnqueueNativeCommandFlags; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 8dca26d4ba..3340363737 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -870,7 +870,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` +/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index d8206edb3f..6b1cbfd5ee 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -879,6 +879,14 @@ ur_result_t urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpDevice_2dBlockArrayCapabilityFlags( + enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 22c76f122e..853d61472e 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -771,7 +771,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` +/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index ee7fe52834..37c7a80328 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -420,5 +420,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, hDevice, UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP); std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP); } } // namespace urinfo From 66025f0d1ed3d0be5d76948a33c37a6cc4b5e282 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 24 Oct 2024 14:48:12 -0700 Subject: [PATCH 3/5] Add implementation --- cmake/FetchLevelZero.cmake | 27 +++++++++++++++- source/adapters/cuda/device.cpp | 1 + source/adapters/hip/device.cpp | 1 + source/adapters/level_zero/CMakeLists.txt | 2 ++ source/adapters/level_zero/common.cpp | 9 ++++++ source/adapters/level_zero/device.cpp | 35 +++++++++++++++++++++ source/adapters/level_zero/device.hpp | 10 ++++-- source/adapters/native_cpu/device.cpp | 2 ++ source/adapters/opencl/device.cpp | 3 +- test/adapters/level_zero/CMakeLists.txt | 1 + test/adapters/level_zero/v2/CMakeLists.txt | 1 + test/conformance/device/urDeviceGetInfo.cpp | 7 +++-- 12 files changed, 92 insertions(+), 7 deletions(-) diff --git a/cmake/FetchLevelZero.cmake b/cmake/FetchLevelZero.cmake index 6d108c8a6f..3bc745f3d0 100644 --- a/cmake/FetchLevelZero.cmake +++ b/cmake/FetchLevelZero.cmake @@ -7,6 +7,8 @@ set(UR_LEVEL_ZERO_LOADER_LIBRARY "" CACHE FILEPATH "Path of the Level Zero Loade set(UR_LEVEL_ZERO_INCLUDE_DIR "" CACHE FILEPATH "Directory containing the Level Zero Headers") set(UR_LEVEL_ZERO_LOADER_REPO "" CACHE STRING "Github repo to get the Level Zero loader sources from") set(UR_LEVEL_ZERO_LOADER_TAG "" CACHE STRING " GIT tag of the Level Loader taken from github repo") +set(UR_COMPUTE_RUNTIME_REPO "" CACHE STRING "Github repo to get the compute runtime 
sources from") +set(UR_COMPUTE_RUNTIME_TAG "" CACHE STRING " GIT tag of the compute runtime taken from github repo") # Copy Level Zero loader/headers locally to the build to avoid leaking their path. set(LEVEL_ZERO_COPY_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader) @@ -87,8 +89,31 @@ target_link_libraries(LevelZeroLoader INTERFACE "${LEVEL_ZERO_LIB_NAME}" ) +file(GLOB LEVEL_ZERO_LOADER_API_HEADERS "${LEVEL_ZERO_INCLUDE_DIR}/*.h") +file(COPY ${LEVEL_ZERO_LOADER_API_HEADERS} DESTINATION ${LEVEL_ZERO_INCLUDE_DIR}/level_zero) add_library(LevelZeroLoader-Headers INTERFACE) target_include_directories(LevelZeroLoader-Headers - INTERFACE "$" + INTERFACE "$" + "$" +) + +if (UR_COMPUTE_RUNTIME_REPO STREQUAL "") +set(UR_COMPUTE_RUNTIME_REPO "https://github.com/intel/compute-runtime.git") +endif() +if (UR_COMPUTE_RUNTIME_TAG STREQUAL "") +set(UR_COMPUTE_RUNTIME_TAG 24.39.31294.12) +endif() +include(FetchContent) +# Sparse fetch only the dir with level zero headers to avoid pulling in the entire compute-runtime. 
+FetchContentSparse_Declare(compute-runtime-level-zero-headers ${UR_COMPUTE_RUNTIME_REPO} "${UR_COMPUTE_RUNTIME_TAG}" "level_zero/include") +FetchContent_GetProperties(compute-runtime-level-zero-headers) +if(NOT compute-runtime-level-zero-headers_POPULATED) + FetchContent_Populate(compute-runtime-level-zero-headers) +endif() +add_library(ComputeRuntimeLevelZero-Headers INTERFACE) +set(COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE "${compute-runtime-level-zero-headers_SOURCE_DIR}/../..") +message(STATUS "Level Zero Adapter: Using Level Zero headers from ${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}") +target_include_directories(ComputeRuntimeLevelZero-Headers + INTERFACE "$" "$" ) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index cb6b757dd3..a6bdc64f41 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1087,6 +1087,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 5271f73709..8601e8b914 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -904,6 +904,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BFLOAT16: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_ASYNC_BARRIER: + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { int DriverVersion = 0; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 6465ebaa51..8306436097 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ 
b/source/adapters/level_zero/CMakeLists.txt @@ -109,6 +109,7 @@ if(UR_BUILD_ADAPTER_L0) ${PROJECT_NAME}::umf LevelZeroLoader LevelZeroLoader-Headers + ComputeRuntimeLevelZero-Headers ) target_include_directories(ur_adapter_level_zero PRIVATE @@ -203,6 +204,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${PROJECT_NAME}::umf LevelZeroLoader LevelZeroLoader-Headers + ComputeRuntimeLevelZero-Headers ) target_include_directories(ur_adapter_level_zero_v2 PRIVATE diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index f5d8b20014..da7f624013 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "logger/ur_logger.hpp" #include "usm.hpp" +#include ur_result_t ze2urResult(ze_result_t ZeResult) { if (ZeResult == ZE_RESULT_SUCCESS) @@ -330,6 +331,14 @@ template <> zes_structure_type_t getZesStructureType() { return ZES_STRUCTURE_TYPE_MEM_PROPERTIES; } +#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME +template <> +ze_structure_type_t +getZeStructureType() { + return ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES; +} +#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; thread_local char ErrorMessage[MaxMessageSize]; diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 865edebc08..add3448e91 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1153,6 +1153,30 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: { +#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + const auto ZeDeviceBlockArrayFlags = + Device->ZeDeviceBlockArrayProperties->flags; + + auto supportsFlags = + [&](ze_intel_device_block_array_exp_flags_t RequiredFlags) { + return 
(ZeDeviceBlockArrayFlags & RequiredFlags) == RequiredFlags; + }; + + ur_exp_device_2d_block_array_capability_flags_t BlockArrayCapabilities = 0; + if (supportsFlags(ZE_INTEL_DEVICE_EXP_FLAG_2D_BLOCK_LOAD)) { + BlockArrayCapabilities |= + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD; + } + if (supportsFlags(ZE_INTEL_DEVICE_EXP_FLAG_2D_BLOCK_STORE)) { + BlockArrayCapabilities |= + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE; + } + return ReturnValue(BlockArrayCapabilities); +#else + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; +#endif + } default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, @@ -1584,6 +1608,17 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); }; +#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + ZeDeviceBlockArrayProperties.Compute = + [ZeDevice]( + ZeStruct &Properties) { + ze_device_properties_t P; + P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + P.pNext = &Properties; + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); + }; +#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + ImmCommandListUsed = this->useImmediateCommandLists(); uint32_t numQueueGroups = 0; diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 3d78a99b97..512a5ff714 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -18,14 +18,14 @@ #include #include +#include "adapters/level_zero/platform.hpp" +#include "common.hpp" +#include #include #include #include #include -#include "adapters/level_zero/platform.hpp" -#include "common.hpp" - enum EventsScope { // All events are created host-visible. 
AllHostVisible, @@ -224,6 +224,10 @@ struct ur_device_handle_t_ : _ur_object { ZeCache ZeGlobalMemSize; ZeCache> ZeDeviceMutableCmdListsProperties; +#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + ZeCache> + ZeDeviceBlockArrayProperties; +#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME // Map device bindless image offset to corresponding host image handle. std::unordered_map diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index b7c454315f..036e002d21 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -366,6 +366,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: return UR_RESULT_ERROR_INVALID_VALUE; + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // Currently for Native CPU fences are implemented using OCK // builtins, so we have different capabilities than atomic operations diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 70559eb52e..ce3ade309e 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1093,7 +1093,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GLOBAL_MEM_FREE: case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: - case UR_DEVICE_INFO_ASYNC_BARRIER: { + case UR_DEVICE_INFO_ASYNC_BARRIER: + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index bfb02d37c2..8fe062b38b 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -29,6 +29,7 @@ if(UR_BUILD_ADAPTER_L0) 
target_link_libraries(test-adapter-level_zero PRIVATE LevelZeroLoader LevelZeroLoader-Headers + ComputeRuntimeLevelZero-Headers ) target_include_directories(test-adapter-level_zero PRIVATE diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index f53cf15256..df6b43c443 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -25,6 +25,7 @@ function(add_unittest name) ${PROJECT_NAME}::umf LevelZeroLoader LevelZeroLoader-Headers + ComputeRuntimeLevelZero-Headers ) endfunction() diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index e41cff97ed..23a2f7f237 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -115,7 +115,9 @@ static std::unordered_map device_info_size_map = { {UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, sizeof(uint32_t)}, {UR_DEVICE_INFO_COMPONENT_DEVICES, sizeof(uint32_t)}, {UR_DEVICE_INFO_COMPOSITE_DEVICE, sizeof(ur_device_handle_t)}, - {UR_DEVICE_INFO_USM_POOL_SUPPORT, sizeof(ur_bool_t)}}; + {UR_DEVICE_INFO_USM_POOL_SUPPORT, sizeof(ur_bool_t)}, + {UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP, + sizeof(ur_exp_device_2d_block_array_capability_flags_t)}}; struct urDeviceGetInfoTest : uur::urAllDevicesTest, ::testing::WithParamInterface { @@ -237,7 +239,8 @@ INSTANTIATE_TEST_SUITE_P( UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, // UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, // UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS, // - UR_DEVICE_INFO_USM_POOL_SUPPORT // + UR_DEVICE_INFO_USM_POOL_SUPPORT, // + UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP // ), [](const ::testing::TestParamInfo &info) { std::stringstream ss; From a1a3a43ec562470d4c0bd347f9047c2a2320ed23 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Thu, 21 Nov 2024 10:00:15 -0800 Subject: [PATCH 4/5] OpenCL adapter implementation and update other adapters --- 
source/adapters/cuda/device.cpp | 5 +++-- source/adapters/hip/device.cpp | 4 +++- source/adapters/native_cpu/device.cpp | 3 ++- source/adapters/opencl/device.cpp | 14 ++++++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index a6bdc64f41..d8916ccedd 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1087,9 +1087,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: + return ReturnValue( + static_cast(0)); case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(true); diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 8601e8b914..eed6a1c7c5 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -904,8 +904,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BFLOAT16: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_ASYNC_BARRIER: - case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: + return ReturnValue( + static_cast(0)); case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { int DriverVersion = 0; UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 036e002d21..69c8bfc784 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -367,7 +367,8 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: return UR_RESULT_ERROR_INVALID_VALUE; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + return ReturnValue( + static_cast(0)); case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // Currently for Native CPU fences are implemented using OCK // builtins, so we have different capabilities than atomic operations diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index ce3ade309e..b33d637a84 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1094,10 +1094,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: case UR_DEVICE_INFO_ASYNC_BARRIER: - case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: { + bool Is2DBlockIOSupported = false; + if (cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_subgroup_2d_block_io"}, + Is2DBlockIOSupported) != UR_RESULT_SUCCESS || + !Is2DBlockIOSupported) { + return ReturnValue( + static_cast(0)); + } + return ReturnValue(UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD | + UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE); } - case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { cl_device_id Dev = cl_adapter::cast(hDevice); size_t ExtSize = 0; From c79df596fc76d384028fa35d492e4dbba7b2e124 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 26 Nov 2024 10:08:31 -0800 Subject: [PATCH 5/5] Remove redefinitions in L0 V2 --- source/adapters/level_zero/CMakeLists.txt | 2 +- source/adapters/level_zero/v2/command_list_cache.cpp | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 
8306436097..05a33c1224 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -58,7 +58,7 @@ if(UR_BUILD_ADAPTER_L0) # 'utils' target from 'level-zero-loader' includes path which is prefixed # in the source directory, this breaks the installation of 'utils' target. set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "") - install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils + install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ComputeRuntimeLevelZero-Headers ze_loader utils EXPORT ${PROJECT_NAME}-targets ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} diff --git a/source/adapters/level_zero/v2/command_list_cache.cpp b/source/adapters/level_zero/v2/command_list_cache.cpp index 9e585b80af..be4cb813fd 100644 --- a/source/adapters/level_zero/v2/command_list_cache.cpp +++ b/source/adapters/level_zero/v2/command_list_cache.cpp @@ -13,15 +13,6 @@ #include "../device.hpp" -typedef struct _zex_intel_queue_copy_operations_offload_hint_exp_desc_t { - ze_structure_type_t stype; - const void *pNext; - ze_bool_t copyOffloadEnabled; -} zex_intel_queue_copy_operations_offload_hint_exp_desc_t; - -#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES \ - (ze_structure_type_t)0x0003001B - template <> ze_structure_type_t getZeStructureType() {