Expose support_cl_bf16_conversion on XPU device properties.

Adds a `support_cl_bf16_conversion` flag to DeviceProp and DeviceInfo. The
flag is filled from the device's report for the OpenCL extension
"cl_intel_bfloat16_conversions", and is surfaced to Python as a read-only
attribute and in the _DeviceInfo repr.

Review note: the copy into DeviceInfo is placed next to support_fp64, before
the __INTEL_LLVM_COMPILER version guard, so the field is assigned on both
preprocessor paths. Placing it inside the #if branch would leave it unset
whenever the #else branch is compiled.

diff --git a/csrc/gpu/aten/core/DeviceInfo.h b/csrc/gpu/aten/core/DeviceInfo.h
index a487e215b..aa13b5f50 100644
--- a/csrc/gpu/aten/core/DeviceInfo.h
+++ b/csrc/gpu/aten/core/DeviceInfo.h
@@ -34,6 +34,7 @@ struct DeviceInfo {
   uint32_t max_num_sub_groups;
   std::vector<size_t> sub_group_sizes;
   bool support_fp64;
+  bool support_cl_bf16_conversion;
 };
 
 } // namespace dpcpp
diff --git a/csrc/gpu/runtime/Device.cpp b/csrc/gpu/runtime/Device.cpp
index 5dd4a123e..ce89de451 100644
--- a/csrc/gpu/runtime/Device.cpp
+++ b/csrc/gpu/runtime/Device.cpp
@@ -322,6 +322,8 @@ static void initDeviceProperty(DeviceId device_id) {
       : 8;
   device_prop.support_atomic64 = device.has(dpcpp_dev_aspect_atomic64);
   device_prop.support_fp64 = device.has(dpcpp_dev_aspect_fp64);
+  sycl::ext::oneapi::experimental::cl_version version{20, 20, 20};
+  device_prop.support_cl_bf16_conversion = device.ext_oneapi_supports_cl_extension("cl_intel_bfloat16_conversions", &version);
 
   device_properties[device_id] = device_prop;
 
@@ -358,6 +360,7 @@ static void initDeviceProperty(DeviceId device_id) {
   dev_info.support_fp64 = device_prop.support_fp64;
+  dev_info.support_cl_bf16_conversion = device_prop.support_cl_bf16_conversion;
 #if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100)
   dev_info.device_arch = static_cast<uint64_t>(device_prop.device_arch);
 #else
   dev_info.device_arch = (uint64_t)0;
 #endif
diff --git a/csrc/gpu/runtime/DeviceProp.h b/csrc/gpu/runtime/DeviceProp.h
index dbd0d07a2..f2af1843d 100644
--- a/csrc/gpu/runtime/DeviceProp.h
+++ b/csrc/gpu/runtime/DeviceProp.h
@@ -143,6 +143,7 @@ struct DeviceProp {
 
   bool support_fp64;
   bool support_atomic64;
+  bool support_cl_bf16_conversion;
 };
 
 } // namespace dpcpp
diff --git a/intel_extension_for_pytorch/csrc/xpu/Module.cpp b/intel_extension_for_pytorch/csrc/xpu/Module.cpp
index 8528051c5..34bc3117e 100644
--- a/intel_extension_for_pytorch/csrc/xpu/Module.cpp
+++ b/intel_extension_for_pytorch/csrc/xpu/Module.cpp
@@ -577,6 +577,7 @@ static void register_xpu_device_info(PyObject* module) {
       .def_readonly("max_num_sub_groups", &DeviceInfo::max_num_sub_groups)
       .def_readonly("sub_group_sizes", &DeviceInfo::sub_group_sizes)
       .def_readonly("has_fp64", &DeviceInfo::support_fp64)
+      .def_readonly("support_cl_bf16_conversion", &DeviceInfo::support_cl_bf16_conversion)
       .def_readonly("device_arch", &DeviceInfo::device_arch)
       .def_property_readonly(
           "dev_type", [](const DeviceInfo& info) { return get_dev_type(info); })
@@ -589,7 +590,8 @@ static void register_xpu_device_info(PyObject* module) {
                << ", total_memory=" << info.global_mem_size / (1024 * 1024)
                << "MB, max_compute_units=" << info.max_compute_units
                << ", gpu_eu_count=" << info.gpu_eu_count
-               << ", device_arch=" << info.device_arch << ")";
+               << ", device_arch=" << info.device_arch
+               << ", support_cl_bf16_conversion=" << info.support_cl_bf16_conversion << ")";
         return stream.str();
       });
 }