From 41e5daf1444e994e7be7dc61589292ba3fb82d77 Mon Sep 17 00:00:00 2001 From: thimenesup Date: Tue, 27 Feb 2024 22:10:48 +0100 Subject: [PATCH] Implement Mesh Shader support for Rendering Device Vulkan and DirectX12 --- .../d3d12/rendering_device_driver_d3d12.cpp | 66 ++++++- drivers/d3d12/rendering_device_driver_d3d12.h | 12 ++ .../vulkan/rendering_device_driver_vulkan.cpp | 79 ++++++++ .../vulkan/rendering_device_driver_vulkan.h | 17 ++ editor/plugins/shader_file_editor_plugin.cpp | 4 +- modules/glslang/register_types.cpp | 4 +- servers/rendering/rendering_device.cpp | 179 ++++++++++++++++++ servers/rendering/rendering_device.h | 3 + servers/rendering/rendering_device_binds.cpp | 4 +- servers/rendering/rendering_device_binds.h | 10 +- .../rendering/rendering_device_commons.cpp | 2 + servers/rendering/rendering_device_commons.h | 11 ++ servers/rendering/rendering_device_driver.h | 7 + servers/rendering/rendering_device_graph.cpp | 35 ++++ servers/rendering/rendering_device_graph.h | 15 ++ 15 files changed, 441 insertions(+), 7 deletions(-) diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index b72a1932f830..e8e930b57287 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -3095,6 +3095,8 @@ Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec /* SHADER_STAGE_TESSELATION_CONTROL */ MESA_SHADER_TESS_CTRL, /* SHADER_STAGE_TESSELATION_EVALUATION */ MESA_SHADER_TESS_EVAL, /* SHADER_STAGE_COMPUTE */ MESA_SHADER_COMPUTE, + /* SHADER_STAGE_MESH_TASK */ MESA_SHADER_TASK, + /* SHADER_STAGE_MESH */ MESA_SHADER_MESH, }; nir_shader *shader = spirv_to_nir( @@ -3487,15 +3489,19 @@ Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | - 
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
-				D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
-				D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+				D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS;
 		if (!stages_processed.has_flag(SHADER_STAGE_VERTEX_BIT)) {
 			root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
 		}
 		if (!stages_processed.has_flag(SHADER_STAGE_FRAGMENT_BIT)) {
 			root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
 		}
+		if (!stages_processed.has_flag(SHADER_STAGE_MESH_TASK_BIT)) {
+			root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS;
+		}
+		if (!stages_processed.has_flag(SHADER_STAGE_MESH_BIT)) {
+			root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+		}
 		if (binary_data.vertex_input_mask) {
 			root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
 		}
@@ -5416,6 +5422,42 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe
 	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
 }
 
+// Records a direct mesh shader dispatch with the given thread group counts on p_cmd_buffer.
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+
+	// DispatchMesh() is only exposed by ID3D12GraphicsCommandList6, so ask the command list for
+	// that interface instead of reinterpreting the base interface pointer with a cast.
+	ComPtr<ID3D12GraphicsCommandList6> cmd_list_6;
+	HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(cmd_list_6.GetAddressOf()));
+	ERR_FAIL_COND_MSG(!SUCCEEDED(res), "The command list does not implement ID3D12GraphicsCommandList6; mesh shader dispatch is unavailable.");
+	cmd_list_6->DispatchMesh(p_x_groups, p_y_groups, p_z_groups);
+}
+
+// Records an indirect mesh shader dispatch that reads up to p_draw_count argument records from p_indirect_buffer at p_offset.
+// NOTE: p_stride is not used by this implementation; it is not forwarded to ExecuteIndirect().
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+	BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
+	// Move the argument buffer to the indirect-argument state before the GPU reads it.
+	_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch_mesh.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
+}
+
+// Same as command_render_dispatch_mesh_indirect(), but the number of records is read from p_count_buffer at p_count_buffer_offset, capped at p_max_draw_count.
+// NOTE: p_stride is not used by this implementation; it is not forwarded to ExecuteIndirect().
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+	BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
+	BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
+	// Both the argument buffer and the count buffer are transitioned to the indirect-argument state.
+	_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch_mesh.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
+}
+
 void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
 	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
 
@@ -6163,6 +6192,18 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
 			return D3D12_CS_THREAD_GROUP_MAX_Y;
 		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
 			return D3D12_CS_THREAD_GROUP_MAX_Z;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_X:
+			return
MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Y:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Z:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
 		case LIMIT_SUBGROUP_SIZE:
 			// Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),
 			// but at this time I don't know the implications on the transpilation to DXIL, etc.
@@ -6221,6 +6262,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
 			return vrs_capabilities.ss_image_supported;
 		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
 			return true;
+		case SUPPORTS_MESH_SHADER:
+			return mesh_shader_capabilities.is_supported;
 		default:
 			return false;
 	}
@@ -6499,6 +6542,14 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
 		}
 	}
 
+	D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7 = {};
+	res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, &options7, sizeof(options7));
+	if (SUCCEEDED(res)) {
+		if (options7.MeshShaderTier >= D3D12_MESH_SHADER_TIER_1) {
+			mesh_shader_capabilities.is_supported = true;
+		}
+	}
+
 	D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {};
 	res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
 	if (SUCCEEDED(res)) {
@@ -6546,6 +6597,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
 		print_verbose("- Relaxed casting not supported");
 	}
 
+	if (mesh_shader_capabilities.is_supported) {
+		print_verbose("- D3D12 Mesh Shader supported");
+	} else {
+		print_verbose("- D3D12 Mesh Shader not supported");
+	}
+
 	print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no"));
 
 	if (misc_features_support.depth_bounds_supported) {
@@ -6655,6 +6712,13 @@ Error RenderingDeviceDriverD3D12::_initialize_command_signatures() {
 	err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, sizeof(D3D12_DISPATCH_ARGUMENTS), &indirect_cmd_signatures.dispatch);
 	ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
 
+	// Only create the mesh dispatch signature when the device reported mesh shader support, so that devices
+	// without it keep initializing normally. NOTE(review): assumes _check_capabilities() has already run - confirm.
+	if (mesh_shader_capabilities.is_supported) {
+		err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH, sizeof(D3D12_DISPATCH_MESH_ARGUMENTS), &indirect_cmd_signatures.dispatch_mesh);
+		ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
+	}
+
 	return OK;
 }
 
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h
index b449a9087665..ff4140e58469 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.h
+++ b/drivers/d3d12/rendering_device_driver_d3d12.h
@@ -143,6 +143,15 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 		bool enhanced_barriers_supported = false;
 	};
 
+	// Mesh shader support state; is_supported is set by _check_capabilities() from the reported MeshShaderTier.
+	struct MeshShaderCapabilities {
+		// The DirectX Mesh Shader spec states that "each of the three thread group counts must be less than 64k",
+		// i.e. less than 65536, so 65535 is the largest valid per-dimension count.
+		// NOTE: The spec also caps the product X * Y * Z at 2^22; that combined limit is not expressed by this constant.
+		static const uint32_t MAX_THREAD_GROUPS = 65535;
+		bool is_supported = false;
+	};
+
 	struct MiscFeaturesSupport {
 		bool depth_bounds_supported = false;
 	};
@@ -162,6 +167,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 	StorageBufferCapabilities storage_buffer_capabilities;
 	FormatCapabilities format_capabilities;
 	BarrierCapabilities barrier_capabilities;
+	MeshShaderCapabilities mesh_shader_capabilities;
 	MiscFeaturesSupport misc_features_support;
 
 	String pipeline_cache_id;
@@ -201,6 +207,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 		ComPtr draw;
 		ComPtr draw_indexed;
 		ComPtr dispatch;
+		ComPtr dispatch_mesh;
 	} indirect_cmd_signatures;
 
 	static void STDMETHODCALLTYPE _debug_message_func(D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, D3D12_MESSAGE_ID p_id, LPCSTR p_description, void *p_context);
@@ -855,6 +862,11 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 	virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
 	virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
 
+	// Mesh Shader Drawing.
+	virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
+	virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
+	virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
+
 	// Buffer binding.
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index b6e5ed02878b..27ea8d5d1bea 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -513,6 +513,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_MESH_SHADER_EXTENSION_NAME, false); if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); @@ -734,6 +735,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + VkPhysicalDeviceMeshShaderFeaturesEXT mesh_shader_features = {}; const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; if (use_1_2_features) { @@ -770,6 +772,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_features = &pipeline_cache_control_features; } + if (enabled_device_extension_names.has(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + mesh_shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT; + 
mesh_shader_features.pNext = next_features; + next_features = &mesh_shader_features; + } + VkPhysicalDeviceFeatures2 device_features_2 = {}; device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features_2.pNext = next_features; @@ -821,6 +829,14 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { device_memory_report_support = true; } #endif + + if (enabled_device_extension_names.has(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + mesh_shader_capabilities.task_shader_is_supported = mesh_shader_features.taskShader; + mesh_shader_capabilities.mesh_shader_is_supported = mesh_shader_features.meshShader; + mesh_shader_capabilities.multiview_mesh_shader_is_supported = mesh_shader_features.multiviewMeshShader; + mesh_shader_capabilities.primitive_fragment_shading_rate_mesh_shader_is_supported = mesh_shader_features.primitiveFragmentShadingRateMeshShader; + mesh_shader_capabilities.mesh_shader_queries_is_supported = mesh_shader_features.meshShaderQueries; + } } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -830,6 +846,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; + VkPhysicalDeviceMeshShaderPropertiesEXT mesh_shader_properties = {}; const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; if (use_1_1_properties) { @@ -857,6 +874,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &vrs_properties; } + if (mesh_shader_capabilities.task_shader_is_supported || mesh_shader_capabilities.mesh_shader_is_supported) { + mesh_shader_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT; + mesh_shader_properties.pNext = next_properties; + next_properties = &mesh_shader_properties; + } + 
physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
 		physical_device_properties_2.pNext = next_properties;
 		functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2);
@@ -915,6 +938,19 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() {
 		print_verbose("- Vulkan multiview not supported");
 	}
 
+	if (mesh_shader_capabilities.task_shader_is_supported || mesh_shader_capabilities.mesh_shader_is_supported) {
+		mesh_shader_capabilities.max_task_work_group_count[0] = mesh_shader_properties.maxTaskWorkGroupCount[0];
+		mesh_shader_capabilities.max_task_work_group_count[1] = mesh_shader_properties.maxTaskWorkGroupCount[1];
+		mesh_shader_capabilities.max_task_work_group_count[2] = mesh_shader_properties.maxTaskWorkGroupCount[2];
+		mesh_shader_capabilities.max_mesh_work_group_count[0] = mesh_shader_properties.maxMeshWorkGroupCount[0];
+		mesh_shader_capabilities.max_mesh_work_group_count[1] = mesh_shader_properties.maxMeshWorkGroupCount[1];
+		mesh_shader_capabilities.max_mesh_work_group_count[2] = mesh_shader_properties.maxMeshWorkGroupCount[2];
+
+		print_verbose("- Vulkan Mesh Shader supported:");
+	} else {
+		print_verbose("- Vulkan Mesh Shader not supported");
+	}
+
 	print_verbose("- Vulkan subgroup:");
 	print_verbose(" size: " + itos(subgroup_capabilities.size));
 	print_verbose(" min size: " + itos(subgroup_capabilities.min_size));
@@ -1009,6 +1045,18 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVectorvk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
 }
 
+// Records a direct mesh shader draw with the given workgroup counts.
+void RenderingDeviceDriverVulkan::command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	VkCommandBuffer vk_cmd_buffer = (VkCommandBuffer)p_cmd_buffer.id;
+	vkCmdDrawMeshTasksEXT(vk_cmd_buffer, p_x_groups, p_y_groups, p_z_groups);
+}
+
+// Records an indirect mesh shader draw whose arguments are read from p_indirect_buffer starting at p_offset.
+void RenderingDeviceDriverVulkan::command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	VkCommandBuffer vk_cmd_buffer = (VkCommandBuffer)p_cmd_buffer.id;
+	const BufferInfo *arguments_info = (const BufferInfo *)p_indirect_buffer.id;
+	vkCmdDrawMeshTasksIndirectEXT(vk_cmd_buffer, arguments_info->vk_buffer, p_offset, p_draw_count, p_stride);
+}
+
+// Same as the indirect variant, but the draw count is read from p_count_buffer at p_count_buffer_offset, capped at p_max_draw_count.
+void RenderingDeviceDriverVulkan::command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
+	VkCommandBuffer vk_cmd_buffer = (VkCommandBuffer)p_cmd_buffer.id;
+	const BufferInfo *arguments_info = (const BufferInfo *)p_indirect_buffer.id;
+	const BufferInfo *count_info = (const BufferInfo *)p_count_buffer.id;
+	vkCmdDrawMeshTasksIndirectCountEXT(vk_cmd_buffer, arguments_info->vk_buffer, p_offset, count_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
+}
+
 void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
 	VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count);
 	for (uint32_t i = 0; i < p_binding_count; i++) {
@@ -5632,6 +5697,18 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
 			return limits.maxViewportDimensions[0];
 		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
 			return limits.maxViewportDimensions[1];
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X:
+			return mesh_shader_capabilities.max_task_work_group_count[0];
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y:
+			return mesh_shader_capabilities.max_task_work_group_count[1];
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z:
+			return mesh_shader_capabilities.max_task_work_group_count[2];
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_X:
+			return mesh_shader_capabilities.max_mesh_work_group_count[0];
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Y:
+			return mesh_shader_capabilities.max_mesh_work_group_count[1];
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Z:
+			return
mesh_shader_capabilities.max_mesh_work_group_count[2];
 		case LIMIT_SUBGROUP_SIZE:
 			return subgroup_capabilities.size;
 		case LIMIT_SUBGROUP_MIN_SIZE:
@@ -5676,6 +5753,8 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) {
 			return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats;
 		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
 			return true;
+		case SUPPORTS_MESH_SHADER:
+			return mesh_shader_capabilities.task_shader_is_supported && mesh_shader_capabilities.mesh_shader_is_supported;
 		default:
 			return false;
 	}
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h
index 06cd2a31be6e..d0332519ec6a 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.h
+++ b/drivers/vulkan/rendering_device_driver_vulkan.h
@@ -100,6 +100,20 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 		bool storage_input_output_16 = false;
 	};
 
+	// Mesh shader support flags and limits; all default to unsupported/zero until _check_device_capabilities() fills them in.
+	struct MeshShaderCapabilities {
+		// Each flag mirrors the VkPhysicalDeviceMeshShaderFeaturesEXT member of the matching name.
+		bool task_shader_is_supported = false;
+		bool mesh_shader_is_supported = false;
+		bool multiview_mesh_shader_is_supported = false;
+		bool primitive_fragment_shading_rate_mesh_shader_is_supported = false;
+		bool mesh_shader_queries_is_supported = false;
+
+		// Per-dimension workgroup count limits copied from VkPhysicalDeviceMeshShaderPropertiesEXT; indices 0, 1, 2 are reported by limit_get() as X, Y, Z.
+		uint32_t max_task_work_group_count[3] = { 0, 0, 0 };
+		uint32_t max_mesh_work_group_count[3] = { 0, 0, 0 };
+	};
+
 	struct DeviceFunctions {
 		PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr;
 		PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr;
@@ -138,6 +149,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	VRSCapabilities vrs_capabilities;
 	ShaderCapabilities shader_capabilities;
 	StorageBufferCapabilities storage_buffer_capabilities;
+	MeshShaderCapabilities mesh_shader_capabilities;
 	bool pipeline_cache_control_support = false;
 	bool device_fault_support = false;
 #if defined(VK_TRACK_DEVICE_MEMORY)
@@ -584,6 +596,11 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	virtual void
command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + // Mesh Shader Drawing. + virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final; + virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; + virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + // Buffer binding. 
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; diff --git a/editor/plugins/shader_file_editor_plugin.cpp b/editor/plugins/shader_file_editor_plugin.cpp index 25a02d60ef87..6f8b8ca43671 100644 --- a/editor/plugins/shader_file_editor_plugin.cpp +++ b/editor/plugins/shader_file_editor_plugin.cpp @@ -269,7 +269,9 @@ ShaderFileEditor::ShaderFileEditor() { "Fragment", "TessControl", "TessEval", - "Compute" + "Compute", + "MeshTask", + "Mesh" }; stage_hb = memnew(HBoxContainer); diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 81505f716a2d..44ffd1c32bb4 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -48,7 +48,9 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage EShLangFragment, EShLangTessControl, EShLangTessEvaluation, - EShLangCompute + EShLangCompute, + EShLangTask, + EShLangMesh }; int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 0761af92609c..8f8228f4092c 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -2989,6 +2989,12 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader case SHADER_STAGE_COMPUTE: shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); break; + case SHADER_STAGE_MESH_TASK: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_MESH_TASK_SHADER_BIT); + break; + case SHADER_STAGE_MESH: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_MESH_SHADER_BIT); + break; default: DEV_ASSERT(false && "Unknown shader stage."); break; @@ -4516,6 +4522,164 @@ void 
RenderingDevice::draw_list_draw_indirect(DrawListID p_list, bool p_use_indi
 	_check_transfer_worker_buffer(buffer);
 }
 
+// Records a direct mesh shader dispatch on the draw list, after binding any uniform sets the pipeline expects.
+// p_x_groups, p_y_groups and p_z_groups are the workgroup counts per dimension; in debug builds they must be non-zero
+// and within the task limits when the pipeline has a task stage, or within the mesh limits otherwise.
+void RenderingDevice::draw_list_dispatch_mesh(DrawListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	ERR_RENDER_THREAD_GUARD();
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!has_feature(SUPPORTS_MESH_SHADER),
+			"The GPU doesn't support Mesh Shaders, its your responsibility to check it does before calling this.");
+#endif
+	DrawList *dl = _get_draw_list_ptr(p_list);
+	ERR_FAIL_NULL(dl);
+
+#ifdef DEBUG_ENABLED
+	// Validate the draw list and pipeline state before looking at the pipeline shader, so that a missing
+	// pipeline is reported as such rather than as a null shader.
+	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
+	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
+			"No render pipeline was set before attempting to draw.");
+
+	ERR_FAIL_COND_MSG(p_x_groups == 0, "Dispatch amount of X mesh/task groups (" + itos(p_x_groups) + ") is zero.");
+	ERR_FAIL_COND_MSG(p_y_groups == 0, "Dispatch amount of Y mesh/task groups (" + itos(p_y_groups) + ") is zero.");
+	ERR_FAIL_COND_MSG(p_z_groups == 0, "Dispatch amount of Z mesh/task groups (" + itos(p_z_groups) + ") is zero.");
+
+	const Shader *shader = shader_owner.get_or_null(dl->state.pipeline_shader);
+	ERR_FAIL_NULL(shader);
+	if (shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_TASK_SHADER_BIT)) {
+		ERR_FAIL_COND_MSG(p_x_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X),
+				"Dispatch amount of X task groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X)) + ")");
+		ERR_FAIL_COND_MSG(p_y_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y),
+				"Dispatch amount of Y task groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y)) + ")");
+		ERR_FAIL_COND_MSG(p_z_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z),
+				"Dispatch amount of Z task groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z)) + ")");
+	} else if (shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_SHADER_BIT)) {
+		ERR_FAIL_COND_MSG(p_x_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_X),
+				"Dispatch amount of X mesh groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_X)) + ")");
+		ERR_FAIL_COND_MSG(p_y_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y),
+				"Dispatch amount of Y mesh groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y)) + ")");
+		ERR_FAIL_COND_MSG(p_z_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z),
+				"Dispatch amount of Z mesh groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z)) + ")");
+	} else {
+		ERR_FAIL_MSG("Unexpected pipeline stage.");
+	}
+
+	if (dl->validation.pipeline_push_constant_size > 0) {
+		// Using push constants, check that they were supplied.
+		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+#endif
+
+	// Bind descriptor sets.
+
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+#ifdef DEBUG_ENABLED
+		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
+			if (dl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			}
+		}
+#endif
+		draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+	}
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+		if (!dl->state.sets[i].bound) {
+			// All good, see if this requires re-binding.
+			draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+
+			UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+			draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
+
+			dl->state.sets[i].bound = true;
+		}
+	}
+
+	draw_graph.add_draw_list_dispatch_mesh(p_x_groups, p_y_groups, p_z_groups);
+}
+
+// Records an indirect mesh shader dispatch on the draw list, after binding any uniform sets the pipeline expects.
+// p_buffer must be a storage buffer created with the indirect usage flag; the 12 bytes of dispatch arguments
+// are read from it starting at p_offset.
+void RenderingDevice::draw_list_dispatch_mesh_indirect(DrawListID p_list, RID p_buffer, uint32_t p_offset) {
+	ERR_RENDER_THREAD_GUARD();
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!has_feature(SUPPORTS_MESH_SHADER),
+			"The GPU doesn't support Mesh Shaders, its your responsibility to check it does before calling this.");
+#endif
+	DrawList *dl = _get_draw_list_ptr(p_list);
+	ERR_FAIL_NULL(dl);
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
+#endif
+
+	Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer);
+	ERR_FAIL_NULL(buffer);
+
+	ERR_FAIL_COND_MSG(!buffer->usage.has_flag(RDD::BUFFER_USAGE_INDIRECT_BIT), "Buffer provided was not created to do indirect dispatch.");
+
+	// Widen before adding so that an offset close to UINT32_MAX can't wrap around and slip past the bounds check.
+	ERR_FAIL_COND_MSG(uint64_t(p_offset) + 12 > buffer->size, "Offset provided (+12) is past the end of buffer.");
+	// Indirect argument offsets must be 4-byte aligned.
+	ERR_FAIL_COND_MSG(p_offset % 4 != 0, "Offset provided must be a multiple of 4.");
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
+			"No render pipeline was set before attempting to draw.");
+
+	// Same stage requirement as the direct dispatch: the bound pipeline needs a task or a mesh stage.
+	const Shader *shader = shader_owner.get_or_null(dl->state.pipeline_shader);
+	ERR_FAIL_NULL(shader);
+	ERR_FAIL_COND_MSG(!shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_TASK_SHADER_BIT) && !shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_SHADER_BIT),
+			"Unexpected pipeline stage.");
+
+	if (dl->validation.pipeline_push_constant_size > 0) {
+		// Using push constants, check that they were supplied.
+		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+
+#endif
+
+	// Bind descriptor sets.
+
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+#ifdef DEBUG_ENABLED
+		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
+			if (dl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			}
+		}
+#endif
+		draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+	}
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+		if (!dl->state.sets[i].bound) {
+			// All good, see if this requires re-binding.
+			draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+
+			UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+			draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
+
+			dl->state.sets[i].bound = true;
+		}
+	}
+
+	draw_graph.add_draw_list_dispatch_mesh_indirect(buffer->driver_id, p_offset);
+
+	if (buffer->draw_tracker != nullptr) {
+		draw_graph.add_draw_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ);
+	}
+
+	// Mirrors draw_list_draw_indirect(), which makes the same call on its indirect buffer.
+	_check_transfer_worker_buffer(buffer);
+}
+
 void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) {
 	ERR_RENDER_THREAD_GUARD();
 
@@ -6736,6 +6900,8 @@ void RenderingDevice::_bind_methods() {
 
 	ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), &RenderingDevice::draw_list_draw, DEFVAL(0));
 	ClassDB::bind_method(D_METHOD("draw_list_draw_indirect", "draw_list", "use_indices", "buffer", "offset", "draw_count", "stride"), &RenderingDevice::draw_list_draw_indirect, DEFVAL(0), DEFVAL(1), DEFVAL(0));
+	ClassDB::bind_method(D_METHOD("draw_list_dispatch_mesh", "draw_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::draw_list_dispatch_mesh);
+	ClassDB::bind_method(D_METHOD("draw_list_dispatch_mesh_indirect", "draw_list", "buffer", "offset"), &RenderingDevice::draw_list_dispatch_mesh_indirect);
 
 	ClassDB::bind_method(D_METHOD("draw_list_enable_scissor", "draw_list", "rect"), &RenderingDevice::draw_list_enable_scissor, DEFVAL(Rect2()));
 	ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor);
@@ -6765,6 +6931,7 @@ void RenderingDevice::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_captured_timestamp_cpu_time", "index"), &RenderingDevice::get_captured_timestamp_cpu_time);
 	ClassDB::bind_method(D_METHOD("get_captured_timestamp_name", "index"),
&RenderingDevice::get_captured_timestamp_name); + ClassDB::bind_method(D_METHOD("has_feature", "feature"), &RenderingDevice::has_feature); ClassDB::bind_method(D_METHOD("limit_get", "limit"), &RenderingDevice::limit_get); ClassDB::bind_method(D_METHOD("get_frame_delay"), &RenderingDevice::get_frame_delay); ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit); @@ -7269,12 +7436,16 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_TASK); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH); BIND_ENUM_CONSTANT(SHADER_STAGE_MAX); BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_TASK_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_BIT); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_GLSL); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_HLSL); @@ -7283,6 +7454,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT); BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT); + BIND_ENUM_CONSTANT(SUPPORTS_MESH_SHADER); + BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS); BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS); BIND_ENUM_CONSTANT(LIMIT_MAX_TEXTURES_PER_UNIFORM_SET); @@ -7318,6 +7491,12 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X); BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y); BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z); + 
BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_X);
+	BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y);
+	BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z);
 	BIND_ENUM_CONSTANT(LIMIT_MAX_VIEWPORT_DIMENSIONS_X);
 	BIND_ENUM_CONSTANT(LIMIT_MAX_VIEWPORT_DIMENSIONS_Y);
 
diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h
index 4e54e4ca1a8e..2575984c4bba 100644
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -1180,6 +1180,10 @@ class RenderingDevice : public RenderingDeviceCommons {
 	void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0);
 	void draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset = 0, uint32_t p_draw_count = 1, uint32_t p_stride = 0);
 
+	// Mesh shader dispatch: direct with explicit group counts, or indirect using p_buffer at p_offset.
+	void draw_list_dispatch_mesh(DrawListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
+	void draw_list_dispatch_mesh_indirect(DrawListID p_list, RID p_buffer, uint32_t p_offset);
+
 	void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect);
 	void draw_list_disable_scissor(DrawListID p_list);
 
diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp
index e41a56b0a32e..5b2827d6b7ea 100644
--- a/servers/rendering/rendering_device_binds.cpp
+++ b/servers/rendering/rendering_device_binds.cpp
@@ -39,7 +39,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String
 	Vector<String> lines = p_text.split("\n");
 
 	bool reading_versions = false;
-	bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false };
+	bool stage_found[RD::SHADER_STAGE_MAX] = {}; // Value-initialized to false for every stage, whatever SHADER_STAGE_MAX is.
 	RD::ShaderStage stage = RD::SHADER_STAGE_MAX;
 	static const char *stage_str[RD::SHADER_STAGE_MAX] = {
 		"vertex",
@@ -47,6 +47,8 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String
 		"tesselation_control",
"tesselation_evaluation", "compute", + "mesh_task", + "mesh", }; String stage_code[RD::SHADER_STAGE_MAX]; int stages_found = 0; diff --git a/servers/rendering/rendering_device_binds.h b/servers/rendering/rendering_device_binds.h index 89fed7ffa633..a6e953316f98 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -267,6 +267,8 @@ class RDShaderSource : public RefCounted { ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_control"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_evaluation"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_compute"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_mesh_task"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MESH_TASK); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_mesh"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MESH); ADD_GROUP("Syntax", "source_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "language", PROPERTY_HINT_RANGE, "GLSL,HLSL"), "set_language", "get_language"); } @@ -326,12 +328,16 @@ class RDShaderSPIRV : public Resource { ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_control"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_evaluation"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_compute"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_mesh_task"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MESH_TASK); 
+ ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_mesh"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MESH); ADD_GROUP("Compile Error", "compile_error_"); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_vertex"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_VERTEX); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_fragment"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_FRAGMENT); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_control"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_evaluation"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_compute"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_mesh_task"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MESH_TASK); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_mesh"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MESH); } }; @@ -394,7 +400,9 @@ class RDShaderFile : public Resource { "fragment", "tesselation_control", "tesselation_evaluation", - "compute" + "compute", + "mesh_task", + "mesh" }; ERR_PRINT("Error parsing shader '" + p_file + "', version '" + String(E.key) + "', stage '" + stage_str[i] + "':\n\n" + error); diff --git a/servers/rendering/rendering_device_commons.cpp b/servers/rendering/rendering_device_commons.cpp index 03fad5493a01..3c79ee356504 100644 --- a/servers/rendering/rendering_device_commons.cpp +++ b/servers/rendering/rendering_device_commons.cpp @@ -910,4 +910,6 @@ const char *RenderingDeviceCommons::SHADER_STAGE_NAMES[SHADER_STAGE_MAX] = { "TesselationControl", 
"TesselationEvaluation", "Compute", + "MeshTask", + "Mesh", }; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 9d01b6955085..42fab4d4aeb3 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -509,12 +509,16 @@ class RenderingDeviceCommons : public Object { SHADER_STAGE_TESSELATION_CONTROL, SHADER_STAGE_TESSELATION_EVALUATION, SHADER_STAGE_COMPUTE, + SHADER_STAGE_MESH_TASK, + SHADER_STAGE_MESH, SHADER_STAGE_MAX, SHADER_STAGE_VERTEX_BIT = (1 << SHADER_STAGE_VERTEX), SHADER_STAGE_FRAGMENT_BIT = (1 << SHADER_STAGE_FRAGMENT), SHADER_STAGE_TESSELATION_CONTROL_BIT = (1 << SHADER_STAGE_TESSELATION_CONTROL), SHADER_STAGE_TESSELATION_EVALUATION_BIT = (1 << SHADER_STAGE_TESSELATION_EVALUATION), SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE), + SHADER_STAGE_MESH_TASK_BIT = (1 << SHADER_STAGE_MESH_TASK), + SHADER_STAGE_MESH_BIT = (1 << SHADER_STAGE_MESH), }; struct ShaderStageSPIRVData { @@ -844,6 +848,12 @@ class RenderingDeviceCommons : public Object { LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X, LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y, LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z, + LIMIT_MAX_MESH_WORKGROUP_COUNT_X, + LIMIT_MAX_MESH_WORKGROUP_COUNT_Y, + LIMIT_MAX_MESH_WORKGROUP_COUNT_Z, LIMIT_MAX_VIEWPORT_DIMENSIONS_X, LIMIT_MAX_VIEWPORT_DIMENSIONS_Y, LIMIT_SUBGROUP_SIZE, @@ -863,6 +873,7 @@ class RenderingDeviceCommons : public Object { SUPPORTS_ATTACHMENT_VRS, // If not supported, a fragment shader with only side effets (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. 
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, + SUPPORTS_MESH_SHADER, }; enum SubgroupOperations { diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index d2d14676db3f..e28b687d1cae 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -325,6 +325,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_MESH_TASK_SHADER_BIT = (1 << 18), + PIPELINE_STAGE_MESH_SHADER_BIT = (1 << 19), }; enum BarrierAccessBits { @@ -651,6 +653,11 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + // Mesh Shader Drawing. + virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; + virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; + virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + // Buffer binding. 
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) = 0;
 	virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) = 0;
diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp
index b2779af6207f..7b66316eb730 100644
--- a/servers/rendering/rendering_device_graph.cpp
+++ b/servers/rendering/rendering_device_graph.cpp
@@ -825,6 +825,16 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command
 				driver->command_render_draw_indexed_indirect(p_command_buffer, draw_indexed_indirect_instruction->buffer, draw_indexed_indirect_instruction->offset, draw_indexed_indirect_instruction->draw_count, draw_indexed_indirect_instruction->stride);
 				instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction);
 			} break;
+			case DrawListInstruction::TYPE_DISPATCH_MESH: {
+				const DrawListDispatchMeshInstruction *dispatch_mesh_instruction = reinterpret_cast<const DrawListDispatchMeshInstruction *>(instruction);
+				driver->command_render_dispatch_mesh(p_command_buffer, dispatch_mesh_instruction->x_groups, dispatch_mesh_instruction->y_groups, dispatch_mesh_instruction->z_groups);
+				instruction_data_cursor += sizeof(DrawListDispatchMeshInstruction);
+			} break;
+			case DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT: {
+				const DrawListDispatchMeshIndirectInstruction *dispatch_mesh_indirect_instruction = reinterpret_cast<const DrawListDispatchMeshIndirectInstruction *>(instruction);
+				driver->command_render_dispatch_mesh_indirect(p_command_buffer, dispatch_mesh_indirect_instruction->buffer, dispatch_mesh_indirect_instruction->offset, 1, 0); // The instruction stores no count/stride: draw count 1, stride 0.
+				instruction_data_cursor += sizeof(DrawListDispatchMeshIndirectInstruction);
+			} break;
 			case DrawListInstruction::TYPE_EXECUTE_COMMANDS: {
 				const DrawListExecuteCommandsInstruction *execute_commands_instruction = reinterpret_cast<const DrawListExecuteCommandsInstruction *>(instruction);
 				driver->command_buffer_execute_secondary(p_command_buffer, execute_commands_instruction->command_buffer);
@@ -1358,6 +1368,16 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u
 				print_line("\tDRAW INDEXED INDIRECT BUFFER ID", itos(draw_indexed_indirect_instruction->buffer.id), "OFFSET", draw_indexed_indirect_instruction->offset, "DRAW COUNT", draw_indexed_indirect_instruction->draw_count, "STRIDE", draw_indexed_indirect_instruction->stride);
 				instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction);
 			} break;
+			case DrawListInstruction::TYPE_DISPATCH_MESH: {
+				const DrawListDispatchMeshInstruction *dispatch_mesh_instruction = reinterpret_cast<const DrawListDispatchMeshInstruction *>(instruction);
+				print_line("\tDISPATCH MESH", dispatch_mesh_instruction->x_groups, dispatch_mesh_instruction->y_groups, dispatch_mesh_instruction->z_groups);
+				instruction_data_cursor += sizeof(DrawListDispatchMeshInstruction);
+			} break;
+			case DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT: {
+				const DrawListDispatchMeshIndirectInstruction *dispatch_mesh_indirect_instruction = reinterpret_cast<const DrawListDispatchMeshIndirectInstruction *>(instruction);
+				print_line("\tDISPATCH MESH INDIRECT BUFFER ID", itos(dispatch_mesh_indirect_instruction->buffer.id), "OFFSET", dispatch_mesh_indirect_instruction->offset);
+				instruction_data_cursor += sizeof(DrawListDispatchMeshIndirectInstruction);
+			} break;
 			case DrawListInstruction::TYPE_EXECUTE_COMMANDS: {
 				print_line("\tEXECUTE COMMANDS");
 				instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction);
@@ -1790,6 +1810,25 @@ void RenderingDeviceGraph::add_draw_list_draw_indexed_indirect(RDD::BufferID p_b
 	draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT);
 }
 
+// Appends a TYPE_DISPATCH_MESH instruction carrying the three group counts to the current draw list.
+void RenderingDeviceGraph::add_draw_list_dispatch_mesh(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	DrawListDispatchMeshInstruction *instruction = reinterpret_cast<DrawListDispatchMeshInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListDispatchMeshInstruction)));
+	instruction->type = DrawListInstruction::TYPE_DISPATCH_MESH;
+	instruction->x_groups = p_x_groups;
+	instruction->y_groups = p_y_groups;
+	instruction->z_groups = p_z_groups;
+}
+
+// Appends a TYPE_DISPATCH_MESH_INDIRECT instruction that references p_buffer at p_offset.
+void RenderingDeviceGraph::add_draw_list_dispatch_mesh_indirect(RDD::BufferID p_buffer, uint32_t p_offset) {
+	DrawListDispatchMeshIndirectInstruction *instruction = reinterpret_cast<DrawListDispatchMeshIndirectInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListDispatchMeshIndirectInstruction)));
+	instruction->type = DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT;
+	instruction->buffer = p_buffer;
+	instruction->offset = p_offset;
+	// Flag the draw-indirect stage like the other indirect draw recorders do, so the draw list's stage mask covers the indirect argument read.
+	draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT);
+}
+
 void RenderingDeviceGraph::add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer) {
 	DrawListExecuteCommandsInstruction *instruction = reinterpret_cast<DrawListExecuteCommandsInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListExecuteCommandsInstruction)));
 	instruction->type = DrawListInstruction::TYPE_EXECUTE_COMMANDS;
diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h
index adfbb47e840e..2b298c9f6a12 100644
--- a/servers/rendering/rendering_device_graph.h
+++ b/servers/rendering/rendering_device_graph.h
@@ -71,6 +71,8 @@ class RenderingDeviceGraph {
 			TYPE_DRAW_INDEXED,
 			TYPE_DRAW_INDIRECT,
 			TYPE_DRAW_INDEXED_INDIRECT,
+			TYPE_DISPATCH_MESH,
+			TYPE_DISPATCH_MESH_INDIRECT,
 			TYPE_EXECUTE_COMMANDS,
 			TYPE_NEXT_SUBPASS,
 			TYPE_SET_BLEND_CONSTANTS,
@@ -546,6 +548,17 @@ class RenderingDeviceGraph {
 		uint32_t stride = 0;
 	};
 
+	struct DrawListDispatchMeshInstruction : DrawListInstruction { // Payload recorded for TYPE_DISPATCH_MESH.
+		uint32_t x_groups = 0;
+		uint32_t y_groups = 0;
+		uint32_t z_groups = 0;
+	};
+
+	struct DrawListDispatchMeshIndirectInstruction : DrawListInstruction { // Payload recorded for TYPE_DISPATCH_MESH_INDIRECT.
+		RDD::BufferID buffer;
+		uint32_t offset = 0;
+	};
+
 	struct DrawListEndRenderPassInstruction : DrawListInstruction {
 		// No contents.
}; @@ -768,6 +781,8 @@ class RenderingDeviceGraph { void add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index); void add_draw_list_draw_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride); void add_draw_list_draw_indexed_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride); + void add_draw_list_dispatch_mesh(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + void add_draw_list_dispatch_mesh_indirect(RDD::BufferID p_buffer, uint32_t p_offset); void add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer); void add_draw_list_next_subpass(RDD::CommandBufferType p_command_buffer_type); void add_draw_list_set_blend_constants(const Color &p_color);