From 1536b822c388f294c78bc52beaf44dd1831c3a48 Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Tue, 20 Aug 2024 16:20:32 -0600 Subject: [PATCH 01/10] Add Mobile NeRF Ray Query sample initial files Signed-off-by: Rodrigo Holztrattner --- antora/modules/ROOT/nav.adoc | 1 + samples/general/README.adoc | 6 +- .../mobile_nerf_rayquery/CMakeLists.txt | 28 + .../general/mobile_nerf_rayquery/README.adoc | 33 + .../mobile_nerf_rayquery.cpp | 1395 +++++++++++++++++ .../mobile_nerf_rayquery.h | 204 +++ shaders/mobile_nerf_rayquery/quad.vert | 38 + .../rayquery_morpheus.frag | 351 +++++ .../rayquery_morpheus_combo.frag | 351 +++++ 9 files changed, 2406 insertions(+), 1 deletion(-) create mode 100644 samples/general/mobile_nerf_rayquery/CMakeLists.txt create mode 100644 samples/general/mobile_nerf_rayquery/README.adoc create mode 100644 samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp create mode 100644 samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h create mode 100644 shaders/mobile_nerf_rayquery/quad.vert create mode 100644 shaders/mobile_nerf_rayquery/rayquery_morpheus.frag create mode 100644 shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc index f28b626e9..cf9785ee9 100644 --- a/antora/modules/ROOT/nav.adoc +++ b/antora/modules/ROOT/nav.adoc @@ -117,6 +117,7 @@ ** xref:samples/tooling/profiles/README.adoc[Profiles] * xref:samples/general/README.adoc[General samples] ** xref:samples/general/mobile_nerf/README.adoc[Mobile NeRF] +** xref:samples/general/mobile_nerf_rayquery/README.adoc[Mobile NeRF Ray Query] * xref:docs/README.adoc[General documentation] ** xref:docs/build.adoc[Build guide] ** xref:docs/memory_limits.adoc[Memory limits] diff --git a/samples/general/README.adoc b/samples/general/README.adoc index b6fb95940..b70518f4a 100644 --- a/samples/general/README.adoc +++ b/samples/general/README.adoc @@ -24,4 +24,8 @@ The goal of these samples is to 
demonstrate different techniques or showcase com === xref:./{general_samplespath}mobile_nerf/README.adoc[Mobile NeRF] -A Neural Radiance Field synthesizer sample, based on textured polygons. \ No newline at end of file +A Neural Radiance Field synthesizer sample, based on textured polygons. + +=== xref:./{general_samplespath}mobile_nerf_rayquery/README.adoc[Mobile NeRF Ray Query] + +A Mobile Neural Radiance Field synthesizer sample using ray query, based on textured polygons. diff --git a/samples/general/mobile_nerf_rayquery/CMakeLists.txt b/samples/general/mobile_nerf_rayquery/CMakeLists.txt new file mode 100644 index 000000000..c54d23a98 --- /dev/null +++ b/samples/general/mobile_nerf_rayquery/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 the "License"; +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) +get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) +get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +add_sample_with_tags( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Qualcomm" + NAME "Mobile NeRF Ray Query" + DESCRIPTION "A Mobile Neural Radiance Field synthesizer sample using ray query, based on textured polygons." 
+) diff --git a/samples/general/mobile_nerf_rayquery/README.adoc b/samples/general/mobile_nerf_rayquery/README.adoc new file mode 100644 index 000000000..0d56054be --- /dev/null +++ b/samples/general/mobile_nerf_rayquery/README.adoc @@ -0,0 +1,33 @@ +//// +- Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// + += Mobile NeRF Ray Query + +ifdef::site-gen-antora[] +TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf_rayquery[Khronos Vulkan samples github repository]. +endif::[] + +NeRF is a new 3D representation method in Computer Vision that creates images of a 3D scene using several 2D pictures taken from different viewpoints. +This method constructs a representation of the 3D volume. Various adaptations of NeRF target different use cases, including MobileNeRF, which focuses on rendering NeRF efficiently on mobile phones by leveraging existing traditional graphic hardware. +This version enhances the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf[previous MobileNeRF implementation] by using the Vulkan Ray Query feature, which leverages the hardware ray tracing capabilities of the Adreno GPU. +This enhancement greatly boosts performance in most use cases.
Additionally, the Vulkan API provides great flexibility for modifying and optimizing the rendering pipeline and shaders, enabling more functionalities while delivering optimal performance. + +== Notes +The original source code is also licensed under Apache-2.0, all shader files used by the sample have comments to indicate changes, when applicable. diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp new file mode 100644 index 000000000..c589bca01 --- /dev/null +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -0,0 +1,1395 @@ +/* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mobile_nerf_rayquery.h" +#include "filesystem/legacy.h" +#include "glm/gtx/matrix_decompose.hpp" +#include "gltf_loader.h" +#include "platform/platform.h" +#include "rendering/subpasses/forward_subpass.h" +#include "scene_graph/components/material.h" +#include "scene_graph/components/mesh.h" +#include "scene_graph/components/perspective_camera.h" + +namespace +{ +constexpr uint32_t MIN_THREAD_COUNT = 1; +struct RequestFeature +{ + vkb::PhysicalDevice &gpu; + explicit RequestFeature(vkb::PhysicalDevice &gpu) : + gpu(gpu) + {} + + template + RequestFeature &request(VkStructureType s_type, VkBool32 T::*member) + { + auto &member_feature = gpu.request_extension_features(s_type); + member_feature.*member = VK_TRUE; + return *this; + } +}; + +template +struct CopyBuffer +{ + std::vector operator()(std::unordered_map &buffers, const char *buffer_name) + { + auto iter = buffers.find(buffer_name); + if (iter == buffers.cend()) + { + return {}; + } + auto &buffer = iter->second; + std::vector out; + + const size_t sz = buffer.get_size(); + out.resize(sz / sizeof(T)); + const bool already_mapped = buffer.get_data() != nullptr; + if (!already_mapped) + { + buffer.map(); + } + memcpy(&out[0], buffer.get_data(), sz); + if (!already_mapped) + { + buffer.unmap(); + } + return out; + } +}; +} // namespace + +void camera_set_look_at(vkb::Camera &camera, const glm::vec3 pos, const glm::vec3 look, const glm::vec3 up) +{ + auto view_matrix = glm::lookAt(pos, look, up); + + glm::vec3 scale; + glm::quat orientation; + glm::vec3 translation; + glm::vec3 skew; + glm::vec4 perspective; + glm::decompose(view_matrix, scale, orientation, translation, skew, perspective); + + camera.set_rotation(glm::eulerAngles(orientation) * glm::pi() / 180.f); + camera.set_translation(translation); +} + +MobileNerfRayQuery::MobileNerfRayQuery() +{ + title = "Mobile Nerf Ray Query"; + + // Scalar Block Layout Extension requires Vulkan 1.2 + set_api_version(VK_API_VERSION_1_2); + + // Required 
by VK_KHR_acceleration_structure + add_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + add_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); + add_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); + add_device_extension(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME); + + // Required for ray queries + add_device_extension(VK_KHR_RAY_QUERY_EXTENSION_NAME); + + // Required by VK_KHR_spirv_1_4 + add_device_extension(VK_KHR_SPIRV_1_4_EXTENSION_NAME); + add_device_extension(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + + // Use this extension for better storage buffers layout + add_device_extension(VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME); +} + +MobileNerfRayQuery::~MobileNerfRayQuery() +{ + if (has_device()) + { + if (render_pass_nerf) + { + vkDestroyRenderPass(get_device().get_handle(), render_pass_nerf, nullptr); + } + + for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) + { + if (framebuffers_nerf[i]) + { + vkDestroyFramebuffer(get_device().get_handle(), framebuffers_nerf[i], nullptr); + } + } + + auto device_ptr = get_device().get_handle(); + + for (auto &model : models) + { + model.vertex_buffer.reset(); + model.index_buffer.reset(); + + vkDestroySampler(get_device().get_handle(), model.texture_input_0.sampler, nullptr); + vkDestroySampler(get_device().get_handle(), model.texture_input_1.sampler, nullptr); + } + + vkDestroyPipeline(device_ptr, pipeline, nullptr); + vkDestroyPipelineLayout(device_ptr, pipeline_layout, nullptr); + vkDestroyDescriptorSetLayout(device_ptr, descriptor_set_layout_common, nullptr); + vkDestroyDescriptorSetLayout(device_ptr, descriptor_set_layout_indices, nullptr); + vkDestroyDescriptorSetLayout(device_ptr, descriptor_set_layout_vertices, nullptr); + vkDestroyDescriptorSetLayout(device_ptr, descriptor_set_layout_feature1, nullptr); + vkDestroyDescriptorSetLayout(device_ptr, descriptor_set_layout_feature2, nullptr); + + for (auto &weights_buffer : weights_buffers) + weights_buffer.reset(); 
+ + uniform_buffer.reset(); + } +} + +bool MobileNerfRayQuery::prepare(const vkb::ApplicationOptions &options) +{ + read_json_map(); + + // Load the mlp for each model + mlp_weight_vector.resize(num_models); + for (int i = 0; i < num_models; i++) + { + initialize_mlp_uniform_buffers(i); + } + + if (!ApiVulkanSample::prepare(options)) + { + return false; + } + + load_shaders(); + update_render_pass(); + setup_framebuffers(); + // Because we have our own customized render pass, the UI render pass need to be updated with load on load so it won't + // clear out the written color attachment + update_render_pass_flags(RenderPassCreateFlags::ColorAttachmentLoad); + + // Setup camera + camera.type = vkb::CameraType::LookAt; + camera_pos.y = -camera_pos.y; // flip y to keep consistency of the init pos between rayquery and rasterization + camera_set_look_at(camera, camera_pos, glm::vec3(0.0f), glm::vec3(0.0f, 1.0f, 0.0f)); + + camera.set_perspective(60.0f, static_cast(width) / static_cast(height), 0.01f, 256.0f); + + // Each models may have submodels + int models_entry = 0; + for (int model_index = 0; model_index < num_models; model_index++) + { + int num_sub_model = models[models_entry].sub_model_num; + + for (int sub_model_index = 0; sub_model_index < num_sub_model; sub_model_index++) + { + load_scene(model_index, sub_model_index, models_entry); + create_texture(model_index, sub_model_index, models_entry); + create_static_object_buffers(models_entry); + create_bottom_level_acceleration_structure(models_entry); + models_entry++; + } + } + + create_top_level_acceleration_structure(); + create_uniforms(); + create_pipeline_layout(); + create_descriptor_pool(); + create_descriptor_sets(); + prepare_pipelines(); + build_command_buffers(); + + prepared = true; + LOGI("Prepare Done!"); + return true; +} + +void MobileNerfRayQuery::request_gpu_features(vkb::PhysicalDevice &gpu) +{ + RequestFeature(gpu) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, 
&VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, &VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructure) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, &VkPhysicalDeviceRayQueryFeaturesKHR::rayQuery) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::shaderSampledImageArrayNonUniformIndexing) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::shaderStorageBufferArrayNonUniformIndexing) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::runtimeDescriptorArray) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::descriptorBindingVariableDescriptorCount) + .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT, &VkPhysicalDeviceScalarBlockLayoutFeaturesEXT::scalarBlockLayout); +} + +void MobileNerfRayQuery::render(float delta_time) +{ + if (!prepared) + { + return; + } + + draw(); + + update_uniform_buffer(); +} + +void MobileNerfRayQuery::read_json_map() +{ + std::string assetBase = vkb::fs::path::get(vkb::fs::path::Type::Assets); + LOGI("Base assets path: {}", assetBase); + +#if defined(NERF_JSON_FILE) + const std::string nerf_obj_map = assetBase + "scenes/mobile_nerf_models.json"; + + std::ifstream f(nerf_obj_map); + + if (!f) + { + LOGE("Failed to open nerf obj map data"); + assert(0); + } + + LOGI("Parsing nerf obj map data {}", nerf_obj_map); + + json raw_asset_map = json::parse(f); +#else + + const std::string nerf_obj_json = + R"V0G0N( + { + "width": 0, + + "height": 0, + + "texture_type": "8bit", + + "target_model": "lego_combo", + + "rotation": false, + + "lego_ball":{ + "path": 
"scenes/morpheus_team/lego_ball_phone/", + "num_sub_model": 1, + "camera": [-1, 1, 1], + "instancing":{ + "dim": [1, 1, 1], + "interval": [2.0, 2.0, 2.0] + } + }, + + "lego_boba_fett":{ + "path": "scenes/morpheus_team/lego_boba_fett_phone/", + "num_sub_model": 1, + "camera": [-1, 1, 1], + "instancing":{ + "dim": [1, 1, 1], + "interval": [2.0, 2.0, 2.0] + } + }, + + "lego_monster_truck":{ + "path": "scenes/morpheus_team/lego_monster_truck_phone/", + "num_sub_model": 1, + "camera": [-1, 1, 1], + "instancing":{ + "dim": [1, 1, 1], + "interval": [2.0, 2.0, 2.0] + } + }, + + "lego_tractor":{ + "path": "scenes/morpheus_team/lego_tractor_phone/", + "num_sub_model": 1, + "camera": [-1, 1, 1], + "instancing":{ + "dim": [1, 1, 1], + "interval": [2.0, 2.0, 2.0] + } + }, + + "lego_combo":{ + "combo": true, + "models": ["scenes/morpheus_team/lego_ball_phone/", "scenes/morpheus_team/lego_boba_fett_phone/", + "scenes/morpheus_team/lego_monster_truck_phone/", "scenes/morpheus_team/lego_tractor_phone/"], + "camera": [-0.0381453, 1.84186, -1.51744], + "instancing":{ + "dim": [2, 2, 2], + "interval": [1.5, 1.5, 1.5] + } + } + } + )V0G0N"; + + json raw_asset_map = json::parse(nerf_obj_json); + +#endif + + std::string target_model = raw_asset_map["target_model"].get(); + asset_map = raw_asset_map[target_model]; + + // Load combo models or a single model. In combo mode, we have multiple sets of weights. 
+ if (!asset_map["combo"].is_null()) + combo_mode = asset_map["combo"].get(); + else + combo_mode = false; + + if (combo_mode) + { + model_path.resize(asset_map["models"].size()); + for (int i = 0; i < model_path.size(); i++) + { + model_path[i] = asset_map["models"][i].get(); + LOGI("Target model: {}, asset path: {}", target_model, model_path[i]); + } + } + else + { + model_path.resize(1); + model_path[0] = asset_map["path"].get(); + LOGI("Target model: {}, asset path: {}", target_model, model_path[0]); + } + num_models = model_path.size(); + + // Read Texture Format + std::string textureType = raw_asset_map["texture_type"].get(); + + if (textureType == "8bit") + { + LOGI("Using VK_FORMAT_R8G8B8A8_UNORM for feature texture"); + feature_map_format = VK_FORMAT_R8G8B8A8_UNORM; + } + else if (textureType == "16bit") + { + LOGI("Using VK_FORMAT_R16G16B16A16_SFLOAT for feature texture"); + feature_map_format = VK_FORMAT_R16G16B16A16_SFLOAT; + } + else if (textureType == "32bit") + { + LOGI("Using VK_FORMAT_R32G32B32A32_SFLOAT for feature texture"); + feature_map_format = VK_FORMAT_R32G32B32A32_SFLOAT; + } + else if (textureType == "8bit") + { + LOGI("Using VK_FORMAT_R8G8B8A8_UNORM for feature texture"); + feature_map_format = VK_FORMAT_R8G8B8A8_UNORM; + } + else + { + LOGW("Unrecognized feature texture type, using VK_FORMAT_R32G32B32A32_SFLOAT"); + feature_map_format = VK_FORMAT_R32G32B32A32_SFLOAT; + } + + // Rotation mode + do_rotation = raw_asset_map["rotation"].get(); + + // Read view port size. Use defualt setting (1280x720) if size is 0. 
+ view_port_width = raw_asset_map["width"].get(); + view_port_height = raw_asset_map["height"].get(); + + if (view_port_width == 0 || view_port_height == 0) + { + view_port_width = width; + view_port_height = height; + use_native_screen_size = true; + } + + // Read camera position + if (asset_map["camera"].is_array() && asset_map["camera"].size() == 3) + { + camera_pos = glm::vec3(asset_map["camera"][0].get(), asset_map["camera"][1].get(), asset_map["camera"][2].get()); + } + else + { + LOGW("Fail to read camera position. Use defualt value."); + } + + // Read instancing rendering settings. + json instacing_map = asset_map["instancing"]; + if (instacing_map["dim"].is_array() && instacing_map["dim"].size() == 3) + { + instancing_info.dim = glm::vec3(instacing_map["dim"][0].get(), instacing_map["dim"][1].get(), instacing_map["dim"][2].get()); + } + else + { + LOGE("Wrong instancing dimension. Terminating..."); + exit(1); + } + + if (instacing_map["interval"].is_array() && instacing_map["interval"].size() == 3) + { + instancing_info.interval = glm::vec3(instacing_map["interval"][0].get(), instacing_map["interval"][1].get(), instacing_map["interval"][2].get()); + } + else + { + LOGE("Wrong instancing interval. Terminating..."); + exit(1); + } + + if (instancing_info.dim.x <= 0 || instancing_info.dim.y <= 0 || instancing_info.dim.z <= 0 || instancing_info.interval.x <= 0.f || instancing_info.interval.y <= 0.f || instancing_info.interval.z <= 0.f) + { + LOGE("Instancing settings must be positive. 
Terminating..."); + exit(1); + } +} + +void MobileNerfRayQuery::initialize_mlp_uniform_buffers(int model_index) +{ + std::string assetBase = vkb::fs::path::get(vkb::fs::path::Type::Assets); + std::string mlpJsonPath = assetBase + model_path[model_index] + "mlp.json"; + + using json = nlohmann::json; + + std::ifstream f(mlpJsonPath); + + if (!f) + { + LOGE("Failed to open mlp data"); + assert(0); + } + + LOGI("Parsing mlp data {}", mlpJsonPath); + json data = json::parse(f); + + // Record a index of the first sub-model + int first_sub_model = models.size(); + int obj_num = data["obj_num"].get(); + + // Here we know the actual number of sub models + int next_sub_model_index = models.size(); + models.resize(models.size() + obj_num); + + for (int i = next_sub_model_index; i < models.size(); i++) + { + models[i].model_index = model_index; + } + + auto weights_0_array_raw = data["0_weights"].get>>(); + + std::vector weights_0_array; + + for (auto ii = weights_0_array_raw.begin(); ii != weights_0_array_raw.end(); ii++) + { + weights_0_array.insert(weights_0_array.end(), (*ii).begin(), (*ii).end()); + } + + if (weights_0_array.size() != WEIGHTS_0_COUNT) + { + LOGE("MLP data layer 0 weights count is {}, rather than {}", weights_0_array.size(), WEIGHTS_0_COUNT); + } + + auto bias_0_array = data["0_bias"].get>(); + + if (bias_0_array.size() != BIAS_0_COUNT) + { + LOGE("MLP data layer 0 bias count is {}, rather than {}", bias_0_array.size(), BIAS_0_COUNT); + } + + auto weights_1_array_raw = data["1_weights"].get>>(); + + std::vector weights_1_array; + + for (auto ii = weights_1_array_raw.begin(); ii != weights_1_array_raw.end(); ii++) + { + weights_1_array.insert(weights_1_array.end(), (*ii).begin(), (*ii).end()); + } + + if (weights_1_array.size() != WEIGHTS_1_COUNT) + { + LOGE("MLP data layer 1 weights count is {}, rather than {}", weights_1_array.size(), WEIGHTS_1_COUNT); + } + + auto bias_1_array = data["1_bias"].get>(); + + if (bias_1_array.size() != BIAS_1_COUNT) + { + 
LOGE("MLP data layer 1 bias count is {}, rather than {}", bias_1_array.size(), BIAS_1_COUNT); + } + + auto weights_2_array_raw = data["2_weights"].get>>(); + + std::vector weights_2_array; + + for (auto ii = weights_2_array_raw.begin(); ii != weights_2_array_raw.end(); ii++) + { + weights_2_array.insert(weights_2_array.end(), (*ii).begin(), (*ii).end()); + } + + // We need to pad the layer 2's weights with 16 zeros + if (weights_2_array.size() != WEIGHTS_2_COUNT - 16) + { + LOGE("MLP data layer 2 weights count is {}, rather than {}", weights_2_array.size(), WEIGHTS_2_COUNT); + } + + auto bias_2_array = data["2_bias"].get>(); + + if (bias_2_array.size() != BIAS_2_COUNT - 1) + { + LOGE("MLP data layer 2 bias count is {}, rather than {}", bias_2_array.size(), BIAS_2_COUNT); + } + + // Each sub model will share the same mlp weights data + mlp_weights &model_mlp = mlp_weight_vector[model_index]; + + for (int ii = 0; ii < WEIGHTS_0_COUNT; ii++) + { + model_mlp.data[ii] = weights_0_array[ii]; + } + + for (int ii = 0; ii < WEIGHTS_1_COUNT; ii++) + { + model_mlp.data[WEIGHTS_0_COUNT + ii] = weights_1_array[ii]; + } + + // We need to pad the layer 2's weights with zeros for every 3 weights to make it 16 bytes aligned + int raw_weight_cnt = 0; + for (int ii = 0; ii < WEIGHTS_2_COUNT; ii++) + { + if ((ii + 1) % 4 == 0) + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + ii] = 0.0f; + } + else + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + ii] = weights_2_array[raw_weight_cnt++]; + } + } + + for (int ii = 0; ii < BIAS_0_COUNT; ii++) + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + ii] = bias_0_array[ii]; + } + + for (int ii = 0; ii < BIAS_1_COUNT; ii++) + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + ii] = bias_1_array[ii]; + } + + // We need to pad the layer 2's bias with zeros for every 3 weights to make it 16 bytes aligned + for (int ii = 0; ii < BIAS_2_COUNT; ii++) + { + if ((ii + 1) % 4 == 
0) + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT + ii] = 0.0f; + } + else + { + model_mlp.data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT + ii] = bias_2_array[ii]; + } + } + + // Update all sub model with the same mlp weight + for (int i = 0; i < obj_num; i++) + { + models[first_sub_model + i].sub_model_num = obj_num; + } +} + +void MobileNerfRayQuery::load_shaders() +{ + shader_stages[0] = load_shader("mobile_nerf_rayquery/quad.vert", VK_SHADER_STAGE_VERTEX_BIT); + shader_stages[1] = load_shader( + combo_mode ? + "mobile_nerf_rayquery/rayquery_morpheus_combo.frag" : + "mobile_nerf_rayquery/rayquery_morpheus.frag", + VK_SHADER_STAGE_FRAGMENT_BIT); +} + +void MobileNerfRayQuery::update_render_pass() +{ + // 0: Depth attachment + // 1: Swapchain attachment + std::array attachments = {}; + // Depth attachment + attachments[0].format = depth_format; + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachments[0].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + // Swapchain attachment + attachments[1].format = get_render_context().get_swapchain().get_format(); + attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachments[1].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentReference depth_reference = {}; + depth_reference.attachment = 0; 
+ depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkAttachmentReference swapchain_reference = {}; + swapchain_reference.attachment = 1; + swapchain_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &swapchain_reference; + subpass.pDepthStencilAttachment = &depth_reference; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + subpass.pResolveAttachments = nullptr; + + VkRenderPassCreateInfo render_pass_create_info = {}; + render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.attachmentCount = static_cast(attachments.size()); + render_pass_create_info.pAttachments = attachments.data(); + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &subpass; + + VK_CHECK(vkCreateRenderPass(get_device().get_handle(), &render_pass_create_info, nullptr, &render_pass_nerf)); +} + +void MobileNerfRayQuery::setup_framebuffers() +{ + // Delete existing frame buffers + if (framebuffers_nerf.size() > 0) + { + for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) + { + if (framebuffers_nerf[i] != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(get_device().get_handle(), framebuffers_nerf[i], nullptr); + } + } + } + + std::vector views; + views.resize(2); + views[0] = depth_stencil.view; + + // Depth/Stencil attachment is the same for all frame buffers + VkFramebufferCreateInfo framebuffer_create_info = {}; + framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_create_info.pNext = NULL; + framebuffer_create_info.renderPass = render_pass_nerf; + framebuffer_create_info.attachmentCount = views.size(); + framebuffer_create_info.pAttachments = views.data(); + 
framebuffer_create_info.width = get_render_context().get_surface_extent().width; + framebuffer_create_info.height = get_render_context().get_surface_extent().height; + framebuffer_create_info.layers = 1; + + framebuffers_nerf.resize(swapchain_buffers.size()); + + for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) + { + views[1] = swapchain_buffers[i].view; + VK_CHECK(vkCreateFramebuffer(get_device().get_handle(), &framebuffer_create_info, nullptr, &framebuffers_nerf[i])); + } +} + +void MobileNerfRayQuery::load_scene(int model_index, int sub_model_index, int models_entry) +{ + Model &model = models[models_entry]; + + vkb::GLTFLoader loader{get_device()}; + int total_sub_sub_model = 1; + + for (int sub_model = 0; sub_model < total_sub_sub_model; sub_model++) + { + std::string inputfile(model_path[model_index] + "shape" + std::to_string(sub_model_index)); + + if (total_sub_sub_model > 1) + { + inputfile += ("_" + std::to_string(sub_model) + ".gltf"); + } + else + { + inputfile += (".gltf"); + } + + LOGI("Parsing nerf obj {}", inputfile); + + auto scene = loader.read_scene_from_file(inputfile); + + for (auto &&mesh : scene->get_components()) + { + for (auto &&sub_mesh : mesh->get_submeshes()) + { + auto pts_ = CopyBuffer{}(sub_mesh->vertex_buffers, "position"); + const auto texcoord_ = CopyBuffer{}(sub_mesh->vertex_buffers, "texcoord_0"); + const auto vertex_start_index = static_cast(model.vertices.size()); + + // Copy vertex data + { + model.vertices.resize(vertex_start_index + pts_.size()); + for (size_t i = 0; i < pts_.size(); ++i) + { + model.vertices[vertex_start_index + i].position = pts_[i]; + model.vertices[vertex_start_index + i].tex_coord = glm::vec2(texcoord_[i].x, 1.0f - texcoord_[i].y); + } + } + + // Copy index data + { + auto index_buffer_ = sub_mesh->index_buffer.get(); + if (index_buffer_) + { + assert(sub_mesh->index_type == VkIndexType::VK_INDEX_TYPE_UINT32); + const size_t sz = index_buffer_->get_size(); + const size_t nTriangles = sz / 
sizeof(uint32_t) / 3; + const auto triangle_start_index = static_cast(model.indices.size()); + model.indices.resize(triangle_start_index + nTriangles); + auto ptr = index_buffer_->get_data(); + assert(!!ptr); + std::vector tempBuffer(nTriangles * 3); + memcpy(&tempBuffer[0], ptr, sz); + for (size_t i = 0; i < nTriangles; ++i) + { + model.indices[triangle_start_index + i] = {vertex_start_index + static_cast(tempBuffer[3 * i]), + vertex_start_index + static_cast(tempBuffer[3 * i + 1]), + vertex_start_index + static_cast(tempBuffer[3 * i + 2])}; + } + } + } + } + } + } +} + +void MobileNerfRayQuery::create_texture(int model_index, int sub_model_index, int models_entry) +{ + std::string feature_0_path = model_path[model_index] + "shape" + std::to_string(sub_model_index) + ".pngfeat0.png"; + std::string feature_1_path = model_path[model_index] + "shape" + std::to_string(sub_model_index) + ".pngfeat1.png"; + + LOGI("Creating feature texture 0"); + create_texture_helper(feature_0_path, models[models_entry].texture_input_0); + LOGI("Done Creating feature texture 0"); + + LOGI("Creating feature texture 1"); + create_texture_helper(feature_1_path, models[models_entry].texture_input_1); + LOGI("Done Creating feature texture 0"); +} + +void MobileNerfRayQuery::create_texture_helper(std::string const &texturePath, Texture &texture_input) +{ + // Feature textures are in linear space instead of sRGB space + texture_input = load_texture(texturePath, vkb::sg::Image::Other); + vkDestroySampler(get_device().get_handle(), texture_input.sampler, nullptr); + + // Calculate valid filter + VkFilter filter = VK_FILTER_LINEAR; + vkb::make_filters_valid(get_device().get_gpu().get_handle(), texture_input.image->get_format(), &filter); + + VkSamplerCreateInfo samplerCreateInfo = {}; + samplerCreateInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerCreateInfo.magFilter = filter; + samplerCreateInfo.minFilter = filter; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + 
samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.minLod = 0.0f; + samplerCreateInfo.maxLod = 16.0f; + samplerCreateInfo.unnormalizedCoordinates = VK_FALSE; + VK_CHECK(vkCreateSampler(get_device().get_handle(), &samplerCreateInfo, 0, &texture_input.sampler)); +} + +void MobileNerfRayQuery::create_static_object_buffers(int models_entry) +{ + LOGI("Creating static object buffers"); + Model &model = models[models_entry]; + auto vertex_buffer_size = model.vertices.size() * sizeof(Vertex); + auto index_buffer_size = model.indices.size() * sizeof(model.indices[0]); + + // Note that in contrast to a typical pipeline, our vertex/index buffer requires the acceleration structure build flag in rayquery + // Create a staging buffer + const VkBufferUsageFlags buffer_usage_flags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + const VkBufferUsageFlags staging_flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + // Create destination buffers + model.vertex_buffer = std::make_unique( + get_device(), + vertex_buffer_size, + buffer_usage_flags | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + model.vertex_buffer->set_debug_name(fmt::format("Model #{} vertices", models_entry)); + model.index_buffer = std::make_unique( + get_device(), + index_buffer_size, + buffer_usage_flags | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + model.index_buffer->set_debug_name(fmt::format("Model #{} indices", models_entry)); + + // Create staging buffers + std::unique_ptr staging_vertex_buffer = std::make_unique( + get_device(), + vertex_buffer_size, + staging_flags, + VMA_MEMORY_USAGE_CPU_TO_GPU); + staging_vertex_buffer->update(model.vertices); + + std::unique_ptr 
staging_index_buffer = std::make_unique( + get_device(), + index_buffer_size, + staging_flags, + VMA_MEMORY_USAGE_CPU_TO_GPU); + staging_index_buffer->update(model.indices); + + // Copy over the data for each of the models + with_vkb_command_buffer([&](vkb::CommandBuffer &cmd) { + cmd.copy_buffer(*staging_vertex_buffer, *model.vertex_buffer, staging_vertex_buffer->get_size()); + cmd.copy_buffer(*staging_index_buffer, *model.index_buffer, staging_index_buffer->get_size()); + }); + + LOGI("Done Creating static object buffers"); +} + +void MobileNerfRayQuery::create_uniforms() +{ + weights_buffers.resize(num_models); + + LOGI("Creating camera view uniform buffer"); + uniform_buffer = std::make_unique(get_device(), + sizeof(global_uniform), + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + + for (int i = 0; i < num_models; i++) + { + LOGI("Creating mlp weights uniform buffer for model {}", i); + weights_buffers[i] = std::make_unique(get_device(), + sizeof(mlp_weights), + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + } + + update_uniform_buffer(); + update_weights_buffers(); +} + +void MobileNerfRayQuery::update_uniform_buffer() +{ + assert(uniform_buffer); + + camera.set_perspective(fov, (float) width / (float) height, 0.01f, 200.0f); + const float tan_half_fov = tan(0.5 * fov / 180.0f * 3.141592653589793f); + + global_uniform.camera_position = camera.position; + global_uniform.camera_side = glm::vec3(camera.matrices.view[0][0], camera.matrices.view[1][0], camera.matrices.view[2][0]); + global_uniform.camera_up = glm::vec3(camera.matrices.view[0][1], camera.matrices.view[1][1], camera.matrices.view[2][1]); + global_uniform.camera_lookat = glm::vec3(camera.matrices.view[0][2], camera.matrices.view[1][2], camera.matrices.view[2][2]); + global_uniform.img_dim = glm::vec2(width, height); + global_uniform.tan_half_fov = tan_half_fov; + + 
uniform_buffer->update(&global_uniform, sizeof(GlobalUniform)); +} + +// No need to be updated for every frames +void MobileNerfRayQuery::update_weights_buffers() +{ + for (int i = 0; i < num_models; i++) + { + weights_buffers[i]->update(&(mlp_weight_vector[i].data[0]), sizeof(mlp_weights)); + } +} + +uint64_t MobileNerfRayQuery::get_buffer_device_address(VkBuffer buffer) +{ + VkBufferDeviceAddressInfoKHR buffer_device_address_info{}; + buffer_device_address_info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + buffer_device_address_info.buffer = buffer; + return vkGetBufferDeviceAddressKHR(get_device().get_handle(), &buffer_device_address_info); +} + +void MobileNerfRayQuery::create_top_level_acceleration_structure() +{ + std::vector acceleration_structure_instances; + auto add_instance = [&](Model &model, const VkTransformMatrixKHR &transform_matrix, uint32_t instance_index) { + VkAccelerationStructureInstanceKHR acceleration_structure_instance{}; + acceleration_structure_instance.transform = transform_matrix; + acceleration_structure_instance.instanceCustomIndex = instance_index; // this is the model index instead of the instance index in instancing rendering. + // need this to index correct weights and vertex & index buffer in shader. 
+ acceleration_structure_instance.mask = 0xFF; + acceleration_structure_instance.instanceShaderBindingTableRecordOffset = 0; + acceleration_structure_instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + acceleration_structure_instance.accelerationStructureReference = model.bottom_level_acceleration_structure->get_device_address(); + acceleration_structure_instances.emplace_back(acceleration_structure_instance); + }; + + auto &ii = instancing_info; + glm::vec3 offset; + glm::vec3 corner_pos = -ii.interval * 0.5f * (glm::vec3(ii.dim - 1)); + for (int x = 0; x < ii.dim.x; ++x) + { + offset.x = corner_pos.x + ii.interval.x * x; + for (int y = 0; y < ii.dim.y; ++y) + { + offset.y = corner_pos.y + ii.interval.y * y; + for (int z = 0; z < ii.dim.z; ++z) + { + offset.z = corner_pos.z + ii.interval.z * z; + VkTransformMatrixKHR transform_matrix = { + 1.0f, + 0.0f, + 0.0f, + offset.x, + 0.0f, + 1.0f, + 0.0f, + offset.y, + 0.0f, + 0.0f, + 1.0f, + offset.z, + }; + for (size_t i = 0; i < models.size(); ++i) + { + add_instance(models[i], transform_matrix, i); + } + } + } + } + + LOGI("model num: {}", models.size()); + + const size_t instancesDataSize = sizeof(VkAccelerationStructureInstanceKHR) * acceleration_structure_instances.size(); + std::unique_ptr instances_buffer = std::make_unique(get_device(), + instancesDataSize, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + instances_buffer->update(acceleration_structure_instances.data(), instancesDataSize); + + top_level_acceleration_structure = std::make_unique(get_device(), VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); + top_level_acceleration_structure->add_instance_geometry(instances_buffer, acceleration_structure_instances.size()); + top_level_acceleration_structure->build(queue); +} + +void MobileNerfRayQuery::create_bottom_level_acceleration_structure(int model_entry) +{ + Model &model = 
models[model_entry]; + + // Create buffers for the bottom level geometry + // Note that the buffer usage flags for buffers consumed by the bottom level acceleration structure require special flags + const VkBufferUsageFlags buffer_usage_flags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + + // Set up a single transformation matrix that can be used to transform the whole geometry for a single bottom level acceleration structure + VkTransformMatrixKHR transform_matrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f}; + if (combo_mode) + { + // Use hard-coded transformation under combo mode + glm::mat4x4 &M = combo_model_transform[model_entry]; + transform_matrix = { + M[0][0], M[1][0], M[2][0], M[3][0], + M[0][1], M[1][1], M[2][1], -M[3][1], + M[0][2], M[1][2], M[2][2], M[3][2]}; + } + std::unique_ptr transform_matrix_buffer = std::make_unique(get_device(), sizeof(transform_matrix), buffer_usage_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); + transform_matrix_buffer->update(&transform_matrix, sizeof(transform_matrix)); + + if (model.bottom_level_acceleration_structure == nullptr) + { + model.bottom_level_acceleration_structure = std::make_unique( + get_device(), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); + model.bottom_level_acceleration_structure->add_triangle_geometry( + model.vertex_buffer, + model.index_buffer, + transform_matrix_buffer, + model.indices.size(), + model.vertices.size(), + sizeof(Vertex), + 0, VK_FORMAT_R32G32B32_SFLOAT, VK_GEOMETRY_OPAQUE_BIT_KHR, + get_buffer_device_address(model.vertex_buffer->get_handle()), + get_buffer_device_address(model.index_buffer->get_handle())); + } + model.bottom_level_acceleration_structure->build(queue, VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR); +} + +void MobileNerfRayQuery::create_pipeline_layout() +{ + // Use multiple descriptor sets due to the 
limitation of using variable size resource array + // see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_descriptor_indexing.html + + std::vector set_layout_bindings_common = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 0), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_FRAGMENT_BIT, 1), + }; + + // Add an array of weights sets into shader + if (combo_mode) + { + set_layout_bindings_common.push_back(vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 2, num_models)); + } + else + { + set_layout_bindings_common.push_back(vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 2)); + } + VkDescriptorSetLayoutCreateInfo descriptor_layout_bounded = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings_common.data(), static_cast(set_layout_bindings_common.size())); + if (combo_mode) + { + VkDescriptorBindingFlagsEXT flags[3] = {0, 0, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT}; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT setLayoutBindingFlags{}; + setLayoutBindingFlags.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + setLayoutBindingFlags.bindingCount = 3; + setLayoutBindingFlags.pBindingFlags = flags; + descriptor_layout_bounded.pNext = &setLayoutBindingFlags; + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout_bounded, nullptr, &descriptor_set_layout_common)); + } + else + { + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout_bounded, nullptr, &descriptor_set_layout_common)); + } + + auto create_unbounded_descriptor_set_layout = [&](VkDescriptorSetLayout &layout_handle, VkDescriptorSetLayoutBinding &binding) { + 
VkDescriptorSetLayoutBindingFlagsCreateInfoEXT setLayoutBindingFlags{}; + setLayoutBindingFlags.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + setLayoutBindingFlags.bindingCount = 1; + VkDescriptorBindingFlagsEXT descriptorBindingFlags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT; + setLayoutBindingFlags.pBindingFlags = &descriptorBindingFlags; + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(&binding, 1); + descriptor_layout.pNext = &setLayoutBindingFlags; + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &layout_handle)); + }; + + VkDescriptorSetLayoutBinding set_layout_binding_vertices = vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 0, models.size()); + create_unbounded_descriptor_set_layout(descriptor_set_layout_vertices, set_layout_binding_vertices); + + VkDescriptorSetLayoutBinding set_layout_binding_indices = vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 0, models.size()); + create_unbounded_descriptor_set_layout(descriptor_set_layout_indices, set_layout_binding_indices); + + VkDescriptorSetLayoutBinding set_layout_binding_feature1 = vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_FRAGMENT_BIT, 0, models.size()); + create_unbounded_descriptor_set_layout(descriptor_set_layout_feature1, set_layout_binding_feature1); + + VkDescriptorSetLayoutBinding set_layout_binding_feature2 = vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_FRAGMENT_BIT, 0, models.size()); + create_unbounded_descriptor_set_layout(descriptor_set_layout_feature2, set_layout_binding_feature2); + + std::vector descriptor_set_layouts = { + descriptor_set_layout_common, + 
descriptor_set_layout_vertices, + descriptor_set_layout_indices, + descriptor_set_layout_feature1, + descriptor_set_layout_feature2}; + + VkPipelineLayoutCreateInfo pipeline_layout_create_info = + vkb::initializers::pipeline_layout_create_info( + descriptor_set_layouts.data(), + static_cast(descriptor_set_layouts.size())); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pipeline_layout_create_info, nullptr, &pipeline_layout)); +} + +void MobileNerfRayQuery::create_descriptor_pool() +{ + std::vector pool_sizes = { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * (uint32_t) framebuffers.size()}, + {VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1 * (uint32_t) framebuffers.size()}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * (uint32_t) framebuffers.size() * (uint32_t) num_models}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2 * (uint32_t) models.size() * (uint32_t) framebuffers.size()}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2 * (uint32_t) models.size() * (uint32_t) framebuffers.size()}}; + VkDescriptorPoolCreateInfo descriptor_pool_create_info = vkb::initializers::descriptor_pool_create_info(pool_sizes, 5 * framebuffers.size()); + VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &descriptor_pool_create_info, nullptr, &descriptor_pool)); +} + +void MobileNerfRayQuery::create_descriptor_sets() +{ + int numDescriptorPerModel = framebuffers.size(); + descriptor_set_common.resize(numDescriptorPerModel); + descriptor_set_vertices.resize(numDescriptorPerModel); + descriptor_set_indices.resize(numDescriptorPerModel); + descriptor_set_feature1.resize(numDescriptorPerModel); + descriptor_set_feature2.resize(numDescriptorPerModel); + + auto allocate_unbounded_descriptor_set = [&](VkDescriptorSetLayout &descriptor_set_layout, VkDescriptorSet &descriptor_set) { + uint32_t counts[1]; + counts[0] = models.size(); + + VkDescriptorSetVariableDescriptorCountAllocateInfo set_counts = {}; + set_counts.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO; + set_counts.descriptorSetCount = 1; + set_counts.pDescriptorCounts = counts; + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info = + vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &descriptor_set_layout, 1); + descriptor_set_allocate_info.pNext = &set_counts; + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &descriptor_set_allocate_info, &descriptor_set)); + }; + + for (int i = 0; i < numDescriptorPerModel; i++) + { + if (combo_mode) + { + allocate_unbounded_descriptor_set(descriptor_set_layout_common, descriptor_set_common[i]); + } + else + { + VkDescriptorSetAllocateInfo descriptor_set_allocate_info_common = + vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &descriptor_set_layout_common, 1); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &descriptor_set_allocate_info_common, &descriptor_set_common[i])); + } + + allocate_unbounded_descriptor_set(descriptor_set_layout_vertices, descriptor_set_vertices[i]); + allocate_unbounded_descriptor_set(descriptor_set_layout_indices, descriptor_set_indices[i]); + allocate_unbounded_descriptor_set(descriptor_set_layout_feature1, descriptor_set_feature1[i]); + allocate_unbounded_descriptor_set(descriptor_set_layout_feature2, descriptor_set_feature2[i]); + + uint32_t num_total_submodels = static_cast(models.size()); + + VkDescriptorBufferInfo uniform_buffer_descriptor = create_descriptor(*uniform_buffer); + + std::vector vertex_buffer_descriptors; + vertex_buffer_descriptors.reserve(num_total_submodels); + std::vector index_buffer_descriptors; + index_buffer_descriptors.reserve(num_total_submodels); + std::vector texture_input_1_descriptors; + texture_input_1_descriptors.reserve(num_total_submodels); + std::vector texture_input_2_descriptors; + texture_input_2_descriptors.reserve(num_total_submodels); + + for (Model &model : models) + { + 
vertex_buffer_descriptors.emplace_back(create_descriptor(*model.vertex_buffer)); + index_buffer_descriptors.emplace_back(create_descriptor(*model.index_buffer)); + + VkDescriptorImageInfo texture_input_1_descriptor{}; + texture_input_1_descriptor.sampler = model.texture_input_0.sampler; + texture_input_1_descriptor.imageView = model.texture_input_0.image->get_vk_image_view().get_handle(); + texture_input_1_descriptor.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + texture_input_1_descriptors.emplace_back(texture_input_1_descriptor); + + VkDescriptorImageInfo texture_input_2_descriptor{}; + texture_input_2_descriptor.sampler = model.texture_input_1.sampler; + texture_input_2_descriptor.imageView = model.texture_input_1.image->get_vk_image_view().get_handle(); + texture_input_2_descriptor.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + texture_input_2_descriptors.emplace_back(texture_input_2_descriptor); + } + + VkWriteDescriptorSet uniform_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_common[i], + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniform_buffer_descriptor); + VkWriteDescriptorSet vertex_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_vertices[i], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, vertex_buffer_descriptors.data(), vertex_buffer_descriptors.size()); + VkWriteDescriptorSet index_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_indices[i], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, index_buffer_descriptors.data(), index_buffer_descriptors.size()); + VkWriteDescriptorSet texture_input_write_0 = vkb::initializers::write_descriptor_set(descriptor_set_feature1[i], + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_1_descriptors.data(), texture_input_1_descriptors.size()); + VkWriteDescriptorSet texture_input_write_1 = vkb::initializers::write_descriptor_set(descriptor_set_feature2[i], + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_2_descriptors.data(), 
texture_input_2_descriptors.size()); + + // Set up the descriptor for binding our top level acceleration structure to the ray tracing shaders + VkWriteDescriptorSetAccelerationStructureKHR descriptor_acceleration_structure_info{}; + descriptor_acceleration_structure_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + descriptor_acceleration_structure_info.accelerationStructureCount = 1; + auto rhs = top_level_acceleration_structure->get_handle(); + descriptor_acceleration_structure_info.pAccelerationStructures = &rhs; + + VkWriteDescriptorSet acceleration_structure_write{}; + acceleration_structure_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + acceleration_structure_write.dstSet = descriptor_set_common[i]; + acceleration_structure_write.dstBinding = 1; + acceleration_structure_write.descriptorCount = 1; + acceleration_structure_write.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + + // The acceleration structure descriptor has to be chained via pNext + acceleration_structure_write.pNext = &descriptor_acceleration_structure_info; + + std::vector write_descriptor_sets; + VkWriteDescriptorSet weights_buffer_write; + std::vector weights_buffer_descriptors; + VkDescriptorBufferInfo weights_buffer_descriptor; + + if (combo_mode) + { + weights_buffer_descriptors.reserve(mlp_weight_vector.size()); + for (auto &weight_buffer : weights_buffers) + { + weights_buffer_descriptors.emplace_back(create_descriptor(*weight_buffer)); + } + weights_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_common[i], + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, weights_buffer_descriptors.data(), weights_buffer_descriptors.size()); + } + else + { + weights_buffer_descriptor = create_descriptor(*weights_buffers[0]); + weights_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_common[i], + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &weights_buffer_descriptor); + } + write_descriptor_sets = std::vector{ + 
uniform_buffer_write, + acceleration_structure_write, + weights_buffer_write, + vertex_buffer_write, + index_buffer_write, + texture_input_write_0, + texture_input_write_1}; + + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0, VK_NULL_HANDLE); + } +} + +void MobileNerfRayQuery::prepare_pipelines() +{ + VkPipelineInputAssemblyStateCreateInfo input_assembly_state = vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE); + + VkPipelineRasterizationStateCreateInfo rasterization_state = vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, /*VK_CULL_MODE_BACK_BIT*/ VK_CULL_MODE_NONE, VK_FRONT_FACE_COUNTER_CLOCKWISE /*VK_FRONT_FACE_CLOCKWISE*/, 0); + + std::vector blend_attachment_states; + blend_attachment_states.push_back(vkb::initializers::pipeline_color_blend_attachment_state(0xf, VK_FALSE)); + + VkPipelineColorBlendStateCreateInfo color_blend_state = vkb::initializers::pipeline_color_blend_state_create_info(blend_attachment_states.size(), blend_attachment_states.data()); + + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = vkb::initializers::pipeline_depth_stencil_state_create_info(VK_TRUE, VK_TRUE, VK_COMPARE_OP_LESS); + depth_stencil_state.depthBoundsTestEnable = VK_FALSE; + depth_stencil_state.minDepthBounds = 0.f; + depth_stencil_state.maxDepthBounds = 1.f; + + VkPipelineViewportStateCreateInfo viewport_state = vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0); + + std::vector dynamic_state_enables = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_state = + vkb::initializers::pipeline_dynamic_state_create_info( + dynamic_state_enables.data(), + static_cast(dynamic_state_enables.size()), + 0); + + VkPipelineMultisampleStateCreateInfo multisample_state = 
vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT, 0); + + // No need for Vertex bindings and attributes + VkPipelineVertexInputStateCreateInfo vertex_input_state{}; + vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + VkGraphicsPipelineCreateInfo pipeline_create_info = vkb::initializers::pipeline_create_info(pipeline_layout, render_pass_nerf, 0); + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pColorBlendState = &color_blend_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.subpass = 0; + pipeline_create_info.stageCount = static_cast(shader_stages.size()); + pipeline_create_info.pStages = shader_stages.data(); + + // Only need one pipeline in rayquery + VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), pipeline_cache, 1, &pipeline_create_info, nullptr, &pipeline)); +} + +void MobileNerfRayQuery::build_command_buffers() +{ + if (use_native_screen_size) + { + view_port_height = height; + view_port_width = width; + } + + // In case the screen is resized, need to update the storage image size and descriptor set + // Note that the texture_rendered image has already been recreated at this point + if (!prepared) + { + setup_framebuffers(); + } + + VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info(); + std::vector clear_values; + + clear_values.resize(2); + clear_values[0].depthStencil = {1.0f, 0}; + clear_values[1].color = {{1.0f, 1.0f, 1.0f, 1.0f}}; + + VkRenderPassBeginInfo render_pass_begin_info = vkb::initializers::render_pass_begin_info(); + 
render_pass_begin_info.renderPass = render_pass_nerf; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent.width = width; + render_pass_begin_info.renderArea.extent.height = height; + render_pass_begin_info.clearValueCount = clear_values.size(); + render_pass_begin_info.pClearValues = clear_values.data(); + + VkClearValue clear_values_UI[2]; + clear_values_UI[0].color = default_clear_color; + clear_values_UI[1].depthStencil = {1.0f, 0}; + + VkRenderPassBeginInfo render_pass_begin_info_UI = vkb::initializers::render_pass_begin_info(); + render_pass_begin_info_UI.renderPass = render_pass; + render_pass_begin_info_UI.renderArea.offset.x = 0; + render_pass_begin_info_UI.renderArea.offset.y = 0; + render_pass_begin_info_UI.renderArea.extent.width = width; + render_pass_begin_info_UI.renderArea.extent.height = height; + render_pass_begin_info_UI.clearValueCount = 2; + render_pass_begin_info_UI.pClearValues = clear_values_UI; + + VkImageSubresourceRange subresource_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + for (size_t i = 0; i < draw_cmd_buffers.size(); ++i) + { + render_pass_begin_info.framebuffer = framebuffers_nerf[i]; + + VK_CHECK(vkBeginCommandBuffer(draw_cmd_buffers[i], &command_buffer_begin_info)); + + vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); + + VkViewport viewport = vkb::initializers::viewport((float) view_port_width, (float) view_port_height, 0.0f, 1.0f); + vkCmdSetViewport(draw_cmd_buffers[i], 0, 1, &viewport); + + VkRect2D scissor = vkb::initializers::rect2D(static_cast(width), static_cast(height), 0, 0); + vkCmdSetScissor(draw_cmd_buffers[i], 0, 1, &scissor); + + // Use 5 descriptor sets due to the limitation of using variable size resource array + // see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_descriptor_indexing.html + vkCmdBindPipeline(draw_cmd_buffers[i], 
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + std::vector descriptor_sets_first_pass = { + descriptor_set_common[i], + descriptor_set_vertices[i], + descriptor_set_indices[i], + descriptor_set_feature1[i], + descriptor_set_feature2[i], + }; + vkCmdBindDescriptorSets(draw_cmd_buffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, + 0, static_cast(descriptor_sets_first_pass.size()), descriptor_sets_first_pass.data(), 0, nullptr); + VkDeviceSize offsets[1] = {0}; + vkCmdDraw(draw_cmd_buffers[i], 3, 1, 0, 0); + + vkCmdEndRenderPass(draw_cmd_buffers[i]); + + // Render UI + render_pass_begin_info_UI.framebuffer = framebuffers[i]; + + vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info_UI, VK_SUBPASS_CONTENTS_INLINE); + draw_ui(draw_cmd_buffers[i]); + vkCmdEndRenderPass(draw_cmd_buffers[i]); + + VK_CHECK(vkEndCommandBuffer(draw_cmd_buffers[i])); + } +} + +void MobileNerfRayQuery::draw() +{ + ApiVulkanSample::prepare_frame(); + + // Command buffer to be submitted to the queue + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw_cmd_buffers[current_buffer]; + + // Submit to queue + VK_CHECK(vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE)); + + ApiVulkanSample::submit_frame(); +} + +std::unique_ptr> create_mobile_nerf_rayquery() +{ + return std::make_unique(); +} diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h new file mode 100644 index 000000000..3fba926b7 --- /dev/null +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h @@ -0,0 +1,204 @@ +/* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "api_vulkan_sample.h" +#include "glsl_compiler.h" +#include + +#include + +using json = nlohmann::json; + +namespace vkb +{ +namespace sg +{ +class Scene; +class Node; +class Mesh; +class SubMesh; +class Camera; +} // namespace sg +} // namespace vkb + +class MobileNerfRayQuery : public ApiVulkanSample +{ + public: + MobileNerfRayQuery(); + ~MobileNerfRayQuery() override; + void request_gpu_features(vkb::PhysicalDevice &gpu) override; + void render(float delta_time) override; + bool prepare(const vkb::ApplicationOptions &options) override; + + private: + struct GlobalUniform + { + alignas(16) glm::vec3 camera_position; + alignas(16) glm::vec3 camera_side; + alignas(16) glm::vec3 camera_up; + alignas(16) glm::vec3 camera_lookat; + alignas(8) glm::vec2 img_dim; + alignas(4) float tan_half_fov; + } global_uniform; + +#define WEIGHTS_0_COUNT (176) +#define WEIGHTS_1_COUNT (256) +// The third layer weights' size is changed from 48 to 64 to make sure a 16 bytes alignement +// #define WEIGHTS_2_COUNT (48) +#define WEIGHTS_2_COUNT (64) +#define BIAS_0_COUNT (16) +#define BIAS_1_COUNT (16) +// The third layer bias' size is changed from 3 to 4 to make sure a 16 bytes alignement +#define BIAS_2_COUNT (4) + + // some typedef for each model + struct mlp_weights + { + float data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT]; // Array of floats + }; + + struct Vertex + { + glm::vec3 position; + glm::vec2 tex_coord; + }; + + struct InstancingInfo + { + glm::ivec3 dim; + 
glm::vec3 interval; + }; + + struct FrameBufferAttachment + { + VkSampler sampler; + VkDeviceMemory memory; + VkImage image = VK_NULL_HANDLE; + VkImageView view; + VkFormat format; + uint32_t width; + uint32_t height; + }; + + struct Model + { + int model_index; + int sub_model_num; + + std::vector vertices; + std::vector> indices; + + // Feature maps + Texture texture_input_0, texture_input_1; + + // Each model has its vertex buffer and index buffer. In ray query, they are storage buffers. + std::unique_ptr vertex_buffer{nullptr}; + std::unique_ptr index_buffer{nullptr}; + + // Each model has its BLAS + std::unique_ptr bottom_level_acceleration_structure{nullptr}; + } model; + + std::vector models; + + // MLPs for each model + std::vector mlp_weight_vector; + std::vector> weights_buffers; + + // Global uniform buffer + std::unique_ptr uniform_buffer; + + std::vector framebuffers_nerf; + VkRenderPass render_pass_nerf{VK_NULL_HANDLE}; + + std::array shader_stages; + + VkPipeline pipeline{VK_NULL_HANDLE}; + VkPipelineLayout pipeline_layout{VK_NULL_HANDLE}; + + std::vector descriptor_set_common{VK_NULL_HANDLE}; + std::vector descriptor_set_vertices{VK_NULL_HANDLE}; + std::vector descriptor_set_indices{VK_NULL_HANDLE}; + std::vector descriptor_set_feature1{VK_NULL_HANDLE}; + std::vector descriptor_set_feature2{VK_NULL_HANDLE}; + + VkDescriptorSetLayout descriptor_set_layout_common{VK_NULL_HANDLE}; + VkDescriptorSetLayout descriptor_set_layout_vertices{VK_NULL_HANDLE}; + VkDescriptorSetLayout descriptor_set_layout_indices{VK_NULL_HANDLE}; + VkDescriptorSetLayout descriptor_set_layout_feature1{VK_NULL_HANDLE}; + VkDescriptorSetLayout descriptor_set_layout_feature2{VK_NULL_HANDLE}; + + // Ray tracing structures + std::unique_ptr top_level_acceleration_structure{nullptr}; + + // For loading mobile nerf assets map + json asset_map; + int num_models; + bool combo_mode = false; + bool do_rotation = false; + std::vector model_path; + glm::vec3 camera_pos = glm::vec3(-2.2f, 
2.2f, 2.2f); + + // Currently combo mode translation are hard-coded + glm::mat4x4 combo_model_transform[4] = { + glm::translate(glm::vec3(0.5, 0.75, 0)), glm::translate(glm::vec3(0.5, 0.25, 0)), + glm::translate(glm::vec3(0, -0.25, 0.5)), glm::translate(glm::vec3(0, -0.75, -0.5))}; + + // For instancing + InstancingInfo instancing_info; + + // Viewport Setting + float fov = 60.0f; + uint32_t view_port_width = width; + uint32_t view_port_height = height; + bool use_native_screen_size = false; + + // Feature map format + VkFormat feature_map_format = VK_FORMAT_R16G16B16A16_SFLOAT; + + void read_json_map(); + void load_shaders(); + void create_uniforms(); + void create_static_object_buffers(int models_entry); + void load_scene(int model_index, int sub_model_index, int models_entry); + void initialize_mlp_uniform_buffers(int model_index); + void update_uniform_buffer(); + void update_weights_buffers(); + + void setup_framebuffers(); + void update_render_pass(); + + void create_pipeline_layout(); + void create_descriptor_pool(); + void create_descriptor_sets(); + void prepare_pipelines(); + + void build_command_buffers() override; + void draw(); + + uint64_t get_buffer_device_address(VkBuffer buffer); + void create_top_level_acceleration_structure(); + void create_bottom_level_acceleration_structure(int model_entry); + + void create_texture(int model_index, int sub_model_index, int models_entry); + void create_texture_helper(std::string const &texturePath, Texture &texture_input); +}; + +std::unique_ptr> create_mobile_nerf_rayquery(); diff --git a/shaders/mobile_nerf_rayquery/quad.vert b/shaders/mobile_nerf_rayquery/quad.vert new file mode 100644 index 000000000..3221e88d0 --- /dev/null +++ b/shaders/mobile_nerf_rayquery/quad.vert @@ -0,0 +1,38 @@ +/* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ------------------------------------------------------------------------ + * + * THIS IS A MODIFIED VERSION OF THE ORIGINAL FILE + * + * The original file, along with the original Apache-2.0 LICENSE can be found at: + * https://github.com/google-research/jax3d/tree/main/jax3d/projects/mobilenerf + * + * Modification details: Shader code was updated to work on Vulkan (originally + * built for WebGL) + * Contributor: (Qualcomm) Rodrigo Holztrattner - quic_rholztra@quicinc.com + */ +#version 460 + +out gl_PerVertex { + vec4 gl_Position; +}; + +void main() +{ + vec2 outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag new file mode 100644 index 000000000..8896370b9 --- /dev/null +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag @@ -0,0 +1,351 @@ +/* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ------------------------------------------------------------------------ + * + * THIS IS A MODIFIED VERSION OF THE ORIGINAL FILE + * + * The original file, along with the original Apache-2.0 LICENSE can be found at: + * https://github.com/google-research/jax3d/tree/main/jax3d/projects/mobilenerf + * + * Modification details: Shader code was updated to work on Vulkan (originally + * built for WebGL) + * Contributor: (Qualcomm) Rodrigo Holztrattner - quic_rholztra@quicinc.com + */ +#version 460 +#extension GL_EXT_scalar_block_layout : enable +#extension GL_EXT_ray_query : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Uncomment this to use opaque mode +// Should be faster, but would get VK_ERROR_DEVICE_LOST on some AMD devices +// #define USE_OPAQUE + +struct Vertex +{ + vec3 position; + vec2 texCoord; +}; + +struct GlobalUniform +{ + vec3 camera_position; + vec3 camera_side; + vec3 camera_up; + vec3 camera_lookat; + vec2 img_dim; + float tan_half_fov; +}; + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform AppData +{ + GlobalUniform params; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS; + +// Try defining constants in the shader itself +precision highp float; + +#define WEIGHTS_0_COUNT (176) +#define WEIGHTS_1_COUNT (256) +// The third layer's size is changed from 48 to 64 to make sure a 16 bytes alignement +//#define WEIGHTS_2_COUNT (48) +#define WEIGHTS_2_COUNT (64) +#define BIAS_0_COUNT (16) +#define BIAS_1_COUNT (16) +// The third layer bias' size is changed 
from 3 to 4 to make sure a 16 bytes alignement +#define BIAS_2_COUNT (4) +layout(set = 0, binding = 2) uniform mlp_weights +{ + vec4 data[(WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT)/4]; // Array of floats +} weights; + +layout(set = 1, binding = 0, scalar) buffer Vertices +{ + Vertex vertices[]; +} vertices_set[]; + +layout(set = 2, binding = 0, scalar) buffer Indices +{ + uint indices[]; +} indices_set[]; + +layout(set = 3, binding = 0) uniform sampler2D textureInput_0[]; +layout(set = 4, binding = 0) uniform sampler2D textureInput_1[]; + +vec3 evaluateNetwork( vec4 f0, vec4 f1, vec4 viewdir) +{ + + vec3 res; + + int bias_0_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT; + vec4 intermediate_one[4] = vec4[]( + weights.data[bias_0_ind/4], + weights.data[bias_0_ind/4 + 1], + weights.data[bias_0_ind/4 + 2], + weights.data[bias_0_ind/4 + 3] + ); + + +#define APPLY_WEIGHTS_0(multiplier, weightFirstInd) \ + intermediate_one[ 0] += (multiplier) * weights.data[ weightFirstInd/4]; \ + intermediate_one[ 1] += (multiplier) * weights.data[ weightFirstInd/4 + 1]; \ + intermediate_one[ 2] += (multiplier) * weights.data[ weightFirstInd/4 + 2]; \ + intermediate_one[ 3] += (multiplier) * weights.data[ weightFirstInd/4 + 3]; + + APPLY_WEIGHTS_0( f0.r, 0) + APPLY_WEIGHTS_0( f0.g, 16) + APPLY_WEIGHTS_0( f0.b, 32) + APPLY_WEIGHTS_0( f0.a, 48) + APPLY_WEIGHTS_0( f1.r, 64) + APPLY_WEIGHTS_0( f1.g, 80) + APPLY_WEIGHTS_0( f1.b, 96) + APPLY_WEIGHTS_0( f1.a, 112) + // For models form original mobile nerf, use the original code + APPLY_WEIGHTS_0( (viewdir.r + 1.0 )/2, 128) + APPLY_WEIGHTS_0( (-viewdir.b + 1.0 )/2, 144) + APPLY_WEIGHTS_0( (viewdir.g + 1.0 )/2, 160) + + int bias_1_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT; + vec4 intermediate_two[4] = vec4[]( + weights.data[bias_1_ind/4], + weights.data[bias_1_ind/4 + 1], + weights.data[bias_1_ind/4 + 2], + weights.data[bias_1_ind/4 + 3] + ); + 
+ +#define APPLY_WEIGHTS_1(intermediate, oneInd) \ + if(intermediate > 0.0f){ \ + intermediate_two[ 0] += intermediate * weights.data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 0]; \ + intermediate_two[ 1] += intermediate * weights.data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 1]; \ + intermediate_two[ 2] += intermediate * weights.data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 2]; \ + intermediate_two[ 3] += intermediate * weights.data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 3]; \ + } + + APPLY_WEIGHTS_1( intermediate_one[0].r, 0) + APPLY_WEIGHTS_1( intermediate_one[0].g, 1) + APPLY_WEIGHTS_1( intermediate_one[0].b, 2) + APPLY_WEIGHTS_1( intermediate_one[0].a, 3) + APPLY_WEIGHTS_1( intermediate_one[1].r, 4) + APPLY_WEIGHTS_1( intermediate_one[1].g, 5) + APPLY_WEIGHTS_1( intermediate_one[1].b, 6) + APPLY_WEIGHTS_1( intermediate_one[1].a, 7) + APPLY_WEIGHTS_1( intermediate_one[2].r, 8) + APPLY_WEIGHTS_1( intermediate_one[2].g, 9) + APPLY_WEIGHTS_1( intermediate_one[2].b, 10) + APPLY_WEIGHTS_1( intermediate_one[2].a, 11) + APPLY_WEIGHTS_1( intermediate_one[3].r, 12) + APPLY_WEIGHTS_1( intermediate_one[3].g, 13) + APPLY_WEIGHTS_1( intermediate_one[3].b, 14) + APPLY_WEIGHTS_1( intermediate_one[3].a, 15) + + int bias_2_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT; + vec4 result = weights.data[bias_2_ind/4]; + +#define APPLY_WEIGHTS_2(intermediate, oneInd) \ + if(intermediate > 0.0f){ \ + result += intermediate * weights.data[ WEIGHTS_0_COUNT/4 + WEIGHTS_1_COUNT/4 + oneInd]; \ + } + + APPLY_WEIGHTS_2(intermediate_two[0].r, 0) + APPLY_WEIGHTS_2(intermediate_two[0].g, 1) + APPLY_WEIGHTS_2(intermediate_two[0].b, 2) + APPLY_WEIGHTS_2(intermediate_two[0].a, 3) + APPLY_WEIGHTS_2(intermediate_two[1].r, 4) + APPLY_WEIGHTS_2(intermediate_two[1].g, 5) + APPLY_WEIGHTS_2(intermediate_two[1].b, 6) + APPLY_WEIGHTS_2(intermediate_two[1].a, 7) + APPLY_WEIGHTS_2(intermediate_two[2].r, 8) + APPLY_WEIGHTS_2(intermediate_two[2].g, 9) + APPLY_WEIGHTS_2(intermediate_two[2].b,10) 
+ APPLY_WEIGHTS_2(intermediate_two[2].a,11) + APPLY_WEIGHTS_2(intermediate_two[3].r,12) + APPLY_WEIGHTS_2(intermediate_two[3].g,13) + APPLY_WEIGHTS_2(intermediate_two[3].b,14) + APPLY_WEIGHTS_2(intermediate_two[3].a,15) + + result = 1.0 / (1.0 + exp(-result)); + return vec3(result * viewdir.a+(1.0-viewdir.a)); +} + +vec3 CalcRayDirComp(GlobalUniform params) { + // On [0.0, 1.0] + vec2 img_sample = vec2(gl_FragCoord.xy / params.img_dim); + + // Transform into [-0.5, 0.5] + vec2 hom_sample = img_sample - vec2(0.5f, 0.5f); + hom_sample.y *= -1.0f; // Vertical flip so that origin is top-left + + // Transform into [-dim.x/dim.y/2, dim.x/dim.y/2] x [-0.5, 0.5] + vec2 c_sample = hom_sample * params.img_dim / params.img_dim.y; + + // Calculate direction to image plane + const vec3 rayDir = vec3(params.camera_lookat * 0.5 / params.tan_half_fov + c_sample.x * params.camera_side + c_sample.y * params.camera_up); + + return normalize(rayDir); +} + +////////////////////////////////////////////////////////////// +// MLP was trained with gamma-corrected values // +// convert to linear so sRGB conversion isn't applied twice // +////////////////////////////////////////////////////////////// + +float Convert_sRGB_ToLinear(float value) +{ + return value <= 0.04045 + ? 
value / 12.92 + : pow((value + 0.055) / 1.055, 2.4); +} + +vec3 Convert_sRGB_ToLinear(vec3 value) +{ + return vec3(Convert_sRGB_ToLinear(value.x), Convert_sRGB_ToLinear(value.y), Convert_sRGB_ToLinear(value.z)); +} + +////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////// + +#ifndef USE_OPAQUE +void main(void) +{ + vec3 rayDirection = CalcRayDirComp(params); + + // initialize a ray query object + rayQueryEXT rayQuery; + const uint rayFlags = gl_RayFlagsNoOpaqueEXT; // Enable this so that we can get back fragment after discarding the transparent fragment + const float tmin = 0.01f; + const float tmax = 256.0f; + vec4 pixel_0 = vec4(0.0f); + vec2 commited_flipped = vec2(0.0f); + int commited_instanceID = 0; + rayQueryInitializeEXT(rayQuery, // Ray query + topLevelAS, // Top-level acceleration structure + rayFlags, // Ray flags, treat all geometry as non-opaque + 0xFF, // 8-bit instance mask, trace against all instances + params.camera_position, // Ray origin + tmin, // Minimum t-value + rayDirection, // Ray direction + tmax); // Maximum t-value + + while(rayQueryProceedEXT(rayQuery)) { + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) + { + const int instanceID = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); + + // get primitive ID in order to access UVs of hitted triangle + const int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); + const uint i0 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID]; + const uint i1 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 1]; + const uint i2 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 2]; + const vec2 uv0 = vertices_set[nonuniformEXT(instanceID)].vertices[i0].texCoord; + const vec2 uv1 = 
vertices_set[nonuniformEXT(instanceID)].vertices[i1].texCoord; + const vec2 uv2 = vertices_set[nonuniformEXT(instanceID)].vertices[i2].texCoord; + + // Get berycentric coordinate then interpolate the uv of the hit point + vec3 barycentrics = vec3(0.0, rayQueryGetIntersectionBarycentricsEXT(rayQuery, false)); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + const vec2 hitpoint_uv = barycentrics.x * uv0 + barycentrics.y * uv1 + barycentrics.z * uv2; + + // Sample feature maps and check transparency + const vec2 flipped = vec2( hitpoint_uv.x, 1.0 - hitpoint_uv.y ); + vec4 test_pixel = texture(textureInput_0[nonuniformEXT(instanceID)], flipped); + + if (test_pixel.r != 0.0) { + rayQueryConfirmIntersectionEXT(rayQuery); + pixel_0 = test_pixel; + commited_flipped = flipped; + commited_instanceID = instanceID; + } + } + } + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionTriangleEXT) { + // Output feature inputs for mlp + vec4 pixel_1 = texture(textureInput_1[nonuniformEXT(commited_instanceID)], commited_flipped); + + pixel_0.a = pixel_0.a*2.0-1.0; + pixel_1.a = pixel_1.a*2.0-1.0; + + o_color.rgb = Convert_sRGB_ToLinear(evaluateNetwork(pixel_0, pixel_1, vec4(rayDirection, 1.0f))); + o_color.a = 1.0; + } else { + discard; + } +} + +#else +// Much faster but not work correctly on Mobile nerf's original models +void main(void) +{ + vec3 rayDirection = CalcRayDirComp(params); + + // initialize a ray query object + rayQueryEXT rayQuery; + const uint rayFlags = gl_RayFlagsOpaqueEXT; + const float tmin = 0.0f; + const float tmax = 256.0f; + rayQueryInitializeEXT(rayQuery, // Ray query + topLevelAS, // Top-level acceleration structure + rayFlags, // Ray flags, treat all geometry as opaque + 0xFF, // 8-bit instance mask, trace against all instances + params.camera_position, // Ray origin + tmin, // Minimum t-value + rayDirection, // Ray direction + tmax); // Maximum t-value + + // Start traversal + rayQueryProceedEXT(rayQuery); + 
+ if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) { + const int instanceID = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true); + + // get primitive ID in order to access UVs of hitted triangle + const int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true); + const uint i0 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID]; + const uint i1 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 1]; + const uint i2 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 2]; + const vec2 uv0 = vertices_set[nonuniformEXT(instanceID)].vertices[i0].texCoord; + const vec2 uv1 = vertices_set[nonuniformEXT(instanceID)].vertices[i1].texCoord; + const vec2 uv2 = vertices_set[nonuniformEXT(instanceID)].vertices[i2].texCoord; + + // Get berycentric coordinate then interpolate the uv of the hit point + vec3 barycentrics = vec3(0.0, rayQueryGetIntersectionBarycentricsEXT(rayQuery, true)); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + const vec2 hitpoint_uv = barycentrics.x * uv0 + barycentrics.y * uv1 + barycentrics.z * uv2; + + // Sample feature maps then output to second subpass + vec2 flipped = vec2( hitpoint_uv.x, 1.0 - hitpoint_uv.y ); + vec4 pixel_0 = texture(textureInput_0[nonuniformEXT(instanceID)], flipped); + vec4 pixel_1 = texture(textureInput_1[nonuniformEXT(instanceID)], flipped); + + pixel_0.a = pixel_0.a*2.0-1.0; + pixel_1.a = pixel_1.a*2.0-1.0; + + o_color.rgb = Convert_sRGB_ToLinear(evaluateNetwork(pixel_0, pixel_1, vec4(rayDirection, 1.0f))); + o_color.a = 1.0; + } else { + discard; + } +} +#endif diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag new file mode 100644 index 000000000..8f20123fa --- /dev/null +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag @@ -0,0 +1,351 @@ +/* Copyright (c) 2024, Qualcomm Innovation Center, Inc. 
All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ------------------------------------------------------------------------ + * + * THIS IS A MODIFIED VERSION OF THE ORIGINAL FILE + * + * The original file, along with the original Apache-2.0 LICENSE can be found at: + * https://github.com/google-research/jax3d/tree/main/jax3d/projects/mobilenerf + * + * Modification details: Shader code was updated to work on Vulkan (originally + * built for WebGL) + * Contributor: (Qualcomm) Rodrigo Holztrattner - quic_rholztra@quicinc.com + */ +#version 460 +#extension GL_EXT_scalar_block_layout : enable +#extension GL_EXT_ray_query : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Uncomment this to use opaque mode +// Should be faster, but would get VK_ERROR_DEVICE_LOST on some AMD devices +// #define USE_OPAQUE + +struct Vertex +{ + vec3 position; + vec2 texCoord; +}; + +struct GlobalUniform +{ + vec3 camera_position; + vec3 camera_side; + vec3 camera_up; + vec3 camera_lookat; + vec2 img_dim; + float tan_half_fov; +}; + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform AppData +{ + GlobalUniform params; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS; + +// Try defining constants in the shader itself +precision highp float; + +#define WEIGHTS_0_COUNT (176) +#define WEIGHTS_1_COUNT (256) +// The third layer's size is changed from 
48 to 64 to make sure a 16 bytes alignement +//#define WEIGHTS_2_COUNT (48) +#define WEIGHTS_2_COUNT (64) +#define BIAS_0_COUNT (16) +#define BIAS_1_COUNT (16) +// The third layer bias' size is changed from 3 to 4 to make sure a 16 bytes alignement +#define BIAS_2_COUNT (4) +layout(set = 0, binding = 2) uniform mlp_weights +{ + vec4 data[(WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT)/4]; // Array of floats +} weights_arr[]; + +layout(set = 1, binding = 0, scalar) buffer Vertices +{ + Vertex vertices[]; +} vertices_set[]; + +layout(set = 2, binding = 0, scalar) buffer Indices +{ + uint indices[]; +} indices_set[]; + +layout(set = 3, binding = 0) uniform sampler2D textureInput_0[]; +layout(set = 4, binding = 0) uniform sampler2D textureInput_1[]; + +vec3 evaluateNetwork( vec4 f0, vec4 f1, vec4 viewdir, uint idx) +{ + + vec3 res; + + int bias_0_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT; + vec4 intermediate_one[4] = vec4[]( + weights_arr[nonuniformEXT(idx)].data[bias_0_ind/4], + weights_arr[nonuniformEXT(idx)].data[bias_0_ind/4 + 1], + weights_arr[nonuniformEXT(idx)].data[bias_0_ind/4 + 2], + weights_arr[nonuniformEXT(idx)].data[bias_0_ind/4 + 3] + ); + + +#define APPLY_WEIGHTS_0(multiplier, weightFirstInd) \ + intermediate_one[ 0] += (multiplier) * weights_arr[nonuniformEXT(idx)].data[ weightFirstInd/4]; \ + intermediate_one[ 1] += (multiplier) * weights_arr[nonuniformEXT(idx)].data[ weightFirstInd/4 + 1]; \ + intermediate_one[ 2] += (multiplier) * weights_arr[nonuniformEXT(idx)].data[ weightFirstInd/4 + 2]; \ + intermediate_one[ 3] += (multiplier) * weights_arr[nonuniformEXT(idx)].data[ weightFirstInd/4 + 3]; + + APPLY_WEIGHTS_0( f0.r, 0) + APPLY_WEIGHTS_0( f0.g, 16) + APPLY_WEIGHTS_0( f0.b, 32) + APPLY_WEIGHTS_0( f0.a, 48) + APPLY_WEIGHTS_0( f1.r, 64) + APPLY_WEIGHTS_0( f1.g, 80) + APPLY_WEIGHTS_0( f1.b, 96) + APPLY_WEIGHTS_0( f1.a, 112) + // For models form original mobile nerf, use the original 
code + APPLY_WEIGHTS_0( (viewdir.r + 1.0 )/2, 128) + APPLY_WEIGHTS_0( (-viewdir.b + 1.0 )/2, 144) + APPLY_WEIGHTS_0( (viewdir.g + 1.0 )/2, 160) + + int bias_1_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT; + vec4 intermediate_two[4] = vec4[]( + weights_arr[nonuniformEXT(idx)].data[bias_1_ind/4], + weights_arr[nonuniformEXT(idx)].data[bias_1_ind/4 + 1], + weights_arr[nonuniformEXT(idx)].data[bias_1_ind/4 + 2], + weights_arr[nonuniformEXT(idx)].data[bias_1_ind/4 + 3] + ); + + +#define APPLY_WEIGHTS_1(intermediate, oneInd) \ + if(intermediate > 0.0f){ \ + intermediate_two[ 0] += intermediate * weights_arr[nonuniformEXT(idx)].data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 0]; \ + intermediate_two[ 1] += intermediate * weights_arr[nonuniformEXT(idx)].data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 1]; \ + intermediate_two[ 2] += intermediate * weights_arr[nonuniformEXT(idx)].data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 2]; \ + intermediate_two[ 3] += intermediate * weights_arr[nonuniformEXT(idx)].data[ WEIGHTS_0_COUNT/4 + oneInd * 4 + 3]; \ + } + + APPLY_WEIGHTS_1( intermediate_one[0].r, 0) + APPLY_WEIGHTS_1( intermediate_one[0].g, 1) + APPLY_WEIGHTS_1( intermediate_one[0].b, 2) + APPLY_WEIGHTS_1( intermediate_one[0].a, 3) + APPLY_WEIGHTS_1( intermediate_one[1].r, 4) + APPLY_WEIGHTS_1( intermediate_one[1].g, 5) + APPLY_WEIGHTS_1( intermediate_one[1].b, 6) + APPLY_WEIGHTS_1( intermediate_one[1].a, 7) + APPLY_WEIGHTS_1( intermediate_one[2].r, 8) + APPLY_WEIGHTS_1( intermediate_one[2].g, 9) + APPLY_WEIGHTS_1( intermediate_one[2].b, 10) + APPLY_WEIGHTS_1( intermediate_one[2].a, 11) + APPLY_WEIGHTS_1( intermediate_one[3].r, 12) + APPLY_WEIGHTS_1( intermediate_one[3].g, 13) + APPLY_WEIGHTS_1( intermediate_one[3].b, 14) + APPLY_WEIGHTS_1( intermediate_one[3].a, 15) + + int bias_2_ind = WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + + BIAS_0_COUNT + BIAS_1_COUNT; + vec4 result = weights_arr[nonuniformEXT(idx)].data[bias_2_ind/4]; + +#define 
APPLY_WEIGHTS_2(intermediate, oneInd) \ + if(intermediate > 0.0f){ \ + result += intermediate * weights_arr[nonuniformEXT(idx)].data[ WEIGHTS_0_COUNT/4 + WEIGHTS_1_COUNT/4 + oneInd]; \ + } + + APPLY_WEIGHTS_2(intermediate_two[0].r, 0) + APPLY_WEIGHTS_2(intermediate_two[0].g, 1) + APPLY_WEIGHTS_2(intermediate_two[0].b, 2) + APPLY_WEIGHTS_2(intermediate_two[0].a, 3) + APPLY_WEIGHTS_2(intermediate_two[1].r, 4) + APPLY_WEIGHTS_2(intermediate_two[1].g, 5) + APPLY_WEIGHTS_2(intermediate_two[1].b, 6) + APPLY_WEIGHTS_2(intermediate_two[1].a, 7) + APPLY_WEIGHTS_2(intermediate_two[2].r, 8) + APPLY_WEIGHTS_2(intermediate_two[2].g, 9) + APPLY_WEIGHTS_2(intermediate_two[2].b,10) + APPLY_WEIGHTS_2(intermediate_two[2].a,11) + APPLY_WEIGHTS_2(intermediate_two[3].r,12) + APPLY_WEIGHTS_2(intermediate_two[3].g,13) + APPLY_WEIGHTS_2(intermediate_two[3].b,14) + APPLY_WEIGHTS_2(intermediate_two[3].a,15) + + result = 1.0 / (1.0 + exp(-result)); + return vec3(result * viewdir.a+(1.0-viewdir.a)); +} + +vec3 CalcRayDirComp(GlobalUniform params) { + // On [0.0, 1.0] + vec2 img_sample = vec2(gl_FragCoord.xy / params.img_dim); + + // Transform into [-0.5, 0.5] + vec2 hom_sample = img_sample - vec2(0.5f, 0.5f); + hom_sample.y *= -1.0f; // Vertical flip so that origin is top-left + + // Transform into [-dim.x/dim.y/2, dim.x/dim.y/2] x [-0.5, 0.5] + vec2 c_sample = hom_sample * params.img_dim / params.img_dim.y; + + // Calculate direction to image plane + const vec3 rayDir = vec3(params.camera_lookat * 0.5 / params.tan_half_fov + c_sample.x * params.camera_side + c_sample.y * params.camera_up); + + return normalize(rayDir); +} + +////////////////////////////////////////////////////////////// +// MLP was trained with gamma-corrected values // +// convert to linear so sRGB conversion isn't applied twice // +////////////////////////////////////////////////////////////// + +float Convert_sRGB_ToLinear(float value) +{ + return value <= 0.04045 + ? 
value / 12.92 + : pow((value + 0.055) / 1.055, 2.4); +} + +vec3 Convert_sRGB_ToLinear(vec3 value) +{ + return vec3(Convert_sRGB_ToLinear(value.x), Convert_sRGB_ToLinear(value.y), Convert_sRGB_ToLinear(value.z)); +} + +////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////// + +#ifndef USE_OPAQUE +void main(void) +{ + vec3 rayDirection = CalcRayDirComp(params); + + // initialize a ray query object + rayQueryEXT rayQuery; + const uint rayFlags = gl_RayFlagsNoOpaqueEXT; // Enable this so that we can get back fragment after discarding the transparent fragment + const float tmin = 0.01f; + const float tmax = 256.0f; + vec4 pixel_0 = vec4(0.0f); + vec2 commited_flipped = vec2(0.0f); + int commited_instanceID = 0; + rayQueryInitializeEXT(rayQuery, // Ray query + topLevelAS, // Top-level acceleration structure + rayFlags, // Ray flags, treat all geometry as non-opaque + 0xFF, // 8-bit instance mask, trace against all instances + params.camera_position, // Ray origin + tmin, // Minimum t-value + rayDirection, // Ray direction + tmax); // Maximum t-value + + while(rayQueryProceedEXT(rayQuery)) { + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) + { + const int instanceID = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); + + // get primitive ID in order to access UVs of hitted triangle + const int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); + const uint i0 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID]; + const uint i1 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 1]; + const uint i2 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 2]; + const vec2 uv0 = vertices_set[nonuniformEXT(instanceID)].vertices[i0].texCoord; + const vec2 uv1 = 
vertices_set[nonuniformEXT(instanceID)].vertices[i1].texCoord; + const vec2 uv2 = vertices_set[nonuniformEXT(instanceID)].vertices[i2].texCoord; + + // Get berycentric coordinate then interpolate the uv of the hit point + vec3 barycentrics = vec3(0.0, rayQueryGetIntersectionBarycentricsEXT(rayQuery, false)); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + const vec2 hitpoint_uv = barycentrics.x * uv0 + barycentrics.y * uv1 + barycentrics.z * uv2; + + // Sample feature maps and check transparency + const vec2 flipped = vec2( hitpoint_uv.x, 1.0 - hitpoint_uv.y ); + vec4 test_pixel = texture(textureInput_0[nonuniformEXT(instanceID)], flipped); + + if (test_pixel.r != 0.0) { + rayQueryConfirmIntersectionEXT(rayQuery); + pixel_0 = test_pixel; + commited_flipped = flipped; + commited_instanceID = instanceID; + } + } + } + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionTriangleEXT) { + // Output feature inputs for mlp + vec4 pixel_1 = texture(textureInput_1[nonuniformEXT(commited_instanceID)], commited_flipped); + + pixel_0.a = pixel_0.a*2.0-1.0; + pixel_1.a = pixel_1.a*2.0-1.0; + + o_color.rgb = Convert_sRGB_ToLinear(evaluateNetwork(pixel_0, pixel_1, vec4(rayDirection, 1.0f), commited_instanceID)); + o_color.a = 1.0; + } else { + discard; + } +} + +#else +// Much faster but not work correctly on Mobile nerf's original models +void main(void) +{ + vec3 rayDirection = CalcRayDirComp(params); + + // initialize a ray query object + rayQueryEXT rayQuery; + const uint rayFlags = gl_RayFlagsOpaqueEXT; + const float tmin = 0.01f; + const float tmax = 256.0f; + rayQueryInitializeEXT(rayQuery, // Ray query + topLevelAS, // Top-level acceleration structure + rayFlags, // Ray flags, treat all geometry as opaque + 0xFF, // 8-bit instance mask, trace against all instances + params.camera_position, // Ray origin + tmin, // Minimum t-value + rayDirection, // Ray direction + tmax); // Maximum t-value + + // Start traversal + 
rayQueryProceedEXT(rayQuery); + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) { + const int instanceID = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true); + + // get primitive ID in order to access UVs of hitted triangle + const int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true); + const uint i0 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID]; + const uint i1 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 1]; + const uint i2 = indices_set[nonuniformEXT(instanceID)].indices[3 * primitiveID + 2]; + const vec2 uv0 = vertices_set[nonuniformEXT(instanceID)].vertices[i0].texCoord; + const vec2 uv1 = vertices_set[nonuniformEXT(instanceID)].vertices[i1].texCoord; + const vec2 uv2 = vertices_set[nonuniformEXT(instanceID)].vertices[i2].texCoord; + + // Get berycentric coordinate then interpolate the uv of the hit point + vec3 barycentrics = vec3(0.0, rayQueryGetIntersectionBarycentricsEXT(rayQuery, true)); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + const vec2 hitpoint_uv = barycentrics.x * uv0 + barycentrics.y * uv1 + barycentrics.z * uv2; + + // Sample feature maps then output to second subpass + const vec2 flipped = vec2( hitpoint_uv.x, 1.0 - hitpoint_uv.y ); + vec4 pixel_0 = texture(textureInput_0[nonuniformEXT(instanceID)], flipped); + vec4 pixel_1 = texture(textureInput_1[nonuniformEXT(instanceID)], flipped); + + pixel_0.a = pixel_0.a*2.0-1.0; + pixel_1.a = pixel_1.a*2.0-1.0; + + o_color.rgb = Convert_sRGB_ToLinear(evaluateNetwork(pixel_0, pixel_1, vec4(rayDirection, 1.0f), instanceID)); + o_color.a = 1.0; + } else { + discard; + } +} +#endif From 11eb83c8654b378403fb5f6a80a294d14cf7aaec Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 22 Aug 2024 18:21:20 -0600 Subject: [PATCH 02/10] Quick fixes Signed-off-by: Rodrigo Holztrattner --- .../mobile_nerf_rayquery.cpp | 51 +++++++++---------- .../mobile_nerf_rayquery.h 
| 6 +-- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index c589bca01..df9f6c8db 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -93,8 +93,7 @@ MobileNerfRayQuery::MobileNerfRayQuery() { title = "Mobile Nerf Ray Query"; - // Scalar Block Layout Extension requires Vulkan 1.2 - set_api_version(VK_API_VERSION_1_2); + set_api_version(VK_API_VERSION_1_1); // Required by VK_KHR_acceleration_structure add_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); @@ -355,7 +354,7 @@ void MobileNerfRayQuery::read_json_map() model_path[0] = asset_map["path"].get(); LOGI("Target model: {}, asset path: {}", target_model, model_path[0]); } - num_models = model_path.size(); + num_models = static_cast(model_path.size()); // Read Texture Format std::string textureType = raw_asset_map["texture_type"].get(); @@ -458,11 +457,11 @@ void MobileNerfRayQuery::initialize_mlp_uniform_buffers(int model_index) json data = json::parse(f); // Record a index of the first sub-model - int first_sub_model = models.size(); - int obj_num = data["obj_num"].get(); + const auto first_sub_model = models.size(); + int obj_num = data["obj_num"].get(); // Here we know the actual number of sub models - int next_sub_model_index = models.size(); + const auto next_sub_model_index = models.size(); models.resize(models.size() + obj_num); for (int i = next_sub_model_index; i < models.size(); i++) @@ -535,7 +534,7 @@ void MobileNerfRayQuery::initialize_mlp_uniform_buffers(int model_index) } // Each sub model will share the same mlp weights data - mlp_weights &model_mlp = mlp_weight_vector[model_index]; + MLP_Weights &model_mlp = mlp_weight_vector[model_index]; for (int ii = 0; ii < WEIGHTS_0_COUNT; ii++) { @@ -680,7 +679,7 @@ void MobileNerfRayQuery::setup_framebuffers() 
framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebuffer_create_info.pNext = NULL; framebuffer_create_info.renderPass = render_pass_nerf; - framebuffer_create_info.attachmentCount = views.size(); + framebuffer_create_info.attachmentCount = static_cast(views.size()); framebuffer_create_info.pAttachments = views.data(); framebuffer_create_info.width = get_render_context().get_surface_extent().width; framebuffer_create_info.height = get_render_context().get_surface_extent().height; @@ -866,7 +865,7 @@ void MobileNerfRayQuery::create_uniforms() { LOGI("Creating mlp weights uniform buffer for model {}", i); weights_buffers[i] = std::make_unique(get_device(), - sizeof(mlp_weights), + sizeof(MLP_Weights), VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); } @@ -897,7 +896,7 @@ void MobileNerfRayQuery::update_weights_buffers() { for (int i = 0; i < num_models; i++) { - weights_buffers[i]->update(&(mlp_weight_vector[i].data[0]), sizeof(mlp_weights)); + weights_buffers[i]->update(&(mlp_weight_vector[i].data[0]), sizeof(MLP_Weights)); } } @@ -1093,18 +1092,18 @@ void MobileNerfRayQuery::create_pipeline_layout() void MobileNerfRayQuery::create_descriptor_pool() { std::vector pool_sizes = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * (uint32_t) framebuffers.size()}, - {VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1 * (uint32_t) framebuffers.size()}, - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * (uint32_t) framebuffers.size() * (uint32_t) num_models}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2 * (uint32_t) models.size() * (uint32_t) framebuffers.size()}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2 * (uint32_t) models.size() * (uint32_t) framebuffers.size()}}; - VkDescriptorPoolCreateInfo descriptor_pool_create_info = vkb::initializers::descriptor_pool_create_info(pool_sizes, 5 * framebuffers.size()); + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * static_cast(framebuffers.size())}, + 
{VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1 * static_cast(framebuffers.size())}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 * static_cast(framebuffers.size()) * static_cast(num_models)}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2 * static_cast(models.size()) * static_cast(framebuffers.size())}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2 * static_cast(models.size()) * static_cast(framebuffers.size())}}; + VkDescriptorPoolCreateInfo descriptor_pool_create_info = vkb::initializers::descriptor_pool_create_info(pool_sizes, 5 * static_cast(framebuffers.size())); VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &descriptor_pool_create_info, nullptr, &descriptor_pool)); } void MobileNerfRayQuery::create_descriptor_sets() { - int numDescriptorPerModel = framebuffers.size(); + const auto numDescriptorPerModel = framebuffers.size(); descriptor_set_common.resize(numDescriptorPerModel); descriptor_set_vertices.resize(numDescriptorPerModel); descriptor_set_indices.resize(numDescriptorPerModel); @@ -1113,7 +1112,7 @@ void MobileNerfRayQuery::create_descriptor_sets() auto allocate_unbounded_descriptor_set = [&](VkDescriptorSetLayout &descriptor_set_layout, VkDescriptorSet &descriptor_set) { uint32_t counts[1]; - counts[0] = models.size(); + counts[0] = static_cast(models.size()); VkDescriptorSetVariableDescriptorCountAllocateInfo set_counts = {}; set_counts.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO; @@ -1178,13 +1177,13 @@ void MobileNerfRayQuery::create_descriptor_sets() VkWriteDescriptorSet uniform_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_common[i], VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniform_buffer_descriptor); VkWriteDescriptorSet vertex_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_vertices[i], - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, vertex_buffer_descriptors.data(), vertex_buffer_descriptors.size()); + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, 
vertex_buffer_descriptors.data(), static_cast(vertex_buffer_descriptors.size())); VkWriteDescriptorSet index_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_indices[i], - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, index_buffer_descriptors.data(), index_buffer_descriptors.size()); + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, index_buffer_descriptors.data(), static_cast(index_buffer_descriptors.size())); VkWriteDescriptorSet texture_input_write_0 = vkb::initializers::write_descriptor_set(descriptor_set_feature1[i], - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_1_descriptors.data(), texture_input_1_descriptors.size()); + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_1_descriptors.data(), static_cast(texture_input_1_descriptors.size())); VkWriteDescriptorSet texture_input_write_1 = vkb::initializers::write_descriptor_set(descriptor_set_feature2[i], - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_2_descriptors.data(), texture_input_2_descriptors.size()); + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, texture_input_2_descriptors.data(), static_cast(texture_input_2_descriptors.size())); // Set up the descriptor for binding our top level acceleration structure to the ray tracing shaders VkWriteDescriptorSetAccelerationStructureKHR descriptor_acceleration_structure_info{}; @@ -1216,7 +1215,7 @@ void MobileNerfRayQuery::create_descriptor_sets() weights_buffer_descriptors.emplace_back(create_descriptor(*weight_buffer)); } weights_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set_common[i], - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, weights_buffer_descriptors.data(), weights_buffer_descriptors.size()); + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, weights_buffer_descriptors.data(), static_cast(weights_buffer_descriptors.size())); } else { @@ -1246,7 +1245,7 @@ void MobileNerfRayQuery::prepare_pipelines() std::vector blend_attachment_states; 
blend_attachment_states.push_back(vkb::initializers::pipeline_color_blend_attachment_state(0xf, VK_FALSE)); - VkPipelineColorBlendStateCreateInfo color_blend_state = vkb::initializers::pipeline_color_blend_state_create_info(blend_attachment_states.size(), blend_attachment_states.data()); + VkPipelineColorBlendStateCreateInfo color_blend_state = vkb::initializers::pipeline_color_blend_state_create_info(static_cast(blend_attachment_states.size()), blend_attachment_states.data()); VkPipelineDepthStencilStateCreateInfo depth_stencil_state = vkb::initializers::pipeline_depth_stencil_state_create_info(VK_TRUE, VK_TRUE, VK_COMPARE_OP_LESS); depth_stencil_state.depthBoundsTestEnable = VK_FALSE; @@ -1315,7 +1314,7 @@ void MobileNerfRayQuery::build_command_buffers() render_pass_begin_info.renderArea.offset.y = 0; render_pass_begin_info.renderArea.extent.width = width; render_pass_begin_info.renderArea.extent.height = height; - render_pass_begin_info.clearValueCount = clear_values.size(); + render_pass_begin_info.clearValueCount = static_cast(clear_values.size()); render_pass_begin_info.pClearValues = clear_values.data(); VkClearValue clear_values_UI[2]; @@ -1389,7 +1388,7 @@ void MobileNerfRayQuery::draw() ApiVulkanSample::submit_frame(); } -std::unique_ptr> create_mobile_nerf_rayquery() +std::unique_ptr create_mobile_nerf_rayquery() { return std::make_unique(); } diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h index 3fba926b7..cc4474709 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h @@ -68,7 +68,7 @@ class MobileNerfRayQuery : public ApiVulkanSample #define BIAS_2_COUNT (4) // some typedef for each model - struct mlp_weights + struct MLP_Weights { float data[WEIGHTS_0_COUNT + WEIGHTS_1_COUNT + WEIGHTS_2_COUNT + BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT]; // Array of floats @@ -119,7 +119,7 @@ 
class MobileNerfRayQuery : public ApiVulkanSample std::vector models; // MLPs for each model - std::vector mlp_weight_vector; + std::vector mlp_weight_vector; std::vector> weights_buffers; // Global uniform buffer @@ -201,4 +201,4 @@ class MobileNerfRayQuery : public ApiVulkanSample void create_texture_helper(std::string const &texturePath, Texture &texture_input); }; -std::unique_ptr> create_mobile_nerf_rayquery(); +std::unique_ptr create_mobile_nerf_rayquery(); From 414077a460906f259e68d26ba7f8b8513b775b6a Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 22 Aug 2024 18:27:05 -0600 Subject: [PATCH 03/10] Add missing device properties 2 extension Signed-off-by: Rodrigo Holztrattner --- samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index df9f6c8db..bf069230c 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -95,6 +95,9 @@ MobileNerfRayQuery::MobileNerfRayQuery() set_api_version(VK_API_VERSION_1_1); + // Required by VK_EXT_scalar_block_layout + add_device_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + // Required by VK_KHR_acceleration_structure add_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); add_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); From 6cafac4207cb924afdfad2516331d8a281890c68 Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 22 Aug 2024 18:36:59 -0600 Subject: [PATCH 04/10] Minor README fix (remove repeated word) Signed-off-by: Rodrigo Holztrattner --- samples/general/mobile_nerf_rayquery/README.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/general/mobile_nerf_rayquery/README.adoc b/samples/general/mobile_nerf_rayquery/README.adoc index 
0d56054be..9f382f5e4 100644 --- a/samples/general/mobile_nerf_rayquery/README.adoc +++ b/samples/general/mobile_nerf_rayquery/README.adoc @@ -26,7 +26,7 @@ endif::[] NeRF is a new 3D representation method in Computer Vision that creates images of a 3D scene using several 2D pictures taken from different viewpoints. This method constructs a representation of the 3D volume. Various adaptations of NeRF target different use cases, including MobileNeRF, which focuses on rendering NeRF efficiently on mobile phones by leveraging existing traditional graphic hardware. -This version enhances the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf[previous MobileNeRF implementation] implementation by using the Vulkan Ray Query feature, which leverages the hardware ray tracing capabilities of the Adreno GPU. +This version enhances the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf[previous MobileNeRF implementation] by using the Vulkan Ray Query feature, which leverages the hardware ray tracing capabilities of the Adreno GPU. This enhancement greatly boosts performance in most use cases. Additionally, the Vulkan API provides great flexibility for modifying and optimizing the rendering pipeline and shaders, enabling more functionalities while delivering optimal performance. 
== Notes From 1fcb6b843e916d5268a164345cf8cc18b93e41fa Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Fri, 30 Aug 2024 09:02:06 -0600 Subject: [PATCH 05/10] Fix for pending shader and cast issues Signed-off-by: Rodrigo Holztrattner --- .../mobile_nerf_rayquery/CMakeLists.txt | 5 +- .../mobile_nerf_rayquery.cpp | 218 ++++-------------- .../mobile_nerf_rayquery.h | 26 +-- .../rayquery_morpheus.frag | 4 +- .../rayquery_morpheus_combo.frag | 4 +- 5 files changed, 59 insertions(+), 198 deletions(-) diff --git a/samples/general/mobile_nerf_rayquery/CMakeLists.txt b/samples/general/mobile_nerf_rayquery/CMakeLists.txt index c54d23a98..45e0c677d 100644 --- a/samples/general/mobile_nerf_rayquery/CMakeLists.txt +++ b/samples/general/mobile_nerf_rayquery/CMakeLists.txt @@ -25,4 +25,7 @@ add_sample_with_tags( AUTHOR "Qualcomm" NAME "Mobile NeRF Ray Query" DESCRIPTION "A Mobile Neural Radiance Field synthesizer sample using ray query, based on textured polygons." -) + SHADER_FILES_GLSL + "mobile_nerf_rayquery/quad.vert" + "mobile_nerf_rayquery/rayquery_morpheus_combo.frag" + "mobile_nerf_rayquery/rayquery_morpheus.frag") diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index bf069230c..ca175d5d7 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -47,7 +47,7 @@ struct RequestFeature template struct CopyBuffer { - std::vector operator()(std::unordered_map &buffers, const char *buffer_name) + std::vector operator()(std::unordered_map &buffers, const char *buffer_name) { auto iter = buffers.find(buffer_name); if (iter == buffers.cend()) @@ -95,9 +95,6 @@ MobileNerfRayQuery::MobileNerfRayQuery() set_api_version(VK_API_VERSION_1_1); - // Required by VK_EXT_scalar_block_layout - add_device_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - // Required by 
VK_KHR_acceleration_structure add_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); add_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); @@ -119,19 +116,6 @@ MobileNerfRayQuery::~MobileNerfRayQuery() { if (has_device()) { - if (render_pass_nerf) - { - vkDestroyRenderPass(get_device().get_handle(), render_pass_nerf, nullptr); - } - - for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) - { - if (framebuffers_nerf[i]) - { - vkDestroyFramebuffer(get_device().get_handle(), framebuffers_nerf[i], nullptr); - } - } - auto device_ptr = get_device().get_handle(); for (auto &model : models) @@ -175,15 +159,10 @@ bool MobileNerfRayQuery::prepare(const vkb::ApplicationOptions &options) } load_shaders(); - update_render_pass(); - setup_framebuffers(); - // Because we have our own customized render pass, the UI render pass need to be updated with load on load so it won't - // clear out the written color attachment - update_render_pass_flags(RenderPassCreateFlags::ColorAttachmentLoad); // Setup camera camera.type = vkb::CameraType::LookAt; - camera_pos.y = -camera_pos.y; // flip y to keep consistency of the init pos between rayquery and rasterization + camera_pos.y = -camera_pos.y; camera_set_look_at(camera, camera_pos, glm::vec3(0.0f), glm::vec3(0.0f, 1.0f, 0.0f)); camera.set_perspective(60.0f, static_cast(width) / static_cast(height), 0.01f, 256.0f); @@ -338,9 +317,13 @@ void MobileNerfRayQuery::read_json_map() // Load combo models or a single model. In combo mode, we have multiple sets of weights. 
if (!asset_map["combo"].is_null()) + { combo_mode = asset_map["combo"].get(); + } else + { combo_mode = false; + } if (combo_mode) { @@ -606,97 +589,6 @@ void MobileNerfRayQuery::load_shaders() VK_SHADER_STAGE_FRAGMENT_BIT); } -void MobileNerfRayQuery::update_render_pass() -{ - // 0: Depth attachment - // 1: Swapchain attachment - std::array attachments = {}; - // Depth attachment - attachments[0].format = depth_format; - attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - attachments[0].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - // Swapchain attachment - attachments[1].format = get_render_context().get_swapchain().get_format(); - attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - attachments[1].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - - VkAttachmentReference depth_reference = {}; - depth_reference.attachment = 0; - depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - - VkAttachmentReference swapchain_reference = {}; - swapchain_reference.attachment = 1; - swapchain_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass = {}; - subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass.colorAttachmentCount = 1; - subpass.pColorAttachments = &swapchain_reference; - subpass.pDepthStencilAttachment = &depth_reference; - subpass.inputAttachmentCount = 0; - 
subpass.pInputAttachments = nullptr; - subpass.preserveAttachmentCount = 0; - subpass.pPreserveAttachments = nullptr; - subpass.pResolveAttachments = nullptr; - - VkRenderPassCreateInfo render_pass_create_info = {}; - render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - render_pass_create_info.attachmentCount = static_cast(attachments.size()); - render_pass_create_info.pAttachments = attachments.data(); - render_pass_create_info.subpassCount = 1; - render_pass_create_info.pSubpasses = &subpass; - - VK_CHECK(vkCreateRenderPass(get_device().get_handle(), &render_pass_create_info, nullptr, &render_pass_nerf)); -} - -void MobileNerfRayQuery::setup_framebuffers() -{ - // Delete existing frame buffers - if (framebuffers_nerf.size() > 0) - { - for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) - { - if (framebuffers_nerf[i] != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(get_device().get_handle(), framebuffers_nerf[i], nullptr); - } - } - } - - std::vector views; - views.resize(2); - views[0] = depth_stencil.view; - - // Depth/Stencil attachment is the same for all frame buffers - VkFramebufferCreateInfo framebuffer_create_info = {}; - framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_create_info.pNext = NULL; - framebuffer_create_info.renderPass = render_pass_nerf; - framebuffer_create_info.attachmentCount = static_cast(views.size()); - framebuffer_create_info.pAttachments = views.data(); - framebuffer_create_info.width = get_render_context().get_surface_extent().width; - framebuffer_create_info.height = get_render_context().get_surface_extent().height; - framebuffer_create_info.layers = 1; - - framebuffers_nerf.resize(swapchain_buffers.size()); - - for (uint32_t i = 0; i < framebuffers_nerf.size(); i++) - { - views[1] = swapchain_buffers[i].view; - VK_CHECK(vkCreateFramebuffer(get_device().get_handle(), &framebuffer_create_info, nullptr, &framebuffers_nerf[i])); - } -} - void 
MobileNerfRayQuery::load_scene(int model_index, int sub_model_index, int models_entry) { Model &model = models[models_entry]; @@ -817,13 +709,13 @@ void MobileNerfRayQuery::create_static_object_buffers(int models_entry) const VkBufferUsageFlags staging_flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; // Create destination buffers - model.vertex_buffer = std::make_unique( + model.vertex_buffer = std::make_unique( get_device(), vertex_buffer_size, buffer_usage_flags | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY); model.vertex_buffer->set_debug_name(fmt::format("Model #{} vertices", models_entry)); - model.index_buffer = std::make_unique( + model.index_buffer = std::make_unique( get_device(), index_buffer_size, buffer_usage_flags | VK_BUFFER_USAGE_TRANSFER_DST_BIT, @@ -831,14 +723,14 @@ void MobileNerfRayQuery::create_static_object_buffers(int models_entry) model.index_buffer->set_debug_name(fmt::format("Model #{} indices", models_entry)); // Create staging buffers - std::unique_ptr staging_vertex_buffer = std::make_unique( + std::unique_ptr staging_vertex_buffer = std::make_unique( get_device(), vertex_buffer_size, staging_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); staging_vertex_buffer->update(model.vertices); - std::unique_ptr staging_index_buffer = std::make_unique( + std::unique_ptr staging_index_buffer = std::make_unique( get_device(), index_buffer_size, staging_flags, @@ -859,18 +751,18 @@ void MobileNerfRayQuery::create_uniforms() weights_buffers.resize(num_models); LOGI("Creating camera view uniform buffer"); - uniform_buffer = std::make_unique(get_device(), - sizeof(global_uniform), - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VMA_MEMORY_USAGE_CPU_TO_GPU); + uniform_buffer = std::make_unique(get_device(), + sizeof(global_uniform), + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); for (int i = 0; i < num_models; i++) { LOGI("Creating mlp weights uniform 
buffer for model {}", i); - weights_buffers[i] = std::make_unique(get_device(), - sizeof(MLP_Weights), - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VMA_MEMORY_USAGE_CPU_TO_GPU); + weights_buffers[i] = std::make_unique(get_device(), + sizeof(MLP_Weights), + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); } update_uniform_buffer(); @@ -881,13 +773,13 @@ void MobileNerfRayQuery::update_uniform_buffer() { assert(uniform_buffer); - camera.set_perspective(fov, (float) width / (float) height, 0.01f, 200.0f); + camera.set_perspective(fov, static_cast(width) / static_cast(height), 0.01f, 200.0f); const float tan_half_fov = tan(0.5 * fov / 180.0f * 3.141592653589793f); - global_uniform.camera_position = camera.position; + global_uniform.camera_position = glm::inverse(camera.matrices.view) * glm::vec4(0, 0, 0, 1); global_uniform.camera_side = glm::vec3(camera.matrices.view[0][0], camera.matrices.view[1][0], camera.matrices.view[2][0]); global_uniform.camera_up = glm::vec3(camera.matrices.view[0][1], camera.matrices.view[1][1], camera.matrices.view[2][1]); - global_uniform.camera_lookat = glm::vec3(camera.matrices.view[0][2], camera.matrices.view[1][2], camera.matrices.view[2][2]); + global_uniform.camera_lookat = -glm::vec3(camera.matrices.view[0][2], camera.matrices.view[1][2], camera.matrices.view[2][2]); global_uniform.img_dim = glm::vec2(width, height); global_uniform.tan_half_fov = tan_half_fov; @@ -962,11 +854,11 @@ void MobileNerfRayQuery::create_top_level_acceleration_structure() LOGI("model num: {}", models.size()); - const size_t instancesDataSize = sizeof(VkAccelerationStructureInstanceKHR) * acceleration_structure_instances.size(); - std::unique_ptr instances_buffer = std::make_unique(get_device(), - instancesDataSize, - VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - 
VMA_MEMORY_USAGE_CPU_TO_GPU); + const size_t instancesDataSize = sizeof(VkAccelerationStructureInstanceKHR) * acceleration_structure_instances.size(); + std::unique_ptr instances_buffer = std::make_unique(get_device(), + instancesDataSize, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); instances_buffer->update(acceleration_structure_instances.data(), instancesDataSize); top_level_acceleration_structure = std::make_unique(get_device(), VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); @@ -996,7 +888,7 @@ void MobileNerfRayQuery::create_bottom_level_acceleration_structure(int model_en M[0][1], M[1][1], M[2][1], -M[3][1], M[0][2], M[1][2], M[2][2], M[3][2]}; } - std::unique_ptr transform_matrix_buffer = std::make_unique(get_device(), sizeof(transform_matrix), buffer_usage_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); + std::unique_ptr transform_matrix_buffer = std::make_unique(get_device(), sizeof(transform_matrix), buffer_usage_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); transform_matrix_buffer->update(&transform_matrix, sizeof(transform_matrix)); if (model.bottom_level_acceleration_structure == nullptr) @@ -1004,9 +896,9 @@ void MobileNerfRayQuery::create_bottom_level_acceleration_structure(int model_en model.bottom_level_acceleration_structure = std::make_unique( get_device(), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); model.bottom_level_acceleration_structure->add_triangle_geometry( - model.vertex_buffer, - model.index_buffer, - transform_matrix_buffer, + *model.vertex_buffer, + *model.index_buffer, + *transform_matrix_buffer, model.indices.size(), model.vertices.size(), sizeof(Vertex), @@ -1272,7 +1164,7 @@ void MobileNerfRayQuery::prepare_pipelines() VkPipelineVertexInputStateCreateInfo vertex_input_state{}; vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - VkGraphicsPipelineCreateInfo pipeline_create_info = 
vkb::initializers::pipeline_create_info(pipeline_layout, render_pass_nerf, 0); + VkGraphicsPipelineCreateInfo pipeline_create_info = vkb::initializers::pipeline_create_info(pipeline_layout, render_pass, 0); pipeline_create_info.pVertexInputState = &vertex_input_state; pipeline_create_info.pInputAssemblyState = &input_assembly_state; pipeline_create_info.pRasterizationState = &rasterization_state; @@ -1297,53 +1189,30 @@ void MobileNerfRayQuery::build_command_buffers() view_port_width = width; } - // In case the screen is resized, need to update the storage image size and descriptor set - // Note that the texture_rendered image has already been recreated at this point - if (!prepared) - { - setup_framebuffers(); - } - - VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info(); - std::vector clear_values; + VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info(); - clear_values.resize(2); - clear_values[0].depthStencil = {1.0f, 0}; - clear_values[1].color = {{1.0f, 1.0f, 1.0f, 1.0f}}; + VkClearValue clear_values[2]; + clear_values[0].color = default_clear_color; + clear_values[1].depthStencil = {1.0f, 0}; VkRenderPassBeginInfo render_pass_begin_info = vkb::initializers::render_pass_begin_info(); - render_pass_begin_info.renderPass = render_pass_nerf; + render_pass_begin_info.renderPass = render_pass; render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; render_pass_begin_info.renderArea.extent.width = width; render_pass_begin_info.renderArea.extent.height = height; - render_pass_begin_info.clearValueCount = static_cast(clear_values.size()); - render_pass_begin_info.pClearValues = clear_values.data(); - - VkClearValue clear_values_UI[2]; - clear_values_UI[0].color = default_clear_color; - clear_values_UI[1].depthStencil = {1.0f, 0}; - - VkRenderPassBeginInfo render_pass_begin_info_UI = vkb::initializers::render_pass_begin_info(); - 
render_pass_begin_info_UI.renderPass = render_pass; - render_pass_begin_info_UI.renderArea.offset.x = 0; - render_pass_begin_info_UI.renderArea.offset.y = 0; - render_pass_begin_info_UI.renderArea.extent.width = width; - render_pass_begin_info_UI.renderArea.extent.height = height; - render_pass_begin_info_UI.clearValueCount = 2; - render_pass_begin_info_UI.pClearValues = clear_values_UI; - - VkImageSubresourceRange subresource_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + render_pass_begin_info.clearValueCount = 2; + render_pass_begin_info.pClearValues = clear_values; for (size_t i = 0; i < draw_cmd_buffers.size(); ++i) { - render_pass_begin_info.framebuffer = framebuffers_nerf[i]; + render_pass_begin_info.framebuffer = framebuffers[i]; VK_CHECK(vkBeginCommandBuffer(draw_cmd_buffers[i], &command_buffer_begin_info)); vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - VkViewport viewport = vkb::initializers::viewport((float) view_port_width, (float) view_port_height, 0.0f, 1.0f); + VkViewport viewport = vkb::initializers::viewport(static_cast(view_port_width), static_cast(view_port_height), 0.0f, 1.0f); vkCmdSetViewport(draw_cmd_buffers[i], 0, 1, &viewport); VkRect2D scissor = vkb::initializers::rect2D(static_cast(width), static_cast(height), 0, 0); @@ -1364,13 +1233,8 @@ void MobileNerfRayQuery::build_command_buffers() VkDeviceSize offsets[1] = {0}; vkCmdDraw(draw_cmd_buffers[i], 3, 1, 0, 0); - vkCmdEndRenderPass(draw_cmd_buffers[i]); - - // Render UI - render_pass_begin_info_UI.framebuffer = framebuffers[i]; - - vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info_UI, VK_SUBPASS_CONTENTS_INLINE); draw_ui(draw_cmd_buffers[i]); + vkCmdEndRenderPass(draw_cmd_buffers[i]); VK_CHECK(vkEndCommandBuffer(draw_cmd_buffers[i])); diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h index cc4474709..11c33a39e 100644 --- 
a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h @@ -54,7 +54,7 @@ class MobileNerfRayQuery : public ApiVulkanSample alignas(16) glm::vec3 camera_up; alignas(16) glm::vec3 camera_lookat; alignas(8) glm::vec2 img_dim; - alignas(4) float tan_half_fov; + alignas(4) float tan_half_fov{}; } global_uniform; #define WEIGHTS_0_COUNT (176) @@ -99,8 +99,8 @@ class MobileNerfRayQuery : public ApiVulkanSample struct Model { - int model_index; - int sub_model_num; + int model_index{}; + int sub_model_num{}; std::vector vertices; std::vector> indices; @@ -109,8 +109,8 @@ class MobileNerfRayQuery : public ApiVulkanSample Texture texture_input_0, texture_input_1; // Each model has its vertex buffer and index buffer. In ray query, they are storage buffers. - std::unique_ptr vertex_buffer{nullptr}; - std::unique_ptr index_buffer{nullptr}; + std::unique_ptr vertex_buffer{nullptr}; + std::unique_ptr index_buffer{nullptr}; // Each model has its BLAS std::unique_ptr bottom_level_acceleration_structure{nullptr}; @@ -119,16 +119,13 @@ class MobileNerfRayQuery : public ApiVulkanSample std::vector models; // MLPs for each model - std::vector mlp_weight_vector; - std::vector> weights_buffers; + std::vector mlp_weight_vector; + std::vector> weights_buffers; // Global uniform buffer - std::unique_ptr uniform_buffer; + std::unique_ptr uniform_buffer; - std::vector framebuffers_nerf; - VkRenderPass render_pass_nerf{VK_NULL_HANDLE}; - - std::array shader_stages; + std::array shader_stages{}; VkPipeline pipeline{VK_NULL_HANDLE}; VkPipelineLayout pipeline_layout{VK_NULL_HANDLE}; @@ -150,7 +147,7 @@ class MobileNerfRayQuery : public ApiVulkanSample // For loading mobile nerf assets map json asset_map; - int num_models; + int num_models = 0; bool combo_mode = false; bool do_rotation = false; std::vector model_path; @@ -182,9 +179,6 @@ class MobileNerfRayQuery : public ApiVulkanSample void update_uniform_buffer(); void 
update_weights_buffers(); - void setup_framebuffers(); - void update_render_pass(); - void create_pipeline_layout(); void create_descriptor_pool(); void create_descriptor_sets(); diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag index 8896370b9..e0620ae24 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag @@ -77,12 +77,12 @@ layout(set = 0, binding = 2) uniform mlp_weights BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT)/4]; // Array of floats } weights; -layout(set = 1, binding = 0, scalar) buffer Vertices +layout(set = 1, binding = 0, scalar) readonly buffer Vertices { Vertex vertices[]; } vertices_set[]; -layout(set = 2, binding = 0, scalar) buffer Indices +layout(set = 2, binding = 0, scalar) readonly buffer Indices { uint indices[]; } indices_set[]; diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag index 8f20123fa..d069ca929 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag @@ -77,12 +77,12 @@ layout(set = 0, binding = 2) uniform mlp_weights BIAS_0_COUNT + BIAS_1_COUNT + BIAS_2_COUNT)/4]; // Array of floats } weights_arr[]; -layout(set = 1, binding = 0, scalar) buffer Vertices +layout(set = 1, binding = 0, scalar) readonly buffer Vertices { Vertex vertices[]; } vertices_set[]; -layout(set = 2, binding = 0, scalar) buffer Indices +layout(set = 2, binding = 0, scalar) readonly buffer Indices { uint indices[]; } indices_set[]; From 00ee24a16ebbff2cd441f4d2c71350229fdc692f Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 12 Sep 2024 08:57:16 -0600 Subject: [PATCH 06/10] Address camera movement and a few minor issues Signed-off-by: Rodrigo Holztrattner --- .../mobile_nerf_rayquery.cpp | 27 ++++++----------- .../mobile_nerf_rayquery.h | 6 ++-- 
.../rayquery_morpheus.frag | 30 +++++++------------ .../rayquery_morpheus_combo.frag | 30 +++++++------------ 4 files changed, 33 insertions(+), 60 deletions(-) diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index ca175d5d7..04172a587 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -360,11 +360,6 @@ void MobileNerfRayQuery::read_json_map() LOGI("Using VK_FORMAT_R32G32B32A32_SFLOAT for feature texture"); feature_map_format = VK_FORMAT_R32G32B32A32_SFLOAT; } - else if (textureType == "8bit") - { - LOGI("Using VK_FORMAT_R8G8B8A8_UNORM for feature texture"); - feature_map_format = VK_FORMAT_R8G8B8A8_UNORM; - } else { LOGW("Unrecognized feature texture type, using VK_FORMAT_R32G32B32A32_SFLOAT"); @@ -374,7 +369,7 @@ void MobileNerfRayQuery::read_json_map() // Rotation mode do_rotation = raw_asset_map["rotation"].get(); - // Read view port size. Use defualt setting (1280x720) if size is 0. + // Read view port size. Use default setting (1280x720) if size is 0. view_port_width = raw_asset_map["width"].get(); view_port_height = raw_asset_map["height"].get(); @@ -392,7 +387,7 @@ void MobileNerfRayQuery::read_json_map() } else { - LOGW("Fail to read camera position. Use defualt value."); + LOGW("Fail to read camera position. Use default value."); } // Read instancing rendering settings. 
@@ -445,12 +440,9 @@ void MobileNerfRayQuery::initialize_mlp_uniform_buffers(int model_index) // Record a index of the first sub-model const auto first_sub_model = models.size(); int obj_num = data["obj_num"].get(); - - // Here we know the actual number of sub models - const auto next_sub_model_index = models.size(); models.resize(models.size() + obj_num); - for (int i = next_sub_model_index; i < models.size(); i++) + for (int i = first_sub_model; i < models.size(); i++) { models[i].model_index = model_index; } @@ -626,7 +618,8 @@ void MobileNerfRayQuery::load_scene(int model_index, int sub_model_index, int mo model.vertices.resize(vertex_start_index + pts_.size()); for (size_t i = 0; i < pts_.size(); ++i) { - model.vertices[vertex_start_index + i].position = pts_[i]; + model.vertices[vertex_start_index + i].position = pts_[i]; + model.vertices[vertex_start_index + i].position.y *= -1.0f; model.vertices[vertex_start_index + i].tex_coord = glm::vec2(texcoord_[i].x, 1.0f - texcoord_[i].y); } } @@ -776,12 +769,10 @@ void MobileNerfRayQuery::update_uniform_buffer() camera.set_perspective(fov, static_cast(width) / static_cast(height), 0.01f, 200.0f); const float tan_half_fov = tan(0.5 * fov / 180.0f * 3.141592653589793f); - global_uniform.camera_position = glm::inverse(camera.matrices.view) * glm::vec4(0, 0, 0, 1); - global_uniform.camera_side = glm::vec3(camera.matrices.view[0][0], camera.matrices.view[1][0], camera.matrices.view[2][0]); - global_uniform.camera_up = glm::vec3(camera.matrices.view[0][1], camera.matrices.view[1][1], camera.matrices.view[2][1]); - global_uniform.camera_lookat = -glm::vec3(camera.matrices.view[0][2], camera.matrices.view[1][2], camera.matrices.view[2][2]); - global_uniform.img_dim = glm::vec2(width, height); - global_uniform.tan_half_fov = tan_half_fov; + global_uniform.view_inverse = glm::inverse(camera.matrices.view); + global_uniform.proj_inverse = glm::inverse(camera.matrices.perspective); + global_uniform.img_dim = glm::vec2(width, 
height); + global_uniform.tan_half_fov = tan_half_fov; uniform_buffer->update(&global_uniform, sizeof(GlobalUniform)); } diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h index 11c33a39e..019ceca58 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.h @@ -49,10 +49,8 @@ class MobileNerfRayQuery : public ApiVulkanSample private: struct GlobalUniform { - alignas(16) glm::vec3 camera_position; - alignas(16) glm::vec3 camera_side; - alignas(16) glm::vec3 camera_up; - alignas(16) glm::vec3 camera_lookat; + alignas(16) glm::mat4 view_inverse; + alignas(16) glm::mat4 proj_inverse; alignas(8) glm::vec2 img_dim; alignas(4) float tan_half_fov{}; } global_uniform; diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag index e0620ae24..e4e8b5ccb 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag @@ -42,10 +42,8 @@ struct Vertex struct GlobalUniform { - vec3 camera_position; - vec3 camera_side; - vec3 camera_up; - vec3 camera_lookat; + mat4 view_inverse; + mat4 proj_inverse; vec2 img_dim; float tan_half_fov; }; @@ -189,20 +187,12 @@ vec3 evaluateNetwork( vec4 f0, vec4 f1, vec4 viewdir) } vec3 CalcRayDirComp(GlobalUniform params) { - // On [0.0, 1.0] - vec2 img_sample = vec2(gl_FragCoord.xy / params.img_dim); + const vec2 inUV = gl_FragCoord.xy / params.img_dim; + vec2 d = inUV * 2.0 - 1.0; + vec4 target = params.proj_inverse * vec4(d.x, d.y, 1, 1); + vec4 direction = params.view_inverse * vec4(normalize(target.xyz), 0); - // Transform into [-0.5, 0.5] - vec2 hom_sample = img_sample - vec2(0.5f, 0.5f); - hom_sample.y *= -1.0f; // Vertical flip so that origin is top-left - - // Transform into [-dim.x/dim.y/2, dim.x/dim.y/2] x [-0.5, 0.5] - vec2 c_sample = hom_sample * 
params.img_dim / params.img_dim.y; - - // Calculate direction to image plane - const vec3 rayDir = vec3(params.camera_lookat * 0.5 / params.tan_half_fov + c_sample.x * params.camera_side + c_sample.y * params.camera_up); - - return normalize(rayDir); + return normalize(direction.xyz); } ////////////////////////////////////////////////////////////// @@ -230,6 +220,7 @@ vec3 Convert_sRGB_ToLinear(vec3 value) void main(void) { vec3 rayDirection = CalcRayDirComp(params); + vec3 camPosition = (params.view_inverse * vec4(0,0,0,1)).xyz; // initialize a ray query object rayQueryEXT rayQuery; @@ -243,7 +234,7 @@ void main(void) topLevelAS, // Top-level acceleration structure rayFlags, // Ray flags, treat all geometry as non-opaque 0xFF, // 8-bit instance mask, trace against all instances - params.camera_position, // Ray origin + camPosition, // Ray origin tmin, // Minimum t-value rayDirection, // Ray direction tmax); // Maximum t-value @@ -299,6 +290,7 @@ void main(void) void main(void) { vec3 rayDirection = CalcRayDirComp(params); + vec3 camPosition = (params.view_inverse * vec4(0,0,0,1)).xyz; // initialize a ray query object rayQueryEXT rayQuery; @@ -309,7 +301,7 @@ void main(void) topLevelAS, // Top-level acceleration structure rayFlags, // Ray flags, treat all geometry as opaque 0xFF, // 8-bit instance mask, trace against all instances - params.camera_position, // Ray origin + camPosition, // Ray origin tmin, // Minimum t-value rayDirection, // Ray direction tmax); // Maximum t-value diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag index d069ca929..0792a07dc 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag @@ -42,10 +42,8 @@ struct Vertex struct GlobalUniform { - vec3 camera_position; - vec3 camera_side; - vec3 camera_up; - vec3 camera_lookat; + mat4 view_inverse; + mat4 proj_inverse; vec2 img_dim; float 
tan_half_fov; }; @@ -189,20 +187,12 @@ vec3 evaluateNetwork( vec4 f0, vec4 f1, vec4 viewdir, uint idx) } vec3 CalcRayDirComp(GlobalUniform params) { - // On [0.0, 1.0] - vec2 img_sample = vec2(gl_FragCoord.xy / params.img_dim); + const vec2 inUV = gl_FragCoord.xy / params.img_dim; + vec2 d = inUV * 2.0 - 1.0; + vec4 target = params.proj_inverse * vec4(d.x, d.y, 1, 1); + vec4 direction = params.view_inverse * vec4(normalize(target.xyz), 0); - // Transform into [-0.5, 0.5] - vec2 hom_sample = img_sample - vec2(0.5f, 0.5f); - hom_sample.y *= -1.0f; // Vertical flip so that origin is top-left - - // Transform into [-dim.x/dim.y/2, dim.x/dim.y/2] x [-0.5, 0.5] - vec2 c_sample = hom_sample * params.img_dim / params.img_dim.y; - - // Calculate direction to image plane - const vec3 rayDir = vec3(params.camera_lookat * 0.5 / params.tan_half_fov + c_sample.x * params.camera_side + c_sample.y * params.camera_up); - - return normalize(rayDir); + return normalize(direction.xyz); } ////////////////////////////////////////////////////////////// @@ -230,6 +220,7 @@ vec3 Convert_sRGB_ToLinear(vec3 value) void main(void) { vec3 rayDirection = CalcRayDirComp(params); + vec3 camPosition = (params.view_inverse * vec4(0,0,0,1)).xyz; // initialize a ray query object rayQueryEXT rayQuery; @@ -243,7 +234,7 @@ void main(void) topLevelAS, // Top-level acceleration structure rayFlags, // Ray flags, treat all geometry as non-opaque 0xFF, // 8-bit instance mask, trace against all instances - params.camera_position, // Ray origin + camPosition, // Ray origin tmin, // Minimum t-value rayDirection, // Ray direction tmax); // Maximum t-value @@ -299,6 +290,7 @@ void main(void) void main(void) { vec3 rayDirection = CalcRayDirComp(params); + vec3 camPosition = (params.view_inverse * vec4(0,0,0,1)).xyz; // initialize a ray query object rayQueryEXT rayQuery; @@ -309,7 +301,7 @@ void main(void) topLevelAS, // Top-level acceleration structure rayFlags, // Ray flags, treat all geometry as opaque 0xFF, // 
8-bit instance mask, trace against all instances - params.camera_position, // Ray origin + camPosition, // Ray origin tmin, // Minimum t-value rayDirection, // Ray direction tmax); // Maximum t-value From ad78e09a419621be160fe52aabd0332f1099662b Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Mon, 30 Sep 2024 14:38:21 -0600 Subject: [PATCH 07/10] Update GPU feature request logic and minor change at readme file (Adreno -> mobile devices that support ray queries) --- .../general/mobile_nerf_rayquery/README.adoc | 2 +- .../mobile_nerf_rayquery.cpp | 34 +++++-------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/samples/general/mobile_nerf_rayquery/README.adoc b/samples/general/mobile_nerf_rayquery/README.adoc index 9f382f5e4..491e0e86f 100644 --- a/samples/general/mobile_nerf_rayquery/README.adoc +++ b/samples/general/mobile_nerf_rayquery/README.adoc @@ -26,7 +26,7 @@ endif::[] NeRF is a new 3D representation method in Computer Vision that creates images of a 3D scene using several 2D pictures taken from different viewpoints. This method constructs a representation of the 3D volume. Various adaptations of NeRF target different use cases, including MobileNeRF, which focuses on rendering NeRF efficiently on mobile phones by leveraging existing traditional graphic hardware. -This version enhances the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf[previous MobileNeRF implementation] by using the Vulkan Ray Query feature, which leverages the hardware ray tracing capabilities of the Adreno GPU. +This version enhances the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/general/mobile_nerf[previous MobileNeRF implementation] by using the Vulkan Ray Query feature, which leverages the hardware ray tracing capabilities of mobile GPUs that support it. This enhancement greatly boosts performance in most use cases. 
Additionally, the Vulkan API provides great flexibility for modifying and optimizing the rendering pipeline and shaders, enabling more functionalities while delivering optimal performance. == Notes diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index 04172a587..fe8771c3e 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -28,21 +28,6 @@ namespace { constexpr uint32_t MIN_THREAD_COUNT = 1; -struct RequestFeature -{ - vkb::PhysicalDevice &gpu; - explicit RequestFeature(vkb::PhysicalDevice &gpu) : - gpu(gpu) - {} - - template - RequestFeature &request(VkStructureType s_type, VkBool32 T::*member) - { - auto &member_feature = gpu.request_extension_features(s_type); - member_feature.*member = VK_TRUE; - return *this; - } -}; template struct CopyBuffer @@ -198,15 +183,14 @@ bool MobileNerfRayQuery::prepare(const vkb::ApplicationOptions &options) void MobileNerfRayQuery::request_gpu_features(vkb::PhysicalDevice &gpu) { - RequestFeature(gpu) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, &VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, &VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructure) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, &VkPhysicalDeviceRayQueryFeaturesKHR::rayQuery) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::shaderSampledImageArrayNonUniformIndexing) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::shaderStorageBufferArrayNonUniformIndexing) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, 
&VkPhysicalDeviceDescriptorIndexingFeaturesEXT::runtimeDescriptorArray) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, &VkPhysicalDeviceDescriptorIndexingFeaturesEXT::descriptorBindingVariableDescriptorCount) - .request(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT, &VkPhysicalDeviceScalarBlockLayoutFeaturesEXT::scalarBlockLayout); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceBufferDeviceAddressFeatures, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, bufferDeviceAddress); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceAccelerationStructureFeaturesKHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, accelerationStructure); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayQueryFeaturesKHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, rayQuery); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, shaderSampledImageArrayNonUniformIndexing); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, shaderStorageBufferArrayNonUniformIndexing); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, runtimeDescriptorArray); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, descriptorBindingVariableDescriptorCount); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceScalarBlockLayoutFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT, scalarBlockLayout); } void MobileNerfRayQuery::render(float delta_time) @@ -893,7 +877,7 @@ void MobileNerfRayQuery::create_bottom_level_acceleration_structure(int model_en model.indices.size(), model.vertices.size(), sizeof(Vertex), - 0, 
VK_FORMAT_R32G32B32_SFLOAT, VK_GEOMETRY_OPAQUE_BIT_KHR, + 0, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_UINT32, VK_GEOMETRY_OPAQUE_BIT_KHR, get_buffer_device_address(model.vertex_buffer->get_handle()), get_buffer_device_address(model.index_buffer->get_handle())); } From 2ec4f83ccf1b11b5402d0c4bbe92220d099a5eca Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 3 Oct 2024 10:29:14 -0600 Subject: [PATCH 08/10] Fix for missing shader uniform buffer array non uniform indexing Signed-off-by: Rodrigo Holztrattner --- samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index fe8771c3e..d56144255 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -183,9 +183,10 @@ bool MobileNerfRayQuery::prepare(const vkb::ApplicationOptions &options) void MobileNerfRayQuery::request_gpu_features(vkb::PhysicalDevice &gpu) { - REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceBufferDeviceAddressFeatures, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, bufferDeviceAddress); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceBufferDeviceAddressFeaturesKHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, bufferDeviceAddress); REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceAccelerationStructureFeaturesKHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, accelerationStructure); REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayQueryFeaturesKHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, rayQuery); + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, shaderUniformBufferArrayNonUniformIndexing); REQUEST_REQUIRED_FEATURE(gpu, 
VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, shaderSampledImageArrayNonUniformIndexing); REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, shaderStorageBufferArrayNonUniformIndexing); REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, runtimeDescriptorArray); From 3cfb51ff4f8825e5d59ae1a3610abf94e20fe450 Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Thu, 3 Oct 2024 10:31:41 -0600 Subject: [PATCH 09/10] Remove unused variable Signed-off-by: Rodrigo Holztrattner --- samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp index d56144255..85ef0194e 100644 --- a/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp +++ b/samples/general/mobile_nerf_rayquery/mobile_nerf_rayquery.cpp @@ -27,8 +27,6 @@ namespace { -constexpr uint32_t MIN_THREAD_COUNT = 1; - template struct CopyBuffer { From c0ebea8b27b0c5de628b6baf9ecddc5a444b22c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Holztrattner Date: Mon, 4 Nov 2024 09:27:42 -0700 Subject: [PATCH 10/10] Change default model to USE_OPAQUE Signed-off-by: Rodrigo Holztrattner --- shaders/mobile_nerf_rayquery/rayquery_morpheus.frag | 5 ++--- shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag index e4e8b5ccb..5d9d40fec 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus.frag @@ -30,9 +30,8 @@ #extension GL_EXT_ray_query : enable #extension 
GL_EXT_nonuniform_qualifier : enable -// Uncomment this to use opaque mode -// Should be faster, but would get VK_ERROR_DEVICE_LOST on some AMD devices -// #define USE_OPAQUE +// Opaque mode is the fastest, but could result in VK_ERROR_DEVICE_LOST on some AMD devices +#define USE_OPAQUE struct Vertex { diff --git a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag index 0792a07dc..a7393ab3d 100644 --- a/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag +++ b/shaders/mobile_nerf_rayquery/rayquery_morpheus_combo.frag @@ -30,9 +30,8 @@ #extension GL_EXT_ray_query : enable #extension GL_EXT_nonuniform_qualifier : enable -// Uncomment this to use opaque mode -// Should be faster, but would get VK_ERROR_DEVICE_LOST on some AMD devices -// #define USE_OPAQUE +// Opaque mode is the fastest, but could result in VK_ERROR_DEVICE_LOST on some AMD devices +#define USE_OPAQUE struct Vertex {