diff --git a/src/kernel.cpp b/src/kernel.cpp index cf0ba472..c4c2f9b1 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -14,6 +14,8 @@ #include +#include "clspv/Sampler.h" + #include "kernel.hpp" #include "memory.hpp" @@ -44,6 +46,10 @@ cl_int cvk_kernel::init() { m_image_metadata = md; } + if (const auto* md = m_entry_point->sampler_metadata()) { + m_sampler_metadata = md; + } + // Init argument values m_argument_values = cvk_kernel_argument_values::create(m_entry_point); if (m_argument_values == nullptr) { @@ -100,6 +106,26 @@ void cvk_kernel::set_image_metadata(cl_uint index, const void* image) { } } +void cvk_kernel::set_sampler_metadata(cl_uint index, const void* sampler) { + if (!m_sampler_metadata) { + return; + } + auto md = m_sampler_metadata->find(index); + if (md != m_sampler_metadata->end()) { + auto apisampler = *reinterpret_cast(sampler); + auto offset = md->second; + auto sampler = icd_downcast(apisampler); + uint32_t sampler_mask = (sampler->normalized_coords() + ? clspv::CLK_NORMALIZED_COORDS_TRUE + : clspv::CLK_NORMALIZED_COORDS_FALSE) | + (sampler->filter_mode() == CL_FILTER_NEAREST + ? clspv::CLK_FILTER_NEAREST + : clspv::CLK_FILTER_LINEAR); + m_argument_values->set_pod_data(offset, sizeof(sampler_mask), + &sampler_mask); + } +} + cl_int cvk_kernel::set_arg(cl_uint index, size_t size, const void* value) { std::lock_guard lock(m_lock); @@ -123,6 +149,10 @@ cl_int cvk_kernel::set_arg(cl_uint index, size_t size, const void* value) { set_image_metadata(index, value); } + if (arg.kind == kernel_argument_kind::sampler) { + set_sampler_metadata(index, value); + } + return ret; } @@ -264,7 +294,17 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() { } case kernel_argument_kind::sampler: { auto clsampler = static_cast(get_arg_value(arg)); - auto sampler = clsampler->vulkan_sampler(); + bool sampler_norm = false; + if (auto md = m_entry_point->sampler_metadata()) { + sampler_norm = md->find(i) != md->end(); + } + auto sampler = sampler_norm && !clsampler->normalized_coords() + ? clsampler->vulkan_sampler_norm() + : clsampler->vulkan_sampler(); + if (sampler == VK_NULL_HANDLE) { + cvk_error_fn("Could not set descriptor for sampler"); + return false; + } cvk_debug_fn("sampler %p @ set = %u, binding = %u", sampler, arg.descriptorSet, arg.binding); diff --git a/src/kernel.hpp b/src/kernel.hpp index db19e81d..cabc3046 100644 --- a/src/kernel.hpp +++ b/src/kernel.hpp @@ -31,7 +31,8 @@ struct cvk_kernel : public _cl_kernel, api_object { cvk_kernel(cvk_program* program, const char* name) : api_object(program->context()), m_program(program), - m_entry_point(nullptr), m_name(name), m_image_metadata(nullptr) {} + m_entry_point(nullptr), m_name(name), m_sampler_metadata(nullptr), + m_image_metadata(nullptr) {} CHECK_RETURN cl_int init(); std::unique_ptr clone(cl_int* errcode_ret) const; @@ -42,10 +43,16 @@ struct cvk_kernel : public _cl_kernel, api_object { return m_argument_values; } + const kernel_sampler_metadata_map* get_sampler_metadata() const { + return m_sampler_metadata; + } + const kernel_image_metadata_map* get_image_metadata() const { return m_image_metadata; } + void set_sampler_metadata(cl_uint index, const void* sampler); + void set_image_metadata(cl_uint index, const void* image); CHECK_RETURN cl_int set_arg(cl_uint index, size_t size, const void* value); @@ -158,6 +165,7 @@ struct cvk_kernel : public _cl_kernel, api_object { std::string m_name; std::vector m_args; std::shared_ptr m_argument_values; + const kernel_sampler_metadata_map *m_sampler_metadata; const kernel_image_metadata_map* m_image_metadata; }; @@ -237,7 +245,8 @@ struct cvk_kernel_argument_values { } if (m_entry_point->has_pod_arguments() || - m_entry_point->has_image_metadata()) { + m_entry_point->has_image_metadata() || + m_entry_point->has_sampler_metadata()) { // TODO(#101): host out-of-memory errors are currently unhandled. auto buffer = std::make_unique>( m_entry_point->pod_buffer_size()); diff --git a/src/memory.cpp b/src/memory.cpp index 5688e45b..67d0b136 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -158,7 +158,7 @@ cvk_sampler::create(cvk_context* context, bool normalized_coords, return sampler.release(); } -bool cvk_sampler::init() { +bool cvk_sampler::init(bool norm) { auto vkdev = context()->device()->vulkan_device(); // Translate addressing mode @@ -199,7 +199,7 @@ bool cvk_sampler::init() { // Translate coordinate type VkBool32 unnormalized_coordinates; - if (m_normalized_coords) { + if (m_normalized_coords || norm) { unnormalized_coordinates = VK_FALSE; } else { unnormalized_coordinates = VK_TRUE; @@ -235,7 +235,8 @@ bool cvk_sampler::init() { unnormalized_coordinates, // unnormalizedCoordinates }; - auto res = vkCreateSampler(vkdev, &create_info, nullptr, &m_sampler); + VkSampler* sampler = norm ? &m_sampler_norm : &m_sampler; + auto res = vkCreateSampler(vkdev, &create_info, nullptr, sampler); return (res == VK_SUCCESS); } diff --git a/src/memory.hpp b/src/memory.hpp index 63817558..ebe60cfb 100644 --- a/src/memory.hpp +++ b/src/memory.hpp @@ -428,13 +428,18 @@ struct cvk_sampler : public _cl_sampler, api_object { std::vector&& properties) : api_object(context), m_normalized_coords(normalized_coords), m_addressing_mode(addressing_mode), m_filter_mode(filter_mode), - m_properties(std::move(properties)), m_sampler(VK_NULL_HANDLE) {} + m_properties(std::move(properties)), m_sampler(VK_NULL_HANDLE), + m_sampler_norm(VK_NULL_HANDLE) {} ~cvk_sampler() { if (m_sampler != VK_NULL_HANDLE) { auto vkdev = context()->device()->vulkan_device(); vkDestroySampler(vkdev, m_sampler, nullptr); } + if (m_sampler_norm != VK_NULL_HANDLE) { + auto vkdev = context()->device()->vulkan_device(); + vkDestroySampler(vkdev, m_sampler_norm, nullptr); + } } static cvk_sampler* create(cvk_context* context, bool normalized_coords, @@ -453,17 +458,26 @@ struct cvk_sampler : public _cl_sampler, api_object { cl_addressing_mode addressing_mode() const { return m_addressing_mode; } cl_filter_mode filter_mode() const { return m_filter_mode; } VkSampler vulkan_sampler() const { return m_sampler; } + VkSampler vulkan_sampler_norm() { + if (m_sampler_norm == VK_NULL_HANDLE) { + if (!init(true)) { + return VK_NULL_HANDLE; + } + } + return m_sampler_norm; + } const std::vector& properties() const { return m_properties; } private: - bool init(); + bool init(bool norm = false); bool m_normalized_coords; cl_addressing_mode m_addressing_mode; cl_filter_mode m_filter_mode; const std::vector m_properties; VkSampler m_sampler; + VkSampler m_sampler_norm; }; static inline cvk_sampler* icd_downcast(cl_sampler sampler) { diff --git a/src/program.cpp b/src/program.cpp index b0390fab..0a781976 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -151,6 +151,8 @@ spv_result_t parse_reflection(void* user_data, return pushconstant::module_constants_pointer; case NonSemanticClspvReflectionPrintfBufferPointerPushConstant: return pushconstant::printf_buffer_pointer; + case NonSemanticClspvReflectionNormalizedSamplerMaskPushConstant: + return pushconstant::normalized_sampler_mask; default: cvk_error_fn("Unhandled reflection instruction for push constant"); break; @@ -208,6 +210,17 @@ spv_result_t parse_reflection(void* user_data, parse_data->arg_infos[inst->result_id] = info; break; } + case NonSemanticClspvReflectionNormalizedSamplerMaskPushConstant: { + auto kernel = parse_data->strings[inst->words[5]]; + auto ordinal = parse_data->constants[inst->words[6]]; + auto offset = parse_data->constants[inst->words[7]]; + auto size = parse_data->constants[inst->words[8]]; + parse_data->binary->add_sampler_metadata(kernel, ordinal, + offset); + auto pc = inst_to_push_constant(ext_inst); + parse_data->binary->add_push_constant(pc, {offset, size}); + break; + } case NonSemanticClspvReflectionImageArgumentInfoChannelOrderPushConstant: { auto kernel = parse_data->strings[inst->words[5]]; auto ordinal = parse_data->constants[inst->words[6]]; @@ -1668,8 +1681,9 @@ cvk_entry_point::cvk_entry_point(VkDevice dev, cvk_program* program, : m_device(dev), m_context(program->context()), m_program(program), m_name(name), m_pod_descriptor_type(VK_DESCRIPTOR_TYPE_MAX_ENUM), m_pod_buffer_size(0u), m_has_pod_arguments(false), - m_has_pod_buffer_arguments(false), m_image_metadata(nullptr), - m_descriptor_pool(VK_NULL_HANDLE), m_pipeline_layout(VK_NULL_HANDLE) {} + m_has_pod_buffer_arguments(false), m_sampler_metadata(nullptr), + m_image_metadata(nullptr), m_descriptor_pool(VK_NULL_HANDLE), + m_pipeline_layout(VK_NULL_HANDLE) {} cvk_entry_point* cvk_program::get_entry_point(std::string& name, cl_int* errcode_ret) { @@ -1883,6 +1897,11 @@ cl_int cvk_entry_point::init() { m_image_metadata = md; } + // Get the sampler metadata for this entry point + if (auto* md = m_program->sampler_metadata(m_name)) { + m_sampler_metadata = md; + } + // Get a pointer to the arguments from the program auto args = m_program->args_for_kernel(m_name); @@ -1963,32 +1982,49 @@ cl_int cvk_entry_point::init() { m_pod_buffer_size = round_up(m_pod_buffer_size, 4); } - // Take the size of image metadata into account for the pod buffer size - if (m_image_metadata) { - // Find how big the POD buffer should be + // Take the size of image & sampler metadata into account for the pod buffer + // size + { uint32_t max_offset = 0; - for (const auto& md : *m_image_metadata) { - auto order_offset = md.second.order_offset; - auto data_type_offset = md.second.data_type_offset; - if (md.second.has_valid_order()) { - max_offset = std::max(order_offset, max_offset); - push_constant_range.offset = - std::min(order_offset, push_constant_range.offset); - if (order_offset + sizeof(uint32_t) > - push_constant_range.offset + push_constant_range.size) { - push_constant_range.size = order_offset + sizeof(uint32_t) - - push_constant_range.offset; + if (m_image_metadata) { + // Find how big the POD buffer should be + for (const auto& md : *m_image_metadata) { + auto order_offset = md.second.order_offset; + auto data_type_offset = md.second.data_type_offset; + if (md.second.has_valid_order()) { + max_offset = std::max(order_offset, max_offset); + push_constant_range.offset = + std::min(order_offset, push_constant_range.offset); + if (order_offset + sizeof(uint32_t) > + push_constant_range.offset + push_constant_range.size) { + push_constant_range.size = order_offset + + sizeof(uint32_t) - + push_constant_range.offset; + } + } + if (md.second.has_valid_data_type()) { + max_offset = std::max(data_type_offset, max_offset); + push_constant_range.offset = + std::min(data_type_offset, push_constant_range.offset); + if (data_type_offset + sizeof(uint32_t) > + push_constant_range.offset + push_constant_range.size) { + push_constant_range.size = data_type_offset + + sizeof(uint32_t) - + push_constant_range.offset; + } } } - if (md.second.has_valid_data_type()) { - max_offset = std::max(data_type_offset, max_offset); + } + if (m_sampler_metadata) { + for (const auto& md : *m_sampler_metadata) { + auto offset = md.second; + max_offset = std::max(offset, max_offset); push_constant_range.offset = - std::min(data_type_offset, push_constant_range.offset); - if (data_type_offset + sizeof(uint32_t) > + std::min(offset, push_constant_range.offset); + if (offset + sizeof(uint32_t) > push_constant_range.offset + push_constant_range.size) { - push_constant_range.size = data_type_offset + - sizeof(uint32_t) - - push_constant_range.offset; + push_constant_range.size = + offset + sizeof(uint32_t) - push_constant_range.offset; } } } diff --git a/src/program.hpp b/src/program.hpp index 8df31c7e..72a26fd9 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -126,6 +126,7 @@ enum class pushconstant image_metadata, module_constants_pointer, printf_buffer_pointer, + normalized_sampler_mask, }; struct pushconstant_desc { @@ -216,6 +217,10 @@ using kernel_image_metadata_map = using image_metadata_map = std::unordered_map; +using kernel_sampler_metadata_map = std::unordered_map; +using sampler_metadata_map = + std::unordered_map; + class spir_binary { using kernels_arguments_map = @@ -247,6 +252,9 @@ class spir_binary { CHECK_RETURN bool validate(const spirv_validation_options&) const; size_t num_kernels() const { return m_dmaps.size(); } const kernels_arguments_map& kernels_arguments() const { return m_dmaps; } + const sampler_metadata_map& sampler_metadata() const { + return m_sampler_metadata; + } const image_metadata_map& image_metadata() const { return m_image_metadata; } @@ -338,6 +346,11 @@ class spir_binary { m_constant_data_buffer.reset(new constant_data_buffer_info(info)); } + void add_sampler_metadata(const std::string& name, uint32_t ordinal, + uint32_t offset) { + m_sampler_metadata[name][ordinal] = offset; + } + void add_image_channel_order_metadata(const std::string& name, uint32_t ordinal, uint32_t offset) { m_image_metadata[name][ordinal].set_order(offset); @@ -372,6 +385,7 @@ class spir_binary { std::vector m_literal_samplers; std::unordered_map m_push_constants; std::unordered_map m_spec_constants; + sampler_metadata_map m_sampler_metadata; image_metadata_map m_image_metadata; std::unordered_map m_printf_descriptors; printf_buffer_desc_info m_printf_buffer_info; @@ -438,6 +452,10 @@ class cvk_entry_point { const std::vector& args() const { return m_args; } + const kernel_sampler_metadata_map* sampler_metadata() const { + return m_sampler_metadata; + } + const kernel_image_metadata_map* image_metadata() const { return m_image_metadata; } @@ -446,6 +464,8 @@ class cvk_entry_point { bool has_pod_buffer_arguments() const { return m_has_pod_buffer_arguments; } + bool has_sampler_metadata() const { return m_sampler_metadata != nullptr; } + bool has_image_metadata() const { return m_image_metadata != nullptr; } uint32_t pod_buffer_size() const { return m_pod_buffer_size; } @@ -474,6 +494,7 @@ class cvk_entry_point { bool m_has_pod_arguments; bool m_has_pod_buffer_arguments; std::vector m_args; + const kernel_sampler_metadata_map* m_sampler_metadata; const kernel_image_metadata_map* m_image_metadata; uint32_t m_num_resource_slots; VkDescriptorPool m_descriptor_pool; @@ -660,6 +681,15 @@ struct cvk_program : public _cl_program, api_object { } } + const kernel_sampler_metadata_map* sampler_metadata(std::string& name) { + auto const& md = m_binary.sampler_metadata().find(name); + if (md != m_binary.sampler_metadata().end()) { + return &md->second; + } else { + return nullptr; + } + } + const kernel_image_metadata_map* image_metadata(std::string& name) { auto const& md = m_binary.image_metadata().find(name); if (md != m_binary.image_metadata().end()) { diff --git a/src/queue.cpp b/src/queue.cpp index 2f505b2c..80f79f3d 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -631,6 +631,14 @@ cl_int cvk_command_kernel::update_global_push_constants( } } } + if (const auto* md = m_kernel->get_sampler_metadata()) { + for (const auto& md : *md) { + auto offset = md.second; + image_metadata_pc_start = std::min(image_metadata_pc_start, offset); + image_metadata_pc_end = std::max( + image_metadata_pc_end, offset + (uint32_t)sizeof(uint32_t)); + } + } if (image_metadata_pc_start < image_metadata_pc_end) { uint32_t offset = image_metadata_pc_start & ~0x3U; uint32_t size = round_up(image_metadata_pc_end - offset, 4); diff --git a/tests/api/images.cpp b/tests/api/images.cpp index 1c1427f4..aefd6a9f 100644 --- a/tests/api/images.cpp +++ b/tests/api/images.cpp @@ -594,3 +594,53 @@ kernel void test(global uint* dst, uint magic, image2d_t read_only image, uint o (dst[2] == (offset + magic))); } } + +TEST_F(WithCommandQueue, ReadImage3DWithUnormSampler) { + const size_t sizes[3] = {7, 7, 7}; + const unsigned nb_elem = sizes[0] * sizes[1] * sizes[2]; + cl_uint input[nb_elem]; + cl_uint output[nb_elem]; + srand(nb_elem); + for (unsigned i = 0; i < nb_elem; i++) { + input[i] = rand(); + } + + const cl_image_desc desc = {CL_MEM_OBJECT_IMAGE3D, + sizes[0], + sizes[1], + sizes[2], + 0, + 0, + 0, + 0, + 0, + nullptr}; + const cl_image_format format = {CL_R, CL_UNSIGNED_INT32}; + auto image = CreateImage(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &format, + &desc, input); + auto dst_buffer = CreateBuffer(CL_MEM_WRITE_ONLY, sizeof(output), nullptr); + auto sampler = CreateSampler(CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST); + + const char* source = R"( +kernel void test(global uint* dst, read_only image3d_t img, sampler_t sampler) +{ + unsigned x = get_global_id(0); + unsigned y = get_global_id(1); + unsigned z = get_global_id(2); + unsigned offset = x + get_image_width(img) * (y + get_image_height(img) * z); + dst[offset] = read_imageui(img, sampler, (int4)(x, y, z, 0))[0]; +} +)"; + + auto kernel = CreateKernel(source, "test"); + SetKernelArg(kernel, 0, dst_buffer); + SetKernelArg(kernel, 1, image); + SetKernelArg(kernel, 2, sampler); + + EnqueueNDRangeKernel(kernel, 3, nullptr, sizes, nullptr); + EnqueueReadBuffer(dst_buffer, CL_TRUE, 0, sizeof(output), output); + + for (unsigned i = 0; i < nb_elem; i++) { + EXPECT_TRUE(input[i] == output[i]); + } +}