From 6b35b65923933e6396ae61322ce2e9b0772eea4a Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Wed, 6 Nov 2024 08:24:44 +0100 Subject: [PATCH 01/15] CAGRA tech debt: distance descriptor and workspace memory (#436) This PR introduces two changes: 1. Refactor `dataset_descriptor_host` to pass and cache it by value while keeping the state in a thread-safe object held by a shared pointer. Before this, the descriptor host itself was kept in a shared pointer in the LRU cache and was passed by reference; as a result, it could in theory die due to cache eviction while still being used via references to it. 2. Adjust the temporary buffers to always use the workspace resource in all CAGRA algo implementations (as of now, only SINGLE_CTA algo does this; the PR expands the change to MULTI_CTA and MULTI_KERNEL). Both of the changes are required for effective use of stream-ordered dynamic batching https://github.com/rapidsai/cuvs/pull/261 (1. fixes crashes and 2. fixes thread-blocking behavior). Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J.
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/436 --- .../neighbors/detail/cagra/cagra_search.cuh | 4 +- .../detail/cagra/compute_distance.hpp | 77 +++++++++++++------ cpp/src/neighbors/detail/cagra/factory.cuh | 20 ++--- .../detail/cagra/search_multi_cta.cuh | 12 +-- .../detail/cagra/search_multi_kernel.cuh | 53 +++++++------ .../neighbors/detail/cagra/search_plan.cuh | 2 +- 6 files changed, 100 insertions(+), 68 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/cagra_search.cuh b/cpp/src/neighbors/detail/cagra/cagra_search.cuh index 95c158675..5778d85a6 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_search.cuh @@ -151,7 +151,7 @@ void search_main(raft::resources const& res, if (auto* strided_dset = dynamic_cast*>(&index.data()); strided_dset != nullptr) { // Search using a plain (strided) row-major dataset - auto& desc = dataset_descriptor_init_with_cache( + auto desc = dataset_descriptor_init_with_cache( res, params, *strided_dset, index.metric()); search_main_core( res, params, desc, graph_internal, queries, neighbors, distances, sample_filter); @@ -161,7 +161,7 @@ void search_main(raft::resources const& res, RAFT_FAIL("FP32 VPQ dataset support is coming soon"); } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); vpq_dset != nullptr) { - auto& desc = dataset_descriptor_init_with_cache( + auto desc = dataset_descriptor_init_with_cache( res, params, *vpq_dset, index.metric()); search_main_core( res, params, desc, graph_internal, queries, neighbors, distances, sample_filter); diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 297eb1f55..7eb798459 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -31,8 +31,10 @@ #include #include +#include #include #include +#include #include #include @@ -232,52 +234,77 @@ struct 
alignas(device::LOAD_128BIT_T) dataset_descriptor_base_t { */ template struct dataset_descriptor_host { - using dev_descriptor_t = dataset_descriptor_base_t; - using dd_ptr_t = std::shared_ptr; - using init_f = - std::tuple, size_t>; + using dev_descriptor_t = dataset_descriptor_base_t; uint32_t smem_ws_size_in_bytes = 0; uint32_t team_size = 0; + struct state { + using ready_t = std::tuple; + using init_f = + std::tuple, size_t>; + + std::mutex mutex; + std::atomic ready; // Not sure if std::holds_alternative is thread-safe + std::variant value; + + template + state(InitF init, size_t size) : ready{false}, value{std::make_tuple(init, size)} + { + } + + ~state() noexcept + { + if (std::holds_alternative(value)) { + auto& [ptr, stream] = std::get(value); + RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(ptr, stream)); + } + } + + void eval(rmm::cuda_stream_view stream) + { + std::lock_guard lock(mutex); + if (std::holds_alternative(value)) { + auto& [fun, size] = std::get(value); + dev_descriptor_t* ptr = nullptr; + RAFT_CUDA_TRY(cudaMallocAsync(&ptr, size, stream)); + fun(ptr, stream); + value = std::make_tuple(ptr, stream); + ready.store(true, std::memory_order_release); + } + } + + auto get(rmm::cuda_stream_view stream) -> dev_descriptor_t* + { + if (!ready.load(std::memory_order_acquire)) { eval(stream); } + return std::get<0>(std::get(value)); + } + }; + template dataset_descriptor_host(const DescriptorImpl& dd_host, InitF init) - : value_{std::make_tuple(init, sizeof(DescriptorImpl))}, + : value_{std::make_shared(init, sizeof(DescriptorImpl))}, smem_ws_size_in_bytes{dd_host.smem_ws_size_in_bytes()}, team_size{dd_host.team_size()} { } + dataset_descriptor_host() = default; + /** * Return the device pointer, possibly evaluating it in the given thread. 
*/ [[nodiscard]] auto dev_ptr(rmm::cuda_stream_view stream) const -> const dev_descriptor_t* { - if (std::holds_alternative(value_)) { value_ = eval(std::get(value_), stream); } - return std::get(value_).get(); + return value_->get(stream); } + [[nodiscard]] auto dev_ptr(rmm::cuda_stream_view stream) -> dev_descriptor_t* { - if (std::holds_alternative(value_)) { value_ = eval(std::get(value_), stream); } - return std::get(value_).get(); + return value_->get(stream); } private: - mutable std::variant value_; - - static auto eval(init_f init, rmm::cuda_stream_view stream) -> dd_ptr_t - { - using raft::RAFT_NAME; - auto& [fun, size] = init; - dd_ptr_t dev_ptr{ - [stream, s = size]() { - dev_descriptor_t* p; - RAFT_CUDA_TRY(cudaMallocAsync(&p, s, stream)); - return p; - }(), - [stream](dev_descriptor_t* p) { RAFT_CUDA_TRY_NO_THROW(cudaFreeAsync(p, stream)); }}; - fun(dev_ptr.get(), stream); - return dev_ptr; - } + mutable std::shared_ptr value_; }; /** diff --git a/cpp/src/neighbors/detail/cagra/factory.cuh b/cpp/src/neighbors/detail/cagra/factory.cuh index abc907da5..e6e7ff64f 100644 --- a/cpp/src/neighbors/detail/cagra/factory.cuh +++ b/cpp/src/neighbors/detail/cagra/factory.cuh @@ -135,11 +135,9 @@ template struct store { /** Number of descriptors to cache. 
*/ static constexpr size_t kDefaultSize = 100; - raft::cache::lru, - std::shared_ptr>> - value{kDefaultSize}; + raft::cache:: + lru, dataset_descriptor_host> + value{kDefaultSize}; }; } // namespace descriptor_cache @@ -159,20 +157,18 @@ auto dataset_descriptor_init_with_cache(const raft::resources& res, const cagra::search_params& params, const DatasetT& dataset, cuvs::distance::DistanceType metric) - -> const dataset_descriptor_host& + -> dataset_descriptor_host { - using desc_t = dataset_descriptor_host; - auto key = descriptor_cache::make_key(params, dataset, metric); + auto key = descriptor_cache::make_key(params, dataset, metric); auto& cache = raft::resource::get_custom_resource>(res) ->value; - std::shared_ptr desc{nullptr}; + dataset_descriptor_host desc; if (!cache.get(key, &desc)) { - desc = std::make_shared( - std::move(dataset_descriptor_init(params, dataset, metric))); + desc = dataset_descriptor_init(params, dataset, metric); cache.set(key, desc); } - return *desc; + return desc; } }; // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh index 0003f2495..ecfd856f1 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -93,10 +93,10 @@ struct search : public search_plan_impl intermediate_indices; - rmm::device_uvector intermediate_distances; + lightweight_uvector intermediate_indices; + lightweight_uvector intermediate_distances; size_t topk_workspace_size; - rmm::device_uvector topk_workspace; + lightweight_uvector topk_workspace; search(raft::resources const& res, search_params params, @@ -105,9 +105,9 @@ struct search : public search_plan_impl<<<1, 1, 0, cuda_stream>>>(host_ptr, dev_ptr); } +template +auto get_value(const T* const dev_ptr, cudaStream_t stream) -> T +{ + T value; + RAFT_CUDA_TRY(cudaMemcpyAsync(&value, dev_ptr, sizeof(value), cudaMemcpyDefault, stream)); + 
RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + return value; +} + // MAX_DATASET_DIM : must equal to or greater than dataset_dim template RAFT_KERNEL random_pickup_kernel( @@ -609,18 +618,18 @@ struct search : search_plan_impl { using base_type::num_seeds; size_t result_buffer_allocation_size; - rmm::device_uvector result_indices; // results_indices_buffer - rmm::device_uvector result_distances; // result_distances_buffer - rmm::device_uvector parent_node_list; - rmm::device_uvector topk_hint; - rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; - rmm::device_uvector topk_workspace; + lightweight_uvector result_indices; // results_indices_buffer + lightweight_uvector result_distances; // result_distances_buffer + lightweight_uvector parent_node_list; + lightweight_uvector topk_hint; + lightweight_uvector terminate_flag; // dev_terminate_flag, host_terminate_flag.; + lightweight_uvector topk_workspace; // temporary storage for _find_topk - rmm::device_uvector input_keys_storage; - rmm::device_uvector output_keys_storage; - rmm::device_uvector input_values_storage; - rmm::device_uvector output_values_storage; + lightweight_uvector input_keys_storage; + lightweight_uvector output_keys_storage; + lightweight_uvector input_values_storage; + lightweight_uvector output_values_storage; search(raft::resources const& res, search_params params, @@ -629,16 +638,16 @@ struct search : search_plan_impl { int64_t graph_degree, uint32_t topk) : base_type(res, params, dataset_desc, dim, graph_degree, topk), - result_indices(0, raft::resource::get_cuda_stream(res)), - result_distances(0, raft::resource::get_cuda_stream(res)), - parent_node_list(0, raft::resource::get_cuda_stream(res)), - topk_hint(0, raft::resource::get_cuda_stream(res)), - topk_workspace(0, raft::resource::get_cuda_stream(res)), - terminate_flag(raft::resource::get_cuda_stream(res)), - input_keys_storage(0, raft::resource::get_cuda_stream(res)), - output_keys_storage(0, 
raft::resource::get_cuda_stream(res)), - input_values_storage(0, raft::resource::get_cuda_stream(res)), - output_values_storage(0, raft::resource::get_cuda_stream(res)) + result_indices(res), + result_distances(res), + parent_node_list(res), + topk_hint(res), + topk_workspace(res), + terminate_flag(res), + input_keys_storage(res), + output_keys_storage(res), + input_values_storage(res), + output_values_storage(res) { set_params(res); } @@ -662,7 +671,7 @@ struct search : search_plan_impl { itopk_size, max_queries, result_buffer_size, utils::get_cuda_data_type()); RAFT_LOG_DEBUG("# topk_workspace_size: %lu", topk_workspace_size); topk_workspace.resize(topk_workspace_size, raft::resource::get_cuda_stream(res)); - + terminate_flag.resize(1, raft::resource::get_cuda_stream(res)); hashmap.resize(hashmap_size, raft::resource::get_cuda_stream(res)); } @@ -847,7 +856,7 @@ struct search : search_plan_impl { stream); // termination (2) - if (iter + 1 >= min_iterations && terminate_flag.value(stream)) { + if (iter + 1 >= min_iterations && get_value(terminate_flag.data(), stream)) { iter++; break; } diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh index f23b96631..99254aa50 100644 --- a/cpp/src/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh @@ -151,7 +151,7 @@ struct search_plan_impl : public search_plan_impl_base { lightweight_uvector hashmap; lightweight_uvector num_executed_iterations; // device or managed? lightweight_uvector dev_seed; - const dataset_descriptor_host& dataset_desc; + dataset_descriptor_host dataset_desc; search_plan_impl(raft::resources const& res, search_params params, From 2d4afb515e3b509152adc652e3a9d97816b7bc3b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 7 Nov 2024 11:23:15 -0500 Subject: [PATCH 02/15] Put a ceiling on cuda-python (#445) This project is incompatible with newer versions of `cuda-python`. 
This puts ceilings of `<=11.8.3` (CUDA 11) and `<=12.6.0` (CUDA 12) on that library. Those ceilings should be removed and replaced with `!=` constraints once new releases of `cuda-python` are up that this project is compatible with. See https://github.com/rapidsai/build-planning/issues/116 for more information. Authors: - Bradley Dice (https://github.com/bdice) - James Lamb (https://github.com/jameslamb) Approvers: - James Lamb (https://github.com/jameslamb) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cuvs/pull/445 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cuvs/meta.yaml | 8 +++++--- cpp/test/neighbors/ann_ivf_flat.cuh | 1 + dependencies.yaml | 4 ++-- python/cuvs/pyproject.toml | 1 + 12 files changed, 17 insertions(+), 13 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index aa12b4ed6..80bfb0c24 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 494ec394d..07937726c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ 
dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index f4f03ccee..b7fd6fcfa 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index a295e93f4..83a457465 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index a73839457..21cb98180 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 3f869da9a..432509bcb 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - 
cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 407fb6058..0c5043ac2 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 81943b184..cbb22333c 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index e7e2daf0c..560c95feb 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -26,6 +26,7 @@ build: - {{ compiler('cuda') }} - cuda-cudart-dev {% endif %} + - cuda-python requirements: build: @@ -42,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,<=12.6.0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -60,13 +61,14 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - cuda-cudart + - cuda-python >=12.0,<13.0a0,<=12.6.0 {% endif %} - pylibraft 
{{ minor_version }} - libcuvs {{ version }} - python x.x - - cuda-python - numpy >=1.23,<3.0a0 tests: diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh index 8cc46b2f7..23d84ca98 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cuh +++ b/cpp/test/neighbors/ann_ivf_flat.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/dependencies.yaml b/dependencies.yaml index cf9b68c8a..e909ad0dc 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -213,11 +213,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0 + - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 - matrix: packages: - &cuda_python cuda-python diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index bf62f5adf..30d784c67 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -136,4 +136,5 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pytest.ini_options] filterwarnings = [ "error", + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning" ] From e559d581acec030d8e71833aee1295fe442facb3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 7 Nov 2024 21:04:57 -0500 Subject: [PATCH 03/15] Adding tech stack to docs (#448) Authors: - Corey J. 
Nolet (https://github.com/cjnolet) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cuvs/pull/448 --- README.md | 9 ++++++ docs/source/index.rst | 72 +++++++++++++++++++++++++++++++++++------- img/tech_stack.png | Bin 0 -> 125904 bytes 3 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 img/tech_stack.png diff --git a/README.md b/README.md index c1b74a9e8..572e8d098 100755 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Finally, faster vector search enables interactions between dense vectors and gra Below are some common use-cases for vector search + - ### Semantic search - Generative AI & Retrieval augmented generation (RAG) - Recommender systems @@ -68,6 +69,14 @@ There are several benefits to using cuVS and GPUs for vector search, including In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a deslightful development experimence, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale. +## cuVS Technology Stack + +cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU. + +![cuVS is built on top of low-level CUDA libraries and provides many important routines that enable vector search and clustering on the GPU](img/tech_stack.png "cuVS Technology Stack") + + + ## Installing cuVS cuVS comes with pre-built packages that can be installed through [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python) and [pip](https://pip.pypa.io/en/stable/). 
Different packages are available for the different languages supported by cuVS: diff --git a/docs/source/index.rst b/docs/source/index.rst index 647061ae5..286836c18 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,19 +1,8 @@ cuVS: Vector Search and Clustering on the GPU ============================================= - Welcome to cuVS, the premier library for GPU-accelerated vector search and clustering! cuVS provides several core building blocks for constructing new algorithms, as well as end-to-end vector search and clustering algorithms for use either standalone or through a growing list of :doc:`integrations `. -There are several benefits to using cuVS and GPUs for vector search, including - -#. Fast index build -#. Latency critical and high throughput search -#. Parameter tuning -#. Cost savings -#. Interoperability (build on GPU, deploy on CPU) -#. Multiple language support -#. Building blocks for composing new or accelerating existing algorithms - Useful Resources ################ @@ -26,6 +15,67 @@ Useful Resources - `Issue tracker `_: Report issues or request features. + +What is cuVS? +############# + +cuVS contains state-of-the-art implementations of several algorithms for running approximate and exact nearest neighbors and clustering on the GPU. It can be used directly or through the various databases and other libraries that have integrated it. The primary goal of cuVS is to simplify the use of GPUs for vector similarity search and clustering. + +Vector search is an information retrieval method that has been growing in popularity over the past few years, partly because of the rising importance of multimedia embeddings created from unstructured data and the need to perform semantic search on the embeddings to find items which are semantically similar to each other. 
+ +Vector search is also used in *data mining and machine learning* tasks and comprises an important step in many *clustering* and *visualization* algorithms like `UMAP `_, `t-SNE `_, K-means, and `HDBSCAN `_. + +Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in `GraphBLAS `_ and `cuGraph `_. + +Below are some common use-cases for vector search + +Semantic search +~~~~~~~~~~~~~~~ +- Generative AI & Retrieval augmented generation (RAG) +- Recommender systems +- Computer vision +- Image search +- Text search +- Audio search +- Molecular search +- Model training + + +Data mining +~~~~~~~~~~~ +- Clustering algorithms +- Visualization algorithms +- Sampling algorithms +- Class balancing +- Ensemble methods +- k-NN graph construction + +Why cuVS? +######### + +There are several benefits to using cuVS and GPUs for vector search, including + +1. Fast index build +2. Latency critical and high throughput search +3. Parameter tuning +4. Cost savings +5. Interoperability (build on GPU, deploy on CPU) +6. Multiple language support +7. Building blocks for composing new or accelerating existing algorithms + +In addition to the items above, cuVS shoulders the responsibility of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a delightful development experience, guaranteeing that any libraries, databases, or applications built on top of it will always be receiving the best performance and scale. + +cuVS Technology Stack +##################### + +cuVS is built on top of the RAPIDS RAFT library of high performance machine learning primitives and provides all the necessary routines for vector search and clustering on the GPU. + +..
image:: ../../img/tech_stack.png + :width: 600 + :alt: cuVS is built on top of low-level CUDA libraries and provides many important routines that enable vector search and clustering on the GPU + + + Contents ######## diff --git a/img/tech_stack.png b/img/tech_stack.png new file mode 100644 index 0000000000000000000000000000000000000000..2b3eeedba99957e985ccea233c8970c396acadb0 GIT binary patch literal 125904 zcmc%xWmuI_7d4D-xw8UCRR?p|Pe#sW5DR+;J2Pe%6K69sdlyRw*Auu-A#f2h z^dfO*Gb2|k2YU)ND?2lYhM6e^8xMtogF6KW8wVE!8^>ENe(;k*NuJ`ps@gouoIM0W z0g;gqRrAO`TJhFZy?RDEoj2@S|B4anAU>I`!tvF(cw3s;!aYMdN|;;fX;^9GmmkAk zqsV#RGC>Uo^8|kM{1&zra8|+}vUR`)A6K_^J?Cv;Xrylzk`&!~eT3 z0v9zIfA6jFUQ26X7A^Ue;Qi6}_>t|u3-JG2 z!qk^buXlfZuUB}gSF1$$X=vcTeEIVK`hVZnxsvX-|1IeM_f?Apa{TXh8~=X^%m1|- zL^gfL6#;?KRU93>WScond)nv<9iqV6eakJswJ3fYfh|_W1l@3r@Ui{pCFkAR<_Gyt z)#cU2ICs5>gynK<^QQsnaIaA$HOYy#9t+p@GJlBBa^$@V(Gu_&X15U$RC^Fs51zK z?z1)eugdh$Gx44^a+0u8%38uAvfwk^K@*<`!WiX#<6mP&r&xdEen&EB8fY#oFwn8w zjBT!;==%BGnW!6Clth8N+h2s{rLI$AI4+z1!2?Fz*UGm)=+zohs2sBNPw{y@1aXYt zvIUYB;~a7;@A*J?2romT4_PC^eLoEjg+SiUGyW%R`thilP8{7DK2Dgf_{^H0 z{CS))1^l}y-M=E9$I2N<@#lW7VD=)6B{Jl*54qLGM31eOGS)?Pjy*f>bRwal#E}+s z3;&U!+(re1^5&Avk7geCqTy<2*&$#(9p)~H&SOZXFNt79s!)2EyIaN1$tT7^^}7vt zY-wm|R>5L2$_s9d`%>FPo(r+ACHVmH>+Fdv?*s}0J8sSlYj9&BG``BB?*IBNV=6elz0d*pW z^*2I2PHREgLNnLd*kW(-R@6bV4E}=elVa($L3-7(rd-2~l`@S>X=hEEMb=_h6jV)9 zz)eX0=hA)@ACYh(y*oIVMi)^_T1PT)+epYhU~)A0r7Si9OR$=Avm5%*PX=r6%8bua zX73+`D0e7O5=80jc99!+IMKr~*p_JG-wzAUL%emXkr4z!Fg9s7=R~eM5UWZ=amvIt z4qzn^`o*ritTxXy4&vK;V{TB=_?_B_!Zs1pZ6FFbh+0{Q2%AP#Y&`m(tnMdrZao;# zE%>CVw&J-zA@guD#=SDCSnoy%rDW1wJik~@&K$nWl#s@rQ$(~<5@?*K^hl2*GGx10 zqv05$qOttPHZf!H++h8Qe`k!1gtKcDPB#L*&M!^I!G_0o0P^U>$ye5Pt6XfuXjEEC zeiMs^Dz4d?y7k%pkCb*0@BD5xF&T1;tN&qy;GARUI;-CIxm3|VF~g|TG3J~Usk$xX zuFG!JA@S-ab~PSn#ypU^Iw#hl|W9)41IldaS!yVZ|OTILCp zZGtr4T^bIprev93+8Er{BOOMwDMyB+uo2C~@o6&V3MSqh+J1&~E@T!fq|F|Dxp#Ua zby!X0-%Zf+xQ;rXs9)ub4-=ZC`tCCc1-TTyZ~pOc{yJaOjV>Ashb-Fp)@-hN*!<@v 
z7n74a@idFUAcxC^R^;S{r&f7nimkLsrV$6-i&m0P@i7dqOKL~qvV&y!(*E|-ewTaPo{&;y(mLyL zew|ud2%Bc+EVxT;&RzGg9VtgN-n=DGJI&BkjEJeW~Wu()-aq!tZ;D>C3m8;Z|Ca7w(m`asn!I>AXE=Obqiq>Gj!W6Y4xW z3-J=EyP3HM9I5Rz31tOEEZ4!Z^UkhRva9;Xsl3|$4-`mRaY0Z4b?%f8zotKdgNJ16C80u8m#1eyY=0BHOYl zJxD%fVcQ=CSdEO#vT!n%#pCzy5@O^`Zsm`M`=V06WsFV-XWBprDvb`ag9sWa`ZmK& zGD+t32x>2#>If>(5ETYyWTOP(ZDVRlK0Cg8@v~wmeGuoq8NQ_wC7F3aZqr$zNcBSS zq_h6pd4~Is%kx1J8jDPM`J#oP^BsnFf~Ve1`Y(u6J!hur=Gf~gC_Zxh4Rmg>&z5NF zu*tV&Fgc_f(QzF!+nmypsb7j+f!ofw9sQv=qDDDp%PP@7bzRS_X)lvM>dY;-kr~G% znaW?(EI7=*Fk@@(x+wQFN}#Kwp!clul|7VHHTLRI)2DN1Za3Xnf$WX90kdFnI+s!Y z=y19_{cBCd$Bu-Q=09<|ux2BHZVId=nYR(Na?MJbxjO#jP7_jzvaGtxRnLKLa=0r& zt}y5-w6b$V@9aw~W92k?icL9XKMKjlTA42yGpKeN?A&^NOsk1$BKQN>uTreN-1nc| z`1MC5;|>bgxe@R*=%4JREhM|u^>7~vF@}CJ<8z57f0hjyE{n|fJYJc@h^HX`JQ`X}x$ZyjO=6|c+-*5|boCP!Q0TSF^(G&xSU=VNSU_ zY8KaE<-?#KtwC)TE&98ULmE{p1~v-0tfraLA%Nv0#?@ibu{;OAzjSsMW7Zh`V(I&x zGxtJ8CMmVT)AC^~9QG$OQgrTI4XYPfWFvI3gF&PUIeGjH$+$QPT(40wC+h}^SGc`Q z#|VRy#jYhw!#?38)O^)s8%m&23B6$;wj@(WzLUKetc&N{<~9=k1X+6sm2a~%Pe+|M zT$tO*MAM);+-Fa3&|+=F#39;ZM@`%57DU%k$+xGP;J=V!Cy<-{n%^wwRt!Pgsp!Uk zMKyNilm3}rD=VUKEX+I;=7P;9J_JE$aUQo>e7Lp}uUbAQG<%a})qrs@N}7T1l-}2d zz}%NnkWS&q`Dm@{7LC{B+vq^uY`=V7bS7+}TskeuLu``Ri;_U|4}!@r6g+#)CC?5~ zq^)*6Z4yil+5IlLOiA&Iv(d^XzT3+WzKO22{YuObdTh1Hmu5DGXgV(6Rrff?h1d8| zj8_=pa0StROF2F>Kuag5Dl|QqNfNnN{owXNtHX0GPo?;=An-xRd-FTKs{$7kt(DfvyLI7V!7G8lrfJMYX`m_ zCsR?^-JL7AOgHoRgJf~7k5o}qXv!`PXQ-@QnytH^Z<1JfsFoa^`C@LL={R?GcA}N6 z{HQ4hyJ~%?R>kL~H(GmB@T54!**QOlv#rVHkvO!b4AU}+6}o>EvFuuT<0PT}dt-5q z%;u<4oSiuOLv}jD-H1ch;cIFsgMGaP26-gFX`0#}vl&eO2K=1$`7++Y=d+CKJhUH* z@wv)=)YM&XA`JFo%++!1`f8qQCCXNQF?DS*B-(sUyk3tjt2O1Gwx~J3vu%u$B=zCY zMn>v&-4EJ*CSMt1S$(}N%eU6UQ^*tCe8uTGE5Aurtxa#EKlWtWHwx~4p;KZs%hv2k z=H%3Tf}!TjWiq)vdCd`OljK~RUG&%H_SQYSE_H^!b%(k(S*FNPS3+=lC%MMCAR#4f zhutoFlvz=Fz>H_^QtMzu{HJl+18&CXc{%Zj_@>)ly0h(_#O7R&2nScjHWBf)*`4;t zjLNm(7{)AzQH}-w2q8n8WZlr6I((ufx2$&h_*9y;)?%AmY^G&n8*^$cO^zxtGXVY3 zqT9P6^c_pi$%A`oS?lBxtpRBxG!*!s89R?SCC)ZR+s#(Tl|)oYnjNw>5s9IwX^R#N 
zNm1Q}1=E`5PW+teL|O~~(6Q?0NmQHNSJ=!rKf63(s?>2c?#vRX*J(44rrFm&hhR@} zS$oMNH#eknWJsPW(HUiuT+*5pvKMVl7hhmA4f?ibIbVD6lTwCt)V?^n*D)ixqtMq^KUK{()ryaDh! z#P>-TE?c(SS;hVk)`T||rD#`k)4Ma87yk9e!1|kK ziH(acmgqy#Wl1Z%)^hLo{QSEvn653(i$@@wFA-U|;7*KDurMRj>D1Bu zV@sPPHJr|*J4_==viWr-ZhmiZOMpJttc7RHVc)k9A+WBWdk=9iS@s=oFi%T6d?MQw z;(DkfS~6=+E6%)&bC;Vf3fs>u+jZY^wGq7W`)GSANJDC^+cYBcth)VbYcn1yX9#l2D$ipS68DVX72%HZUB;j3M%^ zdb4U@U8M--3Ke<@-xKyEqi8maiMy=yBK#tCxzk|^wf-^Hi)f}KTw!VJWzU)4<+*jS zM`N0n`eoTLq;?oSxqRx-&eYd{0W?;zap*ac~jkzHC;^nN*taP&aJXS%-oWRD}q^+WbjCB88Nos25O( zoW{yei!;zXYV0=FR(gd4ELNZwgtp9w8M@yX5}kJ1gOg8m5I9HvQVvKEWuVv9RvukwA&hTGQ_)L|94F z_BgTvh~6);aJHvuMup6n((6vh{6O49LhIamgMq!J%N}x3MKUgSH=uIOU(oMN(~%eH z=1?Mjufma7{EedERvVd817*b?d?iAf-8zf+tr zz$Q9*(RcTpK|;H3LtNLmxKOZRdXQ%9CO2+Yp_4AbhQp^~B&WZ5)I|o@5H6 z4$O>K90}hmA@7miWc~fFeQQUcLN+UUU5B}V_9|H6NhDHM*vOyo+JbUKyi<2XW_>j* z*sL^U>S6@`6_!g;tsf)nE3Drhr)@97tmn5`ynn>U#)f~3lW>JkYwLkEqQ2qKq_;LOM8cF zVGz@NXrhU>_&X!;BS@*b>r5(-$Vtk=T7Giu1~%h_eZcj|MRI`ic!f1$gvH9Q9nZQn z5U`Ekc|9=RMmd!)H1R-_7;7bxB zV0|O$>FL$>=Brc^RxAxrL)7Agk<@HL7jfEr+3bxhe4rsWMbl!iU!g$?Ii@ar|i!$!O&t7C(e}nbNP> zDF}DixTmzdDAHE2fX>#0nUBvRKCPt=Yt;lFFpW^w=IU>I&{V`Rl-V>3kJQ|8;P{g{ z8}(j1{lFES_1{rCR*C**O13;q^o4L%ycD{;C zkxpNmf&Zl**;a`mUpYqq`(XL#P6gwKAzYeu&Ek-*%RXi5?h4tz<~MhDzrsd zNs!W3nJpVsl$k_N@qIG43Y|;oXejM&-){NpA*z1lKj$r?6q?$mw?#DVNZI!|ve%F= zT|iGVX-yf*U`GxnlkdK>BgLng-wX=MdH}kSW%Z`l4STKOq{+m<)-{=KL@Ycd0h4i% zCt7E$VhMR{ytAJ2Q8&JD!Fd=sK?QiV>Ho~tcaeTVAY=q-b(GDc?E|ZBr4FZQ&GSE4 zSBHx&#IG9=qjrupxDoLjw1Sgw=!ZU{DTxH@171({O_?sl6*ZI&CJG--$!lMdSXT`vyYR>Ut5|Ncxd*jQII z_dT+T)`z2WZzM!ZQc7vWSv+PnCzE&%?NBpeuHw{HjSwm!l(X#cuJ=sg<}!8pt5!)( z^b&e^^TRbfFCuNOf?hJzO-4#*ehm{N2wS6ouu*#dBW!DEbW_%Jfx+7)z|-9cMX(A4 zVZcz;;h&eAOAnwsAeZO1YjPa>H_a+CoQ!9L{aRUsQvhW%g(D;saeg;Xg?Wav4xYPI zBlf;%5lLFJJA|`hvAX<3{sK3`R7AUJrS;sD{@D9qxI+ zGJ>pFBU9#3YX^sPYbW!-bK)cXdW7_4x(1og1Jf?w9j&;BA_dmbFF$dB%E;R1J2#zu zq0!ajDhXDVU<|$_*HyrZUv4HZvEdqw8SYs1XMDuwv;4Xn$i}ntJkm;UXaIbCpE2og=pH)c8}qlxyZt#`rOfM^4F~zN74ny6z{)@;oFO{&RI5hpMRZP 
z`>5YkGDMJ)MJr>)bf@2(Nqcw^MV0+q(346rk-^(q@2QUIfB(x{drmhX2(yFmPNn@c zmy!&AR{Hv-?A;^nalu;y24j9aR3qt8a(XdkRXzM09K64{xi2HB8gQ-c`K1d|loaJ9 z@4|eN_b=k{{$73`Lw@W|UH*(rS@;Mwd|iq6`7qRCcV)=#NNwUj?p#$RDk^3Pm(SOAj)+eUS4xC>RmYmn0A z7P1WQXyuv$NJ}OE(85=wRo(AKudx2~ikEGzdz6?+`WpQC%%2;4VUnTQDSs1vwC>TT z|1(@eA-0q)$uNM$co*&Qb|lxGOHX{uWdv=6CG=OD;TbYt^JVZ{Lt%Fv9O-m>XF=$0<5LQ z5tAhIc^{)WqXon?4%1E6)!Bl``=MgCv#22x{>(*hJhBp}<+c`>^u+o)S;0Z%m%qq4 zyBwSF`a}6OpAWcVh&jk?;9)S4w)^1eiO1_TBl}8Hf_`K*GCA%$pr0bm8_q4y*X)eV z*BCxA$z>(Pj25K%p(l#&oNw9x&J4{Cv$5waaQ({_&PwlcetVEry{v*0|42YF$n5yH$+;qzL!Ym+N(QbVX_z`5o>bqbCQLLsI^#$7dG2>VzF9(5N>g|e zn=D&iEm6yim1d1NAC>xcEP`};7WJOMSU(M=*>cJ&e5FoG6qJdgwR-%+gnBa!g3$?U zpEFA7Y|IRS>hG8N9 zkGS?*Imb9VKK!sH$F0!IrMi*9s6zF?EZv>h+0bO$-*>({M6}!TjLokqRAGK#ts$@BLTqa4vROcMS!ht=ll zZESkRM6!&ITBY2#^cvKoud*{M%bgcWSSH(TomJ?_i;ai7Kcw#bc#dTl+_qie!}9E{ z)GgQCFl}BFurEzTkyW>;dSP-DkjkJta#&PXO~T}oY{o*DQPjL;z@jpisQ|5ekMA;VxIE`88t=nmTE#tIhVAV`MO~FFw)+mc+UKNX2i? zwf03+ZGCrxc3qvGG$^NjP4Qib+)~qB2hMDOkaMSY>kD(M{gjav)9PMB;31Wg`Rpwd z!`Da`>eRZzzvF11l=yk@Yk%wvCrflEI*({gwqar>XGCsKbc{Yy>yB|?YNj*H_oBfE zFc75iH_X=EOOH%j0xL&totdd~ow1ntR2TCtmy&r*ihttoZ}(^~lZNUr#&i)uj3$92 zn+cfP+m|n+0W~X_c2-iD`C&*wgp$miS(im2PBr8rpNLH{0u!kx^pNR)a^=f zUmKIY#3p#XA#A1i-&p{A1?B`%?vLP|&4zL-n|kD{R@^cBkK~f~qHFApsMOe}c1jcz zhreWI$He?KqaTqEt}A<5dgRj7&<#53C#Jvmd`pEnMaZ4F$|_dIg7INql{6#ds7H?G zv#aogZV0N(t0<)!U5rT0*sc2gUlM4%T{6K-^Cv3W{wwEcuSN0qtz=1n#Z1$l6E`Ks ztd<*2_;iS-Lisq2-gtopi66qIBP?zS9+o8fQw1|a4K`8&7|N(;E*$X& z+95G~iiZu2rZV~3`T=dUXG%>w-U_Jwrp)J2pl6SX;*|8?@iR*EuX7ktW}E0#w#5CQ zaqFD|uM8SZ<5CLq7E-Uu!^uqJP*%x|&_-?dQ?~y4hHB?a^*EtBwJx+}= zyT5P@6_{PAiqwZK1WW~kS+=JLNlc9p#}IWEviZBUH<$G9tV0lLW>yF{Z8~TThQ!e0bybdS>`-6PZ`= z3p%p37BE2+!)*RxX3_E__{wmDAO1Qrh-z_~djmS~tVLWCGmWozTw)dqlAT{v>kURj zmHa)1I+TShS*0k*h?Vwr)Dp%A@Uawe0!ph6CO0*H`yhKs!hs6Aqmq%&;{itAD!9{h z^@(HXy-zrV-#qel2u&wZ8~v^z2O}z>C~bU%M(ZkZ*WnGzQY>-dx9MbeNs{T-N_iG$ z&O`TlIcqoP`y8_qF24JJsG{S==S;EXktxTPzFS~{a}|0qgzT^rLie`i^Cka1$l?D& 
za1`iXMwkV|jPN-)ueM^65QJ5pSQ6TtT%^tWgYpb%F#pyN^@cNpx{}~5iA^S$TcEe_ zqdy5fAsG<;tW3k{gitBs5vg=pSGaAx|p|7cYWiUVF z&U23==_&`9DZVt+Qaucz|DsnOn%L3>&Xk-5SpVk;i}P60#HeI<7)>dQKsqtfehp2< zl5)V0FL<=zP|I1&e=j{~H9 zIy!PPe9m9?UIVbb;_Q`mX)i zC#?2YXa=`PEuT&>6Qto;S&6?z#P9r{Q%+Ne7^3H$&yNmFT6OT9Pj_!1A~ZPNtE=Rn z{&>E|Lq+hqKE^OGFvu48gt@)FeRJG;;&{2I{QIxFCKEy1siLjze_Lc z>+5T@D`SPm^!4?Fh^@r3xw)?Wjpl4`Zf=dcdqQu#uI<4KBih=#kK7|5N2{IWQhq`~ zAK$1O8k*7&w6(?Gjo?n9|2GT876aJB6oqc#(O$Cjm8q7Htgd>Ww0mr)XgD5LHd3*% zVH?*Hg8S4g{cUnwhaENNe7eYc7Ac?IxL)-~nz*<*A7Op*@+A_8>;qULxa{=&Tuf7w z(AdO;-KYniKIH=>-!OxZZNPD7l!!_DhU5IxF+5yNr>iib2_cU${2%Y%C$LcJUxq+@A$dGLz;C$3L69@Hm)|614TUgAzoPdgpVFnD--Q}gi=P6$5>s;H`FRg_&^UA=F# zUtzObD9h;j*pN3yU~GMy;3{0^|%G?Vs~N?95)Nkedn#e z-%C9Y`O~wRA}d-)!$-)w;)?6{a3%#3_7VjPY4-$6I95DeE$aN-S+i1o`2}w9bcrJM zMKG3Oy}raj5W?PE<*R_Zou#brb@8{|;Z2U+E-!=r86l&icX4uBY}D48RCoPD+2_Nf zPqI<5vt#yqe*E}!T&lJ->TwJsxiH_u5Isl<>95ReSIxvLDh6@IBcYdz_R4o1R)g}}nWC1hqsSxpyn z6VdRt)jVEI2-mtDs@pC%y`W1%ac^z^>C4^r!z{*VwZm6hPOk5FtqHrsAHkt*=K~8b z((>~1UbOEP)c(5)%M=NQs_Jy0Nn6)9EG*PD^`vtj_2Ja_eEDFw*0! 
zC@sE|ll*&bPNbsn6MVaO_-3<~OsqQA^Pid*uN8A8u3y>j-AF7EF)Cz>>=a|2?wQ0X z7s2mO0Y6|Ky>%t_`RTgzce^+52U*#(!|KkB##O)G{pk{;TFov(@$2Ii%9IZ;aBzY; zeD9&P0RsbbdUfT-bymAz3re!IvhqFH0y#Qd#y4+j{h$4^-+9kXHZpN>;kA2T56JVb zz=w(jg8F(1!c!cM<2<(1?DCq-_flDtDRo@oLwM6>tU!&$1exo}pZ4!bVpX^?>?C}S z5OUwk1nQw#OD+&2*O^gI6@W`YL9i9N%{Z(6&+XLG1d$SJe3;_lnEf?Iy}vsB{oxT2 zH;xv*^SSO}pI=-ktE-2#w7i9aizQ;^Q6=&}; z-~f_;2FbqhommI+;#b0#iUl76C#-U1J-z8b9ontoe31-SPmpxs3MIVA+Ne;_|w5}WzMifzc z2)XQ_W0LR%BqrkEy?nWOf6^)CE8B2L7>z`z_Ib7ZUb z#YJtwi&4%RO{OM*q?3&ftK$kPt$6tO>m_-9J>xk7FUZMP7k*OC2ZA864a9@y0m?*_ z24{WXl|!HK^Q~fBWgM|KXUn zc5z|2Q5qBxGC6rr%D{aspjdq*n_5aRG!}*N*QZMNQy&c z7HFhWth2K-kJD}>$^>ba z&M)eFOO33E7{np3^L%h1j>pUVK3B6ELqC6tDl5N$!jmNzF**?kymlqQhaw!1=A@a3 zVpW9z*9wY@pX8r-SAC}US5P@PIcv?wh#g773kzxb1_w8R6QEgR2&0fI7%4^O2@j$b z)NkDV^?mdy;M(h{I`-Y5`$y^6HzTmNox(!l(!lSwNHs3_p`Lp*;bB@)*y#S>UuaWN z*493_@tv#C0sxprQEKTy>Alg1a(s_S>zX_gE%Gj8S%>Tg&9v+^H zC>a2^$#RpE5@U+AynMe!o^Ke~ei9)cLWj09L=Fy)UjjnhvMMXaK?iu&Zoh217#HxkUitI_8yixujZv@tIh@=N zivH6dWu*X+K%1jnIvWfCrM?EcrTDb8@RN>v%(!>|2#Xc-VnIA5BqVg}xn4|>jasRU zjpq_dbC^X@%Ohcu1gMntEjPQ=nhat>L}X-8>|6ixki7H4gFa=z9fJZoB3#Gq z&MPSm_`(vp$$0BGP!lEL|*RPSFSP1}<-FZtIa8LIG`5%gTLh0-#hy$NrK{+2KH8q?=`xSxTUqy8>6 zZl3r*3t*B99qlPD1EvM-8(ds!HTv)0BS7h7b>|Z?c&}Lgc>Y*%kjJ_HPo99{&A*`G zf~OTRk=GZ`UuR@uLI-u|eSO>y(q2hb6|Pb}S|*7OaF^!~IjB;sSl|(aZ2=`!~D9`(gHcfBzU_Zq&HgSJ7GaEoBR-_#wJ>fT6Gy z0q$WlU)BH8z$XyoE)|Hf|NYWx%M3&t9LXpA?+O3Fh3^)dphFCTh8fZy{gPK80*z8) zFW37xP>u1unHg=sUALu}xVY2vzM?MHD9x-bS5kmOZ)v|>5 z)oXt{kS);IB`FG_B>jXRAOzB6vRs1|z{MKi8-V-A5_4N{nvZtHk_wPKCc(kNI?n5T zT2Il?7qzvmSo%!_Xygr8&e?)R9wjgDORRU7*mMfn5j8bz5J$9eadGil_rKaPWbcSy zvgihM8+N5K>*P;bR!uEX>V@X8gH8eC=jR7#aSNAY8S=|Y^%=d)=vM&+!oIyX%ov`- zUT0M4E0}=%Ls^$GIXU6*h*9IxIuI9r97N-JjUGoTqg~LZF2x(1g@h?#*hqm+$Odtr z*`0*~DNG@tsgFqmOK?k!e&qzyYtre#A`BPG z-u`V|J^lGQZ*5M;q5tmc(5W{iJu@H9!_)IW{jT%riWREo7Zw&g?)FQZmh2kc+^3MO z*vn@R2r)W1LqbE>vm83QK`KqQdw;6Xu18&IaqIo|u^gL(B*yptpV6N-&nAF=PP;ju zzJkbmfX;IEOB{=mn|qd>f}E0c%)ZfNzDn+6&8*e^${w08G!R~gT 
z2HgjhvYA@_UVjrHL2Q4sZSC#`vxmhA-61E@$>YF_ZdFg9bw&KXON$jx`7oN37Nqan zLb#)0&DK}Z;JJ=Y!w=)_>ax6`w3fs@W%m1CAA*8g z6-j3bgo?*nAkyjGVRf%1mrA}|6~m`h-%n{?`5mBjEO%rH`wQ(%SJPfi}^|FB68G6_4mK8)z) zzoUiVkLBv?OAej=dHzrPmS64awQGSVoSdAz)^UH_JDS1CZaqT}+AW%O0lC*6B_19g zUvc?c?|)1|!NJviR+M@X(lkMU)Eu|m+aEnU2{Zcher;4fZ1|&qr;cYzgV=XJKKX;lWH9ApCZ-BSoMD7AB=;7NDZ)eKdj5bg?|3|DjMl0wsjS#m4 zR#w(vAq~+5u3Dk&B3SVszk56Iu&(YZne~9(?YXv$DYaViAT#j`>*zO=XKfDe*GtUEQ{CV?_&M$PJdyK zTq%&jJhs0Bas)n6(b6I~IXOYa3XqrUCns3tv#6~r3UJGMQW8|kzG*4=ZyY=R3EIa+ zc1M@70v7)T2#u#F5`u?KGa!q^0Idhm#i#0q_>xs$!cd7qKV1OH;J~%ClrOU90T#Xa zkOdMz`Z;*`qng^x;1AN@=X;>dS3Hwr;o`fV%wcfm`XYuI_;mx6uc7iKQtzlI21*wYd zG025T-e^=wn3@(+m;SrH9t7Pja=ka4T0r0xx!(;X_`DmT37_XVjlR#>d!MuZmw-5; z(%?iEd<>PQ`RX56#YvTb3^>ir6EK?Y)M$Yz(-#ndT%p@QVsNq$ymfnV+Lz1#3BVso zrM`}4Jq7T+e#_6gmeua9Y|+>Yypt)}-~n!^R0Ck#Q#dHVC5^b2e!)+})n}4{6WyPF zK9fYx#MBpef4y0wA18o>b^MD9drIXpM6{(4EEKB#0BEs9MMa%nPAhJL=HAWRxIATr z^$%Etdy@q>J6R4jPMea+sj0@l@HT`Dyat{Vo`2ujJ>b7%KL`;&sn?FEi!Xct}W%Ik74pGOgfeZu0;g#dl zgJ-s|KiTc=?F}Gm(SZK;7}xU6?N1g+WlhUey#rEB`hHMsX5sJ9-KqGf*FS#O!#^12 zNkmkvFEB~pZfUv$D$i#j6s;(H3*XWv_+P_L&htPT|OmuGwHq1iVzBPXa`P5smhzihxHHq+Sjs znWvLdjcKvlD4je(z#1Gr)k zzptN8{*bzp4b_VtF%K+nnL@3&Jl}H^(A&e8PMSyoN2d1U_d+)dA9g(3c%RMhOAOFk zOiWBk*$D^iaQgXHl5_?ymR}m;DeS?;RDsE$nA`JXlgHTxzRdT|PGMMB*vy9&u%B_U zzUWJhzWelXdRA*P&W})!K?bBXSR>TGWV4#0v0eOu@_aYn2^DM`;8&Ogl_;=QNJU-b zQ;1(cG8j&1|+BndS&Jigdozz*N;l|lyz z4Xb|lV`tCcK6ZaFP0)c~3&>rN*q5%YpGjwZxQc5I{`apShQJW`-Q+}F_x(E*=h{wt z(15<~{$VxE2KwgZ(E=`nq(FCYV8D2(!ER`Dv=8XRh?CA|vKV$LC1J-k5mdS=)kH&- zDFPOe6a99IbDZ_b!2uvUuh-WC;nEY*kp)cFJjr|=T2QJx&lK@?UWzQfL61|+MUUNF zZg`Co`KwZ=QGqW}lKoR3P##f9uLZbCx9%i?K%D|HBylMPB*4_%+!!PopT{W$w1NP8 zTL@G3yT1|#APUT(8qW)JXx0Kuk+8C2P-Dc; z_5G){3_Pr${d^i66!X#aK#;i5O&*BCe?v0X2X;3O(8EoDr4Fko6!V3^*P`6NU5M=r z2Tdusd+kP`O_98>uaAO*V~X8TLt_rGSr5-hLw<+-qr&(@LWJS1%*jd}Ug+nE(}jHB zDrEBof&l%;Gy?db{P82g_k|d!>TGHv1hNkxG|1Ag`NHYn!){?7A0DtlX?1mppaV4x z^;V&Z1;{X{Wibe({A9I(FfmPNJPSSOXcK}L==M$PVdDy@x6zK6vE|>sEiNA=v4@r9 
zdZCt;m64Uay*xjM9dv3stZH=xlze@rOcki7Ppj77w#>wu*&PfU z4z>yWX)BhKL&+8)*+!QWe*ASSa}(I+8FGjja$%9UuQxXOqrq*3p>96Z@aOwW^LsdgO+WnY)Zmg8NevL<2Iz7aI&Jw#Dr*?0BWMKq`(583jdjBKKYE1Ic)Zt^;(G zlaLSyW-h0KU|#&82r4+v)`GFzoezc#`U7C#Nc5~b9{=h5{`-#yYiIWKL|!)4#)Ig- zvGWl_r3?=+xo+_0N=ixr$;o)7v(}f#D@!ka+~3|BO%=-AY^NF2826(>5eTRZNicH) zxHVIJnb?ULDFLR?&I{WI2M5Gb3Q~i-D;6=LN7VLiM2wFPC~a-+ze-4{$8o3hp|*N1 z$mpJ)9_$^Cc2T(26_&lh*%Ikk259Z; zSiGP`_W^f+iM|tP$lncium1gA3LhF8!dxM57U~}u*Z_S948_2w-TAyO>9?Gl4Ku=; ztzgywOXqW3gA}TiZ3?um{5g2!c@O~D6bfbJz$5F2LInt?H&vtM|IPv!k7x6P&1OB_ zN>bdY9V83R%Oh`VYs1FF3jzMMfMjdVKgOOAu;@nkJ>Ef`7Vm#Y-T9)yHFir4?td-Tx&k92 zBc0GK;UTYhY#|WfoEFs7U;_cCR6YwxV>cKpEY=H)5!2(LLiMnSum3inf~}8Pm#4j0 zCPVR*(D@SZIG`pOv;~1N6kvyXtxu2V|NadCRviVrg%^N-s#e1VOKCOBp4Pqc>R|EGR$+qA72nHge%S(xJjmLgofW2NZ=gZJ_3uO^Wj|9;&pKRo7i1X z;4vB9o^J!rG5|^jXjaq#LQFEj2;fA59_|B@oX`D;kj(pt9opo3LQt6hgs<2S0imV8%%gIFN`41bjz?b@1&c znzQxWy^OYo5- z@!hWipwd8*LOF)ZjvUle2PQ7G#Ni8^d6~*5E#ngtgTO+-AQDKtwX%|2YYpNdgDhzp zSQB)MF>^;47(T~6!|)TRwrsi|@8&yn_P0khK?h^X-m6VlS=H+-SwmXODmuvk7T?B>P;D*x5XXYHCtxcC2nneNwuf`s(+ zh@VNXsd;$jnB(aR89-AQ1jdgO7;N>}H*W6c`40@Iym12VvJ@H>a1((dF$BKz(AZc% zc%m0%WKfzVQZTs&Ah+_^Yt8C5jEFGX(|V2+$b3riFi0sD4*;np^}kuVySgpKZha+2 z-pq=yz0EGWrw3o(|8DjbC{$qpG9%oaiy|iqDiT1EtONA?j|h5txE)sjPFa5t*q-&~ z-CSHQQQEC)%bn2-&2}$tpr*e}??e5cK?e|0D5>x1?IndPI>TdBZ=5WLCp)S#I@ZV0J6hL({`GZiJ3 zrKvfpm_KtdKT$4-ql<0#dQf+ZgV6(J^un**BZV_>B4gKuWn$NJ+;iLo@W3J927E3l zd5tA_3bC@XN_+ea@-z?lc*gbGVBOxQ*Vns%j+%fpWEt$z<13%WcJaHunm6DCj0cRw zhy*>@^jch@Tmd@h0i&(8Z--*%gM-u4Ng$AHz>F0X6aA+t-!UWu&&RHib4e}0LW@X&`^HcHdtQ=wofeD@_-2p^dFy}9yMq`0S>G8 z?Ei3cR?aBv?BZm{)AF%INCA997FwViOcrq!PZvsTz>$T`oa;Z-5+zBIZ8R8B=;e#=d_Dq^J0e{-PTXE94;CZq>gJCVub>b>2I695?{Yy((WbeFc99BCJ z6J)5GbAh7B1(=(ZnfXCU3HueVeIek)vsGH1e(wNR1hc)BlogFi^$S21m9(^e0$G^B zW&{iH{Urv%BWPM4{}wF3Kv6E|Z3^%OvOvCa0vn6RZm}=#`N2Xmj%+N0(?Y3Ct*XGe zE0>axf|>aJf;gHNc!K0EmzX`(;;DFdcW?+XK)Y z2Q^`nGx7jZf!+VfN9b;k+VB2vV|n5dc=^>cctDMhsapdjb#;l~Ehe0SpFCWijiHYr zT4@K{`CttEEXC;tI|1zkmzr-Ln-w{8c01_mC1`t1%W3U!6u39)sB 
z%I83@g2EjKZp%;qrwelMKp>TN{&=5k1Jknzu(%)gD|amxS3eePv){T!0Rns0izQrR zzw)-hb|DB%^@0&Gj6ne&t+b{AfOu*drcB_1S1p#K1i=Q%|NkNFtKza+gML*+Q9vXF zX{7}LX(R<{B$XDBZX~5c1*DPg?(QyW>23t+2I+=x-hIw@ajwsWd;j)UWW8%WGxM)m zl(<}cvEfqBH&HD3_`qGGu<%G7)O#3ldf+|r98c(EeV`=)N~SuVuTts-7!iT?W*s-_ zd3br(LArq1_cxGx=M$X50@EIP{{2e=#oYP& zY;^{1H1m99S|2C1?uc=@$&(2Xh*$|64hKcgVDj6;A6cKR=LX5l-rhb*G1q9S>_w_b z03ujZcRXd{3_TMpqOeE}N1vXacN+Hd$iR)oz==?+Fx7%Cgh+~jS1@9H^kmlzR}f&; z-*dZM#ry(sUNQ5RaJIt3!%gRo>2rsNCMxY1m!B-7Ot+2u_b2iP1ucesV=?HnxXF?W ziwwt1L7gx4N9#-A?T(U5eZj~`o~6!xUA==UAtCV;nXtsnhnvkRF+GXL? zl1j!oEn?9GB*w6CISoZG)%=y~`=o!@4HA-VH^=sMvBXo~aCL=<_(-H{!wW4ywu)qI zybL+o9501D0Vu&@Vqz#D9X(@Z4Fjk-oGyvS^<_(}_RDqw*IDGKF&uGMMB5?avHuAr zVe$9ZZ7}@05mfj1Sfp^A1zHbGQh``@vnAt_C5%p>E|Co@l8&J)wr)WGt`DQFTh|9Y+<{J6okHASQF&d_V0E~L#>e3Q|OUr%u z&3}8{QHFjCTb{HQ<}k$i3$fvt>*equ2NyHt7~U~C)_Ij+An*@ zA(UIc`vlVE2g{7!$FQ5MOyMpD0_{a7!Y|_G<$dt*VMGoE`2h+_668k^sV%sYsmjkXP%?;b561#L>dUJu zIh?j-A*Bx=WEB-fzzEeZjRxxyxG5qrbq2p)1EHs+q=YC&#KM9$%om_OdQjR3?zm>d zrCMR?pip{)C9Y!trRxliEh27px&7G|tuAw0f*lQiYQ9d*1Pv9;u zLwiYvi+icrFq7ZH3zQwYWee176)0N>s}*p=-6ZJ!b!RKFkzwUwIR*17RWg!+=WHodX^UoeqPV4;Q6*g-lbF~A z-S~}FD%`2|{jd-6e5dXyDP&xBJ3ZfZI;>1~XANWSFKlnS46=lXJ|Y$HY>6-$%flyi zj_R6RC}qVc`=Jbud+AFyPdHI`V(+KOXT#*wQ8A_k*VR_d{L{h8DB1PYy~@Qaj0gqP z`g>0_W;IyNrf`Jima6BVb`j5-L`Ftts?6vu6x!2QpKgKny$W0y0ztuGg~ovVkpR3# zyL9q*ceiysx-yHUFZT!9aFgSqvl|i;63}n0z?(xjOUy75f%{RlxKnTt!`E^FBme=> zvY+<^Asd44`S<)99@ecHht+uVT{keH9L|;_VXi`KW@!1cW)T{GD*%rm{_Ch8B#z!t z9KiNrkh#H11H#n;$sh6pRRA~(b9g#?g-3Z?ptS>}791{gQBJU%A(1;*d(q|s|NLdeRHaD1W{_u-s=rD z0{t1?e*?XKpibC2rqWPjd-m6_U&a?d8Nr6&fC~S7DC#Of!3HC7$*ZNMmwFuR#- z)@1SfsI&1DS`_GLX|wY3gye8QIDQQvU!_F%!M*QaAx(np4xt(!0Hw^1hlW%#3GX1R zhM?Y`8?V?QErpPDXRNyEfhfelQ=19*?h|*p$Q;J(|NZ+fB)&vY5WWQ7~QGT8PFrwgc+7m-Z~ z_A8L;7_E4PM!@O~)*vW{!N%i?ii#l-5$)iA8k;$+%~Z3*a(piMVyXb`P7o!A)u+@m~) zd3wLNxS09Q232xen#b!>10B8#!r=@Vg9Y-Ss^cz>dufaG)r0>bGWp1&_4B{$>kfb7 z7S9PdusQ?806jy-mx7Yg5}NJ-Oz08_vmpBOg9n!C9FU;GeW~mYYy$=fo^8l&1onh* 
zhI**7Ac}N67xFL2c;%>H0G``xMr8ybEe@0P>1!(rM7kDHhk$?=5Eu*O)q)454C*xl zHMS;TpdgZra1<@WpFDp2tIpv7(X9&N9rZ8k=Hpuqx4fiNG$iNmk2E|89YryC!_XzRm3E}(x5P2_T!(JdK5l~&BBRP&o|H?i0qLBnusMcM+ahzFJM;@A}l^CQ0B zcTE$m+=;ACz2?OBD34H?pyZ$#k^^!Au8BFy{SPQj(D9f?C;wP-Xr;*G2NAsXjbDI3vM{iM!{2Vsd-?Hk)=nk|JQNrfak(Dqc;+ zlNY|t(8mxkI6c%k^i#R&#aF;_62n{|}U2@w`F;|YbKIUHl7U5Wo zHuFG2ZIkQZRK6#XpVS>@&ZsJHflt#XM4IGqb3Ev^^~#57x5@1H)QD6f7=YqXzvUM| zR6cBO>H4)nu&n-UuY>e#yz#mXYKW08cWI?1`tKh6KLfBlL1=-@rV21!$YoO)mrpH! z05$={!DuiA8R2LFtXM(}oPM`!7MNQ3E6$Kr=<4ofdWr_h(;ie3uv3-slcuI7p3}eI zM|HEavMeYsxZJLJ3LDO8L8=CJh7~U1DUzzSHBk(1We_B2=90fq1bhWn?aE_u>-jSY z2){lEsr~V2c_;bp*Yn-bLR9uxDUvgaYOVhY+6v+s8SkQ-!BGSGW#S2SMwLcbc=#UV z{PrNer^8A>s7mCM)>Jj5a3^zK)0XXAo0E7K21kvQ{0L&8lp6~R2sG(+gKYODe?r|T zL#wm*EwFZkNPv(AKXrys0@qwkQF{oH3Lk%e3T9@ktQ~s@MU+6a7N!oOqe6`M`CZy@ z#IP0#BfpzN)ks$9IG-8mL@v+A z>@~La``C91PF#;;9@UK+rxz9d1Wo zci2O9^wpTx7A_!z}S8PI`AQs z#F*Wj^8xx|N6;!nO%`GF>$Q4gUV=~T&O09|Wc31y0iV<28RU7aUKhh(pbN)1PpvW# z0_uL!$50K>=9MeWiJ6#~-X-$Ihf>P`qy(l%1@us*R3DLJf%YSd)AnS4yRH#tm%l%n zQgprJ(E=P_JrIo$0u-PnB~+xy86K!U=U7$&l%~nKK8@pZuitOXBDXpCR2$;H1_#&G ze7X&k~>x9E>`ghvL5)PB?r?1f$cusTSQ#?=Y}&X zz$>G*v`x{oWWLJj1!txN;y7f!l^iZ6N>}95CpD97bwJu|Tc=<>Lw z482-~F319=vvtuR(<9=_UTFB=CQVl$1Zok`1afI@*JHs~PRFKDXF%W!k1k*iq-Zv} zj_*C7bi)DO<*<>RiC8kAQ_OpV(ga-Q*INu;AsZW3xEMqmgjAHXiwmHPW$24rQ{{f3 zAzfeXy45znQ=okU*alIL`Y?Swe*74hm|bqwFGXaDVSYWDc&iJwBurxe_xAcz$$y83 zX#o}J0OQEuoU>1{N7h{fNP;x`PR;Ly-CY?t202HP&+h^aS*|wF-`}n*KHty3i6ejYNfdpvGd22<{1p8Wb%+EN!4zG=rG^8m@x>=8^@Zg1>)1AyfxlT?)h+Q01&69UmW` z?L)$FF^cl_sq+7XOax+P2{dCT1Z+zQK?R8~Qsl)sUE7SY5U2d^~(}xysK2 zxq`6B$W%Cre`+}Z5HdtlaBu*$^YHWvF6E8QqgQ*%X_z$K_N}W+f={JBMoA;VuT8s} zp`^@s)EBw1q3dUDs=7-lfKh|Q=+3dcm#URUEt0^d-tOXWRP1_rdSX6#(v}mRt6XEx z1kVra@!$SKd=>_-V2a>N5;0Mgp*3K~$!<5-h;xVFa!~MAAn}0p4zz0-^7@5zrTNfl zfnC%1A~I?4tiUc*Y8Qqn*sI^7U#eBE3R5x(k^nHEjf}E4DN_aw*C4C`pGQbU1dD*c z7w8Z8Z&thhLRJB56UF4E`n@^FDxmNp%v+Es^KH%Gz#wc4l^UBD2tzmt)Zv%vHD!uc zl;9LUz5_#w4`zs}6F+6hIo8vszX5%&fntY1)PP_h>Z0)_y|1MO2^fKf!}1egFaC*h 
zZ6~2zp{&a*E6ji(lti`wp(5~HrxPUy9>_m4BuwryeAiLi(~WKRr?1p&N2NFP*ke`d zjoA=tGX{D+1A9*sz=iF~$XA?>-hcYA*oXKp;k2BZ-5pE4;L)e=bW4qW)&mQQUErzL3gSK%znC6+7FIH<4*sx zGaWFgdm%Z6&uaJ>_yQvNf>6G}d#$4S9H){;M@I*P7mzsg3WTDXm4yPCD?ctYNF!3Q zu!KUe9cDX!)eh`<&;m<48&oq%pQ|MY`3#aM>4k zbNR14z;)8Fe(mUa^mnB5eSw=&n9@H;RLMj2Z#7==U)x+HZr~s%%V&EJiSA7-1|7gc zmvfo(Yc~?spP+ebSVvQa)I3An*&xuI;U1;JiHYRF9>u4k4|?`ian+k(_<@JadUXe}-XR}-}CKu5IIXLL36%cX_EchT44@kcFtgRV>GD;IP5d4u1 z4hk4+0eBkrUlGM|d20C(74;0P8TH9=BqXHH&d&K>jyaga9`M`D27N@Z2ZztBi-OqA z0f{gJWG=v^5P50^*4hF-@?^bJC0%zp=&>-cA$%=WKqYCMO!Kg?;hG?nG`bZQO9T%h z=L&oySGJ05*{O92!=EsE;9vJ3@9S2A<`v}zd?^- z-Yo3L#=>ge{QOdT;@?xn%yi3E!=)cL*GixP-+K%Hh`qP|Op@YQVG?YkB3O$A<#YD$ zb2Bid&fc3(ZZ?$ zolF+%+(%S0#OLO1op3vQJr9T$a}2&J@iuG~)b2EQcl!a|u!6)G$nYEu*XJgP1&i@2 zYYjS&U;3rhL>Q}Ik^u`v!C4xS2%`w*e1MKlY1C(`-!HLnWj6;J5yt+|&4pXRl19bc z>*?}d{l#HOt4QYSZ&g`FZdF2n0CX7Ce*S(pYwJPVq30e+P;0Gdqr2s z!~=mO_FG6!-GbU$0xt^D907I}2qef^9mtMGDdBj+)JcE^LI5G?Kw@D2C~scuoqB$k&q0smUg+~huT%( z0CA1K8ys)kZ-K`3^&t(9OAO3%sQgUuOHu9`JakJ>OKWwFU-Wk>65S>SY6w_Z_7n49 z4us&BG9G_}$34HiEDUav$#QjGo_}TJ%lSr!+6UW%-#aq-=DVm_C>&8FXC3`6T`mgL z{)kgVB954dFtm1J?k~Eq?Scqz1-K?!sHotB7xSNg1piKM3l`h(fWie8^Rpq-M}$WM z9|_1gAaz!FcIhDUGy>Om21C!@(GlvjAAys=33%T9YPpX=(rR4Z#oHA!_XK7Xd=m)0LhRq&?P^og8Dd0$s8CY}*CcLb$Y;yqcml}7 z0>cX|2_V%r5aw$l=c5Q9g)aq&ox0nYSn#K2Z!d>+%w-RmPATt`tttU*brn2t`N}J} zy4R6malRy8xm^C>;X>+@8*l=DP-pOcU2lCbWw-XLWCRC?)RP)J)ADk{l52x&f9GPi z_Hg5~u<#}2^>f<1cbI)K0kwjG?mt)~9| zAb3Efz!}Hu0W;o3kQIcl_W+v;@*7cLy@D9M0E0#!QQ1q8mDpxzF=q48}Ne}8+rVX}S6qZ10VbR|^gtnp+{H2-)h zpv`anCUbg5RO+|bLvbWBC6L@70-69M(E}U+p~WDUY@o+efjRtTdy36>pcHm1{=vrw z9|Eyfq0{cK1$PLsO;=`!)rsx{xDO$xA>3dvKoDDjMVsH#B_ploTuyHP-Q4U3cPuNb z>aob$YS3{Y+Z@Z+L9Er~E9&T1 zmr+k9CI&tJc7Jiv!G1`{+1RucTd>GzSmf?Nsew#(EhW0we@Ta_auZR|ViBPU*!4r0 zgP>pyWXh02BnVtCP(=RKbR#ybBN#RNem;MTK>3Ufr(}pv1{F%p%^eGtvN%^KRwoFBPRnn=g%TWA@Qn<`DX=<^TIXNojv6nJ zI-;oQQIZx)SjSU78pyUbn7#Rge|3Ih0G?nS^X?PpCv&R8S5^ymHO(fxHAAT`*N^$H z3WA;oe-JL9ktCo8YY0%tckl!e>l*v8%JLmC)-S=6(1EiI8;YKg&={!vtO*{Zu%RIh 
zDBkVk<9J}ffVL!;J6d3^6E7|YeHX$VP;TJw(J(S*!mNPV1W~bG5Q2e&V?eA7ItLyi z)oH^;>~e98;IJzoG8-QHv{Qr-Vq#!m0Lqd4ydf!gSj%F9gk{`Vmj%9NAT9+7VyG>Svzuspabw^ZBQ@}S``Fr zZlw5v(Tb zLyq5y4ImQdtKGu0zc<$ygx@-_6O$bLR?G{-#auDcQeRdluMMT6LZCwHgQeKidR!CGx9AZ89=uOWxcemv~951Cqi46l%N zR!bA{_W@Y$y1(0qAOS)Adw0)i>UqedhLQm2S-o8V=mG=k^i1UO1AnE1Y&ECc(vU446fEKyPV_sd9ZDPmE0fakS7F@_~ zx&*Tw#$yKG-rWa5e7-+2Z9*u~{h(vIp@3^6i-VjlK_vqHD_9JSLE_bjk~u;kycztw z-+{MX?+uC)U9VF|G#uf6X4Lq&&2{^f%}B6cqu!p3DY*7f&Z7OrVKG$fPxE3Z#So&ptDhOAP@jZqV)09Q<&ZXHy?f-(_ro!|J1E9Nj=C+cf!v zvQwS}g)L-h0?;KN`DHLtj!%8};0PJe)Mr)uQrA*uSNix9ow{nd zZd-@Z|a zi3VLeE-Z9l4es59@ zjtL)luz9e(50Tnn8Gdyb{bSb1c~^XKay@sDcf4{wZlwKLOMsXb2|nHML=kmvL9UrcwQsZ0NLi!Nk#9Z3{4DhI%I2Au0U7?aJ|FHavpWK*@+dt; zo3W=qTT#z5F#SyyyWpDIgk-mPLORxFK58}nJ7<+Yc{A4$!EBwxEfA3Bs?Q$nDS(z3 z`_B1yc}PJ5*QC1BI|~*?1kW#U2c+a{x!CRl9Y>>cJ48~RVyNBluAxxRkBn}OSUzd; zH<(P_40hY2a>im<=W7(f3YO|_8SGEi$!1A!`wkeE3&OHnz9Kp6HHyXM*`~+!I%{#^ z>4w`*49{wfee&&OC$5VPJxR%m7GP~&&i>N!&a7s?)l!pa6#CS zxZP4tBSTCxl)*}!)+RBE;|(Sa6WQ|N5l&e{n6FFhu<&_|spaX(SW7^_(97Vb-`)hg z$;W0vM`ulv$@G4ToQxJlef*lAJA!^_Jk77CZqDFtjf=rmAM?y9d^stV_rD0~SV6kx zdqOBKA3RVnMjs;AqNfB%U`2Nr&((i6uev2PG4jr`DoZRZ;^A}OVd1)a0^Lu8;|#Eb zg^&4SuF9m{MOq7U_oo9seT>v^MGYWGMw7%e&+i<17yQ_?kaTPN zV=i3(nU^U2WtwECJ!dHc;r!z+wg+HC+Gh62AI1|J4ldSKUM`aODSs_Jf_6!D)t}NE zA}ofBVo$AI3<3@+bwB-P(rzedw0gj5nMT3_fumQ^zBIJBVKK+SM_O2x&xN1P63HwT zO)h;*cSpaD3!zJM_wTsJa@|YR^W3s$+9HZf{XUmtGnpvD& zbbbYn+lcC6s`h{pE6u}C>~f!Orz@dZx|A8R;;G#;BU)7TAqbo$efgU3RTYc-7yMT^ zG&I)_k<~bU8sxk9IBec4p-=J&6qs|zWHg_COI9stzCUiZDC8sYjPHPw-s0%fo+KycuTeR>ZzXR#OrtH4&^CV)lr^L{7tk_f7@CS$ba+~^qRpx%tLlZR z!WLHDDmL54GgN`!L?CpHi*%3jC&~Bb!I~}>olExEIA`(ikUb{z*GkD&4R0!^(J^0ht?3cV$^e;H@kpFB1|YR)a!Rkd_Ji8M3~t z3CaeR58sCNwtn)b$gf+xRPS(QRqqXQ2+G(OIjQvguzIz$)uLOGR5mhbH_%p?4%b$} zNPr$w__^6lS@BzXVzw47T4TsAp5V zd~4yR{oV0%cP+L_wy4|vdA}Iu*R|%OwF{F!l`6y?F=w)msyp7;somvrLv7C|e0ACH zElljOQz03q`Mr+*p1-1J)Ox9p_3$n$l75Me0N*gX;HO~xtof^O&Lv*$X4BEfBk7Fi zSr%oZz9pY#f&+?Ky&L4T$^}Orej%Jt!VwEXS#`nnuPtFx{kF#a3hm41XD+e9zXTS2 
za<#KG<@-mHG_HMXcl6Fv|DRo%QzF5(IsLQG z(|o?FM{nldzrAi}Mf>5r&P|nolD}x@YqF6}usojJ2Y2oFtWhKG%`=;NGM~H`lBwzL zWy|P*O|(wj7JTgM$0!cS-Tfv#rF zq=Iv&_K=c__Hy5GU@zgoj8t4Gl7LNBo5H~4QMIV=FMjVL6y9@LyS^=jhSHwMkYom~ zzYlFA(%tLKCYMN==>(Ffe&@Zz|vf;}73#ieINY$qNaR29W0| zU=p~sJ$Xc2nj~_Hm~H}?n1y;iVaoZ7HQgSfV{HCIVN?sMWdFwfmcA?S@kcOV-8WR< zf6-^4VB+bjIEH-2a1dw9&~XHB>c|`x>hgFcIuUUogIOadY2tAm;XA{z;L(RKT|jgE zSoN`lVVziK;h=fDP4I-~dv!%oWy{l%!;N{h^b4V>)cIps^>Q*+f;NVrlqct_%%0h* z@wh%)HVWd3W$67uedOO7^a7^D;hy}vyWHg06_@h-aW!JY>;k9{!H|_H^3Q|uV z8T8;_sv39Z-rW=)@}fOb45kZlB>NYHy1f{rRSFmcg2B1UU|~R*UDVrPmp~I)}_!Iopys{&RiB{K9Ttw{^bYeecES zH#E}3K4dnLdjq&3a4Vgv+0K*L zX~lKfa%2zuiVPA^lpe>(8KsD!Slat&(BKhbKe=eFyFPCCSwXL|_We)U@(OLZQOS=$ z+Lj34JUz?u^Slh1Z(-+1sUxoid)sfX{`y@+?ApH;yNPVVD+Goon_ z%})KG-r7%clwT^!`^F;Ci8}-HFWT@ZpZtAd=a<5*r{n{i8x95`D7lmOPQ452osGx8 zvkT}dR-x8^VJYw%5{he7$5_&*@U@T%DEX$-I>gJSCyB>$-&md9qZ`)*p8-p0eA`7! zHKlSoUPgcPbfrj)Bnw5*!1ra#yX5zDf{ke+(djB=Oi#bMJ*gFs5*j0_Hz9gK5YSyQ zcqoHoq+nJqxiRSHp`k>CYqV%{r(9$WZ^>7_ay!}Eo8yrt>tT&Kd*0#+o8_o&Vby@! 
z8uNSIar-%4-fZlwmewAky^eu;4jLswM|$GrcHbAmt%i^9QhUGSaQZA+*^8rP?15GP zp=w0IwPe6#OTuP$rR5m!y>ReErGAF@z%vx_$I%M+7U%Vt0K^&UtornP7fvZl`4IIb z!`mal*TsTR!ZVBJiE#V7;E6VK&djG$&APNDk?LWTSZ^_Dw8&glDqg*w^$>O%64G`P zP8#z)WmC{-AjI-e3_u~oC2zi<_FiIBAv3WqxVU;HyQzpp772IK|M(ME47>s23skrcS~{UsfgM zsGCi)2&8RWcIjYEN-lUv2gu%|Qj*)E3TBrem)+d5-T9fm_9;S{i{Ty3=y3&}Pfs>i zt=xfO%x~K=m-S-PVjKUf1Ml&iI%Xn9e5ns1zkBR>oU+B23Zh>e(qnq`;YEn5;mF`OkdIW_e>mR3C7gtg3yHG@h?~Bn{8E!fhDSRHtNkn zm$j3}Oq#}6le_JHuaE^S>cs36uG16U=I(T)twyDjO{Y^>3r~@2mB-^}e&u;4(t0@J z*J?K`Jht+(c(?3rfoh=D)kY8B?OgAF*?oL8LhI`;WYz-y138#eqCwa?>t2m70&1C7`}+J&*+gyYt1;TBoS>7RNca z$cMFp*#%4(b$Ye5xb(4*JEY)8w8;q3pimko{O0|VY__pPgU3BO^uin&J)xVopKWPJeLKr&8RXP1%G9Nd=yF{>N2*ADr+V>3OU{IYfPKn z0MJ2hia3e+1H9Gbtbx$*AH?m8n|8 zAYk}Jcr8Ppi^$zH7)K(1RCBl%PoLBz`%~r+QIvC2@LZfB$Ntrkc6M>#s4Pb6Ns%@C zNVHHctI|ssR*hCEWz}wX+TJ2=k*RD2eQC!1xFd#+d7ES(%ol9fZcDEfZ#!z(V@5NF z2sKx6GO?ttgnd0gR!wi9B`U2j*Jl*AED&?j$rhVlq3PHgdVk)JePOJjw)D!BNa7=k zffFs+O3T#M>OZe^G6$6vHu0!0y_RFyYHufr1Mx~!sr9)u9sAg=$2kr{jLTGbJ7O%J z#*aizSbgG%n;Ca9dq;NAX)&HxhDF4nP1S>QFe`b;53r7?%uMa-XI z)Y#=$W1HJF+)9Uk*1nyuMLQmk`&s1^={?peCv%K@fRnqiEXmVIgQZeP>3z!NLzX_Z z;ZsDIC=uYQw(;ucs49oqa(hyNQhd1Yp=Dr2?s6Hl=Mvpw8!7We_A%cf;kpWaej2QG z?6Sb{lbHBLLA4r_*s>V2b(^q10cHkwn1mTa0#c@qBEN(Y?^=~hQok$RGsup~aSZNM|Q8NaJBe~ix7VX>QXYWaF@<-a~oBfUU>8t85nOX3n>_C&(;VaAaBqeen&9~W#~G&Squ ztn#%V!eb2uZfNJ9injU+!aqs+c)}nDR<%hl#<5# z8Cw)8DnsJkm51GO=6+iCz4ax$M{D1|zI9mQ+7RY8CV7!NEKZ_e{ayWW&9QqqN@(e$ zQGvTgPdG!GhQb}9a;ZlCVNhSM3yoVoLsmmyQh8>o`*AzDW|Zjj;M3>oj| zP{kInXiw5`;(@wx*c`L>GqAk~jIt1yi6WiQscr#Dg@vaQQHfAQZOo zop=RFFS^m&wj(O^<#%6vnmvEy*Fc};YT3`J&$rIwm|v{bqRWQu;)QxcW}Tb4jF91s;vi9&yB6^Bf~WZK!0^5`vpPcep5nihtfT*=uWbQ5gyn5Jrsjh`(3bFlw2_{oX*i^tt=XPhST3 z3!=n^zwv)x9r=`r zEKNy@gRjDQ2_upIcyJ##2SwL?d;+o~bbs3qcq6|ui|ReSj4{(qeuhy*(8;g=_4Y%F zEz^*WrB~afXK1_Uk7{m1ksPk+XnSXe+oxhC(-__gZIc&88v53DCo5ca@)g_8d4{S| zeecK-k%>aXco|v_WkBJ<1=oovvbqgf=b#UTTJKXkJ)H^tX9lrfd-7lBxl;ZZ?H*RH zp$u)6{bky!jBf8Lo)A;?{XQj{@s?e<~k$0KIH7&1iuM}v1v#olc%a;8)emzl~N~_ym 
zevVKoD4eS~l}Lw&{hmu?yLkq__{D&`iqGH4Mh~S65g!7XkCArDWqjc_cH;FTyaS_| z^xs=%;zn~Gr=j2Rk3wY}voNQ5QPha^{X0sIWM#eLZeGra()|%71LPosS;^c^7fYo6 zM?*}EtRYc`yyt6Ay+p^QamMUrWiP)9qNdx%xfJ0{&#_4S+WjX>&vv)w;^Xa)eGyP@ zcW{EQ&JEk$WZyF6PVPj7b6)&PcVpI$BKZ@$I}yK2jXj`T%4ACSW}T&%PYZ1=<bX`J>&k)$sB1`iTs8fET)!C{tH zC*s;IB~`gal$%B^UQ^Uv@|C)w>LFrr{7YuN?Rh>FB{EOHe*aeFtp9A9;D?6Ub0-sa z{Ts0hUCNoiH76?~1vDJS&0NO z5Wru`$a`;n@W&)NlHa1^vP7mt9_~xr3r3BXM-25q00Vc>&C+Osr1 z+^MwbO~f8Exc7j9C&Im9g%&5tN22{_q;OZ<-9uGuuWFGCiNWxK6^4VXW+f5;vk zB=5&voF%mXnZd^AGM7$M79a8D?C_Klf)Yl?H{Ra)UY#_Nt@tT-0IiK ze<8ZPdLPxnjr5)Jh`qjB97a=cYiof1r6!hG6zfy~%gUQ=4ZD%FSzGu?rV@R8%#l45 z*X+oP5g%6gSrVGmzpt@c@p*ldyi|T$N*6vtj9au4%9U}S*HPgNRpwdhtyhF%R}<(+2mRr!NVhTQ>ccctjhec`cYPFpFD~igI}GQQBQ1+s6nUx z(R67vPYI(yjTjYUzX?x=oq{m2uANuj*%E0^H#)ArsK5I+h9%!joJ|4sLwB78yjOV9 zCYx~Hzt$hF-Cv*K{=?=|R2Zjfh4xg!{bn-to6YybDyuI)HU@vZxmH9vbJvUgx`j0$ z+)+j4MUT5xqd8CJB5iq8z2VXlPh!iL^VMPaJ=2|tKe-MDR|+(Fj?+a1hjK1*5>~Ia z?*8s5>h)$8lS#WX|D8t3<~!y!f)U3>%+#b*uQYb*Lql=p10^rHqto4%^w7YZlB1JrIh zzg0h#rL%oxR72ZykC`3U9ZTR{m^w=|e{9Fr5wf8dJta4JagAI}i_pfj7gv0wbNX_X z;ICY*rjNe$8aTgVPHrY^Pu8+_E@m%28%egH5u{;8Z zFVJ$B{j`<3ATE!pbzNM%Bc!;IC5!1kVvx@h!TDmh9v+Zc;(eq%}>efYa=5;AN2dBJ3O zy=4xmD!!$Lp76mcN*_z=MZJd9bymjNt?NGAOWwaWPF{SU$IMr|=VoL!k2WVQ?eA}H zm%rw9%5vSu4{TL;Fibt~nQ_Q-u|YK<{iB`!kgF(^nBU&lK*QO;cHD`x+^uCz?a4klWKowfP?Dav-2uchsmIYnKo;unL=0}X3bW{`e)y%r?f7=w?0Zw07&1-r)B_bACBHamV1siSPxNg~u2LBgPEdfXC$e7nRlPzKK;=Hv?4IBM+Kd za`bNaq7dzSo&zWJJhOy5jfF^yM(h zZS9-+d+e`IN^gj8TzFHao5Ofce2v4Xx%*XV`GW87ZZB0MErd!ui}J zEBoj>OHkeB*D>0_#Cs3N)U~aazp#OjyXl|W*guWF zdzA(lv19gs53No4AMY9YAJ#e-a;1bZ*T&qD1td(xD%;%3B^Aq^9~#+kd+i-F6>* z8Tqza@Q{Q^%HoTH{R`SSoj-o{Y$W*&abbloIwq}yb5ddRr0QW~X57BK!349OUHJTZ zcFO)g%Rr*+2LV2x)G`czTJy_V*AQ69f54%uw5dx8FWu`r!l9q~b^N)oW4i7KBmZUV z{;f#h1z}=RJwH zjF5L@%EQ!qfBeqxaMjvgN@bACm1lSw@>_*gDsJaOS`I%?Gg zHWl#A8^_t*|izPjw>#V!=ph&{`$3tOL`?*P!pz^(t z;J}+x*TBn5riROUdyL{sA_qtA3+|@U9RCoC+&a6$7kAt!xm}5vEFR`>ovo7?JLWA; zBK>5&FOxC6F-czm-EQ5G`tvfIqowd*_g~Xf-pK&7N(`VUP1Vji>7~VLQ6U 
z+Uc>K2rJ`O_DXc)v;$C0xb3ThHOri*Qvdu;rF`V#bi zU$i=QEmp|x@BhjiTS)eb=MQ{3LN{wy&CzE!S)CZqDC8{B>onmqfsr8J6&aZHPu#Ei zLSyrE`g+lu>i25J!Nh9gFLD`GJ;Fj0#U5t=C8Ae?qx_CPOH#NUwvN^WIp3=P{Mff= zUdSwsYxb$zz}JF(cOLbGuh}|N$K|JMj@#ig3}+Yq`~)kK3;i|)d;`6*t{rtr4SBi? zH=M=Gi6TGIuHBX3T_Mffg}s^b2&>A(#rqW{7s_;fL;G2aeWfXJA!&+%byeY4nN6h& zJOA{o#-Ra+)=pTmYWH80Z`ogSx~^*PG}zhLX3fg*wH){344%jKez^Q|Ef@Dz6b^q` z)V_klY0&m+bp3_VQH0#A=66jYhjpS$(c^=jue6urk)>bc499TzP9D&6IQ* zE!e0!OppYHN|xy*Rwqw%T`69#DO#^yOO$T>1Hc|t6!qRZO0DHPSlKYaNUgkst+u} zRcuWYs}I&6nh?(ZtMhkRPp*^e+`2y)xzVPgTW~dV&-(3+rf0RQRCe_0qx8-f9}UcT zaru5VGP1_;SDWcy*;p|;b7qyU;*bV^{!-C;QGLu#OGeUW zqgL^engQY)mDc9#xs~_MR8jTYEU@}Dloj$*HhB(|%<4T^i7>E>y$Jje6MkVA%iVx~ zE;UouG{YNxfH$(u%JFhP%H7AVxcZnSwx?I-Gh3yrlVA1vpsl`9LAYacX;$-$3U;wM z4R4s;pf(HRK6h;={$6}EIp0{`2RdoG6Mc_{vXAL9B2-#FV9n_sPnSYMerWMIFu8u!d3H=Z(5oLOSE#Esr0nbX?gR@74@o;5RfDH}PY z(sDGPHdUPT{X!eY&|aF>JX2#b_iH!FKE9ey!!e8Vg`$@mqGH(!uB}B9RIV*NRWJ`e zf3KztQ(`rDq*>K2J**%ry_B5p`6HUslS5wnKg4}?RNc?B8153>C3u3nTkznJ;O_43 z?hq`vyF+ky=i=_}?hd`=`+KkbqwQ&5U)#Co+{58~?(WR&=+1t2M$0o*E!<)mcuVUS zrWg^g-cr9EMx=@CJr$PU*E}!M8ueLf)f`Ng?-eYUX-r{^|4c4C@&Gs0#k7n{N8|YV z>q?xjCOjMXs;~VeC41_!SKYXp(>Rj-$o}_o{dY8OsBv>|+ME0`iLXrT1bBLj?dURR z?K2*puJPf3zVNH=j*;=c2fF7eq1D<24UcKPnytfhqutA|HAlOad;LGPjx}Cb)rYG# z_vG6fr^^QGyyDC|s%KZ4j2oJ+TT%gafpeS&t2J|ld|LBmNru>)oI1k$W!u?2lTEvO z{nkMFC#t9&e(RCmYyNhuvgBY<)M_;!eIDO*8mFXXpQcrFgN6r?mQ|aN8r5FbsZigl z+cS($!PAc7`&OamR;04;+a9lkgRy6|%xN+}Sst(HdT+(l@H*T~zwp{Ky4S3C{4|@0 z?Beq6K22h-uHQ?RG3$Jx8rXU@a^{}x&YA_Qg>i_Mo&C)wQ`NO~xNN0nbOrs|gKxo+ zOo`BNnuYt@V{Y6P08ZU)bh98B_ma2FG^L#vqkw(L)NMtH&iuJ@slmMM@G$khc(vYD z@n(fvn(Eh8u&{PU>u38<+AGFZk28AH$F*SMon%A$n5%V@E0;vd}@ z538Pgfl2clm^D|a=Fk47oeo;sW>zkD@rUr_rS}0cSDx(Pf7bU0>$ly#G!H0phRdFG zsceqa$6FT49yW_F(maiwlkgH;diXn@*;AiWq^3R0OPlW8s5|Y0=u+BixJRY$v62am zAB?vEw5-(anm8!p4*);%~BIZyQshnkz zQgB$hW@_ctxI!}=@{sY(_4mA|X{zcXx_)2E>SPC=Gf+{I%Z+~iD7jPItCzifttjW0 z1saXxOHAtETKhjuP5hk%JaTNKo0-OqTI@D4s?Hz-9QA zkSdZ9GTx(TP~qt?kOzoc!{Xsev}D}oAwg1n#5ms6JB%v?RN2*l^{q+iFtYA&7 
zw1PsQbupa}JUE{JRMt9}L>9RJqrUV-P5xk^Y8UgDjqQ6v(R$$ZlY;$g+%pMSk37V1 zf$7uQl3qfv-;!lE69T^+o0t=_c}n)mkS!sS1_G~1kA6isK$#D6yluKq1$-A`nq_oz zmKXV6eIqz2%f90UAqH| ztOA^=wzUHkVRtptqBe2KUvW3b!E5UQSL|~3XCz+x_*Yg}LC8x}OJVDWu-^QaYy092 z(}jhgondM4o9;wuiY{jyLEMec1NS<6MuSl ziMpZxX}5VerR!~+O%QpYg2(Trw%#(>H>dQ%af#q`vgF7ncdZ5gmglgygLJv3W?tlF z?b4B~``d+MKe=7=*T%hD-EHD>s+iV}EeAfY7w*{X#MHLIm^8{jT*q&v4zzI&e1o1h zfZGd8tFn;fd-d8SQH-ZT(O&2jDxrIBnx$h)@gVkAi^ba5D(9~%?@7#(D2qH8{_;9j zjT47hKCJ;K^MePx6K-5!D8)m{2^|sr+Ioz0dTCdFn>pRJjq|j`RebYcT2!8$k9N$? zCt+v*m}*{RnuNc9okVd&cqe3kmd^9>R_A@kyC?82!y)arUF|3rPsm{sk{I9W5Gn9U z->a-A@ZHfL1dF{eK{%)=F%l+azSU6IsK8!pK=>n{MMLrz^l}p}Nk|%gRPDAHHd^6m zmcXGTN9hcHveZL6ub|l{sJ3Zya=#677#4ZsltIhOMtLI1sv-y z{I?hhsQ&X@uPBh1e;^Xf@PBte3EYw4(a}*BR@O-6e+?*H(ZmQjybf#^I8ciJFSO#i zfkpxNt|dX0*}vgOkqd105M|c(p&qt$|3Y;zKEJE=AH4n_5%Axv(f^bqsd<0OR%SW0 z`ab=`A{?SGs`~2QQodKI{J!v&5#=ENB6ZtzO;fJf_U5?su7qAKFLqkDH{`yzKHK+xN@!-xvGJdNe-*(_{-r4DPWePT< zFuwAnEfeZLI#9X5gq?vjm`9&Bcl`!6eN+g8w)gNWLS{HcX(y8rcA>=~_|Z?d^%lB4zk2^4D;r0o7`f5o3Y@CiF_hnmBPU%vYE2$XLQ z-ZV}%$NfDh(k+Ug&-EGECXjw=`yX914l6^}(RU3osoY|S|FS4a2WS5+a6PGu{zF*U z08Z}1d`)Li6IF+Gro+-vLbQCgfnD-Kjx7AlFmo|VCq5E&R=HCNtzCxH|5+3&uRuf! 
zLBL(JH)OKz;!IVX9zlCX9%0d{D3V>RPI_z#+cR*veCeo)X%(Gx0#Z#YjDT$U@$2T` z_~tL7ur=j5J%%VVG7BI_2|qYGcw^vmPuTd#cVRT3O|t-YOWzZkDX5;7HLn6z$}Ymr z&G+75bEEQNY0M8seIAmufB^jlc62)1D_Fz}(HDpUT7+k%0#*mTeGcw}KwYOuw$4}- zj!-?V&yoq{fhcrBnzu{NVUNZ^Cuj`rFehY8P&-d8j)@X;DD+_trTB5=uCUQDklKo3 zkT|`e7O6LS>XRGFh=6J02AMo?a4R-_|N7c zH`0ql;gcuLgk$LuT5Jm-?bwqxOSn|3Y%AR#i);O{xCUxNuRQ2G!m`M7J;_8}=CYmU z>YFdijApWmx|`a4gy`lZGejXVikui$Dm;VVi*1B#Z+7a3IdbK+efsyXr#0m+?rp}` zg(e9$oRBL4KtY_2`4GgxTHW~0Zj8xkf8&jf`RuMa?S?yYm-kBD-J=jo^iEpm&TLQR zx*|qMt>7o4!C!;cX>|w@!waK11S7j+On(uxYL+S45%m@Q8V|HD(EteiN$j zw8mM*xG6H<>?1tJ|IYT|W0KkI<^+C-n0y+0$ppSOCcDa3qz=Vdyg6N??4!SaF~t-~ zO*BIo_~PD8+47xu1cebqMf|S%psUHxIJ=(i&1%UD@+Um7FN@%?o)7o>(}*hCl2tAS z!#t7y()Wum)*lO~_?KI5HeHDD`cjpFDK6j+bv-SWs$Du0(4a&Rub)gHJh;qB{{1Wf zV??dIvS3f?L_O5h$!r^r()H0(Nl4)8pHxkD0-yS>VL~6~^?*)e-9s}Qgqq@}7|1o6 zi#1sCQ@IALwI|M61X9o6At8f5az4CB0i-^#@u{fneR}YnpkM!zx9?J^Qi(hVyklK7 zCS#F`5)Ssy&C$xpzG#fc3{Rz!;?Iv|_gWoTVRM3S1D&kBS-QFk(4z2_ZeU{wG5){- z4Zcq5XaOaN7Z9jot2O_uNYi0WRcL3Byh=X#B}rv>SkA z|IGS_Y*Lv|^W6C*aYDG+XAmx=P|L$=NC9Amx!Bdyq;uv$Df0ux2z(}-Mz86hHbQr1 zjk{>kC{5a(KaCK>oZ3Cp?&wKK>=27T9H$g~7st=!&`UExN??l?4?taTuDS8_cPKj^ zV=tQKV%L3nlZfxVJThF_URW<~q%&;&N#}k5NM+M?pp>CW{Z(T4_PYho7~?$6Hc#9k z|8eKM2(M+|??u1L(no!p7JWb$8ATr|xEaFU7NQoU=eA3N(K;g9GUFq#(SR{j0WoXa zhdtQzyg*48<0Kz|gV=_Qj=~?j_+|}K;qa{z87y6CW0ihvcx=%>egL$_8ry19{~#bG zH@ansZPceMUrrNp?IdK^99qJERyj{&kS?ee|Crv9u90~Ipqzbl+BIKy&8k8)g#8r& zoR5O_BZy<7g`gLZTud9BWf*c~FfgF=*fTkOP2_XbP!5gQvpF0}Xo=V9 zZJLA!3}KRY{<^#ZXrfkkrv4imYB3wB;%KZ~So;eEmgSs&9kd61i0q2|Xf%BtY7Mzo zKBwlA7?=#7=H?62elV3PjvO|25LW2mHLZPJlc>wiPI)M5qA-FNQ6{^roi%J`un7kv z?7tI%{k7(`^7p6?{V*o>M7}R+A)!cv-Yia8>K({jr)apF3S}$3hjLdK7fbcJ1_GD! z*^NGbC~c6wNgMXo{uqoDhsP)jT6)s8s3epjh~lNz=UutBRB=Hz*b#=B++K)z{h;&gfH&7a%ciah%NT$URKf6F7y;zstdD4{kfbVA? 
zncLiKV6y@BhY7Y=^uvs0Vsbn-T)YufZD)#)H;AmL| z#mWlF2s(_~R6!m&eVc{ffcKlUvScpPn?ot2@xLJO>z5UBGB~5odfTA#Up3-*5!HpEN=>tfwOqv%Zgf*`tKy@e#FTa-;)p~i zpqRd?g%J>Hco*MJ*)-xPk+Kb2GN?2j)5z7RkAWng2nNWd& z_`w;w&EH-7$4Qtt-kOnfw=5Yg8MRToD9O0ZxSRZM^SvkL-C&04A;O|YMQB|E@8`Y0 zMTGmU)-*4Ul()y{8EKi`g4`8(XZ&eUn(p>iS^r$_1k6$S11IQVJg?~5UzSDXDiEgV z$c{2~m#&U7jk%n3R!om&bx<5-_hzyk<>rThYfP~Y!HoLf+}4{&{EeL+3K8`wq+l_G3cJ2--C zV<6@PpZZp(aK@x^=%4roD>~~(jd7LW(__gBj}{Y(;-mqNy#au7F1hu7;;m|h^?E%= zQ=tO4Q}(6C2$4slAQ2k$BR_`r+Zp@Lal=x(KxAYrFr+W{117Jhn?HD|@K)}7P=SDJ z*cG1%d@Ogi8+;lEuGd82Iyf~yycKlbD!7RIrh(>7y-}r4PT(gVILbRD&n<@5=nkY6 z!HsAz!(=bq%)t4tJ%xrZIAv}N&DbqU0%yOnZ0Yg}z`7>?230{<>n1*JFF@F=B5?N2@j5IIb)|Z>0ONkp#uWj|+VtRlv`NsmQV^!< zGCSvtfCMKS`hc|5^85Kawwd$^=>RGEKT+oR(a*|NFEYo5{X~NO^jVxRqDTh&YSS&p zS=n>-8F;M?->N}tg*v7dX@@-X2HECe!LOkk;p#V_0*W*V)=nj^%i;z84}wUo zvR(2AdU~2K)m90qtUkVjMXVhlx5xrkUpIVe@v>g^tC(gA8?O^DrtZ7xTUC*}hiB*B zX~+G~x1%jfMPxe0)jCesH}i=C&npYD?Ak#_&i~LIcrM*_w|j$TTBgLnOTOb?!RL$+ z6#2)gXFmm4cHASxm)_aBIDethM*EYQxVbUS5QMo$5~mGfrX8@wO*{%R)n%#TTit@7 z#AiV}S5KYuQ;Cgc5nXMcr}aj}1b!61tkPhAQ*IuSXom-aoE;fpAK!U}x?#Cc$M}Mc z6|+zs2K{Eujc|KOrxs~*Ku?(`&J+o(Orjg03y7K>1R|ShM|N2Bu(_C$;=5zw{9kESha;_^3kTkybipWB(?94!c)hhp|DL~i{(h$ zD)$SvLXfG_i%&Xk=ri1@^P~u&18h0Mg`b* zFjNN+eB>ciRp;Kj;^f>xJ+y-hI)Z*?FRkol_V_01pVNDOPV)wfQUXJwdc&kanB_C1 z1<##oP9AfGsTSJ3A_!`eMVJvbe8(tL18>YS;Z1vgK2R%(K;^hbV7HmJFt1`r1g%rgwqo_zsw_7zsf5#dr6jN> z=`r%rbr&GG&~C{YyaAd>I3|tytXbq$(xwuwtc}VnV(R|ZZz_+@_4AGw#~lH8`Bg{o zf7lbRRjZJx)ZQIl@VsM+Nj&RVzd90$u^G_gW7G61!#ZZGtt}~MbIdfbw;)8wjmkd_ zWOXdhbj7geBH;U%w7p;}eHwI) zGCBoTslJVe(42{qplQG~yb8g2M^vMGOLR7mKkT}obfxX{6t@B2nM^6L zh%3^sJmJ*I6Bhoode1)7W;(dEc8ECqrIww0q2khF(&n<=N7=y%rAV#6vp zt!m^%A_(nIE;vq$Q1HxQI=2+&@5XPKGIIz3`1)UOmZ4IZU6`d%McN=Qh!b+K##%@O zSe{&)FXqwi$kB+LA`=ss-^ZaIqQ0$CiT1*25%zx@s`sU6{*1lttNK3eM0z)Uw~7}B_8N_8Mlp4BY&*PR1BK)Pp5TRRcO^89zd={^c6A=yeBPD4nWu*y z^O-db^4qp=9-V)!Hxaf9eFXUTjDVZ_?7LlGTX12hT%a_;Wz+<-|j_8SsOTjqeTnQ5qsc?Im|laW}F&C$kC;* 
zE&G$gVVvmOf}$XVoO^Vq*%xk`Se|ceM@3)5d%@ZngZjh|!^b-qi~Og4ek0(t(aNDa z{a)8m;)$pAIsf4`j?_E{(K&6&w9^>@_!cnK)Cip~0Bn$^UO{lPs+k4@-=DeJDJ(*D zq!hfIBLYn2iqhs{J-4!D5kpQ6M)b1_e3qqub=Tko%hBzR8KiwJBA4W6h8+W8dZV50 zDd^ATAaH!};R&)Tup)dKX61ryox_o8RU9)0O-N=kNY!g9%dG6(`R&l!bb~Zu3mzzW z_pGBR zUrEcJ@Td!`V~`$?KlAtJbX{L8H9%dpiJHAY;`73=Ae{&Cs(}BLRhONopZ)bj5F$l= zC)kF7rF6D~%{jKbPJ|sX?ulHcZp1pf@=Tw*Y<)exVZexvr8YWt_VU@G-iMTQ1|1J3 zEhv|Jh6s4)(K&HiBX|NWxQ5(|YteJb>;R(*q~onzG$IG_JM|oqzzCg5BXTR0PC&{& zWcD2~I=T-je8M|O*l{@keJDA4F0uaW!V!2jr~@61 zN_%@&oKG_=xeuu&qJg^F4Lj>08 zcm#`{5Pn43GRvsT6hHaf+EF^Ik&b5$`J!VvunVdeW!m=K>kQm)pCgF#7dfv9!Lco6lblUEELvsn`E)a#)$XrWSos*a5 zE1O{g7&gAc*Uk^{5gylndmSfc1$SG{W*)O6VUprA;r1?v|2*vrwrff@Ve%R}*lSM{ zxK%w7?#5wnbFu5`XbX18*sQjU@yLKTYKvjvn~;#52u|(o8ct{x9wPPiBjpqw0qxTc zZmKEwZF+hVo_$5YodHM!(@=w91z>4_3F<7BxF;c_{1MAW%Jbo)>{!6^v4@q5;bXs? zMGJmVFFIRR*)|vWQ4fvz^UrTap#?SJ|2~`8Ypx0MPhuA{B>W#JLh{Cc)Ym+U3|HYa zBmY)Olcij;(j1S4&*crwB!+MouRfB|27c^K#u9xsgRS3It#jm`Vup;m!FT!H>~ij@ zm^||h4wF!mWvZk;rtv$ZFQ z7uR&I0+EWaes*YUh1WURjq*_Hs1T{oumz?mxtfoIifDMV|0-xA7_MpRA1h_c1Gj1Z z!he2QHkQK97Qo;~@TH=GHJO@{2d$p} zV{h{>{O6a1|9d(0|51(kKWRWR&0{d2ybKCU^u{Fy)Vwn`NepnWI^0T4y>cKj?c-v5 zR#rloy3vc^=r#JBo@_U7iMj6G=+)vLpr=a~k-$~=+r8Wl?hn#}6?wzjZ z?$tS$*}Ph?$6vASHR4~2!{ggwrTOmOtK#O?Y-vhELc|e1)QiHGW2TkSeYw)~P&ohfVOKDa84!venw&hE(lwet#6k!>bvYfn zlhlCSI6nAmz(8`$;np6i{sAO#{GAkeVAWzU&j1yHOoyXP zdCU(T?i#FBo;ir3WsIyk+<0SDg2YgXWGIwkHNDM2qqV!mT5D5bK<*gN1wP%y9>Hkz zd#??dKo-_JH1EqjC;xfcFJ9csvhL)au|sz{^C`EI@aBF9o^S;!8Y#U3G`hC6E<8%o zIT$i%3I5wie{W1tHKs*)qv&MI=#*G-J=7bg)CYBU z6VH7($HJDwYIg37U7a^n=&@d5CVNYNn^7sFwXf`}ZJ#V!x))2%Q_*C#)2yIe--~`p z%6o30*{pH+yY?Yfw2W1GQ4;IQ4$?W=Y98E6RHZJhYh>$J?H#twIVoB> z*y;cAKdzAMWa#oPPJQP)+)j6P3tn&Q)ZJ*C6>+jcEoS!hk#N`dd18NILEw5w{~4ru zoxTH*?2pR`=}87@qwtL)CO`zmm4P%d3$KYR!=X`$Ug3`txPRu8$%Nf%ofO%O#c?qb zyhtCx=lulp9Lcc{ATFHfa!iIAn{b}RV>o=u3Pz@TDCRB&eE^m`{+?cl`!OxSUxUdh zApwDkXw|wZa>|`3HmS@8I!?k)UYh2j_e0WnEVHZCQ1A$;R`KU78mWo|v7sQGXweL 
znl13;xVsP>FqSGf7K^U7ez4J@)$~nyW@@q#fP|Ag`-j*UW~x@&)u@qveLxwG)+AWBotiA zpVADN;L~>2?UHlfQOFtqfeq)^r{A-R?Lq{SgalS z1~^bk64}I|T`ni3?-Mul_64q%x7utK1BH*)b4#uXy5bS$1o>5vAWJ47oo#ii0e zxo-E#9QE4=*rTA}pndp{(iD+1TJRaHraFY0K7WX=tchYwka zI@&y&41-P7k@<}EnLO*GmiM9%LvLEmTqmT0lp6q9-NiOK>Z)_&DFPge#`G191pEfm z8b@j!o1q$^%B;sd`hj+=~=U5CzdCZ2oC2z&CDtG0E;z9{6FVbW0&wr|@YW;%enB zh_0mdUjF>K(rb%UuK+Teb#C&5}$8+ z7(9*geH-N!G;?mS-HM-;;^G|X6C<>VONSz(i1ieM<21IE_Ct$H>gdM7d~s@99JBWF z46Gm&&)|;R^-k$NnLvuY)&z`SUiWj?r!X?NCRm7OhqGWoa5tN}btGz$`U<$4BBW#>QY7C*TCHDKZLJ<4LtFNwjG#h3~xW)Rr65*KON-Fy((mUDJ zQ_Rrte2G~MU!iHU54z2WjZIQM+Ue%zYv#MSZqatwZXA;sO0VbJq*Zp&&&(WY^Wh(~ z;eTQX-5~Bfrv?XTqQRXJ3;{Y;B~IyIde#cA=g=`}s{%DGfsY$QW8q2@`>MjP(6lTM zmE}zsI~ImNWj<~d=7#O02yRyITy&oKQ@7q8)2$W5$GWT{?5!UbR1sH?zsRv>Lp6<{ zN?084efyn>ywv5I8~50-WZs81L}$Cja~l5cs=@A`CLAYO>3p$wtb71NX16-}WyS6p z;W*;)kN3Zw1>p6+ZA#$ec<=u0yJR|#V5N#iIN`+psn@E4hiew9Ltx9Yv5ZwmHFG~v>Fnyf2VN%6`g-G>ieGQ7k@RUxRHnJIE;+KXF zo;G0j1)yTpSau1{4~@M6_i9@v2hNcw;#{?hI8DR+SJV-XyztcYl$eUgw3JwO>umNMt8wAUvUc%H1@y7@+``pDZ$VBBR#i6m*b}^Zz zrI+S6^0QcE;1A85Z4Kc7u@wU@A7TGARA}q@0KXhniM%DVcK`EdXg>p=46P40seTx` zQ+T^|e$=-f#4fP z$ry6(kY$db)ewSuWSPWkc02K++p29z9J^htDhe(Q&7&w5``D%J(bh#+(o3*R^+{a{ z@9N82pc5U7D!sq7GRG|u#aM?pDZ8c1a_w+NNDaHdtn6};C9E{JW8hrIjqf^}jsZFI z@@6id)^LgNMdryyo${041~FoXxP&|9W`?|r;g^psPeKL2SJU5<5a%Q zW$=PI#Y7-mK(vt^D;*f9$svC&hIh0jj*RLsIGh5tUc>WmP{$fqFidw8t}rA6Dy}mp zrjT?$$1o*APO&*!Gmn^}!9_%%)I{sD(r+IGQ1FuR;P~0~>rpg8_3ZV0yw(Wzfa3k^i zji*6Fb+UP5`m7^Caa&odz(3g1Z06?9@to;p^)lT~air8-1b^-@$@N2Iiw1g)X8Ab7 z3O5JLB2E&k`7#k(DbhN}*EyrBIJ^h84YkqAO2Ng_ZWY5Ul?3w|y3wXOcx9KgCD6n( z*PK`!>37}XD0OWDpm!Qejtf;H8MGHZ-rawu&5OK*$o@Fhi|%*R&`IS_89gT_u)2Ne zQti8!3$?4L22XIo>T;e$ul@Zgg=Hrr^6gQHtj}k}ZgV|h!0Log$hrMp4!Yg?dCpzV zDfH^}Xt8!}|6={eA-cbuZKvm~OfGSn!Chq`Y8~P}Z zHngm4I_z0zw;Tv_I{TxvxO_;Z$4DBi2AV7ThubcHRZ?osvY^;C;Z{Q{BN0CFB%HNz zo?YZvh7ev7*G34V{6O!w#8fvuq>9-~FJ!M+(dcTL!F+he?-Nr!yMz|fInbhRzlP>} zpksX9P29ww4GQk1#mVx|-}A_L<`=QpOg;I>64z&i%<&BF=&d(~qz_w~l% 
z+ljKu2T+dKKt&7UVr99T^2>wg`U%yJTTDw3Cmd`cay@|u!U8_hE+$Ba*0sJsJ=ymS zsj+T!xOJEd(N$vVCRE#iK3-S=ofD&bQF=ah9@#f$`k0_d33+HPlUn`L_;*BaKLZUC_cxN2D|B6BxU;h3} zTJcRFdvN-vwi$wEDKD$U|v3x1{VNE_oNotk5}O(Ey?KT6>|Vq)rH)pQ0=zk4a7tHu8;oRPnsUlYQdw zyn!pvJput9Qd0K+C`u>8xixdX;;AFrQEASU^jdbNePpkYu zMIpOSdf;|DG=tx|kEEGEfZC$xzhZ0yMYUNV89b{=$Mt)mKjEPPSgz#y$UMO5*9?+jT} z=6Lc)!VNBoIVIT9{oZT$&hKvhywdMT;r7;yw`!6F3PuJVLoymo0v?(zlH4O+O(Pa& z5!pX~>B5t2ux4k^Tnw0*nVBnZu5&aufB$IZyEo#Iv^qLkP+jAiOtyXL!mHI+Z!u6! z!%3qSk0V}7#Z*f*1tS=(1CDsJ>;)nYMl25I5wnx$4eZM2E1KD`&$-V7{BX{m9aO6= zM{2>%nR}cwhRHpH^#Oiij;wbUUsF9`F zZ?1g8Ry@0|M{LI@0JEq}y?Aka0`m9HucKdnXPZFy@e9qg6N|w6m-nZ(sT8SwV@MBiQ!;oAi%~L$iIR?ti}E^Losk;Fu2TPIEPS#o6v>!qOhr=h9I+QGyHLHq1K?%%Lu!nsog9!S+MH*i!g2xP@L9UX3Q5}TA2H3*BXm49SXMRSo>&hnRZcU}wLBvu2j*^fDB zo)B)eENwWU1hdqhG0D`@wPNK;Cb8cc`r6~XwsWRt;F!255B_>_^jm|N0FKF_R_?R1 z@C!nQ;@36(6(%zc(x1>dzkIDl59wDEBQwOFh(@G-jzm-CK#dqo+D$meW&rglcpzgN zXBgN6g)ozi32J*Dtat3GezlFY-_Z(rpOyz)oSDyJylO(0_ zWmWN5g$BUgtADKCyd;Mv8w^i#Ffmg;fr?TUM^PLtHBz#ZcN2F*3YCFj$|aiG0rfuarqE z43&q-NK^1?;fJhm#;s+0nAd>#Q{1Iy={4QaqHprpFC#FMrqIIS#me@1cingm0j|`N zTlJaQ?Wax^ntK%n5`;)7)T;JVUBt%iw9NK30&|$K{5cboZEIRiQ^dBpz5u$N#@{gU zI7?ZCQyka)kc^_G{fH*Ya(Pt@}Tq8K@g4a5pYH?il%Xv(-r^`2tn!ce@8p z#4EPmET3f({@}qCM*Uu+2y@_>2L$?p4M9Tt@#$s8!6vE?CG+SfvaKy@U`6!-q{AP& zV%6LB_WyK*P4Vf*YS~p0MEjvm67my3&wCqr#2Z_UYaH?d8)EP{`5ROH)p$4wZFOBY zhVSQ2@^-Z0msrdu-h>6_vq>=GwaCz>m0tBa%(r$yp}-=ahEe67xTg%Yy*UQV{i7=P zSu!(@w=2%J13YcMyRW>*O&ap_^(EYpD59Z8Y$Yu-Zd93AE~ZG%D`?IuUZ%9B>Xw@7 zjmfiqx_0~S_KOiHVQOt?bD4yS3YH`lKY}-dR(vLj|6;Lg_bI>MT5fmVuaj zCi?)eZ7M$RYN_#fgU;uHI`24+S25qZmf(D&L=*E)uOcrd(uLwv(4hXYO?{n4$*ezPC0!THLbj&3z zj0@UV9G@hsR~jw}?i744GYyj#1F;16)Pnv*Q3(Zy2S#@@5%e$-r1Ds=jWAwa6X5QB zh&7>m)W2AX`vtu+{K%#KPIUwh^IGG%Li#E~^hG||=C(;|r^Yu@w;cL~3 z7xW-Hh{KSdXG@392>sgA$r8;rrh#{JpES#l$CEg*@XW-%tig@wT@{8KM48_pKbJbe zhnwO#!%JySHo1;eT*W&{w6*jecfc~(e9vzaD#1x#99zt@+7o{v5vyLpwzHtnQvLk$ zEu=!laLH+IHAeEWer1Hot7rT$`&8Bi4~@F@kvERi*#G9{>FYTR)ZilE^{d!cIezg9 
z(v@CP#QQYK;Q;Fs-#IBR3JR$G5Xqx_R4BFU)ad80bF_C3n^_1=c4CWTYJk=h^3q_O z7e9q2aEk@Nt6dFndQ@n9nY(Z8&YTqIP3PD;T zE(AYX?iFkSNpx7xR5ugz8^{-86uNxMFmlzmND(7{LRX-Zb(!gw7}?xBfrlr|1XSi-GvuyL#}uhXCWYE?zZ3TP(7Vjz zX(Kj`#>LqD?{i+I7YK$Qiv;wUB+Xqj<;16ABNbV0_1KO)$qs^XA$_mHBi)+I@K9)x z!gF`PhNc%659k~J_yFLP?|(bf=C_YdKOcD<)Wmwyrcx37oQu779C zhq(m8@V}G&mv$ia{}s`H(Cx`Z7?-D*_K}kF<~gnu7iJLD#*Xd|d_;&`p(e9^+iNOZm7?&t^&1)H#*H#qfNhc&qqLrv z_qB^Qiip}X_zfE@Kyxh4_RM(8otB7%mtL6D?o)LrLLz-K-a7U0&AUiaMZ{Qs;#HD+ z?3sUFJ6Am6B*%5I*9&BLD!;0cndvt3l|YicG5J}0ZSTXXXvI9rg>&%@lF+7TLX&7W zayaU#5GdE#BAKXJ?fjBGvVzP#(rpaLC7Rb#iw6WNC~TEuYJM_UM|*>s^|J8wg}L?rcB%^h9#e07a?OnDBDqTG;Jrb zXiq|U0>5gOse1o(Y;|{PlggkO26*lOQiEy>Zmn<$UfZD}|Uwsb;h%iWQG^}jsGhzz`FW8K3N;N(* zi`r4^ApXRSrTf4j)e%o&Tj|v1$h!{c6b!#Sr)9og&MJNOF{ZK(f-$QJFj5Ds)w7>+ zoXqU4uVQhrj(M&Q%G5 zPQL*C7@bO1YcwGfz`+!=W^xglB}(2MB7Skm9)Dae)FED_`b|V{E;X1U4wK{B4(pUTannE7|z>khp|oBD-#5y+gs6Z-xa3Hc8$(GG^MyegYb1=`mI8SCq4xX(HuA z&RRe!{|I|KsA4Yvq9Wmrh7q#|D(|?MOPMyK?TNKcU)>3b$Vk>9lFkr`Lt8K0(eH5fM-d zIq)7Cp9D)v0kD>}rpRiQuzsTYs9g~g2ZwXfxQ_{^Zs80s$BbQXjx4^abE5gWt+fr= z9|;n5r8k<-q*}F&vt6~Dp$BG+OD;GoiHG3&i-Fa)nKG%58|Wn7Dzmq|v}rCF`K+G2 zwv;^+j0`x@sO9}6x`YQc>c}K-)Ak2mRAPy2QiMd1c+)EMU=z{>ye}n`7vPcG0-^qA z-tC&9c6yS5a%^}I9%|mJwHu(f+bdvpcfF7qbjZ2fi-n2R#MC~5a4wRpZkPl573&8+ zmOVW}_UP6A^`xDE&!MNdJ-w}kmnUP7!O!-*dXU0*CgB0j$f#bXnmQ*7=jlwP_g+fk zO+A|5uQuG8Wj_a6bXnJLZ$R(WvCQk^1Y88R;k`c*zYU-~$fx{Sb^Cg9&ts74ywFeR zewu&n@$h|iMd3Hji)j4ACb8xpo=}6=M{u)y%E*`YsQvo%5ti8g3X~9D4BbZUZyj~r z?)1Cj%D2#V#y*PPxH;P%Ne@yfp@GBain-JgWF-cQzi}SIVP1uBFOlIRJX^UJf45+$ z38#i0DvXN!kZJvtCsLjCGQ^bI@YUQwN)h{Inqq692VqrxXE?HYxQv|L$Eh}yp7*@) zJt8yNo2@f&>Tzch}(V?iSqL-Q8V-LvU%_-CcsE zp>cP2cZbv8-e;e4?-+O7_XqT=)vIdGXV!vqdC87pZUF7mo|0wEl{>va3*Ok}v>MQN z`ZT}SVo}(#^mtGH>NVA;qQDYMuloa z$vf^oOVz#8zSq1b%UdT0HPyKK`Gka@EXH2e66Pm~X4%35s#nIC>ntgWz=3O$t# zPiduSiUkIXzNn|x!%)GV1j=+CumG6SMM`=V@=Fa&-J2SyEV=Ra2bChC0N7;~B6kEy z34RO*!N9p-gGiE2&p@NdXcp>6H_tc zt1-xexc>cse@qrZb0sWK#l!@9s1+jhzF@rqd=FZV{fC90Hosh95n|p2X+Z}ItJSB^ 
z)L(Kqk_C0ljrEHjD^Yf~p~yB3NUWmneA!1)mbX?`8T=-{9$TU%(nZY0V)%R22m11X zrliBlC=E|BS<_OVw_Z9P|329LwEsz@TnPwr5$^EzErVYQAD<}_-nr+|d#Qqo8e=fv zA~HSl%$3z>0f@|Hnftxe|5}|-NfjD%jFgPP(AFL*SWUK1?#S33;B+a9sXncvBsG=P zkSa&<1}vm!OfaRb?k%%_)s_$UbX`)AZ4BIN!$zQN)NmWFsqK6g2gKYwnJ_)?vY`QY zF8@lhhs#$su^}Y1O8b?ux{r-ax%kJ7S=IbGP55%M{euepinO7NGI{i9BrF2a4rQH> z+m0H&>_1-NRe%j2KZ$2wnyxlUM~2a117|Cu8?INnUtdV1bj5vvebrklOw#P|LnmeP<^3Xge) zLi;obkBZm(TUwAUurG=ReO9XTbc01t>EAYJsuIb_Uzwn1*DD|E!dTXfWfQ|kwy9?{ zK{F~kcAdn>Sv%S&?o2lZ>u@+tpU@$i?Illv3R&J8Wr4(9w4uGHEm0|lCWLMkKq~f` zCsNdINtYRo=^^PlT$@=OrRit3E_>z`Qf?KyWJFZWoZj78frrFtK=;FkK}@4@VDM2T zW*u6Uh~I*32avY7e`lW?)vBJp#^Ny4@ZB{kP-vB?>1yAFL*=t$(AJ|!25WJ-Dy~-7 z!%A+EGu5J`v;jE*3B!!y{RNe^8dsDG{VyaWV%8pb8@|Pxqe{I%%Gpm?5^o>BvL54C zxt{VuqFAP>4jztkHQIL5VGU0zWQ72Fq02NVxp$S^2f?VRRaL{YS6VfMZK(QNrSNzz zxew1MpyHQ(W#2pB@+vBhgPyUE3L12FFH#9+Vl4ypPnEoYM+9D)N zL_rKN^QUSS>2hmz zkwky2{KP`zJsQPGfAEr%(c_R~$piopX9yjoyPKz_V1yW%9&X@g>i&tu)>mPAla8Ee z33d~(C;^mk(fA&qCKWfxV#HF*3&#=pQAh=pw3~09xtvLaKpKX7GRIXVq^wW#l-}&o zQ(s_V9>_1AxVckzVSH5<=|nNG*VUJRFE$4X0y-9RSB`dubF{s=E{VWRKBrVzv z?MyX_2-%6|=;~z>KK6LYdY+4tsKX5z3ju@UHyw?cfYN4_tOE@sshU#z(z6NVv!DnI4E^THD&`eN{HmZ&+Sqv7vT zFhZu-t75_j7WvZ$=`Q=Es+~xdh!5-1tc21bM}JS6i8W=PVylP`UeQGm`j6?q49;$) z#gShJNMEe^If%1tUf%JsiOkv`a-&giUG|y){fxYHz7}C1*SM$!V$l(D7S2Ywbr-2) zA)9)Di~6h{0~7mBL2BnP`_I7$GqDhMEIzxoC@My3bbE{Et)!?6c(GS@82K>n*nv}r z^rdpXR!K3K+?zsQ>E5Fq z$A4##>MK1e(NXB>X6`A7`uF+J&@d5b1F2Cylc_^;kX#nku{%vfHA_xa6Y>Sq6nlEb zj8(jxwAykhjK1RVP&V9E()H$gNHGR`7Aa%V-n3{DELNXprC68{K@8Q`+>52FcXF8@ z#r+Fnw7erkzSW%ByD9rpAa;3+0)y0Ev|(W6Ix5V)MfKr})BbQ@BtyVk{Q=5L~36JYMh zujLzaD_BAQNuVVZ{dD9<8Oofzo3=Y zhg_#MqJ!&Dv6rknf_Cx5wLf<9g--a3I zme{YjelNdkGQ(MA(!x~2N^5^|_OF&dP#0tH9a30K0F5^^jw`c*`~tkr5zo))(K2{5=SQiBvbw@QgmQ(x{zX69k$CVxrpS3DEATVlb?vyK`Y(E)&IE% zl#xtA_=}E(r(KIqs{Q>X>~?_v|nY*eAeRwA5S%f-E1iXatlxKgbDp* z%sv3Iza~`K6U@7vM-S{Y478D|BTQ9CjPEdVfb%h%Gzr>sM>9g0oRtIT%nrvfROs^f z2(tjqwY77ms+%J$U#9NA`)SU_dZ@{J1bD|fN;{=cgEfoVW>`nq>eDhWt+-_-_A|yu 
zt4iKRA*WgBP8shP${F&!p`C@NJl!w8LW6Bx|57R+v~!L=%qQ?Qj*sW1DRuWgApBsV zXD%6TH*_j_VJLwxBW-;N4teI3p86wImU z8cH$XvvSKTV&`a3&`Ee5>~@_G5^_DZ5~NfrA!4z2-Nwwn{VKSM9=h*iY=vJsho=&t zav_E)_wg#RP=$$?#`gG5DH}czUe<_{Tuy5yOaB|x#~J(&I(m?{a^pMH_KzaL3FiBx z2{~XiiPwnm>F*hzL4`fLWYuBQgA%#Tv^S}8s9Cb1+eT%#fc2^wL2@-91^MMtWcBzR zGVD>Cbq#F5a!r*EeF3|SP=2FqVAs&!3c6K>NP&Vd#M-obQRiNXuB?J=eeJZ_b{~e7 zR?$HP+O>~mb4b|e7Y9rK@VF>!R+!}7V!iF^n^tK8G^DxOQ}!_x+VuiZ=od_f01v+) z7k6Ruhic;Lb(R@R!p>jCwK{!WDcd>G#W(VM(g%fnevnJ8Pn1XqjYxc?C~@CR?f0Z} zQ=`6@l`Xp1bkN+RcaJJ-AhIAy-KUNnilW&mbKdCs*mcVvVv1|`U=%ePY==ZOhzpVL zJ+nFVD4n=a5)}9;c_TfizfSM0XbRTKHu>=jHI2cxT1!P7k$s3`{F-#xA35#ZZDeIw z*$h46DY@D{w}aAxG3$tfT>vOi6!81=WD=C<>n$3c&=s~z2aqCvjS_?^mzCAkBA`Np zZs^C9M=o=1-p6JX+r?qf- za_crLXTu}?MJXt89{7~he>91a>#x6C--tST3B=}1#i18ONCPq7gv9owA2ft$!G}U} zD5uFqLzS0{3=K}U^$f}`YJ6H7mRC-=E&?W9T78{&mrT=w$|4cBZl_l$g-R5iskDa7 z{bz#h+vDWEH6cGo4?o1t0~=&-B!WPSfpH`m*FN*ZC9*f>W;y99&@-ukhc5!EAU6{` zl1Wf^LsX^7Eih)hPdLi5vu}l^!oS_lM@(YqZy&|| z13cO~$_|zY_M@pc6y>g9`RpgUNBiLB?+nmf$0c$P@DHc}lN#=!St5fdxB`G8SxQ1+) z@IV<)U|zI@Ati;0!a?x=qjo zYkouvH>~8{K3m47+|{n#HyW~})E)a33{wyRMN;H>O*J$ZkDDjy5B$|OUKq-)5O&)D zN5VwpblRO}rX!SIjWkea%FKfG92uC!Y~?P2Lr@+TV=H8bhI{$Nj8d2R7?hWP_I@!Y zv@Ln28!G*Tj;kb7wY|pqre(*JC}B3sQh9=M*Yw4h`S;7j4@*I!lHTuN<1Y<*#wEkY zCaV2pgaFnA+7iX8sLeS|WDr(1IZUSA)%Kvo?j`;;k^nuuUFF5sV3cNt2VICJ9<}m0 z(yxc>TCS%=BS*nqt4)m%LYkUBd|B?jLPj3!bB~M4mWos|1{kcN<1OBw+}OtXQz=w* zF<{!c!Ulir;0p^-qoM05d%$B1XZe#!-o(g_)X218Fq9~_}5CRfm-sP6!^NPg!@?cF?>^#d%aWL5X`06F!C zV;4rotJyT!fEsP6`#2OlM`?N0(bPl+7-iMBA%7=9Q^jixEL8+Nb#gIdQJQM){Ze<9 zNwJhlm!|}ouJW@lTBIw7rG+YeQb%%UH@1FlBa%z?ZQU<~?bz#?T_NpgcwOE>(2(tF zC5;9owp!%=+c^pwHhiL9ya!;az__Mmi`&q&f^v74Hs*od zTZ(39=m}QBs5Xhtiy$^bj+fSQ=dIp-QK$Zo)*ElA(zbuy7qcVr4alo5 zG2?08o%-`U%Qlya!I(HD5?JNDbrT2Vctw^u8qOj0S50GQuds~i|--J-YX8gEHEYx&r$qnTT8oM z4Bd@R9Qt!EL8-G!x!hXMIYp7g!I(A8w^}p3VX-f1&#_^8ryL+Vw+^ul;o8j8<&2#? 
z2H0q{5kt+da(5>f&?Ei-Ivnf%^v<_v)dN#Y_5QIwxP)A>(u4#|{}q}rw;J9{UWJ+E zl)HoZj#av4_@$xaeJ6hp3lhr*cJYCYJS#O?X&R-&hKFH#DejqOrJiX(6jj1F!$zaB z!$tv~n1Roi31i`f)yW^btZP(AePeOXe3uz+FxUM95$k2(Qaq;%(orWO(MXJjzs9Hoz^+wAbl;!K**56v?)9sl3Wqz7 zqj71^Uz<3>E~wbUKR}Z~8Z$OZ>v*xMBJ0TTSYE&nwAd=Jd4r9fkuoh$;lBNo=fE-tgvajhdh$O#Wkc3gSvQlaPUkU&RZ&uBJ1cc|rgjP| zEEdeS!ubz-LJn6*Bf-FkTAU`oUuIX?A~*BDPO9VB@ECMrTPiu}5t9dzl|inwuD%lr zRb!GP*FW?|LdZ9@4{ka;dkf@<^hl;RL0jcNu62%M4~qtO$SGY$3^~+xq2tDGM5fnW zShKt17*1^GvVOVc509{)f|j4!{mBM~R=HOMFH3h5s@%+qRwZ@f&~%~59U}q%*l2J zX^0E8LeWWH@8Nu^Z!@ODt^2L9Te@Q@yLqgk7t1!P$vtvQvq&}w;+z+z5rO6!G9`74 z)>PoIbp6utahUS`UL8Dv`)r6}ST;VWkT+f!rf%b+M)cPsYdPFsmXr}b82-d)>a{62 zH4RL~qb-8!q}vt95p?*|ohFUy8KU{uKM(WsBM=KwH9H@1xnIYk$|BJ~yA%ZGe5}%?g`fB(xOBXs7#Ax4MvY)^jq&O zjCw`n^DaFpin7C;hJz#23a$Rx1)b3}1?uMl6~A^4yxV^t6~30mn5L8K1M7`;Mee2_ zZ4;K1Qm!nuN{Q9QL+JcUo`UuZI_GGV>yGR-dq~S*qC2IBQ*)A4-UQm}ZJ!2rc1Dz! zkuoZno?>K}GEDAj{%&0}g{VZwY%Is;TBO+>b75=sSuj89yH6HmXSQl!<7sfYu?-eR zA-WIAdv#mEyPm1b<1qlD1#&Emc_zOOrM!;xpq3mqNj%1~CA(c5+2#^uMJ+bC`imBR z`UaX7IH#;rx6fxiBBEY$7R+5_emslQ@DT;0SoyrK!UtHTRaadU89I^t17Y4e?ERJ~ z!OcHmK&>wWq&CLJrTo(Z#cG0LAqs@XfRyt5#Vn39h7ZPH5HNsHz+&R=bsGNa;c!}^ zg~&2jH-tLXtqb3I4>c%CS~EzShhk(q2yyc*?$~t8L{F_vZ0T;b;!>TyH|hc2+CN^* zQ34(AOrv;X;_X%JA78%|3%Gjd&_66?S_9hpS zT%x$wPKglLWawzUKx?^3@F2#oNc?ACD?#&TxK7dOqkfeI^Uipx@w?x>Uyp>Xpy^bX z3bC4Eq@RG$YiGu2puYODW+xUQ8V>B2sZ75t+fX>Rq2JMU-anO_=ZdEaxIph6woJ`* z;OW!O01N*Bs>-D;%czxqxV~b&qy{$8CU7`9x<{AK9NiWpFBPGEhinD$$bCRxw^!D) zfBGRwGccc(<)IowqSpL*s~YOg3X#YFU|;Sp(>Z6>i$>uc_fx8>b1{>ZQWjIp=g;p( zGcfSN_{j*^K6Z&6_dog7YEPGS=kPnISetC$K&t897Qk69TU_})Mn}t@@&>d~@sjq> z*=I(Wr)ahic?T>ojxp*^0Ii^Vnl-_#p|ZvG_oBiNrK7VdZ7X#% zZvfoXMn^J+W!kbV*cc-3n1n4Wdw`i)YtCNCQU|izY-twLEhBhQ3Oe^up|4*)20!C3 z23V00&R}zqtIElPGN5Erg_;(7w-K${1clzpbYE@D&E^*b0dk9n9_=5;wGe$F zQ4a(2vXZ3dBO!OLm(EJ%knUBdK8fx#B9g=_KQ;bnoIh%uY!Q++Lvvm01~PHXUhdIg zX0vkM6v3m~jRd>i!}-gVL^*^Gpji+#F}^{p#!xxUn?MlZFX8_U0WQ7%VhaP6_jBZ7P=-C5TnfszX_Iv35#!s}$5k21XZV8jR|Nh`mXLB4A!Z 
zx1k|cp&TT=ctu$F6~1zMd~?RftidC{6?g&6M=pESym#$ka(4WUh{(d>K`RO9@U(Bn zcA6it04PfJI0%cCoc@vU*%0$UpZg-fHY9Ao)aFOq7X=xMrqE74Y0F7QI8kQ`Ag%`ZuY?t{#pn)p~&;5wYx)EQ$Pc5HPLjKWkV{jLKVr@);A|T`XHD zw3M;0x>Fy9VgE7uH;(2`u(*Jm18uPqs%44A zZ_*0eVNa?CPAop`=n|F50S{!`u5!c@Fi-5qC3V?o#PXTDe-Rx05INd6>p!O(be(8S zQTc8}$V-{z?+}&cq1R`6Rv>&f3B~8$)Ax65RjkK09cHCL=+|?)kDz3Q0l1MK(~FIk zxj8|RdB#Q~$&MaFvWvv^?>GtI9*$3?5ZHt{hp@3oT;rLwt+LIUSLn7X2YjE}JeO2a zPj9m`(uA_^6!{cl#$K{c=y-9pJ>`(q(>JDMLi#3+aciixh_d} zQmBR|b@1S+G{TbMyIFJ?4Cog`{F?%$C0(%V6pJ)(b|FC^yc9en;ArPh3|FR<@!%iE z#G#TVi_(Yp( zZ%c|rqU=p9qRIF##D@qko&$@Q0Vo87V*g&Xl4gzkU6gOB@G%CoXK!UeU@U$;@7yb; z{}Y>n$C`=r)bRCdCeOPok;)W)^q|f1j1y9nW?CqJ*1G5Vfk<4UhFQ;372s(xGm7E{ zB$ssr4SCOcv!a=TGtSg7kDhS0N6uRBjP3waC!E5;1+54Vgc z@<3VZHDWi_!gt+J_c7Buq8YgbY?l-HN8bA^x8Zxl-WYafHvGLg&gfMKunpVmyKa0H z29e}|X2M-HbIXl#&?q-3NVb^NKLk-{xI9 z^r1;rzh`lzBRn@_y7u!*{TK?g8Nam`i`CcFGv}kx&pue(R zW-!uHWSHbhCONVQWr>0Cj{!_eK$+o2_8`kdLn9O(p;ghPlJ!??P|DKF;9Nd4-h&kUKcTUZ89>d4iI>&q!ZJg`QL)Xn8_Qf}{r?I&% zN_V3ggcMVw1vH4SJ0`}(it)1E6DOA3h(t$%vp?K_I8HlQ;CRWJK2-*>KMwOJj-TKw z;K)&6?7Wjq+;xI_+WMN|mr>zbo`3VkGN*vy5_~$bYvq@0-@e_r||lHz!$B;HI2a_Xii%?h@H*H}m03xjrjX-?W%SGz1A0+iypCNEFa1+ z&swoHrI|;4L+Ol6&!h0ZKV{h3-1y??Q7AelQjRlsjZct{#Zv1l!Kf#U1Rcyh%Zu7x z2A>LVmjEdD9}CBc$x-@nM(->HmdT3Yo`LIGvVgXHMfS&}0P3cmsk?==?eC8dRE<3* z(NZ?~A=%fF@bNP|*|!^S0t_srqksw;YLy!7KUQ}#slfQ0@G&m^Lcw+xdzOb}vnS`s z3yZ=#h)S3I#Lre&yn*++ukO?#s}HQu$x%e~Ndo25f_tTpRmxq2C=gg=R?Ohfa&VsS zST8(;0~Kcj+|oVwn`#_m9C$@zvNzsfsBM(-rMiyIej(f8gfrF4PicReD#R7+5OURZ zR9_fV{-QKkc)og3zvejUr+AjgY-J@OXA!J9E>H=Q;*8qN&2;dy?;v}zJK?L zPx&y-^7)X=*FuA0hha2C+C69a@!PEgI$A@*^e$p(y)LL_O+;WXG8B`hMbEsyo3Zlu zYH7QNv$n=FdZJ@xC|$>%3Ws;~dPIktE!LCz?RJ7*6Z6RhO|-@pSG)1^UOt?GzdGgf z4OCj~rHH;C)OI>DumMPJBME>`V2g50hJf~gWocSjfRgVQHmKvAt7V%SWR?8I@Ik$} zR3wZM#gDTOl1`k%i`MOEaGN0iMT);(16Ya>P(@eF692b^-MMb2Hn_81ATh6*4>v^p zKJ48NzF1{P>oK2uUL6YZ5n%;;TR`{}RQ9t?#%d*Y+|U1E0Y2+MXMXPY$xuTN7~q;4 z9P33iv)L5q0iO5i+fCn&yS6)?I2ed+Yv(2#(>PK-{{1_^?^s9nt3$VIJYW z0=xMMx5C@T6Ot9>XS)!JW?~3c2;VQxm#w#{EkEkuuP2N 
zUF*Q{10B(D_>1tK8Qq4{%31HbLixEik*8!RS>!Fg?3_mF!;1g|jm9{g(k#2VxOMI?+0v4jZ-L9W9a@KK8&Z=*a!=$6`ofo*C zz}YFRo2?ZH*0XXPufvqB{fzyWH!11ES;WXuLw0|^+c`2}$6Cu~J43XXfjlM*@OaP* zCmi&>Y=s2l_y3hPC}3Qowu5{zh%E3+pvpm;BD|g@z?yO}%~+usy)1=R8f>p*^>&F! zomY;V=0u;+Rm6T%r@f|tWzE>VnS7WYpEp7Ks#@5$l%)eOcwB;|Ls&Q%k%&Z!8nr2XMvA!sK17#7|fT1_Dm8lz=VR2 zr)?NGBjhXN=p+X#T6?&miN(6RX5hhr9RE7*8|UG_H4`91;Vwo&8QgBndOHlyS&Mmc z+v2(8WP9L_2KxFhhR*3a7m!QFlDSndnT5_r_6)g|B)=ymcx3w~zr*r6ikhY(?7a)G zHxjMUW>)H=+gYHIOSbVPXBDx1S>chHXu>&YFq(&xicj>&BLc>h$%KXdWy|?wTPe;% z;(v!N{yW34n}6UBP3UI^|6g{ zV+xtSt>gbcUa=YMB>kT}L~9q8UeRF~wIq@VN8VL(|0AjqyUwl4&%u7B(yUVa z*ap$+3}Ud~ApGw*2#c&1rJ&m4*_k?#`-1yxK7z7X$*XDLX7wE(|Bvbzmsn zCziN?LpIcfS7&HGPV(u!5YxfmqBvY>yPg6%aw2h>HVF@acQ2IFkl!DHsto zcx1khx1zL0jOi3VL2^2wBF_zp@xX<4Q8g8 zb;8r&6|9}PTgd;5`@=Ht6*ep6AKIejT+?az=Ow@Wd6>N+iKQ}tQwKj+`|D{!4V%s)VZfv-<_$ zyKSiiSfrOuTsr>IjV3F{!146@UKY|l6^qM8qcHqAx$*Ab>a-b=8!(9ogKY?bO8zDJ zKN|r2`p*Ucve7McIxV(pYI=)qXGNTLEv>7`6+OMi<+T{C0)4!mAPIqkZp;h)VkQ;Q$*gG-W^=XsP-4rk7~pyDb@+C`ve>#p`z7_a2MhqZ6q$ z*$(f!gnr~y6-N3?1XyED6&yD)Pv{r4%)8W#$=8w1?$zCoxLmkxoOSKh;VoNq(1BU2 zyuaHNLZ#k>Ug@V+;Kw7sKVV^{rxYal)xAoHu|!wRSifA-A|BK-dA{^&EF{;o<>H)ERnjXnMO&}>+cwpQr$ z3$uLAjC3zx=dGB>nL=$SlE#p?2(vEQ3(l}#n7-blmn;M8<=$2?9C9v4`lnAc$lZV$ z$`!zN{F>Qw-L(N?e!iel`FKS|oO=J`z{__Q2E}@)fz#PEUCjL9nJr1=+a=g>8Nn#bozrb<}VC*l;Ho2lQY8kz;fitiAE+JvrcDwpQeg*D|-@ zo0(D%_c5kozuLa$Rp)l)dG;{f7osNvcPzXm)B7#{x;9xI)yR6U?98^>UidJ}oK_oOsU<8YfLSSn4y6CR&x4eO6HX?WeX$fhY8e1u3$a^f8$n6`T=nOu1c4C+O@ z1SFax#ZHtK4Edm|Oz9%~wD+x9hS%Rd5I9s@`m;>@LrDV$)G^7cXP>TC#yroAElfs9 zdI_lRLgVB$<01-R+zL2 z>4pP35(8|t+~+%M`xYi6*D?0@Twr2j^X@qvAM(a_I>95TNj#HwFN5pw`sx@?bi}!M zcUfES*{r!?bnf``oSL5>GA1yB9bf_G0y4;A5t~%K)S)1~bK_?M%9;R8pp7kUL}`o%}p?*NlL0RFdXXuSmTw@%KpJW3|5gh7y z>fJp;izO_1J#3m9d?doRJxM~(HHuSs?Aj8>O!2 z$PjjcaCSB0u`b*TslPJ-8(8~MwGrPa=_&Q)lSj)33E^fxEx%A1({|n7cw>=37b=kn zm?lN?$OZhdFxvGlWtQK%Ine3BX!DBzR^2m7Ql65Zb zTWb*6Bs1zDnrXku=#7pO84-%q1j{$4ZU-VYW1~s1u)+A=r2lqapN)miOz8h$F}WFh 
z=1pvI;vO+0tUVErQ%NH?GfQ128^QFJ8-51Ru1PsKz0X5@VH`e+=UwCK9bF2BEs{ zM<2U|oSKpT2od&RfrmpFI26yC4WQXNfm|VEKK5#XWbYgt0^Vc6(3r~=-J3QL~kPU z{1ocJAd$ET4>OM}5=C0yx!b zk17i`j8A9~aG)D!x2_5)<0bev-m=cl+es@@_1rFHNu0d1EcURtYxGocJbA>fT%w~C z5USQ!F8)$$V$I6o+Z$iiWAi8fEWvD9>e+Z}XsA+V_s(@N-O3x;8L5ofaQwD`RdD$X zlN}shu8!2JXeoeG$Zf^^F^cO&CUl90(lIHf)tO;T+FoyVWEm4=n_IrvXsT$1$8qf6 zfWqGC1c(X3rUD@6uQ#MWNoT66_n&=Zy^@Ma{sr&wS?3C{-Ya|&irKXY;Y!vTp1`>6 zmWmMGtfw9!V{rIrli@tbfjhbDofF#BfR}r-bbBSnareJm=xr%%QWPf^8WH|ph0mJS zBIOkZ)rdvQ`nqzQx6x2@!6PPJ#yFZ_;<7ug+(GG~BUM)LiZmLyI9c7buR?TR(tkaTbxjRYoO(QLa`U5oCFVtuhD>txz^H$;6} zxl&velrHJmy{MVoU%2@@k$Nw~n|9!DrpTKO%8NWZ>TH_V;XCfVx0?%&q6cmjRc@Kv z=3tFfp05Tvnm?WHR`;cysWKQOi4u&Rnkg=fJ&@8LoqdoB>WLo^z=6;&J8l$LyPK!qm=gWwQ+ezWA^!NI2~5F^6oxvXFy?yhv#u(DpELnuCR{ z+$xcAx|>zMaxNIZqrkjpol`J>69@pFl@VF=zj=QZ6gAZ0m@`aGW9+5Ml96VK$|Us-!i<2zkC@9C}GAB1c$ ziW;`&BP_aamfhHsjo5IG4{`|)`eAs5`sd)8(?O4V80d%wkiFUF-k1!%ApaF;GxS?) z{m}~#moS>BC_@0h^icKB!zpbmU{dCVB)Ny-_vGm+-sI^TJ@PHD(u@4p_J>`3cjCZu z9yqU* zmn#9K3uwa``v_8mJ-76{44zKM_ok!HykQk0Ul%=bqH+^V#AR1BZ{@Z7v}bi1Txb zjSt7}QrW-@`R_?KG2QjdBx`44xpyMzOWO4f^;wF`d_M|U6${0gRSX|G0!VQ&Ov+!{ z-I0!Uy~e}s0BB48rm-TcWGJZH#vORK#hpdPH@uS(N3Q@VP?rw1`i}0DHoAH5Awoh4 zpD`?6AJYyJS0sO#YwTbSo#EYawf+oS(Sr0zB9p(Cz zK})52fz}M(H1h&iLg1-ab}FU>QrM9th^1a^ydhe%t;eq&*KU=c%qlB`mZw`GYzp&M z&Y;Srs^FCAewHpL0Drr`wDkm~@JyiBmi_2l8T{}6ymdb_Xqe2M)Xp`T4au=jK{G6J ztwne6!HpThW1#>mr2TzdDEIMWLgO+WW_PlniGV#a)?5lL?@|noEN%C%;L}Zt z72cTT&Y)uY21E4pseY`lYX12*f&qG_7whoy`)C*T$(U&4u)F- z5x{_zwOJckx6Mv)L@Eq-1TP9wIwCAb9$$@?;OpJX7*AO(l9DmZ`LFvKbA#?CYg}+u z!2Rj&FgYLK?Y(Z+|I%-@w`s8RHWYj7xDp)0m*-+YM#peYvJPuK7dv;p;G@~PQE|ca z9D*?{v%6(ci+Nj;OGbivsGd=&RLfX+LHvCIN8um1TZJSgWp6agq6^&%>jXta41%>~ zG|(GFY*%eKxtC5qw#}cE%#n(_)l+k`YWeFhy_X=Ay2rT?HBhAq%EE$d$vjSie4sql*$19m%b{YX%S z)7tBCO`@!|>bO2NelqwP*0&W*`BKY$9>d7{mxgV=-g1$}M?|Xi)T~^*d?9bteOpC8 z6r>Zx^%)wHAi;j%N1Lw+SQr|`=)8WwpX0btqnsQ(EoKqQl|8m@&cfpzc1&iebS}lJSI?883NkQ(iyKa1(rgYs>dT$b~?l_d-xM zZF!8HQcL=v{n)|JNpC?tC{NpI6TFb=1O*KH>2xq;s3B{M+GpC)&c^~6LfR0?RO*`}g 
z>JGg-YwF~WzOVcq17+cu+r{S2qGZ~1Ni24_W`+<(YtVK)Vc6R1dk|xiCsKn4bFK45 ztxAjr2tQZLNmy-q{Dw_i-~HUsz~c<(r?Y)E^GkimiUo^jkwmi<@^i7j5LJP9QB4|n zuavQ6Tr_?RfcR&N`{EIK5V$ z;-{Y*@2zRo`Xq7+4s3bu-p(D-m=K*AsT(wNrFvZDjl`yrl3J za@9Q>y|w>7{izxHG2-`0$SM^&##t!`%gba93(5qrc zMk>QLGVDf8a{;P0J#ZfzhnGpBv#AZmKy~#013=SGNcY19V$MdE;RymO2DHt>t-l`m z(kbIt&Xg6v{)yG#%+khNA0IKGhVcoFUM$Py9J>GKn`!nD1)~isl3qA;VSWD2v(hjW zTdrqXrXG~xF$?NKz{VY47rr>1vl7HFx&^|l@MuJ@TQ9V3Uc9K-^0Oj^<6tvOHCbVk zK>siH-ZCn#pxYKr2oAwrgNESl?hYZiySux)ySr;}hsNFA=>%vTI=I8@@0@$?c)#Df zKi-d5V~^h5V|4GXy=qnMs#SB%Ig^pSMw?J{{e0vS`?gH(zH0anH3xs3mws;D*MG+N zxk-l&WCkqT|9ALs)U~#Fl?DN!4aFOVk___$(Dy5fE4~i@ZoczG7;*! z2f4hDg;Q=T$#!+3qgD;P>gi5~i(j0QAR@Cf{os{Qj3E=YW@ckncea_gGiiFCBXRk= zUEHh?=ZO%p`8~KfC;#aACU~Z-8C^Ykz+Pf1^~NiR1DAlV&(I6RAZ*{rVs%rqa;-* zi#)lxi>$Jh*^=?sx>74yhFQ|CdlcIxOM&H_X*xI7xCGV+^2}>jH|_qE^$0R z(6d?DVJA&Yacb0MHm(Ni+s#V-BCTWH-RS;@PdJwV=2u!m4O7X1 z+QdV(FOQDlVG035Eq-^VSFJ|$cC=p=fq+2*+cK!p(<*Mk1JQJ~-wm60j&l*Wo}aqE z#u?eem`0LwE>0%nJ_s$3L5upDjF%T{5%=tDu!CyK$ccPvPdq)!1M2p}C^smi%%0;6 z`=0lA-!37jDq|4ugeV1E-`MdddCtf2+u**1Sk8lv*IUpuh`rumYVp1m?E=nCn^)|X zl=avLf(-`Rxqeig39ikU-Os3*cM}}*nq1d_S7;`5GY-sRR9oSe-J;XT*oOa+a?DrZ zWV}_Z}5pA!d>qS;r2> z|2$IxH9zcC?F(_fW5^n`XX@3u`;yQhm3@1f;QmgEYN$8Idgb7?NH=nMcjy?KRH6pZ zfrOcQ>-ipHx7AP4m!1_u27ZYo*RMSG>K$Z^G<83NFP^A$TU|BmG}LZJ3Hl%n3=sx^ zP_ethSV3x9%qINONRu~4P6^ceyB*P;wO~N8u%9gJJ^Z2o8(`!D*)FN+3cfVpol{e` zfw8;W?MB=x53@#8ypj-=3rzo3>gTYPJ4N?8H9Y!}qr~ZSt(gX_>=EH(I6JKyx=24u z)DLeP{d9#^>dyOj17$M!;+$tkql)lt^PKW)bhjiF$R{_&eY_J*{wV9!&|@E^*0yoc zm|fHhGb$SWXLKFbdn{wif^oZI3>J`R(}pDe%&6{j5R{9M&X(T4b>sP+GQ$tn8MyjG zPFig1z*%dhvTCe^l>(P)!~wJhl^)5LF^VUd_~|6(i#GzcPP7^8Ai4~X<&&;;@7I-2 z1EA>NJZIN1_KG-o$M9HV>~&k0&n5HT%l)dv)c#E>xvn+h4jV==ygG)L#zCDifz|V` z(qG03IPpA(jqksDfFC-uUE>=cZYXC7kn_e_e)0@xm2;z5fN*WhQWjNN<3p_yu*Y`v z|0x|}E&0`{>(qD{oSnnu5R$HMKL+ku-nG2_4;H|7V~&$~+hH+ad~a~L%>Woawg*Xs zLLE8rVwHVKKzamntQ6qd+FLca=(BvyvhAsZYcaQTm?~LGV)V%USXObAQ80xZ_)-JM zagMJK6t{V?j{Dx`b4;{tts(rDQ%47!i;PR&y 
zM~7hM_%K2yhIYjH`>%Z%UHk+HO#oz+Pr=d=CiP~Kd_YL~J@up9-S-LtJk#5Ib<0co zC=0p-Mm1*Y9*(Lw88p{dWbO>q+j)VlNd{M~imF9}a@L5w!`*ljthfJVDEyAt@6Jeb z5nDDU+BMQ@w<9)AU`Pmqa#=$08{*v=BH$9NbW)o}6Jx^TE~$x4=zh__vE|L@%eh=j zYu&4_cu)c7H~D%;OmSRLewB^6+WBsh3YMq_kCr0Q(+BP17q=P{#kltueq;v7D|3*< zuyMv$I{^oxDb)#pAYc281UGnSY1a&mHWT8{$oz{pFpfx+4qfwjr6ncBD)z^>{5a)x zW@klu7oG*@)HMGvs?;$de7{0W z7BAEfp$p;6BnNh8=goC~6#P$V>Gz+M7pUC3&rnAuo6d}%Ea=oZx)N}@-Kw@qZQq0= z-z$b%o(hQ9OsAgp)9`WQBXQ)LCX%Bb1MM5>dte)sbEpkUFS z?=4pLk(vRS0E)By3bosB4Tq^rcWZdX-R-VHZB=)WL9qKi)^i%rJ87$4a^{Nn@iNnU zkm24W3>)Sdq3yOo(u>rnn3wsn9k=Hxje}Tv-QP5`=jgkdC8z{sHKaApawmriCF?{ppx6l+i5X3uV*%Mz5C-8#AEUi)``wMbT|2Xw z7G)cuiG*fKhOpuN#yyW}7S~HYWuRE+e$(%G-r?QUS(f^F-=3g>UD_k`Wr#$StWt*J z^d}3U$zWLz@||vywy&Xd0K#P%qJMCcFgW&|XiHW$cVL4OaKJkv(;N7!>oXgJTTGx$ zy_NFzPsldgFWqx~G^F{fKt-2`@WV}cztk~KG2~vuMStNT2*E}KIsdb9YS2De8(m24 z6-RkSH0ByI3N-T&;$aZlmpLg*B<|_gT4U<*9?`uCJilH7NlhyEaZ^j~PY_~ZJiUa@ zgl76fj;{Q;g*DuMX0_=TF=W7U|9pzkCPAAZ_0bKK71A_-S&x`N(j>6Wghf%XuE}>Q zBvF$8FR8~_(Vc{srREqi9Uh!0vY?$|7fN%6yU@=VJ8X+9d1PoSfKcE`4zjWL_NW!d z@d<=Z%HbM=VtV2u_fMHnRH{ZK-JBEeR2l+qFWqF7kiDTHx42G*;>{cHh13x8TLd>M zF=VB0*2%e#gmc#MUgg!kZhWJes3}k79uOG>*#ey2Fx6$gL^8Bz37=ST< zVng@7)Nl-Jor@We*;39FPl-;NKM~v+AIej7vQ^<1j;XTU=+XrO_$?6@Z%kv8mSCgQ zzYeKJxrK7STB4OG8yb~s230-rUkEpXhDP|Y$S;NN1rA_Nlo)Vs5bcyWnVAchw$V#p zTq#hA|J`>S?%kg_k3A_-Nl`hmPyYdr9D~=QMpR?VKiLia?Aq$@(#nNMnwEShVfNe> z5T5L^mp6B58g%#?=S2tjy@68jU{D?0e%}dw-_l!pN}yNyEjicB(gj|dha|@V;3?)p znVY<*xftl0fziE$<;jS4Z`gI}ayS}e#JBn>`xeOAIjXUmfo)O*-0!-n??J*=T4|zA zZ;zoOK;cgm?zPn%2Vv(mInxjnB|yS74y{f1s#3NL0c%E@mb?H*IRTr&Pw(IO&My^tk%1(lxwqK-F${c9KUjjwuQdkuI+mi8aL0l?C-Kk0T{AB_^#g$p;ju>irm$Ip3}ODxY|jd9%>77LLTZ$A*@ zLpJOPF_m6DnCkb?jGUMEEcD5XWG+C*rswGmxoQDh5Ples7yab!TA&T5Fj#3$9v8c=db!w{4K_x7A~>9*p%+P!}A59cu=`z{2Kate4dP0eezd#DDoMWz9Yl)=tC=D1!jY`!~L5?Y`8U?u_KkvFduN868hBQvmP7G}4E* zBj|ja07OIj;2ZVBX5B8dy@P#hwt+*tQUBENW@>?oeb<62?zY8inb`vZlakNj@!7er zO4NUh6m)c7E0AX?K4+7%a9{PoWIpbyTrhgOUDCZZv*VJpz(yFGnM|(M52bmnUwTy+ 
zXb_G0H3Z#9AMQ@u-vs~1jTZ3yM+-l~xZi$DJ!_{oX6SkMVeas2C=<7MFM$+q3S`ye z7Xq}yfSf$9#*smHb9xEOD{7J-3>tzYu5ir_qY0U+iV=T~blozuX*irZ7`b*^nU$6W z*Shjkcxg9h>yVG$5yF)s@ZS?)hx#vGUQ=BC}fgH6+4(+Mmn@%wIw5&&||% zN?h~t|Ftu{qUK4f$lsKs<@K2#XSTj+ofvdbz=d_Qao04s7RL2<#d`FGB|Kh==gDGW zF9Kqq4E))*Gwn2lrdM72o-WaY4VvBx!V}#&v8x7q?N~s5)#g{fiYcCQL3y^-h2eP$ z7Lu+kTEhAsOb+mlx^;QjW^EL^0~6>(2*2n?IdkB%!P=)OGL96p+anu!10X=hXUA;k zK_hy@K)|iTFR`;{Fh>p&eb)DiCvjgiKTSg}(5JOMnDb#kqP?Kn>HB#m6~iP+jmkdn zQ5$3t|M~aSo>Wb&Kgr(%lgW#52)_C6w$RXc7Bi%i*qy&z)8ejWc-5%ae+(SvG_#e5KjSbP@F%Y1CIZqYiZNAZ| z$RcQjg}FvK35y15a6Er>WCg*-xO^yO^I#cAebOEv%d5@HI*ZeGvJF#Pf6duKs2;{3 z$Oa|q|GS{5#IkT}rJZYZ{TfVSBLin6M4%o_TaNGqiY`MT7|MOMPaOx%r2Vif3~H9L z)&DSRy95#&Q8IIa;b z%BlSpt2?qXxDaHTM~1mBvncs7SS7?3rK^9~s&T5T;a;=9L9J(>^|9=7UglGWqq~|; zR6FTo6VCJ0cdGjmgAGv+T~I4}a>cBO9~geX#qU!a>s-CbUG8+~l^Kc+Y@Xu(h)1j( z)@FBYbrq_{23H4X!K10@Ey@^lsbI%4uV?YrD+s(4$6JIN>*Gv!n>Kk|dSCMPA=4gv zj`+&0;J@1E3UW>51hjTlOg63J6|x|9N>to?)eE(MPS{S~r~7p(aDV4>u(F;Yx@U`t z`kV2_z0Rr5KoaoUukWTM*X@>|bidVBOLzVorJV`t^unSdpVg|~Ok@SPb>v#l1rvQ& zGcs7NH9Ym>n1O($q>1s650HKLjOe2f()W2gRq-;VPTwixjORJ$-o1c#@+g8{4C?&;Q{6wVGJW{8xGP z;=5^Ps5P5^W7CL8Q7cypn0g zR@ta~&^qhl-N;izKHb2zcRER7U~*VFvg*5FCT<}>7BWM7U7^LDbwE~&Bt}`R^rqk4 z+~RG%Bbkf8F31Mf`;|%>B(5KMrGqW!4tQr<^N&hr{Wj!aRNPAE3}S-iy!~?s`p%NF z+PMsn)@n3iZAtGX{GMui&(CSVyK%&zjL&(1Bl%5499)BqWB+Pm_=Q#I`_SG8t-A~) zdiym@qxw0!T`E7DGlbpEu5WkG4w^UXf*`o9bhos+9kcRbSuWU>32?gh7M!*W%O_6(-6f^!QXw#xK*n$@w+0dT)1{ z*&C#ofQe;XK+m(j@M-5eA?~bnse}Ep_L*~W{m@3uh}7G zyLtAl50n(bFNGr0Zkc|3Gn%b;qnS9E^fEj9WugSed-+o|(Q`ih68cayQAl;r_VV4^$7rUmmH+G0e_j%xC;$KV{|D`$ z^vC{kIzL`Hlk4)3G3>vsF6ukrwYp=s-&+&ZWMSo*?#|`$<9{`X=y{}TFV6q8fG?%; z|MT~kr`O6szmc`K0Xf}RkSkvDbSmxfZ3=pF#~E{b99u&EG18~N!s?)Y_;U08(z>vg zfU)|&T7Njc(n$x(4B{Vs3){;(I|cuzs`UTncK*NoTd2OvFKByHliX;OBw8N@5gZmN zldrr9(hxuDSi*{3{e-^r@UyLdFZjWRub2t7vrTkc0Wt&?RS%Cwm=RegqE8^8Vvw##NoDIty%)!}AgcFc1TmxTHa*_nsg>KS+fdy%9NOWu#wlIOx7(!shF}hOD+6uV?gJ_^ zQ!jTQzIR&M8j9x&31UbQFRem$sUim1A))xZ@qafO&GgzH?|5KZwGwbn+->WRKfLnq 
zdeIXH^)H1G|3iV8sZs9vb4ZcUa#UZ_^hng?!zoF#LuqF35*#|x!a~*dNiW3Aj~I(6 zp*#?x0E|P$zACh{1m~uoplzVJw0~AGC><>ijsqFnQSa8rYEx&;=(_KXZ|!yLzB=9aqG7YiW{x`ryn`XO)(+C3wVWP-=)vq2mtXamT=^;xeWgfm7IOGj6siU zqQl^_KOMm-8U31(-4fO7qN;z*^;o9>{T>r!cUt*`M{Mk*4u2kh4eV{12O0(fmW<5( z8zi6a9}U9}i{ObB&gyzePOgx-4!OW`Hp^C|@JW_ymdt9)pE}wi(r6R2XIa>cQYKyi zh1M-X1mz=vngxKLRzKA?6~$Q)12qf}^G^d6BWUKv!PXu?FhmT8{KP^^--C#>tz zIjvGzn3`2<=2zd;S@Bl;OiCX31A85w4I@3wgB+@?sOx!T*5nHs6Tw7qi9WnT7zI`-`ySe%NlHMP6D*d zY4tkGf;Vg&AOAAj=@Cepb)I^g9^OpHn`iZynmkRyy;**-$p3LEhWY~dPi*5}KMi-B%Pte;Y60mQ?>*Fe4;+rwsG;Lz`>FF8u4ao8`BRH}C>5S>@@7HZ;IhWP(BKSOMOM;JIa=BmFF z*7{oIDoD74MG3^2KNcjUa*D!UfK|MHdp$`{^c%H9sKF^J~7f+sjaO9cR>e#Y$e zib~G@LQA2zYGj9fJGw_`uNwB~82eo{fy-b9h{=|ML^j+z%x#-knYqF1;+00{hD_#c zZ}xP1Btr5UJv3N#PHWz=~B=)0o0e^1Nr=`42>*Z1AB_XMSd)^1J@sw$k4}L z(}vc(a^;3>PuHx?WRT?o1z{R-d;jtj{-N_H`zj7~GN)~Nq~tsut=TurG*ejS5M;la zp!`*pfmH~R``=H^;OF9B8BKo-Y+{(v)EfnxGoLBvWJStA(wX`CKtgK@=?BBWA0Oah z!x z>c9^2zA>@0Ix2t0nlAZClB;RlB3r}+d5(2rW%GRjFovAEV4k1=l5cQXtN$U`Q5?jsawuAdAL@K++C zzx`6eEzt~wH4r)8ZsCMTZU;~8jaiLmt9lda)yBrO9^e2wx=A2{LMMTu7Fu z4QW*k0Rw}fnE?CO+i&#xlUO09f%5rEQg&Ul_O5BsJM|{Y|I|omp%i{eyx68ayuV1= zfX~HAUJ`Qfj@La?h-{cX@*anbHs4_E7WW3HHnw1#t;vVN3P-b!R=d6E*ipl+-ZyAt z7>`jyPwqU33+9}i1f*fdH|2Y>$*y)RuRuZ73p_*zOJ_V*YB-{iZq*Pn$>KY9oo`xZ zC~-{CY9%f-$4JQnI6|6-As!b^J;37_=e;wdy|V231=6Ay-S{ZV^^A0{r|95Sd{7py z*tS3E8Dvgk7d;VB!QI`WM~gZ{2Alr_HMwp=ZTF?(ToU(8GxD!c^nxA%bt%w+)scOy z^QXaqCcZch_GVgmDUi8y5B}LF zGk*WWgMLtxhZqCEogPhs7r-8B%Pk!lDX3*lx%~vwJZF0F0GT#b9?vojgUOJ z*xi&sMx0>T3r2%9MC|>*J_7T?zW(9ReY$l4a|7T$#xMJzsE@YsVf7q(9?gF5?cWm7 zQ^uMcKq$VEsKHkzXx#|HeHauE+m>XTpKkjes&V(1*XPuCF}6=SHvQS-tP6jQaH6p> z_%!;}K5G<}X)tvIvQBy&u3M5p5QRyxz4yI_;4m~?7l>y1ng zIC|AM447ZX**aMe?+D^zvz;@)OtKy^fq__B*+u^e z+xaj#v=my*)+hel<9C@9f|f7o)C)U1dr`Ln*AtPe8(yc~eZ%jkVC$TGYG&yYdz+XxHXYSWsTCM7;cXP^w#tMUjQ^@o4xvyyH!I(`zE8eCetwm@YeOti^h%jhSw-p1u7_v@J^7{Pkh_!)@gNA1%t4?>R zV7+teHD^k_y3L1LRC_9E+A4wZpOg7=h8f4JuUwzV;yf{@{eSAnhN2k*t1Q!6`+6eY 
z!s0A@t}lBb@B~bw!*_-$<2K*!&i0I~t($i&j2&#fwG^*I4lB&}|G@&Jb2(T6#~Urf zEcCB2u#O#xtC0~1%$5YtbH5YixrOY?j{uE`ZbUMSdWz62t)I@Ir~y-Dj$ zI;|rQ+|-3@V6^CbGEKY`a3jRs+x;)Qm~yIyrld;&cB8{>*sc`Q+v+2to>Yn;!AjEQ zNK)j5H#|cg`uD}X)6VmIImVUZYD$u*>pzX(Ads5u-A0afZyOi;m{#JMXnbisR3AII zBTVH53J@v2o)NHikE4=SG6bqgHx*5E`Cj6ZMTaKwc{?P85R}7%23jUdkzX{wQV&|5 z;h#8zQQ^~qtx6XTeAJ+wTnv=la2hv+kZtrvux1m~B~T+*f;dv!Hl8m0~vGVI*)omp&n-TP&-{yNCXZa~XTtH#|y z&#~>a73U|{*B{LqHuX8SWqtx(kOj|wZezz1@oGge!01oo>&79=&#lJtX5B}99D$;x zRYLLw^ujU@;nJT!P?19*sbF!}Pt%Cad^Fpw`Y9p-iMo!NesnX(m2qmcM6JJR`tB~6 zJzCmT0mV99hy#U#u3QxH(N}XD<7( zX3SeBHF!&kif&NgzqEYvSUq^Y;*CTeb_nO{D~GZ#Uw?IzKnLah%d7^Q;$C8vDkbD_ z%i*sYxkA4yD4cfYjhEVRi&1fw;?3(b9k`)wFUa9gL7;f=;K7S115MQTq_Z@c(e;u1 z8-=i3#Grid)X9h|FCUqg+kV+uUNLNHmp7IfwT71iuUs*(uvw|1j7_+9baOnD_pJ{Y zc=v|&i_d=Gj&p|gVp>NRpJ};Y!P`AASJQ0XMQXKv7!Ovf0nhF z1D|#*2t6z7U$Jctox@M8_XFCDErQR@vv%lad&sH*EivDH!U5f#-?j`9eA=yFM9p<#t2bBk3orF zKb-yY*M}t_gXpJ^g5er$NF2{e3Bd6(Z;f&fe*t5 zh$wd_yA=74J)%TQ1vJ|Z>&+`O5%TWqVWyC@`?bceC%XB5SVFR+a=2M^Pcbfdw;M7# zxV5xIl$nb`JKc2*=9A(K3K&g2SA1=1g8Ji@D^ZZ01u7OSUsz)6nR%SlZ zAi0$&L;ksn)VA?_q5KPt;(G^rWsP`t&>St-vh;S}R@Nv#1k|+wITq+4(=$JT*+Pe= zA8hEtW)VTo14d>GA)|4?h3qfGP`29F|B#sW8?hdb!~je5Gq3+H`j!2OdR zh5I*Yse=08hEx;tO2;R zc&XGFx`$f|{P7{wbIkN!+tl208Q5ErGg3sUFNl+UadFB&nPPe9L|XVnY=DH`{gw_i zxztq;HiZPqQz1#7NeV|+z-0ew$%mV`f6s^oy44HDp5vwmkYwH|>jXx&qh`MB=|wFD zMk%xH*=uZVlrUySEwkR5TL8R3o=!UrAp~OJW<8t@m8n3xT81q-R>L5qX=puYUdW}n znx+%7$uLkYI$vpf;-1y_v!Ca<+g(8fuvyf=t#>sDA@TUrE{c~P(RyfTri=6ao;hiK zN4-HS_wM-eW+w!KZz!V2rG8!0By)QGumI~7Z1IA9fOFbNNSgiR@QL)4(a2)i?3CF! 
zX5lzg+HJmdr@X&&Iux_px-U$d`0@mNW~aRxKOz+kTzQX1PBxmAJlqv?T4Zud-O`rT zi1qB`nkpTail>#0hvkfZmZB$r3xc6H+?hoRB!>^C?2ob6FVMhvtUdC75tw`M%KsGC z?H@li0@~dpm}F$s8E}`5wMPEn7t^hIcRBkNXQ2D<-+7F)zr+q5I*;b1RTLC{NK%1y zO`PD?!97EarPx9u_+j34{7YLH3w#QN)hOzec+A}W<=tYN#|DI*<{{a)7~0DVd|k+- z9MBQ&%{KPucrPA*avjIydrGAN>LS24)rX2)&ur07dsvZywaI$%FPc#AurjmwT+^){ z7J|n!f=9adALanBq#&D1d~;k}+ze;RdTM;H9(%JZjO(!&{rZY0$bi?5oE*a$&hTwb zmc7MmV~f6yhpARiiACjV2Os7T8AD5GfE<)gR9Eyt6;U*$g%K|8rx7T|L|0iJ0@C}) zZCXA*sxl7g1B7?-$rvM1IbBP)xKGx+F6G+htGyckj_d?@sa!ThP~COHXAqNSTgok zJF&RzwKaFKC=6=L=(G>0bw;}9(~_rIZ}n2E4Bp-2HoMn*tM18sOd76_oTR`G|rOF2+vM%(z| zR>-q>bb_MY-yNWC{CcLsRMB3LQX!x(J>x|~BKbs)M^p-XnY=<}p|ADK$~Z!-Wd++G z{^|IIEo)XQ#dL9?N#s7JGL9<42ZUT6u0Hj<&MT$`!Fu7G+mn0R!6VbMJn*fC5?sL6 zkjy)jfkmL744xb-ixmB6@1eZRJm&daw$SVy+sr^cXN{W2ztvI2!U@vrEiCr5^eIbk z(}*TIlrJ}dQXu|4aDILK=RY2<9scmqM0?>1vmW~)mV5PehuhM`3U+ky4>p1%rK;Mr z(1_vsF+$8prvjtY1U`h`F8?Yq;u(mR|bP9L<&p0i^8gSCvUZiu2Z6Ok(xDr9>iF}g-(S<9q>lig5onv zdYl#qo_k?zQTtQLyvky9&Wd&Di>43EZd$TY%RE3a{m0QK=be!QghZ~>zU{yXnBm$^ z8S_n}Qa_Ds&`&Vn`=hXtmPWR&E@Ef{6`W~C=D(A~6EnQ)3Df8Ip7IC^RuuutFth~a z&d?D!eGr;Zcw#0zQesEt{FM$P6?nvXWpPkW`9nX~9hZe4fT&U(e?Mpo$eaOXgFqScs&?|1pU9l3r@<*#6MJbcf0b@A2sH{9Mydo4Z$e>5 z1cL76SLboD27S@1Bc|@ACe9PD6!DWzAh9T-ccnwBvMDorO6QjP`{R5%TH+byVA$gq zO&&tO0qwfdP3TUM@*)xO;R(V}CmXSml!FX-4)4putVYx1N85b|5(a4PUE9L?X3M0# z_)(t#jS;(JM#db&@1{+p5p?cAf2jN?2OZS8ouBSb|EqMyG+MIud4iXD(`fQ($2QPg zlUOX6e}(N$OlnRm@ejsm6$h$(N3RUK4eh+w?V>@q*X&JkfQZ-XQEY(7MmbXTbpLl* zEhMT1Jx=LEonFuE{_8#WyDw7z-4M|4Kf0xR8}y!su#nnAqxf0Yc5V-IBH z4GmX)o#Kl-W?gwU9y8-e^1qxy3;v|mR}nELR~6$fXq$9bmzC1qfC zo$9^X1pOWRzZnB^UATy+8oAs~M)VDEN#lqZGi5OiM0CE*zGNnAc&vFFV3x*z1lqg) z_g~M$|88H%D`APwaD7>nK{pWH$*Gg8W6|b+?(e_O8RWVUrb6Si&JVS)=f%I2_xfl4Z-hXdK z87~h1ec_CV94TPa)#LHgue9FXWS||hYtF$1eToJ|9&N>kVdd$V`<$XP)7;m(f{6j` zUn@<^gXpDqb{5011D&*iGS54opzny#rlBc)G8-&g-*S|nw)0LO&4TB;(t!png_yaT z!=X%cpd~NfA&U6Bhy+#?|1U9*cDT{xTp_)a?qG5|0|XaC(o(4uU4D&2zMthcsKG6i z9J6`Tf>1}n(56D`_!dL_T{=oG)Li~O!v}lXT+SMaxCI#Kb#ceV)xU?N(J9^ncpuzdvX 
zdUtidwf}eXP7TeLIMBotF3VJ=Puyls?C|8#6fS#N@~VhUr3i)xG>os?x}{K^X~?`^ z)TgW;+RHZ)9nLX1UW+w=LE=l_F)C9M_jv4q6LY&%34yw2WQ+S{AjiO2^td1oHp+B1 zwu+mDk2HxEE>8Q^KUdew1#88*Is1F9QZB``9FNET0@5$~4g65<3T|5w&UK;MaMlK2 z;YKlRz6zqJHi)C4Barxub!ve|gsz#NgfL>$Sp2~%l@*8>dVMD*hhti+?3fbdn&0;r z^eDGdUzYDl$gl4fB^j$`Ra^q!xzxxz=|d_=btyJRKtCmSNWX0&vSomG$X{l^DFpiIMx@(cTT=v(UCjal`?knE27o_)6&+*4TKhpL_MFLuaN5{K~U$EWB7fRGvj zP)i+qhm>P;_8ZzRxWX@C+=A5Blm_fhKzG|$s2NLd-toAPtQ|%AcSyt=)rZ->6puS+ z%kg(}@nLh+n`=^%(%J^UN`ie z77{G%&7uF8@Mug~AI^?_==Mf%Gskq0Bo80+x+a^0f8(qSpeT+puqeMbWF_b?6r-Lg zESIqIZ_q}65f@XJr0FK^L5(SjO(q#G#8rX{2aZ+qrB_n}x6{+C3?a1OtIMq%`H2Dj z)q7bQey2C(uuB9?`K}^_L2G)U!tnJ1+Qg}!V&Pi!>%w=_V3E%g?#ws}=Hcw&OoS47 z4Z?3p1o`K8KMgWGD(3Lz4HobK>Ur$mZsz#0z0L6Nyj>-bd>ae=j+4MKDR9BcJagLK zboWNj2N|gz6P%@0n(`Y|CW~5Q$NUKuQCg zKJxsv1cKTRU%nKxSKH`D(7@1s$-|uLAXhT-_SM2ov1lnHn3E^T=us~&tC&Q6vu6HH zG_YJ9VdTqd@xblJDjq;J)gd~fZQoijU`4c}y*XA-ngy_1*h+>H6(nKrn<|F%43RPg z} zQ`Ml{?86~HonYpsS$H!OGSWHP15#$O(4oqCO)EXP_!B%xkLx$M=9tn?t$RU`g@qS0 z%z76X$;b0g-@mpnoglx4jJ|rq*1(?>Vcp0^-e;&8|B^B1^xJQ2YD4#LE4?5?nz~K40Fv z{n(C`a#hTGlwlR1RaP?8tiomCBCofIM@%=oc~%0}p*@YmD$xA=96vdZDdYc4L2Q$9 z-=S9;Sx{n7PiAa^@5?uFu)TS&A|vo)DTj| zFIKjRpWq~R2U_@PYI7te=5un0esBB{z!5`T=QY3_(LWKoU@vZbTOr?T;7 zKSCTc3ew$~Ce!2r%~(_feEc+m$|M;-nXigPrML&QHdKQ z2>U9VI8F{;ONXL9Mgg!y8*x29qG8~d`|u7Q*)UF$*h9eh$Hn3YRi75thM@V8bcwb{ zhu7QiK4|1x)QYYUm=AHPOJ1N97T zX@@k$0`tW%CJ3#l!RymDfg0s6b=sS1DHEtEzC<2qV*0%K4@++xpZ5b8S+bzT|5alQ zjHy2J676XuZ_5S|e4Tfr5q4NTSe^)8(64a3B@`hykH2araIp&dhxxgfDI9(>Cb!Sf zFbZP^T@*C@&WfnC~F%+1EX8nYQRFA95w9Gk4k@3H5B4yzs@%RS2`x zkAC2B8}?F6@cF$VoqECW^&F%9$o8NYF>kJ#C@xa>wHxb-C{TFGhRnO$@V$BzfiDWuS-Sj*Mp<9wz$PM zxJ47%G5v=UdehD;w!F|E3+oy~3pj8N(u1@~9y&&0fFrxT5E<%>)vAn7Bw61LQe@&S zk^TRC_l`rdfoRb(^<{TlzZ%2^C z&Q2w2R5{zESX%v2G^Zu~mCqIFfI{V{ZW~864zynPtQ|*s*0IGN_s}>iH0gW!)vRhm ziV}J?_2+sg2K=h<-V$=kcMjt);_AoHxJ``mx4?w?7s)5R--Gx&j=dwGk2F)>l!tn% z!}(R(U*H+$yf412cj%Eoc1us)a{YHqW%|D;RIZy0WX4)@uKXQ7&qdCoUYy zLC7wWCEx?oQiMfnpr4d1pO96W;58uwHCmxnZj87SL3z8yyFX=-N?$}H$XQ< 
ztrYK-5TT*uX?2R0N#uO}bqBUe#PFF9L2@6aVjpE-Otgh#YT2i0SRrUU=~J9PWQ<^B z(dFLf^J6O@y@2x*XC#;w_>Ex~He5y}$lKo2Y{~30Ci15Kb-_g6{%y&xhkgpuJ-x!y zg;4$-$RRchEl$ZMr_W`v!!SUIAT^l_1Al~g=^YH8A?8J}4%ch{xhjmtxmjyQ&egA? zV?!dp10v6FpMdro>2XlfVM6((k8rp>CaQVqt2X3%_n3WXCyu2Db~MvZ_;B9oPL>@F3SOnzU7r5A=i7bETwOS3;(JQjvhIlpprzA1Wk9y z%(V};3}pvNA@axvSTt7ZZ$j$~(cL+3^cslwb@YSGS|?_Rvg_mbBd|9&Ird6WAc`*W zexd+-ea)(zE51Vin();UM3Z(_%eD}^kZnqgk6oytDcZ=yfTeqQom6m`?`^M^SPIba zpj$3BgDttHD#h$Y`IQ&KE~V{JAS;yGVzU z+D+15zf`bNa-e1EG)`8tCbEciWLdI0RW{B2tV)zo$0Lnpp%QL|=^%}y1pjy5Ozls6 z>92y`{&HolKk8W*A-OnjKG(miYi$YWuKLKaS5|*^TYFuqk?v-WMhI1nO3#)k9H61n zwYWwN!)l)V?E>6~ux81gdU&6hSO)|>Qs_7fuU)qMILxWm@L-~GC zOTSQ5YBFFK4pKzEI8+o!Hw&Mqhb+_o0=r}u&`@+X+O?#9VGt}9;0dqCKc+U+8D=cK z>0kjj;mVBMPobjzl+w7+tPDgid~3wo$5a;- z(2iet;Kb+H!-wtbQx~$K-@t?Sd?Uv?Qskey*xOOtBJYI`MRLNVB4@y$WShnU!{@*b z1rS=`eF_>KGsW66zRsZf0@97o(yDa0U;HzHQ~LA(ccz$KR3jMwP~%EE?eyfGWhU~I ze35p*I9xMopb|LE>M+xW)&$CPM3neGW{rLr+`W-pIT^`dC@-A zB4(i#=Zve-psj)#UjC+Wc&&KsI)2>YSa3@p#-Dp&T8527r=2H1yEwG;VdPkhKV&8u zuiYZ_>yWI{^LMHKX6dim<*Dwzoaxu5j!~XzrL5x!zt9IS!{yQw`xSl#Yz4#bLw20-_xE>T!U+#=gDcUz-r;3 zCOOq&c1_&RJd_KLl+xT*j{7^v>g!TR&4OL}HB_~7%=w478pO+AX!YQ}+zMvwABFjF z99Vvx^IJ*a`pxFeDnm|(;k&JLw4failffPEf6?{M(UC^ox^E+$bc~KWww;b`+eXE< zZQD-AcE?V|wo|cf-TKbm`|NY?7^nWPF>2L#*Ids#pWk|Bz1{QH<`-aBIf}GN>$Ob; zhYY{90=u2^zFBmC`X%7T^qxwE<*eDGu&;w!Wk9~p&;nNlMup;hQ7PDs+sTJkcnL4_ z_-l0hl+~WMPl1?)+aHKSpEOENjAO!=5aAb-4F6c14rF|&nCGJp+|1T)8AjU_2_umY zwd#UZn9&cg+t4-${6%!8P-0PqKI1tOk~kSKAr6kJhf9k+ zLjx54n+51y7WV1UjGma+h~}{^Z|uM>-Y3o6!wq~=PWi+fJphdH6HpKl%KOrcSBR;; z61a;X3BbeHnf|U2$NQ=Q>EQB=yQrPAlY0uGr*iZW=+VZ69?2LuWpga8!*)@vULAUq zmRrpmEM6D{`V|*pVcs@hcaewjo9B-@0`)$rWGO3upYC}z%Xr?G0%4%mePhzw#3__8 z^5FNgfB7`iPlmKJLj^aJ##p@Rl#cP?{coWX#RUeddX@R+hT+Fn;fecqqcGmS7qpC; z5U7aay#XjalQk@&vCrlfBbPqIoA8@S6Dvo9Ys(8ktnmlC)OarlP)DD)Uxorkw8!-i zOhU>SEngGL{!U)~z>imZ$KlM_Uk|l}Ex&12cqs}4K^-hE=I_&bmHLWLw=AOEV=o5#WmEnqow3{Jxph{5iwv2VOu-aIqbO6(4x!&H8J?l5u5ub7d23&_)wS#eELsQ5yJxvl$tPW(S$e%=3H3hePW(} 
z&t~L1v2x`bCb;t3x3p&1yDL>2qOF#*sBZg~3MLniwR!o#?Q>yYzHh8*)hrQ6Hu|zk z%YluQCKjPXnMRi7X8y4X!h#)`zIsV29VVcuBa-z)K^SDb3G%y3OrtqU_xbcDNC3^6 zNH4CIcP^c_rfe`{!tU9&Aqf9*W_|}C7n%A1p7o8-i1nJa5V zF8|Strx+={{QV`X)k9D>$c1ObvxSL;cc<`}F+BtwKl>G}K+Fl!H;$w^rc(8ybSHT0 zwvePD$~(@)XOb+{QD4u`RPR3d)2yV{8non$vM(yJwt7_-aZWJ$%^`1?f8z)xGOL%o z#QDKTCjV80{#xlY2{1(RU|;=4P>AJjBy>OX8rsx2OjbvYT{wsCG1)sL^@v|4EZOZy z!RKE}LStnjOa9bC2tHErqpNgJxZYXt6vH;>|2@g2TW{5u!WUtlR~EK+ha62nBo-l1 z+5A4GM_=aBfwC%*W~@;v2Kdlv;XSNwwXEW?4gA4;4(>q=I&ZAY!=u3#^EzReU<%AeY~Ppk;Dp z?pv{pA&Zb93pgQKQq1pT3m%CFPv1e1`fNZP)743|=3>Z7zTjSp7o5#%Ys2(Bk~R*P zF%I{w0NfMOe9==p;w)f(*KMV&2UVvsq#n;$ zZi%p6dpCAB8nMYk_&Lz;grVpI9hS3Bv8&zw?kb;gz&?L)YQhE}=YP z@bw7yFZjIzJu04T?1E;n5&fFgyB7+Xh@Zg6{HRIWnN5}b>I^AL{nN4kNj)lCcJ2*G zm0j}`9$Ve@WO!=bPiXojB23PBL6`(7Vf36_EylJ=Q2-`vm+N28(@jkHH2 zom8)g9%VHc@!o<8TWX@z_2~r$E%U#dd#`-;jO&#JAhy-OM?yvuY^I2ERT`B_5&wc@ zyP`@M$~e99NixkSRxs})-Q-4{rlu)jgW_j}qJm0<-TKduD^z>$b%bX7DMgI<$`MgpDJZ6I*VXO zlT_UQt=!3GM7L><*!9WW;>#$!GFmOWc^-a>`5E%h6|zs{u@)ah*+_Ss6BgSwYwm-h* zc{l~)yHWx_}ORUr}_A##eFXu}0#vM&?mkTdvcQQ9j)0J0kQ_JUN~fcs3>F z*;E9|pp*Y>6cmRTc$2w0Ep7$X68zhe(;6^*Q=Extu|a7!nO-Q%o448(EHgA=)_x6+ z-JL3R0!*m1-wdg)g_tKiHGQYW!xm)d)@I{jqLs61fW3DMYlN0Ymq`kkz(d73FqP>s z;5E^A9yxQDHZ*Q_6Cm!E()TvmQ+rzqA9YzZVpVsYh3lZc?o&f;+o~jd?^NdPpgP%W zZcR04O)7yY@<00(7peR=p~HiyExSmUILF8$`E-Px|Hn;^{IZ+?|;-yx8(}b$>oa81&!1y!C_qbaW=xFA8Hi^aB^R)x+(>%cV#S~8zF0@+ZGnTb{C{L;68pR$mfhvr{c~XzTSbNDL(Y5s zhe(=Y=rOSD1PO|MGP~5P#{1?lEhH40DxU3UPu|}%a?i!5m0->RD)Fuvz9P+j*5g8o z3%i2|bHhC_a(_-ua;!!E=|UlVAQL4LI8X^6>;42{)&;4_{2Xb#t6(vKXIYD^2L_%r zsf|Vc88CPN55xBLdsqrQGG!i6Lb^~qehct(p&g4t7&6NkAto$%C-i8L7_v#j)J2RA zwFEnN({y`jS)+#{d&0Ot#K-|3?o?kf4;~`kg4lgE$LrB2VWiWJ#zzd%k)YnEBXrvp zX%nojIO3a(SW&D(z}xiMOqeD1>weg-}CI=$mM<6~SHC}b3y zJ6tI{OFZKyPP%H$SmPcR#KThBSDtAsb=zEDi(_^*`_T~^;@3mI1eZVw#X&RYFdu1{ zAw6`HYKqzRy&zOPHMKQO#$~9|0b|JE!!Y6+*e}clP=(jgvibdIju=vo_n_$3)q;V} zM5A5=J6F}bx$4F=iJBBD{xCooK6<$^a2-@qvh+QYGjaF1ttj?yIwwkTC!hGEqVA-` 
zvec>6S8N{iMEok(?;eB?Xw726;gft-C6=}^z1_OT4zQ96-YkP+&L}{=ySYcS$VcBK zLIYJ@@|LMixG{jzA4TjmNa8KMyr>&@t@d}-St83owsfPje)!2u592Pk>A=R)+iST9 zdK{Fi`ibE74P{#^{|Gdt(}|z1{N|~1H{Ng9fN=yKHKp*+HB|)_KgMLk?Z$!YCDfdc z^5Boa0;x{$KL3yOVM zjPz(k9@q{$IWOLvbirNvw%>$iRB~ZseExWmeLDWguqEB9F2VGwiL>Ph+-XU?Dwlkv zHdvKS8hBPT4`91|D)NTWsc}Je9h!k@mwb4Syz`uXOgwFidn#|Hu|4N{w1=94jhk zV)~Ai@1#6l|I}KjJH?m)XRoZ}{qCa+-vRyF68k$bZ;US>a?T_EEdYq-e{{Jxr^Ak8 zLVJMutPzpEFTDgGvZWh9U(x7W6l1nyN^Q>YVUw^Ejc!@m!Ltd_`ug*x6wUAs zr4o}<%kgQ0-r46-PrN=>tNjO{Q`Bn{&yud?_kxb^`M#1o=05y5GGh{&E>}+ihr9wy zQn|3JF)XGfCLpB*nCntIi9Q*|zg~?ow0M6k{NdnPu-s>q(993d$qJA z^#?y34lkbSGQC12d60{@9BLO zjJAUuIWa!#mmNa~nn(_-i@t_GMpbe_2M%10kWZhS8P0J1 zFZGTlAJOLecbA9%iUP9jYpudQom(B6FkfGvGGH<97^5G7v%~Nx0)VD&SB-I|w|*Mp z55bSsKO-ElMk-gS(m zoCJAG;c1x!$D-H;t$|G){TD&r&cy@?CXFL2@LK00i3$Kldw#xQ%w4i9R4`!G&YP|O zQt7u!uyY%b-7HR5>0-Je>v-|GsGu@Ty9xlP)w=Vr+7z+hrvg?DMXZgi*S>T4LXQT& z0hnCK@NHYg1O|FX(QYQj9a;-K!0G+j>|?-sBVx(`WW)sX_^28lYQO{l>*bZEzg)Oh z)i0~*PwWzWd*6eZR#aCu(M>ME;WzeJ=Dd_uvD{?M@f=kd`RPMG+My*_f@^R1wx8kc zCquTQrdc!ikczEY@_KX?^toq0lgej!JH4Cui82Cb$u&@#B@8F?ixdb~lZ}i=32Uds zfQ~SrMreibZC&K&2hJL1PWRh9leCw114!% zYp-~ai89Dn6Wo?6SHuCCrat|nleSfhD};S0FY%*#*!4&;w)G3MV0L%Gg=SQ)Oqq4n zX4ELtgp+^6`Q)<=6+ibv84EToX7g0^TKHVJ${)h3f zP0o_JaY;|fa%^_#a)qj6i40N|NVQxgF}*yzuSOUOXmu!Vz-bQTRm(`Z?{Z`)*G-{) z_L=;^dRw3i|DuyA6DgllbO}7iSKAMHOVdyQgx2 zZb0t7`udQSzsZe45eIQwaV0M4=%*F z146Gl|GvqNx7&W2Z-U{eX7$CQ&0_Xn`X^dNDT<|SNHsdRgIpp7@yRai)Cqh3$4Hgj z1t_OR(0=f6u%KL;vKsC{lh4$o4*giLX{)5s_$#74Vm@ju59RZaTE+b1+-B4Z&jtc$ z-^|_L{(sVJk89&j_A7V}4YBA8#&5-3Uq9Dtm!=|pm4!NuP{1Qxh=}uidc`c21-rWj zo)g>Ij=Nod263SUL~_=hV}HO`!OE| zm1~w5g+Uo6{B;m9M74aNjAk8v;rEyF+}%%&gT*ZxMsGw{$G`AsN}1cVC5_)Z1mkC` z1K1&MM=>o&G{JI|AoIDqa#X9R;2Vi=e8h>tppt)rUFfJi8feIZ@k?1niW(wz$7BC^ zbM%14vbshKBbSm|Q1n97nhEn~>TBU_H|($o)kNqIF2cn3e>a(jt8zWepVp^4tz2Lx zRK;+8vYJ}R#a%dk;%es+m-7{k2nv;m2$M2$c?FfNdJpd_F;>qAACGHy;SQ+Yi2ca% zVCa0#W!faVi3(#)<9%H!d58DsNwkAidD?0iuHveOM5J&sc}KYc^QWt%$UVPexTXag 
zYGP*jAjE@<+qCKI$vP)^bz0!^4fP)$7z9%MI)t0O-**hLW>oSJ-5T5{?CHc@ z2V=Yc^C|lI{{ig4cpxSC`^JgA41UK6k(cavYDo)&59OHiFjw$$<&{o82OwS;ioE2}-pKOrsZD>lN1=-K z1pkW+n;Sm^UxGj*{C2wEDOUXc1OFsz@@hJeAxvfHdvfEY81AyB+vlit!3*#X>dD6+9${v3aXD$d`=_T- zOsDYtzu~31&c#>_lHWIl4lg0iw_tDa$)2tP>O3&sD}}*bR1d@-N8TgbhnJgjh5UIw zxlrLcc++DC=FhcNSq=6+XTmzANnle2m*`~}#dLF|nhscjWs_UF`(zM28E6g)R9QUU zaA`>-srS0E@wgvY2fySrhHM_9zH}ihZjqX8VykRynqr{v68DoIGLL8SdUUG@If{Q#*je_yH;5_%M>G7rwo z+0c9V)$C07@yOC!6JquVzstZwiH#y6HGRLJox8AGbEWsUO-SrdDWA#7!jmkx;c zZ%63Yr`Uld4Q48`G(~w~L;(+{4q}ivpiBCjO_j_$hio;kJ8vaY_7p?ANwFzM-jl#@ zdsYXw#jOvzA@s4vrZ%(PByd?fuF6vf(n&@F1=+(Jcxkatt^EcKT`uu|P{hJ6I($>_N&S}yRn&_Eb8xI2 zbFLHw5sweH5D_Fw6%Uh937p!GIwh8hjwdlZvQM#qI*~7-&$qByZ%AjJ8+mx_&dW9w zwiiYpDIx^x_@E~#rh$e5IumgcYnoVbyWH56Kp&6pBZ22o5%6AXsOu=M_St;mo(GD7 zLG3P#d3WeRoQt4U(0w2cz%8a2^Jh^6j|(bH?}~uhneTf*y<$<=u?!alIKAcsCz%jrQR={ zQ~OWPo~JSq5ZIi?s8)tT8jCz_7T8hQOsd{XPm*bcp(3-y=8#*=)5zxRe;u}ykqiLw zw-`4Dn=+TDd7o4hJz@x(VX@zVUQWI%CKqdz2QqQ(0e9UegV_@5Z5q7kDrDR8zD-r47+OH;_3X)JmUTMAnop2<4&{jT%JP>a&|9)HZ@)wbF^#b3W@T&IJ?ZR)#TO3jQ415SC&gPm}M7)D0e~+&f0Vt6EuLNMKh7Ej# zQ|Dg)Yx?97IoUDSvu&m>?9AGaTQyd?*v#en&>>=~^O^-B6Ld_Ru~dnobHll|=YBW| zt-Oyx24HTy>S`ecvO5+3-95wCsc#riRim>mgRHrD938sgdmgo9ict|%8&Gd&Jhlw) z1kxuLMIaL{11qHF|3J%kMEr9rANE-mbFT#D{ZtB@R>6`tkGf^H+)U?U#(TkVm#KJ` z*rN=*3S(ooFJTX!tt#br<$tQzoYb1DRk4R;4-env#m-Yu&IHP1W7(_oy%6KDxl4(T za?=;&{?S4SBkpJ?F!Rl-!HtDM*^lUZs2g34I*Yjy)qy|Jq5p*G|0emP$wE;io8KT7P=<;jXL51jHAGG`5W>mn z`Oc!l7)H-E`t;u{0Ig{)Rs5?y*y}}H-x@by^*Hv_Hj(>za0r2om#AQsnD5FuluGQ$ z415cajD%qVH(@xiV0#BjFza-`VLG39&`HaT+D4j(mIg&0C;%gVe6?&_eT0 zcIj&hu*=T*74F5%FH{l5l2m0w^lOUYEk5?c)a%Y?Zp>dJ@HtI@+XN|>WqL6lTf&*) zc7s$D###TR_^b21)A{2Z$~aRF>ifY1TO?tGlJVpT0;El)yy+yFS;Z6Nm^?WhTyQ)c&)|%3@bxzOC9#BU#2vf!hO*=u zS=2J@+zpo&48Gmdd@o5|3+^2c5PxU;&f@Cv>{eMS&1Q{(Hb2pcC93!zB#_cP7V;Vz z_Bhj^;N6F4NpavDW)|{7c9%G0wrIMPb3EomnrLoir>IKF*keL4i~lN5a*8qdh4X7Y z`faWm+}7RnZ5!gdVZd0h?VbtW2K}qKO;Vh7=3deyxhecZherP<7dGGLiidY9e=T!Q 
z;yW*qC+S}MXg*8-p)*P_qY?SMQZj{7a|y~Xb5CYzHBVjm+5gQ|hj_F6zZkp7LC((^ zS0uyF`r+Y{3ZTZmJS{!fdAbhKxxhW?45hd)&(X_lpGIM!g8&Kz#3`46@eA{AS zX8V}uc0-kOeT%)?i=+czv0#8kC{^lH%iAEHgk^L;Lvr}3O3Y@uKPH?M_Msc*QZu<7 zihx@KvD~gET#wtBHW~fMLKcatn!E%ZKg`U^{Y*-KpWo3A{!5NDzySI3-)b8%N(~Oz z1(zz62&hVtz~j#AN?4pGrdn?_;l_~j{V7kl*ao2`KXIe3yl2=@J4y_ovD z(IUy8H0p$2&hZVJ74K+M%(o9s?}j>KBZBSwR5%Ha#PsU|S(6z-s7kEBbfVZfiQll$ zWl!jO1RU{N&hg19qo3rw230|Zt4HQHhm4Clgs7@EBGn)d+txv;;qQ^VtqV8$YlbGH z1-G@K!@T~>0}k47CO)m4Elok$h+Bx;dF0keJdL=wjGqE2!r-JYLkba)jTc*3lC^`U zF}0GHe~2p9O7oLd$CGDhdhWz^Vy+6(!^V?)SYue+d^#P6p3xf>FzcB3<_A_Vnt9`& zctDq%olfnv)z2r%69Wur@i|{)8qAAfQJgHP z`mb(IY^J4lbs20-E_3j^GOHQCzsJS$R1pz3tf{9oSvzp>lWbK>!UNVsr8uTR#H)!$ zPZ|C;2e%|HgiUo@#l6dXq?pw2-o1h?BXaAK>$dB3R27-C0_h$#{cjT}tzG7Jo_$0f z%TgrYuDRC4GspVFDP~;6MHe03fHF{lG|U;X#X>N>mK9UiQn6(?o*S|Car;A4PAJ8~ zMtbmKS~wkq(3b8vir(B{R8}ToWbc`@7=z#+AOBRQZWJVRk#e|7&@9^C({J9tT%3iM zBof9D^DcZkVgWcczhGdXm#aO>4FRLg%7zI4e|$Uh+t>ev)Z43PG3{vZ(aO2~yEd6evErqFn)cu`KbQeO%$9$G4mlBRt>~Cs42$GXuQ6-M)od5A4`%S zq>XA1k#ImJqV;Lxa`fW1NFJxfZG1Y>`|B5^Su=f$O%loq$!mA@hZ5)LcJ3A3Y>SR2 zM@^gjMr<_ZK9Z!S2&)5NurTU$_j>#q^7E6=^AR6+(0l9iqqX~QysBViP#phHBz5%! 
zT9|=8bV~N2K?L>>EOHl}N6sowP*#m6X!9>yCmXMX>|ElVWy?tCNZH(zC&=e?UUq$; z+joF*emf1qYVVt9b5Oj1-xDDXiE5>4#=_*qCZ9>LXhrh!0%Fh8Jv)%xz{Q0(H~oFw zZ1dLLm-n_?$f!|;p_o94iI^F_x}z{o(K7D4R-e;j`_K6UrT26q7;hJJ5Jh^kSW9Y~ z7Hk>x0Q-|F{=^3Kl_!N>irmBmo*M|2mdB_ONHf_UwN^M4+&w+k_Z!CEHM6l}b}DZN zV8A5R*iAgVX;WOnqq~PnkqaB>GJZ5EbIxP0pqwbTu}_JtP$ThZW6FHjBtwD`2@Cz=+4WLNWx?wNd=*FZo zaNc+LohfU*hr(%>(4WI%^%x>v9_PyN2{*C|!o+FMLx}677Y~TFJ|i}^jy12t7}!LO z64XxPfgr;Ek~2~#J#-H@#7O?MN?bTy9mui5$YsMO%8L{gi>GHb$VmKvFWx_q&d;%m zFSmA^;O^^2)$R&#ef_fCM{%prMi4qH!l6tvX(Xb8;BP8d=h?lR_7GidOOo}5dz%0Y zN1q)1Orv{NR686!b&Bod7H0z2frZ_wSNGtvxv^8U6X8|;;=ntjgua2R!1)MoS4ztP z**z>&{o5-N<|*-W2ftOI1vd&9!R;pQJI(}hNlXmfcu)d|7J+b9QA{4He5h&=<=thn zrmrbn!Gu7vwTf?=$R}tpM!0<|550oZNDT_-jCP^ z{(cLlW9M!4PK?dxa95tRN8aS<2^a7aK=45-bV%>*tM|!>MXWk9yny-6$N13Yl+ih| zpAAKm@(%D5u`9NIp&u(8&&@l$pF5K-K5=apVYJPW5TYCy@`%}gQ0%0_AV$eQ#u6JE z&Zj75OjH@WQSaz6c6}vjC|TDqE|zAJqM0|jQIO${JQ#v*MzE`0;!Syu*@$r#W?#$} zb9Sa1)GFxj6cTAwR&EQ!W8lDs)QiqN`GK_VlhLw;d14(1@}Ne^49o813#I3&K2 zZRWBpEJJo%%s}4&he;jJs|ANKSV)y*5Tcsi3;ID<@GAIbpmd|&EOHb9q27*=Hq7IP z2Dto9R>X!$)6tX`p`_5pMbpL!d2A7j0vCPlxz`%r6%Lqp|Ma6C(w^N@WjrC) z@owa@P3EYD4y}SVaqx?lJScUy11=z>#XLKPDdl;g3^&m8-RMLbIw>hfpkq2$`(bO~ z-`8aBvj^wV{;r6RM*G>?e^uK`2}O>Wet7lqXxoKDAk&xS&F$i4yB!5AEBtZp#{mV& zk>>RY<@Nn|W$HL0E&S)yJ8EYUWn@5>$y#{kT^<_qgJZmtBz0Dj-m!)v?q&@XNpgkV$8eUz|cYMNtI?B3w!p;OTGj;PuNF}%T1l=3wg#~1( zH$ON~m-ZC8=JP!C(yXW8v)uDg&$0KWKkm8@^bbkEiF?}kgQWbnQFBJzm=W2S+f}(D zKeZFa)(1JptH%vyGPKdF8qGWz*|QXLM{~y___76xrV5P2(mH{LX@St7J$c{M+}Zhi zW)6BL^GqAAG=!#1oKuNgckbTfe7v{ti%c)LR&cqKA_Q*$Hm7&#>HAHBe1QA7&Ozb6 zn?vi8ltdYeJRB02I8NqlSLNtDAVzvZ|kfnAsj(H!8%{>Y+biqY4a zZg7(wxQR~^5qU#}FUmdCyO@+7*ba5f{dEwHBgu`}saUE%OW$QVNQUTksDWS(E-k;F zX$qZ7DCf3U}Am^bM84;)N5`MaA6}TQHi8ZnQ+t6`|AhMdFRQs&Zt1R}W$esV& z?Gpji)^W*sXOhI9U39T6{B+8B$Kk#XqSQVn`bPB;LUU8wUpmRW5srC>+uLENCXRCS z^ygSFOK!eW#m_i)o)lGg{APZ#rod3NqGwYb7V=My9fW|d?ZgFc_PmkhFYF9X-airR zUFY0*_^YF0uhVbob|2F6Dy)sUm;>!W|At4JVmF+cfRX9n_8oWT3GkhtDRV4q^ZK}% z>3)5&H0FFL^?YyCPNHMIYBxIb-eI8yhVoq}XN~2V5urGp1 
zZKW5S;-^dgfp@df-+nUE;%S*3x-=~J5%ZltytfZrc<@7>`YU4a;9F8hX=*fQE-0Wr zk@mOoYk@zia(g4N`BY1BB!(;%oTzIJw#RJurca-vvF19;ASxO+np{9cr15(9_DR4gCAPJSiYNXU>cR?UC3NwXwa_Br8QFL^=(CI#kd&G+G$q9 z&=lE=Pw;BgYc$2}x=}2=l}DNG8U!tJ^fWD-%?hKl{ZRXKJE&aj#YaxB;Y$D4n+pWd z&`)LqTA@rg?IFQ1iKlgGhNC6sFfUBU9zz`Z8S0m0V6zhDEN13O>wc-YSLoy)J4^N2 z>pKz6k%WOL=K#Y`BFj#S5FHE>-rXK+IZZdu9L68LIn6u@oHXZ!TE~0)!EbkW50Hct zHT$4X>^p@&xuhw!ArQ3{ONxES#POi#eB*3nspq|9vWc}XD4W7UAvTISwu;4|{xNze zxDM*kLPp=rlDdES7VwD$QqO=5Dq#stAYy_^iqby2&X}yzut9`PJ=ivII=KrbX|wfv zCwC~-I5QnAq1}8DIw2vTlUzbo)H@_z1QDT!9?ekXyCL`ob6{Sk^*>(PAU0sQ^+ZiO zj(_atBDaLzP{Tz`{KRM11^haVvD+5ngROB|p_~xHAa{jCBnZi=oN5eQWOe|@DBCYm z3&@3voH^h(V7QxcFnVFZR#KXrC)-^eE1ltB+w$nHqzsAycWKJ&JXzXe5PbKv(u4@0 z1jWt>TtH3!Dy&h$;^@8k3*qf^0;97O`{7Hx$ z!}b!BtGnUf+FKy*gA@jjtV;?ynAE+xT9zUI^B7%-xdJ1(q&Dn+pCgp|BiT3M`RQNgaL7leIGv}bkx+|sDe~3Z2iEbnekBD}6 z#=L_gYIa4iN#5=c_?=hghYAh~Z z^3Q4|((GI$hp-q%y5|x{(4}^;R`s!!p7sAM{w{KqOPb_)Z%o>=-v^J^`zYf+=t3*n zm7dx=lCExa9{VdJEuy;al>2>-=1n(D<`s&fW&&W79GftzZIObhT*I>RoJ+_#BxN*t z(D91d42%p1*S!HN=oyx^36*F_wT;t#^#+N%#$Hmh)3beSz1NWLggf427Q3z^Vu` zr)FW$3yPbG8)X$^n$`}PPh)Oz^a!+koAm0#!HoPa`0@SKc!o!kwOa&=_*Q;M^79;% zotfD|6a*dh>^@I3JeWHtw&nvn=U|X@D6e$rzxlPT?JMX(3MAE`tPKjZ%i3fF8j&7d zMD=#yl-Ep>-o=Q}Y)Oe2@schMcmFhz;tN%URCii{Q-!;;-)6^5hIAg|8EAuIccX~! 
zI@>KA3 zXDFWyO5IJ27uGf-yw23mD3nI_F_3&o#z2G)R&7raGuD3F+R-@HaU2Nx{g$Q$d3554 ztggu~QxtjRlNf_EKoc1Uy}#g2V;1gMIVFa`F_Qfm@}uZyr=j`{+!ef#qsWA z?B>u?xA$7Ao$h+z9}B!fDjm^`IH|H9YK3i!n;vk zw-0&OxR*z*76a;QNMkEfURo`50(dUrW^G%c!S8b2oEU|Lu|~8r0&H>jiUXt`|6zdyuVGw}|J$8wwCc=_dKJlglft_X~Uu{x(5QBGZGfI}3 zFDgt8yfVZzG%VFqB=4}cgsA2hW+s$xeaF-!n-{>YTq z`XT3aD8x{h>=!w{`Q2Be<_}U2D9EFXuTx>9rtw9P$u0>y)hl zT`88qLw;@!ZXyn`v9Qp(Uu9eB=k`4XjwK?8vYy$XWafV-*nw@yWWXUenUlTA_L20M z2e&$naa( zR{MGl(YryI(L7N4uU|}O0CPK>Bxo5&YsdErr0A~y_pWeLN21bcSr67A3pVo_;f>6Y%>RQX_q0SWz83my@$(P z-D{zjDqCB~)W2SeiK(m*_#ZUt`1(8YD`Jq#&zW{N0umYZxtQ;2j4s^c&4&hqP41OX zj)7=GYn*rnkFfd7zs#iP-GHnvB;$)EhT6@Xxubv|NqRd^3^@ez2KBFq1_*!0hjF=3 zOl)oG#=4Sn&%9e6JCB=;`FW7?-K7wMmk#RBK8FEVNL;MLP&GLR`nq35*s$ll%7PMP zc&tuk^;)La2s3VA%k)%-EyOwvI~5Q*P(Xou)VY1_a<@>ngk!4v_6w;YA08yJTbAC$ z9_c{kB%uk`m@HCi{}oGnLovSBO(tM&<7iav>of1a$*H221I0sjpI&e8v&}SV9{eyn zJ6pWL)ZZ-Y0HjT8vp68Dx=fYf{CN5oe3BVl6pRA_kBonptV{(_$8%IjrkdUpn0 zVRQQSvSlZWM({j6?H;obkS&Lxk$zmOTShi5@wjUQm{T&FR}VMN^X^s4w(;r!wWAm~ ziyav3_+rj$fy|xmCrM8#GR>TXyMM2saoWKufpiwpi$Xt`OvldZL~F00nHrdDU#-xo z;Nd|iA1`Ds8#Kx6E2oW0-)L%eah4X~16Kah=`l2Qc9uao3qO){6CyQsf`1usPIjEo z3Al!eIDr*9DW0b2c*0rn8_f4YYC&NBJXUh33<;N2l6qF!k1lRDa8QtxoCaM{aL@F0 z8W;;_0lx<@J~%QJ=%5>TK~Qie`A?_lsTA zUp^TIW+9THNO45 zlC7oWfiOVZ77Hw^zE#jYC^(gd3JpEs%anMp6v+6vX8|g9ry6+PO^g8SqcgX)zB?gg zFI_|MaLIV^W(#R#ggQ2lhF)w}`O27p=bHZ=5{}dG%cQpS*}*Kq+?fnbn`?vj{BZ z=;|)-KDm10*Ss5@=!Zb?)I46Dol-{bE`~{o z>5@&0(BTmAJ#!s3#qDZfH>llESnTaO+L2)6;~S5bhnq{Jk$_uVN{Of0NL|aJBYD`} zz)!@eH(Eh1PxqeGxq8>)r4JpC%DuF_;nFR}MO=eNUtu%4 zJ9+kgku#?#d5XY#-_60zB*TejhPBm~>xsMVsZezWQHmm&qEFhowLZRRE7-bD!p+s= z{dzdaN8#9vGoMl8mZf0d;?Q**jO$+Z9VJFjI4#a_cHo~M$IB;#@}LmjNd3!UM!)=x z68EY>8G|yYSS;dso}@NRP}6sB%!~YzLZdS^?xDJ+K%6yDpB{*i&9w3%Iqxug&dSI1 zAFk*ePIn_DamRfi4L&r`=uGArzEBL(?4{)^w+~|QS2i1J+0Qb}gD^f#drsYx{974B zj=*c?^Tyo;B^=MW58bmy*7XqG%bk5|oAo+l6T#xcAL&gpuefCboVxg&z6s8=Jaqpma?qjkbqW+YoA@a( zCz7`~RS&>36zjt=%MD1{adS#Yr(6Mqi^zB`ZqK&iv~huiIf4I*5YnA+ZW=(dUg$5s 
z?ySP2jx-`K&JVXoiOOrJ3`IzU*Pzg6aOn5dQ02S(gq|h@5bz|s&Gije3<{eKX6CZ< zI*4Ll0O+1;yKW|j^6%ipaS0l-c=x%vWCO@?7N0KbYzS6*nq8AE!da!_X#jNu`JK&FSEVV`;QXQi7yHiVN8eu-px z4TV^B&uGNdfsrP%`H{9KxwtCET`BIFBW@;M&JL7q+`IuFJaWeRbN;z3{g9}GT+p-) zGTp#4%JYn-UIbv1(UN(WCC%RzItzGCIv@#U5O{fDU%9nGskeyzMj_AZ3%MIac59>h z1w~j+qhAh`J;tY6qa&g3qNQ=?HVBQI+t+P|Wkvr7i8x8*#o}cP5M>=|2my3w(?0;L zofD=RwC>(`Q0jf@mC?X49rGDfRh5~FLgwb2be!vD&o4ATVMrV9=p10KQcF3v!yf2k z4A{BOB^L8ED)cY_zUW%%J21QCR#|_0c&%stmjbA_yBfI6Wsm*|=>2y)6|{N0PYXVzs;TtvQxj{F!pMJmW16hlz5=lVGqXobij8wf7sccv zC8qX$M){P4ThRcIavs2+eZO$5vHo^(Vi?)Qby8l+n=(k=)(F{k=87yqsWMHP?S+^J2*tg|hzhtRy$ucw`$DL{m z8aq(?S~lhOS((!^y_g65)n2zxzWBdgZ>h&G@&{#qB9q@?@kOTWDRd8T;PZcLpdR&C z>+9$y2dM6g2X$Wd^kqhr$$d!IZ3<}W9{)zbX14h?LeuR=mxrhYK6_i2+^_w|k1mm!?`-f=b@JHbWxB zMPa;YwBd9-3zR&+#~an2eA4{A>!0g*8^0T8qP?+axN29Rzr;?p2O!ez8uA!vKiwo~ z$otu9Apc%`J@P2ZwDuI;EO3LvfLrZk86Q7n)_X|&)TOWKOrYCX>&+ggxh4zCipU=6 z$%yeD;bb<4X8iKsEWqHkj$Q^0quq|9aroCbqgX!Nv*;3I6`3e<`t)n$*k31jbA)80=?>P|8QTq0`Bj1MNTHm{lWi&bJ^@u9i;f#-!EHQ$th zP|I(&-lllVZYL&o@Ba^bZy6O=)BKC#E=h1tAVGq=O9Fx57A$!1;6B)30TSGTYp~$% z&IETG++BvjVHo60p67knI^XYF_rK0Pw?FMQ)4Qd*>Q~)WyQ+881s=Y>1evmM`0}pY zUO={5TBg_fiuSVM^amqC}=P3=`R}zj$0* zaeGP!?H7}GYwF#5Xa`N4)h5R}r*oNoi;l^rNb=Ipj8+;%n{==p$vUn>7;4OBJ@fya zrrtn_I1aNKX`HJK*ZXPJu{#H*=P)!Z&;9|H;k?~lq(@H%G)#lry`7qz#SgdbKmr2% zCrQNB8|xMuhxFP%Q{3G9oySKC&Ryu-@T(d>czU=5*0A*?rB4;0-&e0G_HU4%iP7y` zH4_ua+AM4gd~<63L5E)pM{UQEDwG$1s3V~d(bLnrNMw+&6RN)X-8__YsS&IpCNItQPy1U36gKv_WjN(;_L;ezWQ20PAqalpR`Mh*K6Qy3`X!E&Bc&%JBQut;@0_r^ z$Mf9{q)uQLUg*CRFc88T^k%*V%vXLZ6JiF^5r zI7yK7pLRHihfmTBa>@qV{^7L$Pw`r}PX{Z49eCLB=1+~)3=?}j*{XV;iJ&m0=R$<7 zZBtEPM!FATWx8BclweZ8WvSW~mz}O;vwhSlvX~cBMqQy(Izrm^ zQvZrovo(e5E4BEC8_QYgS#RrG3^c*+KF9Y*9mfqD_5mcNuVQy=gG6)JS@wsW^odpo znQT33j4Ym|{B3TcC>Ol9?-nE{?zT*xZSkVE(XY9x@qiRw07SWfc z!e3z?SV-(zL}Z4&tED@X6dl{cm|2k^At6HW`$3K!%k$^3?J5&j20O{LN3%RU1)cW~ z#@*+kiG+KgG|qQRF=IxN)dXLMsd1I8S5WGstGh28g3sxs_hkC6KUY;Fr$t+w{WKRY zzv)?96YRqez;swdzZz>Hq^SjM+39R0e303u&-)8!66$QzSyil=9jMp-ka6j`X{VLL 
zxa?$ORCGxZ`26PKQnwR_b%ai9)HO%aH%b|ZWog>oCAame8^1CapkScc5=Y`+L^-yM${&w{|CKn zbmek6>wRXC*7WF?y4clrJRPtWjnH~L?ykPOJAP9@MX-5u*FV@O4{Ali(Fopjz={&2 zox`nfEWCWt*psCVe`;vYh;I<5JFH9%Ukpc3_LIh61dSgG2<0q@cQ?-myE)ZL)%_t$Cehi*swR-$Z}< zcFE|jInkmu=c;Sq1A??`E71#;iaC$6)UUnkqX- zxndDl<$M3A5_5&(IYe6rx=t>vb{#~t4~KevjHp`10RgUosqr2V-PMeEAjJOL9n1zy z@)vR6izBqs`QVpAI;&&l%;r+6)q5U3x?z)a;O{59Lk!bd+$xvID><&56OZ{v5Z=%W z#7j0s|FDiQ+x$Gz5@8YBP0!(4&*#0!SqrBXNhx@{mFf=xu(bQazCEe=q@oyIhatE@Pa(z1`m#?!=wlYVWY&n5mRsAx~Mc zf^pUh&g0e_r);YrY7Zo;#UKRkqXt~M%JyM^qF4`&ssl9!$o^0^7pR}hV=>}<$_?12 z+Uj&oebHn)H}KLI9Hn+;XiewnZI9r;RFA{Iy-`fzr*{f^E4i%G>eh;k-WmPwbidTx z1MiK!c+E8Y707F`;QL!uCmPXXhUPyEMSYhQD%hCMcKIFz;^K6F>B2t4uBz8S_?gGo z*m>z9&!@Q5QKuytF+y13UyA-23@UW&i8~$WT@`8CIARE%Kj)P4QuyR^ZXpyuI5Hd1 zjJ?<@$HA(LS-%za12Lrkti)?A+_vle{mX-Mcy6@>j1X0OIi)5Dy?oqKU4@gc=-Mb* z%J0wv!RXr=|4K$*oPVexrUvPq>8NBk8z+D&*IRvPuY6)E%~05WJcgFY&>ff9T+jIL zaztDwq@l}Yf zkqkzwW8xF`xUU5s!uDDPct~)--JxN*9+P_7d^sC%rxo$N{Lf@DB5(soUB3=i9_K(R zh=7K;=hp@ucS$?IGfS^=!&C&032qtCe~=k_jw*0Z_KD^j6ExH$O{?u_eKqXWrRHsK z9?LKgqZ9DDmA;)s&7UrX$rN6ATq<@1Y}%@nHI`u~dJoTi@Ur%%961&gZ-6;1Ip!`B zcB7g+U@Q`-S|DdrTLC+b<^jBQ*M-KF>ncrq&@S8V-Ek>bz~PZb)eFiKRHGm;>Q>o) z;2}KcP;35PNLSPNCpWu49+)k&!;CAlEdsLTbN-CaUOWt)snT}#F>!}9rG26v)#CxT zj!qO_h(npXVZ`N+ZBJ1q$-z0&N<#TI2ZHIuvk)8kqN*iqX*My5h5np7AHj2Dl7z%; zL3*Ts_WT&6@RwCsDg554p4w?wMX8>oBcIW_*(ZQA)oob2W#8%rd#1EbZ|8CxONI>4 zwLUm)LpQ4FR3>F5b3=iW0Xu^4kBAQDN}n`b#Bn1p9>{p{IzO9ZLiTt*VsO1Zc7?Rj z1!Gqkh0u*+vbsd$su<0oFxGV(0<3pKf_l|_BxlwLs3ZxO()-tD;-Ed!gn|5hsF`I5 zWU#b84-)=VjL~#sF~5Mo8&&DQizH14lJugdpEfUh=|@fTM8GMLT~mbx?m#Mq25NPN zMlBT;vN={f)K37~2Z-h!`j!UFmlhJU4_Y)0PoCkb&<2!V>C8w`AZ>>p{&4PrO*4xa@!?O?w_65o>j$u6yy4Z_O(S^J^i)@6kS8 z2(y@rb2koF{_1m;I;H1v5#JE|h~_3h?)5uexD{6^-Uw*o?x~vq_2d@kxDB^K#M_ZA z=g_ulgZ29E@;zMhbW`w^&?MEK(fqZdwE@9T*1%k;W~?MhtWpR0B#ZY|*bRP!X`$dV zkbP%ZDrfgD*7CI$S3{RAsX_aYQNf+*?VYE|T(aqf$0>NpEXh{qT_7En^j>-uWND(! 
z@g5>^`DVU~X15KE^EjA__M4Y0gm?rTw%*H>^JuE;%8GVx($JM|90E~}mhM0U`%)dw zN#!s%8@HPDuYs;}A9&Ua&4tI0zu~V1mVCHwVQTE^!_0wvGfS6-*HPl&RS`%ZJ%vU| zx}g!LQjgbKuc6@INcqv&*;ByuV3}&PfrdV^m_rm0SB-VjrW4297Vu-<(i~1JZO|xZ z!Kc1}P&w1lsB5S-P4Mm$b|-~GtqLt@sQh$ar+er+g#(dyGxV;05OVW=j}dU8ubKWi z-9|W-78F`!@)Ma;0%Pt0Yl!`}SA+SMqKqZLsso+%5&MwpI5M%ra{k&M-x1`4zYI4M zx*9(%?GJnMyVsv+d+l|S9Q(LDe(oI~wSF;i_mZzHKp3Mg)eLI8U+UDK4T=^u@xe{> z$3MBn1zd121DYw zMQzMV+MLObI$+n;M0m*V^E&zNd0rxo9*U`oVH?~OV*Jj62lbaDU7Z99 zRkjn_{)H(-QtII=R^n^_CvEG;cWJKDGTUN*4}oMq&pU&cIxzgw*L(ZB*+O^xaRqPQ zn{eD}fH~)psFdbm_VC@NL)L3RA3hYsNt-S6A=LYe*x=LE`>g~gL)5wlGyw;Uw>gi< z^QU;^bA)S7RIa9Ag(e4NH!}CMuEaFmY(l8S1=XF{hLkT7y6C;`IU% zu>4N634*@<*;+#2+T$!h9itZ9gI{X0wgwX5WP1|*{Zv8oY2-L$^ai=U>iVeanl2Yo zJ4nkTX>jwJFL9yU*cKPC$HYYK^I}9Y)UaPR=>q0H@yI!G`5a9G*A^O@?!6&*^TzQP zSvOgB^-b}?(!?abt3zNPRLX`{>{`64Mks3wyB$aFB(T7<2ka;+!DZ4LW#SO%L6jug znRp<9+85x#xcK_mW*~L7`7nNs@R)Y4Pvc zFmZOJa{x{D7Km$HmLR!cROPiGu%8KA2){q3aapkOJ$9XH@BEh1@Zcu7H|n$B0kdlu zwh5V(zC%G$NGOWycmDPM{Z@3yd8A-pnhH3#TUjUrqZ(2m%!EV;xC&E)k^HVRvEt#X zxGSC912)Vhl<7&GoQ}@x!oQ*&pP#!Vu&~A-CIkRy^2|*b;8k2IAWApS{w>Av zAx%eN`3lZ>2O6N8IZP_m(4k**;xh(5{=~JW53~G)678CKDp){hCu~vQ6wJWj({JV1 zouymlWy|yNkzo0PA4>vpiKU;>?-oo-x0k3c}Hy{PG~w(eEqgd1TB zchwgYJ}LN~Y@t1-)l#5~b-K7Zxzts#nifi{Ls*pbyE$!ZXb2{4h?YO{=o)I@Vg<79 zx*?iRMNpynXU{M?S^#+o>gmABAZ-KUR(fv?wpfx%`Ax)a*z75Rwx7=A8V%Oo=jvaZ^D?so&7;Qni7q?}c4xR(-BVh-8W1k3NL>wc6+Qnrt?6^FW!>3J zkQgJ0*&08G*yI-&S+AniH-e_#RC)a__cA^3Uaxu(b~T0gb!h|6J2tXl*X-A@rCUL@ ztJihop(Wl}5(J&F$y3aIRNp0OWvE`wq`SgOGYINe zRY|e?COA!?N=FEXT{&{x7E_%%7@WA^JE{XohnC1Pzf7$VTlLVF9m}!Ls?s5JpiK>; z75kcOdmCs3_u*XS@pj$i7F{1^m51fbo>+*1f;8m@IJXjA<$6 z)59Qs7nz8A8eNoh%rJRbAoH*xt=w4_zRb7Yg%0yQ7pk?<~Xm_{E{Ons$IoukXWC27VXMtIJs z^L@GWMDlXwAYQM759Qs-w?loJKo{{D@cUD4cwnSw)Wfs#t%Af{R>nHJz&`k~>n}gL zZ{DV>BGye9X$`NB+))~UWVYJSydaYrr2A!wk>9t!Rqfz;8B$wf?9KN{&sz2M?0?r+tFQ1zfp?Q~qc(Gef| zVD2d+W@hC0gH#5YoJz8`H)`wd%im+_S4@URy+*_uEe)ar?G!n=<4 z@hSP=X-{<6NP-lue`OAe*K58Pwv((PSM2NO4VGH)9Xk=O=s=B3?qUnRb9XvlgQPzl zy6sq1jjy6#SF6s^PJt36` 
z>(4uo6wRVMHm{P00(8!{C1P~nOs}&__lY#!V7K15!=)#x2{G;hNKUP$A8<7%zX`#5rv+#c%voOd$Q6o7jz$OF>eDDue?Pf^N zxg-7Ej`3Ef`;aR4ta77wT)KAkIp@E2u4ZdpCP@0{Jae~jQ*SIc+F3`}d>JQSK)(fnf)!oZox7^~1l ze{3db;38GG597a(a$!H@iaSRp|l)j=DIv?G*sDq~XWng&pfR*~%Jf)Y+uKfX!g&OEBN7;j|^+O-} zU6+)M1o!e@D+lyNGq8YlT)!M?f1aR_tVB}quJ#aa0z1S;!(yz1 zqV2wSS9q*;#veHY4boV7Ngll>Ax0+sMj%A$Z3*LuXDGTH&@zUK^W%+^yv#PW=3n@Y;X3vv`5jCJM4OFLbI0{Pb3UaN>D1!Q z4mM<0dlYn<_Pf^Y2zpa>ALg_Ks;Y7R(#~Z6^d*MG?v|U)vCgM+XbxomgoM6Q1+miX zU}d0QLnW`eJi3+`{rat5wQDMe#+uZ?a-1zIsv-Nm6&fWFCF&^GSD5WdqRi&oy5jX8 zT}_}E`#j7x=N0T^+j;7Mch!o>4j<&D&=uR*WO79>Pd?0mD;D9cncfy7@blR1Y86<-s zthr~%O;5;OL1XoIe#j?Y#oI+dT-h-O60MXH;3pp#y+TGx} zpagppmB|!!v*&0E$2r^Pp}g#>BxZ3BMvEPO&)h~5?GpoMGc!WG=$8z?be6EXx2jCL zi;aJ7w7Xuw4g4$$xikQC)0hEWJPwV7njP=FPk30YTK>xmeU48?yBn88Z!Jdy^k%O| zHo8%-+3sz06dRCP@w)vqKQ(;EsygOqf*CVC`XS85F6za=XOvVq2S(O3ha_x4=|I9^ z_FJzlJ{8N?^kgK4stChk2O||TE5a1d@zyXb=aR~4dRPrc{ViZ9J;y_$&QDpeo<3gq zpNh>03*^9uH@9eFtC+SOXtVwfo%Y3!l6D_w+9wXAStmPZSA2RgZ{96C|g`UH1 z(?7+W=dM0n%Bm=98Oku)9UQYF&0kE4?lm(a;8SrFZUQF!JOL-M(Zg=@ z^Z+$s;oCs|Xm<7Yg4)V;H7L;N@lpAxalK@evZ8zGx3StZ@_2QG-W z5$RZK1S_se?F}L~muReIREA4`nH3)&g5r zd;D+&DHm&HEDE8F8R|F8Xcw-gbWh2XE?$Gu96sC&>JenaKMGf*|B!Bb*f^q&jMCIzRj4p_5=MRr zIJ7Vljk|~Yy)uN#a1awUNzx|um&Id9XlY1$0l9&dgqx@`^mLOHi^C^dqC%83t@~xr zqwx$-7{F)kXb}DuCZwK|F|=a_j-(_dL-{0iE1h+q@HE$FtOnXl70(b$yZez`BkbvX zaX*~8gS?4~#vp##`%^MsAG%Gkus~lI$ zo-r{L{aDX?`SgN=z3rRg@SM}-ZKc=UgwMAylrK;_(y_8!yHh^L)_`gh3mxtY6n-|3 zZlk#go_^RfsxBqq+|d{)eMGL=<&hoD$MdxHI@#-qty@hGMp?mDfn*X0Wrkk2jSW^G z^2*W-TP?c3KG_-gaRXRFM86O3m2FN@7?~hlFu;A&Vl#d&8`MUw=lXAXwrf(KW(xPc zy5sqsN3z&w3tt0K3W-d%3(K*o_+Ii44HBzY+6=rK6Fh!T!2S5957Jirfh5MKR`qtY@`_woc_dR#ePbt)#Bhx6r{MWhqi1EedM0LC|I>>ib-@s4lzv^4ZvMYCXBN5`M+y-FCh za=#`hWGJ^aB-ER3zI6(Dz5boMru)_Vmx1Q58HB&89WAzaDEg^E!(^RizOHn?gu%4V z0=2WJ9qA4-E+qEaXAOPQi3}(4^>EAPR5L)PitP$X#OON*-;_t3cs?|@vYdA+Pvfbk z{3@{WOXwWpWaJk6XbbN=Pd~bHTa(zr%NSEAInOQ}XijJ+47-KxOgDmp;tYC?Ng;pC zQf_@zoqpA)C9P*p&pIX6h&u`{X(!jXJ^)7|B#Rs_EBtJ|Ox|c}IK_%naA4ItcRLi?kQC8L#OcHzypEp>$i 
z{GePUk0lfdP|rt}!k2^6!{NPMPzsEsx!z?yu9sUX0iJnkjRn;}Z6aLS5T##-cp9ue|jZ z`DDFBwnj6c`+p4dWM>yQZCZ?bW7Xtm7r(rq$ls}@I|uK96R^AfKqW`xLOrd?QGFEN z?2PG;vu=vn4I}Ky0i74@Krp~%>H|_2-i+qTtq2nw#e~(ebjg*hqfBi7*rpXG8;Uz3 zP*QmxOO%-ui^nDLy>ZKM^}a6jq$+Uff%wBsOAYa1em(qk|It&3@#^Up%4R#xW%+Kt zn6=|EIhF>H)0!}FZuTP=N5Zd>%@FZ&+=fX3jFUPXv1ze_D|5R)%1o=d2bu+Ov5IkU z`G(4sK}}&*jjW^y*C{P-ipJ8!^E$g@sV`Hq;RXc&?Y>1lM)cG;Kl|~DoGxzV0?%sg zU5)Lx1uwb#9zjxVxT@Y3`P5lB;V%-3+Ss{^Q|NWL9jM5!-wW&=oBM=XO}RTt*${B>t1QpsDadp9|4)qTV3l{|2A83&%rm z)IB+yvmLId7SA~3+U!$=m_M!!B(k9N7%jq4$yqECRa}L+GW3~MxX$oZ4$?gO4O>=! zu<89>)ot-hwjG2YXJeY$9*}$ij>UU+J~eSCy?WJP+SkmicfmaN!`1%pik@GlXQ)uS z`S^cWIZS3AUmn#IAI&%&UzJEc0VblLjyxuK_EGfmXb(dvF<~HG*463(U{s>qLWa52 z@2tbrKKc+*;irY8OY-(Q0^4U(=gCSxL^ZH4-0#DACs%&31w9hAc{XQF=33tKM*V0_ z?+LD}Yad=nOkT*+?QV!ns9XO2*s4^>T$>`}ExfieavJ+i@ps}29rO?OQG6q<2TO{s z#vC6`G!!)bqXh4kGpq}+vkJZH5`$QlL$Aw*#SgxK^E|9SMnyr&DG83WFc}?As z-%xjgMn096g>+Ur)A3y^7H3@!NHenU+}bW#Igz0>eoal5l`-QCe|A?E++aI*C`Vqp zG1&wV-H-8dtarD~v4%Z2i)fU=&Zh~*irUTCu`pLsYAkJUA$cR`;84;j-}YC2+yiuo zBN7ejOvJm|g%paZ;(9!7Ru`#EERh;i&snvk`SHsqDK6>J40~Ykp#(Z=vwvu48(v(* zbdr(=wP$D>(x242oBG(Jw*`D4(872gs4DnZWYp_u9~ej~)}EuVdj2SLV5;HwUA4NB zmp9{tqTy1dO=d%f>88DZ*pna<%|m7*6$7beWpaV)J1-Gu4B4jCL$E6`u}+mR)aH!h`0X_{n3eSTI}z6Plk;`OYMy;v$?#P&DE$3-JU*6> z&2d`pEOG!+$Kx(%zHUOKPw~-t4Lf&o#>~~cV}{w3nur{_eg*+K?AslXs3&UX>RMxy z!_w882cPD`cby26Mm+hnm6c_Tc{jfks9_RWHx*F%@IqYnAaN8@VKtwi`Ek=)oT=iG z)g3iBh1GxWB0)4tkM|+>78sS!55iHHhiVTylI4 z!>D!T=kYE8%PIB*82@KuI%D6?V&K@ESuDoPwe*E)XyEN^qNNq@v8b%OKWF#k0n=M0 z;|qEpR~&BHG3Pdzkh?T8msq0MC7bA+*kG1BYGZ3T#n@YqUE+8y1CUR4X1)!xMp9cH z&8Ho$s`hn?MccK4od|E{*Dv05pU`7-jlK*0)%v-QBIVIFE-dIEX4B?Ir9ygMKqKw2 zi@L19rYzQe+HZiq(vx0s<;!8etP8iWz{~lM(_L)H9&HI=`JXqvvqu&OntSznXz*y) z8lPuoT5nYM0G7a-XV5+y!I#U5H*Naf_)|1-C*k4@bNuLG%W<(3HeS4<2fzTjXgqu$ z3KTpaMjQ2V|Ev)nu`|Yw6ITETiLpHM$}J4vh3D-@V?+S9Af`mF4cZGGNtf1M!7sml z7TPaj;>w50`xx`O`-}9!0`(265ZHBIL=<<{8T|!^CeDQb(+7LCWH~L@s3o`^PZh^< zaN>qTZjtJM09g>zrrkLRmmy%|)gJ9tSJqG$ZOIP66~DY2X%`T1vT8=%xq%+fGF0W} 
zN{A|SNWN@*zh<(NFXuxkTY#YUjOZmRgukpagjyKN%mPtq7~T}I>)V% zBB<ij05Xezgtt5 znfgj_vc_2YUP}3>njPbukIEkUV0L%Ju@@)UJ{!Aa-=;)fY_X~g;N10Qs;OhI0NTwP zzDBWJ7ooYxboW+AU@O7QFR@Z2W&3_>mkz1PG^+)$UkWe7bo*SiIPFUrCuI4#?=i~{ z?+aUAg!kf5u_8HO00LZjc1NXN50r6L=~>MgsgEQ@Dm5*Pgm5S}7Ua6};Yki^%Hw9t zupSpwv__rR_a^2uX$nrREKMm)KSQ#LRBjdZ?~T(H|J~D$7lllKDc{gMM!BS@=y-j#yyDfVB%I_zprvaoKlY|IbSs-4*?_t`{C}r!(Fs(e` zqyL4w8R6ZRrTkTyx1o}DZ&ae;GAQpZ-I<)L_WF10bSd*ex>%{V=bU_r6lCFyVsl)AUwz!gYmS8YOR-2bhAZ*l zYR*sE5lsl?+%1xXAW~?(P=3G#IB!ToT>i}jicVQ3 zFS#r)?a8~8fWzWWrYSxz1WniV)M1BY8MsWbeuRE!<9BcUS;#u2z-!n$nWd}XwJYoL zf>*wN<@3qgu~5H`B4eShiNv#yxl4#hf%W)jk0y~E<9Vof$Op3Kcm?iWT>e#pyWH6~ z)z~+ibeRv0RSc$YY^JFw;zd01!QA>yg= zb76qf4^?xG_@q}7<8_mh2sPLE0@Fjc(MzHtP;mFXbTFG{o*!OFx4mnELvWIpAgiiU z+n?p2YZQ=NPszxDZu_K)w*wUPLEh3F4B+)*!W{NgT4f#!L?$m~Wv=0$;)=1DMR zk$zG+*G^WC2gDz4{LoHQj5~>*HrRYIowX%dR@y~DmgAFy?&y!(ogSC&tbLb2JmQrG z$Nb@Cd+eNenz`m*m%gQ0$%88{uxrp#*Q+{m3hjDsyX%B;ice^wA_gG8ak(DqoXFA} z1lL7vzai78D~$ccFSZx%!+-QheOOv}1yfj8P3isp%TYhxFmt3H zBg&Sg`6T_1@}UteN9}QXC0mHy$86*vHLDqJ+kf~lCYs;{+OY?rjdOkD-dsw;2QUIv zr?uQGdNMN{#pMtBR*1`1l;zj-xmS6mj?_3+-?R=Wv-YiOiC*CI<;MO(qhm)XESaP* zta$k^Us;-2DXp?{7&#K!W(7p<+P-S(oj5d1NuZ$1)8b4SWVA~dl67MFxNz&`T7G7_ z!%lsLg2bwAnqBPfx>HUn$IJE9Su8|WP$7n=)P_3h`FKJ^XQ3F5Y{}UnRVKwdU_Bo! 
znwv6uz(MISf=DiGzcw@b@n>md{H4Lzw+P4jsTq|u1tQ}CqGl4IEgzfPOD?Fp{_xL{ zhQtEOM11YCD(-@G&$d8(6EQxYolPUU^z^`w6d0;!yFXoPG4TRUUmC4pw{;Lb%Z?Ov z7w@Hr(ah2wxpwE|ayygD$exatIU(@h;~vZ3)A)HutZFrPVcF^lb}el!O}fr7IG-B& zcvG*QmW0hvRGe+^k}FT@)J~*#PO2j*fVp(K%9huquH?MQ?JLdemlWvZ{z?#&ia}9| zy~K!fII!?tYD^p@&<_-0!@5J|Ma5lMx0(WIrB^LIE-eKN2~V?0a0u*mc6CfaYdBe_ zRwgd}-%m5go?@w%rP^$Ap?Y3l9J4=Cb!h2|-0){v*tE=kSorgj(`Rqwh+#>g*!%V+ zywNuTLKNYhHg`aQuDu%%;|%0wLQ zKy7pUShBPqc#W(+oCG>CS?Q~8wcD(pi48aW5v~)CoYk143;6RTe5ye$Ho(FK)SP~E zrD9H@`{_aGlVZChl5=XJanMJm1Uy5yP>5bY1d??HvS7fgw?ld7$xXGEGC$q{+{5Ic!cY_4S; zP{q;rW8g1RQQ?G<9%PR{ongNi~PeQz{Hu8Fbv=&^Y-pmAJU|tB`@CTQ7Y%#&^P>T5SwwAs^53c0ilwa`Lk&7!S$L?%tDQV_b%>(E==c+38iv8Y^ksKW9ODWMion}8%+ zN2i`^daVkBhRP?7H^EVL_;=)#m}UiGZL=Xw_868`SzA&4*3~l?p{=VMD`=wQ;-5Zt zFDBn=Tk_gJ9=WBz(nY9~`jj9of~Ey|!wafUgevXt7@Iw6#<`V!3HUEItoSde@?PDn z0jOq`p|PLt#nOpb7izs2VqldM52x`*aj)8EQG58!trdvCUTT%H z&Nm9jfIBSP0^f}QFvk$Qn30P390_r1+K_Q9;}#-tnG+k&xB%(Ng!?9O ze?-g%#OO6hZ0uVdwVxLd@eCkB!g{Bh-Lbu zJlFh3!>2YJVN3#-+M8NV*R(H$V5eo#Y03r06-(OIi0qv$s;gV2;F%eP2CL>SyIo(o zrQ;7N(CSoZP~3*fm_IN&ep4YFr=zz+i+_gD<4a%bfV@w}xf@~MfX!X7R8w+vfV_q* zg{Y!kKAdCnIi5+MuoGi)HTyJ)4a>gf$5?pxpCj=xuQ^3K#fr^?V_6_r;^JJX@QH|0 zZkV}|$?Q5aANWqSdCv<0iHxLLlC;93K02BJhm{1tRQ8VA*Ei)4qqmGFEb_g?`7KpM zT}zJ`a+UgYU)U>*2&FQ@*pE8oH~7}dTQ6uz%)ss2Kut9xq}nGmCea9(;tii6YhQbY zROZJ@$OL~GL)J}4@V9+Oxu)Y4aaOMmJu;l+I29wo`zBPVx=6!e#4olkY z*{D(53cv4tQ34ISFJt0DFW`^NG_0*l)1q+(JW)!^u1CnJJ5g&Fm zpG(KqYr4Bd^61BhXdmGvK#Ie-N|ux3YtFoQu@gH{F@y1iBc|e$G>5XxK;C-0^T?7d z%-vR_H7Zm8H5bl4_o zry+1qQdAZU5@k_X^JBeF$Ge$xx*y{HtWBV?zx;#g0{yyZ<`f)aJh`YRt$62vqw%&_ zA@JlV%x1rHNsu4aO5vcc7My0VdF!U%M&0Z}OhoG0xRBg*Y~1x zYCVjPft1mWwCPW4;R7o!wW+-J{KOlX) zKxLAmdS_$8;L&H6hP*q3;_|S=_vcf*(3#*K}?CDAh=-$XJj=`LLC|=1|#Sl-Q&cvKKK|rHUCY% zln@uxKAA9BCJeI(UpCmNrCd?0SM(#F2_JD&?TDr?&jA7Et}YbSzYR~H(S`ssbRojg z{8d8u$i+NcT?Q+!e61)~AL=b?CZ4M3yNJM*4aU*kfKwz;pvpbrb>(`+9rWiDZore? 
zh4G22yv*yQ5+DInL4UN?5U_Be?;#r9Di_85Y%x~A3X}tnh4#^ zM-xtoxA7&>^GIjjlFm+vjGNU?sHR@5RmpR$_m;LGJWpNpNBxW-WXEHlhWl>a&@<2aLo{u?_G^8Y`5|KDg(of0Kz;b5V;3vSgSjYF4L*2NE)KKbIgVZ6LI z_5J%YRZGVYu@uI`SH^z(<9}EC&)3amgWpX~Lew3K{|yjDYz$|A`JSjgh3%dkBHH;6 z)jzN*`G<#xJ$iR9adI&GKk}ye06S9*|8;|YUv`hc9!eWg{5NXw&--_OgI*Ek|GKB1 zrZx0uh&a5SKMV5m2CCts(a$lLFL}wEWXE++fyqh;n$W()K}1j0lnHCxoX;)$6O}26aBKiSzmF)zEKS$lL@gVIC7FWGbQD$YY46%jfSklp9>}sSW3+uvY z#K1g}v^!*09mBI{Kgco?CC}wNnk^S}gudI{VMngySncCL+KZZ&y>N`J33gKAGTqa3 zk)9LYTbLPN@2ofo*sFdn&-a=MTjam_bDO${bVRkb_bVPD!fdXTd5Dm^;b^lfPr$*C z1To&QSnAQFx@4<{*>erxLLw5pD`K2}{NgQUr$=fs7?N`1iRolcVc=_{hpliLIJ0VF136~1cY7$c zBA}u9x_0m^p*d<0A$xkiW`6ypQ78jk>v7p4#+(0pr8`}v->tnwB+ejG?&Rk&!IQ0e z!Y2kYxnG`5$uwR^P}3iK3?HqR&pBt_sXnoAG<5WEO5EW*ZEZoF-TAz!M7=`7V4u&I zC0?<^k-Wfi`&;}o{O!H`NAGywE*uUTFY6#g-m8lR5oA8V6E|+w+*;^HMR3(Ibd(&; z!!-=f?an^!3-RysY-RO`4eX}$?kiGhxOz#s?;AqrcaKv(sfrTW5Bu6(vCt{K>`gnE zCB`8-8f1&mic?`;|)P zHtglRqNXXY@awLr&&I+tPvWZ%z))y~zpuu+@}S5c?=3%$G+w(5mw-`~qchwo?=37; zQ^{n%A}os)>?MIciH1Von5_rTCs71MiDCUq#h_6wd6i2;`)%UA);%H*(Wcu2Pni7@ zT#@6o>S_H$0PAG=5G(6KmB`(z9&@j(_HAALAET6qBJ_&p_-@oYCH&_7KdT2XZVg!~ z5%G7yAOFH!Qb=O{eep^Kro=D5`wrs0M+(v2qR0K%><`+9X$Orz*1Ey`3H@Kf?xJ0s zzw}g0G8i0RtKK#Hj3S?Zig@-s&O)J42tf&qk*XA7GszgLdaXJQYi-hI04`j4dk5z2Lf3{o=nO@b(|JC$(cOta!9d^K;cF zwb8?Vfu#%)aM0&A{9Ruwu}3jg#+h2w1jsX+YDJ-Oba9{R9W@#^N5GPK2l3zh z0v#iEt@9be&HNHGeu*vn1%-X9MHRL#H%EzOsAW1D2&Az1&#Wg#yAtQ{mJT^hBZ@K; zf0h=tQDXHv7n(NB?uGx2MW_wQOCgvtiB+BNZb{rHb6h{VQT2nC_$3y}#TYhhIc+C2yzP?=a~-O7&WBn}&unZULKPL$ z{_ZWk$=NGEZc%F3K-|_t$BKdUse9W#4_iHq_l5{B3UQ@pueHL~QH5|4VRlFLF@{ zi*n;hx#TYFKGM7zS`& zDlLmp8O+u6J=KdG|0v!Cp4s1Q8?t@6Zr2O)kudh(V7z_&Ps^L(pggMq@4B&Coo&bN&`5IZxVC$pdWP z)F(-*44kzb?%@VywiP{CtXg;QTw$Jx&bL@TY|r)C7Pr!FwO3#JY$<=9lN=Kn-txFf zL*3>FPf@P)>LY|1TjL+4Zn4b{JRAIK7t=wS7c#4&FtwHcdbB2{D!Vkw2WDeiwobl% z_!G4)+dEwRKvVxW%@2z*#f)d$^y>!>P$RiF+ECm|Seg zaa_FThLg5{$x`sE=n0O?$oqUb{TSsekcUmHqF=N-QdC zv(bOPYhe}+`+D*UZ1pLAE5(wc$SIcg32xB*NF-tHlbzM<7A9Z|yX|2xa1i~Y)9LbE 
znPS?#8Uu@G!WgteCAk3tH(Jx_?R4t`k2EBgQ|Kia zQ&r}k+|_&eU3Nv~8LU)A;3uWYgu{`JqHdanicYo>vPM{e*j~4-e_#PR?H=*n+&^-V z${}K^SDnk=f-)0T7l&r&wWS7*8MwSdJf27}NNG-v^I!i&SU#9~ce_XhtDkId?`#j6 zsWkNwi9iN2K8an<9dp7Df*N%q5cuS4gC_gchNigSD(hH$pld=756{m{^%_BZdSmK; z!rlLF->cRBn*d PnTo;F)z4*}Q$iB}_Z^$> literal 0 HcmV?d00001 From 29c0f5b8693bc8ca6e534e054cc91102f2bcf8f9 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 8 Nov 2024 00:59:05 -0500 Subject: [PATCH 04/15] Migrate feature diff for NN Descent from RAFT to cuVS (#421) This PR is an amalgamation of the diff of 3 PRs in RAFT: 1. https://github.com/rapidsai/raft/pull/2345 2. https://github.com/rapidsai/raft/pull/2380 3. https://github.com/rapidsai/raft/pull/2403 This PR also addresses part 1 and 2 of #419, closes https://github.com/rapidsai/cuvs/issues/391 and makes CAGRA use the compiled headers of NN Descent, which seemed to have been a pending TODO https://github.com/rapidsai/cuvs/blob/009bb8de03ce9708d4d797166187250f77a59a36/cpp/src/neighbors/detail/cagra/cagra_build.cuh#L36-L37 Also, batch tests are disabled in this PR due to issue https://github.com/rapidsai/raft/issues/2450. PR https://github.com/rapidsai/cuvs/pull/424 will attempt to re-enable them. Authors: - Divye Gala (https://github.com/divyegala) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/421 --- cpp/include/cuvs/neighbors/nn_descent.hpp | 92 ++- .../neighbors/detail/cagra/cagra_build.cuh | 8 +- cpp/src/neighbors/detail/nn_descent.cuh | 294 ++++--- cpp/src/neighbors/detail/nn_descent_batch.cuh | 736 ++++++++++++++++++ cpp/src/neighbors/nn_descent.cuh | 45 +- cpp/src/neighbors/nn_descent_float.cu | 47 +- cpp/src/neighbors/nn_descent_half.cu | 48 +- cpp/src/neighbors/nn_descent_int8.cu | 48 +- cpp/src/neighbors/nn_descent_uint8.cu | 48 +- cpp/test/neighbors/ann_nn_descent.cuh | 191 ++++- .../ann_nn_descent/test_float_uint32_t.cu | 6 + cpp/test/neighbors/ann_utils.cuh | 20 +- python/cuvs/cuvs/test/test_hnsw.py | 2 +- 13 files changed, 1361 insertions(+), 224 deletions(-) create mode 100644 cpp/src/neighbors/detail/nn_descent_batch.cuh diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index 347ccf889..bd41d1ff7 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -55,6 +55,8 @@ struct index_params : cuvs::neighbors::index_params { size_t intermediate_graph_degree = 128; // Degree of input graph for pruning. size_t max_iterations = 20; // Number of nn-descent iterations. float termination_threshold = 0.0001; // Termination threshold of nn-descent. 
+ bool return_distances = true; // return distances if true + size_t n_clusters = 1; // defaults to not using any batching /** @brief Construct NN descent parameters for a specific kNN graph degree * @@ -100,14 +102,20 @@ struct index : cuvs::neighbors::index { * @param res raft::resources is an object mangaging resources * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph + * @param return_distances whether to return distances */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols) + index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) : cuvs::neighbors::index(), res_{res}, metric_{cuvs::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(n_rows, n_cols)}, - graph_view_{graph_.view()} + graph_view_{graph_.view()}, + return_distances_{return_distances} { + if (return_distances) { + distances_ = raft::make_device_matrix(res_, n_rows, n_cols); + distances_view_ = distances_.value().view(); + } } /** @@ -119,14 +127,20 @@ struct index : cuvs::neighbors::index { * * @param res raft::resources is an object mangaging resources * @param graph_view raft::host_matrix_view for storing knn-graph + * @param distances_view optional raft::device_matrix_view for storing + * distances */ index(raft::resources const& res, - raft::host_matrix_view graph_view) + raft::host_matrix_view graph_view, + std::optional> distances_view = + std::nullopt) : cuvs::neighbors::index(), res_{res}, metric_{cuvs::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(0, 0)}, - graph_view_{graph_view} + graph_view_{graph_view}, + distances_view_{distances_view}, + return_distances_{distances_view.has_value()} { } @@ -155,6 +169,13 @@ struct index : cuvs::neighbors::index { return graph_view_; } + /** neighborhood graph distances [size, graph-degree] */ + [[nodiscard]] inline auto distances() noexcept + -> std::optional> + { + return distances_view_; + } + // Don't allow copying 
the index for performance reasons (try avoiding copying data) index(const index&) = delete; index(index&&) = default; @@ -166,8 +187,11 @@ struct index : cuvs::neighbors::index { raft::resources const& res_; cuvs::distance::DistanceType metric_; raft::host_matrix graph_; // graph to return for non-int IdxT + std::optional> distances_; raft::host_matrix_view graph_view_; // view of graph for user provided matrix + std::optional> distances_view_; + bool return_distances_; }; /** @} */ @@ -200,12 +224,15 @@ struct index : cuvs::neighbors::index { * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -232,12 +259,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -262,12 +292,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset 
expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -294,12 +327,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -324,12 +360,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -356,12 +395,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset 
raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -386,14 +428,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; - -/** @} */ + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -420,12 +463,17 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; + +/** @} */ /** * @brief Test if we have enough GPU memory to run NN descent algorithm. 
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 9e4d453e3..6209ff819 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -33,8 +33,7 @@ #include #include -// TODO: Fixme- this needs to be migrated -#include "../../nn_descent.cuh" +#include // TODO: This shouldn't be calling spatial/knn APIs #include "../ann_utils.cuh" @@ -356,8 +355,8 @@ void build_knn_graph( raft::host_matrix_view knn_graph, cuvs::neighbors::nn_descent::index_params build_params) { - auto nn_descent_idx = cuvs::neighbors::nn_descent::index(res, knn_graph); - cuvs::neighbors::nn_descent::build(res, build_params, dataset, nn_descent_idx); + std::optional> graph_view = knn_graph; + auto nn_descent_idx = cuvs::neighbors::nn_descent::build(res, build_params, dataset, graph_view); using internal_IdxT = typename std::make_unsigned::type; using g_accessor = typename decltype(nn_descent_idx.graph())::accessor_type; @@ -471,6 +470,7 @@ index build( } // Use nn-descent to build CAGRA knn graph + nn_descent_params.return_distances = false; build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); } diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 8c5767c50..883d82d76 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -16,42 +16,41 @@ #pragma once -#include - #include "ann_utils.cuh" #include "cagra/device_common.hpp" + +#include + #include +#include #include #include +#include +#include +#include #include #include - +#include +#include #include // raft::util::arch::SM_* #include #include #include #include -#include +#include + #include -#include -#include -#include -#include -#include #include #include #include +#include #include #include namespace cuvs::neighbors::nn_descent::detail { -static const std::string RAFT_NAME = "raft"; -using pinned_memory_resource = 
thrust::universal_host_pinned_memory_resource; -template -using pinned_memory_allocator = thrust::mr::stateless_resource_allocator; using DistData_t = float; constexpr int DEGREE_ON_DEVICE{32}; @@ -216,6 +215,7 @@ struct BuildConfig { // If internal_node_degree == 0, the value of node_degree will be assigned to it size_t max_iterations{50}; float termination_threshold{0.0001}; + size_t output_graph_degree{32}; }; template @@ -300,6 +300,7 @@ class BloomFilter { template struct GnndGraph { + raft::resources const& res; static constexpr int segment_size = 32; InternalID_t* h_graph; @@ -310,16 +311,17 @@ struct GnndGraph { raft::host_matrix h_dists; - thrust::host_vector> h_graph_new; - thrust::host_vector> h_list_sizes_new; + raft::pinned_matrix h_graph_new; + raft::pinned_vector h_list_sizes_new; - thrust::host_vector> h_graph_old; - thrust::host_vector> h_list_sizes_old; + raft::pinned_matrix h_graph_old; + raft::pinned_vector h_list_sizes_old; BloomFilter bloom_filter; GnndGraph(const GnndGraph&) = delete; GnndGraph& operator=(const GnndGraph&) = delete; - GnndGraph(const size_t nrow, + GnndGraph(raft::resources const& res, + const size_t nrow, const size_t node_degree, const size_t internal_node_degree, const size_t num_samples); @@ -344,9 +346,14 @@ class GNND { GNND(const GNND&) = delete; GNND& operator=(const GNND&) = delete; - void build(Data_t* data, const Index_t nrow, Index_t* output_graph); + void build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances); ~GNND() = default; using ID_t = InternalID_t; + void reset(raft::resources const& res); private: void add_reverse_edges(Index_t* graph_ptr, @@ -371,15 +378,14 @@ class GNND { raft::device_matrix graph_buffer_; raft::device_matrix dists_buffer_; - // TODO: Investigate using RMM/RAFT types https://github.com/rapidsai/raft/issues/1827 - thrust::host_vector> graph_host_buffer_; - thrust::host_vector> dists_host_buffer_; + raft::pinned_matrix 
graph_host_buffer_; + raft::pinned_matrix dists_host_buffer_; raft::device_vector d_locks_; - thrust::host_vector> h_rev_graph_new_; - thrust::host_vector> h_graph_old_; - thrust::host_vector> h_rev_graph_old_; + raft::pinned_matrix h_rev_graph_new_; + raft::pinned_matrix h_graph_old_; + raft::pinned_matrix h_rev_graph_old_; // int2.x is the number of forward edges, int2.y is the number of reverse edges raft::device_vector d_list_sizes_new_; @@ -971,19 +977,21 @@ int insert_to_ordered_list(InternalID_t* list, } // namespace template -GnndGraph::GnndGraph(const size_t nrow, +GnndGraph::GnndGraph(raft::resources const& res, + const size_t nrow, const size_t node_degree, const size_t internal_node_degree, const size_t num_samples) - : nrow(nrow), + : res(res), + nrow(nrow), node_degree(node_degree), num_samples(num_samples), bloom_filter(nrow, internal_node_degree / segment_size, 3), h_dists{raft::make_host_matrix(nrow, node_degree)}, - h_graph_new(nrow * num_samples), - h_list_sizes_new(nrow), - h_graph_old(nrow * num_samples), - h_list_sizes_old{nrow} + h_graph_new{raft::make_pinned_matrix(res, nrow, num_samples)}, + h_list_sizes_new{raft::make_pinned_vector(res, nrow)}, + h_graph_old{raft::make_pinned_matrix(res, nrow, num_samples)}, + h_list_sizes_old{raft::make_pinned_vector(res, nrow)} { // node_degree must be a multiple of segment_size; assert(node_degree % segment_size == 0); @@ -1001,9 +1009,9 @@ void GnndGraph::sample_graph_new(InternalID_t* new_neighbors, { #pragma omp parallel for for (size_t i = 0; i < nrow; i++) { - auto list_new = h_graph_new.data() + i * num_samples; - h_list_sizes_new[i].x = 0; - h_list_sizes_new[i].y = 0; + auto list_new = h_graph_new.data_handle() + i * num_samples; + h_list_sizes_new.data_handle()[i].x = 0; + h_list_sizes_new.data_handle()[i].y = 0; for (size_t j = 0; j < width; j++) { auto new_neighb_id = new_neighbors[i * width + j].id(); @@ -1011,8 +1019,8 @@ void GnndGraph::sample_graph_new(InternalID_t* new_neighbors, if 
(bloom_filter.check(i, new_neighb_id)) { continue; } bloom_filter.add(i, new_neighb_id); new_neighbors[i * width + j].mark_old(); - list_new[h_list_sizes_new[i].x++] = new_neighb_id; - if (h_list_sizes_new[i].x == num_samples) break; + list_new[h_list_sizes_new.data_handle()[i].x++] = new_neighb_id; + if (h_list_sizes_new.data_handle()[i].x == num_samples) break; } } } @@ -1051,31 +1059,37 @@ void GnndGraph::sample_graph(bool sample_new) { #pragma omp parallel for for (size_t i = 0; i < nrow; i++) { - h_list_sizes_old[i].x = 0; - h_list_sizes_old[i].y = 0; - h_list_sizes_new[i].x = 0; - h_list_sizes_new[i].y = 0; + h_list_sizes_old.data_handle()[i].x = 0; + h_list_sizes_old.data_handle()[i].y = 0; + h_list_sizes_new.data_handle()[i].x = 0; + h_list_sizes_new.data_handle()[i].y = 0; auto list = h_graph + i * node_degree; - auto list_old = h_graph_old.data() + i * num_samples; - auto list_new = h_graph_new.data() + i * num_samples; + auto list_old = h_graph_old.data_handle() + i * num_samples; + auto list_new = h_graph_new.data_handle() + i * num_samples; for (int j = 0; j < segment_size; j++) { for (int k = 0; k < num_segments; k++) { auto neighbor = list[k * segment_size + j]; if ((size_t)neighbor.id() >= nrow) continue; if (!neighbor.is_new()) { - if (h_list_sizes_old[i].x < num_samples) { - list_old[h_list_sizes_old[i].x++] = neighbor.id(); + if (h_list_sizes_old.data_handle()[i].x < num_samples) { + list_old[h_list_sizes_old.data_handle()[i].x++] = neighbor.id(); } } else if (sample_new) { - if (h_list_sizes_new[i].x < num_samples) { + if (h_list_sizes_new.data_handle()[i].x < num_samples) { list[k * segment_size + j].mark_old(); - list_new[h_list_sizes_new[i].x++] = neighbor.id(); + list_new[h_list_sizes_new.data_handle()[i].x++] = neighbor.id(); } } - if (h_list_sizes_old[i].x == num_samples && h_list_sizes_new[i].x == num_samples) { break; } + if (h_list_sizes_old.data_handle()[i].x == num_samples && + h_list_sizes_new.data_handle()[i].x == num_samples) { + 
break; + } + } + if (h_list_sizes_old.data_handle()[i].x == num_samples && + h_list_sizes_new.data_handle()[i].x == num_samples) { + break; } - if (h_list_sizes_old[i].x == num_samples && h_list_sizes_new[i].x == num_samples) { break; } } } } @@ -1137,7 +1151,8 @@ template GNND::GNND(raft::resources const& res, const BuildConfig& build_config) : res(res), build_config_(build_config), - graph_(build_config.max_dataset_size, + graph_(res, + build_config.max_dataset_size, align32::roundUp(build_config.node_degree), align32::roundUp(build_config.internal_node_degree ? build_config.internal_node_degree : build_config.node_degree), @@ -1151,28 +1166,38 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, - graph_host_buffer_(nrow_ * DEGREE_ON_DEVICE), - dists_host_buffer_(nrow_ * DEGREE_ON_DEVICE), + graph_host_buffer_{ + raft::make_pinned_matrix(res, nrow_, DEGREE_ON_DEVICE)}, + dists_host_buffer_{ + raft::make_pinned_matrix(res, nrow_, DEGREE_ON_DEVICE)}, d_locks_{raft::make_device_vector(res, nrow_)}, - h_rev_graph_new_(nrow_ * NUM_SAMPLES), - h_graph_old_(nrow_ * NUM_SAMPLES), - h_rev_graph_old_(nrow_ * NUM_SAMPLES), + h_rev_graph_new_{ + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)}, + h_graph_old_( + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)), + h_rev_graph_old_{ + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)}, d_list_sizes_new_{raft::make_device_vector(res, nrow_)}, d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); - - thrust::fill(thrust::device, - dists_buffer_.data_handle(), - dists_buffer_.data_handle() + dists_buffer_.size(), - std::numeric_limits::max()); - thrust::fill(thrust::device, - reinterpret_cast(graph_buffer_.data_handle()), - reinterpret_cast(graph_buffer_.data_handle()) + graph_buffer_.size(), - std::numeric_limits::max()); - 
thrust::fill(thrust::device, d_locks_.data_handle(), d_locks_.data_handle() + d_locks_.size(), 0); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + auto graph_buffer_view = raft::make_device_matrix_view( + reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); + raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); + raft::matrix::fill(res, d_locks_.view(), 0); }; +template +void GNND::reset(raft::resources const& res) +{ + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + auto graph_buffer_view = raft::make_device_matrix_view( + reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); + raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); + raft::matrix::fill(res, d_locks_.view(), 0); +} + template void GNND::add_reverse_edges(Index_t* graph_ptr, Index_t* h_rev_graph_ptr, @@ -1189,34 +1214,35 @@ void GNND::add_reverse_edges(Index_t* graph_ptr, template void GNND::local_join(cudaStream_t stream) { - thrust::fill(thrust::device.on(stream), - dists_buffer_.data_handle(), - dists_buffer_.data_handle() + dists_buffer_.size(), - std::numeric_limits::max()); - local_join_kernel<<>>( - thrust::raw_pointer_cast(graph_.h_graph_new.data()), - thrust::raw_pointer_cast(h_rev_graph_new_.data()), - d_list_sizes_new_.data_handle(), - thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(h_rev_graph_old_.data()), - d_list_sizes_old_.data_handle(), - NUM_SAMPLES, - d_data_.data_handle(), - ndim_, - graph_buffer_.data_handle(), - dists_buffer_.data_handle(), - DEGREE_ON_DEVICE, - d_locks_.data_handle(), - l2_norms_.data_handle()); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + local_join_kernel<<>>(graph_.h_graph_new.data_handle(), + h_rev_graph_new_.data_handle(), + d_list_sizes_new_.data_handle(), + h_graph_old_.data_handle(), + h_rev_graph_old_.data_handle(), + d_list_sizes_old_.data_handle(), + 
NUM_SAMPLES, + d_data_.data_handle(), + ndim_, + graph_buffer_.data_handle(), + dists_buffer_.data_handle(), + DEGREE_ON_DEVICE, + d_locks_.data_handle(), + l2_norms_.data_handle()); } template -void GNND::build(Data_t* data, const Index_t nrow, Index_t* output_graph) +void GNND::build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances) { using input_t = typename std::remove_const::type; cudaStream_t stream = raft::resource::get_cuda_stream(res); nrow_ = nrow; + graph_.nrow = nrow; graph_.h_graph = (InternalID_t*)output_graph; cudaPointerAttributes data_ptr_attr; @@ -1226,24 +1252,18 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out cuvs::spatial::knn::detail::utils::batch_load_iterator vec_batches{ data, static_cast(nrow_), build_config_.dataset_dim, batch_size, stream}; for (auto const& batch : vec_batches) { - preprocess_data_kernel<<(raft::warp_size())) * - raft::warp_size(), - stream>>>(batch.data(), - d_data_.data_handle(), - build_config_.dataset_dim, - l2_norms_.data_handle(), - batch.offset()); + preprocess_data_kernel<<< + batch.size(), + raft::warp_size(), + sizeof(Data_t) * ceildiv(build_config_.dataset_dim, static_cast(raft::warp_size())) * + raft::warp_size(), + stream>>>(batch.data(), + d_data_.data_handle(), + build_config_.dataset_dim, + l2_norms_.data_handle(), + batch.offset()); } - thrust::fill(thrust::device.on(stream), - (Index_t*)graph_buffer_.data_handle(), - (Index_t*)graph_buffer_.data_handle() + graph_buffer_.size(), - std::numeric_limits::max()); - graph_.clear(); graph_.init_random_graph(); graph_.sample_graph(true); @@ -1251,8 +1271,8 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out auto update_and_sample = [&](bool update_graph) { if (update_graph) { update_counter_ = 0; - graph_.update_graph(thrust::raw_pointer_cast(graph_host_buffer_.data()), - thrust::raw_pointer_cast(dists_host_buffer_.data()), + 
graph_.update_graph(graph_host_buffer_.data_handle(), + dists_host_buffer_.data_handle(), DEGREE_ON_DEVICE, update_counter_); if (update_counter_ < build_config_.termination_threshold * nrow_ * @@ -1265,15 +1285,15 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out for (size_t it = 0; it < build_config_.max_iterations; it++) { raft::copy(d_list_sizes_new_.data_handle(), - thrust::raw_pointer_cast(graph_.h_list_sizes_new.data()), + graph_.h_list_sizes_new.data_handle(), nrow_, raft::resource::get_cuda_stream(res)); - raft::copy(thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(graph_.h_graph_old.data()), + raft::copy(h_graph_old_.data_handle(), + graph_.h_graph_old.data_handle(), nrow_ * NUM_SAMPLES, raft::resource::get_cuda_stream(res)); raft::copy(d_list_sizes_old_.data_handle(), - thrust::raw_pointer_cast(graph_.h_list_sizes_old.data()), + graph_.h_list_sizes_old.data_handle(), nrow_, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); @@ -1286,13 +1306,13 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out // contains some information for local_join. 
static_assert(DEGREE_ON_DEVICE * sizeof(*(dists_buffer_.data_handle())) >= NUM_SAMPLES * sizeof(*(graph_buffer_.data_handle()))); - add_reverse_edges(thrust::raw_pointer_cast(graph_.h_graph_new.data()), - thrust::raw_pointer_cast(h_rev_graph_new_.data()), + add_reverse_edges(graph_.h_graph_new.data_handle(), + h_rev_graph_new_.data_handle(), (Index_t*)dists_buffer_.data_handle(), d_list_sizes_new_.data_handle(), stream); - add_reverse_edges(thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(h_rev_graph_old_.data()), + add_reverse_edges(h_graph_old_.data_handle(), + h_rev_graph_old_.data_handle(), (Index_t*)dists_buffer_.data_handle(), d_list_sizes_old_.data_handle(), stream); @@ -1316,21 +1336,21 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out update_and_sample_thread.join(); if (update_counter_ == -1) { break; } - raft::copy(thrust::raw_pointer_cast(graph_host_buffer_.data()), + raft::copy(graph_host_buffer_.data_handle(), graph_buffer_.data_handle(), nrow_ * DEGREE_ON_DEVICE, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); - raft::copy(thrust::raw_pointer_cast(dists_host_buffer_.data()), + raft::copy(dists_host_buffer_.data_handle(), dists_buffer_.data_handle(), nrow_ * DEGREE_ON_DEVICE, raft::resource::get_cuda_stream(res)); - graph_.sample_graph_new(thrust::raw_pointer_cast(graph_host_buffer_.data()), DEGREE_ON_DEVICE); + graph_.sample_graph_new(graph_host_buffer_.data_handle(), DEGREE_ON_DEVICE); } - graph_.update_graph(thrust::raw_pointer_cast(graph_host_buffer_.data()), - thrust::raw_pointer_cast(dists_host_buffer_.data()), + graph_.update_graph(graph_host_buffer_.data_handle(), + dists_host_buffer_.data_handle(), DEGREE_ON_DEVICE, update_counter_); raft::resource::sync_stream(res); @@ -1338,6 +1358,27 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out // Reuse graph_.h_dists as the buffer for shrink the lists in graph 
static_assert(sizeof(decltype(*(graph_.h_dists.data_handle()))) >= sizeof(Index_t)); + + if (return_distances) { + auto graph_d_dists = raft::make_device_matrix( + res, nrow_, build_config_.node_degree); + raft::copy(graph_d_dists.data_handle(), + graph_.h_dists.data_handle(), + nrow_ * build_config_.node_degree, + raft::resource::get_cuda_stream(res)); + + auto output_dist_view = raft::make_device_matrix_view( + output_distances, nrow_, build_config_.output_graph_degree); + + raft::matrix::slice_coordinates coords{static_cast(0), + static_cast(0), + static_cast(nrow_), + static_cast(build_config_.output_graph_degree)}; + raft::matrix::slice( + res, raft::make_const_mdspan(graph_d_dists.view()), output_dist_view, coords); + raft::resource::sync_stream(res); + } + Index_t* graph_shrink_buffer = (Index_t*)graph_.h_dists.data_handle(); #pragma omp parallel for @@ -1410,10 +1451,24 @@ void build(raft::resources const& res, .node_degree = extended_graph_degree, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, - .termination_threshold = params.termination_threshold}; + .termination_threshold = params.termination_threshold, + .output_graph_degree = params.graph_degree}; GNND nnd(res, build_config); - nnd.build(dataset.data_handle(), dataset.extent(0), int_graph.data_handle()); + + if (idx.distances().has_value() || !params.return_distances) { + nnd.build(dataset.data_handle(), + dataset.extent(0), + int_graph.data_handle(), + params.return_distances, + idx.distances() + .value_or(raft::make_device_matrix(res, 0, 0).view()) + .data_handle()); + } else { + RAFT_EXPECTS(!params.return_distances, + "Distance view not allocated. 
Using return_distances set to true requires " + "distance view to be allocated."); + } #pragma omp parallel for for (size_t i = 0; i < static_cast(dataset.extent(0)); i++) { @@ -1445,11 +1500,12 @@ index build( graph_degree = intermediate_degree; } - index idx{res, dataset.extent(0), static_cast(graph_degree)}; + index idx{ + res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; build(res, params, dataset, idx); return idx; } -} // namespace cuvs::neighbors::nn_descent::detail +} // namespace cuvs::neighbors::nn_descent::detail diff --git a/cpp/src/neighbors/detail/nn_descent_batch.cuh b/cpp/src/neighbors/detail/nn_descent_batch.cuh new file mode 100644 index 000000000..842dbe788 --- /dev/null +++ b/cpp/src/neighbors/detail/nn_descent_batch.cuh @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY + +#include "nn_descent.cuh" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::nn_descent::detail::experimental { + +// +// Run balanced kmeans on a subsample of the dataset to get centroids +// +template , memory_type::host>> +void get_balanced_kmeans_centroids( + raft::resources const& res, + cuvs::distance::DistanceType metric, + mdspan, row_major, Accessor> dataset, + raft::device_matrix_view centroids) +{ + size_t num_rows = static_cast(dataset.extent(0)); + size_t num_cols = static_cast(dataset.extent(1)); + size_t n_clusters = centroids.extent(0); + size_t num_subsamples = + std::min(static_cast(num_rows / n_clusters), static_cast(num_rows * 0.1)); + + auto d_subsample_dataset = + raft::make_device_matrix(res, num_subsamples, num_cols); + raft::matrix::sample_rows( + res, raft::random::RngState{0}, dataset, d_subsample_dataset.view()); + + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.metric = metric; + + auto d_subsample_dataset_const_view = + raft::make_device_matrix_view( + d_subsample_dataset.data_handle(), num_subsamples, num_cols); + auto centroids_view = raft::make_device_matrix_view( + centroids.data_handle(), n_clusters, num_cols); + cuvs::cluster::kmeans::fit(res, kmeans_params, d_subsample_dataset_const_view, centroids_view); +} + +// +// Get the top k closest centroid indices for each data point +// Loads the data in batches onto device if data is on host for memory efficiency +// +template +void get_global_nearest_k( + raft::resources const& res, + size_t k, + size_t num_rows, + size_t n_clusters, + const T* dataset, + raft::host_matrix_view global_nearest_cluster, + raft::device_matrix_view centroids, + cuvs::distance::DistanceType metric) +{ + size_t num_cols = 
centroids.extent(1); + auto centroids_view = raft::make_device_matrix_view( + centroids.data_handle(), n_clusters, num_cols); + + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, dataset)); + float* ptr = reinterpret_cast(attr.devicePointer); + + size_t num_batches = n_clusters; + size_t batch_size = (num_rows + n_clusters) / n_clusters; + if (ptr == nullptr) { // data on host + + auto d_dataset_batch = + raft::make_device_matrix(res, batch_size, num_cols); + + auto nearest_clusters_idx = + raft::make_device_matrix(res, batch_size, k); + auto nearest_clusters_idxt = + raft::make_device_matrix(res, batch_size, k); + auto nearest_clusters_dist = + raft::make_device_matrix(res, batch_size, k); + + for (size_t i = 0; i < num_batches; i++) { + size_t batch_size_ = batch_size; + + if (i == num_batches - 1) { batch_size_ = num_rows - batch_size * i; } + raft::copy(d_dataset_batch.data_handle(), + dataset + i * batch_size * num_cols, + batch_size_ * num_cols, + resource::get_cuda_stream(res)); + + std::optional> norms_view; + cuvs::neighbors::brute_force::index brute_force_index( + res, centroids_view, norms_view, metric); + cuvs::neighbors::brute_force::search(res, + brute_force_index, + raft::make_const_mdspan(d_dataset_batch.view()), + nearest_clusters_idx.view(), + nearest_clusters_dist.view()); + + thrust::copy(raft::resource::get_thrust_policy(res), + nearest_clusters_idx.data_handle(), + nearest_clusters_idx.data_handle() + nearest_clusters_idx.size(), + nearest_clusters_idxt.data_handle()); + raft::copy(global_nearest_cluster.data_handle() + i * batch_size * k, + nearest_clusters_idxt.data_handle(), + batch_size_ * k, + resource::get_cuda_stream(res)); + } + } else { // data on device + auto nearest_clusters_idx = + raft::make_device_matrix(res, num_rows, k); + auto nearest_clusters_dist = + raft::make_device_matrix(res, num_rows, k); + + std::optional> norms_view; + cuvs::neighbors::brute_force::index brute_force_index( + res, 
centroids_view, norms_view, metric); + auto dataset_view = + raft::make_device_matrix_view(dataset, num_rows, num_cols); + cuvs::neighbors::brute_force::search(res, + brute_force_index, + dataset_view, + nearest_clusters_idx.view(), + nearest_clusters_dist.view()); + + auto nearest_clusters_idxt = + raft::make_device_matrix(res, batch_size, k); + for (size_t i = 0; i < num_batches; i++) { + size_t batch_size_ = batch_size; + + if (i == num_batches - 1) { batch_size_ = num_rows - batch_size * i; } + thrust::copy(raft::resource::get_thrust_policy(res), + nearest_clusters_idx.data_handle() + i * batch_size_ * k, + nearest_clusters_idx.data_handle() + (i + 1) * batch_size_ * k, + nearest_clusters_idxt.data_handle()); + raft::copy(global_nearest_cluster.data_handle() + i * batch_size_ * k, + nearest_clusters_idxt.data_handle(), + batch_size_ * k, + resource::get_cuda_stream(res)); + } + } +} + +// +// global_nearest_cluster [num_rows X k=2] : top 2 closest clusters for each data point +// inverted_indices [num_rows x k vector] : sparse vector for data indices for each cluster +// cluster_size [n_cluster] : cluster size for each cluster +// offset [n_cluster] : offset in inverted_indices for each cluster +// Loads the data in batches onto device if data is on host for memory efficiency +// +template +void get_inverted_indices(raft::resources const& res, + size_t n_clusters, + size_t& max_cluster_size, + size_t& min_cluster_size, + raft::host_matrix_view global_nearest_cluster, + raft::host_vector_view inverted_indices, + raft::host_vector_view cluster_size, + raft::host_vector_view offset) +{ + // build sparse inverted indices and get number of data points for each cluster + size_t num_rows = global_nearest_cluster.extent(0); + size_t k = global_nearest_cluster.extent(1); + + auto local_offset = raft::make_host_vector(n_clusters); + + max_cluster_size = 0; + min_cluster_size = std::numeric_limits::max(); + + std::fill(cluster_size.data_handle(), 
cluster_size.data_handle() + n_clusters, 0); + std::fill(local_offset.data_handle(), local_offset.data_handle() + n_clusters, 0); + + // TODO: this part isn't really a bottleneck but maybe worth trying omp parallel + // for with atomic add + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < k; j++) { + IdxT cluster_id = global_nearest_cluster(i, j); + cluster_size(cluster_id) += 1; + } + } + + offset(0) = 0; + for (size_t i = 1; i < n_clusters; i++) { + offset(i) = offset(i - 1) + cluster_size(i - 1); + } + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < k; j++) { + IdxT cluster_id = global_nearest_cluster(i, j); + inverted_indices(offset(cluster_id) + local_offset(cluster_id)) = i; + local_offset(cluster_id) += 1; + } + } + + max_cluster_size = static_cast( + *std::max_element(cluster_size.data_handle(), cluster_size.data_handle() + n_clusters)); + min_cluster_size = static_cast( + *std::min_element(cluster_size.data_handle(), cluster_size.data_handle() + n_clusters)); +} + +template +struct KeyValuePair { + KeyType key; + ValueType value; +}; + +template +struct CustomKeyComparator { + __device__ bool operator()(const KeyValuePair& a, + const KeyValuePair& b) const + { + if (a.key == b.key) { return a.value < b.value; } + return a.key < b.key; + } +}; + +template +RAFT_KERNEL merge_subgraphs(IdxT* cluster_data_indices, + size_t graph_degree, + size_t num_cluster_in_batch, + float* global_distances, + float* batch_distances, + IdxT* global_indices, + IdxT* batch_indices) +{ + size_t batch_row = blockIdx.x; + typedef cub::BlockMergeSort, BLOCK_SIZE, ITEMS_PER_THREAD> + BlockMergeSortType; + __shared__ typename cub::BlockMergeSort, BLOCK_SIZE, ITEMS_PER_THREAD>:: + TempStorage tmpSmem; + + extern __shared__ char sharedMem[]; + float* blockKeys = reinterpret_cast(sharedMem); + IdxT* blockValues = reinterpret_cast(&sharedMem[graph_degree * 2 * sizeof(float)]); + int16_t* uniqueMask = + reinterpret_cast(&sharedMem[graph_degree * 2 * 
(sizeof(float) + sizeof(IdxT))]); + + if (batch_row < num_cluster_in_batch) { + // load batch or global depending on threadIdx + size_t global_row = cluster_data_indices[batch_row]; + + KeyValuePair threadKeyValuePair[ITEMS_PER_THREAD]; + + size_t halfway = BLOCK_SIZE / 2; + size_t do_global = threadIdx.x < halfway; + + float* distances; + IdxT* indices; + + if (do_global) { + distances = global_distances; + indices = global_indices; + } else { + distances = batch_distances; + indices = batch_indices; + } + + size_t idxBase = (threadIdx.x * do_global + (threadIdx.x - halfway) * (1lu - do_global)) * + static_cast(ITEMS_PER_THREAD); + size_t arrIdxBase = (global_row * do_global + batch_row * (1lu - do_global)) * graph_degree; + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId < graph_degree) { + threadKeyValuePair[i].key = distances[arrIdxBase + colId]; + threadKeyValuePair[i].value = indices[arrIdxBase + colId]; + } else { + threadKeyValuePair[i].key = std::numeric_limits::max(); + threadKeyValuePair[i].value = std::numeric_limits::max(); + } + } + + __syncthreads(); + + BlockMergeSortType(tmpSmem).Sort(threadKeyValuePair, CustomKeyComparator{}); + + // load sorted result into shared memory to get unique values + idxBase = threadIdx.x * ITEMS_PER_THREAD; + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId < 2 * graph_degree) { + blockKeys[colId] = threadKeyValuePair[i].key; + blockValues[colId] = threadKeyValuePair[i].value; + } + } + + __syncthreads(); + + // get unique mask + if (threadIdx.x == 0) { uniqueMask[0] = 1; } + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId > 0 && colId < 2 * graph_degree) { + uniqueMask[colId] = static_cast(blockValues[colId] != blockValues[colId - 1]); + } + } + + __syncthreads(); + + // prefix sum + if (threadIdx.x == 0) { + for (int i = 1; i < 2 * graph_degree; i++) { + uniqueMask[i] += uniqueMask[i - 1]; + } + } + + 
__syncthreads(); + // load unique values to global memory + if (threadIdx.x == 0) { + global_distances[global_row * graph_degree] = blockKeys[0]; + global_indices[global_row * graph_degree] = blockValues[0]; + } + + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId > 0 && colId < 2 * graph_degree) { + bool is_unique = uniqueMask[colId] != uniqueMask[colId - 1]; + int16_t global_colId = uniqueMask[colId] - 1; + if (is_unique && static_cast(global_colId) < graph_degree) { + global_distances[global_row * graph_degree + global_colId] = blockKeys[colId]; + global_indices[global_row * graph_degree + global_colId] = blockValues[colId]; + } + } + } + } +} + +// +// builds knn graph using NN Descent and merge with global graph +// +template , memory_type::host>> +void build_and_merge(raft::resources const& res, + const index_params& params, + size_t num_data_in_cluster, + size_t graph_degree, + size_t int_graph_node_degree, + T* cluster_data, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_d, + float* global_distances_d, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + GNND& nnd) +{ + nnd.build(cluster_data, num_data_in_cluster, int_graph, true, batch_distances_d); + + // remap indices +#pragma omp parallel for + for (size_t i = 0; i < num_data_in_cluster; i++) { + for (size_t j = 0; j < graph_degree; j++) { + size_t local_idx = int_graph[i * int_graph_node_degree + j]; + batch_indices_h[i * graph_degree + j] = inverted_indices[local_idx]; + } + } + + raft::copy(batch_indices_d, + batch_indices_h, + num_data_in_cluster * graph_degree, + raft::resource::get_cuda_stream(res)); + + size_t num_elems = graph_degree * 2; + size_t sharedMemSize = num_elems * (sizeof(float) + sizeof(IdxT) + sizeof(int16_t)); + + if (num_elems <= 128) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + 
global_indices_d, + batch_indices_d); + } else if (num_elems <= 512) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else if (num_elems <= 1024) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else if (num_elems <= 2048) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else { + // this is as far as we can get due to the shared mem usage of cub::BlockMergeSort + RAFT_FAIL("The degree of knn is too large (%lu). It must be smaller than 1024", graph_degree); + } + raft::resource::sync_stream(res); +} + +// +// For each cluster, gather the data samples that belong to that cluster, and +// call build_and_merge +// +template +void cluster_nnd(raft::resources const& res, + const index_params& params, + size_t graph_degree, + size_t extended_graph_degree, + size_t max_cluster_size, + raft::host_matrix_view dataset, + IdxT* offsets, + IdxT* cluster_size, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_h, + float* global_distances_h, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + const BuildConfig& build_config) +{ + size_t num_rows = dataset.extent(0); + size_t num_cols = dataset.extent(1); + + GNND nnd(res, build_config); + + auto cluster_data_matrix = + raft::make_host_matrix(max_cluster_size, num_cols); + + for (size_t cluster_id = 0; cluster_id < params.n_clusters; cluster_id++) { + RAFT_LOG_DEBUG( + "# Data on host. 
Running clusters: %lu / %lu", cluster_id + 1, params.n_clusters); + size_t num_data_in_cluster = cluster_size[cluster_id]; + size_t offset = offsets[cluster_id]; + +#pragma omp parallel for + for (size_t i = 0; i < num_data_in_cluster; i++) { + for (size_t j = 0; j < num_cols; j++) { + size_t global_row = (inverted_indices + offset)[i]; + cluster_data_matrix(i, j) = dataset(global_row, j); + } + } + + build_and_merge(res, + params, + num_data_in_cluster, + graph_degree, + extended_graph_degree, + cluster_data_matrix.data_handle(), + cluster_data_indices + offset, + int_graph, + inverted_indices + offset, + global_indices_h, + global_distances_h, + batch_indices_h, + batch_indices_d, + batch_distances_d, + nnd); + nnd.reset(res); + } +} + +template +void cluster_nnd(raft::resources const& res, + const index_params& params, + size_t graph_degree, + size_t extended_graph_degree, + size_t max_cluster_size, + raft::device_matrix_view dataset, + IdxT* offsets, + IdxT* cluster_size, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_h, + float* global_distances_h, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + const BuildConfig& build_config) +{ + size_t num_rows = dataset.extent(0); + size_t num_cols = dataset.extent(1); + + GNND nnd(res, build_config); + + auto cluster_data_matrix = + raft::make_device_matrix(res, max_cluster_size, num_cols); + + for (size_t cluster_id = 0; cluster_id < params.n_clusters; cluster_id++) { + RAFT_LOG_DEBUG( + "# Data on device. 
Running clusters: %lu / %lu", cluster_id + 1, params.n_clusters); + size_t num_data_in_cluster = cluster_size[cluster_id]; + size_t offset = offsets[cluster_id]; + + auto cluster_data_view = raft::make_device_matrix_view( + cluster_data_matrix.data_handle(), num_data_in_cluster, num_cols); + auto cluster_data_indices_view = raft::make_device_vector_view( + cluster_data_indices + offset, num_data_in_cluster); + + auto dataset_IdxT = + raft::make_device_matrix_view(dataset.data_handle(), num_rows, num_cols); + raft::matrix::gather(res, dataset_IdxT, cluster_data_indices_view, cluster_data_view); + + build_and_merge(res, + params, + num_data_in_cluster, + graph_degree, + extended_graph_degree, + cluster_data_view.data_handle(), + cluster_data_indices + offset, + int_graph, + inverted_indices + offset, + global_indices_h, + global_distances_h, + batch_indices_h, + batch_indices_d, + batch_distances_d, + nnd); + nnd.reset(res); + } +} + +template , memory_type::host>> +void batch_build(raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset, + index& global_idx) +{ + size_t graph_degree = params.graph_degree; + size_t intermediate_degree = params.intermediate_graph_degree; + + size_t num_rows = static_cast(dataset.extent(0)); + size_t num_cols = static_cast(dataset.extent(1)); + + auto centroids = + raft::make_device_matrix(res, params.n_clusters, num_cols); + get_balanced_kmeans_centroids(res, params.metric, dataset, centroids.view()); + + size_t k = 2; + auto global_nearest_cluster = raft::make_host_matrix(num_rows, k); + get_global_nearest_k(res, + k, + num_rows, + params.n_clusters, + dataset.data_handle(), + global_nearest_cluster.view(), + centroids.view(), + params.metric); + + auto inverted_indices = raft::make_host_vector(num_rows * k); + auto cluster_size = raft::make_host_vector(params.n_clusters); + auto offset = raft::make_host_vector(params.n_clusters); + + size_t max_cluster_size, min_cluster_size; + 
get_inverted_indices(res, + params.n_clusters, + max_cluster_size, + min_cluster_size, + global_nearest_cluster.view(), + inverted_indices.view(), + cluster_size.view(), + offset.view()); + + if (intermediate_degree >= min_cluster_size) { + RAFT_LOG_WARN( + "Intermediate graph degree cannot be larger than minimum cluster size, reducing it to %lu", + dataset.extent(0)); + intermediate_degree = min_cluster_size - 1; + } + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + size_t extended_graph_degree = + align32::roundUp(static_cast(graph_degree * (graph_degree <= 32 ? 1.0 : 1.3))); + size_t extended_intermediate_degree = align32::roundUp( + static_cast(intermediate_degree * (intermediate_degree <= 32 ? 1.0 : 1.3))); + + auto int_graph = raft::make_host_matrix( + max_cluster_size, static_cast(extended_graph_degree)); + + BuildConfig build_config{.max_dataset_size = max_cluster_size, + .dataset_dim = num_cols, + .node_degree = extended_graph_degree, + .internal_node_degree = extended_intermediate_degree, + .max_iterations = params.max_iterations, + .termination_threshold = params.termination_threshold, + .output_graph_degree = graph_degree}; + + auto global_indices_h = raft::make_managed_matrix(res, num_rows, graph_degree); + auto global_distances_h = raft::make_managed_matrix(res, num_rows, graph_degree); + + std::fill(global_indices_h.data_handle(), + global_indices_h.data_handle() + num_rows * graph_degree, + std::numeric_limits::max()); + std::fill(global_distances_h.data_handle(), + global_distances_h.data_handle() + num_rows * graph_degree, + std::numeric_limits::max()); + + auto batch_indices_h = + raft::make_host_matrix(max_cluster_size, graph_degree); + auto batch_indices_d = + raft::make_device_matrix(res, max_cluster_size, graph_degree); + auto 
batch_distances_d = + raft::make_device_matrix(res, max_cluster_size, graph_degree); + + auto cluster_data_indices = raft::make_device_vector(res, num_rows * k); + raft::copy(cluster_data_indices.data_handle(), + inverted_indices.data_handle(), + num_rows * k, + resource::get_cuda_stream(res)); + + cluster_nnd(res, + params, + graph_degree, + extended_graph_degree, + max_cluster_size, + dataset, + offset.data_handle(), + cluster_size.data_handle(), + cluster_data_indices.data_handle(), + int_graph.data_handle(), + inverted_indices.data_handle(), + global_indices_h.data_handle(), + global_distances_h.data_handle(), + batch_indices_h.data_handle(), + batch_indices_d.data_handle(), + batch_distances_d.data_handle(), + build_config); + + raft::copy(global_idx.graph().data_handle(), + global_indices_h.data_handle(), + num_rows * graph_degree, + raft::resource::get_cuda_stream(res)); + if (params.return_distances && global_idx.distances().has_value()) { + raft::copy(global_idx.distances().value().data_handle(), + global_distances_h.data_handle(), + num_rows * graph_degree, + raft::resource::get_cuda_stream(res)); + } +} + +template , memory_type::host>> +index batch_build(raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset) +{ + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + index idx{ + res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; + + batch_build(res, params, dataset, idx); + + return idx; +} + +} // namespace cuvs::neighbors::nn_descent::detail::experimental diff --git a/cpp/src/neighbors/nn_descent.cuh b/cpp/src/neighbors/nn_descent.cuh index 582da72c1..ed91dac91 100644 --- 
a/cpp/src/neighbors/nn_descent.cuh +++ b/cpp/src/neighbors/nn_descent.cuh @@ -17,9 +17,14 @@ #pragma once #include "detail/nn_descent.cuh" +#include "detail/nn_descent_batch.cuh" + +#include +#include #include #include +#include #include namespace cuvs::neighbors::nn_descent { @@ -61,7 +66,15 @@ auto build(raft::resources const& res, index_params const& params, raft::device_matrix_view dataset) -> index { - return detail::build(res, params, dataset); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + return detail::experimental::batch_build(res, params, dataset); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + return detail::build(res, params, dataset); + } } /** @@ -100,7 +113,15 @@ void build(raft::resources const& res, raft::device_matrix_view dataset, index& idx) { - detail::build(res, params, dataset, idx); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + detail::experimental::batch_build(res, params, dataset, idx); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + detail::build(res, params, dataset, idx); + } } /** @@ -135,7 +156,15 @@ auto build(raft::resources const& res, index_params const& params, raft::host_matrix_view dataset) -> index { - return detail::build(res, params, dataset); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + return detail::experimental::batch_build(res, params, dataset); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + return detail::build(res, params, dataset); + } } /** @@ -174,7 +203,15 @@ void build(raft::resources const& res, raft::host_matrix_view dataset, index& idx) { - detail::build(res, params, dataset, idx); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + detail::experimental::batch_build(res, params, dataset, idx); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float 
precision"); + } + } else { + detail::build(res, params, dataset, idx); + } } /** @} */ // end group nn-descent diff --git a/cpp/src/neighbors/nn_descent_float.cu b/cpp/src/neighbors/nn_descent_float.cu index c6d356671..fa85db127 100644 --- a/cpp/src/neighbors/nn_descent_float.cu +++ b/cpp/src/neighbors/nn_descent_float.cu @@ -19,21 +19,38 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + }; \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index 
idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(float, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_half.cu b/cpp/src/neighbors/nn_descent_half.cu index 587993031..2ee45d435 100644 --- a/cpp/src/neighbors/nn_descent_half.cu +++ b/cpp/src/neighbors/nn_descent_half.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> 
distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(half, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_int8.cu b/cpp/src/neighbors/nn_descent_int8.cu index 813a01746..e150f511b 100644 --- a/cpp/src/neighbors/nn_descent_int8.cu +++ b/cpp/src/neighbors/nn_descent_int8.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return 
cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(int8_t, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_uint8.cu b/cpp/src/neighbors/nn_descent_uint8.cu index 9d73dd90f..d8657777b 100644 --- a/cpp/src/neighbors/nn_descent_uint8.cu +++ b/cpp/src/neighbors/nn_descent_uint8.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + 
->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(uint8_t, uint32_t); diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index bce0f9899..7d2575c2b 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -18,9 +18,13 @@ #include "../test_utils.cuh" #include "ann_utils.cuh" +#include #include + #include +#include #include +#include #include "naive_knn.cuh" @@ -42,6 +46,15 @@ struct AnnNNDescentInputs { double min_recall; }; +struct AnnNNDescentBatchInputs { + std::pair recall_cluster; + int n_rows; + int dim; + int graph_degree; + cuvs::distance::DistanceType metric; + bool host_dataset; +}; + inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentInputs& p) { os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree @@ -50,6 +63,14 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentInputs& return os; } +inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentBatchInputs& p) +{ + os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree + << ", metric=" << static_cast(p.metric) << (p.host_dataset ? 
", host" : ", device") + << ", clusters=" << p.recall_cluster.second << std::endl; + return os; +} + template class AnnNNDescentTest : public ::testing::TestWithParam { public: @@ -65,7 +86,9 @@ class AnnNNDescentTest : public ::testing::TestWithParam { { size_t queries_size = ps.n_rows * ps.graph_degree; std::vector indices_NNDescent(queries_size); + std::vector distances_NNDescent(queries_size); std::vector indices_naive(queries_size); + std::vector distances_naive(queries_size); { rmm::device_uvector distances_naive_dev(queries_size, stream_); @@ -81,16 +104,18 @@ class AnnNNDescentTest : public ::testing::TestWithParam { ps.graph_degree, ps.metric); raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); } { { - cuvs::neighbors::nn_descent::index_params index_params; + nn_descent::index_params index_params; index_params.metric = ps.metric; index_params.graph_degree = ps.graph_degree; index_params.intermediate_graph_degree = 2 * ps.graph_degree; index_params.max_iterations = 100; + index_params.return_distances = true; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -101,22 +126,40 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - auto index = - cuvs::neighbors::nn_descent::build(handle_, index_params, database_host_view); - raft::update_host( + auto index = nn_descent::build(handle_, index_params, database_host_view); + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + 
index.distances().value().data_handle(), + queries_size, + stream_); + } + } else { - auto index = cuvs::neighbors::nn_descent::build(handle_, index_params, database_view); - raft::update_host( + auto index = nn_descent::build(handle_, index_params, database_view); + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } }; } raft::resource::sync_stream(handle_); } double min_recall = ps.min_recall; - EXPECT_TRUE(eval_recall( - indices_naive, indices_NNDescent, ps.n_rows, ps.graph_degree, 0.001, min_recall)); + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_NNDescent, + distances_naive, + distances_NNDescent, + ps.n_rows, + ps.graph_degree, + 0.001, + min_recall)); } } @@ -146,6 +189,125 @@ class AnnNNDescentTest : public ::testing::TestWithParam { rmm::device_uvector database; }; +template +class AnnNNDescentBatchTest : public ::testing::TestWithParam { + public: + AnnNNDescentBatchTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_) + { + } + + void testNNDescentBatch() + { + size_t queries_size = ps.n_rows * ps.graph_degree; + std::vector indices_NNDescent(queries_size); + std::vector distances_NNDescent(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + database.data(), + database.data(), + ps.n_rows, + ps.n_rows, + ps.dim, + ps.graph_degree, + ps.metric); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), 
queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + { + nn_descent::index_params index_params; + index_params.metric = ps.metric; + index_params.graph_degree = ps.graph_degree; + index_params.intermediate_graph_degree = 2 * ps.graph_degree; + index_params.max_iterations = 10; + index_params.return_distances = true; + index_params.n_clusters = ps.recall_cluster.second; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + { + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + auto index = nn_descent::build(handle_, index_params, database_host_view); + raft::copy( + indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + + } else { + auto index = nn_descent::build(handle_, index_params, database_view); + raft::copy( + indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + }; + } + raft::resource::sync_stream(handle_); + } + double min_recall = ps.recall_cluster.first; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_NNDescent, + distances_naive, + distances_NNDescent, + ps.n_rows, + ps.graph_degree, + 0.01, + min_recall, + true, + static_cast(ps.graph_degree * 0.1))); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{}) { + raft::random::normal(handle_, r, database.data(), 
ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + } + raft::resource::sync_stream(handle_); + } + + void TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnNNDescentBatchInputs ps; + rmm::device_uvector database; +}; + const std::vector inputs = raft::util::itertools::product( {1000, 2000}, // n_rows {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim @@ -154,4 +316,15 @@ const std::vector inputs = raft::util::itertools::product inputsBatch = + raft::util::itertools::product( + {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters + {4000, 5000}, // n_rows + {192, 512}, // dim + {32, 64}, // graph_degree + {cuvs::distance::DistanceType::L2Expanded}, + {false, true}); + +} // namespace cuvs::neighbors::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu index 64c0e0291..7a24f96a1 100644 --- a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu @@ -23,6 +23,12 @@ namespace cuvs::neighbors::nn_descent { typedef AnnNNDescentTest AnnNNDescentTestF_U32; TEST_P(AnnNNDescentTestF_U32, AnnNNDescent) { this->testNNDescent(); } +// typedef AnnNNDescentBatchTest AnnNNDescentBatchTestF_U32; +// TEST_P(AnnNNDescentBatchTestF_U32, AnnNNDescentBatch) { this->testNNDescentBatch(); } + INSTANTIATE_TEST_CASE_P(AnnNNDescentTest, AnnNNDescentTestF_U32, ::testing::ValuesIn(inputs)); +// INSTANTIATE_TEST_CASE_P(AnnNNDescentBatchTest, +// AnnNNDescentBatchTestF_U32, +// ::testing::ValuesIn(inputsBatch)); } // namespace cuvs::neighbors::nn_descent diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index b08e1d725..94bccade2 100644 --- 
a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include // raft::make_device_matrix #include @@ -165,9 +166,14 @@ auto calc_recall(const std::vector& expected_idx, /** check uniqueness of indices */ template -auto check_unique_indices(const std::vector& actual_idx, size_t rows, size_t cols) +auto check_unique_indices(const std::vector& actual_idx, + size_t rows, + size_t cols, + size_t max_duplicates = 0) { size_t max_count; + size_t dup_count = 0lu; + std::set unique_indices; for (size_t i = 0; i < rows; ++i) { unique_indices.clear(); @@ -180,8 +186,11 @@ auto check_unique_indices(const std::vector& actual_idx, size_t rows, size_t } else if (unique_indices.find(act_idx) == unique_indices.end()) { unique_indices.insert(act_idx); } else { - return testing::AssertionFailure() - << "Duplicated index " << act_idx << " at k " << k << " for query " << i << "! "; + dup_count++; + if (dup_count > max_duplicates) { + return testing::AssertionFailure() + << "Duplicated index " << act_idx << " at k " << k << " for query " << i << "! "; + } } } } @@ -264,7 +273,8 @@ auto eval_neighbours(const std::vector& expected_idx, size_t cols, double eps, double min_recall, - bool test_unique = true) -> testing::AssertionResult + bool test_unique = true, + size_t max_duplicates = 0) -> testing::AssertionResult { auto [actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, expected_dist, actual_dist, rows, cols, eps); @@ -284,7 +294,7 @@ auto eval_neighbours(const std::vector& expected_idx, << min_recall << "); eps = " << eps << ". 
"; } if (test_unique) - return check_unique_indices(actual_idx, rows, cols); + return check_unique_indices(actual_idx, rows, cols, max_duplicates); else return testing::AssertionSuccess(); } diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 0ae97266b..8bd2e8b76 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -23,7 +23,7 @@ def run_hnsw_build_search_test( - n_rows=1000, + n_rows=10000, n_cols=10, n_queries=100, k=10, From fdb118002a482e878ec48fcaa7f11a15efd59140 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Nov 2024 21:32:29 -0600 Subject: [PATCH 05/15] enforce wheel size limits, README formatting in CI (#464) Contributes to https://github.com/rapidsai/build-planning/issues/110 Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI. * checks on wheel size (compressed), - *to be sure they're under PyPI limits* - *and to prompt discussion on PRs that significantly increase wheel sizes* * checks on README formatting - *to ensure they'll render properly as the PyPI project homepages* - *e.g. 
like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/* Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/464 --- ci/build_wheel_cuvs.sh | 5 ++++- ci/validate_wheel.sh | 21 +++++++++++++++++++++ python/cuvs/pyproject.toml | 8 ++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100755 ci/validate_wheel.sh diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index e03da9f19..444657cc0 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -3,6 +3,8 @@ set -euo pipefail +package_dir="python/cuvs" + case "${RAPIDS_CUDA_VERSION}" in 12.*) EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" @@ -15,4 +17,5 @@ esac # Set up skbuild options. Enable sccache in skbuild config options export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" -ci/build_wheel.sh cuvs python/cuvs +ci/build_wheel.sh cuvs ${package_dir} +ci/validate_wheel.sh ${package_dir} final_dist diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 000000000..5910a5c59 --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir=$1 +wheel_dir_relative_path=$2 + +cd "${package_dir}" + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 30d784c67..d40026776 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -133,6 +133,14 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# detect when package size grows significantly +max_allowed_size_compressed = '1.4G' + [tool.pytest.ini_options] filterwarnings = [ "error", From bb9c669500cf0401114f4a5810d0f3a0ea1db6b3 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 14 Nov 2024 21:25:58 +0100 Subject: [PATCH 06/15] Fix include errors, header, and unsafe locks in iface.hpp (#467) Fix a few issues with the internal header `neighbors/iface/iface.hpp` leading to compile time errors and dangerous runtime behavior: - Add missing includes - Use `std::lock_guard` to avoid a deadlock on exception - Add NVIDIA header - Avoid an extra stream sync during search. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Victor Lafargue (https://github.com/viclafargue) - Corey J. 
Nolet (https://github.com/cjnolet) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/467 --- cpp/src/neighbors/cagra_c.cpp | 2 ++ cpp/src/neighbors/iface/iface.hpp | 53 +++++++++++++++++-------------- cpp/src/neighbors/ivf_flat_c.cpp | 2 ++ cpp/src/neighbors/mg/mg.cuh | 2 ++ examples/cpp/src/common.cuh | 4 +++ 5 files changed, 39 insertions(+), 24 deletions(-) mode change 100755 => 100644 cpp/src/neighbors/ivf_flat_c.cpp diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 6985ff094..326a89665 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp index a329db429..9b3da75a4 100644 --- a/cpp/src/neighbors/iface/iface.hpp +++ b/cpp/src/neighbors/iface/iface.hpp @@ -1,4 +1,20 @@ -#include +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once #include #include @@ -6,6 +22,9 @@ #include #include +#include +#include + namespace cuvs::neighbors { using namespace raft; @@ -16,7 +35,7 @@ void build(const raft::device_resources& handle, const cuvs::neighbors::index_params* index_params, raft::mdspan, row_major, Accessor> index_dataset) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = cuvs::neighbors::ivf_flat::build( @@ -32,8 +51,6 @@ void build(const raft::device_resources& handle, interface.index_.emplace(std::move(idx)); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -44,7 +61,7 @@ void extend( std::optional, layout_c_contiguous, Accessor2>> new_indices) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = @@ -58,8 +75,6 @@ void extend( RAFT_FAIL("CAGRA does not implement the extend method"); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -70,7 +85,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { cuvs::neighbors::ivf_flat::search( handle, @@ -94,9 +109,7 @@ void search(const raft::device_resources& handle, neighbors, distances); } - resource::sync_stream(handle); - - // interface.mutex_->unlock(); + // resource::sync_stream(handle); } // for MG ANN only @@ -108,7 +121,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view d_neighbors, raft::device_matrix_view d_distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); int64_t n_rows = h_queries.extent(0); int64_t n_dims = h_queries.extent(1); @@ -120,8 +133,6 @@ void search(const raft::device_resources& handle, auto d_query_view = raft::make_const_mdspan(d_queries.view()); search(handle, interface, 
search_params, d_query_view, d_neighbors, d_distances); - - // interface.mutex_->unlock(); } template @@ -129,7 +140,7 @@ void serialize(const raft::device_resources& handle, const cuvs::neighbors::iface& interface, std::ostream& os) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::serialize(handle, os, interface.index_.value()); @@ -138,8 +149,6 @@ void serialize(const raft::device_resources& handle, } else if constexpr (std::is_same>::value) { cagra::serialize(handle, os, interface.index_.value(), true); } - - interface.mutex_->unlock(); } template @@ -147,7 +156,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, std::istream& is) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::index idx(handle); @@ -162,8 +171,6 @@ void deserialize(const raft::device_resources& handle, cagra::deserialize(handle, is, &idx); interface.index_.emplace(std::move(idx)); } - - interface.mutex_->unlock(); } template @@ -171,7 +178,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, const std::string& filename) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } @@ -191,8 +198,6 @@ void deserialize(const raft::device_resources& handle, } is.close(); - - interface.mutex_->unlock(); } -}; // namespace cuvs::neighbors \ No newline at end of file +}; // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat_c.cpp b/cpp/src/neighbors/ivf_flat_c.cpp old mode 100755 new mode 100644 index c14c1edc0..2acc6b678 --- a/cpp/src/neighbors/ivf_flat_c.cpp +++ b/cpp/src/neighbors/ivf_flat_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh 
index d3f635bc4..e9cdc30f6 100644 --- a/cpp/src/neighbors/mg/mg.cuh +++ b/cpp/src/neighbors/mg/mg.cuh @@ -25,6 +25,8 @@ #include #include +#include + namespace cuvs::neighbors { using namespace raft; diff --git a/examples/cpp/src/common.cuh b/examples/cpp/src/common.cuh index 1c93dec0e..8e109a764 100644 --- a/examples/cpp/src/common.cuh +++ b/examples/cpp/src/common.cuh @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -28,6 +30,8 @@ #include #include +#include + // Fill dataset and queries with synthetic data. void generate_dataset(raft::device_resources const &dev_resources, raft::device_matrix_view dataset, From 7ab2bfdd250613137a5622471212dab528319306 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 15 Nov 2024 12:16:17 -0500 Subject: [PATCH 07/15] Add `InnerProduct` and `CosineExpanded` metric support in NN Descent (#177) Closes #171 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/177 --- cpp/CMakeLists.txt | 1 + cpp/include/cuvs/neighbors/nn_descent.hpp | 24 ++--- .../neighbors/detail/cagra/cagra_build.cuh | 12 ++- cpp/src/neighbors/detail/nn_descent.cuh | 87 +++++++++++++------ cpp/src/neighbors/nn_descent_index.cpp | 29 +++++++ cpp/test/neighbors/ann_cagra.cuh | 10 +-- cpp/test/neighbors/ann_nn_descent.cuh | 32 ++++--- python/cuvs/cuvs/test/test_cagra.py | 4 +- python/cuvs/cuvs/test/test_hnsw.py | 4 +- 9 files changed, 139 insertions(+), 64 deletions(-) create mode 100644 cpp/src/neighbors/nn_descent_index.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c493af488..81b82aa7b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -436,6 +436,7 @@ if(BUILD_SHARED_LIBS) src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu src/neighbors/nn_descent_half.cu + src/neighbors/nn_descent_index.cpp src/neighbors/nn_descent_int8.cu src/neighbors/nn_descent_uint8.cu 
src/neighbors/reachability.cu diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index bd41d1ff7..9cd8192b5 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -61,11 +61,10 @@ struct index_params : cuvs::neighbors::index_params { /** @brief Construct NN descent parameters for a specific kNN graph degree * * @param graph_degree output graph degree + * @param metric distance metric to use */ - index_params(size_t graph_degree = 64) - : graph_degree(graph_degree), intermediate_graph_degree(1.5 * graph_degree) - { - } + index_params(size_t graph_degree = 64, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded); }; /** @@ -103,11 +102,16 @@ struct index : cuvs::neighbors::index { * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph * @param return_distances whether to return distances + * @param metric distance metric to use */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) + index(raft::resources const& res, + int64_t n_rows, + int64_t n_cols, + bool return_distances = false, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(n_rows, n_cols)}, graph_view_{graph_.view()}, return_distances_{return_distances} @@ -129,14 +133,16 @@ struct index : cuvs::neighbors::index { * @param graph_view raft::host_matrix_view for storing knn-graph * @param distances_view optional raft::device_matrix_view for storing * distances + * @param metric distance metric to use */ index(raft::resources const& res, raft::host_matrix_view graph_view, std::optional> distances_view = - std::nullopt) + std::nullopt, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : 
cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(0, 0)}, graph_view_{graph_view}, distances_view_{distances_view}, @@ -473,8 +479,6 @@ auto build(raft::resources const& res, std::optional> graph = std::nullopt) -> cuvs::neighbors::nn_descent::index; -/** @} */ - /** * @brief Test if we have enough GPU memory to run NN descent algorithm. * diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 6209ff819..b7fec724b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -436,11 +436,11 @@ index build( auto knn_build_params = params.graph_build_params; if (std::holds_alternative(params.graph_build_params)) { // Heuristic to decide default build algo and its params. - if (params.metric == cuvs::distance::DistanceType::L2Expanded && - cuvs::neighbors::nn_descent::has_enough_device_memory( + if (cuvs::neighbors::nn_descent::has_enough_device_memory( res, dataset.extents(), sizeof(IdxT))) { RAFT_LOG_DEBUG("NN descent solver"); - knn_build_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + knn_build_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } else { RAFT_LOG_DEBUG("Selecting IVF-PQ solver"); knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric); @@ -453,9 +453,6 @@ index build( std::get(knn_build_params); build_knn_graph(res, dataset, knn_graph->view(), ivf_pq_params); } else { - RAFT_EXPECTS( - params.metric == cuvs::distance::DistanceType::L2Expanded, - "L2Expanded is the only distance metrics supported for CAGRA build with nn_descent"); auto nn_descent_params = std::get(knn_build_params); @@ -466,7 +463,8 @@ index build( "nn-descent graph_degree.", nn_descent_params.graph_degree, intermediate_degree); - nn_descent_params = 
cagra::graph_build_params::nn_descent_params(intermediate_degree); + nn_descent_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } // Use nn-descent to build CAGRA knn graph diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 883d82d76..c62a52540 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -19,6 +19,7 @@ #include "ann_utils.cuh" #include "cagra/device_common.hpp" +#include #include #include @@ -216,6 +217,7 @@ struct BuildConfig { size_t max_iterations{50}; float termination_threshold{0.0001}; size_t output_graph_degree{32}; + cuvs::distance::DistanceType metric{cuvs::distance::DistanceType::L2Expanded}; }; template @@ -454,11 +456,13 @@ __device__ __forceinline__ void load_vec(Data_t* vec_buffer, // TODO: Replace with RAFT utilities https://github.com/rapidsai/raft/issues/1827 /** Calculate L2 norm, and cast data to __half */ template -RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, - __half* output_data, - int dim, - DistData_t* l2_norms, - size_t list_offset = 0) +RAFT_KERNEL preprocess_data_kernel( + const Data_t* input_data, + __half* output_data, + int dim, + DistData_t* l2_norms, + size_t list_offset = 0, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) { extern __shared__ char buffer[]; __shared__ float l2_norm; @@ -468,26 +472,32 @@ RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, load_vec(s_vec, input_data + blockIdx.x * dim, dim, dim, threadIdx.x % raft::warp_size()); if (threadIdx.x == 0) { l2_norm = 0; } __syncthreads(); - int lane_id = threadIdx.x % raft::warp_size(); - for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { - int idx = step * raft::warp_size() + lane_id; - float part_dist = 0; - if (idx < dim) { - part_dist = s_vec[idx]; - part_dist = part_dist * part_dist; - } - __syncwarp(); - for (int offset = 
raft::warp_size() >> 1; offset >= 1; offset >>= 1) { - part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::CosineExpanded) { + int lane_id = threadIdx.x % raft::warp_size(); + for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { + int idx = step * raft::warp_size() + lane_id; + float part_dist = 0; + if (idx < dim) { + part_dist = s_vec[idx]; + part_dist = part_dist * part_dist; + } + __syncwarp(); + for (int offset = raft::warp_size() >> 1; offset >= 1; offset >>= 1) { + part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + } + if (lane_id == 0) { l2_norm += part_dist; } + __syncwarp(); } - if (lane_id == 0) { l2_norm += part_dist; } - __syncwarp(); } for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { int idx = step * raft::warp_size() + threadIdx.x; if (idx < dim) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { + output_data[list_id * dim + idx] = input_data[(size_t)blockIdx.x * dim + idx]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { output_data[list_id * dim + idx] = (float)input_data[(size_t)blockIdx.x * dim + idx] / sqrt(l2_norm); } else { @@ -715,7 +725,8 @@ __launch_bounds__(BLOCK_SIZE, 4) DistData_t* dists, int graph_width, int* locks, - DistData_t* l2_norms) + DistData_t* l2_norms, + cuvs::distance::DistanceType metric) { #if (__CUDA_ARCH__ >= 700) using namespace nvcuda; @@ -827,8 +838,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_new_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == 
cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[new_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -906,8 +919,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_old_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[old_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -1161,7 +1176,7 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build ndim_(build_config.dataset_dim), d_data_{raft::make_device_matrix<__half, size_t, raft::row_major>( res, nrow_, build_config.dataset_dim)}, - l2_norms_{raft::make_device_vector(res, nrow_)}, + l2_norms_{raft::make_device_vector(res, 0)}, graph_buffer_{ raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ @@ -1181,11 +1196,16 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); auto graph_buffer_view = raft::make_device_matrix_view( reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); raft::matrix::fill(res, d_locks_.view(), 0); + + if (build_config.metric == cuvs::distance::DistanceType::L2Expanded) { + l2_norms_ = raft::make_device_vector(res, nrow_); + } }; template @@ -1228,7 +1248,8 @@ void GNND::local_join(cudaStream_t stream) 
dists_buffer_.data_handle(), DEGREE_ON_DEVICE, d_locks_.data_handle(), - l2_norms_.data_handle()); + l2_norms_.data_handle(), + build_config_.metric); } template @@ -1261,7 +1282,8 @@ void GNND::build(Data_t* data, d_data_.data_handle(), build_config_.dataset_dim, l2_norms_.data_handle(), - batch.offset()); + batch.offset(), + build_config_.metric); } graph_.clear(); @@ -1417,6 +1439,11 @@ void build(raft::resources const& res, RAFT_EXPECTS(dataset.extent(0) < std::numeric_limits::max() - 1, "The dataset size for GNND should be less than %d", std::numeric_limits::max() - 1); + auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded || + params.metric == cuvs::distance::DistanceType::InnerProduct; + RAFT_EXPECTS(allowed_metrics && idx.metric() == params.metric, + "The metric for NN Descent should be L2Expanded, CosineExpanded or InnerProduct"); size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -1452,7 +1479,8 @@ void build(raft::resources const& res, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, .termination_threshold = params.termination_threshold, - .output_graph_degree = params.graph_degree}; + .output_graph_degree = params.graph_degree, + .metric = params.metric}; GNND nnd(res, build_config); @@ -1500,8 +1528,11 @@ index build( graph_degree = intermediate_degree; } - index idx{ - res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; + index idx{res, + dataset.extent(0), + static_cast(graph_degree), + params.return_distances, + params.metric}; build(res, params, dataset, idx); diff --git a/cpp/src/neighbors/nn_descent_index.cpp b/cpp/src/neighbors/nn_descent_index.cpp new file mode 100644 index 000000000..25d5b6af8 --- /dev/null +++ b/cpp/src/neighbors/nn_descent_index.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace cuvs::neighbors::nn_descent { + +index_params::index_params(size_t graph_degree, cuvs::distance::DistanceType metric) +{ + this->graph_degree = graph_degree; + this->intermediate_graph_degree = 1.5 * graph_degree; + this->metric = metric; +} +} // namespace cuvs::neighbors::nn_descent \ No newline at end of file diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 37d42dd1d..660246c67 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -361,8 +361,8 @@ class AnnCagraTest : public ::testing::TestWithParam { // not used for knn_graph building. 
switch (ps.build_algo) { case graph_build_algo::IVF_PQ: - index_params.graph_build_params = - graph_build_params::ivf_pq_params(raft::matrix_extent(ps.n_rows, ps.dim)); + index_params.graph_build_params = graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.n_rows, ps.dim), index_params.metric); if (ps.ivf_pq_search_refine_ratio) { std::get( index_params.graph_build_params) @@ -370,8 +370,8 @@ class AnnCagraTest : public ::testing::TestWithParam { } break; case graph_build_algo::NN_DESCENT: { - index_params.graph_build_params = - graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); + index_params.graph_build_params = graph_build_params::nn_descent_params( + index_params.intermediate_graph_degree, index_params.metric); break; } case graph_build_algo::AUTO: @@ -389,7 +389,7 @@ class AnnCagraTest : public ::testing::TestWithParam { (const DataT*)database.data(), ps.n_rows, ps.dim); { - cagra::index index(handle_); + cagra::index index(handle_, index_params.metric); if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 7d2575c2b..09861a219 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -27,6 +27,7 @@ #include #include "naive_knn.cuh" +#include #include @@ -107,7 +108,6 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); } - { { nn_descent::index_params index_params; @@ -124,6 +124,7 @@ class AnnNNDescentTest : public ::testing::TestWithParam { if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + 
raft::resource::sync_stream(handle_); auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); auto index = nn_descent::build(handle_, index_params, database_host_view); @@ -151,6 +152,13 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::resource::sync_stream(handle_); } + if (ps.metric == cuvs::distance::DistanceType::InnerProduct) { + std::transform( + distances_naive.begin(), distances_naive.end(), distances_naive.begin(), [](auto x) { + return -x; + }); + } + double min_recall = ps.min_recall; EXPECT_TRUE(eval_neighbours(indices_naive, indices_NNDescent, @@ -169,9 +177,11 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::random::RngState r(1234ULL); if constexpr (std::is_same{}) { raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); - } else { + } else if constexpr (std::is_same{}) { raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(-5), DataT(5)); + } else { + raft::random::uniformInt(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0), DataT(5)); } raft::resource::sync_stream(handle_); } @@ -308,13 +318,15 @@ class AnnNNDescentBatchTest : public ::testing::TestWithParam database; }; -const std::vector inputs = raft::util::itertools::product( - {1000, 2000}, // n_rows - {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim - {32, 64}, // graph_degree - {cuvs::distance::DistanceType::L2Expanded}, - {false, true}, - {0.90}); +const std::vector inputs = + raft::util::itertools::product({2000, 4000}, // n_rows + {4, 16, 64, 256, 1024}, // dim + {32, 64}, // graph_degree + {cuvs::distance::DistanceType::L2Expanded, + cuvs::distance::DistanceType::InnerProduct, + cuvs::distance::DistanceType::CosineExpanded}, + {false, true}, + {0.90}); // TODO : Investigate why this test is failing Reference issue https // : 
// github.com/rapidsai/raft/issues/2450 diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 92b88f013..56e132c23 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -122,8 +122,9 @@ def run_cagra_build_search_test( @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) @pytest.mark.parametrize("array_type", ["device", "host"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) +@pytest.mark.parametrize("metric", ["euclidean"]) def test_cagra_dataset_dtype_host_device( - dtype, array_type, inplace, build_algo + dtype, array_type, inplace, build_algo, metric ): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. @@ -132,6 +133,7 @@ def test_cagra_dataset_dtype_host_device( inplace=inplace, array_type=array_type, build_algo=build_algo, + metric=metric, ) diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 8bd2e8b76..20a35401e 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -41,8 +41,6 @@ def run_hnsw_build_search_test( pytest.skip( "inner_product metric is not supported for int8/uint8 data" ) - if build_algo == "nn_descent": - pytest.skip("inner_product metric is not supported for nn_descent") build_params = cagra.IndexParams( metric=metric, @@ -83,7 +81,7 @@ def run_hnsw_build_search_test( @pytest.mark.parametrize("k", [10, 20]) @pytest.mark.parametrize("ef", [30, 40]) @pytest.mark.parametrize("num_threads", [2, 4]) -@pytest.mark.parametrize("metric", ["sqeuclidean"]) +@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) def test_hnsw(dtype, k, ef, num_threads, metric, build_algo): # Note that inner_product tests use normalized input which we cannot From 7b879116684501f36ca5a19a74c01fcecb52e962 Mon Sep 17 00:00:00 
2001 From: James Lamb Date: Fri, 15 Nov 2024 16:12:42 -0600 Subject: [PATCH 08/15] use different wheel-size thresholds based on CUDA version (#469) `cuvs-cu11` wheels are significantly larger than `cuvs-cu12` wheels, because (among other reasons) they are not able to dynamically link to CUDA math library wheels. In #464, I proposed a size limit for CI checks of "max CUDA 11 wheel size + a buffer". This PR proposes using different thresholds based on CUDA major version, following these discussions: * https://github.com/rapidsai/cugraph/pull/4754#discussion_r1842526907 * https://github.com/rapidsai/cuml/pull/6136#discussion_r1841774811 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Mike Sarahan (https://github.com/msarahan) URL: https://github.com/rapidsai/cuvs/pull/469 --- ci/validate_wheel.sh | 14 ++++++++++++++ python/cuvs/pyproject.toml | 4 +--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 5910a5c59..f2b235765 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -6,12 +6,26 @@ set -euo pipefail package_dir=$1 wheel_dir_relative_path=$2 +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + +# some packages are much larger on CUDA 11 than on CUDA 12 +if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '1.4G' + ) +else + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '950M' + ) +fi + cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ + "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index d40026776..92e4993c7 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -135,12 +135,10 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pydistcheck] select = [ + # NOTE: size threshold is managed via CLI args in CI 
scripts "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '1.4G' - [tool.pytest.ini_options] filterwarnings = [ "error", From 27d45533d91f13ce00eabed409468a2b47452f4d Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 18 Nov 2024 14:55:13 -0800 Subject: [PATCH 09/15] Move check_input_array from pylibraft (#474) With the changes in https://github.com/rapidsai/raft/pull/2498 we no longer have a pylibraft.neighbors module - but were still using a utility function `_check_input_array` from it in cuvs. Move this over to cuvs to unblock ci Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/474 --- .../neighbors/brute_force/brute_force.pyx | 2 +- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 3 +- python/cuvs/cuvs/neighbors/common.py | 36 +++++++++++++++++++ .../cuvs/cuvs/neighbors/filters/filters.pyx | 2 +- python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx | 2 +- .../cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx | 2 +- python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx | 2 +- python/cuvs/cuvs/neighbors/refine.pyx | 2 +- 8 files changed, 44 insertions(+), 7 deletions(-) create mode 100644 python/cuvs/cuvs/neighbors/common.py diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 559302ccc..9d1d24eae 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from 
cuvs.common.c_api cimport cuvsResources_t diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 95209dbeb..752aef741 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -32,7 +32,8 @@ from cuvs.common cimport cydlpack from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array + +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/common.py b/python/cuvs/cuvs/neighbors/common.py new file mode 100644 index 000000000..c14b9f8c9 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/common.py @@ -0,0 +1,36 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None): + if cai.dtype not in exp_dt: + raise TypeError("dtype %s not supported" % cai.dtype) + + if not cai.c_contiguous: + raise ValueError("Row major input is expected") + + if exp_cols is not None and cai.shape[1] != exp_cols: + raise ValueError( + "Incorrect number of columns, expected {} got {}".format( + exp_cols, cai.shape[1] + ) + ) + + if exp_rows is not None and cai.shape[0] != exp_rows: + raise ValueError( + "Incorrect number of rows, expected {} , got {}".format( + exp_rows, cai.shape[0] + ) + ) diff --git a/python/cuvs/cuvs/neighbors/filters/filters.pyx b/python/cuvs/cuvs/neighbors/filters/filters.pyx index 3a81cb786..9bc2a905c 100644 --- a/python/cuvs/cuvs/neighbors/filters/filters.pyx +++ b/python/cuvs/cuvs/neighbors/filters/filters.pyx @@ -20,11 +20,11 @@ import numpy as np from libc.stdint cimport uintptr_t from cuvs.common cimport cydlpack +from cuvs.neighbors.common import _check_input_array from .filters cimport BITMAP, NO_FILTER, cuvsFilter from pylibraft.common.cai_wrapper import wrap_array -from pylibraft.neighbors.common import _check_input_array cdef class Prefilter: diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx index 018fcfef9..bcfaf167e 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -21,6 +21,7 @@ from libcpp.string cimport string from cuvs.common.exceptions import check_cuvs from cuvs.common.resources import auto_sync_resources +from cuvs.neighbors.common import _check_input_array from cuvs.common cimport cydlpack @@ -36,7 +37,6 @@ import uuid from pylibraft.common import auto_convert_output from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array cdef class SearchParams: diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx 
b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index 25b9b2aee..7a169e1a0 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx index 3add1df75..531302ee6 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/refine.pyx b/python/cuvs/cuvs/neighbors/refine.pyx index 0eccc4108..b7aa35dca 100644 --- a/python/cuvs/cuvs/neighbors/refine.pyx +++ b/python/cuvs/cuvs/neighbors/refine.pyx @@ -31,13 +31,13 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES from cuvs.common.c_api cimport cuvsResources_t from cuvs.common.exceptions import check_cuvs +from 
cuvs.neighbors.common import _check_input_array @auto_sync_resources From f127b06b83e3c9e3c3034fdc902441edbf841b90 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:01:22 +0100 Subject: [PATCH 10/15] Fix an OOB error in device-side cuvs::neighbors::refine and CAGRA kern_prune (#460) IVF-Flat index expects all valid indices during build, which may not be the case in the context of refinement. At the same time, `cagra::detail::graph::kern_prune` fails with OOB error if some indices are invalid. This PR tweaks both kernels to avoid touching the input data with an invalid index. Fixes https://github.com/rapidsai/cuvs/issues/337 Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/460 --- cpp/src/neighbors/detail/cagra/graph_core.cuh | 1 + cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 8 +- cpp/test/CMakeLists.txt | 1 + .../ann_cagra/bug_extreme_inputs_oob.cu | 73 +++++++++++++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh index 4253cb781..daeac82b9 100644 --- a/cpp/src/neighbors/detail/cagra/graph_core.cuh +++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh @@ -156,6 +156,7 @@ __global__ void kern_prune(const IdxT* const knn_graph, // [graph_chunk_size, g // count number of detours (A->D->B) for (uint32_t kAD = 0; kAD < graph_degree - 1; kAD++) { const uint64_t iD = knn_graph[kAD + (graph_degree * iA)]; + if (iD >= graph_size) { continue; } for (uint32_t kDB = threadIdx.x; kDB < graph_degree; kDB += blockDim.x) { const uint64_t iB_candidate = knn_graph[kDB + ((uint64_t)graph_degree * iD)]; for (uint32_t kAB = kAD + 1; kAB < graph_degree; kAB++) { diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh 
b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index fb110d810..d6ffc1218 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -132,6 +132,10 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, { const IdxT i = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x; if (i >= n_rows) { return; } + auto source_ix = source_ixs == nullptr ? i + batch_offset : source_ixs[i]; + // In the context of refinement, some indices may be invalid (the generating NN algorithm does + // not return enough valid items). Do not add the item to the index in this case. + if (source_ix == ivf::kInvalidRecord || source_ix == raft::upper_bound()) { return; } auto list_id = labels[i]; auto inlist_id = atomicAdd(list_sizes_ptr + list_id, 1); @@ -139,7 +143,7 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, auto* list_data = list_data_ptrs[list_id]; // Record the source vector id in the index - list_index[inlist_id] = source_ixs == nullptr ? i + batch_offset : source_ixs[i]; + list_index[inlist_id] = source_ix; // The data is written in interleaved groups of `index::kGroupSize` vectors using interleaved_group = raft::Pow2; @@ -151,7 +155,7 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, // Point to the source vector if constexpr (gather_src) { - source_vecs += source_ixs[i] * dim; + source_vecs += source_ix * dim; } else { source_vecs += i * dim; } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 1ed8466b3..7754a5043 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -137,6 +137,7 @@ if(BUILD_TESTS) NAME NEIGHBORS_ANN_CAGRA_TEST PATH + neighbors/ann_cagra/bug_extreme_inputs_oob.cu neighbors/ann_cagra/bug_multi_cta_crash.cu neighbors/ann_cagra/test_float_uint32_t.cu neighbors/ann_cagra/test_half_uint32_t.cu diff --git a/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu b/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu new file mode 100644 index 000000000..e21a54e9e --- 
/dev/null +++ b/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include + +#include + +namespace cuvs::neighbors::cagra { + +class cagra_extreme_inputs_oob_test : public ::testing::Test { + public: + using data_type = float; + + protected: + void run() + { + cagra::index_params ix_ps; + graph_build_params::ivf_pq_params gb_params{}; + gb_params.refinement_rate = 2; + ix_ps.graph_build_params = gb_params; + ix_ps.graph_degree = 64; + ix_ps.intermediate_graph_degree = 128; + + [[maybe_unused]] auto ix = cagra::build(res, ix_ps, raft::make_const_mdspan(dataset->view())); + raft::resource::sync_stream(res); + } + + void SetUp() override + { + dataset.emplace(raft::make_device_matrix(res, n_samples, n_dim)); + raft::random::RngState r(1234ULL); + raft::random::normal( + res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1e20)); + raft::resource::sync_stream(res); + } + + void TearDown() override + { + dataset.reset(); + raft::resource::sync_stream(res); + } + + private: + raft::resources res; + std::optional> dataset = std::nullopt; + + constexpr static int64_t n_samples = 100000; + constexpr static int64_t n_dim = 200; + constexpr static cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded; +}; + +TEST_F(cagra_extreme_inputs_oob_test, 
cagra_extreme_inputs_oob_test) { this->run(); } + +} // namespace cuvs::neighbors::cagra From 06afd5bd27d07ad6e58544c06f920d570b7df983 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 20 Nov 2024 15:26:05 -0800 Subject: [PATCH 11/15] Migrate sparse knn and distances code from raft (#457) Authors: - Ben Frederickson (https://github.com/benfred) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/457 --- cpp/CMakeLists.txt | 2 + cpp/include/cuvs/distance/distance.hpp | 81 ++ cpp/include/cuvs/neighbors/brute_force.hpp | 104 +++ .../distance/detail/sparse/bin_distance.cuh | 231 +++++ cpp/src/distance/detail/sparse/common.hpp | 59 ++ cpp/src/distance/detail/sparse/coo_spmv.cuh | 211 +++++ .../detail/sparse/coo_spmv_kernel.cuh | 229 +++++ .../coo_spmv_strategies/base_strategy.cuh | 149 +++ .../coo_mask_row_iterators.cuh | 234 +++++ .../dense_smem_strategy.cuh | 121 +++ .../coo_spmv_strategies/hash_strategy.cuh | 296 ++++++ .../distance/detail/sparse/ip_distance.cuh | 89 ++ .../distance/detail/sparse/l2_distance.cuh | 502 +++++++++++ .../distance/detail/sparse/lp_distance.cuh | 333 +++++++ cpp/src/distance/detail/sparse/utils.cuh | 171 ++++ cpp/src/distance/sparse_distance.cu | 85 ++ cpp/src/distance/sparse_distance.cuh | 115 +++ cpp/src/neighbors/detail/sparse_knn.cuh | 437 +++++++++ cpp/src/neighbors/sparse_brute_force.cu | 72 ++ cpp/test/CMakeLists.txt | 3 +- cpp/test/distance/sparse_distance.cu | 850 ++++++++++++++++++ cpp/test/neighbors/sparse_brute_force.cu | 175 ++++ 22 files changed, 4548 insertions(+), 1 deletion(-) create mode 100644 cpp/src/distance/detail/sparse/bin_distance.cuh create mode 100644 cpp/src/distance/detail/sparse/common.hpp create mode 100644 cpp/src/distance/detail/sparse/coo_spmv.cuh create mode 100644 cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh create mode 100644 cpp/src/distance/detail/sparse/coo_spmv_strategies/base_strategy.cuh 
create mode 100644 cpp/src/distance/detail/sparse/coo_spmv_strategies/coo_mask_row_iterators.cuh create mode 100644 cpp/src/distance/detail/sparse/coo_spmv_strategies/dense_smem_strategy.cuh create mode 100644 cpp/src/distance/detail/sparse/coo_spmv_strategies/hash_strategy.cuh create mode 100644 cpp/src/distance/detail/sparse/ip_distance.cuh create mode 100644 cpp/src/distance/detail/sparse/l2_distance.cuh create mode 100644 cpp/src/distance/detail/sparse/lp_distance.cuh create mode 100644 cpp/src/distance/detail/sparse/utils.cuh create mode 100644 cpp/src/distance/sparse_distance.cu create mode 100644 cpp/src/distance/sparse_distance.cuh create mode 100644 cpp/src/neighbors/detail/sparse_knn.cuh create mode 100644 cpp/src/neighbors/sparse_brute_force.cu create mode 100644 cpp/test/distance/sparse_distance.cu create mode 100644 cpp/test/neighbors/sparse_brute_force.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 81b82aa7b..32093776c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -369,6 +369,7 @@ if(BUILD_SHARED_LIBS) src/distance/detail/fused_distance_nn.cu src/distance/distance.cu src/distance/pairwise_distance.cu + src/distance/sparse_distance.cu src/neighbors/brute_force.cu src/neighbors/cagra_build_float.cu src/neighbors/cagra_build_half.cu @@ -449,6 +450,7 @@ if(BUILD_SHARED_LIBS) src/neighbors/refine/detail/refine_host_int8_t_float.cpp src/neighbors/refine/detail/refine_host_uint8_t_float.cpp src/neighbors/sample_filter.cu + src/neighbors/sparse_brute_force.cu src/neighbors/vamana_build_float.cu src/neighbors/vamana_build_uint8.cu src/neighbors/vamana_build_int8.cu diff --git a/cpp/include/cuvs/distance/distance.hpp b/cpp/include/cuvs/distance/distance.hpp index def72641e..42c574e58 100644 --- a/cpp/include/cuvs/distance/distance.hpp +++ b/cpp/include/cuvs/distance/distance.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -331,6 +332,86 @@ void pairwise_distance( cuvs::distance::DistanceType metric, float 
metric_arg = 2.0f); +/** + * @brief Compute sparse pairwise distances between x and y, using the provided + * input configuration and distance function. + * + * @code{.cpp} + * #include + * #include + * #include + * + * int x_n_rows = 100000; + * int y_n_rows = 50000; + * int n_cols = 10000; + * + * raft::device_resources handle; + * auto x = raft::make_device_csr_matrix(handle, x_n_rows, n_cols); + * auto y = raft::make_device_csr_matrix(handle, y_n_rows, n_cols); + * + * ... + * // populate data + * ... + * + * auto out = raft::make_device_matrix(handle, x_n_rows, y_n_rows); + * auto metric = cuvs::distance::DistanceType::L2Expanded; + * cuvs::distance::pairwise_distance(handle, x.view(), y.view(), out.view(), metric); + * @endcode + * + * @param[in] handle raft::resources + * @param[in] x raft::device_csr_matrix_view + * @param[in] y raft::device_csr_matrix_view + * @param[out] dist raft::device_matrix_view dense matrix + * @param[in] metric distance metric to use + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +void pairwise_distance(raft::resources const& handle, + raft::device_csr_matrix_view x, + raft::device_csr_matrix_view y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg = 2.0f); + +/** + * @brief Compute sparse pairwise distances between x and y, using the provided + * input configuration and distance function. + * + * @code{.cpp} + * #include + * #include + * #include + * + * int x_n_rows = 100000; + * int y_n_rows = 50000; + * int n_cols = 10000; + * + * raft::device_resources handle; + * auto x = raft::make_device_csr_matrix(handle, x_n_rows, n_cols); + * auto y = raft::make_device_csr_matrix(handle, y_n_rows, n_cols); + * + * ... + * // populate data + * ... 
+ * + * auto out = raft::make_device_matrix(handle, x_n_rows, y_n_rows); + * auto metric = cuvs::distance::DistanceType::L2Expanded; + * cuvs::distance::pairwise_distance(handle, x.view(), y.view(), out.view(), metric); + * @endcode + * + * @param[in] handle raft::resources + * @param[in] x raft::device_csr_matrix_view + * @param[in] y raft::device_csr_matrix_view + * @param[out] dist raft::device_matrix_view dense matrix + * @param[in] metric distance metric to use + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +void pairwise_distance(raft::resources const& handle, + raft::device_csr_matrix_view x, + raft::device_csr_matrix_view y, + raft::device_matrix_view dist, + cuvs::distance::DistanceType metric, + float metric_arg = 2.0f); + /** @} */ // end group pairwise_distance_runtime }; // namespace cuvs::distance diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index 428fa592a..ba67797ee 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -18,6 +18,7 @@ #include "common.hpp" #include +#include #include #include #include @@ -375,4 +376,107 @@ void search(raft::resources const& handle, * @} */ +/** + * @defgroup sparse_bruteforce_cpp_index Sparse Brute Force index + * @{ + */ +/** + * @brief Sparse Brute Force index. 
+ * + * @tparam T Data element type + * @tparam IdxT Index element type + */ +template +struct sparse_index { + public: + sparse_index(const sparse_index&) = delete; + sparse_index(sparse_index&&) = default; + sparse_index& operator=(const sparse_index&) = delete; + sparse_index& operator=(sparse_index&&) = default; + ~sparse_index() = default; + + /** Construct a sparse brute force sparse_index from dataset */ + sparse_index(raft::resources const& res, + raft::device_csr_matrix_view dataset, + cuvs::distance::DistanceType metric, + T metric_arg); + + /** Distance metric used for retrieval */ + cuvs::distance::DistanceType metric() const noexcept { return metric_; } + + /** Metric argument */ + T metric_arg() const noexcept { return metric_arg_; } + + raft::device_csr_matrix_view dataset() const noexcept + { + return dataset_; + } + + private: + raft::device_csr_matrix_view dataset_; + cuvs::distance::DistanceType metric_; + T metric_arg_; +}; +/** + * @} + */ + +/** + * @defgroup sparse_bruteforce_cpp_index_build Sparse Brute Force index build + * @{ + */ + +/** + * @brief Build the Sparse index from the dataset + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // create and fill the index from a CSR dataset + * auto index = brute_force::build(handle, dataset, metric); + * @endcode + * + * @param[in] handle raft::resources + * @param[in] dataset A sparse CSR matrix in device memory to search against + * @param[in] metric cuvs::distance::DistanceType + * @param[in] metric_arg metric argument + * + * @return the constructed Sparse brute-force index + */ +auto build(raft::resources const& handle, + raft::device_csr_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::sparse_index; +/** + * @} + */ + +/** + * @defgroup sparse_bruteforce_cpp_index_search Sparse Brute Force index search + * @{ + */ +struct sparse_search_params { + int 
batch_size_index = 2 << 14; + int batch_size_query = 2 << 14; +}; + +/** + * @brief Search the sparse bruteforce index for nearest neighbors + * + * @param[in] handle raft::resources + * @param[in] index Sparse brute-force constructed index + * @param[in] dataset the queries: a sparse CSR matrix on the device to query + * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] + */ +void search(raft::resources const& handle, + const sparse_search_params& params, + const sparse_index& index, + raft::device_csr_matrix_view dataset, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances); +/** + * @} + */ } // namespace cuvs::neighbors::brute_force diff --git a/cpp/src/distance/detail/sparse/bin_distance.cuh b/cpp/src/distance/detail/sparse/bin_distance.cuh new file mode 100644 index 000000000..1a63a8eb9 --- /dev/null +++ b/cpp/src/distance/detail/sparse/bin_distance.cuh @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "common.hpp" +#include "ip_distance.cuh" + +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { +// @TODO: Move this into sparse prims (coo_norm) +template +RAFT_KERNEL compute_binary_row_norm_kernel(value_t* out, + const value_idx* __restrict__ coo_rows, + const value_t* __restrict__ data, + value_idx nnz) +{ + value_idx i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < nnz) { + // We do conditional here only because it's + // possible there could be some stray zeros in + // the sparse structure and removing them would be + // more expensive. + atomicAdd(&out[coo_rows[i]], data[i] == 1.0); + } +} + +template +RAFT_KERNEL compute_binary_warp_kernel(value_t* __restrict__ C, + const value_t* __restrict__ Q_norms, + const value_t* __restrict__ R_norms, + value_idx n_rows, + value_idx n_cols, + expansion_f expansion_func) +{ + std::size_t tid = blockDim.x * blockIdx.x + threadIdx.x; + value_idx i = tid / n_cols; + value_idx j = tid % n_cols; + + if (i >= n_rows || j >= n_cols) return; + + value_t q_norm = Q_norms[i]; + value_t r_norm = R_norms[j]; + value_t dot = C[(size_t)i * n_cols + j]; + C[(size_t)i * n_cols + j] = expansion_func(dot, q_norm, r_norm); +} + +template +void compute_binary(value_t* C, + const value_t* Q_norms, + const value_t* R_norms, + value_idx n_rows, + value_idx n_cols, + expansion_f expansion_func, + cudaStream_t stream) +{ + int blocks = raft::ceildiv((size_t)n_rows * n_cols, tpb); + compute_binary_warp_kernel<<>>( + C, Q_norms, R_norms, n_rows, n_cols, expansion_func); +} + +template +void compute_bin_distance(value_t* out, + const value_idx* Q_coo_rows, + const value_t* Q_data, + value_idx Q_nnz, + const value_idx* R_coo_rows, + const value_t* R_data, + value_idx R_nnz, + value_idx m, + value_idx n, + cudaStream_t stream, + expansion_f expansion_func) +{ + rmm::device_uvector Q_norms(m, stream); + 
rmm::device_uvector R_norms(n, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(Q_norms.data(), 0, Q_norms.size() * sizeof(value_t))); + RAFT_CUDA_TRY(cudaMemsetAsync(R_norms.data(), 0, R_norms.size() * sizeof(value_t))); + + compute_binary_row_norm_kernel<<>>( + Q_norms.data(), Q_coo_rows, Q_data, Q_nnz); + compute_binary_row_norm_kernel<<>>( + R_norms.data(), R_coo_rows, R_data, R_nnz); + + compute_binary(out, Q_norms.data(), R_norms.data(), m, n, expansion_func, stream); +} + +/** + * Jaccard distance using the expanded form: + * 1 - (sum(x_k * y_k) / ((sum(x_k) + sum(y_k)) - sum(x_k * y_k)) + */ +template +class jaccard_expanded_distances_t : public distances_t { + public: + explicit jaccard_expanded_distances_t(const distances_config_t& config) + : config_(&config), + workspace(0, raft::resource::get_cuda_stream(config.handle)), + ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_idx* b_indices = ip_dists.b_rows_coo(); + value_t* b_data = ip_dists.b_data_coo(); + + rmm::device_uvector search_coo_rows( + config_->a_nnz, raft::resource::get_cuda_stream(config_->handle)); + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + search_coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + compute_bin_distance(out_dists, + search_coo_rows.data(), + config_->a_data, + config_->a_nnz, + b_indices, + b_data, + config_->b_nnz, + config_->a_nrows, + config_->b_nrows, + raft::resource::get_cuda_stream(config_->handle), + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { + value_t q_r_union = q_norm + r_norm; + value_t denom = q_r_union - dot; + + value_t jacc = ((denom != 0) * dot) / ((denom == 0) + denom); + + // flip the similarity when both rows are 0 + bool both_empty = q_r_union == 0; + return 1 - ((!both_empty * jacc) + both_empty); + }); + } + + ~jaccard_expanded_distances_t() = default; + + private: + const distances_config_t* config_; + 
rmm::device_uvector workspace; + ip_distances_t ip_dists; +}; + +/** + * Dice distance using the expanded form: + * 1 - ((2 * sum(x_k * y_k)) / (sum(x_k) + sum(y_k))) + * + * compute() first fills out_dists with ip_dists.compute(), then converts the + * CSR indptr of A to COO rows and applies the expansion lambda below through + * compute_bin_distance. A pair with sum(x_k) + sum(y_k) == 0 gets distance 0; + * the division is guarded so that case no longer evaluates 0 / 0 (NaN). + */ +template +class dice_expanded_distances_t : public distances_t { + public: + explicit dice_expanded_distances_t(const distances_config_t& config) + : config_(&config), + workspace(0, raft::resource::get_cuda_stream(config.handle)), + ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_idx* b_indices = ip_dists.b_rows_coo(); + value_t* b_data = ip_dists.b_data_coo(); + + rmm::device_uvector search_coo_rows( + config_->a_nnz, raft::resource::get_cuda_stream(config_->handle)); + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + search_coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + compute_bin_distance(out_dists, + search_coo_rows.data(), + config_->a_data, + config_->a_nnz, + b_indices, + b_data, + config_->b_nnz, + config_->a_nrows, + config_->b_nrows, + raft::resource::get_cuda_stream(config_->handle), + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { + value_t q_r_union = q_norm + r_norm; + bool both_empty = q_r_union == 0; + /* branch-free guard (same trick as the Jaccard lambda): when both rows are empty divide 0 by 1 instead of 0 by 0 */ + value_t dice = (!both_empty * (2 * dot)) / (both_empty + q_r_union); + return 1 - ((!both_empty * dice) + both_empty); + }); + } + + ~dice_expanded_distances_t() = default; + + private: + const distances_config_t* config_; + rmm::device_uvector workspace; + ip_distances_t ip_dists; +}; + +} // END namespace sparse +} // END namespace detail +} // END namespace distance +} // END namespace cuvs diff --git a/cpp/src/distance/detail/sparse/common.hpp b/cpp/src/distance/detail/sparse/common.hpp new file mode 100644 index 000000000..803dabe56 --- /dev/null +++ b/cpp/src/distance/detail/sparse/common.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Description of the two sparse operands (a = left side, b = right side) of a + * pairwise distance computation. It holds only sizes, raw pointers and a + * reference to the raft resources handle. The constructor sets nothing but + * `handle`, so callers must assign every other field they rely on before use. + */ +template +struct distances_config_t { + distances_config_t(raft::resources const& handle_) : handle(handle_) {} + + // left side + value_idx a_nrows; + value_idx a_ncols; + value_idx a_nnz; + value_idx* a_indptr; + value_idx* a_indices; + value_t* a_data; + + // right side + value_idx b_nrows; + value_idx b_ncols; + value_idx b_nnz; + value_idx* b_indptr; + value_idx* b_indices; + value_t* b_data; + + raft::resources const& handle; +}; + +/** + * Polymorphic base of the sparse distance implementations. The default + * `compute` does nothing; derived classes provide their own `compute(out)`. + */ +template +class distances_t { + public: + virtual void compute(value_t* out) {} + virtual ~distances_t() = default; +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv.cuh b/cpp/src/distance/detail/sparse/coo_spmv.cuh new file mode 100644 index 000000000..181b531f7 --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv.cuh @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "common.hpp" +#include "coo_spmv_strategies/dense_smem_strategy.cuh" +#include "coo_spmv_strategies/hash_strategy.cuh" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Overload that runs the generalized SPMV with a caller-provided strategy + * object: it zero-fills `out_dists` (a_nrows * b_nrows values) on the handle's + * stream and then forwards the remaining arguments to `strategy.dispatch`. + */ +template +inline void balanced_coo_pairwise_generalized_spmv( + value_t* out_dists, + const distances_config_t& config_, + value_idx* coo_rows_b, + product_f product_func, + accum_f accum_func, + write_f write_func, + strategy_t strategy, + int chunk_size = 500000) +{ + uint64_t n = (uint64_t)sizeof(value_t) * (uint64_t)config_.a_nrows * (uint64_t)config_.b_nrows; + RAFT_CUDA_TRY(cudaMemsetAsync(out_dists, 0, n, raft::resource::get_cuda_stream(config_.handle))); + + strategy.dispatch(out_dists, coo_rows_b, product_func, accum_func, write_func, chunk_size); +}; + +/** + * Performs generalized sparse-matrix-sparse-matrix multiplication via a + * sparse-matrix-sparse-vector layout `out=A*B` where generalized product() + * and sum() operations can be used in place of the standard sum and product: + * + * out_ij = sum_k(product(A_ik, B_jk)) The sum goes through values of + * k=0..n_cols-1 where B_jk is nonzero (i is a row of A, j is a row of B). + * + * The product and sum operations shall form a semiring algebra with the + * following properties: + * 1. {+, 0} is a commutative sum reduction monoid with identity element 0 + * 2. {*, 1} is a product monoid with identity element 1 + * 3. Multiplication by 0 annihilates x. e.g.
product(x, 0) = 0 + * + * Each vector of A is loaded into shared memory in dense form and the + * non-zeros of B load balanced across the threads of each block. + * The dense shared-memory strategy is chosen when `max_cols_per_block()` + * exceeds `config_.a_ncols`; otherwise the hash strategy is used. + * `out_dists` is zero-filled on the handle's stream before dispatch. + * @tparam value_idx index type + * @tparam value_t value type + * @tparam threads_per_block block size + * @tparam product_f semiring product() function + * @tparam accum_f semiring sum() function + * @tparam write_f atomic semiring sum() function + * @param[out] out_dists dense array of out distances of size m * n in row-major + * format. + * @param[in] config_ distance config object + * @param[in] coo_rows_b coo row array for B + * @param[in] product_func semiring product() function + * @param[in] accum_func semiring sum() function + * @param[in] write_func atomic semiring sum() function + * @param[in] chunk_size number of nonzeros of B to process for each row of A + * this value was found through profiling and represents a reasonable + * setting for both large and small densities + */ +template +inline void balanced_coo_pairwise_generalized_spmv( + value_t* out_dists, + const distances_config_t& config_, + value_idx* coo_rows_b, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size = 500000) +{ + uint64_t n = (uint64_t)sizeof(value_t) * (uint64_t)config_.a_nrows * (uint64_t)config_.b_nrows; + RAFT_CUDA_TRY(cudaMemsetAsync(out_dists, 0, n, raft::resource::get_cuda_stream(config_.handle))); + + int max_cols = max_cols_per_block(); + + if (max_cols > config_.a_ncols) { + dense_smem_strategy strategy(config_); + strategy.dispatch(out_dists, coo_rows_b, product_func, accum_func, write_func, chunk_size); + } else { + hash_strategy strategy(config_); + strategy.dispatch(out_dists, coo_rows_b, product_func, accum_func, write_func, chunk_size); + } +}; + +/** + * Reverse-pass overload that takes a caller-provided strategy object and + * forwards to its `dispatch_rev`. + */ +template +inline void balanced_coo_pairwise_generalized_spmv_rev( + value_t* out_dists, + const distances_config_t& config_, + value_idx* coo_rows_a, + product_f product_func, + accum_f accum_func, + write_f write_func, +
strategy_t strategy, + int chunk_size = 500000) +{ + strategy.dispatch_rev(out_dists, coo_rows_a, product_func, accum_func, write_func, chunk_size); +}; + +/** + * Used for computing distances where the reduction (e.g. product()) function + * requires an implicit union (product(x, 0) = x) to capture the difference A-B. + * This is necessary in some applications because the standard semiring algebra + * endowed with the default multiplication product monoid will only + * compute the intersection & B-A. + * + * This particular function is meant to accompany the function + * `balanced_coo_pairwise_generalized_spmv` and executes the product operation + * on only those columns that exist in B and not A. + * + * The product and sum operations shall enable the computation of a + * non-annihilating semiring algebra with the following properties: + * 1. {+, 0} is a commutative sum reduction monoid with identity element 0 + * 2. {*, 0} is a product monoid with identity element 0 + * 3. Multiplication by 0 does not annihilate x. e.g. product(x, 0) = x + * + * Manhattan distance sum(abs(x_k-y_k)) is a great example of when this type of + * execution pattern is necessary.
+ * + * Unlike the forward function, this one does not zero `out_dists`; it only + * selects a strategy (dense when `max_cols_per_block()` exceeds + * `config_.b_ncols`, hash otherwise) and calls its `dispatch_rev`. + * + * @tparam value_idx index type + * @tparam value_t value type + * @tparam threads_per_block block size + * @tparam product_f semiring product() function + * @tparam accum_f semiring sum() function + * @tparam write_f atomic semiring sum() function + * @param[out] out_dists dense array of out distances of size m * n + * @param[in] config_ distance config object + * @param[in] coo_rows_a coo row array for A + * @param[in] product_func semiring product() function + * @param[in] accum_func semiring sum() function + * @param[in] write_func atomic semiring sum() function + * @param[in] chunk_size number of nonzeros of A to process for each row of B + * (A and B swap roles in the reverse pass); + * this value was found through profiling and represents a reasonable + * setting for both large and small densities + */ +template +inline void balanced_coo_pairwise_generalized_spmv_rev( + value_t* out_dists, + const distances_config_t& config_, + value_idx* coo_rows_a, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size = 500000) +{ + // try dense first + int max_cols = max_cols_per_block(); + + if (max_cols > config_.b_ncols) { + dense_smem_strategy strategy(config_); + strategy.dispatch_rev(out_dists, coo_rows_a, product_func, accum_func, write_func, chunk_size); + } else { + hash_strategy strategy(config_); + strategy.dispatch_rev(out_dists, coo_rows_a, product_func, accum_func, write_func, chunk_size); + } +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh new file mode 100644 index 000000000..1f4b19af4 --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { +/** + * Returns the zero-based position of the lowest set bit of `peer_group` + * (`__ffs(peer_group) - 1`), i.e. the lowest lane id present in the mask. + */ +__device__ __inline__ unsigned int get_lowest_peer(unsigned int peer_group) +{ + return __ffs(peer_group) - 1; +} + +/** + * Load-balanced sparse-matrix-sparse-matrix multiplication (SPMM) kernel with + * sparse-matrix-sparse-vector multiplication layout (SPMV). + * This is intended to be scheduled n_chunks_b times for each row of a. + * The steps are as follows: + * + * 1. Load row from A into dense vector in shared memory. + * This can be further chunked in the future if necessary to support larger + * column sizes. + * 2. Threads of block all step through chunks of B in parallel. + * When a new row is encountered in row_indices_b, a segmented + * reduction is performed within each warp and the lowest lane of each + * segment writes the reduced value to `out` through write_func. + * + * Reference: https://www.icl.utk.edu/files/publications/2020/icl-utk-1421-2020.pdf + * + * @tparam value_idx index type + * @tparam value_t value type + * @tparam tpb threads per block configured on launch + * @tparam rev if this is true, the reduce/accumulate functions are only + * executed when A[col] == 0.0. when executed before/after !rev + * and A & B are reversed, this allows the full symmetric difference + * and intersection to be computed. + * @tparam kv_t data type stored in shared mem cache + * @tparam product_f reduce function type (semiring product() function).
+ * accepts two arguments of value_t and returns a value_t + * @tparam accum_f accumulation function type (semiring sum() function). + * accepts two arguments of value_t and returns a value_t + * @tparam write_f function to write value out. this should be mathematically + * equivalent to the accumulate function but implemented as + * an atomic operation on global memory. Accepts two arguments + * of value_t* and value_t and updates the value given by the + * pointer. + * @param[in] strategy object providing init_insert/insert/init_find/find over + * the shared-memory cache that holds the current row of A + * @param[in] indptrA row iterator for A (supplies the row index, row offsets and + * index bounds used by each block) + * @param[in] indicesA column indices array for A + * @param[in] dataA data array for A + * @param[in] nnz_a number of nonzeros in A + * @param[in] rowsB coo row array for B + * @param[in] indicesB column indices array for B + * @param[in] dataB data array for B + * @param[in] m number of rows in A + * @param[in] n number of rows in B + * @param[in] dim size of the shared-memory cache handed to the strategy; the + * warp-reduce temp storage is placed at A + dim + * @param[in] nnz_b number of nonzeros in B + * @param[out] out array of size m*n + * @param[in] n_blocks_per_row number of blocks of B per row of A + * @param[in] chunk_size chunk length; each block covers up to chunk_size * tpb + * nonzeros of B + * @param[in] b_ncols number of columns; b_ncols - 1 is the initial upper column + * bound passed to indptrA.get_indices_boundary + * @param[in] product_func semiring product() function + * @param[in] accum_func semiring sum() function + * @param[in] write_func atomic semiring sum() function + */ +template +RAFT_KERNEL balanced_coo_generalized_spmv_kernel(strategy_t strategy, + indptr_it indptrA, + value_idx* indicesA, + value_t* dataA, + value_idx nnz_a, + value_idx* rowsB, + value_idx* indicesB, + value_t* dataB, + value_idx m, + value_idx n, + int dim, + value_idx nnz_b, + value_t* out, + int n_blocks_per_row, + int chunk_size, + value_idx b_ncols, + product_f product_func, + accum_f accum_func, + write_f write_func) +{ + typedef cub::WarpReduce warp_reduce; + + value_idx cur_row_a = indptrA.get_row_idx(n_blocks_per_row); + value_idx cur_chunk_offset = blockIdx.x % n_blocks_per_row; + + // chunk starting offset + value_idx ind_offset = cur_chunk_offset * chunk_size *
tpb; + // how many total cols will be processed by this block (should be <= chunk_size * n_threads) + value_idx active_chunk_size = min(chunk_size * tpb, nnz_b - ind_offset); + + int tid = threadIdx.x; + int warp_id = tid / raft::warp_size(); + + // compute id relative to current warp + unsigned int lane_id = tid & (raft::warp_size() - 1); + value_idx ind = ind_offset + threadIdx.x; + + extern __shared__ char smem[]; + + typename strategy_t::smem_type A = (typename strategy_t::smem_type)(smem); + typename warp_reduce::TempStorage* temp_storage = (typename warp_reduce::TempStorage*)(A + dim); + + auto inserter = strategy.init_insert(A, dim); + + __syncthreads(); + + value_idx start_offset_a, stop_offset_a; + bool first_a_chunk, last_a_chunk; + indptrA.get_row_offsets( + cur_row_a, start_offset_a, stop_offset_a, n_blocks_per_row, first_a_chunk, last_a_chunk); + + // Convert current row vector in A to dense + for (int i = tid; i <= (stop_offset_a - start_offset_a); i += blockDim.x) { + strategy.insert(inserter, indicesA[start_offset_a + i], dataA[start_offset_a + i]); + } + + __syncthreads(); + + auto finder = strategy.init_find(A, dim); + + if (cur_row_a > m || cur_chunk_offset > n_blocks_per_row) return; + if (ind >= nnz_b) return; + + value_idx start_index_a = 0, stop_index_a = b_ncols - 1; + indptrA.get_indices_boundary(indicesA, + cur_row_a, + start_offset_a, + stop_offset_a, + start_index_a, + stop_index_a, + first_a_chunk, + last_a_chunk); + + value_idx cur_row_b = -1; + value_t c = 0.0; + + auto warp_red = warp_reduce(*(temp_storage + warp_id)); + + if (tid < active_chunk_size) { + cur_row_b = rowsB[ind]; + + auto index_b = indicesB[ind]; + auto in_bounds = indptrA.check_indices_bounds(start_index_a, stop_index_a, index_b); + + if (in_bounds) { + value_t a_col = strategy.find(finder, index_b); + if (!rev || a_col == 0.0) { c = product_func(a_col, dataB[ind]); } + } + } + + // loop through chunks in parallel, reducing when a new row is + // encountered by each
thread + for (int i = tid; i < active_chunk_size; i += blockDim.x) { + value_idx ind_next = ind + blockDim.x; + value_idx next_row_b = -1; + + if (i + blockDim.x < active_chunk_size) next_row_b = rowsB[ind_next]; + + bool diff_rows = next_row_b != cur_row_b; + + if (__any_sync(0xffffffff, diff_rows)) { + // grab the threads currently participating in loops. + // because any other threads should have returned already. + unsigned int peer_group = __match_any_sync(0xffffffff, cur_row_b); + bool is_leader = get_lowest_peer(peer_group) == lane_id; + value_t v = warp_red.HeadSegmentedReduce(c, is_leader, accum_func); + + // thread with lowest lane id among peers writes out + if (is_leader && v != 0.0) { + // this conditional should be uniform, since rev is constant + size_t idx = !rev ? (size_t)cur_row_a * n + cur_row_b : (size_t)cur_row_b * m + cur_row_a; + write_func(out + idx, v); + } + + c = 0.0; + } + + if (next_row_b != -1) { + ind = ind_next; + + auto index_b = indicesB[ind]; + auto in_bounds = indptrA.check_indices_bounds(start_index_a, stop_index_a, index_b); + if (in_bounds) { + value_t a_col = strategy.find(finder, index_b); + + if (!rev || a_col == 0.0) { c = accum_func(c, product_func(a_col, dataB[ind])); } + } + + cur_row_b = next_row_b; + } + } +} + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv_strategies/base_strategy.cuh b/cpp/src/distance/detail/sparse/coo_spmv_strategies/base_strategy.cuh new file mode 100644 index 000000000..457b25eea --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv_strategies/base_strategy.cuh @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../common.hpp" +#include "../coo_spmv_kernel.cuh" +#include "../utils.cuh" +#include "coo_mask_row_iterators.cuh" + +#include + +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Common base of the COO SPMV strategies. It keeps a reference to the distance + * config (which must therefore outlive the strategy), reads the shared memory + * size per block once in the constructor via `raft::getSharedMemPerBlock()`, + * and launches `balanced_coo_generalized_spmv_kernel` either in forward order + * (`_dispatch_base`: A row cached, B streamed) or in reverse order + * (`_dispatch_base_rev`: the A and B arguments are swapped). + */ +template +class coo_spmv_strategy { + public: + coo_spmv_strategy(const distances_config_t& config_) : config(config_) + { + smem = raft::getSharedMemPerBlock(); + } + + template + void _dispatch_base(strategy_t& strategy, + int smem_dim, + indptr_it& a_indptr, + value_t* out_dists, + value_idx* coo_rows_b, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size, + int n_blocks, + int n_blocks_per_row) + { + RAFT_CUDA_TRY(cudaFuncSetCacheConfig(balanced_coo_generalized_spmv_kernel, + cudaFuncCachePreferShared)); + + balanced_coo_generalized_spmv_kernel + <<>>(strategy, + a_indptr, + config.a_indices, + config.a_data, + config.a_nnz, + coo_rows_b, + config.b_indices, + config.b_data, + config.a_nrows, + config.b_nrows, + smem_dim, + config.b_nnz, + out_dists, + n_blocks_per_row, + chunk_size, + config.b_ncols, + product_func, + accum_func, + write_func); + } + + template + void _dispatch_base_rev(strategy_t& strategy, + int smem_dim, + indptr_it& b_indptr, + value_t* out_dists, + value_idx* coo_rows_a, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size, + int n_blocks, + int n_blocks_per_row) + { + RAFT_CUDA_TRY(cudaFuncSetCacheConfig(balanced_coo_generalized_spmv_kernel, + cudaFuncCachePreferShared)); + +
balanced_coo_generalized_spmv_kernel + <<>>(strategy, + b_indptr, + config.b_indices, + config.b_data, + config.b_nnz, + coo_rows_a, + config.a_indices, + config.a_data, + config.b_nrows, + config.a_nrows, + smem_dim, + config.a_nnz, + out_dists, + n_blocks_per_row, + chunk_size, + config.a_ncols, + product_func, + accum_func, + write_func); + } + + protected: + int smem; + const distances_config_t& config; +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv_strategies/coo_mask_row_iterators.cuh b/cpp/src/distance/detail/sparse/coo_spmv_strategies/coo_mask_row_iterators.cuh new file mode 100644 index 000000000..a9040e1d8 --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv_strategies/coo_mask_row_iterators.cuh @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include "../common.hpp" +#include "../utils.cuh" + +#include // raft::ceildiv + +#include + +#include +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Row iterator used by the SPMV kernel when each selected row is handled whole. + * The row of a block is `blockIdx.x / n_blocks_nnz_b`, remapped through + * `mask_row_idx` when one is given. `get_row_offsets` returns the inclusive + * range [full_indptr[row], full_indptr[row + 1] - 1] and leaves the chunk flags + * untouched; `get_indices_boundary` does nothing and `check_indices_bounds` + * always returns true. + */ +template +class mask_row_it { + public: + mask_row_it(const value_idx* full_indptr_, + const value_idx& n_rows_, + value_idx* mask_row_idx_ = NULL) + : full_indptr(full_indptr_), mask_row_idx(mask_row_idx_), n_rows(n_rows_) + { + } + + __device__ inline value_idx get_row_idx(const int& n_blocks_nnz_b) + { + if (mask_row_idx != NULL) { + return mask_row_idx[blockIdx.x / n_blocks_nnz_b]; + } else { + return blockIdx.x / n_blocks_nnz_b; + } + } + + __device__ inline void get_row_offsets(const value_idx& row_idx, + value_idx& start_offset, + value_idx& stop_offset, + const value_idx& n_blocks_nnz_b, + bool& first_a_chunk, + bool& last_a_chunk) + { + start_offset = full_indptr[row_idx]; + stop_offset = full_indptr[row_idx + 1] - 1; + } + + __device__ constexpr inline void get_indices_boundary(const value_idx* indices, + value_idx& indices_len, + value_idx& start_offset, + value_idx& stop_offset, + value_idx& start_index, + value_idx& stop_index, + bool& first_a_chunk, + bool& last_a_chunk) + { + // do nothing; + } + + __device__ constexpr inline bool check_indices_bounds(value_idx& start_index_a, + value_idx& stop_index_a, + value_idx& index_b) + { + return true; + } + + const value_idx *full_indptr, &n_rows; + value_idx* mask_row_idx; +}; + +/** + * For every row `tid < n_rows`, writes `tid` into `chunk_indices[i]` for each i in + * [n_chunks_per_row[tid], n_chunks_per_row[tid + 1]), so that a chunk id can + * be mapped back to the row it belongs to. + */ +template +RAFT_KERNEL fill_chunk_indices_kernel(value_idx* n_chunks_per_row, + value_idx* chunk_indices, + value_idx n_rows) +{ + auto tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < n_rows) { + auto start = n_chunks_per_row[tid]; + auto end = n_chunks_per_row[tid + 1]; + +#pragma unroll + for (int i = start; i < end; i++) { + chunk_indices[i] = tid; + } + } +} + +/** + * Row iterator that splits each selected row into chunks of `row_chunk_size` + * nonzeros. `init` fills `n_chunks_per_row` with the running total of per-row + * chunk counts (element 0 is 0) and `chunk_indices` with the chunk-to-row map. + * NOTE: `total_row_blocks` is a static member written by `init`, so it is + * shared by every instance of the same specialization. + */ +template +class chunked_mask_row_it : public mask_row_it { + public: + chunked_mask_row_it(const value_idx* full_indptr_, + const value_idx&
n_rows_, + value_idx* mask_row_idx_, + int row_chunk_size_, + const value_idx* n_chunks_per_row_, + const value_idx* chunk_indices_, + const cudaStream_t stream_) + : mask_row_it(full_indptr_, n_rows_, mask_row_idx_), + row_chunk_size(row_chunk_size_), + n_chunks_per_row(n_chunks_per_row_), + chunk_indices(chunk_indices_), + stream(stream_) + { + } + + static void init(const value_idx* indptr, + const value_idx* mask_row_idx, + const value_idx& n_rows, + const int row_chunk_size, + rmm::device_uvector& n_chunks_per_row, + rmm::device_uvector& chunk_indices, + cudaStream_t stream) + { + auto policy = rmm::exec_policy(stream); + + constexpr value_idx first_element = 0; + n_chunks_per_row.set_element_async(0, first_element, stream); + n_chunks_per_row_functor chunk_functor(indptr, row_chunk_size); + thrust::transform( + policy, mask_row_idx, mask_row_idx + n_rows, n_chunks_per_row.begin() + 1, chunk_functor); + + thrust::inclusive_scan( + policy, n_chunks_per_row.begin() + 1, n_chunks_per_row.end(), n_chunks_per_row.begin() + 1); + + raft::update_host(&total_row_blocks, n_chunks_per_row.data() + n_rows, 1, stream); + + fill_chunk_indices(n_rows, n_chunks_per_row, chunk_indices, stream); + } + + __device__ inline value_idx get_row_idx(const int& n_blocks_nnz_b) + { + return this->mask_row_idx[chunk_indices[blockIdx.x / n_blocks_nnz_b]]; + } + + __device__ inline void get_row_offsets(const value_idx& row_idx, + value_idx& start_offset, + value_idx& stop_offset, + const int& n_blocks_nnz_b, + bool& first_a_chunk, + bool& last_a_chunk) + { + auto chunk_index = blockIdx.x / n_blocks_nnz_b; + auto chunk_val = chunk_indices[chunk_index]; + auto prev_n_chunks = n_chunks_per_row[chunk_val]; + auto relative_chunk = chunk_index - prev_n_chunks; + first_a_chunk = relative_chunk == 0; + + start_offset = this->full_indptr[row_idx] + relative_chunk * row_chunk_size; + stop_offset = start_offset + row_chunk_size; + + auto final_stop_offset = this->full_indptr[row_idx + 1]; + +
last_a_chunk = stop_offset >= final_stop_offset; + stop_offset = last_a_chunk ? final_stop_offset - 1 : stop_offset - 1; + } + + __device__ inline void get_indices_boundary(const value_idx* indices, + value_idx& row_idx, + value_idx& start_offset, + value_idx& stop_offset, + value_idx& start_index, + value_idx& stop_index, + bool& first_a_chunk, + bool& last_a_chunk) + { + start_index = first_a_chunk ? start_index : indices[start_offset - 1] + 1; + stop_index = last_a_chunk ? stop_index : indices[stop_offset]; + } + + __device__ inline bool check_indices_bounds(value_idx& start_index_a, + value_idx& stop_index_a, + value_idx& index_b) + { + return (index_b >= start_index_a && index_b <= stop_index_a); + } + + inline static value_idx total_row_blocks = 0; + const cudaStream_t stream; + const value_idx *n_chunks_per_row, *chunk_indices; + value_idx row_chunk_size; + + struct n_chunks_per_row_functor { + public: + n_chunks_per_row_functor(const value_idx* indptr_, value_idx row_chunk_size_) + : indptr(indptr_), row_chunk_size(row_chunk_size_) + { + } + + __host__ __device__ value_idx operator()(const value_idx& i) + { + auto degree = indptr[i + 1] - indptr[i]; + return raft::ceildiv(degree, (value_idx)row_chunk_size); + } + + const value_idx* indptr; + value_idx row_chunk_size; + }; + + private: + static void fill_chunk_indices(const value_idx& n_rows, + rmm::device_uvector& n_chunks_per_row, + rmm::device_uvector& chunk_indices, + cudaStream_t stream) + { + auto n_threads = std::min(n_rows, 256); + auto n_blocks = raft::ceildiv(n_rows, (value_idx)n_threads); + + chunk_indices.resize(total_row_blocks, stream); + + fill_chunk_indices_kernel + <<>>(n_chunks_per_row.data(), chunk_indices.data(), n_rows); + } +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv_strategies/dense_smem_strategy.cuh b/cpp/src/distance/detail/sparse/coo_spmv_strategies/dense_smem_strategy.cuh new
file mode 100644 index 000000000..baa913a6c --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv_strategies/dense_smem_strategy.cuh @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "base_strategy.cuh" + +#include // raft::ceildiv + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Strategy that caches one row as a dense array indexed by column: + * `init_insert` zero-fills the cache, `insert` stores `cache[key] = value` and + * `find` returns `cache[key]`. `dispatch` launches + * a_nrows * ceildiv(b_nnz, chunk_size * 1024) blocks over the rows of A; + * `dispatch_rev` does the same with the roles of A and B swapped. The 1024 + * factor is hard-coded in both dispatch functions and in `smem_per_block`. + */ +template +class dense_smem_strategy : public coo_spmv_strategy { + public: + using smem_type = value_t*; + using insert_type = smem_type; + using find_type = smem_type; + + dense_smem_strategy(const distances_config_t& config_) + : coo_spmv_strategy(config_) + { + } + + inline static int smem_per_block(int n_cols) + { + return (n_cols * sizeof(value_t)) + ((1024 / raft::warp_size()) * sizeof(value_t)); + } + + template + void dispatch(value_t* out_dists, + value_idx* coo_rows_b, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size) + { + auto n_blocks_per_row = raft::ceildiv(this->config.b_nnz, chunk_size * 1024); + auto n_blocks = this->config.a_nrows * n_blocks_per_row; + + mask_row_it a_indptr(this->config.a_indptr, this->config.a_nrows); + + this->_dispatch_base(*this, + this->config.b_ncols, + a_indptr, + out_dists, + coo_rows_b, + product_func, + accum_func, + write_func, + chunk_size, + n_blocks, + n_blocks_per_row); + } + + template + void dispatch_rev(value_t* out_dists, + value_idx* coo_rows_a, +
product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size) + { + auto n_blocks_per_row = raft::ceildiv(this->config.a_nnz, chunk_size * 1024); + auto n_blocks = this->config.b_nrows * n_blocks_per_row; + + mask_row_it b_indptr(this->config.b_indptr, this->config.b_nrows); + + this->_dispatch_base_rev(*this, + this->config.a_ncols, + b_indptr, + out_dists, + coo_rows_a, + product_func, + accum_func, + write_func, + chunk_size, + n_blocks, + n_blocks_per_row); + } + + __device__ inline insert_type init_insert(smem_type cache, const value_idx& cache_size) + { + for (int k = threadIdx.x; k < cache_size; k += blockDim.x) { + cache[k] = 0.0; + } + return cache; + } + + __device__ inline void insert(insert_type cache, const value_idx& key, const value_t& value) + { + cache[key] = value; + } + + __device__ inline find_type init_find(smem_type cache, const value_idx& cache_size) + { + return cache; + } + + __device__ inline value_t find(find_type cache, const value_idx& key) { return cache[key]; } +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/coo_spmv_strategies/hash_strategy.cuh b/cpp/src/distance/detail/sparse/coo_spmv_strategies/hash_strategy.cuh new file mode 100644 index 000000000..cf212076b --- /dev/null +++ b/cpp/src/distance/detail/sparse/coo_spmv_strategies/hash_strategy.cuh @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "base_strategy.cuh" + +#include +#include + +#include +#include +#include + +// this is needed by cuco as key, value must be bitwise comparable. +// compilers don't declare float/double as bitwise comparable +// but that is too strict +// for example, the following is true (or 0): +// float a = 5; +// float b = 5; +// memcmp(&a, &b, sizeof(float)); +CUCO_DECLARE_BITWISE_COMPARABLE(float); +CUCO_DECLARE_BITWISE_COMPARABLE(double); + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +template +class hash_strategy : public coo_spmv_strategy { + public: + using insert_type = typename cuco::legacy:: + static_map::device_mutable_view; + using smem_type = typename insert_type::slot_type*; + using find_type = + typename cuco::legacy::static_map::device_view; + + hash_strategy(const distances_config_t& config_, + float capacity_threshold_ = 0.5, + int map_size_ = get_map_size()) + : coo_spmv_strategy(config_), + capacity_threshold(capacity_threshold_), + map_size(map_size_) + { + } + + void chunking_needed(const value_idx* indptr, + const value_idx n_rows, + rmm::device_uvector& mask_indptr, + std::tuple& n_rows_divided, + cudaStream_t stream) + { + auto policy = raft::resource::get_thrust_policy(this->config.handle); + + auto less = thrust::copy_if(policy, + thrust::make_counting_iterator(value_idx(0)), + thrust::make_counting_iterator(n_rows), + mask_indptr.data(), + fits_in_hash_table(indptr, 0, capacity_threshold * map_size)); + std::get<0>(n_rows_divided) = less - mask_indptr.data(); + + auto more = thrust::copy_if( + policy, + thrust::make_counting_iterator(value_idx(0)), + thrust::make_counting_iterator(n_rows), + less, + fits_in_hash_table( + indptr, capacity_threshold * map_size, std::numeric_limits::max())); + std::get<1>(n_rows_divided) = more - less; + } + + template + void dispatch(value_t* 
out_dists, + value_idx* coo_rows_b, + product_f product_func, + accum_f accum_func, + write_f write_func, + int chunk_size) + { + auto n_blocks_per_row = raft::ceildiv(this->config.b_nnz, chunk_size * tpb); + rmm::device_uvector mask_indptr( + this->config.a_nrows, raft::resource::get_cuda_stream(this->config.handle)); + std::tuple n_rows_divided; + + chunking_needed(this->config.a_indptr, + this->config.a_nrows, + mask_indptr, + n_rows_divided, + raft::resource::get_cuda_stream(this->config.handle)); + + auto less_rows = std::get<0>(n_rows_divided); + if (less_rows > 0) { + mask_row_it less(this->config.a_indptr, less_rows, mask_indptr.data()); + + auto n_less_blocks = less_rows * n_blocks_per_row; + this->_dispatch_base(*this, + map_size, + less, + out_dists, + coo_rows_b, + product_func, + accum_func, + write_func, + chunk_size, + n_less_blocks, + n_blocks_per_row); + } + + auto more_rows = std::get<1>(n_rows_divided); + if (more_rows > 0) { + rmm::device_uvector n_chunks_per_row( + more_rows + 1, raft::resource::get_cuda_stream(this->config.handle)); + rmm::device_uvector chunk_indices( + 0, raft::resource::get_cuda_stream(this->config.handle)); + chunked_mask_row_it::init(this->config.a_indptr, + mask_indptr.data() + less_rows, + more_rows, + capacity_threshold * map_size, + n_chunks_per_row, + chunk_indices, + raft::resource::get_cuda_stream(this->config.handle)); + + chunked_mask_row_it more(this->config.a_indptr, + more_rows, + mask_indptr.data() + less_rows, + capacity_threshold * map_size, + n_chunks_per_row.data(), + chunk_indices.data(), + raft::resource::get_cuda_stream(this->config.handle)); + + auto n_more_blocks = more.total_row_blocks * n_blocks_per_row; + this->_dispatch_base(*this, + map_size, + more, + out_dists, + coo_rows_b, + product_func, + accum_func, + write_func, + chunk_size, + n_more_blocks, + n_blocks_per_row); + } + } + + template + void dispatch_rev(value_t* out_dists, + value_idx* coo_rows_a, + product_f product_func, + accum_f 
accum_func, + write_f write_func, + int chunk_size) + { + auto n_blocks_per_row = raft::ceildiv(this->config.a_nnz, chunk_size * tpb); + rmm::device_uvector mask_indptr( + this->config.b_nrows, raft::resource::get_cuda_stream(this->config.handle)); + std::tuple n_rows_divided; + + chunking_needed(this->config.b_indptr, + this->config.b_nrows, + mask_indptr, + n_rows_divided, + raft::resource::get_cuda_stream(this->config.handle)); + + auto less_rows = std::get<0>(n_rows_divided); + if (less_rows > 0) { + mask_row_it less(this->config.b_indptr, less_rows, mask_indptr.data()); + + auto n_less_blocks = less_rows * n_blocks_per_row; + this->_dispatch_base_rev(*this, + map_size, + less, + out_dists, + coo_rows_a, + product_func, + accum_func, + write_func, + chunk_size, + n_less_blocks, + n_blocks_per_row); + } + + auto more_rows = std::get<1>(n_rows_divided); + if (more_rows > 0) { + rmm::device_uvector n_chunks_per_row( + more_rows + 1, raft::resource::get_cuda_stream(this->config.handle)); + rmm::device_uvector chunk_indices( + 0, raft::resource::get_cuda_stream(this->config.handle)); + chunked_mask_row_it::init(this->config.b_indptr, + mask_indptr.data() + less_rows, + more_rows, + capacity_threshold * map_size, + n_chunks_per_row, + chunk_indices, + raft::resource::get_cuda_stream(this->config.handle)); + + chunked_mask_row_it more(this->config.b_indptr, + more_rows, + mask_indptr.data() + less_rows, + capacity_threshold * map_size, + n_chunks_per_row.data(), + chunk_indices.data(), + raft::resource::get_cuda_stream(this->config.handle)); + + auto n_more_blocks = more.total_row_blocks * n_blocks_per_row; + this->_dispatch_base_rev(*this, + map_size, + more, + out_dists, + coo_rows_a, + product_func, + accum_func, + write_func, + chunk_size, + n_more_blocks, + n_blocks_per_row); + } + } + + __device__ inline insert_type init_insert(smem_type cache, const value_idx& cache_size) + { + return 
insert_type::make_from_uninitialized_slots(cooperative_groups::this_thread_block(), + cache, + cache_size, + cuco::empty_key{value_idx{-1}}, + cuco::empty_value{value_t{0}}); + } + + __device__ inline void insert(insert_type cache, const value_idx& key, const value_t& value) + { + auto success = cache.insert(cuco::pair(key, value)); + } + + __device__ inline find_type init_find(smem_type cache, const value_idx& cache_size) + { + return find_type( + cache, cache_size, cuco::empty_key{value_idx{-1}}, cuco::empty_value{value_t{0}}); + } + + __device__ inline value_t find(find_type cache, const value_idx& key) + { + auto a_pair = cache.find(key); + + value_t a_col = 0.0; + if (a_pair != cache.end()) { a_col = a_pair->second; } + return a_col; + } + + struct fits_in_hash_table { + public: + fits_in_hash_table(const value_idx* indptr_, value_idx degree_l_, value_idx degree_r_) + : indptr(indptr_), degree_l(degree_l_), degree_r(degree_r_) + { + } + + __host__ __device__ bool operator()(const value_idx& i) + { + auto degree = indptr[i + 1] - indptr[i]; + + return degree >= degree_l && degree < degree_r; + } + + private: + const value_idx* indptr; + const value_idx degree_l, degree_r; + }; + + inline static int get_map_size() + { + return (raft::getSharedMemPerBlock() - ((tpb / raft::warp_size()) * sizeof(value_t))) / + sizeof(typename insert_type::slot_type); + } + + private: + float capacity_threshold; + int map_size; +}; + +} // namespace sparse +} // namespace detail +} // namespace distance +} // namespace cuvs diff --git a/cpp/src/distance/detail/sparse/ip_distance.cuh b/cpp/src/distance/detail/sparse/ip_distance.cuh new file mode 100644 index 000000000..3a11d4e99 --- /dev/null +++ b/cpp/src/distance/detail/sparse/ip_distance.cuh @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "common.hpp" +#include "coo_spmv.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +template +class ip_distances_t : public distances_t { + public: + /** + * Computes simple sparse inner product distances as sum(x_y * y_k) + * @param[in] config specifies inputs, outputs, and sizes + */ + ip_distances_t(const distances_config_t& config) + : config_(&config), coo_rows_b(config.b_nnz, raft::resource::get_cuda_stream(config.handle)) + { + raft::sparse::convert::csr_to_coo(config_->b_indptr, + config_->b_nrows, + coo_rows_b.data(), + config_->b_nnz, + raft::resource::get_cuda_stream(config_->handle)); + } + + /** + * Performs pairwise distance computation and computes output distances + * @param out_distances dense output matrix (size a_nrows * b_nrows) + */ + void compute(value_t* out_distances) + { + /** + * Compute pairwise distances and return dense matrix in row-major format + */ + balanced_coo_pairwise_generalized_spmv(out_distances, + *config_, + coo_rows_b.data(), + raft::mul_op(), + raft::add_op(), + raft::atomic_add_op()); + } + + value_idx* b_rows_coo() { return coo_rows_b.data(); } + + value_t* b_data_coo() { return config_->b_data; } + + private: + const distances_config_t* config_; + rmm::device_uvector coo_rows_b; +}; + +} // END namespace sparse +} // END namespace detail +} // END namespace distance +} // END namespace cuvs diff --git 
a/cpp/src/distance/detail/sparse/l2_distance.cuh b/cpp/src/distance/detail/sparse/l2_distance.cuh new file mode 100644 index 000000000..40e7070fc --- /dev/null +++ b/cpp/src/distance/detail/sparse/l2_distance.cuh @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "common.hpp" +#include "ip_distance.cuh" +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +// @TODO: Move this into sparse prims (coo_norm) +template +RAFT_KERNEL compute_row_norm_kernel(value_t* out, + const value_idx* __restrict__ coo_rows, + const value_t* __restrict__ data, + value_idx nnz) +{ + value_idx i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < nnz) { atomicAdd(&out[coo_rows[i]], data[i] * data[i]); } +} + +template +RAFT_KERNEL compute_row_sum_kernel(value_t* out, + const value_idx* __restrict__ coo_rows, + const value_t* __restrict__ data, + value_idx nnz) +{ + value_idx i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < nnz) { atomicAdd(&out[coo_rows[i]], data[i]); } +} + +template +RAFT_KERNEL compute_euclidean_warp_kernel(value_t* __restrict__ C, + const value_t* __restrict__ Q_sq_norms, + const value_t* __restrict__ R_sq_norms, + value_idx n_rows, + value_idx n_cols, + expansion_f expansion_func) +{ + 
std::size_t tid = blockDim.x * blockIdx.x + threadIdx.x; + value_idx i = tid / n_cols; + value_idx j = tid % n_cols; + + if (i >= n_rows || j >= n_cols) return; + + value_t dot = C[(size_t)i * n_cols + j]; + + // e.g. Euclidean expansion func = -2.0 * dot + q_norm + r_norm + value_t val = expansion_func(dot, Q_sq_norms[i], R_sq_norms[j]); + + // correct for small instabilities + C[(size_t)i * n_cols + j] = val * (fabs(val) >= 0.0001); +} + +template +RAFT_KERNEL compute_correlation_warp_kernel(value_t* __restrict__ C, + const value_t* __restrict__ Q_sq_norms, + const value_t* __restrict__ R_sq_norms, + const value_t* __restrict__ Q_norms, + const value_t* __restrict__ R_norms, + value_idx n_rows, + value_idx n_cols, + value_idx n) +{ + std::size_t tid = blockDim.x * blockIdx.x + threadIdx.x; + value_idx i = tid / n_cols; + value_idx j = tid % n_cols; + + if (i >= n_rows || j >= n_cols) return; + + value_t dot = C[(size_t)i * n_cols + j]; + value_t Q_l1 = Q_norms[i]; + value_t R_l1 = R_norms[j]; + + value_t Q_l2 = Q_sq_norms[i]; + value_t R_l2 = R_sq_norms[j]; + + value_t numer = n * dot - (Q_l1 * R_l1); + value_t Q_denom = n * Q_l2 - (Q_l1 * Q_l1); + value_t R_denom = n * R_l2 - (R_l1 * R_l1); + + value_t val = 1 - (numer / raft::sqrt(Q_denom * R_denom)); + + // correct for small instabilities + C[(size_t)i * n_cols + j] = val * (fabs(val) >= 0.0001); +} + +template +void compute_euclidean(value_t* C, + const value_t* Q_sq_norms, + const value_t* R_sq_norms, + value_idx n_rows, + value_idx n_cols, + cudaStream_t stream, + expansion_f expansion_func) +{ + int blocks = raft::ceildiv((size_t)n_rows * n_cols, tpb); + compute_euclidean_warp_kernel<<>>( + C, Q_sq_norms, R_sq_norms, n_rows, n_cols, expansion_func); +} + +template +void compute_l2(value_t* out, + const value_idx* Q_coo_rows, + const value_t* Q_data, + value_idx Q_nnz, + const value_idx* R_coo_rows, + const value_t* R_data, + value_idx R_nnz, + value_idx m, + value_idx n, + cudaStream_t stream, + 
expansion_f expansion_func) +{ + rmm::device_uvector Q_sq_norms(m, stream); + rmm::device_uvector R_sq_norms(n, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(Q_sq_norms.data(), 0, Q_sq_norms.size() * sizeof(value_t))); + RAFT_CUDA_TRY(cudaMemsetAsync(R_sq_norms.data(), 0, R_sq_norms.size() * sizeof(value_t))); + + compute_row_norm_kernel<<>>( + Q_sq_norms.data(), Q_coo_rows, Q_data, Q_nnz); + compute_row_norm_kernel<<>>( + R_sq_norms.data(), R_coo_rows, R_data, R_nnz); + + compute_euclidean(out, Q_sq_norms.data(), R_sq_norms.data(), m, n, stream, expansion_func); +} + +template +void compute_correlation(value_t* C, + const value_t* Q_sq_norms, + const value_t* R_sq_norms, + const value_t* Q_norms, + const value_t* R_norms, + value_idx n_rows, + value_idx n_cols, + value_idx n, + cudaStream_t stream) +{ + int blocks = raft::ceildiv((size_t)n_rows * n_cols, tpb); + compute_correlation_warp_kernel<<>>( + C, Q_sq_norms, R_sq_norms, Q_norms, R_norms, n_rows, n_cols, n); +} + +template +void compute_corr(value_t* out, + const value_idx* Q_coo_rows, + const value_t* Q_data, + value_idx Q_nnz, + const value_idx* R_coo_rows, + const value_t* R_data, + value_idx R_nnz, + value_idx m, + value_idx n, + value_idx n_cols, + cudaStream_t stream) +{ + // sum_sq for std dev + rmm::device_uvector Q_sq_norms(m, stream); + rmm::device_uvector R_sq_norms(n, stream); + + // sum for mean + rmm::device_uvector Q_norms(m, stream); + rmm::device_uvector R_norms(n, stream); + + RAFT_CUDA_TRY(cudaMemsetAsync(Q_sq_norms.data(), 0, Q_sq_norms.size() * sizeof(value_t))); + RAFT_CUDA_TRY(cudaMemsetAsync(R_sq_norms.data(), 0, R_sq_norms.size() * sizeof(value_t))); + + RAFT_CUDA_TRY(cudaMemsetAsync(Q_norms.data(), 0, Q_norms.size() * sizeof(value_t))); + RAFT_CUDA_TRY(cudaMemsetAsync(R_norms.data(), 0, R_norms.size() * sizeof(value_t))); + + compute_row_norm_kernel<<>>( + Q_sq_norms.data(), Q_coo_rows, Q_data, Q_nnz); + compute_row_norm_kernel<<>>( + R_sq_norms.data(), R_coo_rows, R_data, R_nnz); + + 
compute_row_sum_kernel<<>>( + Q_norms.data(), Q_coo_rows, Q_data, Q_nnz); + compute_row_sum_kernel<<>>( + R_norms.data(), R_coo_rows, R_data, R_nnz); + + compute_correlation(out, + Q_sq_norms.data(), + R_sq_norms.data(), + Q_norms.data(), + R_norms.data(), + m, + n, + n_cols, + stream); +} + +/** + * L2 distance using the expanded form: sum(x_k)^2 + sum(y_k)^2 - 2 * sum(x_k * y_k) + * The expanded form is more efficient for sparse data. + */ +template +class l2_expanded_distances_t : public distances_t { + public: + explicit l2_expanded_distances_t(const distances_config_t& config) + : config_(&config), ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_idx* b_indices = ip_dists.b_rows_coo(); + value_t* b_data = ip_dists.b_data_coo(); + + rmm::device_uvector search_coo_rows( + config_->a_nnz, raft::resource::get_cuda_stream(config_->handle)); + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + search_coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + compute_l2(out_dists, + search_coo_rows.data(), + config_->a_data, + config_->a_nnz, + b_indices, + b_data, + config_->b_nnz, + config_->a_nrows, + config_->b_nrows, + raft::resource::get_cuda_stream(config_->handle), + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { + return -2 * dot + q_norm + r_norm; + }); + } + + ~l2_expanded_distances_t() = default; + + protected: + const distances_config_t* config_; + ip_distances_t ip_dists; +}; + +/** + * L2 sqrt distance performing the sqrt operation after the distance computation + * The expanded form is more efficient for sparse data. 
+ */ +template +class l2_sqrt_expanded_distances_t : public l2_expanded_distances_t { + public: + explicit l2_sqrt_expanded_distances_t(const distances_config_t& config) + : l2_expanded_distances_t(config) + { + } + + void compute(value_t* out_dists) override + { + l2_expanded_distances_t::compute(out_dists); + // Sqrt Post-processing + raft::linalg::unaryOp( + out_dists, + out_dists, + this->config_->a_nrows * this->config_->b_nrows, + [] __device__(value_t input) { + int neg = input < 0 ? -1 : 1; + return raft::sqrt(abs(input) * neg); + }, + raft::resource::get_cuda_stream(this->config_->handle)); + } + + ~l2_sqrt_expanded_distances_t() = default; +}; + +template +class correlation_expanded_distances_t : public distances_t { + public: + explicit correlation_expanded_distances_t(const distances_config_t& config) + : config_(&config), ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_idx* b_indices = ip_dists.b_rows_coo(); + value_t* b_data = ip_dists.b_data_coo(); + + rmm::device_uvector search_coo_rows( + config_->a_nnz, raft::resource::get_cuda_stream(config_->handle)); + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + search_coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + compute_corr(out_dists, + search_coo_rows.data(), + config_->a_data, + config_->a_nnz, + b_indices, + b_data, + config_->b_nnz, + config_->a_nrows, + config_->b_nrows, + config_->b_ncols, + raft::resource::get_cuda_stream(config_->handle)); + } + + ~correlation_expanded_distances_t() = default; + + protected: + const distances_config_t* config_; + ip_distances_t ip_dists; +}; + +/** + * Cosine distance using the expanded form: 1 - ( sum(x_k * y_k) / (sqrt(sum(x_k)^2) * + * sqrt(sum(y_k)^2))) The expanded form is more efficient for sparse data. 
+ */ +template +class cosine_expanded_distances_t : public distances_t { + public: + explicit cosine_expanded_distances_t(const distances_config_t& config) + : config_(&config), + workspace(0, raft::resource::get_cuda_stream(config.handle)), + ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_idx* b_indices = ip_dists.b_rows_coo(); + value_t* b_data = ip_dists.b_data_coo(); + + rmm::device_uvector search_coo_rows( + config_->a_nnz, raft::resource::get_cuda_stream(config_->handle)); + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + search_coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + compute_l2(out_dists, + search_coo_rows.data(), + config_->a_data, + config_->a_nnz, + b_indices, + b_data, + config_->b_nnz, + config_->a_nrows, + config_->b_nrows, + raft::resource::get_cuda_stream(config_->handle), + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { + value_t norms = raft::sqrt(q_norm) * raft::sqrt(r_norm); + // deal with potential for 0 in denominator by forcing 0/1 instead + value_t cos = ((norms != 0) * dot) / ((norms == 0) + norms); + + // flip the similarity when both rows are 0 + bool both_empty = (q_norm == 0) && (r_norm == 0); + return 1 - ((!both_empty * cos) + both_empty); + }); + } + + ~cosine_expanded_distances_t() = default; + + private: + const distances_config_t* config_; + rmm::device_uvector workspace; + ip_distances_t ip_dists; +}; + +/** + * Hellinger distance using the expanded form: sqrt(1 - sum(sqrt(x_k) * sqrt(y_k))) + * The expanded form is more efficient for sparse data. + * + * This distance computation modifies A and B by computing a sqrt + * and then performing a `pow(x, 2)` to convert it back. Because of this, + * it is possible that the values in A and B might differ slightly + * after this is invoked. 
+ */ +template +class hellinger_expanded_distances_t : public distances_t { + public: + explicit hellinger_expanded_distances_t(const distances_config_t& config) + : config_(&config), workspace(0, raft::resource::get_cuda_stream(config.handle)) + { + } + + void compute(value_t* out_dists) + { + rmm::device_uvector coo_rows(std::max(config_->b_nnz, config_->a_nnz), + raft::resource::get_cuda_stream(config_->handle)); + + raft::sparse::convert::csr_to_coo(config_->b_indptr, + config_->b_nrows, + coo_rows.data(), + config_->b_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + balanced_coo_pairwise_generalized_spmv( + out_dists, + *config_, + coo_rows.data(), + [] __device__(value_t a, value_t b) { return raft::sqrt(a) * raft::sqrt(b); }, + raft::add_op(), + raft::atomic_add_op()); + + raft::linalg::unaryOp( + out_dists, + out_dists, + config_->a_nrows * config_->b_nrows, + [=] __device__(value_t input) { + // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative + bool rectifier = (1 - input) > 0; + return raft::sqrt(rectifier * (1 - input)); + }, + raft::resource::get_cuda_stream(config_->handle)); + } + + ~hellinger_expanded_distances_t() = default; + + private: + const distances_config_t* config_; + rmm::device_uvector workspace; +}; + +template +class russelrao_expanded_distances_t : public distances_t { + public: + explicit russelrao_expanded_distances_t(const distances_config_t& config) + : config_(&config), + workspace(0, raft::resource::get_cuda_stream(config.handle)), + ip_dists(config) + { + } + + void compute(value_t* out_dists) + { + ip_dists.compute(out_dists); + + value_t n_cols = config_->a_ncols; + value_t n_cols_inv = 1.0 / n_cols; + raft::linalg::unaryOp( + out_dists, + out_dists, + config_->a_nrows * config_->b_nrows, + [=] __device__(value_t input) { return (n_cols - input) * n_cols_inv; }, + raft::resource::get_cuda_stream(config_->handle)); + + auto exec_policy = 
rmm::exec_policy(raft::resource::get_cuda_stream(config_->handle)); + auto diags = thrust::counting_iterator(0); + value_idx b_nrows = config_->b_nrows; + thrust::for_each(exec_policy, diags, diags + config_->a_nrows, [=] __device__(value_idx input) { + out_dists[input * b_nrows + input] = 0.0; + }); + } + + ~russelrao_expanded_distances_t() = default; + + private: + const distances_config_t* config_; + rmm::device_uvector workspace; + ip_distances_t ip_dists; +}; + +} // END namespace sparse +} // END namespace detail +} // END namespace distance +} // END namespace cuvs diff --git a/cpp/src/distance/detail/sparse/lp_distance.cuh b/cpp/src/distance/detail/sparse/lp_distance.cuh new file mode 100644 index 000000000..18e7b04e4 --- /dev/null +++ b/cpp/src/distance/detail/sparse/lp_distance.cuh @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "common.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +template +void unexpanded_lp_distances(value_t* out_dists, + const distances_config_t* config_, + product_f product_func, + accum_f accum_func, + write_f write_func) +{ + rmm::device_uvector coo_rows(std::max(config_->b_nnz, config_->a_nnz), + raft::resource::get_cuda_stream(config_->handle)); + + raft::sparse::convert::csr_to_coo(config_->b_indptr, + config_->b_nrows, + coo_rows.data(), + config_->b_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + balanced_coo_pairwise_generalized_spmv( + out_dists, *config_, coo_rows.data(), product_func, accum_func, write_func); + + raft::sparse::convert::csr_to_coo(config_->a_indptr, + config_->a_nrows, + coo_rows.data(), + config_->a_nnz, + raft::resource::get_cuda_stream(config_->handle)); + + balanced_coo_pairwise_generalized_spmv_rev( + out_dists, *config_, coo_rows.data(), product_func, accum_func, write_func); +} + +/** + * Computes L1 distances for sparse input. This does not have + * an equivalent expanded form, so it is only executed in + * an unexpanded form. 
+ * @tparam value_idx + * @tparam value_t + */ +template +class l1_unexpanded_distances_t : public distances_t { + public: + l1_unexpanded_distances_t(const distances_config_t& config) : config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, config_, raft::absdiff_op(), raft::add_op(), raft::atomic_add_op()); + } + + private: + const distances_config_t* config_; +}; + +template +class l2_unexpanded_distances_t : public distances_t { + public: + l2_unexpanded_distances_t(const distances_config_t& config) : config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, config_, raft::sqdiff_op(), raft::add_op(), raft::atomic_add_op()); + } + + protected: + const distances_config_t* config_; +}; + +template +class l2_sqrt_unexpanded_distances_t : public l2_unexpanded_distances_t { + public: + l2_sqrt_unexpanded_distances_t(const distances_config_t& config) + : l2_unexpanded_distances_t(config) + { + } + + void compute(value_t* out_dists) + { + l2_unexpanded_distances_t::compute(out_dists); + + uint64_t n = (uint64_t)this->config_->a_nrows * (uint64_t)this->config_->b_nrows; + // Sqrt Post-processing + raft::linalg::unaryOp( + out_dists, + out_dists, + n, + [] __device__(value_t input) { + int neg = input < 0 ? 
-1 : 1; + return raft::sqrt(abs(input) * neg); + }, + raft::resource::get_cuda_stream(this->config_->handle)); + } +}; + +template +class linf_unexpanded_distances_t : public distances_t { + public: + explicit linf_unexpanded_distances_t(const distances_config_t& config) + : config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, config_, raft::absdiff_op(), raft::max_op(), raft::atomic_max_op()); + } + + private: + const distances_config_t* config_; +}; + +template +class canberra_unexpanded_distances_t : public distances_t { + public: + explicit canberra_unexpanded_distances_t(const distances_config_t& config) + : config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, + config_, + [] __device__(value_t a, value_t b) { + value_t d = fabs(a) + fabs(b); + + // deal with potential for 0 in denominator by + // forcing 1/0 instead + return ((d != 0) * fabs(a - b)) / (d + (d == 0)); + }, + raft::add_op(), + raft::atomic_add_op()); + } + + private: + const distances_config_t* config_; +}; + +template +class lp_unexpanded_distances_t : public distances_t { + public: + explicit lp_unexpanded_distances_t(const distances_config_t& config, + value_t p_) + : config_(&config), p(p_) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, + config_, + raft::compose_op(raft::pow_const_op(p), raft::sub_op()), + raft::add_op(), + raft::atomic_add_op()); + + uint64_t n = (uint64_t)this->config_->a_nrows * (uint64_t)this->config_->b_nrows; + value_t one_over_p = value_t{1} / p; + raft::linalg::unaryOp(out_dists, + out_dists, + n, + raft::pow_const_op(one_over_p), + raft::resource::get_cuda_stream(config_->handle)); + } + + private: + const distances_config_t* config_; + value_t p; +}; + +template +class hamming_unexpanded_distances_t : public distances_t { + public: + explicit hamming_unexpanded_distances_t(const distances_config_t& config) + : 
config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, config_, raft::notequal_op(), raft::add_op(), raft::atomic_add_op()); + + uint64_t n = (uint64_t)config_->a_nrows * (uint64_t)config_->b_nrows; + value_t n_cols = 1.0 / config_->a_ncols; + raft::linalg::unaryOp(out_dists, + out_dists, + n, + raft::mul_const_op(n_cols), + raft::resource::get_cuda_stream(config_->handle)); + } + + private: + const distances_config_t* config_; +}; + +template +class jensen_shannon_unexpanded_distances_t : public distances_t { + public: + explicit jensen_shannon_unexpanded_distances_t( + const distances_config_t& config) + : config_(&config) + { + } + + void compute(value_t* out_dists) + { + unexpanded_lp_distances( + out_dists, + config_, + [] __device__(value_t a, value_t b) { + value_t m = 0.5f * (a + b); + bool a_zero = a == 0; + bool b_zero = b == 0; + + value_t x = (!a_zero * m) / (a_zero + a); + value_t y = (!b_zero * m) / (b_zero + b); + + bool x_zero = x == 0; + bool y_zero = y == 0; + + return (-a * (!x_zero * log(x + x_zero))) + (-b * (!y_zero * log(y + y_zero))); + }, + raft::add_op(), + raft::atomic_add_op()); + + uint64_t n = (uint64_t)this->config_->a_nrows * (uint64_t)this->config_->b_nrows; + raft::linalg::unaryOp( + out_dists, + out_dists, + n, + [=] __device__(value_t input) { return raft::sqrt(0.5 * input); }, + raft::resource::get_cuda_stream(config_->handle)); + } + + private: + const distances_config_t* config_; +}; + +template +class kl_divergence_unexpanded_distances_t : public distances_t { + public: + explicit kl_divergence_unexpanded_distances_t( + const distances_config_t& config) + : config_(&config) + { + } + + void compute(value_t* out_dists) + { + rmm::device_uvector coo_rows(std::max(config_->b_nnz, config_->a_nnz), + raft::resource::get_cuda_stream(config_->handle)); + + raft::sparse::convert::csr_to_coo(config_->b_indptr, + config_->b_nrows, + coo_rows.data(), + config_->b_nnz, + 
raft::resource::get_cuda_stream(config_->handle)); + + balanced_coo_pairwise_generalized_spmv( + out_dists, + *config_, + coo_rows.data(), + [] __device__(value_t a, value_t b) { return a * log(a / b); }, + raft::add_op(), + raft::atomic_add_op()); + + uint64_t n = (uint64_t)this->config_->a_nrows * (uint64_t)this->config_->b_nrows; + raft::linalg::unaryOp(out_dists, + out_dists, + n, + raft::mul_const_op(0.5), + raft::resource::get_cuda_stream(config_->handle)); + } + + private: + const distances_config_t* config_; +}; + +} // END namespace sparse +} // END namespace detail +} // END namespace distance +} // END namespace cuvs diff --git a/cpp/src/distance/detail/sparse/utils.cuh b/cpp/src/distance/detail/sparse/utils.cuh new file mode 100644 index 000000000..dc7ae6df6 --- /dev/null +++ b/cpp/src/distance/detail/sparse/utils.cuh @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +namespace cuvs { +namespace distance { +namespace detail { +namespace sparse { + +/** + * Computes the maximum number of columns that can be stored + * in shared memory in dense form with the given block size + * and precision. 
+ * @return the maximum number of columns that can be stored in smem
+ */
+// NOTE(review): the parameter list of this `template` header (and every other
+// `<...>` span in this hunk: include targets, template arguments, kernel launch
+// configurations) is missing from this text; confirm against the upstream file.
+template
+inline int max_cols_per_block()
+{
+  // max cols = (total smem available - cub reduction smem)
+  return (raft::getSharedMemPerBlock() - ((tpb / raft::warp_size()) * sizeof(value_t))) /
+         sizeof(value_t);
+}
+
+/**
+ * For every stored entry `dot_id` of a CSR structure, computes the dot product
+ * between row `row` of `A` and row `cols[dot_id]` of `B` and writes it to
+ * `dot[dot_id]`. Both `A` and `B` are indexed as `base[r * dim + k]`.
+ *
+ * blockIdx.x strides over rows and blockIdx.y strides over the entries of a row.
+ * The current row of `A` is staged in dynamic shared memory (`dim` elements of
+ * `value_t`), so the launch must provide at least `dim * sizeof(value_t)` bytes.
+ *
+ * @param[out] dot   one result per stored entry (length nnz)
+ * @param[in] indptr CSR row offsets (n_rows + 1 entries are read)
+ * @param[in] cols   CSR column indices, used as row ids into B
+ * @param[in] A      left operand, n_rows x dim
+ * @param[in] B      right operand, indexed by cols[] x dim
+ * @param[in] nnz    number of stored entries
+ * @param[in] n_rows number of CSR rows
+ * @param[in] dim    length of each dot product
+ */
+template
+RAFT_KERNEL faster_dot_on_csr_kernel(dot_t* __restrict__ dot,
+                                     const value_idx* __restrict__ indptr,
+                                     const value_idx* __restrict__ cols,
+                                     const value_t* __restrict__ A,
+                                     const value_t* __restrict__ B,
+                                     const value_idx nnz,
+                                     const value_idx n_rows,
+                                     const value_idx dim)
+{
+  auto vec_id  = threadIdx.x;
+  auto lane_id = threadIdx.x & 0x1f;
+
+  extern __shared__ char smem[];
+  value_t* s_A      = (value_t*)smem;
+  value_idx cur_row = -1;
+
+  for (int row = blockIdx.x; row < n_rows; row += gridDim.x) {
+    for (int dot_id = blockIdx.y + indptr[row]; dot_id < indptr[row + 1]; dot_id += gridDim.y) {
+      // dot_id depends only on block-uniform values, so the whole block leaves
+      // together and the __syncthreads() below is never reached by a subset of threads.
+      if (dot_id >= nnz) { return; }
+      const value_idx col               = cols[dot_id] * dim;
+      const value_t* __restrict__ B_col = B + col;
+
+      // Zero the accumulator before any warp adds its partial sum to it.
+      if (threadIdx.x == 0) { dot[dot_id] = 0.0; }
+      __syncthreads();
+
+      // Reload the shared copy of A only when the row changes. Each thread later
+      // reads back exactly the elements it wrote (same k stride), so no barrier
+      // is placed between this store and the loads below.
+      if (cur_row != row) {
+        for (value_idx k = vec_id; k < dim; k += blockDim.x) {
+          s_A[k] = A[row * dim + k];
+        }
+        cur_row = row;
+      }
+
+      dot_t l_dot_ = 0.0;
+      for (value_idx k = vec_id; k < dim; k += blockDim.x) {
+        // Prefetch the element this thread will read on its next iteration.
+        asm("prefetch.global.L2 [%0];" ::"l"(B_col + k + blockDim.x));
+        if constexpr ((std::is_same_v && std::is_same_v)) {
+          l_dot_ += __half2float(s_A[k]) * __half2float(__ldcg(B_col + k));
+        } else {
+          l_dot_ += s_A[k] * __ldcg(B_col + k);
+        }
+      }
+
+      // NOTE(review): a single TempStorage instance is shared by every warp of the
+      // block; confirm that the WarpReduce specialization used here does not
+      // actually touch shared memory, otherwise one instance per warp is required.
+      typedef cub::WarpReduce WarpReduce;
+      __shared__ typename WarpReduce::TempStorage temp_storage;
+      dot_t warp_sum = WarpReduce(temp_storage).Sum(l_dot_);
+
+      if (lane_id == 0) { atomicAdd_block(dot + dot_id, warp_sum); }
+    }
+  }
+}
+
+/**
+ * Host launcher for faster_dot_on_csr_kernel. Picks the block size from `dim`
+ * (64 / 128 / 256 / 512 threads) and sizes the grid from the occupancy reported
+ * for that configuration. Returns immediately when there is nothing to compute.
+ *
+ * @param[in] handle  raft resources; the kernel is enqueued on its CUDA stream
+ * @param[out] dot    one result per stored entry (length nnz)
+ * @param[in] nnz     number of stored entries
+ * @param[in] indptr  CSR row offsets
+ * @param[in] cols    CSR column indices
+ * @param[in] A       left operand, n_rows x dim
+ * @param[in] B       right operand, indexed by cols[] x dim
+ * @param[in] n_rows  number of CSR rows
+ * @param[in] dim     length of each dot product
+ */
+template
+void faster_dot_on_csr(raft::resources const& handle,
+                       dot_t* dot,
+                       const value_idx nnz,
+                       const value_idx* indptr,
+                       const value_idx* cols,
+                       const value_t* A,
+                       const value_t* B,
+                       const value_idx n_rows,
+                       const value_idx dim)
+{
+  if (nnz == 0 || n_rows == 0) return;
+
+  auto stream = raft::resource::get_cuda_stream(handle);
+
+  constexpr value_idx MAX_ROW_PER_ITER = 500;
+  int dev_id, sm_count, blocks_per_sm;
+
+  // One row of A is cached per block.
+  const int smem_size = dim * sizeof(value_t);
+  // Check every runtime query: on failure the outputs stay uninitialized and
+  // would otherwise flow straight into the grid dimensions.
+  RAFT_CUDA_TRY(cudaGetDevice(&dev_id));
+  RAFT_CUDA_TRY(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id));
+
+  if (dim < 128) {
+    constexpr int tpb = 64;
+    RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+      &blocks_per_sm, faster_dot_on_csr_kernel, tpb, smem_size));
+    auto block_x = std::min(n_rows, MAX_ROW_PER_ITER);
+    auto block_y =
+      (std::min(value_idx(blocks_per_sm * sm_count * 16), nnz) + block_x - 1) / block_x;
+    dim3 blocks(block_x, block_y, 1);
+
+    faster_dot_on_csr_kernel
+      <<>>(dot, indptr, cols, A, B, nnz, n_rows, dim);
+
+  } else if (dim < 256) {
+    constexpr int tpb = 128;
+    RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+      &blocks_per_sm, faster_dot_on_csr_kernel, tpb, smem_size));
+    auto block_x = std::min(n_rows, MAX_ROW_PER_ITER);
+    auto block_y =
+      (std::min(value_idx(blocks_per_sm * sm_count * 16), nnz) + block_x - 1) / block_x;
+    dim3 blocks(block_x, block_y, 1);
+
+    faster_dot_on_csr_kernel
+      <<>>(dot, indptr, cols, A, B, nnz, n_rows, dim);
+  } else if (dim < 512) {
+    constexpr int tpb = 256;
+    RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+      &blocks_per_sm, faster_dot_on_csr_kernel, tpb, smem_size));
+    auto block_x = std::min(n_rows, MAX_ROW_PER_ITER);
+    auto block_y =
+      (std::min(value_idx(blocks_per_sm * sm_count * 16), nnz) + block_x - 1) / block_x;
+    dim3 blocks(block_x, block_y, 1);
+
+    faster_dot_on_csr_kernel
+      <<>>(dot, indptr, cols, A, B, nnz, n_rows, dim);
+  } else {
+    constexpr int tpb = 512;
+    RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+      &blocks_per_sm, faster_dot_on_csr_kernel, tpb, smem_size));
+    auto block_x = std::min(n_rows, MAX_ROW_PER_ITER);
+    auto block_y =
+      (std::min(value_idx(blocks_per_sm * sm_count * 16), nnz) + block_x - 1) / block_x;
+    dim3 blocks(block_x, block_y, 1);
+
+    faster_dot_on_csr_kernel
+      <<>>(dot, indptr, cols, A, B, nnz, n_rows, dim);
+  }
+
+  // Surface launch-configuration errors from whichever branch ran.
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+}
+
+}  // namespace sparse
+}  // namespace detail
+}  // namespace distance
+}  // namespace cuvs
diff --git a/cpp/src/distance/sparse_distance.cu b/cpp/src/distance/sparse_distance.cu
new file mode 100644
index 000000000..338c4e908
--- /dev/null
+++ b/cpp/src/distance/sparse_distance.cu
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include "sparse_distance.cuh"
+
+namespace cuvs {
+namespace distance {
+
+/**
+ * Shared implementation behind the public sparse pairwise_distance overloads.
+ *
+ * Checks that x and y have the same number of columns and that `dist` has
+ * shape (rows of x) by (rows of y), copies the CSR pointers and sizes of both
+ * inputs into a distances_config_t (x as operand "a", y as operand "b") and
+ * dispatches to pairwiseDistance.
+ *
+ * NOTE(review): the template parameter list and the type arguments of the
+ * views and casts below are missing from this text; confirm against the
+ * upstream file.
+ *
+ * @param[in] handle     raft resources handed to the distance configuration
+ * @param[in] x          first CSR matrix
+ * @param[in] y          second CSR matrix
+ * @param[out] dist      dense output, x rows by y rows
+ * @param[in] metric     distance metric to compute
+ * @param[in] metric_arg metric argument forwarded to pairwiseDistance
+ */
+template
+void pairwise_distance(
+  raft::resources const& handle,
+  raft::device_csr_matrix_view x,
+  raft::device_csr_matrix_view y,
+  raft::device_matrix_view dist,
+  cuvs::distance::DistanceType metric,
+  float metric_arg = 2.0f)
+{
+  auto x_structure = x.structure_view();
+  auto y_structure = y.structure_view();
+
+  RAFT_EXPECTS(x_structure.get_n_cols() == y_structure.get_n_cols(),
+               "Number of columns must be equal");
+
+  RAFT_EXPECTS(dist.extent(0) == x_structure.get_n_rows(),
+               "Number of rows in output must be equal to "
+               "number of rows in X");
+  RAFT_EXPECTS(dist.extent(1) == y_structure.get_n_rows(),
+               "Number of columns in output must be equal to "
+               "number of rows in Y");
+
+  // The config stores mutable pointers, hence the const_casts on the view data.
+  detail::sparse::distances_config_t input_config(handle);
+  input_config.a_nrows   = x_structure.get_n_rows();
+  input_config.a_ncols   = x_structure.get_n_cols();
+  input_config.a_nnz     = x_structure.get_nnz();
+  input_config.a_indptr  = const_cast(x_structure.get_indptr().data());
+  input_config.a_indices = const_cast(x_structure.get_indices().data());
+  input_config.a_data    = const_cast(x.get_elements().data());
+
+  input_config.b_nrows   = y_structure.get_n_rows();
+  input_config.b_ncols   = y_structure.get_n_cols();
+  input_config.b_nnz     = y_structure.get_nnz();
+  input_config.b_indptr  = const_cast(y_structure.get_indptr().data());
+  input_config.b_indices = const_cast(y_structure.get_indices().data());
+  input_config.b_data    = const_cast(y.get_elements().data());
+
+  pairwiseDistance(dist.data_handle(), input_config, metric, metric_arg);
+}
+
+// Public overload; forwards all arguments to the template above.
+// NOTE(review): the explicit template arguments of the forwarded call are
+// missing from this text (as written the call would resolve to itself).
+void pairwise_distance(raft::resources const& handle,
+                       raft::device_csr_matrix_view x,
+                       raft::device_csr_matrix_view y,
+                       raft::device_matrix_view dist,
+                       cuvs::distance::DistanceType metric,
+                       float metric_arg)
+{
+  pairwise_distance(handle, x, y, dist, metric, metric_arg);
+}
+
+// Second public overload; presumably differs from the first only in the
+// (stripped) element/index types of its views. Verify against upstream.
+void pairwise_distance(raft::resources const& handle,
+                       raft::device_csr_matrix_view x,
+                       raft::device_csr_matrix_view y,
+                       raft::device_matrix_view dist,
+                       cuvs::distance::DistanceType metric,
+                       float metric_arg)
+{
+  pairwise_distance(handle, x, y, dist, metric, metric_arg);
+}
+}  // namespace distance
+}  // namespace cuvs
diff --git a/cpp/src/distance/sparse_distance.cuh b/cpp/src/distance/sparse_distance.cuh
new file mode 100644
index 000000000..0d6dc0e6f
--- /dev/null
+++ b/cpp/src/distance/sparse_distance.cuh
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "detail/sparse/bin_distance.cuh"
+#include "detail/sparse/common.hpp"
+#include "detail/sparse/ip_distance.cuh"
+#include "detail/sparse/l2_distance.cuh"
+#include "detail/sparse/lp_distance.cuh"
+
+#include
+
+#include
+
+#include
+
+namespace cuvs {
+namespace distance {
+/**
+ * Compute pairwise distances between A and B, using the provided
+ * input configuration and distance function.
+ *
+ * Each supported metric is mapped to its detail::sparse distance functor,
+ * which is constructed from `input_config` and run once via compute(out).
+ * Any other metric raises through THROW.
+ *
+ * @tparam value_idx index type
+ * @tparam value_t value type
+ * @param[out] out dense output array (size A.nrows * B.nrows)
+ * @param[in] input_config input argument configuration
+ * @param[in] metric distance metric to use
+ * @param[in] metric_arg metric argument (used for Minkowski distance)
+ */
+// NOTE(review): the template parameter list and the template arguments of the
+// functor types below are missing from this text; confirm against upstream.
+template
+void pairwiseDistance(value_t* out,
+                      detail::sparse::distances_config_t input_config,
+                      cuvs::distance::DistanceType metric,
+                      float metric_arg)
+{
+  switch (metric) {
+    case cuvs::distance::DistanceType::L2Expanded:
+      detail::sparse::l2_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::L2SqrtExpanded:
+      detail::sparse::l2_sqrt_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::InnerProduct:
+      detail::sparse::ip_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::L2Unexpanded:
+      detail::sparse::l2_unexpanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::L2SqrtUnexpanded:
+      detail::sparse::l2_sqrt_unexpanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::L1:
+      detail::sparse::l1_unexpanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::LpUnexpanded:
+      // The only metric that consumes metric_arg.
+      detail::sparse::lp_unexpanded_distances_t(input_config, metric_arg)
+        .compute(out);
+      break;
+    case cuvs::distance::DistanceType::Linf:
+      detail::sparse::linf_unexpanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::Canberra:
+      detail::sparse::canberra_unexpanded_distances_t(input_config)
+        .compute(out);
+      break;
+    case cuvs::distance::DistanceType::JaccardExpanded:
+      detail::sparse::jaccard_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::CosineExpanded:
+      detail::sparse::cosine_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::HellingerExpanded:
+      detail::sparse::hellinger_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::DiceExpanded:
+      detail::sparse::dice_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::CorrelationExpanded:
+      detail::sparse::correlation_expanded_distances_t(input_config)
+        .compute(out);
+      break;
+    case cuvs::distance::DistanceType::RusselRaoExpanded:
+      detail::sparse::russelrao_expanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::HammingUnexpanded:
+      detail::sparse::hamming_unexpanded_distances_t(input_config).compute(out);
+      break;
+    case cuvs::distance::DistanceType::JensenShannon:
+      detail::sparse::jensen_shannon_unexpanded_distances_t(input_config)
+        .compute(out);
+      break;
+    case cuvs::distance::DistanceType::KLDivergence:
+      detail::sparse::kl_divergence_unexpanded_distances_t(input_config)
+        .compute(out);
+      break;
+
+    // %d expects an int; a scoped enum is not promoted when passed through a
+    // printf-style argument list, so convert explicitly.
+    default: THROW("Unsupported distance: %d", static_cast<int>(metric));
+  }
+}
+};  // namespace distance
+};  // namespace cuvs
diff --git a/cpp/src/neighbors/detail/sparse_knn.cuh b/cpp/src/neighbors/detail/sparse_knn.cuh
new file mode 100644
index 000000000..9c8e971b9
--- /dev/null
+++ b/cpp/src/neighbors/detail/sparse_knn.cuh
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include "../../distance/sparse_distance.cuh"
+#include "knn_merge_parts.cuh"
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace cuvs::neighbors::detail {
+
+/**
+ * Walks a CSR matrix in contiguous row batches of at most `batch_size` rows.
+ *
+ * Usage per batch: set_batch(), then get_batch_csr_indptr_nnz() to slice the
+ * row offsets and learn the batch nnz, then get_batch_csr_indices_data() to
+ * copy the matching column indices and values.
+ *
+ * NOTE(review): the template parameter lists and other `<...>` spans in this
+ * file are missing from this text; confirm against the upstream file.
+ */
+template
+struct csr_batcher_t {
+  csr_batcher_t(value_idx batch_size,
+                value_idx n_rows,
+                const value_idx* csr_indptr,
+                const value_idx* csr_indices,
+                const value_t* csr_data)
+    : batch_start_(0),
+      batch_stop_(0),
+      batch_rows_(0),
+      total_rows_(n_rows),
+      batch_size_(batch_size),
+      csr_indptr_(csr_indptr),
+      csr_indices_(csr_indices),
+      csr_data_(csr_data),
+      batch_csr_start_offset_(0),
+      batch_csr_stop_offset_(0)
+  {
+  }
+
+  /** Selects batch `batch_num`; the last batch is clamped to the final row. */
+  void set_batch(int batch_num)
+  {
+    batch_start_ = batch_num * batch_size_;
+    batch_stop_  = batch_start_ + batch_size_ - 1;  // zero-based indexing
+
+    if (batch_stop_ >= total_rows_) batch_stop_ = total_rows_ - 1;  // zero-based indexing
+
+    batch_rows_ = (batch_stop_ - batch_start_) + 1;
+  }
+
+  /**
+   * Slices the row offsets of the current batch into `batch_indptr`, records
+   * the batch's start/stop offsets and returns the number of stored entries.
+   */
+  value_idx get_batch_csr_indptr_nnz(value_idx* batch_indptr, cudaStream_t stream)
+  {
+    raft::sparse::op::csr_row_slice_indptr(batch_start_,
+                                           batch_stop_,
+                                           csr_indptr_,
+                                           batch_indptr,
+                                           &batch_csr_start_offset_,
+                                           &batch_csr_stop_offset_,
+                                           stream);
+
+    return batch_csr_stop_offset_ - batch_csr_start_offset_;
+  }
+
+  /**
+   * Copies the column indices and values of the current batch, using the
+   * offsets recorded by get_batch_csr_indptr_nnz().
+   */
+  void get_batch_csr_indices_data(value_idx* csr_indices, value_t* csr_data, cudaStream_t stream)
+  {
+    raft::sparse::op::csr_row_slice_populate(batch_csr_start_offset_,
+                                             batch_csr_stop_offset_,
+                                             csr_indices_,
+                                             csr_data_,
+                                             csr_indices,
+                                             csr_data,
+                                             stream);
+  }
+
+  value_idx batch_rows() const { return batch_rows_; }
+
+  value_idx batch_start() const { return batch_start_; }
+
+  value_idx batch_stop() const { return batch_stop_; }
+
+ private:
+  value_idx batch_size_;
+  value_idx batch_start_;
+  value_idx batch_stop_;
+  value_idx batch_rows_;
+
+  value_idx total_rows_;
+
+  const value_idx* csr_indptr_;
+  const value_idx* csr_indices_;
+  const value_t* csr_data_;
+
+  value_idx batch_csr_start_offset_;
+  value_idx batch_csr_stop_offset_;
+};
+
+/**
+ * Batched brute-force k-nearest-neighbors between two CSR matrices.
+ *
+ * run() tiles the query rows and the index rows, computes a dense distance
+ * tile per (query batch, index batch) pair, selects the best k per query row
+ * in that tile and merges it with the running best k from earlier index
+ * batches. Results are written to `output_indices_` / `output_dists_`.
+ */
+template
+class sparse_knn_t {
+ public:
+  sparse_knn_t(const value_idx* idxIndptr_,
+               const value_idx* idxIndices_,
+               const value_t* idxData_,
+               size_t idxNNZ_,
+               int n_idx_rows_,
+               int n_idx_cols_,
+               const value_idx* queryIndptr_,
+               const value_idx* queryIndices_,
+               const value_t* queryData_,
+               size_t queryNNZ_,
+               int n_query_rows_,
+               int n_query_cols_,
+               value_idx* output_indices_,
+               value_t* output_dists_,
+               int k_,
+               raft::resources const& handle_,
+               size_t batch_size_index_             = 2 << 14,  // 32768 rows per index batch
+               size_t batch_size_query_             = 2 << 14,  // 32768 rows per query batch
+               cuvs::distance::DistanceType metric_ = cuvs::distance::DistanceType::L2Expanded,
+               float metricArg_                     = 0)
+    : idxIndptr(idxIndptr_),
+      idxIndices(idxIndices_),
+      idxData(idxData_),
+      idxNNZ(idxNNZ_),
+      n_idx_rows(n_idx_rows_),
+      n_idx_cols(n_idx_cols_),
+      queryIndptr(queryIndptr_),
+      queryIndices(queryIndices_),
+      queryData(queryData_),
+      queryNNZ(queryNNZ_),
+      n_query_rows(n_query_rows_),
+      n_query_cols(n_query_cols_),
+      output_indices(output_indices_),
+      output_dists(output_dists_),
+      k(k_),
+      handle(handle_),
+      batch_size_index(batch_size_index_),
+      batch_size_query(batch_size_query_),
+      metric(metric_),
+      metricArg(metricArg_)
+  {
+  }
+
+  /** Executes the batched search; all work is enqueued on the handle's stream. */
+  void run()
+  {
+    using namespace raft::sparse;
+
+    // Every allocation, copy and kernel below is ordered on this one stream.
+    auto stream = raft::resource::get_cuda_stream(handle);
+
+    int n_batches_query = raft::ceildiv((size_t)n_query_rows, batch_size_query);
+    csr_batcher_t query_batcher(
+      batch_size_query, n_query_rows, queryIndptr, queryIndices, queryData);
+
+    size_t rows_processed = 0;
+
+    for (int i = 0; i < n_batches_query; i++) {
+      /**
+       * Compute index batch info
+       */
+      query_batcher.set_batch(i);
+
+      /**
+       * Slice CSR to rows in batch
+       */
+
+      rmm::device_uvector query_batch_indptr(query_batcher.batch_rows() + 1, stream);
+
+      value_idx n_query_batch_nnz =
+        query_batcher.get_batch_csr_indptr_nnz(query_batch_indptr.data(), stream);
+
+      rmm::device_uvector query_batch_indices(n_query_batch_nnz, stream);
+      rmm::device_uvector query_batch_data(n_query_batch_nnz, stream);
+
+      query_batcher.get_batch_csr_indices_data(
+        query_batch_indices.data(), query_batch_data.data(), stream);
+
+      // A 3-partition temporary merge space to scale the batching. 2 parts for subsequent
+      // batches and 1 space for the results of the merge, which get copied back to the top
+      rmm::device_uvector merge_buffer_indices(0, stream);
+      rmm::device_uvector merge_buffer_dists(0, stream);
+
+      value_t* dists_merge_buffer_ptr;
+      value_idx* indices_merge_buffer_ptr;
+
+      int n_batches_idx = raft::ceildiv((size_t)n_idx_rows, batch_size_index);
+      csr_batcher_t idx_batcher(
+        batch_size_index, n_idx_rows, idxIndptr, idxIndices, idxData);
+
+      for (int j = 0; j < n_batches_idx; j++) {
+        idx_batcher.set_batch(j);
+
+        merge_buffer_indices.resize(query_batcher.batch_rows() * k * 3, stream);
+        merge_buffer_dists.resize(query_batcher.batch_rows() * k * 3, stream);
+
+        /**
+         * Slice CSR to rows in batch
+         */
+        rmm::device_uvector idx_batch_indptr(idx_batcher.batch_rows() + 1, stream);
+        rmm::device_uvector idx_batch_indices(0, stream);
+        rmm::device_uvector idx_batch_data(0, stream);
+
+        value_idx idx_batch_nnz =
+          idx_batcher.get_batch_csr_indptr_nnz(idx_batch_indptr.data(), stream);
+
+        idx_batch_indices.resize(idx_batch_nnz, stream);
+        idx_batch_data.resize(idx_batch_nnz, stream);
+
+        idx_batcher.get_batch_csr_indices_data(
+          idx_batch_indices.data(), idx_batch_data.data(), stream);
+
+        /**
+         * Compute distances
+         */
+        uint64_t dense_size =
+          (uint64_t)idx_batcher.batch_rows() * (uint64_t)query_batcher.batch_rows();
+        rmm::device_uvector batch_dists(dense_size, stream);
+
+        // Zero the tile on the same stream that allocated it and that the
+        // distance kernels run on; a default-stream cudaMemset is not ordered
+        // with that stream when it is non-blocking.
+        RAFT_CUDA_TRY(
+          cudaMemsetAsync(batch_dists.data(), 0, batch_dists.size() * sizeof(value_t), stream));
+
+        compute_distances(idx_batcher,
+                          query_batcher,
+                          idx_batch_nnz,
+                          n_query_batch_nnz,
+                          idx_batch_indptr.data(),
+                          idx_batch_indices.data(),
+                          idx_batch_data.data(),
+                          query_batch_indptr.data(),
+                          query_batch_indices.data(),
+                          query_batch_data.data(),
+                          batch_dists.data());
+
+        // Build batch indices array
+        rmm::device_uvector batch_indices(batch_dists.size(), stream);
+
+        // populate batch indices array
+        value_idx batch_rows = query_batcher.batch_rows(), batch_cols = idx_batcher.batch_rows();
+
+        iota_fill(batch_indices.data(), batch_rows, batch_cols, stream);
+
+        /**
+         * Perform k-selection on batch & merge with other k-selections
+         */
+        // Partition 1 of the merge buffer receives this tile's top-k.
+        size_t merge_buffer_offset = batch_rows * k;
+        dists_merge_buffer_ptr     = merge_buffer_dists.data() + merge_buffer_offset;
+        indices_merge_buffer_ptr   = merge_buffer_indices.data() + merge_buffer_offset;
+
+        perform_k_selection(idx_batcher,
+                            query_batcher,
+                            batch_dists.data(),
+                            batch_indices.data(),
+                            dists_merge_buffer_ptr,
+                            indices_merge_buffer_ptr);
+
+        value_t* dists_merge_buffer_tmp_ptr     = dists_merge_buffer_ptr;
+        value_idx* indices_merge_buffer_tmp_ptr = indices_merge_buffer_ptr;
+
+        // Merge results of different batches if necessary
+        if (idx_batcher.batch_start() > 0) {
+          // Partition 2 receives the merge of partitions 0 and 1.
+          size_t merge_buffer_tmp_out  = batch_rows * k * 2;
+          dists_merge_buffer_tmp_ptr   = merge_buffer_dists.data() + merge_buffer_tmp_out;
+          indices_merge_buffer_tmp_ptr = merge_buffer_indices.data() + merge_buffer_tmp_out;
+
+          merge_batches(idx_batcher,
+                        query_batcher,
+                        merge_buffer_dists.data(),
+                        merge_buffer_indices.data(),
+                        dists_merge_buffer_tmp_ptr,
+                        indices_merge_buffer_tmp_ptr);
+        }
+
+        // copy merged output back into merge buffer partition for next iteration
+        raft::copy_async(
+          merge_buffer_indices.data(), indices_merge_buffer_tmp_ptr, batch_rows * k, stream);
+        raft::copy_async(
+          merge_buffer_dists.data(), dists_merge_buffer_tmp_ptr, batch_rows * k, stream);
+      }
+
+      // Copy final merged batch to output array
+      raft::copy_async(output_indices + (rows_processed * k),
+                       merge_buffer_indices.data(),
+                       query_batcher.batch_rows() * k,
+                       stream);
+      raft::copy_async(output_dists + (rows_processed * k),
+                       merge_buffer_dists.data(),
+                       query_batcher.batch_rows() * k,
+                       stream);
+
+      rows_processed += query_batcher.batch_rows();
+    }
+  }
+
+ private:
+  /**
+   * Merges the two leading k-wide partitions of the merge buffer into
+   * `out_dists` / `out_indices`. Indices from the second partition are shifted
+   * by the first row of the current index batch.
+   */
+  void merge_batches(csr_batcher_t& idx_batcher,
+                     csr_batcher_t& query_batcher,
+                     value_t* merge_buffer_dists,
+                     value_idx* merge_buffer_indices,
+                     value_t* out_dists,
+                     value_idx* out_indices)
+  {
+    auto stream = raft::resource::get_cuda_stream(handle);
+
+    // build translation buffer to shift resulting indices by the batch
+    std::vector id_ranges;
+    id_ranges.push_back(0);
+    id_ranges.push_back(idx_batcher.batch_start());
+
+    rmm::device_uvector trans(id_ranges.size(), stream);
+    raft::update_device(trans.data(), id_ranges.data(), id_ranges.size(), stream);
+
+    // combine merge buffers only if there's more than 1 partition to combine
+    cuvs::neighbors::detail::knn_merge_parts(merge_buffer_dists,
+                                             merge_buffer_indices,
+                                             out_dists,
+                                             out_indices,
+                                             query_batcher.batch_rows(),
+                                             2,
+                                             k,
+                                             stream,
+                                             trans.data());
+  }
+
+  /**
+   * Selects, for every query row of the dense tile, the best
+   * min(k, index rows in batch) entries and writes them to `out_dists` /
+   * `out_indices`. The selection direction follows is_min_close(metric).
+   *
+   * NOTE(review): when fewer than k index rows are in the batch the output
+   * views are only n_neighbors wide, while run() lays the merge buffer out in
+   * partitions of batch_rows * k; confirm that the merge step tolerates this.
+   */
+  void perform_k_selection(csr_batcher_t idx_batcher,
+                           csr_batcher_t query_batcher,
+                           value_t* batch_dists,
+                           value_idx* batch_indices,
+                           value_t* out_dists,
+                           value_idx* out_indices)
+  {
+    // tile shape: query rows x index rows
+    value_idx batch_rows = query_batcher.batch_rows(), batch_cols = idx_batcher.batch_rows();
+
+    // in the case where the number of idx rows in the batch is < k, we
+    // want to adjust k.
+    value_idx n_neighbors = std::min(static_cast(k), batch_cols);
+
+    bool ascending = cuvs::distance::is_min_close(metric);
+
+    // kernel to slice first (min) k cols and copy into batched merge buffer
+    cuvs::selection::select_k(
+      handle,
+      raft::make_device_matrix_view(batch_dists, batch_rows, batch_cols),
+      raft::make_device_matrix_view(
+        batch_indices, batch_rows, batch_cols),
+      raft::make_device_matrix_view(out_dists, batch_rows, n_neighbors),
+      raft::make_device_matrix_view(out_indices, batch_rows, n_neighbors),
+      ascending,
+      true);
+  }
+
+  /**
+   * Fills a distances_config_t with the query batch as operand "a" and the
+   * index batch as operand "b", then computes the dense distance tile into
+   * `batch_dists` with the configured metric.
+   */
+  void compute_distances(csr_batcher_t& idx_batcher,
+                         csr_batcher_t& query_batcher,
+                         size_t idx_batch_nnz,
+                         size_t query_batch_nnz,
+                         value_idx* idx_batch_indptr,
+                         value_idx* idx_batch_indices,
+                         value_t* idx_batch_data,
+                         value_idx* query_batch_indptr,
+                         value_idx* query_batch_indices,
+                         value_t* query_batch_data,
+                         value_t* batch_dists)
+  {
+    /**
+     * Compute distances
+     */
+    cuvs::distance::detail::sparse::distances_config_t dist_config(handle);
+    dist_config.b_nrows = idx_batcher.batch_rows();
+    dist_config.b_ncols = n_idx_cols;
+    dist_config.b_nnz   = idx_batch_nnz;
+
+    dist_config.b_indptr  = idx_batch_indptr;
+    dist_config.b_indices = idx_batch_indices;
+    dist_config.b_data    = idx_batch_data;
+
+    dist_config.a_nrows = query_batcher.batch_rows();
+    dist_config.a_ncols = n_query_cols;
+    dist_config.a_nnz   = query_batch_nnz;
+
+    dist_config.a_indptr  = query_batch_indptr;
+    dist_config.a_indices = query_batch_indices;
+    dist_config.a_data    = query_batch_data;
+
+    cuvs::distance::pairwiseDistance(batch_dists, dist_config, metric, metricArg);
+  }
+
+  const value_idx *idxIndptr, *idxIndices, *queryIndptr, *queryIndices;
+  value_idx* output_indices;
+  const value_t *idxData, *queryData;
+  value_t* output_dists;
+
+  size_t idxNNZ, queryNNZ, batch_size_index, batch_size_query;
+
+  cuvs::distance::DistanceType metric;
+
+  float metricArg;
+
+  int n_idx_rows, n_idx_cols, n_query_rows, n_query_cols, k;
+
+  raft::resources const& handle;
+};
+
+};  // namespace cuvs::neighbors::detail
diff --git a/cpp/src/neighbors/sparse_brute_force.cu b/cpp/src/neighbors/sparse_brute_force.cu
new file mode 100644
index 000000000..e277961ec
--- /dev/null
+++ b/cpp/src/neighbors/sparse_brute_force.cu
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "detail/sparse_knn.cuh"
+
+namespace cuvs::neighbors::brute_force {
+// Stores the dataset view, metric and metric argument; `res` is not used here.
+// NOTE(review): the template parameter list and the class template arguments
+// are missing from this text; confirm against the upstream file.
+template
+sparse_index::sparse_index(raft::resources const& res,
+                           raft::device_csr_matrix_view dataset,
+                           cuvs::distance::DistanceType metric,
+                           T metric_arg)
+  : dataset_(dataset), metric_(metric), metric_arg_(metric_arg)
+{
+}
+
+// Builds a sparse brute-force index by wrapping the given CSR dataset view.
+// No computation happens here: the index only records its arguments.
+auto build(raft::resources const& handle,
+           raft::device_csr_matrix_view dataset,
+           cuvs::distance::DistanceType metric,
+           float metric_arg) -> cuvs::neighbors::brute_force::sparse_index
+{
+  return sparse_index(handle, dataset, metric, metric_arg);
+}
+
+// Runs a batched k-nearest-neighbors search of `query` against the indexed
+// dataset. k is the number of columns of `neighbors`; indices go to
+// `neighbors` and distances to `distances`.
+// NOTE(review): no shape checks are made here (rows of neighbors/distances vs.
+// query rows, matching column counts) -- presumably validated by the caller;
+// verify.
+void search(raft::resources const& handle,
+            const sparse_search_params& params,
+            const sparse_index& index,
+            raft::device_csr_matrix_view query,
+            raft::device_matrix_view neighbors,
+            raft::device_matrix_view distances)
+{
+  auto idx_structure   = index.dataset().structure_view();
+  auto query_structure = query.structure_view();
+  int k                = neighbors.extent(1);
+
+  // The temporary sparse_knn_t lives only for the duration of run().
+  detail::sparse_knn_t(idx_structure.get_indptr().data(),
+                       idx_structure.get_indices().data(),
+                       index.dataset().get_elements().data(),
+                       idx_structure.get_nnz(),
+                       idx_structure.get_n_rows(),
+                       idx_structure.get_n_cols(),
+                       query_structure.get_indptr().data(),
+                       query_structure.get_indices().data(),
+                       query.get_elements().data(),
+                       query_structure.get_nnz(),
+                       query_structure.get_n_rows(),
+                       query_structure.get_n_cols(),
+                       neighbors.data_handle(),
+                       distances.data_handle(),
+                       k,
+                       handle,
+                       params.batch_size_index,
+                       params.batch_size_query,
+                       index.metric(),
+                       index.metric_arg())
+    .run();
+}
+}  // namespace cuvs::neighbors::brute_force
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 7754a5043..286d721d7 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -94,7 +94,7 @@ endfunction() if(BUILD_TESTS) ConfigureTest( NAME NEIGHBORS_TEST PATH neighbors/brute_force.cu neighbors/brute_force_prefiltered.cu - neighbors/refine.cu GPUS 1 PERCENT 100 + neighbors/sparse_brute_force.cu
neighbors/refine.cu GPUS 1 PERCENT 100 ) ConfigureTest( @@ -206,6 +206,7 @@ if(BUILD_TESTS) distance/dist_lp_unexp.cu distance/dist_russell_rao.cu distance/masked_nn.cu + distance/sparse_distance.cu sparse/neighbors/cross_component_nn.cu GPUS 1 diff --git a/cpp/test/distance/sparse_distance.cu b/cpp/test/distance/sparse_distance.cu new file mode 100644 index 000000000..f95487414 --- /dev/null +++ b/cpp/test/distance/sparse_distance.cu @@ -0,0 +1,850 @@ +/* + * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../test_utils.cuh" + +#include +#include +#include +#include + +#include + +#include +#include + +namespace cuvs { +namespace distance { + +using namespace raft; +using namespace raft::sparse; + +template +struct SparseDistanceInputs { + value_idx n_cols; + + std::vector indptr_h; + std::vector indices_h; + std::vector data_h; + + std::vector out_dists_ref_h; + + cuvs::distance::DistanceType metric; + + float metric_arg = 0.0; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const SparseDistanceInputs& dims) +{ + return os; +} + +template +class SparseDistanceTest + : public ::testing::TestWithParam> { + public: + SparseDistanceTest() + : params(::testing::TestWithParam>::GetParam()), + indptr(0, resource::get_cuda_stream(handle)), + indices(0, resource::get_cuda_stream(handle)), + data(0, resource::get_cuda_stream(handle)), + out_dists(0, resource::get_cuda_stream(handle)), + out_dists_ref(0, resource::get_cuda_stream(handle)) + { + } + + void SetUp() override + { + make_data(); + + int out_size = static_cast(params.indptr_h.size() - 1) * + static_cast(params.indptr_h.size() - 1); + + out_dists.resize(out_size, resource::get_cuda_stream(handle)); + + auto out = raft::make_device_matrix_view( + out_dists.data(), + static_cast(params.indptr_h.size() - 1), + static_cast(params.indptr_h.size() - 1)); + + auto x_structure = raft::make_device_compressed_structure_view( + indptr.data(), + indices.data(), + static_cast(params.indptr_h.size() - 1), + params.n_cols, + static_cast(params.indices_h.size())); + auto x = raft::make_device_csr_matrix_view(data.data(), x_structure); + + cuvs::distance::pairwise_distance(handle, x, x, out, params.metric, params.metric_arg); + + RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle))); + } + + void compare() + { + ASSERT_TRUE(devArrMatch(out_dists_ref.data(), + out_dists.data(), + params.out_dists_ref_h.size(), + CompareApprox(1e-3))); + } + + protected: + void make_data() + { + 
std::vector indptr_h = params.indptr_h; + std::vector indices_h = params.indices_h; + std::vector data_h = params.data_h; + + auto stream = resource::get_cuda_stream(handle); + indptr.resize(indptr_h.size(), stream); + indices.resize(indices_h.size(), stream); + data.resize(data_h.size(), stream); + + update_device(indptr.data(), indptr_h.data(), indptr_h.size(), stream); + update_device(indices.data(), indices_h.data(), indices_h.size(), stream); + update_device(data.data(), data_h.data(), data_h.size(), stream); + + std::vector out_dists_ref_h = params.out_dists_ref_h; + + out_dists_ref.resize((indptr_h.size() - 1) * (indptr_h.size() - 1), stream); + + update_device(out_dists_ref.data(), + out_dists_ref_h.data(), + out_dists_ref_h.size(), + resource::get_cuda_stream(handle)); + } + + raft::resources handle; + + // input data + rmm::device_uvector indptr, indices; + rmm::device_uvector data; + + // output data + rmm::device_uvector out_dists, out_dists_ref; + + SparseDistanceInputs params; +}; + +const std::vector> inputs_i32_f = { + {5, + {0, 0, 1, 2}, + + {1, 2}, + {0.5, 0.5}, + {0, 1, 1, 1, 0, 1, 1, 1, 0}, + cuvs::distance::DistanceType::CosineExpanded, + 0.0}, + {5, + {0, 0, 1, 2}, + + {1, 2}, + {1.0, 1.0}, + {0, 1, 1, 1, 0, 1, 1, 1, 0}, + cuvs::distance::DistanceType::JaccardExpanded, + 0.0}, + {2, + {0, 2, 4, 6, 8}, + {0, 1, 0, 1, 0, 1, 0, 1}, // indices + {1.0f, 3.0f, 1.0f, 5.0f, 50.0f, 28.0f, 16.0f, 2.0f}, + { + // dense output + 0.0, + 4.0, + 3026.0, + 226.0, + 4.0, + 0.0, + 2930.0, + 234.0, + 3026.0, + 2930.0, + 0.0, + 1832.0, + 226.0, + 234.0, + 1832.0, + 0.0, + }, + cuvs::distance::DistanceType::L2Expanded, + 0.0}, + {2, + {0, 2, 4, 6, 8}, + {0, 1, 0, 1, 0, 1, 0, 1}, + {1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}, + {5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0}, + cuvs::distance::DistanceType::InnerProduct, + 0.0}, + {2, + {0, 2, 4, 6, 8}, + {0, 1, 0, 1, 0, 1, 0, 1}, // indices + {1.0f, 3.0f, 1.0f, 5.0f, 50.0f, 
28.0f, 16.0f, 2.0f}, + { + // dense output + 0.0, + 4.0, + 3026.0, + 226.0, + 4.0, + 0.0, + 2930.0, + 234.0, + 3026.0, + 2930.0, + 0.0, + 1832.0, + 226.0, + 234.0, + 1832.0, + 0.0, + }, + cuvs::distance::DistanceType::L2Unexpanded, + 0.0}, + + {10, + {0, 5, 11, 15, 20, 27, 32, 36, 43, 47, 50}, + {0, 1, 3, 6, 8, 0, 1, 2, 3, 5, 6, 1, 2, 4, 8, 0, 2, 3, 4, 7, 0, 1, 2, 3, 4, + 6, 8, 0, 1, 2, 5, 7, 1, 5, 8, 9, 0, 1, 2, 5, 6, 8, 9, 2, 4, 5, 7, 0, 3, 9}, // indices + {0.5438, 0.2695, 0.4377, 0.7174, 0.9251, 0.7648, 0.3322, 0.7279, 0.4131, 0.5167, + 0.8655, 0.0730, 0.0291, 0.9036, 0.7988, 0.5019, 0.7663, 0.2190, 0.8206, 0.3625, + 0.0411, 0.3995, 0.5688, 0.7028, 0.8706, 0.3199, 0.4431, 0.0535, 0.2225, 0.8853, + 0.1932, 0.3761, 0.3379, 0.1771, 0.2107, 0.228, 0.5279, 0.4885, 0.3495, 0.5079, + 0.2325, 0.2331, 0.3018, 0.6231, 0.2645, 0.8429, 0.6625, 0.0797, 0.2724, 0.4218}, + {0., 0.39419924, 0.54823225, 0.79593037, 0.45658883, 0.93634219, 0.58146987, 0.44940102, + 1., 0.76978799, 0.39419924, 0., 0.97577154, 0.48904013, 0.48300801, 0.45087445, + 0.73323749, 0.21050481, 0.54847744, 0.78021386, 0.54823225, 0.97577154, 0., 0.51413997, + 0.31195441, 0.96546343, 0.67534399, 0.81665436, 0.8321819, 1., 0.79593037, 0.48904013, + 0.51413997, 0., 0.28605559, 0.35772784, 1., 0.60889396, 0.43324829, 0.84923694, + 0.45658883, 0.48300801, 0.31195441, 0.28605559, 0., 0.58623212, 0.6745457, 0.60287165, + 0.67676228, 0.73155632, 0.93634219, 0.45087445, 0.96546343, 0.35772784, 0.58623212, 0., + 0.77917274, 0.48390993, 0.24558392, 0.99166225, 0.58146987, 0.73323749, 0.67534399, 1., + 0.6745457, 0.77917274, 0., 0.27605686, 0.76064776, 0.61547536, 0.44940102, 0.21050481, + 0.81665436, 0.60889396, 0.60287165, 0.48390993, 0.27605686, 0., 0.51360432, 0.68185144, + 1., 0.54847744, 0.8321819, 0.43324829, 0.67676228, 0.24558392, 0.76064776, 0.51360432, + 0., 1., 0.76978799, 0.78021386, 1., 0.84923694, 0.73155632, 0.99166225, + 0.61547536, 0.68185144, 1., 0.}, + 
cuvs::distance::DistanceType::CosineExpanded, + 0.0}, + + {10, + {0, 5, 11, 15, 20, 27, 32, 36, 43, 47, 50}, + {0, 1, 3, 6, 8, 0, 1, 2, 3, 5, 6, 1, 2, 4, 8, 0, 2, 3, 4, 7, 0, 1, 2, 3, 4, + 6, 8, 0, 1, 2, 5, 7, 1, 5, 8, 9, 0, 1, 2, 5, 6, 8, 9, 2, 4, 5, 7, 0, 3, 9}, // indices + {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, + {0.0, + 0.42857142857142855, + 0.7142857142857143, + 0.75, + 0.2857142857142857, + 0.75, + 0.7142857142857143, + 0.5, + 1.0, + 0.6666666666666666, + 0.42857142857142855, + 0.0, + 0.75, + 0.625, + 0.375, + 0.42857142857142855, + 0.75, + 0.375, + 0.75, + 0.7142857142857143, + 0.7142857142857143, + 0.75, + 0.0, + 0.7142857142857143, + 0.42857142857142855, + 0.7142857142857143, + 0.6666666666666666, + 0.625, + 0.6666666666666666, + 1.0, + 0.75, + 0.625, + 0.7142857142857143, + 0.0, + 0.5, + 0.5714285714285714, + 1.0, + 0.8, + 0.5, + 0.6666666666666666, + 0.2857142857142857, + 0.375, + 0.42857142857142855, + 0.5, + 0.0, + 0.6666666666666666, + 0.7777777777777778, + 0.4444444444444444, + 0.7777777777777778, + 0.75, + 0.75, + 0.42857142857142855, + 0.7142857142857143, + 0.5714285714285714, + 0.6666666666666666, + 0.0, + 0.7142857142857143, + 0.5, + 0.5, + 0.8571428571428571, + 0.7142857142857143, + 0.75, + 0.6666666666666666, + 1.0, + 0.7777777777777778, + 0.7142857142857143, + 0.0, + 0.42857142857142855, + 0.8571428571428571, + 0.8333333333333334, + 0.5, + 0.375, + 0.625, + 0.8, + 0.4444444444444444, + 0.5, + 0.42857142857142855, + 0.0, + 0.7777777777777778, + 0.75, + 1.0, + 0.75, + 0.6666666666666666, + 0.5, + 0.7777777777777778, + 0.5, + 0.8571428571428571, + 0.7777777777777778, + 0.0, + 1.0, + 0.6666666666666666, + 0.7142857142857143, + 1.0, + 0.6666666666666666, + 0.75, + 0.8571428571428571, + 0.8333333333333334, + 0.75, + 1.0, + 0.0}, + cuvs::distance::DistanceType::JaccardExpanded, + 
0.0}, + + {10, + {0, 5, 11, 15, 20, 27, 32, 36, 43, 47, 50}, + {0, 1, 3, 6, 8, 0, 1, 2, 3, 5, 6, 1, 2, 4, 8, 0, 2, 3, 4, 7, 0, 1, 2, 3, 4, + 6, 8, 0, 1, 2, 5, 7, 1, 5, 8, 9, 0, 1, 2, 5, 6, 8, 9, 2, 4, 5, 7, 0, 3, 9}, // indices + {0.5438, 0.2695, 0.4377, 0.7174, 0.9251, 0.7648, 0.3322, 0.7279, 0.4131, 0.5167, + 0.8655, 0.0730, 0.0291, 0.9036, 0.7988, 0.5019, 0.7663, 0.2190, 0.8206, 0.3625, + 0.0411, 0.3995, 0.5688, 0.7028, 0.8706, 0.3199, 0.4431, 0.0535, 0.2225, 0.8853, + 0.1932, 0.3761, 0.3379, 0.1771, 0.2107, 0.228, 0.5279, 0.4885, 0.3495, 0.5079, + 0.2325, 0.2331, 0.3018, 0.6231, 0.2645, 0.8429, 0.6625, 0.0797, 0.2724, 0.4218}, + {0.0, + 3.3954660629919076, + 5.6469232737388815, + 6.373112846266441, + 4.0212880272531715, + 6.916281504639404, + 5.741508386786526, + 5.411470999663036, + 9.0, + 4.977014354725805, + 3.3954660629919076, + 0.0, + 7.56256082439209, + 5.540261147481582, + 4.832322929216881, + 4.62003193872216, + 6.498056792320361, + 4.309846252268695, + 6.317531174829905, + 6.016362684141827, + 5.6469232737388815, + 7.56256082439209, + 0.0, + 5.974878731322299, + 4.898357301336036, + 6.442097410320605, + 5.227077347287883, + 7.134101195584642, + 5.457753923371659, + 7.0, + 6.373112846266441, + 5.540261147481582, + 5.974878731322299, + 0.0, + 5.5507273748583, + 4.897749658726415, + 9.0, + 8.398776718824767, + 3.908281400328807, + 4.83431066343688, + 4.0212880272531715, + 4.832322929216881, + 4.898357301336036, + 5.5507273748583, + 0.0, + 6.632989819428174, + 7.438852294822894, + 5.6631570310967465, + 7.579428202635459, + 6.760811985364303, + 6.916281504639404, + 4.62003193872216, + 6.442097410320605, + 4.897749658726415, + 6.632989819428174, + 0.0, + 5.249404187382862, + 6.072559523278559, + 4.07661278488929, + 6.19678948003145, + 5.741508386786526, + 6.498056792320361, + 5.227077347287883, + 9.0, + 7.438852294822894, + 5.249404187382862, + 0.0, + 3.854811639654704, + 6.652724827169063, + 5.298236851430971, + 5.411470999663036, + 4.309846252268695, + 
7.134101195584642, + 8.398776718824767, + 5.6631570310967465, + 6.072559523278559, + 3.854811639654704, + 0.0, + 7.529184598969917, + 6.903282911791188, + 9.0, + 6.317531174829905, + 5.457753923371659, + 3.908281400328807, + 7.579428202635459, + 4.07661278488929, + 6.652724827169063, + 7.529184598969917, + 0.0, + 7.0, + 4.977014354725805, + 6.016362684141827, + 7.0, + 4.83431066343688, + 6.760811985364303, + 6.19678948003145, + 5.298236851430971, + 6.903282911791188, + 7.0, + 0.0}, + cuvs::distance::DistanceType::Canberra, + 0.0}, + + {10, + {0, 5, 11, 15, 20, 27, 32, 36, 43, 47, 50}, + {0, 1, 3, 6, 8, 0, 1, 2, 3, 5, 6, 1, 2, 4, 8, 0, 2, 3, 4, 7, 0, 1, 2, 3, 4, + 6, 8, 0, 1, 2, 5, 7, 1, 5, 8, 9, 0, 1, 2, 5, 6, 8, 9, 2, 4, 5, 7, 0, 3, 9}, // indices + {0.5438, 0.2695, 0.4377, 0.7174, 0.9251, 0.7648, 0.3322, 0.7279, 0.4131, 0.5167, + 0.8655, 0.0730, 0.0291, 0.9036, 0.7988, 0.5019, 0.7663, 0.2190, 0.8206, 0.3625, + 0.0411, 0.3995, 0.5688, 0.7028, 0.8706, 0.3199, 0.4431, 0.0535, 0.2225, 0.8853, + 0.1932, 0.3761, 0.3379, 0.1771, 0.2107, 0.228, 0.5279, 0.4885, 0.3495, 0.5079, + 0.2325, 0.2331, 0.3018, 0.6231, 0.2645, 0.8429, 0.6625, 0.0797, 0.2724, 0.4218}, + {0.0, + 1.31462855332296, + 1.3690307816129905, + 1.698603990921237, + 1.3460470789553531, + 1.6636670712582544, + 1.2651744044972217, + 1.1938329352055201, + 1.8811409082590185, + 1.3653115050624267, + 1.31462855332296, + 0.0, + 1.9447722703291133, + 1.42818777206562, + 1.4685491458946494, + 1.3071999866010466, + 1.4988622861692171, + 0.9698559287406783, + 1.4972023224597841, + 1.5243383567266802, + 1.3690307816129905, + 1.9447722703291133, + 0.0, + 1.2748400840107568, + 1.0599569946448246, + 1.546591282841402, + 1.147526531928459, + 1.447002179128145, + 1.5982242387673176, + 1.3112533607072414, + 1.698603990921237, + 1.42818777206562, + 1.2748400840107568, + 0.0, + 1.038121552545461, + 1.011788365364402, + 1.3907391109256988, + 1.3128200942311496, + 1.19595706584447, + 1.3233328139624725, + 1.3460470789553531, + 
1.4685491458946494, + 1.0599569946448246, + 1.038121552545461, + 0.0, + 1.3642741698145529, + 1.3493868683808095, + 1.394942694628328, + 1.572881849642552, + 1.380122665319464, + 1.6636670712582544, + 1.3071999866010466, + 1.546591282841402, + 1.011788365364402, + 1.3642741698145529, + 0.0, + 1.018961640373018, + 1.0114394258945634, + 0.8338711034820684, + 1.1247823842299223, + 1.2651744044972217, + 1.4988622861692171, + 1.147526531928459, + 1.3907391109256988, + 1.3493868683808095, + 1.018961640373018, + 0.0, + 0.7701238110357329, + 1.245486437864406, + 0.5551259549534626, + 1.1938329352055201, + 0.9698559287406783, + 1.447002179128145, + 1.3128200942311496, + 1.394942694628328, + 1.0114394258945634, + 0.7701238110357329, + 0.0, + 1.1886800117391216, + 1.0083692448135637, + 1.8811409082590185, + 1.4972023224597841, + 1.5982242387673176, + 1.19595706584447, + 1.572881849642552, + 0.8338711034820684, + 1.245486437864406, + 1.1886800117391216, + 0.0, + 1.3661374102525012, + 1.3653115050624267, + 1.5243383567266802, + 1.3112533607072414, + 1.3233328139624725, + 1.380122665319464, + 1.1247823842299223, + 0.5551259549534626, + 1.0083692448135637, + 1.3661374102525012, + 0.0}, + cuvs::distance::DistanceType::LpUnexpanded, + 2.0}, + + {10, + {0, 5, 11, 15, 20, 27, 32, 36, 43, 47, 50}, + {0, 1, 3, 6, 8, 0, 1, 2, 3, 5, 6, 1, 2, 4, 8, 0, 2, 3, 4, 7, 0, 1, 2, 3, 4, + 6, 8, 0, 1, 2, 5, 7, 1, 5, 8, 9, 0, 1, 2, 5, 6, 8, 9, 2, 4, 5, 7, 0, 3, 9}, // indices + {0.5438, 0.2695, 0.4377, 0.7174, 0.9251, 0.7648, 0.3322, 0.7279, 0.4131, 0.5167, + 0.8655, 0.0730, 0.0291, 0.9036, 0.7988, 0.5019, 0.7663, 0.2190, 0.8206, 0.3625, + 0.0411, 0.3995, 0.5688, 0.7028, 0.8706, 0.3199, 0.4431, 0.0535, 0.2225, 0.8853, + 0.1932, 0.3761, 0.3379, 0.1771, 0.2107, 0.228, 0.5279, 0.4885, 0.3495, 0.5079, + 0.2325, 0.2331, 0.3018, 0.6231, 0.2645, 0.8429, 0.6625, 0.0797, 0.2724, 0.4218}, + {0.0, + 0.9251771844789913, + 0.9036452083899731, + 0.9251771844789913, + 0.8706483735804971, + 0.9251771844789913, + 
0.717493881903289, + 0.6920214832303888, + 0.9251771844789913, + 0.9251771844789913, + 0.9251771844789913, + 0.0, + 0.9036452083899731, + 0.8655339692155823, + 0.8706483735804971, + 0.8655339692155823, + 0.8655339692155823, + 0.6329837991017668, + 0.8655339692155823, + 0.8655339692155823, + 0.9036452083899731, + 0.9036452083899731, + 0.0, + 0.7988276152181608, + 0.7028075145996631, + 0.9036452083899731, + 0.9036452083899731, + 0.9036452083899731, + 0.8429599432532096, + 0.9036452083899731, + 0.9251771844789913, + 0.8655339692155823, + 0.7988276152181608, + 0.0, + 0.48376552205293305, + 0.8206394616536681, + 0.8206394616536681, + 0.8206394616536681, + 0.8429599432532096, + 0.8206394616536681, + 0.8706483735804971, + 0.8706483735804971, + 0.7028075145996631, + 0.48376552205293305, + 0.0, + 0.8706483735804971, + 0.8706483735804971, + 0.8706483735804971, + 0.8429599432532096, + 0.8706483735804971, + 0.9251771844789913, + 0.8655339692155823, + 0.9036452083899731, + 0.8206394616536681, + 0.8706483735804971, + 0.0, + 0.8853924473642432, + 0.535821510936138, + 0.6497196601457607, + 0.8853924473642432, + 0.717493881903289, + 0.8655339692155823, + 0.9036452083899731, + 0.8206394616536681, + 0.8706483735804971, + 0.8853924473642432, + 0.0, + 0.5279604218147174, + 0.6658348373853169, + 0.33799874888632914, + 0.6920214832303888, + 0.6329837991017668, + 0.9036452083899731, + 0.8206394616536681, + 0.8706483735804971, + 0.535821510936138, + 0.5279604218147174, + 0.0, + 0.662579808115858, + 0.5079750812968089, + 0.9251771844789913, + 0.8655339692155823, + 0.8429599432532096, + 0.8429599432532096, + 0.8429599432532096, + 0.6497196601457607, + 0.6658348373853169, + 0.662579808115858, + 0.0, + 0.8429599432532096, + 0.9251771844789913, + 0.8655339692155823, + 0.9036452083899731, + 0.8206394616536681, + 0.8706483735804971, + 0.8853924473642432, + 0.33799874888632914, + 0.5079750812968089, + 0.8429599432532096, + 0.0}, + cuvs::distance::DistanceType::Linf, + 0.0}, + + {15, + {0, 5, 8, 9, 
15, 20, 26, 31, 34, 38, 45}, + {0, 1, 5, 6, 9, 1, 4, 14, 7, 3, 4, 7, 9, 11, 14, 0, 3, 7, 8, 12, 0, 2, 5, + 7, 8, 14, 4, 9, 10, 11, 13, 4, 10, 14, 5, 6, 8, 9, 0, 2, 3, 4, 6, 10, 11}, + {0.13537497, 0.51440163, 0.17231936, 0.02417618, 0.15372786, 0.17760507, 0.73789274, 0.08450219, + 1., 0.20184723, 0.18036963, 0.12581403, 0.13867603, 0.24040536, 0.11288773, 0.00290246, + 0.09120187, 0.31190555, 0.43245423, 0.16153588, 0.3233026, 0.05279589, 0.1387149, 0.05962761, + 0.41751856, 0.00804045, 0.03262381, 0.27507131, 0.37245804, 0.16378881, 0.15605804, 0.3867739, + 0.24908977, 0.36413632, 0.37643732, 0.28910679, 0.0198409, 0.31461499, 0.24412279, 0.08327667, + 0.04444576, 0.05047969, 0.26190054, 0.2077349, 0.10803964}, + {1.05367121e-08, 8.35309089e-01, 1.00000000e+00, 9.24116813e-01, + 9.90039274e-01, 7.97613546e-01, 8.91271059e-01, 1.00000000e+00, + 6.64669302e-01, 8.59439512e-01, 8.35309089e-01, 1.05367121e-08, + 1.00000000e+00, 7.33151506e-01, 1.00000000e+00, 9.86880955e-01, + 9.19154851e-01, 5.38849774e-01, 1.00000000e+00, 8.98332369e-01, + 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 8.03303970e-01, + 6.64465915e-01, 8.69374690e-01, 1.00000000e+00, 1.00000000e+00, + 1.00000000e+00, 1.00000000e+00, 9.24116813e-01, 7.33151506e-01, + 8.03303970e-01, 0.00000000e+00, 8.16225843e-01, 9.39818306e-01, + 7.27700415e-01, 7.30155528e-01, 8.89451011e-01, 8.05419635e-01, + 9.90039274e-01, 1.00000000e+00, 6.64465915e-01, 8.16225843e-01, + 0.00000000e+00, 6.38804490e-01, 1.00000000e+00, 1.00000000e+00, + 9.52559809e-01, 9.53789212e-01, 7.97613546e-01, 9.86880955e-01, + 8.69374690e-01, 9.39818306e-01, 6.38804490e-01, 0.0, + 1.00000000e+00, 9.72569112e-01, 8.24907516e-01, 8.07933016e-01, + 8.91271059e-01, 9.19154851e-01, 1.00000000e+00, 7.27700415e-01, + 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.63596268e-01, + 8.40131263e-01, 7.40428532e-01, 1.00000000e+00, 5.38849774e-01, + 1.00000000e+00, 7.30155528e-01, 1.00000000e+00, 9.72569112e-01, + 7.63596268e-01, 
0.00000000e+00, 1.00000000e+00, 7.95485011e-01, + 6.64669302e-01, 1.00000000e+00, 1.00000000e+00, 8.89451011e-01, + 9.52559809e-01, 8.24907516e-01, 8.40131263e-01, 1.00000000e+00, + 0.00000000e+00, 8.51370877e-01, 8.59439512e-01, 8.98332369e-01, + 1.00000000e+00, 8.05419635e-01, 9.53789212e-01, 8.07933016e-01, + 7.40428532e-01, 7.95485011e-01, 8.51370877e-01, 1.49011612e-08}, + // Dataset is L1 normalized into pdfs + cuvs::distance::DistanceType::HellingerExpanded, + 0.0}, + + {4, + {0, 1, 1, 2, 4}, + {3, 2, 0, 1}, // indices + {0.99296, 0.42180, 0.11687, 0.305869}, + { + // dense output + 0.0, + 0.99296, + 1.41476, + 1.415707, + 0.99296, + 0.0, + 0.42180, + 0.42274, + 1.41476, + 0.42180, + 0.0, + 0.84454, + 1.41570, + 0.42274, + 0.84454, + 0.0, + }, + cuvs::distance::DistanceType::L1, + 0.0}, + {5, + {0, 3, 8, 12, 16, 20, 25, 30, 35, 40, 45}, + {0, 3, 4, 0, 1, 2, 3, 4, 1, 2, 3, 4, 0, 2, 3, 4, 0, 1, 3, 4, 0, 1, 2, + 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4}, + {0.70862347, 0.8232774, 0.12108795, 0.84527547, 0.94937088, 0.03258545, 0.99584118, 0.76835667, + 0.34426657, 0.2357925, 0.01274851, 0.11422017, 0.3437756, 0.31967718, 0.5956055, 0.31610373, + 0.04147273, 0.03724415, 0.21515727, 0.04751052, 0.50283183, 0.99957274, 0.01395933, 0.96032529, + 0.88438711, 0.46095378, 0.27432481, 0.54294211, 0.54280225, 0.59503329, 0.61364678, 0.22837736, + 0.56609561, 0.29809423, 0.76736686, 0.56460608, 0.98165371, 0.02140123, 0.19881268, 0.26057815, + 0.31648823, 0.89874295, 0.27366735, 0.5119944, 0.11416134}, + {// dense output + 0., 0.48769777, 1.88014197, 0.26127048, 0.26657011, 0.7874794, 0.76962708, 1.122858, + 1.1232498, 1.08166081, 0.48769777, 0., 1.31332116, 0.98318907, 0.42661815, 0.09279052, + 1.35187836, 1.38429055, 0.40658897, 0.56136388, 1.88014197, 1.31332116, 0., 1.82943642, + 1.54826077, 1.05918884, 1.59360067, 1.34698954, 0.60215168, 0.46993848, 0.26127048, 0.98318907, + 1.82943642, 0., 0.29945563, 1.08494093, 0.22934281, 0.82801925, 
1.74288748, 1.50610116, + 0.26657011, 0.42661815, 1.54826077, 0.29945563, 0., 0.45060069, 0.77814948, 1.45245711, + 1.18328348, 0.82486987, 0.7874794, 0.09279052, 1.05918884, 1.08494093, 0.45060069, 0., + 1.29899154, 1.40683824, 0.48505269, 0.53862363, 0.76962708, 1.35187836, 1.59360067, 0.22934281, + 0.77814948, 1.29899154, 0., 0.33202426, 1.92108999, 1.88812175, 1.122858, 1.38429055, + 1.34698954, 0.82801925, 1.45245711, 1.40683824, 0.33202426, 0., 1.47318624, 1.92660889, + 1.1232498, 0.40658897, 0.60215168, 1.74288748, 1.18328348, 0.48505269, 1.92108999, 1.47318624, + 0., 0.24992619, 1.08166081, 0.56136388, 0.46993848, 1.50610116, 0.82486987, 0.53862363, + 1.88812175, 1.92660889, 0.24992619, 0.}, + cuvs::distance::DistanceType::CorrelationExpanded, + 0.0}, + {5, + {0, 1, 2, 4, 4, 5, 6, 7, 9, 9, 10}, + {1, 4, 0, 4, 1, 3, 0, 1, 3, 0}, + {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, + {// dense output + 0., 1., 1., 1., 0.8, 1., 1., 0.8, 1., 1., 1., 0., 0.8, 1., 1., 1., 1., 1., 1., 1., + 1., 0.8, 0., 1., 1., 1., 0.8, 1., 1., 0.8, 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., + 0.8, 1., 1., 1., 0., 1., 1., 0.8, 1., 1., 1., 1., 1., 1., 1., 0., 1., 0.8, 1., 1., + 1., 1., 0.8, 1., 1., 1., 0., 1., 1., 0.8, 0.8, 1., 1., 1., 0.8, 0.8, 1., 0., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0.8, 1., 1., 1., 0.8, 1., 1., 0.}, + cuvs::distance::DistanceType::RusselRaoExpanded, + 0.0}, + {5, + {0, 1, 1, 3, 3, 4, 4, 6, 9, 10, 10}, + {0, 3, 4, 4, 2, 3, 0, 2, 3, 2}, + {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, + {// dense output + 0., 0.2, 0.6, 0.2, 0.4, 0.2, 0.6, 0.4, 0.4, 0.2, 0.2, 0., 0.4, 0., 0.2, 0., 0.4, + 0.6, 0.2, 0., 0.6, 0.4, 0., 0.4, 0.2, 0.4, 0.4, 0.6, 0.6, 0.4, 0.2, 0., 0.4, 0., + 0.2, 0., 0.4, 0.6, 0.2, 0., 0.4, 0.2, 0.2, 0.2, 0., 0.2, 0.6, 0.8, 0.4, 0.2, 0.2, + 0., 0.4, 0., 0.2, 0., 0.4, 0.6, 0.2, 0., 0.6, 0.4, 0.4, 0.4, 0.6, 0.4, 0., 0.2, + 0.2, 0.4, 0.4, 0.6, 0.6, 0.6, 0.8, 0.6, 0.2, 0., 0.4, 0.6, 0.4, 0.2, 0.6, 0.2, 0.4, + 0.2, 0.2, 0.4, 0., 0.2, 0.2, 0., 0.4, 
0., 0.2, 0., 0.4, 0.6, 0.2, 0.}, + cuvs::distance::DistanceType::HammingUnexpanded, + 0.0}, + {3, + {0, 1, 2}, + {0, 1}, + {1.0, 1.0}, + {0.0, 0.83255, 0.83255, 0.0}, + cuvs::distance::DistanceType::JensenShannon, + 0.0}, + {2, + {0, 1, 3}, + {0, 0, 1}, + {1.0, 0.5, 0.5}, + {0, 0.4645014, 0.4645014, 0}, + cuvs::distance::DistanceType::JensenShannon, + 0.0}, + {3, + {0, 1, 2}, + {0, 0}, + {1.0, 1.0}, + {0.0, 0.0, 0.0, 0.0}, + cuvs::distance::DistanceType::JensenShannon, + 0.0}, + + {3, + {0, 1, 2}, + {0, 1}, + {1.0, 1.0}, + {0.0, 1.0, 1.0, 0.0}, + cuvs::distance::DistanceType::DiceExpanded, + 0.0}, + {3, + {0, 1, 3}, + {0, 0, 1}, + {1.0, 1.0, 1.0}, + {0, 0.333333, 0.333333, 0}, + cuvs::distance::DistanceType::DiceExpanded, + 0.0}, + +}; + +typedef SparseDistanceTest SparseDistanceTestF; +TEST_P(SparseDistanceTestF, Result) { compare(); } +INSTANTIATE_TEST_CASE_P(SparseDistanceTests, + SparseDistanceTestF, + ::testing::ValuesIn(inputs_i32_f)); + +} // end namespace distance +} // end namespace cuvs diff --git a/cpp/test/neighbors/sparse_brute_force.cu b/cpp/test/neighbors/sparse_brute_force.cu new file mode 100644 index 000000000..cb68989d4 --- /dev/null +++ b/cpp/test/neighbors/sparse_brute_force.cu @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../test_utils.cuh" + +#include +#include +#include + +#include +#include + +namespace cuvs { +namespace neighbors { + +using namespace raft; +using namespace raft::sparse; + +template +struct SparseKNNInputs { + value_idx n_cols; + + std::vector indptr_h; + std::vector indices_h; + std::vector data_h; + + std::vector out_dists_ref_h; + std::vector out_indices_ref_h; + + int k; + + int batch_size_index = 2; + int batch_size_query = 2; + + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2SqrtExpanded; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const SparseKNNInputs& dims) +{ + return os; +} + +template +class SparseKNNTest : public ::testing::TestWithParam> { + public: + SparseKNNTest() + : params(::testing::TestWithParam>::GetParam()), + indptr(0, resource::get_cuda_stream(handle)), + indices(0, resource::get_cuda_stream(handle)), + data(0, resource::get_cuda_stream(handle)), + out_indices(0, resource::get_cuda_stream(handle)), + out_dists(0, resource::get_cuda_stream(handle)), + out_indices_ref(0, resource::get_cuda_stream(handle)), + out_dists_ref(0, resource::get_cuda_stream(handle)) + { + } + + protected: + void SetUp() override + { + n_rows = params.indptr_h.size() - 1; + nnz = params.indices_h.size(); + k = params.k; + + make_data(); + + auto index_structure = + raft::make_device_compressed_structure_view( + indptr.data(), indices.data(), n_rows, params.n_cols, nnz); + auto index_csr = raft::make_device_csr_matrix_view(data.data(), index_structure); + + auto index = cuvs::neighbors::brute_force::build(handle, index_csr, params.metric); + + cuvs::neighbors::brute_force::sparse_search_params search_params; + search_params.batch_size_index = params.batch_size_index; + search_params.batch_size_query = params.batch_size_query; + + cuvs::neighbors::brute_force::search( + handle, + search_params, + index, + index_csr, + raft::make_device_matrix_view(out_indices.data(), n_rows, k), + 
raft::make_device_matrix_view(out_dists.data(), n_rows, k)); + + RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle))); + } + + void compare() + { + ASSERT_TRUE(devArrMatch( + out_dists_ref.data(), out_dists.data(), n_rows * k, CompareApprox(1e-4))); + ASSERT_TRUE( + devArrMatch(out_indices_ref.data(), out_indices.data(), n_rows * k, Compare())); + } + + protected: + void make_data() + { + std::vector indptr_h = params.indptr_h; + std::vector indices_h = params.indices_h; + std::vector data_h = params.data_h; + + auto stream = resource::get_cuda_stream(handle); + indptr.resize(indptr_h.size(), stream); + indices.resize(indices_h.size(), stream); + data.resize(data_h.size(), stream); + + update_device(indptr.data(), indptr_h.data(), indptr_h.size(), stream); + update_device(indices.data(), indices_h.data(), indices_h.size(), stream); + update_device(data.data(), data_h.data(), data_h.size(), stream); + + std::vector out_dists_ref_h = params.out_dists_ref_h; + std::vector out_indices_ref_h = params.out_indices_ref_h; + + out_indices_ref.resize(out_indices_ref_h.size(), stream); + out_dists_ref.resize(out_dists_ref_h.size(), stream); + + update_device( + out_indices_ref.data(), out_indices_ref_h.data(), out_indices_ref_h.size(), stream); + update_device(out_dists_ref.data(), out_dists_ref_h.data(), out_dists_ref_h.size(), stream); + + out_dists.resize(n_rows * k, stream); + out_indices.resize(n_rows * k, stream); + } + + raft::resources handle; + + int n_rows, nnz, k; + + // input data + rmm::device_uvector indptr, indices; + rmm::device_uvector data; + + // output data + rmm::device_uvector out_indices; + rmm::device_uvector out_dists; + + rmm::device_uvector out_indices_ref; + rmm::device_uvector out_dists_ref; + + SparseKNNInputs params; +}; + +const std::vector> inputs_i32_f = { + {9, // ncols + {0, 2, 4, 6, 8}, // indptr + {0, 4, 0, 3, 0, 2, 0, 8}, // indices + {0.0f, 1.0f, 5.0f, 6.0f, 5.0f, 6.0f, 0.0f, 1.0f}, // data + {0, 1.41421, 0, 7.87401, 
0, 7.87401, 0, 1.41421}, // dists + {0, 3, 1, 0, 2, 0, 3, 0}, // inds + 2, + 2, + 2, + cuvs::distance::DistanceType::L2SqrtExpanded}}; +typedef SparseKNNTest SparseKNNTestF; +TEST_P(SparseKNNTestF, Result) { compare(); } +INSTANTIATE_TEST_CASE_P(SparseKNNTest, SparseKNNTestF, ::testing::ValuesIn(inputs_i32_f)); + +}; // end namespace neighbors +}; // end namespace cuvs From 710e9f5a541c518deffb91f75a87cd4fe1372a8a Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 22 Nov 2024 09:25:27 -0500 Subject: [PATCH 12/15] Add `kIsSingleSource` to `PairwiseDistanceEpilogueElementwise` (#485) With raft having recently migrated to cutlass 3.5.1, this field is now required. Also remove `raft_cutlass` from symbol exclusions. Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/485 --- .github/workflows/pr.yaml | 2 +- .github/workflows/test.yaml | 2 +- .../distance/detail/pairwise_distance_epilogue_elementwise.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e18e82df0..78648235f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -88,7 +88,7 @@ jobs: with: build_type: pull-request enable_check_symbols: true - symbol_exclusions: (void (thrust::|cub::)|raft_cutlass) + symbol_exclusions: (void (thrust::|cub::)) conda-python-build: needs: conda-cpp-build secrets: inherit diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5f60c0a34..27dc99a11 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,7 +23,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} enable_check_symbols: true - symbol_exclusions: (void (thrust::|cub::)|raft_cutlass) + symbol_exclusions: (void (thrust::|cub::)) conda-cpp-tests: secrets: inherit uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 diff --git a/cpp/src/distance/detail/pairwise_distance_epilogue_elementwise.h b/cpp/src/distance/detail/pairwise_distance_epilogue_elementwise.h index f9955334d..f4a7feaba 100644 --- a/cpp/src/distance/detail/pairwise_distance_epilogue_elementwise.h +++ b/cpp/src/distance/detail/pairwise_distance_epilogue_elementwise.h @@ -61,6 +61,7 @@ class PairwiseDistanceEpilogueElementwise { using ElementT = ElementT_; static int const kElementsPerAccess = ElementsPerAccess; static int const kCount = kElementsPerAccess; + static bool const kIsSingleSource = true; using DistanceOp = DistanceOp_; using FinalOp = FinalOp_; From 96d98b12df0030bc21c8588e8905df9cdc00784e Mon Sep 17 00:00:00 2001 From: Azurethi Date: Sat, 23 Nov 2024 11:02:30 -0500 Subject: [PATCH 13/15] Fix broken link in README.md references (#473) Fixed the broken link for "Top-K Algorithms on GPU: A Comprehensive Study and New Methods" Authors: - https://github.com/Azurethi Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/473 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 572e8d098..23759f598 100755 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ If you are interested in contributing to the cuVS library, please read our [Cont For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research. 
- [CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search](https://arxiv.org/abs/2308.15136) -- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062>) +- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062) - [Fast K-NN Graph Construction by GPU Based NN-Descent](https://dl.acm.org/doi/abs/10.1145/3459637.3482344?casa_token=O_nan1B1F5cAAAAA:QHWDEhh0wmd6UUTLY9_Gv6c3XI-5DXM9mXVaUXOYeStlpxTPmV3nKvABRfoivZAaQ3n8FWyrkWw>) - [cuSLINK: Single-linkage Agglomerative Clustering on the GPU](https://arxiv.org/abs/2306.16354) - [GPU Semiring Primitives for Sparse Neighborhood Methods](https://arxiv.org/abs/2104.06357) From e1359e1a36ee48d2474a03a3b05c67b6610b220c Mon Sep 17 00:00:00 2001 From: Micka Date: Mon, 25 Nov 2024 21:09:26 +0100 Subject: [PATCH 14/15] Add serialization API to brute-force (#461) I noticed it was missing while switching Milvus to cuVS Authors: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/461 --- .gitignore | 1 + cpp/CMakeLists.txt | 1 + cpp/include/cuvs/neighbors/brute_force.h | 60 +++++ cpp/include/cuvs/neighbors/brute_force.hpp | 243 ++++++++++++++++++ cpp/src/neighbors/brute_force.cu | 15 ++ cpp/src/neighbors/brute_force_c.cpp | 55 +++- cpp/src/neighbors/brute_force_serialize.cu | 169 ++++++++++++ cpp/test/neighbors/ann_brute_force.cuh | 18 +- docs/source/c_api/neighbors_bruteforce_c.rst | 8 + docs/source/c_api/neighbors_hnsw_c.rst | 4 +- docs/source/c_api/neighbors_ivf_flat_c.rst | 8 + docs/source/c_api/neighbors_ivf_pq_c.rst | 8 + docs/source/cpp_api/neighbors_bruteforce.rst | 8 + .../python_api/neighbors_brute_force.rst | 10 + docs/source/python_api/neighbors_cagra.rst | 10 + docs/source/python_api/neighbors_hnsw.rst | 10 + docs/source/python_api/neighbors_ivf_flat.rst | 10 + docs/source/python_api/neighbors_ivf_pq.rst | 10 + .../cuvs/neighbors/brute_force/__init__.py | 4 +- .../neighbors/brute_force/brute_force.pxd | 8 + .../neighbors/brute_force/brute_force.pyx | 86 +++++++ python/cuvs/cuvs/test/test_serialization.py | 38 ++- 22 files changed, 767 insertions(+), 17 deletions(-) create mode 100644 cpp/src/neighbors/brute_force_serialize.cu diff --git a/.gitignore b/.gitignore index 97eab287d..da6eb07f6 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ compile_commands.json .clangd/ # serialized ann indexes +brute_force_index cagra_index ivf_flat_index ivf_pq_index diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 32093776c..eb2e7c7a4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -371,6 +371,7 @@ if(BUILD_SHARED_LIBS) src/distance/pairwise_distance.cu src/distance/sparse_distance.cu src/neighbors/brute_force.cu + src/neighbors/brute_force_serialize.cu src/neighbors/cagra_build_float.cu src/neighbors/cagra_build_half.cu src/neighbors/cagra_build_int8.cu diff --git a/cpp/include/cuvs/neighbors/brute_force.h 
b/cpp/include/cuvs/neighbors/brute_force.h index c9e172f62..33b92f11b 100644 --- a/cpp/include/cuvs/neighbors/brute_force.h +++ b/cpp/include/cuvs/neighbors/brute_force.h @@ -166,6 +166,66 @@ cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, * @} */ +/** + * @defgroup bruteforce_c_serialize BRUTEFORCE C-API serialize functions + * @{ + */ +/** + * Save the index to file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. + * + * @code{.c} + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsBruteforceBuild` + * cuvsBruteForceSerialize(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the file name for saving the index + * @param[in] index BRUTEFORCE index + * + */ +cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res, + const char* filename, + cuvsBruteForceIndex_t index); + +/** + * Load index from file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. 
+ * + * @code{.c} + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Deserialize an index previously built with `cuvsBruteForceBuild` + * cuvsBruteForceIndex_t index; + * cuvsBruteForceIndexCreate(&index); + * cuvsBruteForceDeserialize(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file that stores the index + * @param[out] index BRUTEFORCE index loaded from disk + */ +cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res, + const char* filename, + cuvsBruteForceIndex_t index); + +/** + * @} + */ #ifdef __cplusplus } #endif diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index ba67797ee..d040e03db 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -48,6 +48,14 @@ struct index : cuvs::neighbors::index { index& operator=(index&&) = default; ~index() = default; + /** + * @brief Construct an empty index. + * + * Constructs an empty index. This index will either need to be trained with `build` + * or loaded from a saved copy with `deserialize` + */ + index(raft::resources const& handle); + /** Construct a brute force index from dataset * * Constructs a brute force index from a dataset. This lets us precompute norms for @@ -479,4 +487,239 @@ void search(raft::resources const& handle, /** * @} */ + +/** + * @defgroup bruteforce_cpp_index_serialize Bruteforce index serialize functions + * @{ + */ +/** + * Save the index to file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. 
+ * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * // create an index with `auto index = brute_force::build(...);` + * cuvs::neighbors::brute_force::serialize(handle, filename, index); + * @endcode + * + * @tparam T data element type + * + * @param[in] handle the raft handle + * @param[in] filename the file name for saving the index + * @param[in] index brute force index + * @param[in] include_dataset whether to include the dataset in the serialized + * output + */ +void serialize(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::brute_force::index& index, + bool include_dataset = true); +/** + * Save the index to file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. + * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * // create an index with `auto index = brute_force::build(...);` + * cuvs::neighbors::brute_force::serialize(handle, filename, index); + * @endcode + * + * @tparam T data element type + * + * @param[in] handle the raft handle + * @param[in] filename the file name for saving the index + * @param[in] index brute force index + * @param[in] include_dataset whether to include the dataset in the serialized + * output + * + */ +void serialize(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::brute_force::index& index, + bool include_dataset = true); + +/** + * Write the index to an output stream + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. 
+ * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create an output stream + * std::ostream os(std::cout.rdbuf()); + * // create an index with `auto index = cuvs::neighbors::brute_force::build(...);` + * cuvs::neighbors::brute_force::serialize(handle, os, index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] os output stream + * @param[in] index brute force index + * @param[in] include_dataset Whether or not to write out the dataset to the file. + */ +void serialize(raft::resources const& handle, + std::ostream& os, + const cuvs::neighbors::brute_force::index& index, + bool include_dataset = true); + +/** + * Write the index to an output stream + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. + * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create an output stream + * std::ostream os(std::cout.rdbuf()); + * // create an index with `auto index = cuvs::neighbors::brute_force::build(...);` + * cuvs::neighbors::brute_force::serialize(handle, os, index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] os output stream + * @param[in] index brute force index + * @param[in] include_dataset Whether or not to write out the dataset to the file. + */ +void serialize(raft::resources const& handle, + std::ostream& os, + const cuvs::neighbors::brute_force::index& index, + bool include_dataset = true); + +/** + * Load index from file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. 
+ * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * using T = half; // data element type + * brute_force::index index(handle); + * cuvs::neighbors::brute_force::deserialize(handle, filename, &index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] filename the name of the file that stores the index + * @param[out] index brute force index + * + */ +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::brute_force::index* index); +/** + * Load index from file. + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. + * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * using T = float; // data element type + * brute_force::index index(handle); + * cuvs::neighbors::brute_force::deserialize(handle, filename, &index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] filename the name of the file that stores the index + * @param[out] index brute force index + * + */ +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::brute_force::index* index); +/** + * Load index from input stream + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work.
+ * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create an input stream + * std::istream is(std::cin.rdbuf()); + * using T = half; // data element type + * brute_force::index index(handle); + * cuvs::neighbors::brute_force::deserialize(handle, is, &index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] is input stream + * @param[out] index brute force index + * + */ +void deserialize(raft::resources const& handle, + std::istream& is, + cuvs::neighbors::brute_force::index* index); +/** + * Load index from input stream + * The serialization format can be subject to changes, therefore loading + * an index saved with a previous version of cuvs is not guaranteed + * to work. + * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create an input stream + * std::istream is(std::cin.rdbuf()); + * using T = float; // data element type + * brute_force::index index(handle); + * cuvs::neighbors::brute_force::deserialize(handle, is, &index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] is input stream + * @param[out] index brute force index + * + */ +void deserialize(raft::resources const& handle, + std::istream& is, + cuvs::neighbors::brute_force::index* index); +/** + * @} + */ + } // namespace cuvs::neighbors::brute_force diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index b0f87e9ac..d534676e3 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -21,6 +21,21 @@ #include namespace cuvs::neighbors::brute_force { + +template +index::index(raft::resources const& res) + // this constructor is just for a temporary index, for use in the deserialization + // api. all the parameters here will get replaced with loaded values - that aren't + // necessarily known ahead of time before deserialization. + // TODO: do we even need a handle here - could just construct one?
+ : cuvs::neighbors::index(), + metric_(cuvs::distance::DistanceType::L2Expanded), + dataset_(raft::make_device_matrix(res, 0, 0)), + norms_(std::nullopt), + metric_arg_(0) +{ +} + template index::index(raft::resources const& res, raft::host_matrix_view dataset, diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index eda79aa31..f1a8c995d 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -17,10 +17,12 @@ #include #include +#include #include #include #include +#include #include #include @@ -91,6 +93,22 @@ void _search(cuvsResources_t res, } } +template +void _serialize(cuvsResources_t res, const char* filename, cuvsBruteForceIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::neighbors::brute_force::serialize(*res_ptr, std::string(filename), *index_ptr); +} + +template +void* _deserialize(cuvsResources_t res, const char* filename) +{ + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::brute_force::index(*res_ptr); + cuvs::neighbors::brute_force::deserialize(*res_ptr, std::string(filename), index); + return index; +} } // namespace extern "C" cuvsError_t cuvsBruteForceIndexCreate(cuvsBruteForceIndex_t* index) @@ -129,7 +147,7 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res, if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { index->addr = reinterpret_cast(_build(res, dataset_tensor, metric, metric_arg)); - index->dtype.code = kDLFloat; + index->dtype = dataset.dtype; } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dataset.dtype.code, @@ -174,3 +192,38 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, } }); } + +extern "C" cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res, + const char* filename, + cuvsBruteForceIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + // read the numpy dtype from the beginning 
of the file + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename); } + char dtype_string[4]; + is.read(dtype_string, 4); + auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + + index->dtype.bits = dtype.itemsize * 8; + if (dtype.kind == 'f' && dtype.itemsize == 4) { + index->dtype.code = kDLFloat; + index->addr = reinterpret_cast(_deserialize(res, filename)); + } else { + RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res, + const char* filename, + cuvsBruteForceIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + if (index->dtype.code == kDLFloat && index->dtype.bits == 32) { + _serialize(res, filename, *index); + } else { + RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits); + } + }); +} \ No newline at end of file diff --git a/cpp/src/neighbors/brute_force_serialize.cu b/cpp/src/neighbors/brute_force_serialize.cu new file mode 100644 index 000000000..1b5b5111e --- /dev/null +++ b/cpp/src/neighbors/brute_force_serialize.cu @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::brute_force { + +int constexpr serialization_version = 0; + +template +void serialize(raft::resources const& handle, + std::ostream& os, + const index& index, + bool include_dataset = true) +{ + RAFT_LOG_DEBUG( + "Saving brute force index, size %zu, dim %u", static_cast(index.size()), index.dim()); + + auto dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + dtype_string.resize(4); + os << dtype_string; + + raft::serialize_scalar(handle, os, serialization_version); + raft::serialize_scalar(handle, os, index.size()); + raft::serialize_scalar(handle, os, index.dim()); + raft::serialize_scalar(handle, os, index.metric()); + raft::serialize_scalar(handle, os, index.metric_arg()); + raft::serialize_scalar(handle, os, include_dataset); + if (include_dataset) { raft::serialize_mdspan(handle, os, index.dataset()); } + auto has_norms = index.has_norms(); + raft::serialize_scalar(handle, os, has_norms); + if (has_norms) { raft::serialize_mdspan(handle, os, index.norms()); } + raft::resource::sync_stream(handle); +} + +void serialize(raft::resources const& handle, + const std::string& filename, + const index& index, + bool include_dataset) +{ + auto os = std::ofstream{filename, std::ios::out | std::ios::binary}; + RAFT_EXPECTS(os, "Cannot open file %s", filename.c_str()); + serialize(handle, os, index, include_dataset); +} + +void serialize(raft::resources const& handle, + const std::string& filename, + const index& index, + bool include_dataset) +{ + auto os = std::ofstream{filename, std::ios::out | std::ios::binary}; + RAFT_EXPECTS(os, "Cannot open file %s", filename.c_str()); + serialize(handle, os, index, include_dataset); +} + +void serialize(raft::resources const& handle, + std::ostream& os, + const index& index, + bool include_dataset) +{ + serialize(handle, os, index, include_dataset); +} + +void serialize(raft::resources const& handle, + 
std::ostream& os, + const index& index, + bool include_dataset) +{ + serialize(handle, os, index, include_dataset); +} + +template +auto deserialize(raft::resources const& handle, std::istream& is) +{ + auto dtype_string = std::array{}; + is.read(dtype_string.data(), 4); + + auto ver = raft::deserialize_scalar(handle, is); + if (ver != serialization_version) { + RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); + } + std::int64_t rows = raft::deserialize_scalar(handle, is); + std::int64_t dim = raft::deserialize_scalar(handle, is); + auto metric = raft::deserialize_scalar(handle, is); + auto metric_arg = raft::deserialize_scalar(handle, is); + + auto dataset_storage = raft::make_host_matrix(std::int64_t{}, std::int64_t{}); + auto include_dataset = raft::deserialize_scalar(handle, is); + if (include_dataset) { + dataset_storage = raft::make_host_matrix(rows, dim); + raft::deserialize_mdspan(handle, is, dataset_storage.view()); + } + + auto has_norms = raft::deserialize_scalar(handle, is); + auto norms_storage = has_norms ? std::optional{raft::make_host_vector(rows)} + : std::optional>{}; + // TODO(wphicks): Use mdbuffer here when available + auto norms_storage_dev = + has_norms ? 
std::optional{raft::make_device_vector(handle, rows)} + : std::optional>{}; + if (has_norms) { + raft::deserialize_mdspan(handle, is, norms_storage->view()); + raft::copy(handle, norms_storage_dev->view(), norms_storage->view()); + } + + auto result = index(handle, + raft::make_const_mdspan(dataset_storage.view()), + std::move(norms_storage_dev), + metric, + metric_arg); + raft::resource::sync_stream(handle); + + return result; +} + +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::brute_force::index* index) +{ + auto is = std::ifstream{filename, std::ios::in | std::ios::binary}; + RAFT_EXPECTS(is, "Cannot open file %s", filename.c_str()); + + *index = deserialize(handle, is); +} + +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::brute_force::index* index) +{ + auto is = std::ifstream{filename, std::ios::in | std::ios::binary}; + RAFT_EXPECTS(is, "Cannot open file %s", filename.c_str()); + + *index = deserialize(handle, is); +} + +void deserialize(raft::resources const& handle, + std::istream& is, + cuvs::neighbors::brute_force::index* index) +{ + *index = deserialize(handle, is); +} + +void deserialize(raft::resources const& handle, + std::istream& is, + cuvs::neighbors::brute_force::index* index) +{ + *index = deserialize(handle, is); +} + +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_brute_force.cuh b/cpp/test/neighbors/ann_brute_force.cuh index c2afa4e8b..03d6e820c 100644 --- a/cpp/test/neighbors/ann_brute_force.cuh +++ b/cpp/test/neighbors/ann_brute_force.cuh @@ -114,12 +114,28 @@ class AnnBruteForceTest : public ::testing::TestWithParam(handle_); + brute_force::deserialize(handle_, std::string{"brute_force_index"}, &index_loaded); + brute_force::search(handle_, - idx, + index_loaded, search_queries_view, indices_out_view, dists_out_view, cuvs::neighbors::filtering::none_sample_filter{}); + raft::resource::sync_stream(handle_); 
+ + ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(indices_naive_dev.data(), + indices_bruteforce_dev.data(), + distances_naive_dev.data(), + distances_bruteforce_dev.data(), + ps.num_queries, + ps.k, + 0.001f, + stream_, + true)); } } diff --git a/docs/source/c_api/neighbors_bruteforce_c.rst b/docs/source/c_api/neighbors_bruteforce_c.rst index af0356eee..a12175209 100644 --- a/docs/source/c_api/neighbors_bruteforce_c.rst +++ b/docs/source/c_api/neighbors_bruteforce_c.rst @@ -32,3 +32,11 @@ Index search :project: cuvs :members: :content-only: + +Index serialize +--------------- + +.. doxygengroup:: bruteforce_c_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_hnsw_c.rst b/docs/source/c_api/neighbors_hnsw_c.rst index 4d83cd3e3..988e5b6f3 100644 --- a/docs/source/c_api/neighbors_hnsw_c.rst +++ b/docs/source/c_api/neighbors_hnsw_c.rst @@ -29,13 +29,13 @@ Index Index search ------------ -.. doxygengroup:: cagra_c_index_search +.. doxygengroup:: hnsw_c_index_search :project: cuvs :members: :content-only: Index serialize ------------- +--------------- .. doxygengroup:: hnsw_c_index_serialize :project: cuvs diff --git a/docs/source/c_api/neighbors_ivf_flat_c.rst b/docs/source/c_api/neighbors_ivf_flat_c.rst index 9e1ccc0d1..1254d70ef 100644 --- a/docs/source/c_api/neighbors_ivf_flat_c.rst +++ b/docs/source/c_api/neighbors_ivf_flat_c.rst @@ -48,3 +48,11 @@ Index search :project: cuvs :members: :content-only: + +Index serialize +--------------- + +.. doxygengroup:: ivf_flat_c_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_ivf_pq_c.rst b/docs/source/c_api/neighbors_ivf_pq_c.rst index 070719609..260057b8c 100644 --- a/docs/source/c_api/neighbors_ivf_pq_c.rst +++ b/docs/source/c_api/neighbors_ivf_pq_c.rst @@ -48,3 +48,11 @@ Index search :project: cuvs :members: :content-only: + +Index serialize +--------------- + +.. 
doxygengroup:: ivf_pq_c_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/neighbors_bruteforce.rst b/docs/source/cpp_api/neighbors_bruteforce.rst index 3adcb01c5..f75e26b3c 100644 --- a/docs/source/cpp_api/neighbors_bruteforce.rst +++ b/docs/source/cpp_api/neighbors_bruteforce.rst @@ -34,3 +34,11 @@ Index search :project: cuvs :members: :content-only: + +Index serialize +--------------- + +.. doxygengroup:: bruteforce_cpp_index_serialize + :project: cuvs + :members: + :content-only: diff --git a/docs/source/python_api/neighbors_brute_force.rst b/docs/source/python_api/neighbors_brute_force.rst index 5fdc3658f..d756a6c80 100644 --- a/docs/source/python_api/neighbors_brute_force.rst +++ b/docs/source/python_api/neighbors_brute_force.rst @@ -20,3 +20,13 @@ Index search ############ .. autofunction:: cuvs.neighbors.brute_force.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.brute_force.save + +Index load +########## + +.. autofunction:: cuvs.neighbors.brute_force.load diff --git a/docs/source/python_api/neighbors_cagra.rst b/docs/source/python_api/neighbors_cagra.rst index 09b2e2694..e7155efb8 100644 --- a/docs/source/python_api/neighbors_cagra.rst +++ b/docs/source/python_api/neighbors_cagra.rst @@ -34,3 +34,13 @@ Index search ############ .. autofunction:: cuvs.neighbors.cagra.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.cagra.save + +Index load +########## + +.. autofunction:: cuvs.neighbors.cagra.load diff --git a/docs/source/python_api/neighbors_hnsw.rst b/docs/source/python_api/neighbors_hnsw.rst index 9922805b3..64fe5493b 100644 --- a/docs/source/python_api/neighbors_hnsw.rst +++ b/docs/source/python_api/neighbors_hnsw.rst @@ -28,3 +28,13 @@ Index search ############ .. autofunction:: cuvs.neighbors.hnsw.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.hnsw.save + +Index load +########## + +.. 
autofunction:: cuvs.neighbors.hnsw.load diff --git a/docs/source/python_api/neighbors_ivf_flat.rst b/docs/source/python_api/neighbors_ivf_flat.rst index 5514e5e43..f2c21e68a 100644 --- a/docs/source/python_api/neighbors_ivf_flat.rst +++ b/docs/source/python_api/neighbors_ivf_flat.rst @@ -32,3 +32,13 @@ Index search ############ .. autofunction:: cuvs.neighbors.ivf_flat.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.ivf_flat.save + +Index load +########## + +.. autofunction:: cuvs.neighbors.ivf_flat.load diff --git a/docs/source/python_api/neighbors_ivf_pq.rst b/docs/source/python_api/neighbors_ivf_pq.rst index e3625ba67..57668fbc3 100644 --- a/docs/source/python_api/neighbors_ivf_pq.rst +++ b/docs/source/python_api/neighbors_ivf_pq.rst @@ -32,3 +32,13 @@ Index search ############ .. autofunction:: cuvs.neighbors.ivf_pq.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.ivf_pq.save + +Index load +########## + +.. autofunction:: cuvs.neighbors.ivf_pq.load diff --git a/python/cuvs/cuvs/neighbors/brute_force/__init__.py b/python/cuvs/cuvs/neighbors/brute_force/__init__.py index b88c4b464..6aa0e4bb2 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/__init__.py +++ b/python/cuvs/cuvs/neighbors/brute_force/__init__.py @@ -13,6 +13,6 @@ # limitations under the License. 
-from .brute_force import Index, build, search +from .brute_force import Index, build, load, save, search -__all__ = ["Index", "build", "search"] +__all__ = ["Index", "build", "search", "save", "load"] diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd index 183827916..f1fc14ba7 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd @@ -47,3 +47,11 @@ cdef extern from "cuvs/neighbors/brute_force.h" nogil: DLManagedTensor* neighbors, DLManagedTensor* distances, cuvsFilter filter) except + + + cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res, + const char * filename, + cuvsBruteForceIndex_t index) except + + + cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res, + const char * filename, + cuvsBruteForceIndex_t index) except + diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 9d1d24eae..9d43bfb29 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -24,6 +24,7 @@ from cuvs.common.resources import auto_sync_resources from cython.operator cimport dereference as deref from libc.stdint cimport uint32_t from libcpp cimport bool +from libcpp.string cimport string from cuvs.common cimport cydlpack from cuvs.distance_type cimport cuvsDistanceType @@ -256,3 +257,88 @@ def search(Index index, )) return (distances, neighbors) + + +@auto_sync_resources +def save(filename, Index index, bool include_dataset=True, resources=None): + """ + Saves the index to a file. + + The serialization format can be subject to changes, therefore loading + an index saved with a previous version of cuvs is not guaranteed + to work. + + Parameters + ---------- + filename : string + Name of the file. + index : Index + Trained Brute Force index. 
+ {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import brute_force + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> index = brute_force.build(dataset) + >>> # Serialize and deserialize the brute_force index built + >>> brute_force.save("my_index.bin", index) + >>> index_loaded = brute_force.load("my_index.bin") + """ + cdef string c_filename = filename.encode('utf-8') + cdef cuvsResources_t res = resources.get_c_obj() + check_cuvs(cuvsBruteForceSerialize(res, + c_filename.c_str(), + index.index)) + + +@auto_sync_resources +def load(filename, resources=None): + """ + Loads index from file. + + The serialization format can be subject to changes, therefore loading + an index saved with a previous version of cuvs is not guaranteed + to work. + + + Parameters + ---------- + filename : string + Name of the file. + {resources_docstring} + + Returns + ------- + index : Index + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import brute_force + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... 
dtype=cp.float32) + >>> # Build index + >>> index = brute_force.build(dataset) + >>> # Serialize and deserialize the brute_force index built + >>> brute_force.save("my_index.bin", index) + >>> index_loaded = brute_force.load("my_index.bin") + """ + cdef Index idx = Index() + cdef cuvsResources_t res = resources.get_c_obj() + cdef string c_filename = filename.encode('utf-8') + + check_cuvs(cuvsBruteForceDeserialize( + res, + c_filename.c_str(), + idx.index + )) + idx.trained = True + return idx diff --git a/python/cuvs/cuvs/test/test_serialization.py b/python/cuvs/cuvs/test/test_serialization.py index 4ffccf121..1f4a54e87 100644 --- a/python/cuvs/cuvs/test/test_serialization.py +++ b/python/cuvs/cuvs/test/test_serialization.py @@ -17,7 +17,7 @@ import pytest from pylibraft.common import device_ndarray -from cuvs.neighbors import cagra, ivf_flat, ivf_pq +from cuvs.neighbors import brute_force, cagra, ivf_flat, ivf_pq from cuvs.test.ann_utils import generate_data @@ -35,6 +35,10 @@ def test_save_load_ivf_pq(): run_save_load(ivf_pq, np.float32) +def test_save_load_brute_force(): + run_save_load(brute_force, np.float32) + + def run_save_load(ann_module, dtype): n_rows = 10000 n_cols = 50 @@ -43,8 +47,11 @@ def run_save_load(ann_module, dtype): dataset = generate_data((n_rows, n_cols), dtype) dataset_device = device_ndarray(dataset) - build_params = ann_module.IndexParams() - index = ann_module.build(build_params, dataset_device) + if ann_module == brute_force: + index = ann_module.build(dataset_device) + else: + build_params = ann_module.IndexParams() + index = ann_module.build(build_params, dataset_device) assert index.trained filename = "my_index.bin" @@ -54,20 +61,29 @@ def run_save_load(ann_module, dtype): queries = generate_data((n_queries, n_cols), dtype) queries_device = device_ndarray(queries) - search_params = ann_module.SearchParams() k = 10 - - distance_dev, neighbors_dev = ann_module.search( - search_params, index, queries_device, k - ) + if ann_module == 
brute_force: + distance_dev, neighbors_dev = ann_module.search( + index, queries_device, k + ) + else: + search_params = ann_module.SearchParams() + distance_dev, neighbors_dev = ann_module.search( + search_params, index, queries_device, k + ) neighbors = neighbors_dev.copy_to_host() dist = distance_dev.copy_to_host() del index - distance_dev, neighbors_dev = ann_module.search( - search_params, loaded_index, queries_device, k - ) + if ann_module == brute_force: + distance_dev, neighbors_dev = ann_module.search( + loaded_index, queries_device, k + ) + else: + distance_dev, neighbors_dev = ann_module.search( + search_params, loaded_index, queries_device, k + ) neighbors2 = neighbors_dev.copy_to_host() dist2 = distance_dev.copy_to_host() From 5062594138a40231475299c7bac61083b0669fd1 Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:50:41 +0900 Subject: [PATCH 15/15] [Doc] Fix CAGRA search sample code (#484) `.view()` is required Authors: - tsuki (https://github.com/enp1s0) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/cuvs/pull/484 --- cpp/include/cuvs/neighbors/cagra.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index e48050756..5ceb3010e 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -363,7 +363,7 @@ struct index : cuvs::neighbors::index { * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * In the above example, we have passed a host dataset to build. The returned index will own a * device copy of the dataset and the knn_graph. 
In contrast, if we pass the dataset as a @@ -530,7 +530,7 @@ struct index : cuvs::neighbors::index { * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -567,7 +567,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -604,7 +604,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -640,7 +640,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -676,7 +676,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, 
distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -713,7 +713,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -750,7 +750,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res @@ -787,7 +787,7 @@ auto build(raft::resources const& res, * // search K nearest neighbours * auto neighbors = raft::make_device_matrix(res, n_queries, k); * auto distances = raft::make_device_matrix(res, n_queries, k); - * cagra::search(res, search_params, index, queries, neighbors, distances); + * cagra::search(res, search_params, index, queries, neighbors.view(), distances.view()); * @endcode * * @param[in] res