From aba3fa7bb38690d394082aff65531c5858836450 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 26 Nov 2024 08:55:39 -0600
Subject: [PATCH 1/7] Update example code fetching rapids-cmake to use CUVS
 instead of RAFT (#493)

Small update to CMake example code to use cuVS instead of RAFT.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Micka (https://github.com/lowener)

URL: https://github.com/rapidsai/cuvs/pull/493
---
 examples/cmake/thirdparty/fetch_rapids.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/cmake/thirdparty/fetch_rapids.cmake b/examples/cmake/thirdparty/fetch_rapids.cmake
index 23c8490b4..3c5510b8b 100644
--- a/examples/cmake/thirdparty/fetch_rapids.cmake
+++ b/examples/cmake/thirdparty/fetch_rapids.cmake
@@ -11,11 +11,11 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 
-# Use this variable to update RAPIDS and RAFT versions
+# Use this variable to update RAPIDS and cuVS versions
 set(RAPIDS_VERSION "25.02")
 
-if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake)
     file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake
-            ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
+            ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake)
 endif()
-include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
+include(${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake)

From 6e5c0c8b0ce57b4b2069cbe5255619f210420792 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 26 Nov 2024 11:24:01 -0600
Subject: [PATCH 2/7] Remove RAFT BUILD_ANN_BENCH option (#497)

This cleans up a reference to RAFT's `BUILD_ANN_BENCH` CMake option which no longer exists.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/cuvs/pull/497
---
 cpp/cmake/thirdparty/get_raft.cmake | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index 7640fbfa6..5def74f4b 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -50,7 +50,6 @@ function(find_and_configure_raft)
               OPTIONS
               "BUILD_TESTS OFF"
               "BUILD_PRIMS_BENCH OFF"
-              "BUILD_ANN_BENCH OFF"
               "RAFT_NVTX ${PKG_ENABLE_NVTX}"
               "RAFT_COMPILE_LIBRARY OFF"
             )

From bd0620df0c143711352c9f5e312268aaa1801cbd Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Tue, 26 Nov 2024 15:28:58 -0600
Subject: [PATCH 3/7] Add breaking change workflow trigger (#442)

Adds a workflow that triggers a second workflow which sends a
notification to a designated Slack channel on every PR labelled with
breaking, whenever any of the following events are triggered on the PR:

- closed
- reopened
- labeled
- unlabeled

Depends on https://github.com/rapidsai/shared-workflows/pull/257
---
 .../trigger-breaking-change-alert.yaml        | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 .github/workflows/trigger-breaking-change-alert.yaml

diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
new file mode 100644
index 000000000..3b972f31c
--- /dev/null
+++ b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -0,0 +1,26 @@
+name: Trigger Breaking Change Notifications
+
+on:
+  pull_request_target:
+    types:
+      - closed
+      - reopened
+      - labeled
+      - unlabeled
+
+jobs:
+  trigger-notifier:
+    if: contains(github.event.pull_request.labels.*.name, 'breaking')
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12
+    with:
+      sender_login: ${{ github.event.sender.login }}
+      sender_avatar: ${{ github.event.sender.avatar_url }}
+      repo: ${{ github.repository }}
+      pr_number: ${{ github.event.pull_request.number }}
+      pr_title: "${{ github.event.pull_request.title }}"
+      pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}"
+      pr_base_ref: ${{ github.event.pull_request.base.ref }}
+      pr_author: ${{ github.event.pull_request.user.login }}
+      event_action: ${{ github.event.action }}
+      pr_merged: ${{ github.event.pull_request.merged }}

From ae6816c9be622fe18f1d80f3b32b70ac9f5566fe Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 26 Nov 2024 16:32:11 -0600
Subject: [PATCH 4/7] Require approval to run CI on draft PRs (#498)

By default, CI runs on draft PRs. This leads to many CI runs that may be unnecessary.

With this PR's change to `.github/copy-pr-bot.yaml`, an `/ok to test` comment from a trusted user is required to trigger CI on draft PRs. Non-draft PRs will run CI by default, assuming that all commits are signed by trusted users. Otherwise an `/ok to test` is required (as before) -- see the `copy-pr-bot` docs at https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ for more information.

Part of https://github.com/rapidsai/build-planning/issues/123.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cuvs/pull/498
---
 .github/copy-pr-bot.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
index 895ba83ee..e0ea775aa 100644
--- a/.github/copy-pr-bot.yaml
+++ b/.github/copy-pr-bot.yaml
@@ -2,3 +2,4 @@
 # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
 
 enabled: true
+auto_sync_draft: false

From 31c59ce0cae2505c89e8e4cdd8d77fd29256df4a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 29 Nov 2024 16:42:44 -0800
Subject: [PATCH 5/7] Adapt to rmm logger changes (#499)

This PR adapts to breaking changes in rmm in https://github.com/rapidsai/rmm/pull/1722.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/cuvs/pull/499
---
 cpp/CMakeLists.txt                                  |  9 +++++----
 cpp/bench/ann/CMakeLists.txt                        |  8 +++++++-
 cpp/test/CMakeLists.txt                             |  4 ++++
 examples/cpp/CMakeLists.txt                         | 13 ++++++++-----
 python/cuvs/CMakeLists.txt                          |  3 +++
 python/cuvs/cuvs/common/CMakeLists.txt              |  4 ++++
 python/cuvs/cuvs/distance/CMakeLists.txt            |  4 ++++
 python/cuvs/cuvs/neighbors/CMakeLists.txt           |  4 ++++
 .../cuvs/cuvs/neighbors/brute_force/CMakeLists.txt  |  4 ++++
 python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt     |  4 ++++
 python/cuvs/cuvs/neighbors/filters/CMakeLists.txt   |  4 ++++
 python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt      |  4 ++++
 python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt  |  4 ++++
 python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt    |  4 ++++
 python/cuvs/cuvs/test/conftest.py                   |  5 +++++
 15 files changed, 68 insertions(+), 10 deletions(-)
 create mode 100644 python/cuvs/cuvs/test/conftest.py

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index eb2e7c7a4..3c8ef69fd 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -480,12 +480,13 @@ if(BUILD_SHARED_LIBS)
                       "$<$<COMPILE_LANGUAGE:CUDA>:${CUVS_CUDA_FLAGS}>"
   )
   target_link_libraries(
-    cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES}
+    cuvs_objs PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES}
                      $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
+              PRIVATE rmm::rmm_logger_impl
   )
 
-  add_library(cuvs SHARED $<TARGET_OBJECTS:cuvs_objs>)
-  add_library(cuvs_static STATIC $<TARGET_OBJECTS:cuvs_objs>)
+  add_library(cuvs SHARED $<FILTER:$<TARGET_OBJECTS:cuvs_objs>,EXCLUDE,rmm.*logger>)
+  add_library(cuvs_static STATIC  $<FILTER:$<TARGET_OBJECTS:cuvs_objs>,EXCLUDE,rmm.*logger>)
 
   target_compile_options(
     cuvs INTERFACE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--expt-extended-lambda
@@ -696,7 +697,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENAB
     target_link_libraries(
       cuvs_c
       PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES}
-      PRIVATE raft::raft
+      PRIVATE raft::raft rmm::rmm_logger_impl
     )
 
     # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 0f6b42ae9..572b792a7 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -129,6 +129,7 @@ function(ConfigureAnnBench)
             $<$<BOOL:${GPU_BUILD}>:CUDA::cudart_static>
             $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
             $<TARGET_NAME_IF_EXISTS:conda_env>
+            $<TARGET_NAME_IF_EXISTS:cuvs_bench_rmm_logger>
   )
 
   set_target_properties(
@@ -174,6 +175,11 @@ function(ConfigureAnnBench)
   add_dependencies(CUVS_ANN_BENCH_ALL ${BENCH_NAME})
 endfunction()
 
+if(CUVS_FAISS_ENABLE_GPU)
+  add_library(cuvs_bench_rmm_logger OBJECT)
+  target_link_libraries(cuvs_bench_rmm_logger PRIVATE rmm::rmm_logger_impl)
+endif()
+
 # ##################################################################################################
 # * Configure benchmark targets -------------------------------------------------------------
 
@@ -300,7 +306,7 @@ if(CUVS_ANN_BENCH_SINGLE_EXE)
   target_link_libraries(
     ANN_BENCH
     PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only
-            spdlog::spdlog_header_only $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3>
+            spdlog::spdlog_header_only $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3> rmm::rmm_logger_impl
   )
   set_target_properties(
     ANN_BENCH
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 286d721d7..16663ba08 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -49,6 +49,7 @@ function(ConfigureTest)
     PRIVATE cuvs
             cuvs::cuvs
             raft::raft
+            test_rmm_logger
             GTest::gtest
             GTest::gtest_main
             Threads::Threads
@@ -87,6 +88,9 @@ function(ConfigureTest)
   )
 endfunction()
 
+add_library(test_rmm_logger OBJECT)
+target_link_libraries(test_rmm_logger PRIVATE rmm::rmm_logger_impl)
+
 # ##################################################################################################
 # test sources ##################################################################################
 # ##################################################################################################
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 092b65ed9..48815b870 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -44,10 +44,13 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu)
 
 # `$<TARGET_NAME_IF_EXISTS:conda_env>` is a generator expression that ensures that targets are
 # installed in a conda environment, if one exists
-target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
+add_library(rmm_logger OBJECT)
+target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl)
+
+target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger)
 target_link_libraries(
-  CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> Threads::Threads
+  CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> Threads::Threads rmm_logger
 )
-target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
-target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
-target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
+target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger)
+target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger)
+target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env> rmm_logger)
diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt
index feb3bd58c..c0990995f 100644
--- a/python/cuvs/CMakeLists.txt
+++ b/python/cuvs/CMakeLists.txt
@@ -110,6 +110,9 @@ endif()
 
 rapids_cython_init()
 
+add_library(cuvs_rmm_logger OBJECT)
+target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl)
+
 add_subdirectory(cuvs/common)
 add_subdirectory(cuvs/distance)
 add_subdirectory(cuvs/neighbors)
diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt
index 202919e01..361f2fafc 100644
--- a/python/cuvs/cuvs/common/CMakeLists.txt
+++ b/python/cuvs/cuvs/common/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt
index 363778a9c..514b08c43 100644
--- a/python/cuvs/cuvs/distance/CMakeLists.txt
+++ b/python/cuvs/cuvs/distance/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt
index f68bbea53..031fd485e 100644
--- a/python/cuvs/cuvs/neighbors/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt
@@ -29,3 +29,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_refine_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt
index 4806fb9fc..61eda649c 100644
--- a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt
@@ -23,3 +23,7 @@ rapids_cython_create_modules(
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX
                    neighbors_brute_force_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
index 87e6597fe..1f40daab2 100644
--- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt
index c90615feb..a678852d9 100644
--- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_prefilter_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt
index 1f9c422ca..8351916e6 100644
--- a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt
index 09bd8f422..f5663cdaa 100644
--- a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_ivf_flat_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt
index 97c3a1824..a24320ded 100644
--- a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt
@@ -22,3 +22,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_pq_
 )
+
+foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
+  target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger)
+endforeach()
diff --git a/python/cuvs/cuvs/test/conftest.py b/python/cuvs/cuvs/test/conftest.py
new file mode 100644
index 000000000..d84de5d21
--- /dev/null
+++ b/python/cuvs/cuvs/test/conftest.py
@@ -0,0 +1,5 @@
+# arm tests sporadically run into
+# https://bugzilla.redhat.com/show_bug.cgi?id=1722181.
+# This is a workaround to ensure that OpenMP gets the TLS that it needs.
+
+import sklearn  # noqa: F401

From 121588259b3f48381c6c154556e78e46b3119eb4 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 2 Dec 2024 14:18:23 -0600
Subject: [PATCH 6/7] prefer system install of UCX in devcontainers (#501)

Contributes to https://github.com/rapidsai/build-planning/issues/118

Proposes the following changes for pip devcontainers:

* prefer system installation of ucx to the one provided by the `libucx-cu{11,12}` wheels (ref: https://github.com/rapidsai/devcontainers/pull/421#issuecomment-2502324982)

And some other related changes noticed while doing that:

* update lingering `24.*` references to `25.02`

## Notes for Reviewers

### How I tested this

Relying on CI for most things. Double-checked that `update-version.sh` would have caught the one lingering `24.12` reference like this:

```shell
./ci/release/update-version.sh '25.02.00'
git grep -E '24\.'
```

Similar to https://github.com/rapidsai/cuml/pull/6149

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cuvs/pull/501
---
 .devcontainer/Dockerfile                             | 1 +
 .github/workflows/trigger-breaking-change-alert.yaml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 594ba8c3c..77b90fa20 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -13,6 +13,7 @@ RUN apt update -y \
  && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
+ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
 
 FROM ${BASE} as conda-base
 
diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
index 3b972f31c..01dd2436b 100644
--- a/.github/workflows/trigger-breaking-change-alert.yaml
+++ b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -12,7 +12,7 @@ jobs:
   trigger-notifier:
     if: contains(github.event.pull_request.labels.*.name, 'breaking')
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
     with:
       sender_login: ${{ github.event.sender.login }}
       sender_avatar: ${{ github.event.sender.avatar_url }}

From 441d2f1bcceb8f653a0fdaec5658c54c5201155b Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Mon, 2 Dec 2024 17:34:08 -0500
Subject: [PATCH 7/7] HNSW CPU Hierarchy (#465)

This PR adds an option to build the full HNSW hierarchy on the CPU when converting a CAGRA index to an hnswlib index. This lets us enable an `extend()` API.

For hnswlib:
1. Update to `v0.7.0`
2. Remove dependency as symbols are compiled within DSO

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/cuvs/pull/465
---
 .../bench_ann_cuda-118_arch-aarch64.yaml      |   1 -
 .../bench_ann_cuda-118_arch-x86_64.yaml       |   1 -
 .../bench_ann_cuda-125_arch-aarch64.yaml      |   1 -
 .../bench_ann_cuda-125_arch-x86_64.yaml       |   1 -
 cpp/CMakeLists.txt                            |   1 +
 cpp/bench/ann/CMakeLists.txt                  |   4 +-
 cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu  |  34 +-
 .../ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h |  57 ++-
 cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h   |   2 +
 .../ann/src/hnswlib/hnswlib_benchmark.cpp     |   4 +-
 cpp/cmake/modules/ConfigureCUDA.cmake         |   8 +-
 cpp/cmake/patches/hnswlib.diff                | 327 ++++++-------
 cpp/cmake/patches/hnswlib_override.json       |  28 +-
 cpp/cmake/thirdparty/get_hnswlib.cmake        |   1 +
 cpp/include/cuvs/neighbors/hnsw.h             | 279 ++++++++++-
 cpp/include/cuvs/neighbors/hnsw.hpp           | 440 +++++++++++++++---
 cpp/src/neighbors/detail/hnsw.hpp             | 218 ++++++++-
 cpp/src/neighbors/hnsw.cpp                    |  57 ++-
 cpp/src/neighbors/hnsw_c.cpp                  | 157 ++++++-
 cpp/src/neighbors/iface/iface.hpp             |   1 +
 cpp/test/neighbors/ann_hnsw_c.cu              |   4 +-
 cpp/test/neighbors/hnsw.cu                    |   3 +-
 dependencies.yaml                             |   1 -
 docs/source/c_api/neighbors_hnsw_c.rst        |  22 +
 docs/source/cpp_api/neighbors_hnsw.rst        |  23 +-
 python/cuvs/cuvs/neighbors/hnsw/__init__.py   |  15 +-
 python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd      |  49 +-
 python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx      | 276 ++++++++---
 python/cuvs/cuvs/test/test_hnsw.py            |  89 +++-
 .../config/algos/cuvs_cagra_hnswlib.yaml      |   5 +-
 30 files changed, 1683 insertions(+), 426 deletions(-)

diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
index 21cb98180..1e602ccf1 100644
--- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
@@ -24,7 +24,6 @@ dependencies:
 - gcc_linux-aarch64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev=11.11.3.6
 - libcublas=11.11.3.6
 - libcurand-dev=10.3.0.86
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 432509bcb..b060e78c2 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -24,7 +24,6 @@ dependencies:
 - gcc_linux-64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev=11.11.3.6
 - libcublas=11.11.3.6
 - libcurand-dev=10.3.0.86
diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
index 0c5043ac2..485122273 100644
--- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
+++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
@@ -25,7 +25,6 @@ dependencies:
 - gcc_linux-aarch64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev
diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
index cbb22333c..d5f48dadb 100644
--- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
@@ -25,7 +25,6 @@ dependencies:
 - gcc_linux-64=11.*
 - glog>=0.6.0
 - h5py>=3.8.0
-- hnswlib=0.6.2
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index eb2e7c7a4..34b7cb898 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -577,6 +577,7 @@ if(BUILD_SHARED_LIBS)
 
   if(BUILD_CAGRA_HNSWLIB)
     target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib)
+    target_compile_definitions(cuvs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
     target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
   endif()
 
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 0f6b42ae9..c161a68bc 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -225,9 +225,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA)
 endif()
 
 if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
-  ConfigureAnnBench(
-    NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib
-  )
+  ConfigureAnnBench(NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs)
 endif()
 
 if(CUVS_ANN_BENCH_USE_CUVS_MG)
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
index 558ba01e0..e45a3bd5a 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
@@ -24,12 +24,35 @@
 
 namespace cuvs::bench {
 
+template <typename T, typename IdxT>
+void parse_build_param(const nlohmann::json& conf,
+                       typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::build_param& param)
+{
+  if (conf.contains("hierarchy")) {
+    if (conf.at("hierarchy") == "none") {
+      param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE;
+    } else if (conf.at("hierarchy") == "cpu") {
+      param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU;
+    } else {
+      THROW("Invalid value for hierarchy: %s", conf.at("hierarchy").get<std::string>().c_str());
+    }
+  }
+  if (conf.contains("ef_construction")) {
+    param.hnsw_index_params.ef_construction = conf.at("ef_construction");
+  }
+  if (conf.contains("num_threads")) {
+    param.hnsw_index_params.num_threads = conf.at("num_threads");
+  }
+}
+
 template <typename T, typename IdxT>
 void parse_search_param(const nlohmann::json& conf,
                         typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::search_param& param)
 {
-  param.ef = conf.at("ef");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  param.hnsw_search_param.ef = conf.at("ef");
+  if (conf.contains("num_threads")) {
+    param.hnsw_search_param.num_threads = conf.at("num_threads");
+  }
 }
 
 template <typename T>
@@ -43,9 +66,10 @@ auto create_algo(const std::string& algo_name,
 
   if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) {
     if (algo_name == "raft_cagra_hnswlib" || algo_name == "cuvs_cagra_hnswlib") {
-      typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param param;
-      parse_build_param<T, uint32_t>(conf, param);
-      a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, param);
+      typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param bparam;
+      ::parse_build_param<T, uint32_t>(conf, bparam.cagra_build_param);
+      parse_build_param<T, uint32_t>(conf, bparam);
+      a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, bparam);
     }
   }
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
index 875fe0bba..e4169f6f8 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
@@ -15,8 +15,8 @@
  */
 #pragma once
 
-#include "../hnswlib/hnswlib_wrapper.h"
 #include "cuvs_cagra_wrapper.h"
+#include <cuvs/neighbors/hnsw.hpp>
 
 #include <memory>
 
@@ -26,14 +26,20 @@ template <typename T, typename IdxT>
 class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
  public:
   using search_param_base = typename algo<T>::search_param;
-  using build_param       = typename cuvs_cagra<T, IdxT>::build_param;
-  using search_param      = typename hnsw_lib<T>::search_param;
+
+  struct build_param {
+    typename cuvs_cagra<T, IdxT>::build_param cagra_build_param;
+    cuvs::neighbors::hnsw::index_params hnsw_index_params;
+  };
+
+  struct search_param : public search_param_base {
+    cuvs::neighbors::hnsw::search_params hnsw_search_param;
+  };
 
   cuvs_cagra_hnswlib(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
     : algo<T>(metric, dim),
-      cagra_build_{metric, dim, param, concurrent_searches},
-      // hnsw_lib param values don't matter since we don't build with hnsw_lib
-      hnswlib_search_{metric, dim, typename hnsw_lib<T>::build_param{50, 100}}
+      build_param_{param},
+      cagra_build_{metric, dim, param.cagra_build_param, concurrent_searches}
   {
   }
 
@@ -69,40 +75,67 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
   }
 
  private:
+  raft::resources handle_{};
+  build_param build_param_;
+  search_param search_param_;
   cuvs_cagra<T, IdxT> cagra_build_;
-  hnsw_lib<T> hnswlib_search_;
+  std::shared_ptr<cuvs::neighbors::hnsw::index<T>> hnsw_index_;
 };
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
 {
   cagra_build_.build(dataset, nrow);
+  auto* cagra_index      = cagra_build_.get_index();
+  auto host_dataset_view = raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_);
+  auto opt_dataset_view =
+    std::optional<raft::host_matrix_view<const T, int64_t>>(std::move(host_dataset_view));
+  hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(
+    handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view);
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
 {
-  hnswlib_search_.set_search_param(param_);
+  search_param_ = dynamic_cast<const search_param&>(param_);
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const
 {
-  cagra_build_.save_to_hnswlib(file);
+  cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get()));
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::load(const std::string& file)
 {
-  hnswlib_search_.load(file);
-  hnswlib_search_.set_base_layer_only();
+  cuvs::neighbors::hnsw::index<T>* idx = nullptr;
+  cuvs::neighbors::hnsw::deserialize(handle_,
+                                     build_param_.hnsw_index_params,
+                                     file,
+                                     this->dim_,
+                                     parse_metric_type(this->metric_),
+                                     &idx);
+  hnsw_index_ = std::shared_ptr<cuvs::neighbors::hnsw::index<T>>(idx);
 }
 
 template <typename T, typename IdxT>
 void cuvs_cagra_hnswlib<T, IdxT>::search(
   const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
 {
-  hnswlib_search_.search(queries, batch_size, k, neighbors, distances);
+  // Only Latency mode is supported for now
+  auto queries_view =
+    raft::make_host_matrix_view<const T, int64_t>(queries, batch_size, this->dim_);
+  auto neighbors_view = raft::make_host_matrix_view<uint64_t, int64_t>(
+    reinterpret_cast<uint64_t*>(neighbors), batch_size, k);
+  auto distances_view = raft::make_host_matrix_view<float, int64_t>(distances, batch_size, k);
+
+  cuvs::neighbors::hnsw::search(handle_,
+                                search_param_.hnsw_search_param,
+                                *(hnsw_index_.get()),
+                                queries_view,
+                                neighbors_view,
+                                distances_view);
 }
 
 }  // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index b2ba35eee..f6d3d60fc 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -154,6 +154,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   void save_to_hnswlib(const std::string& file) const;
   std::unique_ptr<algo<T>> copy() override;
 
+  auto get_index() const -> const cuvs::neighbors::cagra::index<T, IdxT>* { return index_.get(); }
+
  private:
   // handle_ must go first to make sure it dies last and all memory allocated in pool
   configured_raft_resources handle_{};
diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
index 755c7c8d6..6e219d2a7 100644
--- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
+++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
@@ -33,7 +33,7 @@ void parse_build_param(const nlohmann::json& conf,
 {
   param.ef_construction = conf.at("efConstruction");
   param.m               = conf.at("M");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
 }
 
 template <typename T>
@@ -41,7 +41,7 @@ void parse_search_param(const nlohmann::json& conf,
                         typename cuvs::bench::hnsw_lib<T>::search_param& param)
 {
   param.ef = conf.at("ef");
-  if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
+  if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
 }
 
 template <typename T, template <typename> class Algo>
diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index 74da25660..3e91d9995 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -22,8 +22,12 @@ endif()
 # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
 # clang)
 if(CMAKE_COMPILER_IS_GNUCXX)
-  list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
-  list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
+  list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations
+       -Wno-reorder
+  )
+  list(APPEND CUVS_CUDA_FLAGS
+       -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-reorder
+  )
 
   # set warnings as errors
   if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
diff --git a/cpp/cmake/patches/hnswlib.diff b/cpp/cmake/patches/hnswlib.diff
index e7f89a8cc..f20c27d91 100644
--- a/cpp/cmake/patches/hnswlib.diff
+++ b/cpp/cmake/patches/hnswlib.diff
@@ -1,188 +1,159 @@
+diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h
+index bef0017..0ee7931 100644
 --- a/hnswlib/hnswalg.h
 +++ b/hnswlib/hnswalg.h
-@@ -3,6 +3,7 @@
- #include "visited_list_pool.h"
- #include "hnswlib.h"
- #include <atomic>
-+#include <limits>
- #include <random>
- #include <stdlib.h>
- #include <assert.h>
-@@ -16,6 +17,8 @@ namespace hnswlib {
-     template<typename dist_t>
-     class HierarchicalNSW : public AlgorithmInterface<dist_t> {
-     public:
-+        bool base_layer_only{false};
-+        int num_seeds=32;
-         static const tableint max_update_element_locks = 65536;
-         HierarchicalNSW(SpaceInterface<dist_t> *s) {
-         }
-@@ -56,7 +59,7 @@ namespace hnswlib {
-             visited_list_pool_ = new VisitedListPool(1, max_elements);
- 
-             //initializations for special treatment of the first node
--            enterpoint_node_ = -1;
-+            enterpoint_node_ = std::numeric_limits<tableint>::max();
-             maxlevel_ = -1;
- 
-             linkLists_ = (char **) malloc(sizeof(void *) * max_elements_);
-@@ -527,7 +530,7 @@ namespace hnswlib {
-                     tableint *datal = (tableint *) (data + 1);
-                     for (int i = 0; i < size; i++) {
-                         tableint cand = datal[i];
--                        if (cand < 0 || cand > max_elements_)
-+                        if (cand > max_elements_)
-                             throw std::runtime_error("cand error");
-                         dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
- 
-@@ -1067,7 +1070,7 @@ namespace hnswlib {
-                             tableint *datal = (tableint *) (data + 1);
-                             for (int i = 0; i < size; i++) {
-                                 tableint cand = datal[i];
--                                if (cand < 0 || cand > max_elements_)
-+                                if (cand > max_elements_)
-                                     throw std::runtime_error("cand error");
-                                 dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_);
-                                 if (d < curdist) {
-@@ -1119,28 +1122,41 @@ namespace hnswlib {
-             tableint currObj = enterpoint_node_;
-             dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);
- 
--            for (int level = maxlevel_; level > 0; level--) {
--                bool changed = true;
--                while (changed) {
--                    changed = false;
--                    unsigned int *data;
-+            if (base_layer_only) {
-+                // You can increase the number of seeds when testing large-scale dataset, num_seeds = 48 for 100M-scale
-+                for (int i = 0; i < num_seeds; i++) {
-+                    tableint obj = i * (max_elements_ / num_seeds);
-+                    dist_t dist = fstdistfunc_(query_data, getDataByInternalId(obj), dist_func_param_);
-+                    if (dist < curdist) {
-+                        curdist = dist;
-+                        currObj = obj;
-+                    }
+@@ -16,6 +16,9 @@ typedef unsigned int linklistsizeint;
+ template<typename dist_t>
+ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
+  public:
++    bool base_layer_only = false;
++    int num_seeds = 32;
++    bool base_layer_init = true;
+     static const tableint MAX_LABEL_OPERATION_LOCKS = 65536;
+     static const unsigned char DELETE_MARK = 0x01;
+ 
+@@ -1098,7 +1101,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
+ 
+         std::unique_lock <std::mutex> lock_el(link_list_locks_[cur_c]);
+         int curlevel = getRandomLevel(mult_);
+-        if (level > 0)
++        if (level > -1)
+             curlevel = level;
+ 
+         element_levels_[cur_c] = curlevel;
+@@ -1116,6 +1119,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
+         memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype));
+         memcpy(getDataByInternalId(cur_c), data_point, data_size_);
+ 
++        if (!base_layer_init && curlevel == 0)
++            return cur_c;
++
+         if (curlevel) {
+             linkLists_[cur_c] = (char *) malloc(size_links_per_element_ * curlevel + 1);
+             if (linkLists_[cur_c] == nullptr)
+@@ -1138,7 +1144,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
+                         tableint *datal = (tableint *) (data + 1);
+                         for (int i = 0; i < size; i++) {
+                             tableint cand = datal[i];
+-                            if (cand < 0 || cand > max_elements_)
++                            if (static_cast<int>(cand) < 0 || cand > max_elements_)
+                                 throw std::runtime_error("cand error");
+                             dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_);
+                             if (d < curdist) {
+@@ -1188,28 +1194,41 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
+         tableint currObj = enterpoint_node_;
+         dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);
+ 
+-        for (int level = maxlevel_; level > 0; level--) {
+-            bool changed = true;
+-            while (changed) {
+-                changed = false;
+-                unsigned int *data;
++        if (base_layer_only) {
++            // You can increase the number of seeds when testing large-scale dataset, num_seeds = 48 for 100M-scale
++            for (int i = 0; i < num_seeds; i++) {
++                tableint obj = i * (max_elements_ / num_seeds);
++                dist_t dist = fstdistfunc_(query_data, getDataByInternalId(obj), dist_func_param_);
++                if (dist < curdist) {
++                    curdist = dist;
++                    currObj = obj;
 +                }
 +            }
-+            else{
-+                for (int level = maxlevel_; level > 0; level--) {
-+                    bool changed = true;
-+                    while (changed) {
-+                        changed = false;
-+                        unsigned int *data;
- 
--                    data = (unsigned int *) get_linklist(currObj, level);
--                    int size = getListCount(data);
--                    metric_hops++;
--                    metric_distance_computations+=size;
-+                        data = (unsigned int *) get_linklist(currObj, level);
-+                        int size = getListCount(data);
-+                        metric_hops++;
-+                        metric_distance_computations+=size;
- 
--                    tableint *datal = (tableint *) (data + 1);
--                    for (int i = 0; i < size; i++) {
--                        tableint cand = datal[i];
--                        if (cand < 0 || cand > max_elements_)
--                            throw std::runtime_error("cand error");
--                        dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
-+                        tableint *datal = (tableint *) (data + 1);
-+                        for (int i = 0; i < size; i++) {
-+                            tableint cand = datal[i];
-+                            if (cand > max_elements_)
-+                                throw std::runtime_error("cand error");
-+                            dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
- 
--                        if (d < curdist) {
--                            curdist = d;
--                            currObj = cand;
--                            changed = true;
-+                            if (d < curdist) {
-+                                curdist = d;
-+                                currObj = cand;
-+                                changed = true;
-+                            }
-                         }
++        }
++        else {
++            for (int level = maxlevel_; level > 0; level--) {
++                bool changed = true;
++                while (changed) {
++                    changed = false;
++                    unsigned int *data;
+ 
+-                data = (unsigned int *) get_linklist(currObj, level);
+-                int size = getListCount(data);
+-                metric_hops++;
+-                metric_distance_computations+=size;
++                    data = (unsigned int *) get_linklist(currObj, level);
++                    int size = getListCount(data);
++                    metric_hops++;
++                    metric_distance_computations+=size;
++
++                    tableint *datal = (tableint *) (data + 1);
++                    for (int i = 0; i < size; i++) {
++                        tableint cand = datal[i];
++                        if (static_cast<int>(cand) < 0 || cand > max_elements_)
++                            throw std::runtime_error("cand error");
++                        dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
+ 
+-                tableint *datal = (tableint *) (data + 1);
+-                for (int i = 0; i < size; i++) {
+-                    tableint cand = datal[i];
+-                    if (cand < 0 || cand > max_elements_)
+-                        throw std::runtime_error("cand error");
+-                    dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);
+-
+-                    if (d < curdist) {
+-                        curdist = d;
+-                        currObj = cand;
+-                        changed = true;
++                        if (d < curdist) {
++                            curdist = d;
++                            currObj = cand;
++                            changed = true;
++                        }
                      }
                  }
+             }
 diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h
-index 4413537..c3240f3 100644
+index 834d19f..0c0af26 100644
 --- a/hnswlib/space_l2.h
 +++ b/hnswlib/space_l2.h
-@@ -252,13 +252,14 @@ namespace hnswlib {
-         ~L2Space() {}
-     };
- 
-+    template <typename T>
-     static int
-     L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
- 
-         size_t qty = *((size_t *) qty_ptr);
-         int res = 0;
--        unsigned char *a = (unsigned char *) pVect1;
--        unsigned char *b = (unsigned char *) pVect2;
-+        T *a = (T *) pVect1;
-+        T *b = (T *) pVect2;
- 
-         qty = qty >> 2;
-         for (size_t i = 0; i < qty; i++) {
-@@ -279,11 +280,12 @@ namespace hnswlib {
-         return (res);
-     }
- 
-+    template <typename T>
-     static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
-         size_t qty = *((size_t*)qty_ptr);
-         int res = 0;
--        unsigned char* a = (unsigned char*)pVect1;
--        unsigned char* b = (unsigned char*)pVect2;
-+        T* a = (T*)pVect1;
-+        T* b = (T*)pVect2;
- 
-         for(size_t i = 0; i < qty; i++)
-         {
-@@ -294,6 +296,7 @@ namespace hnswlib {
-         return (res);
-     }
- 
-+    template <typename T>
-     class L2SpaceI : public SpaceInterface<int> {
- 
-         DISTFUNC<int> fstdistfunc_;
-@@ -302,10 +305,10 @@ namespace hnswlib {
-     public:
-         L2SpaceI(size_t dim) {
-             if(dim % 4 == 0) {
--                fstdistfunc_ = L2SqrI4x;
-+                fstdistfunc_ = L2SqrI4x<T>;
-             }
-             else {
--                fstdistfunc_ = L2SqrI;
-+                fstdistfunc_ = L2SqrI<T>;
-             }
-             dim_ = dim;
-             data_size_ = dim * sizeof(unsigned char);
-diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h
-index 5e1a4a5..4195ebd 100644
---- a/hnswlib/visited_list_pool.h
-+++ b/hnswlib/visited_list_pool.h
-@@ -3,6 +3,7 @@
- #include <mutex>
- #include <string.h>
- #include <deque>
-+#include <limits>
- 
- namespace hnswlib {
-     typedef unsigned short int vl_type;
-@@ -14,7 +15,7 @@ namespace hnswlib {
-         unsigned int numelements;
- 
-         VisitedList(int numelements1) {
--            curV = -1;
-+            curV = std::numeric_limits<vl_type>::max();
-             numelements = numelements1;
-             mass = new vl_type[numelements];
+@@ -252,12 +252,13 @@ class L2Space : public SpaceInterface<float> {
+     ~L2Space() {}
+ };
+ 
++template <typename T>
+ static int
+ L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
+     size_t qty = *((size_t *) qty_ptr);
+     int res = 0;
+-    unsigned char *a = (unsigned char *) pVect1;
+-    unsigned char *b = (unsigned char *) pVect2;
++    T *a = (T *) pVect1;
++    T *b = (T *) pVect2;
+ 
+     qty = qty >> 2;
+     for (size_t i = 0; i < qty; i++) {
+@@ -277,11 +278,12 @@ L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const voi
+     return (res);
+ }
+ 
++template <typename T>
+ static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
+     size_t qty = *((size_t*)qty_ptr);
+     int res = 0;
+-    unsigned char* a = (unsigned char*)pVect1;
+-    unsigned char* b = (unsigned char*)pVect2;
++    T* a = (T*)pVect1;
++    T* b = (T*)pVect2;
+ 
+     for (size_t i = 0; i < qty; i++) {
+         res += ((*a) - (*b)) * ((*a) - (*b));
+@@ -291,6 +293,7 @@ static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2,
+     return (res);
+ }
+ 
++template <typename T>
+ class L2SpaceI : public SpaceInterface<int> {
+     DISTFUNC<int> fstdistfunc_;
+     size_t data_size_;
+@@ -299,9 +302,9 @@ class L2SpaceI : public SpaceInterface<int> {
+  public:
+     L2SpaceI(size_t dim) {
+         if (dim % 4 == 0) {
+-            fstdistfunc_ = L2SqrI4x;
++            fstdistfunc_ = L2SqrI4x<T>;
+         } else {
+-            fstdistfunc_ = L2SqrI;
++            fstdistfunc_ = L2SqrI<T>;
          }
--- 
-2.43.0
-
+         dim_ = dim;
+         data_size_ = dim * sizeof(unsigned char);
diff --git a/cpp/cmake/patches/hnswlib_override.json b/cpp/cmake/patches/hnswlib_override.json
index aef2da772..c50220e24 100644
--- a/cpp/cmake/patches/hnswlib_override.json
+++ b/cpp/cmake/patches/hnswlib_override.json
@@ -1,16 +1,16 @@
 {
-    "packages" : {
-      "hnswlib" : {
-        "version": "0.6.2",
-        "git_url": "https://github.com/nmslib/hnswlib.git",
-        "git_tag": "v${version}",
-        "patches" : [
-          {
-            "file" : "${current_json_dir}/hnswlib.diff",
-            "issue" : "Correct compilation issues",
-            "fixed_in" : ""
-          }
-        ]
-      }
+  "packages": {
+    "hnswlib": {
+      "version": "0.7.0",
+      "git_url": "https://github.com/nmslib/hnswlib.git",
+      "git_tag": "v${version}",
+      "patches": [
+        {
+          "file": "${current_json_dir}/hnswlib.diff",
+          "issue": "Correct compilation issues",
+          "fixed_in": ""
+        }
+      ]
     }
-  }
\ No newline at end of file
+  }
+}
\ No newline at end of file
diff --git a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake
index 2e6c895e5..5b4d89aa2 100644
--- a/cpp/cmake/thirdparty/get_hnswlib.cmake
+++ b/cpp/cmake/thirdparty/get_hnswlib.cmake
@@ -15,6 +15,7 @@
 #=============================================================================
 
 function(find_and_configure_hnswlib)
+  message(STATUS "Finding or building hnswlib")
   set(oneValueArgs)
 
   include(${rapids-cmake-dir}/cpm/package_override.cmake)
diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h
index 0495c574a..b7eda54b8 100644
--- a/cpp/include/cuvs/neighbors/hnsw.h
+++ b/cpp/include/cuvs/neighbors/hnsw.h
@@ -16,6 +16,8 @@
 
 #pragma once
 
+#include "cagra.h"
+
 #include <cuvs/core/c_api.h>
 #include <cuvs/distance/distance.h>
 #include <dlpack/dlpack.h>
@@ -27,32 +29,51 @@ extern "C" {
 #endif
 
 /**
- * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params
+ * @defgroup hnsw_c_index_params C API for HNSW index params
  * @{
  */
 
-struct cuvsHnswSearchParams {
-  int32_t ef;
-  int32_t numThreads;
+/**
+ * @brief Hierarchy for HNSW index when converting from CAGRA index
+ *
+ * NOTE: When the value is `NONE`, the HNSW index is built as a base-layer-only index.
+ */
+enum cuvsHnswHierarchy {
+  /* Flat hierarchy, search is base-layer only */
+  NONE,
+  /* Full hierarchy is built using the CPU */
+  CPU
 };
 
-typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t;
+struct cuvsHnswIndexParams {
+  /* hierarchy of the hnsw index */
+  cuvsHnswHierarchy hierarchy;
+  /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/
+  int ef_construction;
+  /** Number of host threads to use to construct hierarchy when hierarchy is `CPU`
+  NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive
+  to parallelism, and increasing the number of threads can reduce the quality of the index.
+   */
+  int num_threads;
+};
+
+typedef struct cuvsHnswIndexParams* cuvsHnswIndexParams_t;
 
 /**
- * @brief Allocate HNSW search params, and populate with default values
+ * @brief Allocate HNSW Index params, and populate with default values
  *
- * @param[in] params cuvsHnswSearchParams_t to allocate
+ * @param[in] params cuvsHnswIndexParams_t to allocate
  * @return cuvsError_t
  */
-cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params);
+cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params);
 
 /**
- * @brief De-allocate HNSW search params
+ * @brief De-allocate HNSW Index params
  *
- * @param[in] params cuvsHnswSearchParams_t to de-allocate
+ * @param[in] params
  * @return cuvsError_t
  */
-cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params);
+cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params);
 
 /**
  * @}
@@ -90,6 +111,184 @@ cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index);
  */
 cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
 
+/**
+ * @}
+ */
+
+/**
+ * @defgroup hnsw_c_extend_params Parameters for extending HNSW index
+ * @{
+ */
+
+struct cuvsHnswExtendParams {
+  /** Number of CPU threads used to extend additional vectors */
+  int num_threads;
+};
+
+typedef struct cuvsHnswExtendParams* cuvsHnswExtendParams_t;
+
+/**
+ * @brief Allocate HNSW extend params, and populate with default values
+ *
+ * @param[in] params cuvsHnswExtendParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params);
+
+/**
+ * @brief De-allocate HNSW extend params
+ *
+ * @param[in] params cuvsHnswExtendParams_t to de-allocate
+ * @return cuvsError_t
+ */
+
+cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup hnsw_c_index_load Load CAGRA index as hnswlib index
+ * @{
+ */
+
+/**
+ * @brief Convert a CAGRA Index to an HNSW index.
+ * NOTE: When hierarchy is:
+ *       1. `NONE`: This method uses the filesystem to write the CAGRA index in
+ * `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary
+ * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as
+ * the format is not compatible with the original hnswlib.
+ *       2. `CPU`: The returned index is mutable and can be extended with additional vectors. The
+ * serialized index is also compatible with the original hnswlib library.
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index
+ * @param[in] cagra_index cuvsCagraIndex_t to convert to HNSW index
+ * @param[out] hnsw_index cuvsHnswIndex_t to return the HNSW index
+ *
+ * @return cuvsError_t
+ *
+ * @code{.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/cagra.h>
+ * #include <cuvs/neighbors/hnsw.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // create a CAGRA index with `cuvsCagraBuild`
+ *
+ * // Convert the CAGRA index to an HNSW index
+ * cuvsHnswIndex_t hnsw_index;
+ * cuvsHnswIndexCreate(&hnsw_index);
+ * cuvsHnswIndexParams_t hnsw_params;
+ * cuvsHnswIndexParamsCreate(&hnsw_params);
+ * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index);
+ *
+ * // de-allocate `hnsw_params`, `hnsw_index` and `res`
+ * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params);
+ * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ */
+cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
+                              cuvsHnswIndexParams_t params,
+                              cuvsCagraIndex_t cagra_index,
+                              cuvsHnswIndex_t hnsw_index);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup hnsw_c_index_extend Extend HNSW index with additional vectors
+ * @{
+ */
+
+/**
+ * @brief Add new vectors to an HNSW index
+ * NOTE: The HNSW index can only be extended when the hierarchy is `CPU`
+ *       when converting from a CAGRA index.
+
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] params cuvsHnswExtendParams_t used to extend Hnsw index
+ * @param[in] additional_dataset DLManagedTensor* additional dataset to extend the index
+ * @param[inout] index cuvsHnswIndex_t to extend
+  *
+  * @return cuvsError_t
+  *
+  * @code{.c}
+  * #include <cuvs/core/c_api.h>
+  * #include <cuvs/neighbors/cagra.h>
+  * #include <cuvs/neighbors/hnsw.h>
+  *
+  * // Create cuvsResources_t
+  * cuvsResources_t res;
+  * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+  *
+  * // create an index with `cuvsCagraBuild`
+  *
+  * // Convert the CAGRA index to an HNSW index
+  * cuvsHnswIndex_t hnsw_index;
+  * cuvsHnswIndexCreate(&hnsw_index);
+  * cuvsHnswIndexParams_t hnsw_params;
+  * cuvsHnswIndexParamsCreate(&hnsw_params);
+  * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index);
+  *
+  * // Extend the HNSW index with additional vectors
+  * DLManagedTensor additional_dataset;
+  * cuvsHnswExtendParams_t extend_params;
+  * cuvsHnswExtendParamsCreate(&extend_params);
+  * cuvsHnswExtend(res, extend_params, additional_dataset, hnsw_index);
+  *
+  * // de-allocate `hnsw_params`, `hnsw_index`, `extend_params` and `res`
+  * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params);
+  * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index);
+  * cuvsError_t extend_params_destroy_status = cuvsHnswExtendParamsDestroy(extend_params);
+  * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+  * @endcode
+  */
+
+cuvsError_t cuvsHnswExtend(cuvsResources_t res,
+                           cuvsHnswExtendParams_t params,
+                           DLManagedTensor* additional_dataset,
+                           cuvsHnswIndex_t index);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup hnsw_c_search_params C API for hnswlib wrapper search params
+ * @{
+ */
+
+struct cuvsHnswSearchParams {
+  int32_t ef;
+  int32_t num_threads;
+};
+
+typedef struct cuvsHnswSearchParams* cuvsHnswSearchParams_t;
+
+/**
+ * @brief Allocate HNSW search params, and populate with default values
+ *
+ * @param[in] params cuvsHnswSearchParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params);
+
+/**
+ * @brief De-allocate HNSW search params
+ *
+ * @param[in] params cuvsHnswSearchParams_t to de-allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params);
+
 /**
  * @}
  */
@@ -111,8 +310,8 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
  *          c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS,
- *       as the format is not compatible with the original hnswlib.
+ * NOTE: When hierarchy is `NONE`, the HNSW index can only be searched by the hnswlib wrapper in
+ * cuVS, as the format is not compatible with the original hnswlib.
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -131,7 +330,7 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
  * cuvsHnswSearchParams_t params;
  * cuvsError_t params_create_status = cuvsHnswSearchParamsCreate(&params);
  *
- * // Search the `index` built using `cuvsHnswBuild`
+ * // Search the `index` built using `cuvsHnswFromCagra`
  * cuvsError_t search_status = cuvsHnswSearch(res, params, index, &queries, &neighbors,
  * &distances);
  *
@@ -142,7 +341,7 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index);
  *
  * @param[in] res cuvsResources_t opaque C handle
  * @param[in] params cuvsHnswSearchParams_t used to search Hnsw index
- * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswBuild`
+ * @param[in] index cuvsHnswIndex which has been returned by `cuvsHnswFromCagra`
  * @param[in] queries DLManagedTensor* queries dataset to search
  * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
  * @param[out] distances DLManagedTensor* output `k` distances for queries
@@ -163,9 +362,50 @@ cuvsError_t cuvsHnswSearch(cuvsResources_t res,
  * @{
  */
 
+/**
+ * @brief Serialize a CAGRA index to a file as an hnswlib index
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
+ * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] filename the name of the file to save the index
+ * @param[in] index cuvsHnswIndex_t to serialize
+ * @return cuvsError_t
+ *
+ * @code{.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/cagra.h>
+ * #include <cuvs/neighbors/hnsw.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // create an index with `cuvsCagraBuild`
+ *
+ * // Convert the CAGRA index to an HNSW index
+ * cuvsHnswIndex_t hnsw_index;
+ * cuvsHnswIndexCreate(&hnsw_index);
+ * cuvsHnswIndexParams_t hnsw_params;
+ * cuvsHnswIndexParamsCreate(&hnsw_params);
+ * cuvsHnswFromCagra(res, hnsw_params, cagra_index, hnsw_index);
+ *
+ * // Serialize the HNSW index
+ * cuvsHnswSerialize(res, "/path/to/index", hnsw_index);
+ *
+ * // de-allocate `hnsw_params`, `hnsw_index` and `res`
+ * cuvsError_t hnsw_params_destroy_status = cuvsHnswIndexParamsDestroy(hnsw_params);
+ * cuvsError_t hnsw_index_destroy_status = cuvsHnswIndexDestroy(hnsw_index);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ */
+cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHnswIndex_t index);
+
 /**
  * Load hnswlib index from file which was serialized from a HNSW index.
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the loaded hnswlib index is immutable, and only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
  * Experimental, both the API and the serialization format are subject to change.
  *
@@ -185,17 +425,22 @@ cuvsError_t cuvsHnswSearch(cuvsResources_t res,
  * // The index should have the same dtype as the one used to build CAGRA the index
  * cuvsHnswIndex_t hnsw_index;
  * cuvsHnswIndexCreate(&hnsw_index);
+ * cuvsHnsWIndexParams_t hnsw_params;
+ * cuvsHnswIndexParamsCreate(&hnsw_params);
+ * hnsw_params->hierarchy = NONE;
  * hnsw_index->dtype = index->dtype;
- * cuvsCagraDeserialize(res, "/path/to/index", hnsw_index);
+ * cuvsHnswDeserialize(res, hnsw_params, "/path/to/index", dim, metric hnsw_index);
  * @endcode
  *
  * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] params cuvsHnswIndexParams_t used to load Hnsw index
  * @param[in] filename the name of the file that stores the index
  * @param[in] dim the dimension of the vectors in the index
  * @param[in] metric the distance metric used to build the index
  * @param[out] index HNSW index loaded disk
  */
 cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
+                                cuvsHnswIndexParams_t params,
                                 const char* filename,
                                 int dim,
                                 cuvsDistanceType metric,
diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp
index d5abd6d55..f0b433d8e 100644
--- a/cpp/include/cuvs/neighbors/hnsw.hpp
+++ b/cpp/include/cuvs/neighbors/hnsw.hpp
@@ -34,14 +34,30 @@
 namespace cuvs::neighbors::hnsw {
 
 /**
- * @defgroup hnsw_cpp_search_params Build CAGRA index and search with hnswlib
+ * @defgroup hnsw_cpp_index_params hnswlib index wrapper params
  * @{
  */
 
-struct search_params : cuvs::neighbors::search_params {
-  int ef;               // size of the candidate list
-  int num_threads = 0;  // number of host threads to use for concurrent searches. Value of 0
-                        // automatically maximizes parallelism
+/**
+ * @brief Hierarchy for HNSW index when converting from CAGRA index
+ *
+ * NOTE: When the value is `NONE`, the HNSW index is built as a base-layer-only index.
+ */
+enum class HnswHierarchy {
+  NONE,  // base-layer-only index
+  CPU    // full index with CPU-built hierarchy
+};
+
+struct index_params : cuvs::neighbors::index_params {
+  /** Hierarchy build type for HNSW index when converting from CAGRA index */
+  HnswHierarchy hierarchy = HnswHierarchy::NONE;
+  /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/
+  int ef_construction = 200;
+  /** Number of host threads to use to construct hierarchy when hierarchy is `CPU`
+  NOTE: Constructing the hierarchy when converting from a CAGRA graph is highly sensitive
+  to parallelism, and increasing the number of threads can reduce the quality of the index.
+   */
+  int num_threads = 2;
 };
 
 /**@}*/
@@ -62,8 +78,12 @@ struct index : cuvs::neighbors::index {
    *
    * @param[in] dim dimensions of the training dataset
    * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct")
+   * @param[in] hierarchy hierarchy used for upper HNSW layers
    */
-  index(int dim, cuvs::distance::DistanceType metric) : dim_{dim}, metric_{metric} {}
+  index(int dim, cuvs::distance::DistanceType metric, HnswHierarchy hierarchy = HnswHierarchy::NONE)
+    : dim_{dim}, metric_{metric}, hierarchy_{hierarchy}
+  {
+  }
 
   virtual ~index() {}
 
@@ -76,6 +96,8 @@ struct index : cuvs::neighbors::index {
 
   auto metric() const -> cuvs::distance::DistanceType { return metric_; }
 
+  auto hierarchy() const -> HnswHierarchy { return hierarchy_; }
+
   /**
   @brief Set ef for search
   */
@@ -84,24 +106,41 @@ struct index : cuvs::neighbors::index {
  private:
   int dim_;
   cuvs::distance::DistanceType metric_;
+  HnswHierarchy hierarchy_;
 };
 
 /**@}*/
 
+/**
+ * @defgroup hnsw_cpp_extend_params HNSW index extend parameters
+ * @{
+ */
+
+struct extend_params {
+  /** Number of host threads to use to add additional vectors to the index.
+  Value of 0 automatically maximizes parallelism. */
+  int num_threads = 0;
+};
+
 /**
  * @defgroup hnsw_cpp_index_load Load CAGRA index as hnswlib index
  * @{
  */
 
 /**
- * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index
- * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin`
- * before reading it as an hnswlib index, then deleting the temporary file. The returned index
- * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not
- * compatible with the original hnswlib.
+ * @brief Construct an hnswlib index from a CAGRA index
+ * NOTE: When `hnsw::index_params.hierarchy` is:
+ *       1. `NONE`: This method uses the filesystem to write the CAGRA index in
+ * `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary
+ * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as
+ * the format is not compatible with the original hnswlib.
+ *       2. `CPU`: The returned index is mutable and can be extended with additional vectors. The
+ * serialized index is also compatible with the original hnswlib library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] cagra_index cagra index
+ * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU`
  *
  * Usage example:
  * @code{.cpp}
@@ -110,24 +149,34 @@ struct index : cuvs::neighbors::index {
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<float, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as base-layer-only hnswlib index
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  * @endcode
  */
 std::unique_ptr<index<float>> from_cagra(
-  raft::resources const& res, const cuvs::neighbors::cagra::index<float, uint32_t>& cagra_index);
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<float, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
- * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index
- * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin`
- * before reading it as an hnswlib index, then deleting the temporary file.  The returned index
- * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not
- * compatible with the original hnswlib.
+ * @brief Construct an hnswlib index from a CAGRA index
+ * NOTE: When `hnsw::index_params.hierarchy` is:
+ *       1. `NONE`: This method uses the filesystem to write the CAGRA index in
+ * `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary
+ * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as
+ * the format is not compatible with the original hnswlib.
+ *       2. `CPU`: The returned index is mutable and can be extended with additional vectors. The
+ * serialized index is also compatible with the original hnswlib library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] cagra_index cagra index
+ * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU`
  *
  * Usage example:
  * @code{.cpp}
@@ -136,24 +185,34 @@ std::unique_ptr<index<float>> from_cagra(
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<uint8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as base-layer-only hnswlib index
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  * @endcode
  */
 std::unique_ptr<index<uint8_t>> from_cagra(
-  raft::resources const& res, const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& cagra_index);
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
 
 /**
- * @brief Construct an immutable hnswlib base-layer-only index from a CAGRA index
- * NOTE: This method uses the filesystem to write the CAGRA index in `/tmp/<random_number>.bin`
- * before reading it as an hnswlib index, then deleting the temporary file.  The returned index
- * is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not
- * compatible with the original hnswlib.
+ * @brief Construct an hnswlib index from a CAGRA index
+ * NOTE: When `hnsw::index_params.hierarchy` is:
+ *       1. `NONE`: This method uses the filesystem to write the CAGRA index in
+ * `/tmp/<random_number>.bin` before reading it as an hnswlib index, then deleting the temporary
+ * file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as
+ * the format is not compatible with the original hnswlib.
+ *       2. `CPU`: The returned index is mutable and can be extended with additional vectors. The
+ * serialized index is also compatible with the original hnswlib library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] cagra_index cagra index
+ * @param[in] dataset optional dataset to avoid extra memory copy when hierarchy is `CPU`
  *
  * Usage example:
  * @code{.cpp}
@@ -162,14 +221,138 @@ std::unique_ptr<index<uint8_t>> from_cagra(
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<int8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as base-layer-only hnswlib index
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  * @endcode
  */
 std::unique_ptr<index<int8_t>> from_cagra(
-  raft::resources const& res, const cuvs::neighbors::cagra::index<int8_t, uint32_t>& cagra_index);
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<int8_t, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset =
+    std::nullopt);
+
+/**@}*/
+
+/**
+ * @defgroup hnsw_cpp_index_extend Extend HNSW index with additional vectors
+ * @{
+ */
+
+/**
+ * @brief Add new vectors to an HNSW index
+ * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU`
+ *       when converting from a CAGRA index.
+ *
+ * @param[in] res raft resources
+ * @param[in] params configure the extend
+ * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()]
+ * @param[inout] idx HNSW index to extend
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *
+ *   // Extend the HNSW index with additional vectors
+ *   auto additional_dataset = raft::make_host_matrix<float>(res, add_size, index->dim());
+ *   hnsw::extend_params extend_params;
+ *   hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());
+ */
+void extend(raft::resources const& res,
+            const extend_params& params,
+            raft::host_matrix_view<const float, int64_t, raft::row_major> additional_dataset,
+            index<float>& idx);
+
+/**
+ * @brief Add new vectors to an HNSW index
+ * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU`
+ *       when converting from a CAGRA index.
+ *
+ * @param[in] res raft resources
+ * @param[in] params configure the extend
+ * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()]
+ * @param[inout] idx HNSW index to extend
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *
+ *   // Extend the HNSW index with additional vectors
+ *   auto additional_dataset = raft::make_host_matrix<float>(res, add_size, index->dim());
+ *   hnsw::extend_params extend_params;
+ *   hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());
+ */
+void extend(raft::resources const& res,
+            const extend_params& params,
+            raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> additional_dataset,
+            index<uint8_t>& idx);
+
+/**
+ * @brief Add new vectors to an HNSW index
+ * NOTE: The HNSW index can only be extended when the `hnsw::index_params.hierarchy` is `CPU`
+ *       when converting from a CAGRA index.
+ *
+ * @param[in] res raft resources
+ * @param[in] params configure the extend
+ * @param[in] additional_dataset a host matrix view to a row-major matrix [n_rows, index->dim()]
+ * @param[inout] idx HNSW index to extend
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *
+ *   // Extend the HNSW index with additional vectors
+ *   auto additional_dataset = raft::make_host_matrix<float>(res, add_size, index->dim());
+ *   hnsw::extend_params extend_params;
+ *   hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());
+ */
+void extend(raft::resources const& res,
+            const extend_params& params,
+            raft::host_matrix_view<const int8_t, int64_t, raft::row_major> additional_dataset,
+            index<int8_t>& idx);
+
+/**@} */
+
+/**
+ * @defgroup hnsw_cpp_search_params Build CAGRA index and search with hnswlib
+ * @{
+ */
+
+struct search_params : cuvs::neighbors::search_params {
+  int ef;               // size of the candidate list
+  int num_threads = 0;  // number of host threads to use for concurrent searches. Value of 0
+                        // automatically maximizes parallelism
+};
 
 /**@}*/
 
@@ -181,9 +364,9 @@ std::unique_ptr<index<int8_t>> from_cagra(
  */
 
 /**
- * @brief Search hnswlib base-layer-only index constructed from a CAGRA index
- * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS,
- *       as the format is not compatible with the original hnswlib.
+ * @brief Search HNSW index constructed from a CAGRA index
+ * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is
+ * `NONE`, as the format is not compatible with the original hnswlib.
  *
  * @param[in] res raft resources
  * @param[in] params configure the search
@@ -201,10 +384,11 @@ std::unique_ptr<index<int8_t>> from_cagra(
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<float, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as a base-layer HNSW index using the filesystem
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  *
  *   // Search K nearest neighbors as an hnswlib index
  *   // using host threads for concurrency
@@ -224,9 +408,9 @@ void search(raft::resources const& res,
             raft::host_matrix_view<float, int64_t, raft::row_major> distances);
 
 /**
- * @brief Search hnswlib base-layer-only index constructed from a CAGRA index
- * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS,
- *       as the format is not compatible with the original hnswlib.
+ * @brief Search HNSWindex constructed from a CAGRA index
+ * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is
+ * `NONE`, as the format is not compatible with the original hnswlib.
  *
  * @param[in] res raft resources
  * @param[in] params configure the search
@@ -244,10 +428,11 @@ void search(raft::resources const& res,
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<uint8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as a base-layer HNSW index using the filesystem
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  *
  *   // Search K nearest neighbors as an hnswlib index
  *   // using host threads for concurrency
@@ -267,9 +452,9 @@ void search(raft::resources const& res,
             raft::host_matrix_view<float, int64_t, raft::row_major> distances);
 
 /**
- * @brief Search hnswlib base-layer-only index constructed from a CAGRA index
- * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS,
- *       as the format is not compatible with the original hnswlib.
+ * @brief Search HNSW index constructed from a CAGRA index
+ * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS when the hierarchy is
+ * `NONE`, as the format is not compatible with the original hnswlib.
  *
  * @param[in] res raft resources
  * @param[in] params configure the search
@@ -287,10 +472,11 @@ void search(raft::resources const& res,
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<int8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // Load CAGRA index as a base-layer HNSW index using the filesystem
- *   auto hnsw_index = hnsw::from_cagra(res, index);
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
  *
  *   // Search K nearest neighbors as an hnswlib index
  *   // using host threads for concurrency
@@ -312,16 +498,106 @@ void search(raft::resources const& res,
 /**@}*/
 
 /**
- * @defgroup hnsw_cpp_index_deserialize Deserialize CAGRA index as hnswlib index
+ * @defgroup hnsw_cpp_index_serialize Deserialize CAGRA index as hnswlib index
  * @{
  */
 
+/**
+ * @brief Serialize a CAGRA index to a file as an hnswlib index
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
+ * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
+ *
+ * @param[in] res raft resources
+ * @param[in] filename path to the file to save the serialized CAGRA index
+ * @param[in] idx cagra index
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // Save the index
+ *   hnsw::serialize(res, "index.bin", index);
+ * @endcode
+ */
+void serialize(raft::resources const& res, const std::string& filename, const index<float>& idx);
+
+/**
+ * @brief Serialize a CAGRA index to a file as an hnswlib index
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
+ * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
+ *
+ * @param[in] res raft resources
+ * @param[in] filename path to the file to save the serialized CAGRA index
+ * @param[in] idx cagra index
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // Save the index
+ *   hnsw::serialize(res, "index.bin", index);
+ * @endcode
+ */
+void serialize(raft::resources const& res, const std::string& filename, const index<uint8_t>& idx);
+
+/**
+ * @brief Serialize a CAGRA index to a file as an hnswlib index
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
+ * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
+ *
+ * @param[in] res raft resources
+ * @param[in] filename path to the file to save the serialized CAGRA index
+ * @param[in] idx cagra index
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // Build a CAGRA index
+ *   using namespace cuvs::neighbors;
+ *   // use default index parameters
+ *   cagra::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = cagra::build(res, index_params, dataset);
+ *
+ *   // Load CAGRA index as an HNSW index
+ *   hnsw::index_params hnsw_params;
+ *   auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *   // Save the index
+ *   hnsw::serialize(res, "index.bin", index);
+ * @endcode
+ */
+void serialize(raft::resources const& res, const std::string& filename, const index<int8_t>& idx);
+
 /**
  * @brief De-serialize a CAGRA index saved to a file as an hnswlib index
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] filename path to the file containing the serialized CAGRA index
  * @param[in] dim dimensions of the training dataset
  * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct")
@@ -334,19 +610,23 @@ void search(raft::resources const& res,
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<float, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<float>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *  hnsw::index_params hnsw_params;
+ *  auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *  // save HNSW index to a file
+ *  hnsw::serialize(res, "index.bin", hnsw_index);
+ *  // De-serialize the HNSW index
+ *  index<float>* hnsw_index = nullptr;
+ *  hnsw::deserialize(res, hnsw_params, "index.bin", index->dim(), index->metric(), &hnsw_index);
  *
  *   // Delete index after use
  *   delete hnsw_index;
  * @endcode
  */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
@@ -354,10 +634,13 @@ void deserialize(raft::resources const& res,
 
 /**
  * @brief De-serialize a CAGRA index saved to a file as an hnswlib index
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] filename path to the file containing the serialized CAGRA index
  * @param[in] dim dimensions of the training dataset
  * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct")
@@ -370,19 +653,23 @@ void deserialize(raft::resources const& res,
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<uint8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<uint8_t>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *  hnsw::index_params hnsw_params;
+ *  auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *  // save HNSW index to a file
+ *  hnsw::serialize(res, "index.bin", hnsw_index);
+ *  // De-serialize the HNSW index
+ *  index<uint8_t>* hnsw_index = nullptr;
+ *  hnsw::deserialize(res, hnsw_params, "index.bin", index->dim(), index->metric(), &hnsw_index);
  *
  *   // Delete index after use
  *   delete hnsw_index;
  * @endcode
  */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
@@ -390,10 +677,13 @@ void deserialize(raft::resources const& res,
 
 /**
  * @brief De-serialize a CAGRA index saved to a file as an hnswlib index
- * NOTE: The loaded hnswlib index is immutable, and only be read by the
+ * NOTE: When hierarchy is `NONE`, the saved hnswlib index is immutable and can only be read by the
  * hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib.
+ * However, when hierarchy is `CPU`, the saved hnswlib index is compatible with the original hnswlib
+ * library.
  *
  * @param[in] res raft resources
+ * @param[in] params hnsw index parameters
  * @param[in] filename path to the file containing the serialized CAGRA index
  * @param[in] dim dimensions of the training dataset
  * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct")
@@ -406,19 +696,23 @@ void deserialize(raft::resources const& res,
  *   // use default index parameters
  *   cagra::index_params index_params;
  *   // create and fill the index from a [N, D] dataset
- *   auto index = cagra::build<int8_t, uint32_t>(res, index_params, dataset);
+ *   auto index = cagra::build(res, index_params, dataset);
  *
- *   // save a CAGRA index to a file
- *   cagra::serialize(res, index, "index.bin");
- *   // De-serialize a CAGRA index as a base-layer HNSW index using the filesystem
- *   index<int8_t>* hnsw_index = nullptr;
- *   hnsw::deserialize(res, "index.bin", index->dim(), index->metric(), &hnsw_index);
+ *   // Load CAGRA index as an HNSW index
+ *  hnsw::index_params hnsw_params;
+ *  auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
+ *  // save HNSW index to a file
+ *  hnsw::serialize(res, "index.bin", hnsw_index);
+ *  // De-serialize the HNSW index
+ *  index<int8_t>* hnsw_index = nullptr;
+ *  hnsw::deserialize(res, hnsw_params, "index.bin", index->dim(), index->metric(), &hnsw_index);
  *
  *   // Delete index after use
  *   delete hnsw_index;
  * @endcode
  */
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
index ce1e03264..e129d23e8 100644
--- a/cpp/src/neighbors/detail/hnsw.hpp
+++ b/cpp/src/neighbors/detail/hnsw.hpp
@@ -22,9 +22,63 @@
 #include <hnswlib/hnswlib.h>
 #include <memory>
 #include <random>
+#include <thread>
 
 namespace cuvs::neighbors::hnsw::detail {
 
+// Multithreaded executor
+// The helper function is copied from the hnswlib repository
+// as for some reason, adding vectors to the hnswlib index does not
+// work well with omp parallel for
+template <class Function>
+inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn)
+{
+  if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); }
+
+  if (numThreads == 1) {
+    for (size_t id = start; id < end; id++) {
+      fn(id, 0);
+    }
+  } else {
+    std::vector<std::thread> threads;
+    std::atomic<size_t> current(start);
+
+    // keep track of exceptions in threads
+    // https://stackoverflow.com/a/32428427/1713196
+    std::exception_ptr lastException = nullptr;
+    std::mutex lastExceptMutex;
+
+    for (size_t threadId = 0; threadId < numThreads; ++threadId) {
+      threads.push_back(std::thread([&, threadId] {
+        while (true) {
+          size_t id = current.fetch_add(1);
+
+          if (id >= end) { break; }
+
+          try {
+            fn(id, threadId);
+          } catch (...) {
+            std::unique_lock<std::mutex> lastExcepLock(lastExceptMutex);
+            lastException = std::current_exception();
+            /*
+             * This will work even when current is the largest value that
+             * size_t can fit, because fetch_add returns the previous value
+             * before the increment (what will result in overflow
+             * and produce 0 instead of current + 1).
+             */
+            current = end;
+            break;
+          }
+        }
+      }));
+    }
+    for (auto& thread : threads) {
+      thread.join();
+    }
+    if (lastException) { std::rethrow_exception(lastException); }
+  }
+}
+
 template <typename T>
 struct hnsw_dist_t {
   using type = void;
@@ -54,9 +108,10 @@ struct index_impl : index<T> {
    * @param[in] filepath path to the index
    * @param[in] dim dimensions of the training dataset
    * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct")
+   * @param[in] hierarchy hierarchy used for upper HNSW layers
    */
-  index_impl(const std::string& filepath, int dim, cuvs::distance::DistanceType metric)
-    : index<T>{dim, metric}
+  index_impl(int dim, cuvs::distance::DistanceType metric, HnswHierarchy hierarchy)
+    : index<T>{dim, metric, hierarchy}
   {
     if constexpr (std::is_same_v<T, float>) {
       if (metric == cuvs::distance::DistanceType::L2Expanded) {
@@ -71,11 +126,6 @@ struct index_impl : index<T> {
     }
 
     RAFT_EXPECTS(space_ != nullptr, "Unsupported metric type was used");
-
-    appr_alg_ = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
-      space_.get(), filepath);
-
-    appr_alg_->base_layer_only = true;
   }
 
   /**
@@ -88,14 +138,32 @@ struct index_impl : index<T> {
   */
   void set_ef(int ef) const override { appr_alg_->ef_ = ef; }
 
+  /**
+  @brief Set index
+   */
+  void set_index(std::unique_ptr<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>&& index)
+  {
+    appr_alg_ = std::move(index);
+  }
+
+  /**
+  @brief Get space
+   */
+  auto get_space() const -> hnswlib::SpaceInterface<typename hnsw_dist_t<T>::type>*
+  {
+    return space_.get();
+  }
+
  private:
   std::unique_ptr<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>> appr_alg_;
   std::unique_ptr<hnswlib::SpaceInterface<typename hnsw_dist_t<T>::type>> space_;
 };
 
-template <typename T>
-std::unique_ptr<index<T>> from_cagra(raft::resources const& res,
-                                     const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index)
+template <typename T, HnswHierarchy hierarchy>
+std::enable_if_t<hierarchy == HnswHierarchy::NONE, std::unique_ptr<index<T>>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index)
 {
   std::random_device dev;
   std::mt19937 rng(dev());
@@ -103,13 +171,125 @@ std::unique_ptr<index<T>> from_cagra(raft::resources const& res,
   auto uuid            = std::to_string(dist(rng));
   std::string filepath = "/tmp/" + uuid + ".bin";
   cuvs::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index);
+
   index<T>* hnsw_index = nullptr;
   cuvs::neighbors::hnsw::deserialize(
-    res, filepath, cagra_index.dim(), cagra_index.metric(), &hnsw_index);
+    res, params, filepath, cagra_index.dim(), cagra_index.metric(), &hnsw_index);
   std::filesystem::remove(filepath);
   return std::unique_ptr<index<T>>(hnsw_index);
 }
 
+template <typename T, HnswHierarchy hierarchy>
+std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
+{
+  // auto host_dataset = raft::make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
+  auto host_dataset = raft::make_host_matrix<T, int64_t>(0, 0);
+  raft::host_matrix_view<const T, int64_t, raft::row_major> host_dataset_view(
+    host_dataset.data_handle(), host_dataset.extent(0), host_dataset.extent(1));
+  if (dataset.has_value()) {
+    host_dataset_view = dataset.value();
+  } else {
+    // move dataset to host, remove padding
+    auto cagra_dataset = cagra_index.dataset();
+    host_dataset =
+      raft::make_host_matrix<T, int64_t>(cagra_dataset.extent(0), cagra_dataset.extent(1));
+    RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    cagra_dataset.data_handle(),
+                                    sizeof(T) * cagra_dataset.stride(0),
+                                    sizeof(T) * host_dataset.extent(1),
+                                    cagra_dataset.extent(0),
+                                    cudaMemcpyDefault,
+                                    raft::resource::get_cuda_stream(res)));
+    raft::resource::sync_stream(res);
+    host_dataset_view = host_dataset.view();
+  }
+  // build upper layers of hnsw index
+  auto hnsw_index =
+    std::make_unique<index_impl<T>>(cagra_index.dim(), cagra_index.metric(), hierarchy);
+  auto appr_algo = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
+    hnsw_index->get_space(),
+    host_dataset_view.extent(0),
+    cagra_index.graph().extent(1) / 2,
+    params.ef_construction);
+  appr_algo->base_layer_init = false;  // tell hnswlib to build upper layers only
+  ParallelFor(0, host_dataset_view.extent(0), params.num_threads, [&](size_t i, size_t threadId) {
+    appr_algo->addPoint((void*)(host_dataset_view.data_handle() + i * host_dataset_view.extent(1)),
+                        i);
+  });
+  appr_algo->base_layer_init = true;  // reset to true to allow addition of new points
+
+  // move cagra graph to host
+  auto graph = cagra_index.graph();
+  auto host_graph =
+    raft::make_host_matrix<uint32_t, int64_t, raft::row_major>(graph.extent(0), graph.extent(1));
+  raft::copy(host_graph.data_handle(),
+             graph.data_handle(),
+             graph.size(),
+             raft::resource::get_cuda_stream(res));
+  raft::resource::sync_stream(res);
+
+// copy cagra graph to hnswlib base layer
+#pragma omp parallel for
+  for (size_t i = 0; i < static_cast<size_t>(host_graph.extent(0)); ++i) {
+    auto ll_i = appr_algo->get_linklist0(i);
+    appr_algo->setListCount(ll_i, host_graph.extent(1));
+    auto* data = (uint32_t*)(ll_i + 1);
+    for (size_t j = 0; j < static_cast<size_t>(host_graph.extent(1)); ++j) {
+      data[j] = host_graph(i, j);
+    }
+  }
+
+  hnsw_index->set_index(std::move(appr_algo));
+  return hnsw_index;
+}
+
+template <typename T>
+std::unique_ptr<index<T>> from_cagra(
+  raft::resources const& res,
+  const index_params& params,
+  const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset)
+{
+  if (params.hierarchy == HnswHierarchy::NONE) {
+    return from_cagra<T, HnswHierarchy::NONE>(res, params, cagra_index);
+  } else if (params.hierarchy == HnswHierarchy::CPU) {
+    return from_cagra<T, HnswHierarchy::CPU>(res, params, cagra_index, dataset);
+  }
+  {
+    RAFT_FAIL("Unsupported hierarchy type");
+  }
+}
+
+template <typename T>
+void extend(raft::resources const& res,
+            const extend_params& params,
+            raft::host_matrix_view<const T, int64_t, raft::row_major> additional_dataset,
+            index<T>& idx)
+{
+  auto* hnswlib_index = reinterpret_cast<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>*>(
+    const_cast<void*>(idx.get_index()));
+  auto current_element_count = hnswlib_index->getCurrentElementCount();
+  auto new_element_count     = additional_dataset.extent(0);
+  auto num_threads           = params.num_threads == 0 ? std::thread::hardware_concurrency()
+                                                       : static_cast<size_t>(params.num_threads);
+
+  hnswlib_index->resizeIndex(current_element_count + new_element_count);
+  ParallelFor(current_element_count,
+              current_element_count + new_element_count,
+              num_threads,
+              [&](size_t i, size_t threadId) {
+                hnswlib_index->addPoint(
+                  (void*)(additional_dataset.data_handle() +
+                          (i - current_element_count) * additional_dataset.extent(1)),
+                  i);
+              });
+}
+
 template <typename T>
 void get_search_knn_results(hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type> const* idx,
                             const T* query,
@@ -171,14 +351,28 @@ void search(raft::resources const& res,
   }
 }
 
+template <typename T>
+void serialize(raft::resources const& res, const std::string& filename, const index<T>& idx)
+{
+  auto* hnswlib_index = reinterpret_cast<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>*>(
+    const_cast<void*>(idx.get_index()));
+  hnswlib_index->saveIndex(filename);
+}
+
 template <typename T>
 void deserialize(raft::resources const& res,
+                 const index_params& params,
                  const std::string& filename,
                  int dim,
                  cuvs::distance::DistanceType metric,
                  index<T>** idx)
 {
-  *idx = new detail::index_impl<T>(filename, dim, metric);
+  auto hnsw_index = std::make_unique<index_impl<T>>(dim, metric, params.hierarchy);
+  auto appr_algo  = std::make_unique<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
+    hnsw_index->get_space(), filename);
+  if (params.hierarchy == HnswHierarchy::NONE) { appr_algo->base_layer_only = true; }
+  hnsw_index->set_index(std::move(appr_algo));
+  *idx = hnsw_index.release();
 }
 
 }  // namespace cuvs::neighbors::hnsw::detail
diff --git a/cpp/src/neighbors/hnsw.cpp b/cpp/src/neighbors/hnsw.cpp
index e6f3fbcc7..f165176ec 100644
--- a/cpp/src/neighbors/hnsw.cpp
+++ b/cpp/src/neighbors/hnsw.cpp
@@ -21,11 +21,14 @@
 
 namespace cuvs::neighbors::hnsw {
 
-#define CUVS_INST_HNSW_FROM_CAGRA(T)                                                           \
-  std::unique_ptr<index<T>> from_cagra(                                                        \
-    raft::resources const& res, const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index) \
-  {                                                                                            \
-    return detail::from_cagra<T>(res, cagra_index);                                            \
+#define CUVS_INST_HNSW_FROM_CAGRA(T)                                                  \
+  std::unique_ptr<index<T>> from_cagra(                                               \
+    raft::resources const& res,                                                       \
+    const index_params& params,                                                       \
+    const cuvs::neighbors::cagra::index<T, uint32_t>& cagra_index,                    \
+    std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset) \
+  {                                                                                   \
+    return detail::from_cagra<T>(res, params, cagra_index, dataset);                  \
   }
 
 CUVS_INST_HNSW_FROM_CAGRA(float);
@@ -34,6 +37,21 @@ CUVS_INST_HNSW_FROM_CAGRA(int8_t);
 
 #undef CUVS_INST_HNSW_FROM_CAGRA
 
+#define CUVS_INST_HNSW_EXTEND(T)                                                            \
+  void extend(raft::resources const& res,                                                   \
+              const extend_params& params,                                                  \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> additional_dataset, \
+              index<T>& idx)                                                                \
+  {                                                                                         \
+    detail::extend<T>(res, params, additional_dataset, idx);                                \
+  }
+
+CUVS_INST_HNSW_EXTEND(float);
+CUVS_INST_HNSW_EXTEND(uint8_t);
+CUVS_INST_HNSW_EXTEND(int8_t);
+
+#undef CUVS_INST_HNSW_EXTEND
+
 #define CUVS_INST_HNSW_SEARCH(T)                                                    \
   void search(raft::resources const& res,                                           \
               const search_params& params,                                          \
@@ -51,20 +69,25 @@ CUVS_INST_HNSW_SEARCH(int8_t);
 
 #undef CUVS_INST_HNSW_SEARCH
 
-#define CUVS_INST_HNSW_DESERIALIZE(T)                        \
-  void deserialize(raft::resources const& res,               \
-                   const std::string& filename,              \
-                   int dim,                                  \
-                   cuvs::distance::DistanceType metric,      \
-                   index<T>** idx)                           \
-  {                                                          \
-    detail::deserialize<T>(res, filename, dim, metric, idx); \
+#define CUVS_INST_HNSW_SERIALIZE(T)                                                            \
+  void serialize(raft::resources const& res, const std::string& filename, const index<T>& idx) \
+  {                                                                                            \
+    detail::serialize<T>(res, filename, idx);                                                  \
+  }                                                                                            \
+  void deserialize(raft::resources const& res,                                                 \
+                   const index_params& params,                                                 \
+                   const std::string& filename,                                                \
+                   int dim,                                                                    \
+                   cuvs::distance::DistanceType metric,                                        \
+                   index<T>** idx)                                                             \
+  {                                                                                            \
+    detail::deserialize<T>(res, params, filename, dim, metric, idx);                           \
   }
 
-CUVS_INST_HNSW_DESERIALIZE(float);
-CUVS_INST_HNSW_DESERIALIZE(uint8_t);
-CUVS_INST_HNSW_DESERIALIZE(int8_t);
+CUVS_INST_HNSW_SERIALIZE(float);
+CUVS_INST_HNSW_SERIALIZE(uint8_t);
+CUVS_INST_HNSW_SERIALIZE(int8_t);
 
-#undef CUVS_INST_HNSW_DESERIALIZE
+#undef CUVS_INST_HNSW_SERIALIZE
 
 }  // namespace cuvs::neighbors::hnsw
diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp
index a19875641..0233a510a 100644
--- a/cpp/src/neighbors/hnsw_c.cpp
+++ b/cpp/src/neighbors/hnsw_c.cpp
@@ -31,6 +31,44 @@
 #include <cuvs/neighbors/hnsw.hpp>
 
 namespace {
+
+template <typename T>
+void _from_cagra(cuvsResources_t res,
+                 cuvsHnswIndexParams_t params,
+                 cuvsCagraIndex_t cagra_index,
+                 cuvsHnswIndex_t hnsw_index)
+{
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+  auto index   = reinterpret_cast<cuvs::neighbors::cagra::index<T, uint32_t>*>(cagra_index->addr);
+  auto cpp_params            = cuvs::neighbors::hnsw::index_params();
+  cpp_params.hierarchy       = static_cast<cuvs::neighbors::hnsw::HnswHierarchy>(params->hierarchy);
+  cpp_params.ef_construction = params->ef_construction;
+  cpp_params.num_threads     = params->num_threads;
+  std::optional<raft::host_matrix_view<const T, int64_t, raft::row_major>> dataset = std::nullopt;
+
+  auto hnsw_index_unique_ptr =
+    cuvs::neighbors::hnsw::from_cagra(*res_ptr, cpp_params, *index, dataset);
+  auto hnsw_index_ptr = hnsw_index_unique_ptr.release();
+  hnsw_index->addr    = reinterpret_cast<uintptr_t>(hnsw_index_ptr);
+}
+
+template <typename T>
+void _extend(cuvsResources_t res,
+             cuvsHnswExtendParams_t params,
+             DLManagedTensor* additional_dataset,
+             cuvsHnswIndex index)
+{
+  auto res_ptr           = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr         = reinterpret_cast<cuvs::neighbors::hnsw::index<T>*>(index.addr);
+  auto cpp_params        = cuvs::neighbors::hnsw::extend_params();
+  cpp_params.num_threads = params->num_threads;
+
+  using additional_dataset_mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+  auto additional_dataset_mds =
+    cuvs::core::from_dlpack<additional_dataset_mdspan_type>(additional_dataset);
+  cuvs::neighbors::hnsw::extend(*res_ptr, cpp_params, additional_dataset_mds, *index_ptr);
+}
+
 template <typename T>
 void _search(cuvsResources_t res,
              cuvsHnswSearchParams params,
@@ -44,7 +82,7 @@ void _search(cuvsResources_t res,
 
   auto search_params        = cuvs::neighbors::hnsw::search_params();
   search_params.ef          = params.ef;
-  search_params.num_threads = params.numThreads;
+  search_params.num_threads = params.num_threads;
 
   using queries_mdspan_type   = raft::host_matrix_view<T const, int64_t, raft::row_major>;
   using neighbors_mdspan_type = raft::host_matrix_view<uint64_t, int64_t, raft::row_major>;
@@ -57,26 +95,42 @@ void _search(cuvsResources_t res,
 }
 
 template <typename T>
-void* _deserialize(cuvsResources_t res, const char* filename, int dim, cuvsDistanceType metric)
+void _serialize(cuvsResources_t res, const char* filename, cuvsHnswIndex index)
+{
+  auto res_ptr   = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::hnsw::index<T>*>(index.addr);
+  cuvs::neighbors::hnsw::serialize(*res_ptr, std::string(filename), *index_ptr);
+}
+
+template <typename T>
+void* _deserialize(cuvsResources_t res,
+                   cuvsHnswIndexParams_t params,
+                   const char* filename,
+                   int dim,
+                   cuvsDistanceType metric)
 {
   auto res_ptr                           = reinterpret_cast<raft::resources*>(res);
   cuvs::neighbors::hnsw::index<T>* index = nullptr;
-  cuvs::neighbors::hnsw::deserialize(*res_ptr, std::string(filename), dim, metric, &index);
+  auto cpp_params                        = cuvs::neighbors::hnsw::index_params();
+  cpp_params.hierarchy = static_cast<cuvs::neighbors::hnsw::HnswHierarchy>(params->hierarchy);
+  cuvs::neighbors::hnsw::deserialize(
+    *res_ptr, cpp_params, std::string(filename), dim, metric, &index);
   return index;
 }
 }  // namespace
 
-extern "C" cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params)
+extern "C" cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
 {
-  return cuvs::core::translate_exceptions(
-    [=] { *params = new cuvsHnswSearchParams{.ef = 200, .numThreads = 0}; });
+  return cuvs::core::translate_exceptions([=] {
+    *params = new cuvsHnswIndexParams{
+      .hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 2};
+  });
 }
 
-extern "C" cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params)
+extern "C" cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params)
 {
   return cuvs::core::translate_exceptions([=] { delete params; });
 }
-
 extern "C" cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index)
 {
   return cuvs::core::translate_exceptions([=] { *index = new cuvsHnswIndex{}; });
@@ -101,6 +155,66 @@ extern "C" cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index_c_ptr)
   });
 }
 
+extern "C" cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsHnswExtendParams{.num_threads = 0}; });
+}
+
+extern "C" cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
+extern "C" cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
+                                         cuvsHnswIndexParams_t params,
+                                         cuvsCagraIndex_t cagra_index,
+                                         cuvsHnswIndex_t hnsw_index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index        = *cagra_index;
+    hnsw_index->dtype = index.dtype;
+    if (index.dtype.code == kDLFloat) {
+      _from_cagra<float>(res, params, cagra_index, hnsw_index);
+    } else if (index.dtype.code == kDLUInt) {
+      _from_cagra<uint8_t>(res, params, cagra_index, hnsw_index);
+    } else if (index.dtype.code == kDLInt) {
+      _from_cagra<int8_t>(res, params, cagra_index, hnsw_index);
+    } else {
+      RAFT_FAIL("Unsupported dtype: %d", index.dtype.code);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsHnswExtend(cuvsResources_t res,
+                                      cuvsHnswExtendParams_t params,
+                                      DLManagedTensor* additional_dataset,
+                                      cuvsHnswIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    if (index->dtype.code == kDLFloat) {
+      _extend<float>(res, params, additional_dataset, *index);
+    } else if (index->dtype.code == kDLUInt) {
+      _extend<uint8_t>(res, params, additional_dataset, *index);
+    } else if (index->dtype.code == kDLInt) {
+      _extend<int8_t>(res, params, additional_dataset, *index);
+    } else {
+      RAFT_FAIL("Unsupported dtype: %d", index->dtype.code);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsHnswSearchParamsCreate(cuvsHnswSearchParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsHnswSearchParams{.ef = 200, .num_threads = 0}; });
+}
+
+extern "C" cuvsError_t cuvsHnswSearchParamsDestroy(cuvsHnswSearchParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
 extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res,
                                       cuvsHnswSearchParams_t params,
                                       cuvsHnswIndex_t index_c_ptr,
@@ -140,7 +254,25 @@ extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res,
   });
 }
 
+extern "C" cuvsError_t cuvsHnswSerialize(cuvsResources_t res,
+                                         const char* filename,
+                                         cuvsHnswIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    if (index->dtype.code == kDLFloat) {
+      _serialize<float>(res, filename, *index);
+    } else if (index->dtype.code == kDLInt) {
+      _serialize<int8_t>(res, filename, *index);
+    } else if (index->dtype.code == kDLUInt) {
+      _serialize<uint8_t>(res, filename, *index);
+    } else {
+      RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits);
+    }
+  });
+}
+
 extern "C" cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
+                                           cuvsHnswIndexParams_t params,
                                            const char* filename,
                                            int dim,
                                            cuvsDistanceType metric,
@@ -148,11 +280,14 @@ extern "C" cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
 {
   return cuvs::core::translate_exceptions([=] {
     if (index->dtype.code == kDLFloat && index->dtype.bits == 32) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<float>(res, params, filename, dim, metric));
     } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<uint8_t>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<uint8_t>(res, params, filename, dim, metric));
     } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) {
-      index->addr = reinterpret_cast<uintptr_t>(_deserialize<int8_t>(res, filename, dim, metric));
+      index->addr =
+        reinterpret_cast<uintptr_t>(_deserialize<int8_t>(res, params, filename, dim, metric));
     } else {
       RAFT_FAIL("Unsupported dtype in file %s", filename);
     }
diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp
index 9b3da75a4..98ef3fdd3 100644
--- a/cpp/src/neighbors/iface/iface.hpp
+++ b/cpp/src/neighbors/iface/iface.hpp
@@ -20,6 +20,7 @@
 #include <cuvs/neighbors/common.hpp>
 #include <cuvs/neighbors/ivf_flat.hpp>
 #include <cuvs/neighbors/ivf_pq.hpp>
+#include <fstream>
 #include <raft/core/device_resources.hpp>
 
 #include <fstream>
diff --git a/cpp/test/neighbors/ann_hnsw_c.cu b/cpp/test/neighbors/ann_hnsw_c.cu
index fc740b924..2a6401b1d 100644
--- a/cpp/test/neighbors/ann_hnsw_c.cu
+++ b/cpp/test/neighbors/ann_hnsw_c.cu
@@ -111,7 +111,9 @@ TEST(CagraHnswC, BuildSearch)
   cuvsHnswIndex_t hnsw_index;
   cuvsHnswIndexCreate(&hnsw_index);
   hnsw_index->dtype = index->dtype;
-  cuvsHnswDeserialize(res, "/tmp/cagra_hnswlib.index", 2, L2Expanded, hnsw_index);
+  cuvsHnswIndexParams_t hnsw_params;
+  cuvsHnswIndexParamsCreate(&hnsw_params);
+  cuvsHnswDeserialize(res, hnsw_params, "/tmp/cagra_hnswlib.index", 2, L2Expanded, hnsw_index);
 
   // search index
   cuvsHnswSearchParams_t search_params;
diff --git a/cpp/test/neighbors/hnsw.cu b/cpp/test/neighbors/hnsw.cu
index 9fb88be05..20ee83a11 100644
--- a/cpp/test/neighbors/hnsw.cu
+++ b/cpp/test/neighbors/hnsw.cu
@@ -108,7 +108,8 @@ class AnnHNSWTest : public ::testing::TestWithParam<AnnHNSWInputs> {
 
       cuvs::neighbors::hnsw::search_params search_params;
       search_params.ef = ps.ef;
-      auto hnsw_index  = cuvs::neighbors::hnsw::from_cagra(handle_, index);
+      cuvs::neighbors::hnsw::index_params hnsw_params;
+      auto hnsw_index = cuvs::neighbors::hnsw::from_cagra(handle_, hnsw_params, index);
       auto queries_HNSW_view =
         raft::make_host_matrix_view<DataT, int64_t>(queries_h.data(), ps.n_queries, ps.dim);
       auto indices_HNSW_view =
diff --git a/dependencies.yaml b/dependencies.yaml
index e909ad0dc..80a7d2024 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -470,7 +470,6 @@ dependencies:
     common:
       - output_types: [conda, pyproject, requirements]
         packages:
-          - hnswlib=0.6.2
           - nlohmann_json>=3.11.2
           - glog>=0.6.0
           - h5py>=3.8.0
diff --git a/docs/source/c_api/neighbors_hnsw_c.rst b/docs/source/c_api/neighbors_hnsw_c.rst
index 988e5b6f3..22ffc236d 100644
--- a/docs/source/c_api/neighbors_hnsw_c.rst
+++ b/docs/source/c_api/neighbors_hnsw_c.rst
@@ -26,6 +26,28 @@ Index
     :members:
     :content-only:
 
+Index extend parameters
+-----------------------
+
+.. doxygengroup:: hnsw_c_extend_params
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index extend
+------------
+.. doxygengroup:: hnsw_c_index_extend
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index load
+----------
+.. doxygengroup:: hnsw_c_index_load
+    :project: cuvs
+    :members:
+    :content-only:
+
 Index search
 ------------
 
diff --git a/docs/source/cpp_api/neighbors_hnsw.rst b/docs/source/cpp_api/neighbors_hnsw.rst
index b0af88af0..00dd3a213 100644
--- a/docs/source/cpp_api/neighbors_hnsw.rst
+++ b/docs/source/cpp_api/neighbors_hnsw.rst
@@ -27,10 +27,25 @@ Index
     :members:
     :content-only:
 
-Index load
+Index extend parameters
+-----------------------
+
+.. doxygengroup:: hnsw_cpp_extend_params
+    :project: cuvs
+    :members:
+    :content-only:
+
+Index extend
 ------------
+.. doxygengroup:: hnsw_cpp_index_extend
+    :project: cuvs
+    :members:
+    :content-only:
 
-.. doxygengroup:: hnsw_cpp_index_search
+Index load
+----------
+
+.. doxygengroup:: hnsw_cpp_index_load
     :project: cuvs
     :members:
     :content-only:
@@ -43,10 +58,10 @@ Index search
     :members:
     :content-only:
 
-Index deserialize
+Index serialize
 ---------------
 
-.. doxygengroup:: hnsw_cpp_index_deserialize
+.. doxygengroup:: hnsw_cpp_index_serialize
     :project: cuvs
     :members:
     :content-only:
diff --git a/python/cuvs/cuvs/neighbors/hnsw/__init__.py b/python/cuvs/cuvs/neighbors/hnsw/__init__.py
index 5efcdf68b..fafff7d03 100644
--- a/python/cuvs/cuvs/neighbors/hnsw/__init__.py
+++ b/python/cuvs/cuvs/neighbors/hnsw/__init__.py
@@ -13,10 +13,23 @@
 # limitations under the License.
 
 
-from .hnsw import Index, SearchParams, from_cagra, load, save, search
+from .hnsw import (
+    ExtendParams,
+    Index,
+    IndexParams,
+    SearchParams,
+    extend,
+    from_cagra,
+    load,
+    save,
+    search,
+)
 
 __all__ = [
+    "IndexParams",
     "Index",
+    "ExtendParams",
+    "extend",
     "SearchParams",
     "load",
     "save",
diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd
index 1cdc97406..e0c517933 100644
--- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd
+++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd
@@ -20,14 +20,25 @@ from libc.stdint cimport int32_t, uintptr_t
 from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
 from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
 from cuvs.distance_type cimport cuvsDistanceType
+from cuvs.neighbors.cagra.cagra cimport cuvsCagraIndex_t
 
 
 cdef extern from "cuvs/neighbors/hnsw.h" nogil:
-    ctypedef struct cuvsHnswSearchParams:
-        int32_t ef
-        int32_t numThreads
 
-    ctypedef cuvsHnswSearchParams* cuvsHnswSearchParams_t
+    ctypedef enum cuvsHnswHierarchy:
+        NONE
+        CPU
+
+    ctypedef struct cuvsHnswIndexParams:
+        cuvsHnswHierarchy hierarchy
+        int32_t ef_construction
+        int32_t num_threads
+
+    ctypedef cuvsHnswIndexParams* cuvsHnswIndexParams_t
+
+    cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
+
+    cuvsError_t cuvsHnswIndexParamsDestroy(cuvsHnswIndexParams_t params)
 
     ctypedef struct cuvsHnswIndex:
         uintptr_t addr
@@ -39,6 +50,31 @@ cdef extern from "cuvs/neighbors/hnsw.h" nogil:
 
     cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index)
 
+    ctypedef struct cuvsHnswExtendParams:
+        int32_t num_threads
+
+    ctypedef cuvsHnswExtendParams* cuvsHnswExtendParams_t
+
+    cuvsError_t cuvsHnswExtendParamsCreate(cuvsHnswExtendParams_t* params)
+
+    cuvsError_t cuvsHnswExtendParamsDestroy(cuvsHnswExtendParams_t params)
+
+    cuvsError_t cuvsHnswFromCagra(cuvsResources_t res,
+                                  cuvsHnswIndexParams_t params,
+                                  cuvsCagraIndex_t cagra_index,
+                                  cuvsHnswIndex_t hnsw_index) except +
+
+    cuvsError_t cuvsHnswExtend(cuvsResources_t res,
+                               cuvsHnswExtendParams_t params,
+                               DLManagedTensor* data,
+                               cuvsHnswIndex_t index) except +
+
+    ctypedef struct cuvsHnswSearchParams:
+        int32_t ef
+        int32_t num_threads
+
+    ctypedef cuvsHnswSearchParams* cuvsHnswSearchParams_t
+
     cuvsError_t cuvsHnswSearch(cuvsResources_t res,
                                cuvsHnswSearchParams* params,
                                cuvsHnswIndex_t index,
@@ -46,7 +82,12 @@ cdef extern from "cuvs/neighbors/hnsw.h" nogil:
                                DLManagedTensor* neighbors,
                                DLManagedTensor* distances) except +
 
+    cuvsError_t cuvsHnswSerialize(cuvsResources_t res,
+                                  const char * filename,
+                                  cuvsHnswIndex_t index) except +
+
     cuvsError_t cuvsHnswDeserialize(cuvsResources_t res,
+                                    cuvsHnswIndexParams_t params,
                                     const char * filename,
                                     int32_t dim,
                                     cuvsDistanceType metric,
diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
index bcfaf167e..4c44350e8 100644
--- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
+++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx
@@ -39,41 +39,63 @@ from pylibraft.common.cai_wrapper import wrap_array
 from pylibraft.common.interruptible import cuda_interruptible
 
 
-cdef class SearchParams:
+cdef class IndexParams:
     """
-    HNSW search parameters
+    Parameters to build index for HNSW nearest neighbor search
 
     Parameters
     ----------
-    ef: int, default = 200
-        Maximum number of candidate list size used during search.
-    num_threads: int, default = 0
-        Number of CPU threads used to increase search parallelism.
-        When set to 0, the number of threads is automatically determined
-        using OpenMP's `omp_get_max_threads()`.
+    hierarchy : string, default = "none" (optional)
+        The hierarchy of the HNSW index. Valid values are ["none", "cpu"].
+        - "none": No hierarchy is built.
+        - "cpu": Hierarchy is built using CPU.
+    ef_construction : int, default = 200 (optional)
+        Maximum number of candidate list size used during construction
+        when hierarchy is `cpu`.
+    num_threads : int, default = 2 (optional)
+        Number of CPU threads used to increase construction parallelism
+        when hierarchy is `cpu`.
+        NOTE: Constructing the hierarchy when converting from a CAGRA graph
+        is highly sensitive to parallelism, and increasing the number of
+        threads can reduce the quality of the index.
     """
 
-    cdef cuvsHnswSearchParams params
+    cdef cuvsHnswIndexParams* params
+
+    def __cinit__(self):
+        check_cuvs(cuvsHnswIndexParamsCreate(&self.params))
+
+    def __dealloc__(self):
+        check_cuvs(cuvsHnswIndexParamsDestroy(self.params))
 
     def __init__(self, *,
-                 ef=200,
-                 num_threads=0):
-        self.params.ef = ef
-        self.params.numThreads = num_threads
+                 hierarchy="none",
+                 ef_construction=200,
+                 num_threads=2):
+        if hierarchy == "none":
+            self.params.hierarchy = cuvsHnswHierarchy.NONE
+        elif hierarchy == "cpu":
+            self.params.hierarchy = cuvsHnswHierarchy.CPU
+        else:
+            raise ValueError("Invalid hierarchy type."
+                             " Valid values are 'none' and 'cpu'.")
+        self.params.ef_construction = ef_construction
+        self.params.num_threads = num_threads
 
-    def __repr__(self):
-        attr_str = [attr + "=" + str(getattr(self, attr))
-                    for attr in [
-                        "ef", "num_threads"]]
-        return "SearchParams(type=HNSW, " + (", ".join(attr_str)) + ")"
+    @property
+    def hierarchy(self):
+        if self.params.hierarchy == cuvsHnswHierarchy.NONE:
+            return "none"
+        elif self.params.hierarchy == cuvsHnswHierarchy.CPU:
+            return "cpu"
 
     @property
-    def ef(self):
-        return self.params.ef
+    def ef_construction(self):
+        return self.params.ef_construction
 
     @property
     def num_threads(self):
-        return self.params.numThreads
+        return self.params.num_threads
 
 
 cdef class Index:
@@ -103,13 +125,44 @@ cdef class Index:
         return "Index(type=HNSW, metric=L2" + (", ".join(attr_str)) + ")"
 
 
+cdef class ExtendParams:
+    """
+    Parameters to extend the HNSW index with new data
+
+    Parameters
+    ----------
+    num_threads : int, default = 0 (optional)
+        Number of CPU threads used to increase construction parallelism.
+        When set to 0, the number of threads is automatically determined.
+    """
+
+    cdef cuvsHnswExtendParams* params
+
+    def __cinit__(self):
+        check_cuvs(cuvsHnswExtendParamsCreate(&self.params))
+
+    def __dealloc__(self):
+        check_cuvs(cuvsHnswExtendParamsDestroy(self.params))
+
+    def __init__(self, *,
+                 num_threads=0):
+        self.params.num_threads = num_threads
+
+    @property
+    def num_threads(self):
+        return self.params.num_threads
+
+
 @auto_sync_resources
-def save(filename, cagra.Index index, resources=None):
+def save(filename, Index index, resources=None):
     """
     Saves the CAGRA index to a file as an hnswlib index.
-    The saved index is immutable and can only be searched by the hnswlib
-    wrapper in cuVS, as the format is not compatible with the original
-    hnswlib.
+    If the index was constructed with `hnsw.IndexParams(hierarchy="none")`,
+    then the saved index is immutable and can only be searched by the hnswlib
+    wrapper in cuVS, as the format is not compatible with the original hnswlib.
+    However, if the index was constructed with
+    `hnsw.IndexParams(hierarchy="cpu")`, then the saved index is mutable and
+    compatible with the original hnswlib.
 
     Saving / loading the index is experimental. The serialization format is
     subject to change.
@@ -119,7 +172,7 @@ def save(filename, cagra.Index index, resources=None):
     filename : string
         Name of the file.
     index : Index
-        Trained CAGRA index.
+        Trained HNSW index.
     {resources_docstring}
 
     Examples
@@ -131,23 +184,28 @@ def save(filename, cagra.Index index, resources=None):
     >>> dataset = cp.random.random_sample((n_samples, n_features),
     ...                                   dtype=cp.float32)
     >>> # Build index
-    >>> index = cagra.build(cagra.IndexParams(), dataset)
+    >>> cagra_index = cagra.build(cagra.IndexParams(), dataset)
     >>> # Serialize and deserialize the cagra index built
-    >>> hnsw.save("my_index.bin", index)
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), cagra_index)
+    >>> hnsw.save("my_index.bin", hnsw_index)
     """
     cdef string c_filename = filename.encode('utf-8')
     cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
-    check_cuvs(cagra.cuvsCagraSerializeToHnswlib(res,
-                                                 c_filename.c_str(),
-                                                 index.index))
+    check_cuvs(cuvsHnswSerialize(res,
+                                 c_filename.c_str(),
+                                 index.index))
 
 
 @auto_sync_resources
-def load(filename, dim, dtype, metric="sqeuclidean", resources=None):
+def load(IndexParams index_params, filename, dim, dtype, metric="sqeuclidean",
+         resources=None):
     """
-    Loads base-layer-only hnswlib index from file, which was originally
-    saved as a built CAGRA index. The loaded index is immutable and can only
-    be searched by the hnswlib wrapper in cuVS, as the format is not
+    Loads an HNSW index.
+    If the index was constructed with `hnsw.IndexParams(hierarchy="none")`,
+    then the loaded index is immutable and can only be searched by the hnswlib
+    wrapper in cuVS, as the format is not compatible with the original hnswlib.
+    However, if the index was constructed with
+    `hnsw.IndexParams(hierarchy="cpu")`, then the loaded index is mutable and
     compatible with the original hnswlib.
 
     Saving / loading the index is experimental. The serialization format is
@@ -156,6 +214,8 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None):
 
     Parameters
     ----------
+    index_params : IndexParams
+        Parameters that were used to convert CAGRA index to HNSW index.
     filename : string
         Name of the file.
     dim : int
@@ -214,6 +274,7 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None):
 
     check_cuvs(cuvsHnswDeserialize(
         res,
+        index_params.params,
         c_filename.c_str(),
         dim,
         distance_type,
@@ -224,26 +285,30 @@ def load(filename, dim, dtype, metric="sqeuclidean", resources=None):
 
 
 @auto_sync_resources
-def from_cagra(cagra.Index index, temporary_index_path=None, resources=None):
+def from_cagra(IndexParams index_params, cagra.Index cagra_index,
+               temporary_index_path=None, resources=None):
     """
-    Returns an hnsw base-layer-only index from a CAGRA index.
-
-    NOTE: This method uses the filesystem to write the CAGRA index in
-          `/tmp/<random_number>.bin` or the parameter `temporary_index_path`
-          if not None before reading it as an hnsw index,
-          then deleting the temporary file. The returned index is immutable
-          and can only be searched by the hnsw wrapper in cuVS, as the
-          format is not compatible with the original hnswlib library.
-          By `base_layer_only`, we mean that the hnsw index is created
-          without the additional layers that are used for the hierarchical
-          search in hnswlib. Instead, the base layer is used for the search.
+    Returns an HNSW index from a CAGRA index.
+
+    NOTE: When `index_params.hierarchy` is:
+          1. `NONE`: This method uses the filesystem to write the CAGRA index
+                     in `/tmp/<random_number>.bin` before reading it as an
+                     hnswlib index, then deleting the temporary file. The
+                     returned index is immutable and can only be searched by
+                     the hnswlib wrapper in cuVS, as the format is not
+                    compatible with the original hnswlib.
+          2. `CPU`: The returned index is mutable and can be extended with
+                    additional vectors. The serialized index is also compatible
+                    with the original hnswlib library.
 
     Saving / loading the index is experimental. The serialization format is
     subject to change.
 
     Parameters
     ----------
-    index : Index
+    index_params : IndexParams
+        Parameters to convert the CAGRA index to HNSW index.
+    cagra_index : cagra.Index
         Trained CAGRA index.
     temporary_index_path : string, default = None
         Path to save the temporary index file. If None, the temporary file
@@ -262,18 +327,107 @@ def from_cagra(cagra.Index index, temporary_index_path=None, resources=None):
     >>> # Build index
     >>> index = cagra.build(cagra.IndexParams(), dataset)
     >>> # Serialize the CAGRA index to hnswlib base layer only index format
-    >>> hnsw_index = hnsw.from_cagra(index)
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), index)
     """
-    uuid_num = uuid.uuid4()
-    filename = temporary_index_path if temporary_index_path else \
-        f"/tmp/{uuid_num}.bin"
-    save(filename, index, resources=resources)
-    hnsw_index = load(filename, index.dim, np.dtype(index.active_index_type),
-                      "sqeuclidean", resources=resources)
-    os.remove(filename)
+
+    cdef Index hnsw_index = Index()
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    check_cuvs(cuvsHnswFromCagra(
+        res,
+        index_params.params,
+        cagra_index.index,
+        hnsw_index.index
+    ))
+
+    hnsw_index.trained = True
     return hnsw_index
 
 
+@auto_sync_resources
+def extend(ExtendParams extend_params, Index index, data, resources=None):
+    """
+    Extends the HNSW index with new data.
+
+    Parameters
+    ----------
+    extend_params : ExtendParams
+    index : Index
+        Trained HNSW index.
+    data : Host array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, int8, uint8]
+    {resources_docstring}
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from cuvs.neighbors import hnsw, cagra
+    >>>
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = np.random.random_sample((n_samples, n_features))
+    >>>
+    >>> # Build index
+    >>> index = cagra.build(hnsw.IndexParams(), dataset)
+    >>> # Load index
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(hierarchy="cpu"), index)
+    >>> # Extend the index with new data
+    >>> new_data = np.random.random_sample((n_samples, n_features))
+    >>> hnsw.extend(hnsw.ExtendParams(), hnsw_index, new_data)
+    """
+
+    data_ai = wrap_array(data)
+    _check_input_array(data_ai, [np.dtype('float32'),
+                                 np.dtype('uint8'),
+                                 np.dtype('int8')])
+
+    cdef cydlpack.DLManagedTensor* data_dlpack = cydlpack.dlpack_c(data_ai)
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+
+    check_cuvs(cuvsHnswExtend(
+        res,
+        extend_params.params,
+        data_dlpack,
+        index.index
+    ))
+
+
+cdef class SearchParams:
+    """
+    HNSW search parameters
+
+    Parameters
+    ----------
+    ef: int, default = 200
+        Maximum number of candidate list size used during search.
+    num_threads: int, default = 0
+        Number of CPU threads used to increase search parallelism.
+        When set to 0, the number of threads is automatically determined
+        using OpenMP's `omp_get_max_threads()`.
+    """
+
+    cdef cuvsHnswSearchParams params
+
+    def __init__(self, *,
+                 ef=200,
+                 num_threads=0):
+        self.params.ef = ef
+        self.params.num_threads = num_threads
+
+    def __repr__(self):
+        attr_str = [attr + "=" + str(getattr(self, attr))
+                    for attr in [
+                        "ef", "num_threads"]]
+        return "SearchParams(type=HNSW, " + (", ".join(attr_str)) + ")"
+
+    @property
+    def ef(self):
+        return self.params.ef
+
+    @property
+    def num_threads(self):
+        return self.params.num_threads
+
+
 @auto_sync_resources
 @auto_convert_output
 def search(SearchParams search_params,
@@ -290,15 +444,15 @@ def search(SearchParams search_params,
     ----------
     search_params : SearchParams
     index : Index
-        Trained CAGRA index.
-    queries : CUDA array interface compliant matrix shape (n_samples, dim)
+        Trained HNSW index.
+    queries : CPU array interface compliant matrix shape (n_samples, dim)
         Supported dtype [float, int]
     k : int
         The number of neighbors.
-    neighbors : Optional CUDA array interface compliant matrix shape
+    neighbors : Optional CPU array interface compliant matrix shape
                 (n_queries, k), dtype uint64_t. If supplied, neighbor
                 indices will be written here in-place. (default None)
-    distances : Optional CUDA array interface compliant matrix shape
+    distances : Optional CPU array interface compliant matrix shape
                 (n_queries, k) If supplied, the distances to the
                 neighbors will be written here in-place. (default None)
     {resources_docstring}
@@ -323,7 +477,7 @@ def search(SearchParams search_params,
     ...     num_threads=0
     ... )
     >>> # Convert CAGRA index to HNSW
-    >>> hnsw_index = hnsw.from_cagra(index)
+    >>> hnsw_index = hnsw.from_cagra(hnsw.IndexParams(), index)
     >>> # Using a pooling allocator reduces overhead of temporary array
     >>> # creation during search. This is useful if multiple searches
     >>> # are performed with same query size.
diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py
index 20a35401e..20f583ae8 100644
--- a/python/cuvs/cuvs/test/test_hnsw.py
+++ b/python/cuvs/cuvs/test/test_hnsw.py
@@ -32,6 +32,7 @@ def run_hnsw_build_search_test(
     build_algo="ivf_pq",
     intermediate_graph_degree=128,
     graph_degree=64,
+    hierarchy="none",
     search_params={},
 ):
     dataset = generate_data((n_rows, n_cols), dtype)
@@ -53,7 +54,8 @@ def run_hnsw_build_search_test(
 
     assert index.trained
 
-    hnsw_index = hnsw.from_cagra(index)
+    hnsw_params = hnsw.IndexParams(hierarchy=hierarchy, num_threads=1)
+    hnsw_index = hnsw.from_cagra(hnsw_params, index)
 
     queries = generate_data((n_queries, n_cols), dtype)
 
@@ -83,10 +85,93 @@ def run_hnsw_build_search_test(
 @pytest.mark.parametrize("num_threads", [2, 4])
 @pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"])
 @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"])
-def test_hnsw(dtype, k, ef, num_threads, metric, build_algo):
+@pytest.mark.parametrize("hierarchy", ["none", "cpu"])
+def test_hnsw(dtype, k, ef, num_threads, metric, build_algo, hierarchy):
     # Note that inner_product tests use normalized input which we cannot
     # represent in int8, therefore we test only sqeuclidean metric here.
     run_hnsw_build_search_test(
+        dtype=dtype,
+        k=k,
+        metric=metric,
+        build_algo=build_algo,
+        hierarchy=hierarchy,
+        search_params={"ef": ef, "num_threads": num_threads},
+    )
+
+
+def run_hnsw_extend_test(
+    n_rows=10000,
+    add_rows=2000,
+    n_cols=10,
+    n_queries=100,
+    k=10,
+    dtype=np.float32,
+    metric="sqeuclidean",
+    build_algo="ivf_pq",
+    intermediate_graph_degree=128,
+    graph_degree=64,
+    search_params={},
+):
+    dataset = generate_data((n_rows, n_cols), dtype)
+    add_dataset = generate_data((add_rows, n_cols), dtype)
+    if metric == "inner_product":
+        dataset = normalize(dataset, norm="l2", axis=1)
+        add_dataset = normalize(add_dataset, norm="l2", axis=1)
+        if dtype in [np.int8, np.uint8]:
+            pytest.skip(
+                "inner_product metric is not supported for int8/uint8 data"
+            )
+        if build_algo == "nn_descent":
+            pytest.skip("inner_product metric is not supported for nn_descent")
+
+    build_params = cagra.IndexParams(
+        metric=metric,
+        intermediate_graph_degree=intermediate_graph_degree,
+        graph_degree=graph_degree,
+        build_algo=build_algo,
+    )
+
+    index = cagra.build(build_params, dataset)
+
+    assert index.trained
+
+    hnsw_params = hnsw.IndexParams(hierarchy="cpu", num_threads=1)
+    hnsw_index = hnsw.from_cagra(hnsw_params, index)
+    hnsw.extend(hnsw.ExtendParams(), hnsw_index, add_dataset)
+
+    queries = generate_data((n_queries, n_cols), dtype)
+
+    search_params = hnsw.SearchParams(**search_params)
+
+    out_dist, out_idx = hnsw.search(search_params, hnsw_index, queries, k)
+
+    # Calculate reference values with sklearn
+    skl_metric = {
+        "sqeuclidean": "sqeuclidean",
+        "inner_product": "cosine",
+        "euclidean": "euclidean",
+    }[metric]
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric=skl_metric
+    )
+    nn_skl.fit(np.vstack([dataset, add_dataset]))
+    skl_dist, skl_idx = nn_skl.kneighbors(queries, return_distance=True)
+
+    recall = calc_recall(out_idx, skl_idx)
+    print(recall)
+    assert recall > 0.95
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
+@pytest.mark.parametrize("k", [10, 20])
+@pytest.mark.parametrize("ef", [30, 40])
+@pytest.mark.parametrize("num_threads", [2, 4])
+@pytest.mark.parametrize("metric", ["sqeuclidean"])
+@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"])
+def test_hnsw_extend(dtype, k, ef, num_threads, metric, build_algo):
+    # Note that inner_product tests use normalized input which we cannot
+    # represent in int8, therefore we test only sqeuclidean metric here.
+    run_hnsw_extend_test(
         dtype=dtype,
         k=k,
         metric=metric,
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
index f1a7f272c..90a561bca 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
@@ -4,8 +4,11 @@ constraints:
 groups:
   base:
     build:
-      graph_degree: [32, 64, 128, 256]
+      graph_degree: [32, 64, 96, 128]
       intermediate_graph_degree: [32, 64, 96, 128]
       graph_build_algo: ["NN_DESCENT"]
+      hierarchy: ["none", "cpu"]
+      ef_construction: [64, 128, 256, 512]
+      num_threads: [2, 5, 10]
     search:
       ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800]