From 0a905630c990235783f77c461691a983f97afc9f Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Wed, 1 Nov 2023 11:21:45 -0700
Subject: [PATCH 1/3] Cut peak memory footprint in graph creation (#3966)

This limits the peak memory footprint of graph creation (especially in single-GPU runs, or multi-GPU runs with a small number of GPUs) to 1.5 * the size of the edge list + alpha, where alpha is the memory needed to store O(V) data and V is the number of vertices.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Naim (https://github.com/naimnv)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: https://github.com/rapidsai/cugraph/pull/3966
---
 cpp/src/c_api/capi_helper.cu                  |   2 +-
 .../create_graph_from_edgelist_impl.cuh       | 425 ++++++++----------
 cpp/src/structure/detail/structure_utils.cuh  | 281 ++++++------
 cpp/src/structure/induced_subgraph_impl.cuh   |   2 +
 cpp/src/structure/renumber_edgelist_impl.cuh  | 100 +++--
 cpp/tests/community/mg_egonet_test.cu         |   1 +
 .../structure/mg_induced_subgraph_test.cu     |   1 +
 cpp/tests/utilities/test_utilities_impl.cuh   |  61 ++-
 8 files changed, 415 insertions(+), 458 deletions(-)
diff --git a/cpp/src/c_api/capi_helper.cu b/cpp/src/c_api/capi_helper.cu
index af0163b0512..0ee49f87265 100644
--- a/cpp/src/c_api/capi_helper.cu
+++ b/cpp/src/c_api/capi_helper.cu
@@ -44,7 +44,7 @@ shuffle_vertex_ids_and_offsets(raft::handle_t const& handle,
                thrust::make_zip_iterator(ids.end(), vertices.end()));
 
   auto return_offsets = cugraph::detail::compute_sparse_offsets<size_t>(
-    ids.begin(), ids.end(), size_t{0}, size_t{offsets.size() - 1}, handle.get_stream());
+    ids.begin(), ids.end(), size_t{0}, size_t{offsets.size() - 1}, true, handle.get_stream());
 
   return std::make_tuple(std::move(vertices), std::move(return_offsets));
 }
diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh
index 0d4b12a3e38..8dd587e1661 100644
--- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh
+++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh
@@ -510,7 +510,18 @@ create_graph_from_edgelist_impl(
   auto use_dcs =
     num_segments_per_vertex_partition > (detail::num_sparse_segments_per_vertex_partition + 2);
 
-  // 4. compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid
+  // 4. sort and compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid
+
+  auto total_global_mem = handle.get_device_properties().totalGlobalMem;
+  size_t element_size   = sizeof(vertex_t) * 2;
+  if (edgelist_weights) { element_size += sizeof(weight_t); }
+  if (edgelist_edge_ids) { element_size += sizeof(edge_id_t); }
+  if (edgelist_edge_types) { element_size += sizeof(edge_type_t); }
+  auto constexpr mem_frugal_ratio =
+    0.25;  // if the expected temporary buffer size exceeds the mem_frugal_ratio of the
+           // total_global_mem, switch to the memory frugal approach
+  auto mem_frugal_threshold =
+    static_cast<size_t>(static_cast<double>(total_global_mem / element_size) * mem_frugal_ratio);
 
   std::vector<rmm::device_uvector<edge_t>> edge_partition_offsets;
   std::vector<rmm::device_uvector<vertex_t>> edge_partition_indices;
@@ -559,154 +570,139 @@ create_graph_from_edgelist_impl(
     if (edgelist_weights) {
       if (edgelist_edge_ids) {
         if (edgelist_edge_types) {
-          auto edge_value_first =
-            thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(),
-                                      (*edge_partition_edgelist_edge_ids)[i].begin(),
-                                      (*edge_partition_edgelist_edge_types)[i].begin());
           std::forward_as_tuple(
             offsets, indices, std::tie(weights, edge_ids, edge_types), dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t,
+                                               edge_t,
+                                               thrust::tuple<weight_t, edge_id_t, edge_type_t>,
+                                               store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]),
+                              std::move((*edge_partition_edgelist_edge_ids)[i]),
+                              std::move((*edge_partition_edgelist_edge_types)[i])),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         } else {
-          auto edge_value_first =
-            thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(),
-                                      (*edge_partition_edgelist_edge_ids)[i].begin());
           std::forward_as_tuple(offsets, indices, std::tie(weights, edge_ids), dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t,
+                                               edge_t,
+                                               thrust::tuple<weight_t, edge_id_t>,
+                                               store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]),
+                              std::move((*edge_partition_edgelist_edge_ids)[i])),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         }
       } else {
         if (edgelist_edge_types) {
-          auto edge_value_first =
-            thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(),
-                                      (*edge_partition_edgelist_edge_types)[i].begin());
           std::forward_as_tuple(offsets, indices, std::tie(weights, edge_types), dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t,
+                                               edge_t,
+                                               thrust::tuple<weight_t, edge_type_t>,
+                                               store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]),
+                              std::move((*edge_partition_edgelist_edge_types)[i])),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         } else {
-          auto edge_value_first = (*edge_partition_edgelist_weights)[i].begin();
           std::forward_as_tuple(offsets, indices, weights, dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t, edge_t, weight_t, store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::move((*edge_partition_edgelist_weights)[i]),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         }
       }
     } else {
       if (edgelist_edge_ids) {
         if (edgelist_edge_types) {
-          auto edge_value_first =
-            thrust::make_zip_iterator((*edge_partition_edgelist_edge_ids)[i].begin(),
-                                      (*edge_partition_edgelist_edge_types)[i].begin());
           std::forward_as_tuple(
             offsets, indices, std::tie(edge_ids, edge_types), dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t,
+                                               edge_t,
+                                               thrust::tuple<edge_id_t, edge_type_t>,
+                                               store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::make_tuple(std::move((*edge_partition_edgelist_edge_ids)[i]),
+                              std::move((*edge_partition_edgelist_edge_types)[i])),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         } else {
-          auto edge_value_first = (*edge_partition_edgelist_edge_ids)[i].begin();
           std::forward_as_tuple(offsets, indices, edge_ids, dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t, edge_t, edge_id_t, store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::move((*edge_partition_edgelist_edge_ids)[i]),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         }
       } else {
         if (edgelist_edge_types) {
-          auto edge_value_first = (*edge_partition_edgelist_edge_types)[i].begin();
           std::forward_as_tuple(offsets, indices, edge_types, dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
-              edge_value_first,
+            detail::sort_and_compress_edgelist<vertex_t, edge_t, edge_type_t, store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
+              std::move((*edge_partition_edgelist_edge_types)[i]),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         } else {
           std::forward_as_tuple(offsets, indices, dcs_nzd_vertices) =
-            detail::compress_edgelist<edge_t, store_transposed>(
-              edge_partition_edgelist_srcs[i].begin(),
-              edge_partition_edgelist_srcs[i].end(),
-              edge_partition_edgelist_dsts[i].begin(),
+            detail::sort_and_compress_edgelist<vertex_t, edge_t, store_transposed>(
+              std::move(edge_partition_edgelist_srcs[i]),
+              std::move(edge_partition_edgelist_dsts[i]),
               major_range_first,
               major_hypersparse_first,
               major_range_last,
               minor_range_first,
               minor_range_last,
+              mem_frugal_threshold,
               handle.get_stream());
         }
       }
     }
 
-    edge_partition_edgelist_srcs[i].resize(0, handle.get_stream());
-    edge_partition_edgelist_srcs[i].shrink_to_fit(handle.get_stream());
-    edge_partition_edgelist_dsts[i].resize(0, handle.get_stream());
-    edge_partition_edgelist_dsts[i].shrink_to_fit(handle.get_stream());
-    if (edge_partition_edgelist_weights) {
-      (*edge_partition_edgelist_weights)[i].resize(0, handle.get_stream());
-      (*edge_partition_edgelist_weights)[i].shrink_to_fit(handle.get_stream());
-    }
-    if (edge_partition_edgelist_edge_ids) {
-      (*edge_partition_edgelist_edge_ids)[i].resize(0, handle.get_stream());
-      (*edge_partition_edgelist_edge_ids)[i].shrink_to_fit(handle.get_stream());
-    }
-    if (edge_partition_edgelist_edge_types) {
-      (*edge_partition_edgelist_edge_types)[i].resize(0, handle.get_stream());
-      (*edge_partition_edgelist_edge_types)[i].shrink_to_fit(handle.get_stream());
-    }
     edge_partition_offsets.push_back(std::move(offsets));
     edge_partition_indices.push_back(std::move(indices));
     if (edge_partition_weights) { (*edge_partition_weights).push_back(std::move(*weights)); }
@@ -954,6 +950,17 @@ create_graph_from_edgelist_impl(
 
   // convert edge list (COO) to compressed sparse format (CSR or CSC)
 
+  auto total_global_mem = handle.get_device_properties().totalGlobalMem;
+  size_t element_size   = sizeof(vertex_t) * 2;
+  if (edgelist_weights) { element_size += sizeof(weight_t); }
+  if (edgelist_edge_ids) { element_size += sizeof(edge_id_t); }
+  if (edgelist_edge_types) { element_size += sizeof(edge_type_t); }
+  auto constexpr mem_frugal_ratio =
+    0.25;  // if the expected temporary buffer size exceeds the mem_frugal_ratio of the
+           // total_global_mem, switch to the memory frugal approach
+  auto mem_frugal_threshold =
+    static_cast<size_t>(static_cast<double>(total_global_mem / element_size) * mem_frugal_ratio);
+
   rmm::device_uvector<edge_t> offsets(size_t{0}, handle.get_stream());
   rmm::device_uvector<vertex_t> indices(size_t{0}, handle.get_stream());
   std::optional<rmm::device_uvector<weight_t>> weights{std::nullopt};
@@ -963,202 +970,130 @@ create_graph_from_edgelist_impl(
   if (edgelist_weights) {
     if (edgelist_edge_ids) {
       if (edgelist_edge_types) {
-        auto edge_value_first = thrust::make_zip_iterator((*edgelist_weights).begin(),
-                                                          (*edgelist_edge_ids).begin(),
-                                                          (*edgelist_edge_types).begin());
         std::forward_as_tuple(offsets, indices, std::tie(weights, ids, types), std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t,
+                                             edge_t,
+                                             thrust::tuple<weight_t, edge_id_t, edge_type_t>,
+                                             store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights),
+                            std::move(*edgelist_edge_ids),
+                            std::move(*edgelist_edge_types)),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       } else {
-        auto edge_value_first =
-          thrust::make_zip_iterator((*edgelist_weights).begin(), (*edgelist_edge_ids).begin());
         std::forward_as_tuple(offsets, indices, std::tie(weights, ids), std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t,
+                                             edge_t,
+                                             thrust::tuple<weight_t, edge_id_t>,
+                                             store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       }
     } else {
       if (edgelist_edge_types) {
-        auto edge_value_first =
-          thrust::make_zip_iterator((*edgelist_weights).begin(), (*edgelist_edge_types).begin());
         std::forward_as_tuple(offsets, indices, std::tie(weights, types), std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t,
+                                             edge_t,
+                                             thrust::tuple<weight_t, edge_type_t>,
+                                             store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       } else {
-        auto edge_value_first = (*edgelist_weights).begin();
         std::forward_as_tuple(offsets, indices, weights, std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t, edge_t, weight_t, store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::move(*edgelist_weights),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       }
     }
   } else {
     if (edgelist_edge_ids) {
       if (edgelist_edge_types) {
-        auto edge_value_first =
-          thrust::make_zip_iterator((*edgelist_edge_ids).begin(), (*edgelist_edge_types).begin());
         std::forward_as_tuple(offsets, indices, std::tie(ids, types), std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t,
+                                             edge_t,
+                                             thrust::tuple<edge_id_t, edge_type_t>,
+                                             store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       } else {
-        auto edge_value_first = (*edgelist_edge_ids).begin();
         std::forward_as_tuple(offsets, indices, ids, std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t, edge_t, edge_id_t, store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::move(*edgelist_edge_ids),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       }
     } else {
       if (edgelist_edge_types) {
-        auto edge_value_first = (*edgelist_edge_types).begin();
         std::forward_as_tuple(offsets, indices, types, std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              edge_value_first,
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
+          detail::sort_and_compress_edgelist<vertex_t, edge_t, edge_type_t, store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::move(*edgelist_edge_types),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       } else {
         std::forward_as_tuple(offsets, indices, std::ignore) =
-          detail::compress_edgelist<edge_t, store_transposed>(edgelist_srcs.begin(),
-                                                              edgelist_srcs.end(),
-                                                              edgelist_dsts.begin(),
-                                                              vertex_t{0},
-                                                              std::optional<vertex_t>{std::nullopt},
-                                                              num_vertices,
-                                                              vertex_t{0},
-                                                              num_vertices,
-                                                              handle.get_stream());
-      }
-    }
-  }
-
-  edgelist_srcs.resize(0, handle.get_stream());
-  edgelist_srcs.shrink_to_fit(handle.get_stream());
-  edgelist_dsts.resize(0, handle.get_stream());
-  edgelist_dsts.shrink_to_fit(handle.get_stream());
-  if (edgelist_weights) {
-    (*edgelist_weights).resize(0, handle.get_stream());
-    (*edgelist_weights).shrink_to_fit(handle.get_stream());
-  }
-  if (edgelist_edge_ids) {
-    (*edgelist_edge_ids).resize(0, handle.get_stream());
-    (*edgelist_edge_ids).shrink_to_fit(handle.get_stream());
-  }
-  if (edgelist_edge_types) {
-    (*edgelist_edge_types).resize(0, handle.get_stream());
-    (*edgelist_edge_types).shrink_to_fit(handle.get_stream());
-  }
-
-  // segmented sort neighbors
-
-  if (weights) {
-    if (ids) {
-      if (types) {
-        detail::sort_adjacency_list(
-          handle,
-          raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-          indices.begin(),
-          indices.end(),
-          thrust::make_zip_iterator((*weights).begin(), (*ids).begin(), (*types).begin()));
-      } else {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end(),
-                                    thrust::make_zip_iterator((*weights).begin(), (*ids).begin()));
-      }
-    } else {
-      if (types) {
-        detail::sort_adjacency_list(
-          handle,
-          raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-          indices.begin(),
-          indices.end(),
-          thrust::make_zip_iterator((*weights).begin(), (*types).begin()));
-      } else {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end(),
-                                    (*weights).begin());
-      }
-    }
-  } else {
-    if (ids) {
-      if (types) {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end(),
-                                    thrust::make_zip_iterator((*ids).begin(), (*types).begin()));
-      } else {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end(),
-                                    (*ids).begin());
-      }
-    } else {
-      if (types) {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end(),
-                                    (*types).begin());
-      } else {
-        detail::sort_adjacency_list(handle,
-                                    raft::device_span<edge_t const>(offsets.data(), offsets.size()),
-                                    indices.begin(),
-                                    indices.end());
+          detail::sort_and_compress_edgelist<vertex_t, edge_t, store_transposed>(
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            vertex_t{0},
+            std::optional<vertex_t>{std::nullopt},
+            num_vertices,
+            vertex_t{0},
+            num_vertices,
+            mem_frugal_threshold,
+            handle.get_stream());
       }
     }
   }
diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh
index f57b549e1ef..01fbccaa53e 100644
--- a/cpp/src/structure/detail/structure_utils.cuh
+++ b/cpp/src/structure/detail/structure_utils.cuh
@@ -47,57 +47,38 @@ namespace cugraph {
 
 namespace detail {
 
-template <bool store_transposed,
-          typename vertex_t,
-          typename edge_t,
-          typename EdgeIterator,
-          typename EdgeValueIterator>
-struct update_edge_t {
-  raft::device_span<edge_t> offsets{};
-  raft::device_span<vertex_t> indices{};
-  EdgeValueIterator edge_value_first{};
-  vertex_t major_range_first{};
-
-  __device__ void operator()(typename thrust::iterator_traits<EdgeIterator>::value_type e) const
-  {
-    auto s      = thrust::get<0>(e);
-    auto d      = thrust::get<1>(e);
-    auto major  = store_transposed ? d : s;
-    auto minor  = store_transposed ? s : d;
-    auto start  = offsets[major - major_range_first];
-    auto degree = offsets[(major - major_range_first) + 1] - start;
-    auto idx =
-      atomicAdd(&indices[start + degree - 1], vertex_t{1});  // use the last element as a counter
-    // FIXME: we can actually store minor - minor_range_first instead of minor to save memory if
-    // minor can be larger than 32 bit but minor -  minor_range_first fits within 32 bit
-    indices[start + idx] = minor;  // overwrite the counter only if idx == degree - 1 (no race)
-    if constexpr (!std::is_same_v<EdgeValueIterator, void*>) {
-      auto value                          = thrust::get<2>(e);
-      *(edge_value_first + (start + idx)) = value;
-    }
-  }
-};
-
 template <typename edge_t, typename VertexIterator>
 rmm::device_uvector<edge_t> compute_sparse_offsets(
   VertexIterator edgelist_major_first,
   VertexIterator edgelist_major_last,
   typename thrust::iterator_traits<VertexIterator>::value_type major_range_first,
   typename thrust::iterator_traits<VertexIterator>::value_type major_range_last,
+  bool edgelist_major_sorted,
   rmm::cuda_stream_view stream_view)
 {
   rmm::device_uvector<edge_t> offsets((major_range_last - major_range_first) + 1, stream_view);
-  thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0});
-
-  auto offset_view = raft::device_span<edge_t>(offsets.data(), offsets.size());
-  thrust::for_each(rmm::exec_policy(stream_view),
-                   edgelist_major_first,
-                   edgelist_major_last,
-                   [offset_view, major_range_first] __device__(auto v) {
-                     atomicAdd(&offset_view[v - major_range_first], edge_t{1});
-                   });
-  thrust::exclusive_scan(
-    rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin());
+  if (edgelist_major_sorted) {
+    offsets.set_element_to_zero_async(0, stream_view);
+    thrust::upper_bound(rmm::exec_policy(stream_view),
+                        edgelist_major_first,
+                        edgelist_major_last,
+                        thrust::make_counting_iterator(major_range_first),
+                        thrust::make_counting_iterator(major_range_last),
+                        offsets.begin() + 1);
+  } else {
+    thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0});
+
+    auto offset_view = raft::device_span<edge_t>(offsets.data(), offsets.size());
+    thrust::for_each(rmm::exec_policy(stream_view),
+                     edgelist_major_first,
+                     edgelist_major_last,
+                     [offset_view, major_range_first] __device__(auto v) {
+                       atomicAdd(&offset_view[v - major_range_first], edge_t{1});
+                     });
+
+    thrust::exclusive_scan(
+      rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin());
+  }
 
   return offsets;
 }
@@ -156,61 +137,77 @@ std::tuple<rmm::device_uvector<edge_t>, rmm::device_uvector<vertex_t>> compress_
 }
 
 // compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid
-template <typename edge_t,
-          bool store_transposed,
-          typename VertexIterator,
-          typename EdgeValueIterator>
-std::tuple<
-  rmm::device_uvector<edge_t>,
-  rmm::device_uvector<typename thrust::iterator_traits<VertexIterator>::value_type>,
-  decltype(allocate_dataframe_buffer<typename thrust::iterator_traits<
-             EdgeValueIterator>::value_type>(size_t{0}, rmm::cuda_stream_view{})),
-  std::optional<rmm::device_uvector<typename thrust::iterator_traits<VertexIterator>::value_type>>>
-compress_edgelist(
-  VertexIterator edgelist_src_first,
-  VertexIterator edgelist_src_last,
-  VertexIterator edgelist_dst_first,
-  EdgeValueIterator edge_value_first,
-  typename thrust::iterator_traits<VertexIterator>::value_type major_range_first,
-  std::optional<typename thrust::iterator_traits<VertexIterator>::value_type>
-    major_hypersparse_first,
-  typename thrust::iterator_traits<VertexIterator>::value_type major_range_last,
-  typename thrust::iterator_traits<VertexIterator>::value_type /* minor_range_first */,
-  typename thrust::iterator_traits<VertexIterator>::value_type /* minor_range_last */,
+template <typename vertex_t, typename edge_t, typename edge_value_t, bool store_transposed>
+std::tuple<rmm::device_uvector<edge_t>,
+           rmm::device_uvector<vertex_t>,
+           decltype(allocate_dataframe_buffer<edge_value_t>(size_t{0}, rmm::cuda_stream_view{})),
+           std::optional<rmm::device_uvector<vertex_t>>>
+sort_and_compress_edgelist(
+  rmm::device_uvector<vertex_t>&& edgelist_srcs,
+  rmm::device_uvector<vertex_t>&& edgelist_dsts,
+  decltype(allocate_dataframe_buffer<edge_value_t>(0, rmm::cuda_stream_view{}))&& edgelist_values,
+  vertex_t major_range_first,
+  std::optional<vertex_t> major_hypersparse_first,
+  vertex_t major_range_last,
+  vertex_t /* minor_range_first */,
+  vertex_t /* minor_range_last */,
+  size_t mem_frugal_threshold,
   rmm::cuda_stream_view stream_view)
 {
-  using vertex_t = std::remove_cv_t<typename thrust::iterator_traits<VertexIterator>::value_type>;
-  using edge_value_t =
-    std::remove_cv_t<typename thrust::iterator_traits<EdgeValueIterator>::value_type>;
-
-  auto number_of_edges =
-    static_cast<edge_t>(thrust::distance(edgelist_src_first, edgelist_src_last));
-
-  auto offsets = compute_sparse_offsets<edge_t>(
-    store_transposed ? edgelist_dst_first : edgelist_src_first,
-    store_transposed ? edgelist_dst_first + number_of_edges : edgelist_src_last,
-    major_range_first,
-    major_range_last,
-    stream_view);
-
-  rmm::device_uvector<vertex_t> indices(number_of_edges, stream_view);
-  thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0});
-  auto values = allocate_dataframe_buffer<edge_value_t>(number_of_edges, stream_view);
-
-  auto offset_view = raft::device_span<edge_t>(offsets.data(), offsets.size());
-  auto index_view  = raft::device_span<vertex_t>(indices.data(), indices.size());
-  auto edge_first  = thrust::make_zip_iterator(
-    thrust::make_tuple(edgelist_src_first, edgelist_dst_first, edge_value_first));
-  thrust::for_each(
-    rmm::exec_policy(stream_view),
-    edge_first,
-    edge_first + number_of_edges,
-    update_edge_t<store_transposed,
-                  vertex_t,
-                  edge_t,
-                  decltype(edge_first),
-                  decltype(get_dataframe_buffer_begin(values))>{
-      offset_view, index_view, get_dataframe_buffer_begin(values), major_range_first});
+  auto edgelist_majors = std::move(store_transposed ? edgelist_dsts : edgelist_srcs);
+  auto edgelist_minors = std::move(store_transposed ? edgelist_srcs : edgelist_dsts);
+
+  rmm::device_uvector<edge_t> offsets(0, stream_view);
+  rmm::device_uvector<vertex_t> indices(0, stream_view);
+  auto values     = allocate_dataframe_buffer<edge_value_t>(0, stream_view);
+  auto pair_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin());
+  if (edgelist_minors.size() > mem_frugal_threshold) {
+    offsets = compute_sparse_offsets<edge_t>(edgelist_majors.begin(),
+                                             edgelist_majors.end(),
+                                             major_range_first,
+                                             major_range_last,
+                                             false,
+                                             stream_view);
+
+    auto pivot = major_range_first + static_cast<vertex_t>(thrust::distance(
+                                       offsets.begin(),
+                                       thrust::lower_bound(rmm::exec_policy(stream_view),
+                                                           offsets.begin(),
+                                                           offsets.end(),
+                                                           edgelist_minors.size() / 2)));
+    auto second_first =
+      detail::mem_frugal_partition(pair_first,
+                                   pair_first + edgelist_minors.size(),
+                                   get_dataframe_buffer_begin(edgelist_values),
+                                   thrust_tuple_get<thrust::tuple<vertex_t, vertex_t>, 0>{},
+                                   pivot,
+                                   stream_view);
+    thrust::sort_by_key(rmm::exec_policy(stream_view),
+                        pair_first,
+                        std::get<0>(second_first),
+                        get_dataframe_buffer_begin(edgelist_values));
+    thrust::sort_by_key(rmm::exec_policy(stream_view),
+                        std::get<0>(second_first),
+                        pair_first + edgelist_minors.size(),
+                        std::get<1>(second_first));
+  } else {
+    thrust::sort_by_key(rmm::exec_policy(stream_view),
+                        pair_first,
+                        pair_first + edgelist_minors.size(),
+                        get_dataframe_buffer_begin(edgelist_values));
+
+    offsets = compute_sparse_offsets<edge_t>(edgelist_majors.begin(),
+                                             edgelist_majors.end(),
+                                             major_range_first,
+                                             major_range_last,
+                                             true,
+                                             stream_view);
+  }
+  indices = std::move(edgelist_minors);
+  values  = std::move(edgelist_values);
+
+  edgelist_majors.resize(0, stream_view);
+  edgelist_majors.shrink_to_fit(stream_view);
 
   std::optional<rmm::device_uvector<vertex_t>> dcs_nzd_vertices{std::nullopt};
   if (major_hypersparse_first) {
@@ -226,47 +223,61 @@ compress_edgelist(
 }
 
 // compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid
-template <typename edge_t, bool store_transposed, typename VertexIterator>
-std::tuple<
-  rmm::device_uvector<edge_t>,
-  rmm::device_uvector<typename thrust::iterator_traits<VertexIterator>::value_type>,
-  std::optional<rmm::device_uvector<typename thrust::iterator_traits<VertexIterator>::value_type>>>
-compress_edgelist(
-  VertexIterator edgelist_src_first,
-  VertexIterator edgelist_src_last,
-  VertexIterator edgelist_dst_first,
-  typename thrust::iterator_traits<VertexIterator>::value_type major_range_first,
-  std::optional<typename thrust::iterator_traits<VertexIterator>::value_type>
-    major_hypersparse_first,
-  typename thrust::iterator_traits<VertexIterator>::value_type major_range_last,
-  typename thrust::iterator_traits<VertexIterator>::value_type /* minor_range_first */,
-  typename thrust::iterator_traits<VertexIterator>::value_type /* minor_range_last */,
-  rmm::cuda_stream_view stream_view)
+template <typename vertex_t, typename edge_t, bool store_transposed>
+std::tuple<rmm::device_uvector<edge_t>,
+           rmm::device_uvector<vertex_t>,
+           std::optional<rmm::device_uvector<vertex_t>>>
+sort_and_compress_edgelist(rmm::device_uvector<vertex_t>&& edgelist_srcs,
+                           rmm::device_uvector<vertex_t>&& edgelist_dsts,
+                           vertex_t major_range_first,
+                           std::optional<vertex_t> major_hypersparse_first,
+                           vertex_t major_range_last,
+                           vertex_t /* minor_range_first */,
+                           vertex_t /* minor_range_last */,
+                           size_t mem_frugal_threshold,
+                           rmm::cuda_stream_view stream_view)
 {
-  using vertex_t = std::remove_cv_t<typename thrust::iterator_traits<VertexIterator>::value_type>;
-
-  auto number_of_edges =
-    static_cast<edge_t>(thrust::distance(edgelist_src_first, edgelist_src_last));
-
-  auto offsets = compute_sparse_offsets<edge_t>(
-    store_transposed ? edgelist_dst_first : edgelist_src_first,
-    store_transposed ? edgelist_dst_first + number_of_edges : edgelist_src_last,
-    major_range_first,
-    major_range_last,
-    stream_view);
-
-  rmm::device_uvector<vertex_t> indices(number_of_edges, stream_view);
-  thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0});
-
-  auto offset_view = raft::device_span<edge_t>(offsets.data(), offsets.size());
-  auto index_view  = raft::device_span<vertex_t>(indices.data(), indices.size());
-  auto edge_first =
-    thrust::make_zip_iterator(thrust::make_tuple(edgelist_src_first, edgelist_dst_first));
-  thrust::for_each(rmm::exec_policy(stream_view),
-                   edge_first,
-                   edge_first + number_of_edges,
-                   update_edge_t<store_transposed, vertex_t, edge_t, decltype(edge_first), void*>{
-                     offset_view, index_view, static_cast<void*>(nullptr), major_range_first});
+  auto edgelist_majors = std::move(store_transposed ? edgelist_dsts : edgelist_srcs);
+  auto edgelist_minors = std::move(store_transposed ? edgelist_srcs : edgelist_dsts);
+
+  rmm::device_uvector<edge_t> offsets(0, stream_view);
+  rmm::device_uvector<vertex_t> indices(0, stream_view);
+  auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin());
+  if (edgelist_minors.size() > mem_frugal_threshold) {
+    offsets = compute_sparse_offsets<edge_t>(edgelist_majors.begin(),
+                                             edgelist_majors.end(),
+                                             major_range_first,
+                                             major_range_last,
+                                             false,
+                                             stream_view);
+
+    auto pivot = major_range_first + static_cast<vertex_t>(thrust::distance(
+                                       offsets.begin(),
+                                       thrust::lower_bound(rmm::exec_policy(stream_view),
+                                                           offsets.begin(),
+                                                           offsets.end(),
+                                                           edgelist_minors.size() / 2)));
+    auto second_first =
+      detail::mem_frugal_partition(edge_first,
+                                   edge_first + edgelist_minors.size(),
+                                   thrust_tuple_get<thrust::tuple<vertex_t, vertex_t>, 0>{},
+                                   pivot,
+                                   stream_view);
+    thrust::sort(rmm::exec_policy(stream_view), edge_first, second_first);
+    thrust::sort(rmm::exec_policy(stream_view), second_first, edge_first + edgelist_minors.size());
+  } else {
+    thrust::sort(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist_minors.size());
+    offsets = compute_sparse_offsets<edge_t>(edgelist_majors.begin(),
+                                             edgelist_majors.end(),
+                                             major_range_first,
+                                             major_range_last,
+                                             true,
+                                             stream_view);
+  }
+  indices = std::move(edgelist_minors);
+
+  edgelist_majors.resize(0, stream_view);
+  edgelist_majors.shrink_to_fit(stream_view);
 
   std::optional<rmm::device_uvector<vertex_t>> dcs_nzd_vertices{std::nullopt};
   if (major_hypersparse_first) {
diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh
index 950cca5828d..18e1af32a71 100644
--- a/cpp/src/structure/induced_subgraph_impl.cuh
+++ b/cpp/src/structure/induced_subgraph_impl.cuh
@@ -196,6 +196,7 @@ extract_induced_subgraphs(
                                              graph_ids_v.end(),
                                              size_t{0},
                                              size_t{subgraph_offsets.size() - 1},
+                                             true,
                                              handle.get_stream());
 
     dst_subgraph_offsets =
@@ -290,6 +291,7 @@ extract_induced_subgraphs(
                                            subgraph_edge_graph_ids.end(),
                                            size_t{0},
                                            size_t{subgraph_offsets.size() - 1},
+                                           true,
                                            handle.get_stream());
 
 #ifdef TIMING
diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh
index 6bc19ff4fe1..09a4dae6c64 100644
--- a/cpp/src/structure/renumber_edgelist_impl.cuh
+++ b/cpp/src/structure/renumber_edgelist_impl.cuh
@@ -367,18 +367,19 @@ std::tuple<rmm::device_uvector<vertex_t>, std::vector<vertex_t>, vertex_t> compu
   rmm::device_uvector<edge_t> sorted_local_vertex_degrees(0, handle.get_stream());
   std::optional<std::vector<size_t>> stream_pool_indices{
     std::nullopt};  // FIXME: move this inside the if statement
+
+  auto constexpr num_chunks = size_t{
+    2};  // tuning parameter, this trades off # binary searches (up to num_chunks times more binary
+         // searches can be necessary if num_unique_majors << edgelist_edge_counts[i]) and temporary
+         // buffer requirement (cut by num_chunks times), currently set to 2 to avoid peak memory
+         // usage happening in this part (especially when minor_comm_size is small)
+
   if constexpr (multi_gpu) {
     auto& comm                 = handle.get_comms();
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_rank = minor_comm.get_rank();
     auto const minor_comm_size = minor_comm.get_size();
 
-    auto constexpr num_chunks = size_t{
-      2};  // tuning parameter, this trade-offs # binary searches (up to num_chunks times more
-           // binary searches can be necessary if num_unique_majors << edgelist_edge_counts[i]) and
-           // temporary buffer requirement (cut by num_chunks times), currently set to 2 to avoid
-           // peak memory usage happening in this part (especially when minor_comm_size is small)
-
     assert(edgelist_majors.size() == minor_comm_size);
 
     auto edge_partition_major_range_sizes =
@@ -433,29 +434,30 @@ std::tuple<rmm::device_uvector<vertex_t>, std::vector<vertex_t>, vertex_t> compu
                    sorted_major_degrees.end(),
                    edge_t{0});
 
-      rmm::device_uvector<vertex_t> tmp_majors(
+      rmm::device_uvector<vertex_t> tmp_majors(0, loop_stream);
+      tmp_majors.reserve(
         (static_cast<size_t>(edgelist_edge_counts[i]) + (num_chunks - 1)) / num_chunks,
-        handle.get_stream());
+        loop_stream);
       size_t offset{0};
       for (size_t j = 0; j < num_chunks; ++j) {
         size_t this_chunk_size =
-          std::min(tmp_majors.size(), static_cast<size_t>(edgelist_edge_counts[i]) - offset);
+          std::min(tmp_majors.capacity(), static_cast<size_t>(edgelist_edge_counts[i]) - offset);
+        tmp_majors.resize(this_chunk_size, loop_stream);
         thrust::copy(rmm::exec_policy(loop_stream),
                      edgelist_majors[i] + offset,
-                     edgelist_majors[i] + offset + this_chunk_size,
+                     edgelist_majors[i] + offset + tmp_majors.size(),
                      tmp_majors.begin());
-        thrust::sort(
-          rmm::exec_policy(loop_stream), tmp_majors.begin(), tmp_majors.begin() + this_chunk_size);
+        thrust::sort(rmm::exec_policy(loop_stream), tmp_majors.begin(), tmp_majors.end());
         auto num_unique_majors =
           thrust::count_if(rmm::exec_policy(loop_stream),
                            thrust::make_counting_iterator(size_t{0}),
-                           thrust::make_counting_iterator(this_chunk_size),
+                           thrust::make_counting_iterator(tmp_majors.size()),
                            is_first_in_run_t<vertex_t const*>{tmp_majors.data()});
         rmm::device_uvector<vertex_t> tmp_keys(num_unique_majors, loop_stream);
         rmm::device_uvector<edge_t> tmp_values(num_unique_majors, loop_stream);
         thrust::reduce_by_key(rmm::exec_policy(loop_stream),
                               tmp_majors.begin(),
-                              tmp_majors.begin() + this_chunk_size,
+                              tmp_majors.end(),
                               thrust::make_constant_iterator(edge_t{1}),
                               tmp_keys.begin(),
                               tmp_values.begin());
@@ -486,44 +488,50 @@ std::tuple<rmm::device_uvector<vertex_t>, std::vector<vertex_t>, vertex_t> compu
   } else {
     assert(edgelist_majors.size() == 1);
 
-    rmm::device_uvector<vertex_t> tmp_majors(edgelist_edge_counts[0], handle.get_stream());
-    thrust::copy(handle.get_thrust_policy(),
-                 edgelist_majors[0],
-                 edgelist_majors[0] + edgelist_edge_counts[0],
-                 tmp_majors.begin());
-    thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end());
-    auto num_unique_majors =
-      thrust::count_if(handle.get_thrust_policy(),
-                       thrust::make_counting_iterator(size_t{0}),
-                       thrust::make_counting_iterator(tmp_majors.size()),
-                       is_first_in_run_t<vertex_t const*>{tmp_majors.data()});
-    rmm::device_uvector<vertex_t> tmp_keys(num_unique_majors, handle.get_stream());
-    rmm::device_uvector<edge_t> tmp_values(num_unique_majors, handle.get_stream());
-    thrust::reduce_by_key(handle.get_thrust_policy(),
-                          tmp_majors.begin(),
-                          tmp_majors.end(),
-                          thrust::make_constant_iterator(edge_t{1}),
-                          tmp_keys.begin(),
-                          tmp_values.begin());
-
-    tmp_majors.resize(0, handle.get_stream());
-    tmp_majors.shrink_to_fit(handle.get_stream());
-
     sorted_local_vertex_degrees.resize(sorted_local_vertices.size(), handle.get_stream());
     thrust::fill(handle.get_thrust_policy(),
                  sorted_local_vertex_degrees.begin(),
                  sorted_local_vertex_degrees.end(),
                  edge_t{0});
 
-    auto kv_pair_first =
-      thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin()));
-    thrust::for_each(handle.get_thrust_policy(),
-                     kv_pair_first,
-                     kv_pair_first + tmp_keys.size(),
-                     search_and_increment_degree_t<vertex_t, edge_t>{
-                       sorted_local_vertices.data(),
-                       static_cast<vertex_t>(sorted_local_vertices.size()),
-                       sorted_local_vertex_degrees.data()});
+    rmm::device_uvector<vertex_t> tmp_majors(0, handle.get_stream());
+    tmp_majors.reserve(static_cast<size_t>(edgelist_edge_counts[0] + (num_chunks - 1)) / num_chunks,
+                       handle.get_stream());
+    size_t offset{0};
+    for (size_t i = 0; i < num_chunks; ++i) {
+      size_t this_chunk_size =
+        std::min(tmp_majors.capacity(), static_cast<size_t>(edgelist_edge_counts[0]) - offset);
+      tmp_majors.resize(this_chunk_size, handle.get_stream());
+      thrust::copy(handle.get_thrust_policy(),
+                   edgelist_majors[0] + offset,
+                   edgelist_majors[0] + offset + tmp_majors.size(),
+                   tmp_majors.begin());
+      thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end());
+      auto num_unique_majors =
+        thrust::count_if(handle.get_thrust_policy(),
+                         thrust::make_counting_iterator(size_t{0}),
+                         thrust::make_counting_iterator(tmp_majors.size()),
+                         is_first_in_run_t<vertex_t const*>{tmp_majors.data()});
+      rmm::device_uvector<vertex_t> tmp_keys(num_unique_majors, handle.get_stream());
+      rmm::device_uvector<edge_t> tmp_values(num_unique_majors, handle.get_stream());
+      thrust::reduce_by_key(handle.get_thrust_policy(),
+                            tmp_majors.begin(),
+                            tmp_majors.end(),
+                            thrust::make_constant_iterator(edge_t{1}),
+                            tmp_keys.begin(),
+                            tmp_values.begin());
+
+      auto kv_pair_first =
+        thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin()));
+      thrust::for_each(handle.get_thrust_policy(),
+                       kv_pair_first,
+                       kv_pair_first + tmp_keys.size(),
+                       search_and_increment_degree_t<vertex_t, edge_t>{
+                         sorted_local_vertices.data(),
+                         static_cast<vertex_t>(sorted_local_vertices.size()),
+                         sorted_local_vertex_degrees.data()});
+      offset += this_chunk_size;
+    }
   }
 
   // 4. sort local vertices by degree (descending)
diff --git a/cpp/tests/community/mg_egonet_test.cu b/cpp/tests/community/mg_egonet_test.cu
index 42a2bba1181..6660eac3cad 100644
--- a/cpp/tests/community/mg_egonet_test.cu
+++ b/cpp/tests/community/mg_egonet_test.cu
@@ -215,6 +215,7 @@ class Tests_MGEgonet
                                                           graph_ids_v.end(),
                                                           size_t{0},
                                                           d_mg_edgelist_offsets.size() - 1,
+                                                          true,
                                                           handle_->get_stream());
 
         auto [d_reference_src, d_reference_dst, d_reference_wgt, d_reference_offsets] =
diff --git a/cpp/tests/structure/mg_induced_subgraph_test.cu b/cpp/tests/structure/mg_induced_subgraph_test.cu
index 3f3db7c5278..b7bd22dfa63 100644
--- a/cpp/tests/structure/mg_induced_subgraph_test.cu
+++ b/cpp/tests/structure/mg_induced_subgraph_test.cu
@@ -210,6 +210,7 @@ class Tests_MGInducedSubgraph
                                                         graph_ids_v.end(),
                                                         size_t{0},
                                                         size_t{d_subgraph_offsets.size() - 1},
+                                                        true,
                                                         handle_->get_stream());
 
       auto [sg_graph, sg_edge_weights, sg_number_map] = cugraph::test::mg_graph_to_sg_graph(
diff --git a/cpp/tests/utilities/test_utilities_impl.cuh b/cpp/tests/utilities/test_utilities_impl.cuh
index 3025ca7908b..856c50ad35f 100644
--- a/cpp/tests/utilities/test_utilities_impl.cuh
+++ b/cpp/tests/utilities/test_utilities_impl.cuh
@@ -183,43 +183,42 @@ graph_to_host_csr(
     }
   }
 
+  auto total_global_mem = handle.get_device_properties().totalGlobalMem;
+  size_t element_size   = sizeof(vertex_t) * 2;
+  if (d_wgt) { element_size += sizeof(weight_t); }
+  auto constexpr mem_frugal_ratio =
+    0.25;  // if the expected temporary buffer size exceeds the mem_frugal_ratio of the
+           // total_global_mem, switch to the memory frugal approach
+  auto mem_frugal_threshold =
+    static_cast<size_t>(static_cast<double>(total_global_mem / element_size) * mem_frugal_ratio);
+
   rmm::device_uvector<edge_t> d_offsets(0, handle.get_stream());
 
   if (d_wgt) {
     std::tie(d_offsets, d_dst, *d_wgt, std::ignore) =
-      detail::compress_edgelist<edge_t, store_transposed>(d_src.begin(),
-                                                          d_src.end(),
-                                                          d_dst.begin(),
-                                                          d_wgt->begin(),
-                                                          vertex_t{0},
-                                                          std::optional<vertex_t>{std::nullopt},
-                                                          graph_view.number_of_vertices(),
-                                                          vertex_t{0},
-                                                          graph_view.number_of_vertices(),
-                                                          handle.get_stream());
-
-    // segmented sort neighbors
-    detail::sort_adjacency_list(handle,
-                                raft::device_span<edge_t const>(d_offsets.data(), d_offsets.size()),
-                                d_dst.begin(),
-                                d_dst.end(),
-                                d_wgt->begin());
+      detail::sort_and_compress_edgelist<vertex_t, edge_t, weight_t, store_transposed>(
+        std::move(d_src),
+        std::move(d_dst),
+        std::move(*d_wgt),
+        vertex_t{0},
+        std::optional<vertex_t>{std::nullopt},
+        graph_view.number_of_vertices(),
+        vertex_t{0},
+        graph_view.number_of_vertices(),
+        mem_frugal_threshold,
+        handle.get_stream());
   } else {
     std::tie(d_offsets, d_dst, std::ignore) =
-      detail::compress_edgelist<edge_t, store_transposed>(d_src.begin(),
-                                                          d_src.end(),
-                                                          d_dst.begin(),
-                                                          vertex_t{0},
-                                                          std::optional<vertex_t>{std::nullopt},
-                                                          graph_view.number_of_vertices(),
-                                                          vertex_t{0},
-                                                          graph_view.number_of_vertices(),
-                                                          handle.get_stream());
-    // segmented sort neighbors
-    detail::sort_adjacency_list(handle,
-                                raft::device_span<edge_t const>(d_offsets.data(), d_offsets.size()),
-                                d_dst.begin(),
-                                d_dst.end());
+      detail::sort_and_compress_edgelist<vertex_t, edge_t, store_transposed>(
+        std::move(d_src),
+        std::move(d_dst),
+        vertex_t{0},
+        std::optional<vertex_t>{std::nullopt},
+        graph_view.number_of_vertices(),
+        vertex_t{0},
+        graph_view.number_of_vertices(),
+        mem_frugal_threshold,
+        handle.get_stream());
   }
 
   return std::make_tuple(

From 5c0bc8a19fc3f9904541de6fb9bde95495298eb4 Mon Sep 17 00:00:00 2001
From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com>
Date: Wed, 1 Nov 2023 15:41:03 -0400
Subject: [PATCH 2/3] [BUG] Check if Dask has quit to avoid throwing an
 exception and triggering a segfault on ddp exit (#3961)

Currently, when training with ddp, if dask exits before the `CuGraphStore` is cleaned up, an exception is thrown, which causes ddp to quit with an error, which then causes a segfault, making users think that the workflow has failed when it has actually succeeded. This fix gracefully displays a warning if the dask dataset can't be deleted, which resolves this issue.

Authors:
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - Vibhu Jawa (https://github.com/VibhuJawa)
  - Tingyu Wang (https://github.com/tingyu66)
  - Rick Ratzel (https://github.com/rlratzel)

URL: https://github.com/rapidsai/cugraph/pull/3961
---
 python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
index fd2172e6ade..6192cd621d5 100644
--- a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
+++ b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
@@ -320,7 +320,13 @@ def __init__(
     def __del__(self):
         if self.__is_graph_owner:
             if isinstance(self.__graph._plc_graph, dict):
-                distributed.get_client().unpublish_dataset("cugraph_graph")
+                try:
+                    distributed.get_client().unpublish_dataset("cugraph_graph")
+                except TypeError:
+                    warnings.warn(
+                        "Could not unpublish graph dataset, most likely because"
+                        " dask has already shut down."
+                    )
             del self.__graph
 
     def __make_offsets(self, input_dict):

From f4bcdc2667a15e2e5031987550322a9d29d8f713 Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Wed, 1 Nov 2023 19:19:22 -0400
Subject: [PATCH 3/3] Setup Consistent Nightly Versions for Pip and Conda
 (#3933)

See https://github.com/rapidsai/rmm/pull/1347

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)

URL: https://github.com/rapidsai/cugraph/pull/3933
---
 VERSION                                       |  1 +
 ci/build_cpp.sh                               |  4 ++-
 ci/build_python.sh                            | 15 ++++++++++
 ci/build_wheel.sh                             | 10 ++++---
 ci/release/update-version.sh                  | 22 +++-----------
 conda/recipes/cugraph-dgl/meta.yaml           |  4 +--
 conda/recipes/cugraph-pyg/meta.yaml           |  4 +--
 conda/recipes/cugraph-service/meta.yaml       |  4 +--
 conda/recipes/cugraph/meta.yaml               |  4 +--
 conda/recipes/libcugraph/meta.yaml            |  4 +--
 conda/recipes/nx-cugraph/meta.yaml            |  4 +--
 conda/recipes/pylibcugraph/meta.yaml          |  4 +--
 python/cugraph-dgl/cugraph_dgl/VERSION        |  1 +
 python/cugraph-dgl/cugraph_dgl/__init__.py    |  2 +-
 python/cugraph-dgl/cugraph_dgl/_version.py    | 26 +++++++++++++++++
 python/cugraph-dgl/pyproject.toml             |  5 +++-
 python/cugraph-dgl/setup.py                   |  7 +++--
 python/cugraph-pyg/cugraph_pyg/VERSION        |  1 +
 python/cugraph-pyg/cugraph_pyg/__init__.py    |  2 +-
 python/cugraph-pyg/cugraph_pyg/_version.py    | 26 +++++++++++++++++
 python/cugraph-pyg/pyproject.toml             |  5 +++-
 python/cugraph-pyg/setup.py                   |  4 ++-
 .../client/cugraph_service_client/VERSION     |  1 +
 .../client/cugraph_service_client/__init__.py |  2 +-
 .../client/cugraph_service_client/_version.py | 29 +++++++++++++++++++
 python/cugraph-service/client/pyproject.toml  |  5 +++-
 python/cugraph-service/client/setup.py        |  7 +++--
 .../server/cugraph_service_server/VERSION     |  1 +
 .../server/cugraph_service_server/__init__.py |  2 +-
 .../server/cugraph_service_server/_version.py | 29 +++++++++++++++++++
 python/cugraph-service/server/pyproject.toml  |  6 ++--
 python/cugraph-service/server/setup.py        |  4 ++-
 python/cugraph/cugraph/VERSION                |  1 +
 python/cugraph/cugraph/__init__.py            |  2 +-
 python/cugraph/cugraph/_version.py            | 26 +++++++++++++++++
 python/cugraph/pyproject.toml                 |  5 +++-
 python/cugraph/setup.py                       |  2 +-
 python/nx-cugraph/nx_cugraph/VERSION          |  1 +
 python/nx-cugraph/nx_cugraph/__init__.py      |  2 +-
 python/nx-cugraph/nx_cugraph/_version.py      | 26 +++++++++++++++++
 python/nx-cugraph/pyproject.toml              |  5 +++-
 python/nx-cugraph/setup.py                    |  7 +++--
 python/pylibcugraph/pylibcugraph/VERSION      |  1 +
 python/pylibcugraph/pylibcugraph/__init__.py  |  2 +-
 python/pylibcugraph/pylibcugraph/_version.py  | 26 +++++++++++++++++
 python/pylibcugraph/pyproject.toml            |  5 +++-
 python/pylibcugraph/setup.py                  |  2 +-
 47 files changed, 296 insertions(+), 62 deletions(-)
 create mode 100644 VERSION
 create mode 120000 python/cugraph-dgl/cugraph_dgl/VERSION
 create mode 100644 python/cugraph-dgl/cugraph_dgl/_version.py
 create mode 120000 python/cugraph-pyg/cugraph_pyg/VERSION
 create mode 100644 python/cugraph-pyg/cugraph_pyg/_version.py
 create mode 120000 python/cugraph-service/client/cugraph_service_client/VERSION
 create mode 100644 python/cugraph-service/client/cugraph_service_client/_version.py
 create mode 120000 python/cugraph-service/server/cugraph_service_server/VERSION
 create mode 100644 python/cugraph-service/server/cugraph_service_server/_version.py
 create mode 120000 python/cugraph/cugraph/VERSION
 create mode 100644 python/cugraph/cugraph/_version.py
 create mode 120000 python/nx-cugraph/nx_cugraph/VERSION
 create mode 100644 python/nx-cugraph/nx_cugraph/_version.py
 create mode 120000 python/pylibcugraph/pylibcugraph/VERSION
 create mode 100644 python/pylibcugraph/pylibcugraph/_version.py

diff --git a/VERSION b/VERSION
new file mode 100644
index 00000000000..a193fff41e8
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+23.12.00
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 3fb72cac08b..d0d13f99448 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -9,8 +9,10 @@ export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
 
+version=$(rapids-generate-version)
+
 rapids-logger "Begin cpp build"
 
-rapids-conda-retry mambabuild conda/recipes/libcugraph
+RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 62eb6c2ccec..90a40c539ff 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -11,8 +11,19 @@ rapids-print-env
 
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
+version=$(rapids-generate-version)
+git_commit=$(git rev-parse HEAD)
+export RAPIDS_PACKAGE_VERSION=${version}
+echo "${version}" > VERSION
+
 rapids-logger "Begin py build"
 
+package_dir="python"
+for package_name in pylibcugraph cugraph nx-cugraph cugraph-pyg cugraph-dgl; do 
+  underscore_package_name=$(echo "${package_name}" | tr "-" "_")
+  sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py"
+done
+
 # TODO: Remove `--no-test` flags once importing on a CPU
 # node works correctly
 rapids-conda-retry mambabuild \
@@ -40,6 +51,10 @@ rapids-conda-retry mambabuild \
 # built on each CUDA platform to ensure they are included in each set of
 # artifacts, since test scripts only install from one set of artifacts based on
 # the CUDA version used for the test run.
+version_file_cugraph_service_client="python/cugraph-service/client/cugraph_service_client/_version.py"
+sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" ${version_file_cugraph_service_client}
+version_file_cugraph_service_server="python/cugraph-service/server/cugraph_service_server/_version.py"
+sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" ${version_file_cugraph_service_server}
 rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 821aa25c1b9..c888c908056 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -5,13 +5,13 @@ set -euo pipefail
 
 package_name=$1
 package_dir=$2
+underscore_package_name=$(echo "${package_name}" | tr "-" "_")
 
 source rapids-configure-sccache
 source rapids-date-string
 
-# Use gha-tools rapids-pip-wheel-version to generate wheel version then
-# update the necessary files
-version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
+version=$(rapids-generate-version)
+git_commit=$(git rev-parse HEAD)
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
@@ -21,9 +21,11 @@ PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
 
 # Patch project metadata files to include the CUDA version suffix and version override.
 pyproject_file="${package_dir}/pyproject.toml"
+version_file="${package_dir}/${underscore_package_name}/_version.py"
 
-sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
 sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+echo "${version}" > VERSION
+sed -i "/^__git_commit__ / s/= .*/= \"${git_commit}\"/g" ${version_file}
 
 # For nightlies we want to ensure that we're pulling in alphas as well. The
 # easiest way to do so is to augment the spec with a constraint containing a
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index ad6426b66ff..d3dbed6ae46 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -54,24 +54,10 @@ sed_runner "s/set(cugraph_version .*)/set(cugraph_version ${NEXT_FULL_TAG})/g" p
 sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cugraph/source/conf.py
 sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cugraph/source/conf.py
 
-# Python __init__.py updates
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph/cugraph/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-dgl/cugraph_dgl/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-pyg/cugraph_pyg/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/client/cugraph_service_client/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/cugraph_service_server/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pylibcugraph/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/nx_cugraph/__init__.py
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/_nx_cugraph/__init__.py
-
-# Python pyproject.toml updates
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-dgl/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-pyg/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/client/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pyproject.toml
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/pyproject.toml
+# Centralized version file update
+# NOTE: Any script that runs in CI will need to use the gha-tools `rapids-generate-version`
+# command and echo it to the `VERSION` file to get an alpha spec of the current version
+echo "${NEXT_FULL_TAG}" > VERSION
 
 # Wheel testing script
 sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cugraph.sh
diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml
index 9e9fcd2faf1..bb85734098a 100644
--- a/conda/recipes/cugraph-dgl/meta.yaml
+++ b/conda/recipes/cugraph-dgl/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set date_string = environ['RAPIDS_DATE_STRING'] %}
@@ -10,7 +10,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml
index f4b2e9a4ee9..2714dcfa55a 100644
--- a/conda/recipes/cugraph-pyg/meta.yaml
+++ b/conda/recipes/cugraph-pyg/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set date_string = environ['RAPIDS_DATE_STRING'] %}
@@ -10,7 +10,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml
index 3d001e83e1e..ae8074ba7d3 100644
--- a/conda/recipes/cugraph-service/meta.yaml
+++ b/conda/recipes/cugraph-service/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set date_string = environ['RAPIDS_DATE_STRING'] %}
@@ -9,7 +9,7 @@ package:
   name: cugraph-service-split
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 outputs:
   - name: cugraph-service-client
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml
index 3691db508d9..65403bc8d73 100644
--- a/conda/recipes/cugraph/meta.yaml
+++ b/conda/recipes/cugraph/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
@@ -12,7 +12,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml
index 83c82adf703..66f72e6b6b5 100644
--- a/conda/recipes/libcugraph/meta.yaml
+++ b/conda/recipes/libcugraph/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
 {% set cuda_major = cuda_version.split('.')[0] %}
@@ -10,7 +10,7 @@ package:
   name: libcugraph-split
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   script_env:
diff --git a/conda/recipes/nx-cugraph/meta.yaml b/conda/recipes/nx-cugraph/meta.yaml
index 556d72e8548..cdb7bc13c23 100644
--- a/conda/recipes/nx-cugraph/meta.yaml
+++ b/conda/recipes/nx-cugraph/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set date_string = environ['RAPIDS_DATE_STRING'] %}
@@ -10,7 +10,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/pylibcugraph/meta.yaml b/conda/recipes/pylibcugraph/meta.yaml
index 083998be053..ad59c4de66f 100644
--- a/conda/recipes/pylibcugraph/meta.yaml
+++ b/conda/recipes/pylibcugraph/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
@@ -12,7 +12,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/python/cugraph-dgl/cugraph_dgl/VERSION b/python/cugraph-dgl/cugraph_dgl/VERSION
new file mode 120000
index 00000000000..d62dc733efd
--- /dev/null
+++ b/python/cugraph-dgl/cugraph_dgl/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py
index 74be4fdea3f..03ff50896a4 100644
--- a/python/cugraph-dgl/cugraph_dgl/__init__.py
+++ b/python/cugraph-dgl/cugraph_dgl/__init__.py
@@ -20,4 +20,4 @@
 import cugraph_dgl.dataloading
 import cugraph_dgl.nn
 
-__version__ = "23.12.00"
+from cugraph_dgl._version import __git_commit__, __version__
diff --git a/python/cugraph-dgl/cugraph_dgl/_version.py b/python/cugraph-dgl/cugraph_dgl/_version.py
new file mode 100644
index 00000000000..f95a4705467
--- /dev/null
+++ b/python/cugraph-dgl/cugraph_dgl/_version.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("cugraph_dgl").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml
index fa9e1c5abe5..eff7a20f0aa 100644
--- a/python/cugraph-dgl/pyproject.toml
+++ b/python/cugraph-dgl/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cugraph-dgl"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cugraph extensions for DGL"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -35,6 +35,9 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "cugraph_dgl/VERSION"}
+
 [tool.setuptools.packages.find]
 include = [
     "cugraph_dgl*",
diff --git a/python/cugraph-dgl/setup.py b/python/cugraph-dgl/setup.py
index 6991b23b0fb..afb8002af42 100644
--- a/python/cugraph-dgl/setup.py
+++ b/python/cugraph-dgl/setup.py
@@ -11,6 +11,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from setuptools import setup
+from setuptools import find_packages, setup
 
-setup()
+packages = find_packages(include=["cugraph_dgl*"])
+setup(
+    package_data={key: ["VERSION"] for key in packages},
+)
diff --git a/python/cugraph-pyg/cugraph_pyg/VERSION b/python/cugraph-pyg/cugraph_pyg/VERSION
new file mode 120000
index 00000000000..d62dc733efd
--- /dev/null
+++ b/python/cugraph-pyg/cugraph_pyg/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/cugraph-pyg/cugraph_pyg/__init__.py b/python/cugraph-pyg/cugraph_pyg/__init__.py
index ecd2f271a00..719751c966a 100644
--- a/python/cugraph-pyg/cugraph_pyg/__init__.py
+++ b/python/cugraph-pyg/cugraph_pyg/__init__.py
@@ -11,4 +11,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "23.12.00"
+from cugraph_pyg._version import __git_commit__, __version__
diff --git a/python/cugraph-pyg/cugraph_pyg/_version.py b/python/cugraph-pyg/cugraph_pyg/_version.py
new file mode 100644
index 00000000000..963052da909
--- /dev/null
+++ b/python/cugraph-pyg/cugraph_pyg/_version.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("cugraph_pyg").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml
index 84d30221d55..95b1fa27402 100644
--- a/python/cugraph-pyg/pyproject.toml
+++ b/python/cugraph-pyg/pyproject.toml
@@ -12,7 +12,7 @@ testpaths = ["cugraph_pyg/tests"]
 
 [project]
 name = "cugraph_pyg"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cugraph_pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics."
 authors = [
     { name = "NVIDIA Corporation" },
@@ -38,6 +38,9 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "cugraph_pyg/VERSION"}
+
 [tool.setuptools.packages.find]
 include = [
     "cugraph_pyg*",
diff --git a/python/cugraph-pyg/setup.py b/python/cugraph-pyg/setup.py
index 1f7db1d3772..50f023050bf 100644
--- a/python/cugraph-pyg/setup.py
+++ b/python/cugraph-pyg/setup.py
@@ -14,7 +14,7 @@
 import os
 import shutil
 
-from setuptools import Command, setup
+from setuptools import Command, find_packages, setup
 
 from setuputils import get_environment_option
 
@@ -59,6 +59,8 @@ def run(self):
         os.system("rm -rf *.egg-info")
 
 
+packages = find_packages(include=["cugraph_pyg*"])
 setup(
     cmdclass={"clean": CleanCommand},
+    package_data={key: ["VERSION"] for key in packages},
 )
diff --git a/python/cugraph-service/client/cugraph_service_client/VERSION b/python/cugraph-service/client/cugraph_service_client/VERSION
new file mode 120000
index 00000000000..a4e948506b8
--- /dev/null
+++ b/python/cugraph-service/client/cugraph_service_client/VERSION
@@ -0,0 +1 @@
+../../../../VERSION
\ No newline at end of file
diff --git a/python/cugraph-service/client/cugraph_service_client/__init__.py b/python/cugraph-service/client/cugraph_service_client/__init__.py
index a0361abedd3..a9a96ae6c16 100644
--- a/python/cugraph-service/client/cugraph_service_client/__init__.py
+++ b/python/cugraph-service/client/cugraph_service_client/__init__.py
@@ -35,4 +35,4 @@
 from cugraph_service_client.client import CugraphServiceClient
 from cugraph_service_client.remote_graph import RemoteGraph
 
-__version__ = "23.12.00"
+from cugraph_service_client._version import __git_commit__, __version__
diff --git a/python/cugraph-service/client/cugraph_service_client/_version.py b/python/cugraph-service/client/cugraph_service_client/_version.py
new file mode 100644
index 00000000000..344361973bb
--- /dev/null
+++ b/python/cugraph-service/client/cugraph_service_client/_version.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("cugraph_service_client")
+    .joinpath("VERSION")
+    .read_text()
+    .strip()
+)
+__git_commit__ = ""
diff --git a/python/cugraph-service/client/pyproject.toml b/python/cugraph-service/client/pyproject.toml
index 7f702252f02..59539693877 100644
--- a/python/cugraph-service/client/pyproject.toml
+++ b/python/cugraph-service/client/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cugraph-service-client"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cuGraph Service client"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -35,6 +35,9 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "cugraph_service_client/VERSION"}
+
 [tool.setuptools.packages.find]
 include = [
     "cugraph_service_client",
diff --git a/python/cugraph-service/client/setup.py b/python/cugraph-service/client/setup.py
index 811a12c50b7..61c758cef4a 100644
--- a/python/cugraph-service/client/setup.py
+++ b/python/cugraph-service/client/setup.py
@@ -11,6 +11,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from setuptools import setup
+from setuptools import find_packages, setup
 
-setup()
+packages = find_packages(include=["cugraph_service_client*"])
+setup(
+    package_data={key: ["VERSION"] for key in packages},
+)
diff --git a/python/cugraph-service/server/cugraph_service_server/VERSION b/python/cugraph-service/server/cugraph_service_server/VERSION
new file mode 120000
index 00000000000..a4e948506b8
--- /dev/null
+++ b/python/cugraph-service/server/cugraph_service_server/VERSION
@@ -0,0 +1 @@
+../../../../VERSION
\ No newline at end of file
diff --git a/python/cugraph-service/server/cugraph_service_server/__init__.py b/python/cugraph-service/server/cugraph_service_server/__init__.py
index 87d35005195..02473f0ea47 100644
--- a/python/cugraph-service/server/cugraph_service_server/__init__.py
+++ b/python/cugraph-service/server/cugraph_service_server/__init__.py
@@ -61,4 +61,4 @@ def start_server_blocking(
     server.serve()  # blocks until Ctrl-C (kill -2)
 
 
-__version__ = "23.12.00"
+from cugraph_service_server._version import __git_commit__, __version__
diff --git a/python/cugraph-service/server/cugraph_service_server/_version.py b/python/cugraph-service/server/cugraph_service_server/_version.py
new file mode 100644
index 00000000000..7da31f78767
--- /dev/null
+++ b/python/cugraph-service/server/cugraph_service_server/_version.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("cugraph_service_server")
+    .joinpath("VERSION")
+    .read_text()
+    .strip()
+)
+__git_commit__ = ""
diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml
index 3c77cf01c2c..f50b33b3f15 100644
--- a/python/cugraph-service/server/pyproject.toml
+++ b/python/cugraph-service/server/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cugraph-service-server"
-version = "23.12.00"
+dynamic = ["version", "entry-points"]
 description = "cuGraph Service server"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -39,7 +39,6 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
 ]
-dynamic = ["entry-points"]
 
 [project.optional-dependencies]
 test = [
@@ -62,6 +61,9 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "cugraph_service_server/VERSION"}
+
 [tool.setuptools.packages.find]
 include = [
     "cugraph_service_server",
diff --git a/python/cugraph-service/server/setup.py b/python/cugraph-service/server/setup.py
index 5203b76c659..91864168e2c 100644
--- a/python/cugraph-service/server/setup.py
+++ b/python/cugraph-service/server/setup.py
@@ -11,12 +11,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from setuptools import setup
+from setuptools import find_packages, setup
 
+packages = find_packages(include=["cugraph_service_server*"])
 setup(
     entry_points={
         "console_scripts": [
             "cugraph-service-server=cugraph_service_server.__main__:main"
         ],
     },
+    package_data={key: ["VERSION"] for key in packages},
 )
diff --git a/python/cugraph/cugraph/VERSION b/python/cugraph/cugraph/VERSION
new file mode 120000
index 00000000000..d62dc733efd
--- /dev/null
+++ b/python/cugraph/cugraph/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py
index f3a335183f3..f635d215696 100644
--- a/python/cugraph/cugraph/__init__.py
+++ b/python/cugraph/cugraph/__init__.py
@@ -120,4 +120,4 @@
 
 from cugraph import exceptions
 
-__version__ = "23.12.00"
+from cugraph._version import __git_commit__, __version__
diff --git a/python/cugraph/cugraph/_version.py b/python/cugraph/cugraph/_version.py
new file mode 100644
index 00000000000..710afb87e29
--- /dev/null
+++ b/python/cugraph/cugraph/_version.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("cugraph").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml
index 5e17be40e7b..aaa301fa05f 100644
--- a/python/cugraph/pyproject.toml
+++ b/python/cugraph/pyproject.toml
@@ -20,7 +20,7 @@ testpaths = ["cugraph/tests"]
 
 [project]
 name = "cugraph"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cuGraph - RAPIDS GPU Graph Analytics"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -69,3 +69,6 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 
 [tool.setuptools]
 license-files = ["LICENSE"]
+
+[tool.setuptools.dynamic]
+version = {file = "cugraph/VERSION"}
diff --git a/python/cugraph/setup.py b/python/cugraph/setup.py
index aa3a5fb56a7..81916444cfd 100644
--- a/python/cugraph/setup.py
+++ b/python/cugraph/setup.py
@@ -46,7 +46,7 @@ def run(self):
 packages = find_packages(include=["cugraph*"])
 setup(
     packages=packages,
-    package_data={key: ["*.pxd", "*.yaml"] for key in packages},
+    package_data={key: ["VERSION", "*.pxd", "*.yaml"] for key in packages},
     cmdclass={"clean": CleanCommand},
     zip_safe=False,
 )
diff --git a/python/nx-cugraph/nx_cugraph/VERSION b/python/nx-cugraph/nx_cugraph/VERSION
new file mode 120000
index 00000000000..d62dc733efd
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/nx-cugraph/nx_cugraph/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py
index 25d44212264..3a8f0996e9c 100644
--- a/python/nx-cugraph/nx_cugraph/__init__.py
+++ b/python/nx-cugraph/nx_cugraph/__init__.py
@@ -29,4 +29,4 @@
 from . import algorithms
 from .algorithms import *
 
-__version__ = "23.12.00"
+from nx_cugraph._version import __git_commit__, __version__
diff --git a/python/nx-cugraph/nx_cugraph/_version.py b/python/nx-cugraph/nx_cugraph/_version.py
new file mode 100644
index 00000000000..868a2e19475
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/_version.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("nx_cugraph").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml
index 7e51efd4fe4..f309f4797a7 100644
--- a/python/nx-cugraph/pyproject.toml
+++ b/python/nx-cugraph/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nx-cugraph"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cugraph backend for NetworkX"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -61,6 +61,9 @@ cugraph = "_nx_cugraph:get_info"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "nx_cugraph/VERSION"}
+
 [tool.setuptools.packages.find]
 include = [
     "nx_cugraph*",
diff --git a/python/nx-cugraph/setup.py b/python/nx-cugraph/setup.py
index 87c0e10646d..c4ab535923b 100644
--- a/python/nx-cugraph/setup.py
+++ b/python/nx-cugraph/setup.py
@@ -10,6 +10,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from setuptools import setup
+from setuptools import find_packages, setup
 
-setup()
+packages = find_packages(include=["nx_cugraph*"])
+setup(
+    package_data={key: ["VERSION"] for key in packages},
+)
diff --git a/python/pylibcugraph/pylibcugraph/VERSION b/python/pylibcugraph/pylibcugraph/VERSION
new file mode 120000
index 00000000000..d62dc733efd
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index 2aec0b98a25..30f1c2d0fb1 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -98,4 +98,4 @@
 
 from pylibcugraph import exceptions
 
-__version__ = "23.12.00"
+from pylibcugraph._version import __git_commit__, __version__
diff --git a/python/pylibcugraph/pylibcugraph/_version.py b/python/pylibcugraph/pylibcugraph/_version.py
new file mode 100644
index 00000000000..5dca7e48b3f
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/_version.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+# Read VERSION file from the module that is symlinked to VERSION file
+# in the root of the repo at build time or copied to the module at
+# installation. VERSION is a separate file that allows CI build-time scripts
+# to update version info (including commit hashes) without modifying
+# source files.
+__version__ = (
+    importlib.resources.files("pylibcugraph").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml
index f1b439debd2..96f5ec84efb 100644
--- a/python/pylibcugraph/pyproject.toml
+++ b/python/pylibcugraph/pyproject.toml
@@ -19,7 +19,7 @@ testpaths = ["pylibcugraph/tests"]
 
 [project]
 name = "pylibcugraph"
-version = "23.12.00"
+dynamic = ["version"]
 description = "pylibcugraph - Python bindings for the libcugraph cuGraph C/C++/CUDA library"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -56,3 +56,6 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 
 [tool.setuptools]
 license-files = ["LICENSE"]
+
+[tool.setuptools.dynamic]
+version = {file = "pylibcugraph/VERSION"}
diff --git a/python/pylibcugraph/setup.py b/python/pylibcugraph/setup.py
index f1a419f31bb..a6c1bda3b5b 100644
--- a/python/pylibcugraph/setup.py
+++ b/python/pylibcugraph/setup.py
@@ -54,7 +54,7 @@ def exclude_libcxx_symlink(cmake_manifest):
 packages = find_packages(include=["pylibcugraph*"])
 setup(
     packages=packages,
-    package_data={key: ["*.pxd"] for key in packages},
+    package_data={key: ["VERSION", "*.pxd"] for key in packages},
     cmake_process_manifest_hook=exclude_libcxx_symlink,
     cmdclass={"clean": CleanCommand},
     zip_safe=False,