diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py
index bb61966..1999463 100755
--- a/quantile_forest/_quantile_forest.py
+++ b/quantile_forest/_quantile_forest.py
@@ -51,29 +51,16 @@ class calls the ``fit`` method of the ``ForestRegressor`` and creates a
     param_validation = False
 from sklearn.utils.validation import check_is_fitted
 
-from ._quantile_forest_fast import QuantileForest, generate_unsampled_indices, map_leaf_nodes
+from ._quantile_forest_fast import (
+    QuantileForest,
+    generate_unsampled_indices,
+    group_by_value,
+    map_leaf_nodes,
+)
 
 sklearn_version = parse_version(sklearn.__version__)
 
 
-def _generate_unsampled_indices(sample_indices, n_total_samples, duplicates=None):
-    """Private function used by forest._get_y_train_leaves function."""
-    if duplicates is None:
-        duplicates = []
-    return generate_unsampled_indices(sample_indices, n_total_samples, duplicates)
-
-
-def _group_by_value(a):
-    """Private function used by forest._get_y_train_leaves function."""
-    sort_idx = np.argsort(a)
-    a_sorted = a[sort_idx]
-    unq_first = np.concatenate(([True], a_sorted[1:] != a_sorted[:-1]))
-    unq_items = a_sorted[unq_first]
-    unq_indices = np.flatnonzero(unq_first)
-    unq_idx = np.array_split(sort_idx, unq_indices[1:])
-    return unq_items, unq_idx
-
-
 class BaseForestQuantileRegressor(ForestRegressor):
     """
     Base class for quantile regression forests.
@@ -329,7 +316,7 @@ def _get_y_train_leaves(self, X, y, sorter=None, sample_weight=None):
 
         for i, estimator in enumerate(self.estimators_):
             # Group training indices by leaf node.
-            leaf_indices, leaf_values_list = _group_by_value(X_leaves_bootstrap[:, i])
+            leaf_indices, leaf_values_list = group_by_value(X_leaves_bootstrap[:, i])
 
             if leaf_subsample:
                 random.seed(estimator.random_state)
@@ -552,8 +539,10 @@ def _get_unsampled_indices(self, estimator, duplicates=None):
         sample_indices = _generate_sample_indices(
             estimator.random_state, n_train_samples, n_samples_bootstrap
         )
-        unsampled_indices = _generate_unsampled_indices(
-            sample_indices, n_train_samples, duplicates=duplicates
+        unsampled_indices = generate_unsampled_indices(
+            sample_indices,
+            n_train_samples,
+            duplicates=[] if duplicates is None else duplicates,
         )
 
         return np.asarray(unsampled_indices)
diff --git a/quantile_forest/_quantile_forest_fast.pyx b/quantile_forest/_quantile_forest_fast.pyx
index 5b685cb..2df4c3a 100755
--- a/quantile_forest/_quantile_forest_fast.pyx
+++ b/quantile_forest/_quantile_forest_fast.pyx
@@ -1,4 +1,5 @@
 from libc.math cimport ceil, fabs, floor, round
+from libc.stdlib cimport free, malloc
 from libc.string cimport memset
 from libcpp.algorithm cimport sort as sort_cpp
 from libcpp.map cimport map
@@ -557,6 +558,87 @@ cpdef vector[intp_t] generate_unsampled_indices(
     return unsampled_indices
 
 
+cpdef group_by_value(cnp.ndarray[intp_t, ndim=1] a):
+    """Group indices of a sorted array based on unique values.
+
+    Parameters
+    ----------
+    a : array-like of shape (n_samples)
+        Input array. The array is expected to contain integers, and the
+        function will group the indices of elements with the same value.
+
+    Returns
+    -------
+    np_unq_items : array-like
+        A NumPy array containing the unique values from the input array `a`,
+        sorted in ascending order.
+
+    unq_idx : list of array-like
+        A list of NumPy arrays, where each array contains the indices of the
+        input array `a` corresponding to each unique value in `np_unq_items`.
+        The indices are sorted based on the original order in `a`.
+    """
+    cdef intp_t num_samples
+    cdef intp_t i
+    cdef cnp.ndarray[intp_t, ndim=1] sort_idx
+    cdef cnp.ndarray[intp_t, ndim=1] a_sorted
+    cdef intp_t prev_value
+    cdef intp_t count, unq_count_idx
+    cdef intp_t* unq_count
+    cdef bint* unq_first
+    cdef intp_t* unq_first_indices
+
+    num_samples = a.shape[0]
+    sort_idx = np.argsort(a)
+    a_sorted = a[sort_idx]
+    unq_count_idx = 0
+    unq_count = <intp_t*>malloc(num_samples * sizeof(intp_t))
+    unq_first = <bint*>malloc(num_samples * sizeof(bint))
+    unq_first_indices = <intp_t*>malloc(num_samples * sizeof(intp_t))
+
+    if unq_count == NULL or unq_first == NULL or unq_first_indices == NULL:
+        raise MemoryError("Memory allocation failed.")
+
+    with nogil:
+        # Initialize first element.
+        prev_value = a_sorted[0]
+        unq_first[0] = 1
+        unq_first_indices[0] = 0
+        count = 1
+
+        # Loop through sorted array and identify unique values.
+        for i in range(1, num_samples):
+            if a_sorted[i] != prev_value:
+                unq_first[i] = 1
+                unq_first_indices[unq_count_idx + 1] = i
+                unq_count[unq_count_idx] = count
+                unq_count_idx += 1
+                count = 1
+                prev_value = a_sorted[i]
+            else:
+                unq_first[i] = 0
+                count += 1
+
+        # Assign final count.
+        unq_count[unq_count_idx] = count
+        unq_count_idx += 1
+
+    # Allocate arrays for the output.
+    np_unq_items = np.empty(unq_count_idx, dtype=np.int64)
+    unq_idx = [None] * unq_count_idx
+
+    for i in range(unq_count_idx):
+        np_unq_items[i] = a_sorted[unq_first_indices[i]]
+        unq_idx[i] = sort_idx[unq_first_indices[i]:unq_first_indices[i] + unq_count[i]]
+
+    # Free allocated memory.
+    free(unq_count)
+    free(unq_first)
+    free(unq_first_indices)
+
+    return np_unq_items, unq_idx
+
+
 cpdef map_leaf_nodes(
     cnp.ndarray[intp_t, ndim=3] y_train_leaves,
     cnp.ndarray[intp_t, ndim=2] bootstrap_indices,
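
The removed pure-Python `_group_by_value` helper and the new Cython `group_by_value` are meant to be drop-in equivalents for grouping bootstrap training indices by leaf node. As a rough sanity check (a sketch only, not part of the patch, and assuming the extension has been rebuilt so that `quantile_forest._quantile_forest_fast` exposes the new function), the two can be compared side by side:

    import numpy as np

    from quantile_forest._quantile_forest_fast import group_by_value


    def _group_by_value_reference(a):
        # NumPy implementation removed from _quantile_forest.py by this patch.
        sort_idx = np.argsort(a)
        a_sorted = a[sort_idx]
        unq_first = np.concatenate(([True], a_sorted[1:] != a_sorted[:-1]))
        unq_items = a_sorted[unq_first]
        unq_indices = np.flatnonzero(unq_first)
        unq_idx = np.array_split(sort_idx, unq_indices[1:])
        return unq_items, unq_idx


    # Illustrative leaf-node ids; dtype must be np.intp to match the intp_t buffer.
    leaves = np.asarray([5, 3, 5, 7, 3, 3], dtype=np.intp)

    ref_items, ref_groups = _group_by_value_reference(leaves)
    new_items, new_groups = group_by_value(leaves)

    assert np.array_equal(ref_items, new_items)
    assert all(np.array_equal(r, n) for r, n in zip(ref_groups, new_groups))

Both return the sorted unique leaf ids plus, for each id, the positions in `leaves` holding that id; the Cython version performs the same grouping in a single pass over the sorted values under `nogil`, rather than building an intermediate boolean mask and calling `np.array_split`.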