diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu
index 8913ce22da8..f012f7ce09a 100644
--- a/cpp/src/text/vocabulary_tokenize.cu
+++ b/cpp/src/text/vocabulary_tokenize.cu
@@ -240,10 +240,10 @@ CUDF_KERNEL void token_counts_fn(cudf::column_device_view const d_strings,
     return;
   }
 
-  auto const offsets =
-    d_strings.child(cudf::strings_column_view::offsets_column_index).data<cudf::size_type>();
-  auto const offset = offsets[str_idx + d_strings.offset()] - offsets[d_strings.offset()];
-  auto const chars_begin = d_strings.data<char>() + offsets[d_strings.offset()];
+  auto const offsets = d_strings.child(cudf::strings_column_view::offsets_column_index);
+  auto const offsets_itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
+  auto const offset = offsets_itr[str_idx + d_strings.offset()] - offsets_itr[d_strings.offset()];
+  auto const chars_begin = d_strings.data<char>() + offsets_itr[d_strings.offset()];
   auto const begin = d_str.data();
   auto const end = begin + d_str.size_bytes();