Mark kernels as internal #4098

Merged
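This pull request marks cugraph's __global__ CUDA kernels as static, giving them internal linkage so their mangled names no longer appear in the shared library's exported symbol table, and adds a conda-cpp-checks job to the PR and nightly workflows to enforce this. A minimal sketch of the linkage change (hypothetical kernel names, not taken from this diff):

// Before: external linkage. The mangled kernel name is exported from the
// shared library built from this translation unit.
__global__ void scale_exported(float* data, float alpha, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) { data[i] *= alpha; }
}

// After: internal linkage. The kernel is private to this translation unit,
// so it never shows up in the library's public symbol table.
__global__ static void scale_internal(float* data, float alpha, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) { data[i] *= alpha; }
}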
.github/workflows/pr.yaml (9 additions, 0 deletions)
@@ -15,6 +15,7 @@ jobs:
       - checks
       - conda-cpp-build
       - conda-cpp-tests
+      - conda-cpp-checks
       - conda-notebook-tests
       - conda-python-build
       - conda-python-tests
@@ -52,6 +53,14 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
       build_type: pull-request
+  conda-cpp-checks:
+    needs: conda-cpp-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/[email protected]
+    with:
+      build_type: pull-request
+      enable_check_symbols: true
+      symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
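In the job above, enable_check_symbols switches on a scan of the built library's exported symbols, and symbol_exclusions is a regular expression over demangled names that the scan ignores; anything else still exported fails the job. A hedged illustration of the regex semantics (this is not the shared workflow's implementation, and the symbol strings are made up):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main()
{
  std::regex const exclusions{
    "(cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)"};

  // Hypothetical demangled symbols, for illustration only.
  std::vector<std::string> const demangled{
    "void writeEdgeCountsKernel<int>(int const*, int*, int)",        // exempt
    "void cugraph::detail::decompress_to_edgelist_mid_degree(...)"   // flagged if still exported
  };

  for (auto const& name : demangled) {
    bool const exempt = std::regex_search(name, exclusions);
    std::cout << (exempt ? "exempt:  " : "flagged: ") << name << '\n';
  }
}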
.github/workflows/test.yaml (10 additions, 0 deletions)
@@ -14,6 +14,16 @@ on:
         type: string
 
 jobs:
+  conda-cpp-checks:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/[email protected]
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      enable_check_symbols: true
+      symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-cpp-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
cpp/include/cugraph/detail/decompress_edge_partition.cuh (2 additions, 2 deletions)
@@ -44,7 +44,7 @@ namespace detail {
 int32_t constexpr decompress_edge_partition_block_size = 1024;
 
 template <typename vertex_t, typename edge_t, bool multi_gpu>
-__global__ void decompress_to_edgelist_mid_degree(
+__global__ static void decompress_to_edgelist_mid_degree(
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
   vertex_t major_range_first,
   vertex_t major_range_last,
@@ -74,7 +74,7 @@ __global__ void decompress_to_edgelist_mid_degree(
 }
 
 template <typename vertex_t, typename edge_t, bool multi_gpu>
-__global__ void decompress_to_edgelist_high_degree(
+__global__ static void decompress_to_edgelist_high_degree(
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
   vertex_t major_range_first,
   vertex_t major_range_last,
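decompress_edge_partition.cuh is a header, so these kernel templates are compiled into every translation unit that includes it; without static, each instantiation is emitted as a weak symbol that surfaces in the shared library. With static, every including .cu file keeps a private copy and exports nothing, at the cost of some duplicated code per translation unit. A sketch of the pattern in a hypothetical header (not from this diff):

// my_kernels.cuh: with static, each including translation unit gets its
// own private instantiation, and nothing is exported from the library.
template <typename T>
__global__ static void fill_kernel(T* out, T value, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) { out[i] = value; }
}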
cpp/libcugraph_etl/include/hash/helper_functions.cuh (2 additions, 2 deletions)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -131,7 +131,7 @@ __forceinline__ __device__ void store_pair_vectorized(pair_type* __restrict__ co
 }
 
 template <typename value_type, typename size_type, typename key_type, typename elem_type>
-__global__ void init_hashtbl(value_type* __restrict__ const hashtbl_values,
+__global__ static void init_hashtbl(value_type* __restrict__ const hashtbl_values,
                              const size_type n,
                              const key_type key_val,
                              const elem_type elem_val)
cpp/libcugraph_etl/src/renumbering.cu (10 additions, 10 deletions)
@@ -270,7 +270,7 @@ __device__ __inline__ int32_t validate_ht_col_insert(volatile int32_t* ptr_col)
   return col;
 }
 
-__global__ void concat_and_create_histogram(int8_t* col_1,
+__global__ static void concat_and_create_histogram(int8_t* col_1,
                                             int32_t* offset_1,
                                             int8_t* col_2,
                                             int32_t* offset_2,
@@ -349,7 +349,7 @@ __global__ void concat_and_create_histogram(int8_t* col_1,
   }
 }
 
-__global__ void concat_and_create_histogram_2(int8_t* col_1,
+__global__ static void concat_and_create_histogram_2(int8_t* col_1,
                                               int32_t* offset_1,
                                               int8_t* col_2,
                                               int32_t* offset_2,
@@ -452,7 +452,7 @@ __global__ void concat_and_create_histogram_2(int8_t* col_1,
 }
 
 template <typename T>
-__global__ void set_src_vertex_idx(int8_t* col_1,
+__global__ static void set_src_vertex_idx(int8_t* col_1,
                                    int32_t* offset_1,
                                    int8_t* col_2,
                                    int32_t* offset_2,
@@ -509,7 +509,7 @@ __global__ void set_src_vertex_idx(int8_t* col_1,
 }
 
 template <typename T>
-__global__ void set_dst_vertex_idx(int8_t* col_1,
+__global__ static void set_dst_vertex_idx(int8_t* col_1,
                                    int32_t* offset_1,
                                    int8_t* col_2,
                                    int32_t* offset_2,
@@ -585,7 +585,7 @@ __global__ void set_dst_vertex_idx(int8_t* col_1,
   }
 }
 
-__global__ void create_mapping_histogram(uint32_t* hash_value,
+__global__ static void create_mapping_histogram(uint32_t* hash_value,
                                          str_hash_value* payload,
                                          cudf_map_type hash_map,
                                          accum_type count)
@@ -595,7 +595,7 @@ __global__ void create_mapping_histogram(uint32_t* hash_value,
   if (idx < count) { auto it = hash_map.insert(thrust::make_pair(hash_value[idx], payload[idx])); }
 }
 
-__global__ void assign_histogram_idx(cudf_map_type cuda_map_obj,
+__global__ static void assign_histogram_idx(cudf_map_type cuda_map_obj,
                                      size_t slot_count,
                                      str_hash_value* key,
                                      uint32_t* value,
@@ -621,7 +621,7 @@ __global__ void assign_histogram_idx(cudf_map_type cuda_map_obj,
   }
 }
 
-__global__ void set_vertex_indices(str_hash_value* ht_value_payload, accum_type count)
+__global__ static void set_vertex_indices(str_hash_value* ht_value_payload, accum_type count)
 {
   accum_type tid = threadIdx.x + blockIdx.x * blockDim.x;
   // change count_ to renumber_idx
@@ -630,7 +630,7 @@ __global__ void set_vertex_indices(str_hash_value* ht_value_payload, accum_type
   }
 }
 
-__global__ void set_output_col_offsets(str_hash_value* row_col_pair,
+__global__ static void set_output_col_offsets(str_hash_value* row_col_pair,
                                        int32_t* out_col1_offset,
                                        int32_t* out_col2_offset,
                                        int dst_pair_match,
@@ -653,7 +653,7 @@ __global__ void set_output_col_offsets(str_hash_value* row_col_pair,
   }
 }
 
-__global__ void offset_buffer_size_comp(int32_t* out_col1_length,
+__global__ static void offset_buffer_size_comp(int32_t* out_col1_length,
                                         int32_t* out_col2_length,
                                         int32_t* out_col1_offsets,
                                         int32_t* out_col2_offsets,
@@ -673,7 +673,7 @@ __global__ void offset_buffer_size_comp(int32_t* out_col1_length,
   }
 }
 
-__global__ void select_unrenumber_string(str_hash_value* idx_to_col_row,
+__global__ static void select_unrenumber_string(str_hash_value* idx_to_col_row,
                                          int32_t total_elements,
                                          int8_t* src_col1,
                                          int8_t* src_col2,
cpp/src/community/legacy/ecg.cu (1 addition, 1 deletion)
@@ -52,7 +52,7 @@ binsearch_maxle(const IndexType* vec, const IndexType val, IndexType low, IndexT
 // FIXME: This shouldn't need to be a custom kernel, this
 // seems like it should just be a thrust::transform
 template <typename IdxT, typename ValT>
-__global__ void match_check_kernel(
+__global__ static void match_check_kernel(
   IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights)
 {
   IdxT tid = blockIdx.x * blockDim.x + threadIdx.x;
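The FIXME above predates this change: the kernel is a flat per-edge loop, which is exactly the shape Thrust handles. A hedged sketch of that refactor (the lambda body is a guess at the kernel's logic, which this diff does not show; binsearch_maxle is the device helper visible in the hunk header, and the device lambda needs nvcc's extended-lambda flag):

#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

template <typename IdxT, typename ValT>
void match_check_with_thrust(
  IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights)
{
  thrust::for_each_n(thrust::device,
                     thrust::make_counting_iterator<IdxT>(0),
                     size,
                     [=] __device__(IdxT e) {
                       // Find the source vertex of edge e from the CSR offsets,
                       // then bump the edge weight when both endpoints landed in
                       // the same partition (illustrative, not the actual body).
                       IdxT src = binsearch_maxle(offsets, e, IdxT{0}, num_verts - 1);
                       if (parts[src] == parts[indices[e]]) { weights[e] += 1; }
                     });
}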
cpp/src/components/legacy/weak_cc.cuh (15 additions, 15 deletions)
@@ -59,15 +59,15 @@ class WeakCCState {
 };
 
 template <typename vertex_t, typename edge_t, int TPB_X = 32>
-__global__ void weak_cc_label_device(vertex_t* labels,
-                                     edge_t const* offsets,
-                                     vertex_t const* indices,
-                                     edge_t nnz,
-                                     bool* fa,
-                                     bool* xa,
-                                     bool* m,
-                                     vertex_t startVertexId,
-                                     vertex_t batchSize)
+__global__ static void weak_cc_label_device(vertex_t* labels,
+                                            edge_t const* offsets,
+                                            vertex_t const* indices,
+                                            edge_t nnz,
+                                            bool* fa,
+                                            bool* xa,
+                                            bool* m,
+                                            vertex_t startVertexId,
+                                            vertex_t batchSize)
 {
   vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;
   if (tid < batchSize) {
@@ -118,11 +118,11 @@ __global__ void weak_cc_label_device(vertex_t* labels,
 }
 
 template <typename vertex_t, int TPB_X = 32, typename Lambda>
-__global__ void weak_cc_init_label_kernel(vertex_t* labels,
-                                          vertex_t startVertexId,
-                                          vertex_t batchSize,
-                                          vertex_t MAX_LABEL,
-                                          Lambda filter_op)
+__global__ static void weak_cc_init_label_kernel(vertex_t* labels,
+                                                 vertex_t startVertexId,
+                                                 vertex_t batchSize,
+                                                 vertex_t MAX_LABEL,
+                                                 Lambda filter_op)
 {
   /** F1 and F2 in the paper correspond to fa and xa */
   /** Cd in paper corresponds to db_cluster */
@@ -134,7 +134,7 @@ __global__ void weak_cc_init_label_kernel(vertex_t* labels,
 }
 
 template <typename vertex_t, int TPB_X = 32>
-__global__ void weak_cc_init_all_kernel(
+__global__ static void weak_cc_init_all_kernel(
   vertex_t* labels, bool* fa, bool* xa, vertex_t N, vertex_t MAX_LABEL)
 {
   vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;
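A note on the alternative spelling: wrapping kernels in an anonymous namespace gives the same internal linkage without repeating static on every signature; this PR standardizes on static. A sketch of the alternative, with an illustrative body since the real one is elided above:

namespace {

template <typename vertex_t, int TPB_X = 32>
__global__ void init_all_kernel_alt(
  vertex_t* labels, bool* fa, bool* xa, vertex_t N, vertex_t MAX_LABEL)
{
  vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;
  if (tid < N) {
    labels[tid] = MAX_LABEL;  // illustrative: sentinel for "unlabeled"
    fa[tid]     = true;       // illustrative frontier flag
    xa[tid]     = false;      // illustrative exchange flag
  }
}

}  // anonymous namespace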