Mark all cugraph CUDA kernels with internal linkage

rapidsai · Jan 17, 2024 · fc7ba6d · fc7ba6d
1 parent 8672534
commit fc7ba6d
Show file tree

Hide file tree

Showing 19 changed files with 92 additions and 72 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -15,6 +15,7 @@ jobs:
       - checks
       - conda-cpp-build
       - conda-cpp-tests
+      - conda-cpp-checks
       - conda-notebook-tests
       - conda-python-build
       - conda-python-tests
@@ -50,6 +51,14 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02
     with:
       build_type: pull-request
+  conda-cpp-checks:
+    needs: conda-cpp-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02
+    with:
+      build_type: pull-request
+      enable_check_symbols: true
+      symbol_exclusions: (hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -14,6 +14,16 @@ on:
         type: string
 
 jobs:
+  conda-cpp-checks:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      enable_check_symbols: true
+      symbol_exclusions: (hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
   conda-cpp-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02

diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ namespace detail {
 int32_t constexpr decompress_edge_partition_block_size = 1024;
 
 template <typename vertex_t, typename edge_t, bool multi_gpu>
-__global__ void decompress_to_edgelist_mid_degree(
+__global__ static void decompress_to_edgelist_mid_degree(
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
   vertex_t major_range_first,
   vertex_t major_range_last,
@@ -73,7 +73,7 @@ __global__ void decompress_to_edgelist_mid_degree(
 }
 
 template <typename vertex_t, typename edge_t, bool multi_gpu>
-__global__ void decompress_to_edgelist_high_degree(
+__global__ static void decompress_to_edgelist_high_degree(
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
   vertex_t major_range_first,
   vertex_t major_range_last,

diff --git a/cpp/src/community/legacy/ecg.cu b/cpp/src/community/legacy/ecg.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@ binsearch_maxle(const IndexType* vec, const IndexType val, IndexType low, IndexT
 // FIXME: This shouldn't need to be a custom kernel, this
 //        seems like it should just be a thrust::transform
 template <typename IdxT, typename ValT>
-__global__ void match_check_kernel(
+__global__ static void match_check_kernel(
   IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights)
 {
   IdxT tid = blockIdx.x * blockDim.x + threadIdx.x;

diff --git a/cpp/src/components/legacy/weak_cc.cuh b/cpp/src/components/legacy/weak_cc.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ class WeakCCState {
 };
 
 template <typename vertex_t, typename edge_t, int TPB_X = 32>
-__global__ void weak_cc_label_device(vertex_t* labels,
+__global__ static void weak_cc_label_device(vertex_t* labels,
                                      edge_t const* offsets,
                                      vertex_t const* indices,
                                      edge_t nnz,
@@ -116,7 +116,7 @@ __global__ void weak_cc_label_device(vertex_t* labels,
 }
 
 template <typename vertex_t, int TPB_X = 32, typename Lambda>
-__global__ void weak_cc_init_label_kernel(vertex_t* labels,
+__global__ static void weak_cc_init_label_kernel(vertex_t* labels,
                                           vertex_t startVertexId,
                                           vertex_t batchSize,
                                           vertex_t MAX_LABEL,
@@ -132,7 +132,7 @@ __global__ void weak_cc_init_label_kernel(vertex_t* labels,
 }
 
 template <typename vertex_t, int TPB_X = 32>
-__global__ void weak_cc_init_all_kernel(
+__global__ static void weak_cc_init_all_kernel(
   vertex_t* labels, bool* fa, bool* xa, vertex_t N, vertex_t MAX_LABEL)
 {
   vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;

diff --git a/cpp/src/layout/legacy/bh_kernels.cuh b/cpp/src/layout/legacy/bh_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ namespace detail {
 /**
  * Initializes the states of objects. This speeds the overall kernel up.
  */
-__global__ void InitializationKernel(unsigned* restrict limiter,
+__global__ static void InitializationKernel(unsigned* restrict limiter,
                                      int* restrict maxdepthd,
                                      float* restrict radiusd)
 {
@@ -54,7 +54,7 @@ __global__ void InitializationKernel(unsigned* restrict limiter,
 /**
  * Reset root.
  */
-__global__ void ResetKernel(float* restrict radiusd_squared,
+__global__ static void ResetKernel(float* restrict radiusd_squared,
                             int* restrict bottomd,
                             const int NNODES,
                             const float* restrict radiusd)
@@ -67,7 +67,7 @@ __global__ void ResetKernel(float* restrict radiusd_squared,
 /**
  * Figures the bounding boxes for every point in the embedding.
  */
-__global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* restrict startd,
+__global__ static __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* restrict startd,
                                                                        int* restrict childd,
                                                                        int* restrict massd,
                                                                        float* restrict posxd,
@@ -158,7 +158,7 @@ __global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* rest
 /**
  * Clear some of the state vectors up.
  */
-__global__ __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
+__global__ static __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
                                                         const int FOUR_NNODES,
                                                         const int FOUR_N)
 {
@@ -175,7 +175,7 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
 /**
  * Build the actual KD Tree.
  */
-__global__ __launch_bounds__(THREADS2,
+__global__ static __launch_bounds__(THREADS2,
                              FACTOR2) void TreeBuildingKernel(int* restrict childd,
                                                               const float* restrict posxd,
                                                               const float* restrict posyd,
@@ -296,7 +296,7 @@ __global__ __launch_bounds__(THREADS2,
 /**
  * Clean more state vectors.
  */
-__global__ __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
+__global__ static __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
                                                         int* restrict massd,
                                                         const int NNODES,
                                                         const int* restrict bottomd)
@@ -317,7 +317,7 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
 /**
  * Summarize the KD Tree via cell gathering
  */
-__global__ __launch_bounds__(THREADS3,
+__global__ static __launch_bounds__(THREADS3,
                              FACTOR3) void SummarizationKernel(int* restrict countd,
                                                                const int* restrict childd,
                                                                volatile int* restrict massd,
@@ -453,7 +453,7 @@ __global__ __launch_bounds__(THREADS3,
 /**
  * Sort the cells
  */
-__global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict sortd,
+__global__ static __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict sortd,
                                                                 const int* restrict countd,
                                                                 volatile int* restrict startd,
                                                                 int* restrict childd,
@@ -502,7 +502,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict so
 /**
  * Calculate the repulsive forces using the KD Tree
  */
-__global__ __launch_bounds__(
+__global__ static __launch_bounds__(
   THREADS5, FACTOR5) void RepulsionKernel(/* int *restrict errd, */
                                           const float scaling_ratio,
                                           const float theta,
@@ -612,7 +612,7 @@ __global__ __launch_bounds__(
   }
 }
 
-__global__ __launch_bounds__(THREADS6,
+__global__ static __launch_bounds__(THREADS6,
                              FACTOR6) void apply_forces_bh(float* restrict Y_x,
                                                            float* restrict Y_y,
                                                            const float* restrict attract_x,

diff --git a/cpp/src/layout/legacy/exact_repulsion.cuh b/cpp/src/layout/legacy/exact_repulsion.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ namespace cugraph {
 namespace detail {
 
 template <typename vertex_t>
-__global__ void repulsion_kernel(const float* restrict x_pos,
+__global__ static void repulsion_kernel(const float* restrict x_pos,
                                  const float* restrict y_pos,
                                  float* restrict repel_x,
                                  float* restrict repel_y,

diff --git a/cpp/src/layout/legacy/fa2_kernels.cuh b/cpp/src/layout/legacy/fa2_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ namespace cugraph {
 namespace detail {
 
 template <typename vertex_t, typename edge_t, typename weight_t>
-__global__ void attraction_kernel(const vertex_t* restrict row,
+__global__ static void attraction_kernel(const vertex_t* restrict row,
                                   const vertex_t* restrict col,
                                   const weight_t* restrict v,
                                   const edge_t e,
@@ -116,7 +116,7 @@ void apply_attraction(const vertex_t* restrict row,
 }
 
 template <typename vertex_t>
-__global__ void linear_gravity_kernel(const float* restrict x_pos,
+__global__ static void linear_gravity_kernel(const float* restrict x_pos,
                                       const float* restrict y_pos,
                                       float* restrict attract_x,
                                       float* restrict attract_y,
@@ -136,7 +136,7 @@ __global__ void linear_gravity_kernel(const float* restrict x_pos,
 }
 
 template <typename vertex_t>
-__global__ void strong_gravity_kernel(const float* restrict x_pos,
+__global__ static void strong_gravity_kernel(const float* restrict x_pos,
                                       const float* restrict y_pos,
                                       float* restrict attract_x,
                                       float* restrict attract_y,
@@ -187,7 +187,7 @@ void apply_gravity(const float* restrict x_pos,
 }
 
 template <typename vertex_t>
-__global__ void local_speed_kernel(const float* restrict repel_x,
+__global__ static void local_speed_kernel(const float* restrict repel_x,
                                    const float* restrict repel_y,
                                    const float* restrict attract_x,
                                    const float* restrict attract_y,
@@ -272,7 +272,7 @@ void adapt_speed(const float jitter_tolerance,
 }
 
 template <typename vertex_t>
-__global__ void update_positions_kernel(float* restrict x_pos,
+__global__ static void update_positions_kernel(float* restrict x_pos,
                                         float* restrict y_pos,
                                         const float* restrict repel_x,
                                         const float* restrict repel_y,

diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -95,7 +95,7 @@ template <typename GraphViewType,
           typename BufferKeyOutputIterator,
           typename BufferValueOutputIterator,
           typename EdgeOp>
-__global__ void extract_transform_v_frontier_e_hypersparse(
+__global__ static void extract_transform_v_frontier_e_hypersparse(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -262,7 +262,7 @@ template <typename GraphViewType,
           typename BufferKeyOutputIterator,
           typename BufferValueOutputIterator,
           typename EdgeOp>
-__global__ void extract_transform_v_frontier_e_low_degree(
+__global__ static void extract_transform_v_frontier_e_low_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -420,7 +420,7 @@ template <typename GraphViewType,
           typename BufferKeyOutputIterator,
           typename BufferValueOutputIterator,
           typename EdgeOp>
-__global__ void extract_transform_v_frontier_e_mid_degree(
+__global__ static void extract_transform_v_frontier_e_mid_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -527,7 +527,7 @@ template <typename GraphViewType,
           typename BufferKeyOutputIterator,
           typename BufferValueOutputIterator,
           typename EdgeOp>
-__global__ void extract_transform_v_frontier_e_high_degree(
+__global__ static void extract_transform_v_frontier_e_high_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,

diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -75,7 +75,7 @@ template <bool update_major,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
-__global__ void per_v_transform_reduce_e_hypersparse(
+__global__ static void per_v_transform_reduce_e_hypersparse(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -182,7 +182,7 @@ template <bool update_major,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
-__global__ void per_v_transform_reduce_e_low_degree(
+__global__ static void per_v_transform_reduce_e_low_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -291,7 +291,7 @@ template <bool update_major,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
-__global__ void per_v_transform_reduce_e_mid_degree(
+__global__ static void per_v_transform_reduce_e_mid_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
@@ -384,7 +384,7 @@ template <bool update_major,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
-__global__ void per_v_transform_reduce_e_high_degree(
+__global__ static void per_v_transform_reduce_e_high_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,

diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ template <typename GraphViewType,
           typename EdgePartitionEdgeMaskWrapper,
           typename EdgePartitionEdgeValueOutputWrapper,
           typename EdgeOp>
-__global__ void transform_e_packed_bool(
+__global__ static void transform_e_packed_bool(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,