Skip to content

Commit

Permalink
Mark all cugraph CUDA kernels with internal linkage
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard committed Feb 12, 2024
1 parent 38a8cdb commit 1f2f5a8
Show file tree
Hide file tree
Showing 19 changed files with 238 additions and 214 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
- checks
- conda-cpp-build
- conda-cpp-tests
- conda-cpp-checks
- conda-notebook-tests
- conda-python-build
- conda-python-tests
Expand Down Expand Up @@ -52,6 +53,14 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
conda-cpp-checks:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
enable_check_symbols: true
symbol_exclusions: (hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
conda-python-build:
needs: conda-cpp-build
secrets: inherit
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ on:
type: string

jobs:
conda-cpp-checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
enable_check_symbols: true
symbol_exclusions: (hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)
conda-cpp-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cugraph/detail/decompress_edge_partition.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,7 +43,7 @@ namespace detail {
int32_t constexpr decompress_edge_partition_block_size = 1024;

template <typename vertex_t, typename edge_t, bool multi_gpu>
__global__ void decompress_to_edgelist_mid_degree(
__global__ static void decompress_to_edgelist_mid_degree(
edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
vertex_t major_range_first,
vertex_t major_range_last,
Expand Down Expand Up @@ -73,7 +73,7 @@ __global__ void decompress_to_edgelist_mid_degree(
}

template <typename vertex_t, typename edge_t, bool multi_gpu>
__global__ void decompress_to_edgelist_high_degree(
__global__ static void decompress_to_edgelist_high_degree(
edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition,
vertex_t major_range_first,
vertex_t major_range_last,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/community/legacy/ecg.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -50,7 +50,7 @@ binsearch_maxle(const IndexType* vec, const IndexType val, IndexType low, IndexT
// FIXME: This shouldn't need to be a custom kernel, this
// seems like it should just be a thrust::transform
template <typename IdxT, typename ValT>
__global__ void match_check_kernel(
__global__ static void match_check_kernel(
IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights)
{
IdxT tid = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down
32 changes: 16 additions & 16 deletions cpp/src/components/legacy/weak_cc.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,15 +57,15 @@ class WeakCCState {
};

template <typename vertex_t, typename edge_t, int TPB_X = 32>
__global__ void weak_cc_label_device(vertex_t* labels,
edge_t const* offsets,
vertex_t const* indices,
edge_t nnz,
bool* fa,
bool* xa,
bool* m,
vertex_t startVertexId,
vertex_t batchSize)
__global__ static void weak_cc_label_device(vertex_t* labels,
edge_t const* offsets,
vertex_t const* indices,
edge_t nnz,
bool* fa,
bool* xa,
bool* m,
vertex_t startVertexId,
vertex_t batchSize)
{
vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;
if (tid < batchSize) {
Expand Down Expand Up @@ -116,11 +116,11 @@ __global__ void weak_cc_label_device(vertex_t* labels,
}

template <typename vertex_t, int TPB_X = 32, typename Lambda>
__global__ void weak_cc_init_label_kernel(vertex_t* labels,
vertex_t startVertexId,
vertex_t batchSize,
vertex_t MAX_LABEL,
Lambda filter_op)
__global__ static void weak_cc_init_label_kernel(vertex_t* labels,
vertex_t startVertexId,
vertex_t batchSize,
vertex_t MAX_LABEL,
Lambda filter_op)
{
/** F1 and F2 in the paper correspond to fa and xa */
/** Cd in paper corresponds to db_cluster */
Expand All @@ -132,7 +132,7 @@ __global__ void weak_cc_init_label_kernel(vertex_t* labels,
}

template <typename vertex_t, int TPB_X = 32>
__global__ void weak_cc_init_all_kernel(
__global__ static void weak_cc_init_all_kernel(
vertex_t* labels, bool* fa, bool* xa, vertex_t N, vertex_t MAX_LABEL)
{
vertex_t tid = threadIdx.x + blockIdx.x * TPB_X;
Expand Down
136 changes: 69 additions & 67 deletions cpp/src/layout/legacy/bh_kernels.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -42,9 +42,9 @@ namespace detail {
/**
* Intializes the states of objects. This speeds the overall kernel up.
*/
__global__ void InitializationKernel(unsigned* restrict limiter,
int* restrict maxdepthd,
float* restrict radiusd)
__global__ static void InitializationKernel(unsigned* restrict limiter,
int* restrict maxdepthd,
float* restrict radiusd)
{
maxdepthd[0] = 1;
limiter[0] = 0;
Expand All @@ -54,10 +54,10 @@ __global__ void InitializationKernel(unsigned* restrict limiter,
/**
* Reset root.
*/
__global__ void ResetKernel(float* restrict radiusd_squared,
int* restrict bottomd,
const int NNODES,
const float* restrict radiusd)
__global__ static void ResetKernel(float* restrict radiusd_squared,
int* restrict bottomd,
const int NNODES,
const float* restrict radiusd)
{
radiusd_squared[0] = radiusd[0] * radiusd[0];
// create root node
Expand All @@ -67,20 +67,21 @@ __global__ void ResetKernel(float* restrict radiusd_squared,
/**
* Figures the bounding boxes for every point in the embedding.
*/
__global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* restrict startd,
int* restrict childd,
int* restrict massd,
float* restrict posxd,
float* restrict posyd,
float* restrict maxxd,
float* restrict maxyd,
float* restrict minxd,
float* restrict minyd,
const int FOUR_NNODES,
const int NNODES,
const int N,
unsigned* restrict limiter,
float* restrict radiusd)
__global__ static __launch_bounds__(THREADS1,
FACTOR1) void BoundingBoxKernel(int* restrict startd,
int* restrict childd,
int* restrict massd,
float* restrict posxd,
float* restrict posyd,
float* restrict maxxd,
float* restrict maxyd,
float* restrict minxd,
float* restrict minyd,
const int FOUR_NNODES,
const int NNODES,
const int N,
unsigned* restrict limiter,
float* restrict radiusd)
{
float val, minx, maxx, miny, maxy;
__shared__ float sminx[THREADS1], smaxx[THREADS1], sminy[THREADS1], smaxy[THREADS1];
Expand Down Expand Up @@ -158,9 +159,9 @@ __global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* rest
/**
* Clear some of the state vectors up.
*/
__global__ __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
const int FOUR_NNODES,
const int FOUR_N)
__global__ static __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
const int FOUR_NNODES,
const int FOUR_N)
{
const int inc = blockDim.x * gridDim.x;
int k = (FOUR_N & -32) + threadIdx.x + blockIdx.x * blockDim.x;
Expand All @@ -175,15 +176,15 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd,
/**
* Build the actual KD Tree.
*/
__global__ __launch_bounds__(THREADS2,
FACTOR2) void TreeBuildingKernel(int* restrict childd,
const float* restrict posxd,
const float* restrict posyd,
const int NNODES,
const int N,
int* restrict maxdepthd,
int* restrict bottomd,
const float* restrict radiusd)
__global__ static __launch_bounds__(THREADS2,
FACTOR2) void TreeBuildingKernel(int* restrict childd,
const float* restrict posxd,
const float* restrict posyd,
const int NNODES,
const int N,
int* restrict maxdepthd,
int* restrict bottomd,
const float* restrict radiusd)
{
int j, depth;
float x, y, r;
Expand Down Expand Up @@ -296,10 +297,10 @@ __global__ __launch_bounds__(THREADS2,
/**
* Clean more state vectors.
*/
__global__ __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
int* restrict massd,
const int NNODES,
const int* restrict bottomd)
__global__ static __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
int* restrict massd,
const int NNODES,
const int* restrict bottomd)
{
const int bottom = bottomd[0];
const int inc = blockDim.x * gridDim.x;
Expand All @@ -317,15 +318,15 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd,
/**
* Summarize the KD Tree via cell gathering
*/
__global__ __launch_bounds__(THREADS3,
FACTOR3) void SummarizationKernel(int* restrict countd,
const int* restrict childd,
volatile int* restrict massd,
float* restrict posxd,
float* restrict posyd,
const int NNODES,
const int N,
const int* restrict bottomd)
__global__ static __launch_bounds__(THREADS3,
FACTOR3) void SummarizationKernel(int* restrict countd,
const int* restrict childd,
volatile int* restrict massd,
float* restrict posxd,
float* restrict posyd,
const int NNODES,
const int N,
const int* restrict bottomd)
{
bool flag = 0;
float cm, px, py;
Expand Down Expand Up @@ -453,13 +454,14 @@ __global__ __launch_bounds__(THREADS3,
/**
* Sort the cells
*/
__global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict sortd,
const int* restrict countd,
volatile int* restrict startd,
int* restrict childd,
const int NNODES,
const int N,
const int* restrict bottomd)
__global__ static __launch_bounds__(THREADS4,
FACTOR4) void SortKernel(int* restrict sortd,
const int* restrict countd,
volatile int* restrict startd,
int* restrict childd,
const int NNODES,
const int N,
const int* restrict bottomd)
{
const int bottom = bottomd[0];
const int dec = blockDim.x * gridDim.x;
Expand Down Expand Up @@ -502,7 +504,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict so
/**
* Calculate the repulsive forces using the KD Tree
*/
__global__ __launch_bounds__(
__global__ static __launch_bounds__(
THREADS5, FACTOR5) void RepulsionKernel(/* int *restrict errd, */
const float scaling_ratio,
const float theta,
Expand Down Expand Up @@ -612,18 +614,18 @@ __global__ __launch_bounds__(
}
}

__global__ __launch_bounds__(THREADS6,
FACTOR6) void apply_forces_bh(float* restrict Y_x,
float* restrict Y_y,
const float* restrict attract_x,
const float* restrict attract_y,
const float* restrict repel_x,
const float* restrict repel_y,
float* restrict old_dx,
float* restrict old_dy,
const float* restrict swinging,
const float speed,
const int n)
__global__ static __launch_bounds__(THREADS6,
FACTOR6) void apply_forces_bh(float* restrict Y_x,
float* restrict Y_y,
const float* restrict attract_x,
const float* restrict attract_y,
const float* restrict repel_x,
const float* restrict repel_y,
float* restrict old_dx,
float* restrict old_dy,
const float* restrict swinging,
const float speed,
const int n)
{
// For evrery vertex
for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) {
Expand Down
Loading

0 comments on commit 1f2f5a8

Please sign in to comment.