Fix lanczos solver integer overflow (#2536)
Partially answers rapidsai/cuml#6204

Authors:
  - Victor Lafargue (https://github.com/viclafargue)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Micka (https://github.com/lowener)

URL: #2536
viclafargue authored Jan 10, 2025
1 parent 8fc988e commit 1b62c41
Showing 3 changed files with 25 additions and 18 deletions.
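The changes below all address the same failure mode: index and byte-count arithmetic performed in 32-bit int, which wraps once a matrix exceeds roughly 2^31 elements or bytes. A minimal standalone sketch of the pattern (illustrative values, not RAFT code):

#include <climits>
#include <cstddef>
#include <cstdio>

int main()
{
  int n    = 200000;  // hypothetical matrix dimension
  int iter = 20000;   // hypothetical Lanczos restart length
  // An offset like work_dev + n * iter needs 4,000,000,000 elements, but the
  // product n * iter is evaluated in 32-bit int (INT_MAX = 2,147,483,647),
  // so it overflows before it is ever used as a pointer offset.
  // Widening one operand first, as this commit does, keeps the arithmetic in 64 bits:
  size_t offset = (size_t)n * (size_t)iter;
  std::printf("elements needed: %zu (INT_MAX = %d)\n", offset, INT_MAX);
  return 0;
}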
14 changes: 9 additions & 5 deletions cpp/include/raft/sparse/detail/coo.cuh
@@ -182,7 +182,7 @@ class COO {
    * @param n_rows: number of rows in the dense matrix
    * @param n_cols: number of columns in the dense matrix
    */
-  void setSize(int n_rows, int n_cols)
+  void setSize(Index_Type n_rows, Index_Type n_cols)
   {
     this->n_rows = n_rows;
     this->n_cols = n_cols;
@@ -192,7 +192,7 @@
    * @brief Set the number of rows and cols for a square dense matrix
    * @param n: number of rows and cols
    */
-  void setSize(int n)
+  void setSize(Index_Type n)
   {
     this->n_rows = n;
     this->n_cols = n;
@@ -204,7 +204,10 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(int nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); }
+  void allocate(Index_Type nnz, bool init, cudaStream_t stream)
+  {
+    this->allocate(nnz, 0, init, stream);
+  }
 
   /**
    * @brief Allocate the underlying arrays
@@ -213,7 +216,7 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(int nnz, int size, bool init, cudaStream_t stream)
+  void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream)
   {
     this->allocate(nnz, size, size, init, stream);
   }
@@ -226,7 +229,8 @@
    * @param init: should values be initialized to 0?
    * @param stream: stream to use for init
    */
-  void allocate(int nnz, int n_rows, int n_cols, bool init, cudaStream_t stream)
+  void allocate(
+    Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
   {
     this->n_rows = n_rows;
     this->n_cols = n_cols;
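The coo.cuh hunks only widen the setter and allocator signatures from int to the class's Index_Type template parameter. A small illustrative sketch (not the raft::sparse::COO class; names are hypothetical) of why the parameter type matters when the index type is instantiated as a 64-bit integer:

#include <cstdint>
#include <cstdio>

template <typename Index_Type>
struct CooSketch {
  Index_Type n_rows{0}, n_cols{0};

  // Taking Index_Type instead of plain int avoids silently truncating a
  // 64-bit size at the call boundary, even though the members are already 64-bit.
  void setSize(Index_Type rows, Index_Type cols)
  {
    n_rows = rows;
    n_cols = cols;
  }
};

int main()
{
  CooSketch<std::int64_t> coo;
  coo.setSize(3000000000LL, 3000000000LL);  // larger than INT_MAX; preserved intact
  std::printf("%lld x %lld\n", (long long)coo.n_rows, (long long)coo.n_cols);
  return 0;
}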
21 changes: 12 additions & 9 deletions cpp/include/raft/sparse/solver/detail/lanczos.cuh
@@ -624,7 +624,7 @@ static int lanczosRestart(raft::resources const& handle,
   value_type_t* shifts_host;
 
   // Orthonormal matrix for similarity transform
-  value_type_t* V_dev = work_dev + n * iter;
+  value_type_t* V_dev = work_dev + (size_t)n * (size_t)iter;
 
   // -------------------------------------------------------
   // Implementation
@@ -641,7 +641,7 @@
   // std::cout <<std::endl;
 
   // Initialize similarity transform with identity matrix
-  memset(V_host, 0, iter * iter * sizeof(value_type_t));
+  memset(V_host, 0, (size_t)iter * (size_t)iter * (size_t)sizeof(value_type_t));
   for (i = 0; i < iter; ++i)
     V_host[IDX(i, i, iter)] = 1;
 
Expand Down Expand Up @@ -679,8 +679,11 @@ static int lanczosRestart(raft::resources const& handle,
WARNING("error in implicitly shifted QR algorithm");

// Obtain new residual
RAFT_CUDA_TRY(cudaMemcpyAsync(
V_dev, V_host, iter * iter * sizeof(value_type_t), cudaMemcpyHostToDevice, stream));
RAFT_CUDA_TRY(cudaMemcpyAsync(V_dev,
V_host,
(size_t)iter * (size_t)iter * (size_t)sizeof(value_type_t),
cudaMemcpyHostToDevice,
stream));

beta_host[iter - 1] = beta_host[iter - 1] * V_host[IDX(iter - 1, iter_new - 1, iter)];
RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemv(cublas_h,
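A note on the byte counts above: in an expression like iter * iter * sizeof(value_type_t), the two int operands are multiplied first and only that result is widened to size_t for the multiplication by sizeof, so the 32-bit sub-product can still overflow. Casting the leading operands, as the new code does, keeps the whole chain in size_t. A standalone sketch with hypothetical values:

#include <cstddef>
#include <cstdio>

int main()
{
  int iter = 50000;  // hypothetical restart length
  // Without the casts, iter * iter = 2,500,000,000 would overflow int before
  // being multiplied by sizeof(double); with them, every step is done in size_t.
  size_t bytes = (size_t)iter * (size_t)iter * sizeof(double);
  std::printf("%zu bytes\n", bytes);  // 20,000,000,000
  return 0;
}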
@@ -716,7 +719,7 @@
 
   RAFT_CUDA_TRY(cudaMemcpyAsync(lanczosVecs_dev,
                                 work_dev,
-                                n * iter_new * sizeof(value_type_t),
+                                (size_t)n * (size_t)iter_new * (size_t)sizeof(value_type_t),
                                 cudaMemcpyDeviceToDevice,
                                 stream));
 
@@ -1045,10 +1048,10 @@ int computeSmallestEigenvectors(
   unsigned long long seed = 1234567)
 {
   // Matrix dimension
-  index_type_t n = A.nrows_;
+  size_t n = A.nrows_;
 
   // Check that parameters are valid
-  RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors.");
+  RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors.");
   RAFT_EXPECTS(restartIter > 0, "Invalid restartIter.");
   RAFT_EXPECTS(tol > 0, "Invalid tolerance.");
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
@@ -1395,10 +1398,10 @@ int computeLargestEigenvectors(
   unsigned long long seed = 123456)
 {
   // Matrix dimension
-  index_type_t n = A.nrows_;
+  size_t n = A.nrows_;
 
   // Check that parameters are valid
-  RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors.");
+  RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors.");
   RAFT_EXPECTS(restartIter > 0, "Invalid restartIter.");
   RAFT_EXPECTS(tol > 0, "Invalid tolerance.");
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
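In computeSmallestEigenvectors and computeLargestEigenvectors the dimension n becomes size_t, so the nEigVecs bound check now mixes a signed int with an unsigned 64-bit value. The explicit (size_t) cast keeps the comparison in one domain (and quiets -Wsign-compare warnings); it is safe because positivity is checked first. A standalone sketch with hypothetical values:

#include <cstddef>
#include <cstdio>

int main()
{
  size_t n     = 1000;  // matrix dimension, now size_t as in the diff
  int nEigVecs = -1;    // pathological request
  // Guard the sign first, then compare in unsigned arithmetic; a bare
  // nEigVecs <= n would convert -1 to a huge size_t value.
  bool ok = nEigVecs > 0 && (size_t)nEigVecs <= n;
  std::printf("ok = %d\n", (int)ok);  // prints 0
  return 0;
}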
8 changes: 4 additions & 4 deletions cpp/include/raft/spectral/detail/matrix_wrappers.hpp
@@ -39,14 +39,14 @@
 // =========================================================
 
 // Get index of matrix entry
-#define IDX(i, j, lda) ((i) + (j) * (lda))
+#define IDX(i, j, lda) ((size_t)(i) + (j) * (lda))
 
 namespace raft {
 namespace spectral {
 namespace matrix {
 namespace detail {
 
-using size_type = int;  // for now; TODO: move it in appropriate header
+using size_type = size_t;  // for now; TODO: move it in appropriate header
 
 // Apply diagonal matrix to vector:
 //
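The IDX macro computes a column-major flat offset; casting the leading operand makes the addition 64-bit so the final index no longer wraps for large matrices. A standalone sketch of the widened-index pattern (here every operand is cast, so the column product itself is also computed in size_t; values are hypothetical, and the macro name is not the one in the diff):

#include <cstddef>
#include <cstdio>

// Column-major flat index with all arithmetic carried out in size_t.
#define IDX64(i, j, lda) ((size_t)(i) + (size_t)(j) * (size_t)(lda))

int main()
{
  int lda = 100000;          // leading dimension (rows)
  int i = 12345, j = 67890;  // element coordinates
  size_t flat = IDX64(i, j, lda);  // 67890 * 100000 + 12345 = 6,789,012,345 > INT_MAX
  std::printf("%zu\n", flat);
  return 0;
}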
@@ -326,7 +326,7 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
       raft_handle, row_offsets, col_indices, values, nrows, nnz),
     diagonal_(raft_handle, nrows)
   {
-    vector_t<value_type> ones{raft_handle, nrows};
+    vector_t<value_type> ones{raft_handle, (size_t)nrows};
     ones.fill(1.0);
     sparse_matrix_t<index_type, value_type>::mv(1, ones.raw(), 0, diagonal_.raw());
   }
@@ -341,7 +341,7 @@
       csr_m.nnz_),
     diagonal_(raft_handle, csr_m.nrows_)
   {
-    vector_t<value_type> ones{raft_handle, csr_m.nrows_};
+    vector_t<value_type> ones{raft_handle, (size_t)csr_m.nrows_};
     ones.fill(1.0);
     sparse_matrix_t<index_type, value_type>::mv(1, ones.raw(), 0, diagonal_.raw());
   }
