Fix Kepler/Maxwell support for old kernel.

Co-authored-by: Torbjörn Lönnemark <[email protected]>
Co-authored-by: jllllll <[email protected]>
0cc4m · Apr 10, 2023 · 50b22e2 · 50b22e2
1 parent bfa92a8
commit 50b22e2
Showing 1 changed file with 26 additions and 0 deletions.
diff --git a/quant_cuda_old_kernel.cu b/quant_cuda_old_kernel.cu
@@ -3,6 +3,32 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 
+// Software atomicAdd(double*, double) for compute capability < 6.0, which has no
+// native version; compiled only in device passes for such targets. CAS loop from:
+// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+__device__ double atomicAdd(
+    double* address,
+    double val
+) {
+  unsigned long long int* address_as_ull = (unsigned long long int*)address;
+  unsigned long long int old = *address_as_ull, assumed;
+
+  do {
+    assumed = old;
+    old = atomicCAS(
+      address_as_ull,
+      assumed,
+      __double_as_longlong(val + __longlong_as_double(assumed))
+    );
+  // Retry whenever the CAS lost to another writer (returned value != assumed).
+  // The bit patterns are compared as integers so a NaN (NaN != NaN) cannot hang.
+  } while (assumed != old);
+
+  return __longlong_as_double(old);  // value held at *address before this add
+}
+#endif
+
 template <typename scalar_t>
 __global__ void VecQuant2MatMulKernel(
     const  scalar_t* __restrict__ vec,