From 50b22e2ba8ec0f5cf0dca719392a2ec5254e7228 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Mon, 10 Apr 2023 14:35:37 +0200 Subject: [PATCH] Fix Kepler/Maxwell support for old kernel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Torbjörn Lönnemark Co-authored-by: jllllll --- quant_cuda_old_kernel.cu | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/quant_cuda_old_kernel.cu b/quant_cuda_old_kernel.cu index e1c302ec..23232187 100644 --- a/quant_cuda_old_kernel.cu +++ b/quant_cuda_old_kernel.cu @@ -3,6 +3,32 @@ #include #include +// atomicAdd for double-precision floating-point numbers on hardware with +// compute capability < 6.0 from: +// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 +__device__ double atomicAdd( + double* address, + double val +) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS( + address_as_ull, + assumed, + __double_as_longlong(val + __longlong_as_double(assumed)) + ); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); +} +#endif + template __global__ void VecQuant2MatMulKernel( const scalar_t* __restrict__ vec,