Skip to content

Commit

Permalink
Fix Kepler/Maxwell support for old kernel.
Browse files Browse the repository at this point in the history
Co-authored-by: Torbjörn Lönnemark <[email protected]>
Co-authored-by: jllllll <[email protected]>
  • Loading branch information
3 people committed Apr 10, 2023
1 parent bfa92a8 commit 50b22e2
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions quant_cuda_old_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,32 @@
#include <cuda.h>
#include <cuda_runtime.h>

// Fallback atomicAdd for double-precision floating-point numbers on hardware
// with compute capability < 6.0, which has no native double atomicAdd.
// Implementation taken from:
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// Software double-precision atomic add built on a 64-bit compare-and-swap loop.
//
// Atomically performs `*address += val` and returns the value that was stored
// at `address` immediately before this thread's addition took effect.
//
//   address  location of the double to update
//   val      amount to add
__device__ double atomicAdd(
    double* address,
    double val
) {
    // atomicCAS works on 64-bit integers, so access the double's storage
    // through an integer pointer of the same width.
    unsigned long long int* const slot =
        reinterpret_cast<unsigned long long int*>(address);

    unsigned long long int observed = *slot;
    unsigned long long int expected;

    do {
        // Remember the bit pattern the new sum is computed from.
        expected = observed;

        const double sum = val + __longlong_as_double(expected);

        // Store the sum only if the slot still holds `expected`; atomicCAS
        // returns whatever the slot actually contained.
        observed = atomicCAS(slot, expected, __double_as_longlong(sum));

        // Retry while another thread changed the slot in between. The
        // comparison is done on the integer bit patterns, not on doubles,
        // so a NaN (which never compares equal to itself) cannot cause an
        // endless loop.
    } while (expected != observed);

    return __longlong_as_double(observed);
}
#endif

template <typename scalar_t>
__global__ void VecQuant2MatMulKernel(
const scalar_t* __restrict__ vec,
Expand Down

0 comments on commit 50b22e2

Please sign in to comment.