From d10c98c1dc4b294726a8d304dfc1c721260567d3 Mon Sep 17 00:00:00 2001 From: rtp-llm Date: Sat, 20 Apr 2024 16:04:19 +0800 Subject: [PATCH] fix - fix cuda11 compile --- src/fastertransformer/kernels/quantization_tensor.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastertransformer/kernels/quantization_tensor.cu b/src/fastertransformer/kernels/quantization_tensor.cu index 39e3de88..6b29c538 100644 --- a/src/fastertransformer/kernels/quantization_tensor.cu +++ b/src/fastertransformer/kernels/quantization_tensor.cu @@ -95,7 +95,7 @@ __global__ void perTokenQuantization( T localMax = 1e-6f; for (int i = threadIdx.x; i < numCols; i += blockDim.x) { - localMax = cuda_max(localMax, cuda_abs((srcRow[i])/ cuda_cast(smoother[i]))); + localMax = cuda_max(localMax, cuda_abs(cuda_cast((srcRow[i])/ cuda_cast(smoother[i])))); } const float rowMax = blockAllReduceMax(cuda_cast(localMax));