diff --git a/exllama_ext/cuda_func/column_remap.cu b/exllama_ext/cuda_func/column_remap.cu
index 65514c59..adc91fed 100644
--- a/exllama_ext/cuda_func/column_remap.cu
+++ b/exllama_ext/cuda_func/column_remap.cu
@@ -1,7 +1,14 @@
 #include "column_remap.cuh"
 #include "../util.cuh"
 
+// Using SHUF_BLOCKSIZE_X = 1024 crashes under ROCm with "Memory access fault by
+// GPU node-1 (Agent handle: 0x012345678912) on address 0x012345678912. Reason:
+// Page not present or supervisor privilege."
+#if defined(USE_ROCM)
 const int SHUF_BLOCKSIZE_X = 256;
+#else
+const int SHUF_BLOCKSIZE_X = 1024;
+#endif
 const int SHUF_BLOCKSIZE_Y = 16;
 
 __global__ void column_remap_kernel