diff --git a/CMakeLists.txt b/CMakeLists.txt
index 246086e58..f0f1aa869 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -240,26 +240,26 @@ elseif(BUILD_MPS)
     add_custom_target(metallib DEPENDS "bitsandbytes/bitsandbytes.metallib")
 elseif(BUILD_NPU)
     list(APPEND SRC_FILES ${NPU_FILES})
-
+
     set(SOC_VERSION "Ascend910B4" CACHE STRING "system on chip type")
     set(ASCEND_CANN_PACKAGE_PATH $ENV{ASCEND_HOME_PATH} CACHE
         STRING "ASCEND CANN package installation directory"
     )
-
+
     # ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}.
     # ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library
     # file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/csrc/npu_kernels.cpp)
     file(GLOB KERNEL_FILES csrc/npu_kernels.cpp)
-
+
     if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
         set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
     elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake)
         set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake)
     else()
-        message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed")
+        message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the CANN package is installed")
     endif()
     include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
-
+
     # ascendc_library use to add kernel file to generate ascendc library
     ascendc_library(ascendc_kernels_npu STATIC ${KERNEL_FILES})
diff --git a/_typos.toml b/_typos.toml
index e4e7287fb..8ad7d5969 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -9,6 +9,7 @@ extend-ignore-re = [

 [type.py.extend-words]
 "BA" = "BA" # used as a commented-out variable in tests
+"cann" = "cann" # CANN (Compute Architecture for Neural Networks) is a heterogeneous computing architecture for Ascend NPUs

 [type.cuda.extend-words]
 "subtile" = "subtile"
diff --git a/bitsandbytes/backends/cpu_xpu_common.py b/bitsandbytes/backends/cpu_xpu_common.py
index b2a3a49e3..8fdf7569d 100644
--- a/bitsandbytes/backends/cpu_xpu_common.py
+++ b/bitsandbytes/backends/cpu_xpu_common.py
@@ -23,7 +23,7 @@

 gxx_available = False
 try:
-    subprocess.run(["g++", "--version"], capture_output=True) # hide terminal output
+    subprocess.run(["g++", "--version"], capture_output=True)  # hide terminal output
     gxx_available = True
 except BaseException:
     warnings.warn("g++ not found, torch.compile disabled for CPU/XPU.")
diff --git a/bitsandbytes/backends/cuda.py b/bitsandbytes/backends/cuda.py
index dfb639cbd..ad478431c 100644
--- a/bitsandbytes/backends/cuda.py
+++ b/bitsandbytes/backends/cuda.py
@@ -78,7 +78,6 @@


 class CUDABackend(Backend):
-
     def double_quant(
         self,
         A: torch.Tensor,
diff --git a/bitsandbytes/backends/mps.py b/bitsandbytes/backends/mps.py
index 6391e818a..5b7eda0c7 100644
--- a/bitsandbytes/backends/mps.py
+++ b/bitsandbytes/backends/mps.py
@@ -8,7 +8,6 @@


 class MPSBackend(Backend):
-
     def double_quant(
         self,
         A: torch.Tensor,
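Review note on the `cpu_xpu_common.py` hunk above: besides the comment-spacing fix, the line it touches is a small capability probe — `torch.compile` for CPU/XPU is enabled only if invoking `g++` succeeds. A minimal standalone sketch of that pattern; the narrower `except` clause and the `returncode` check are small deviations from the module, which catches `BaseException` and ignores the exit status:

```python
import subprocess
import warnings

gxx_available = False
try:
    # capture_output=True hides the version banner from the terminal.
    result = subprocess.run(["g++", "--version"], capture_output=True)
    gxx_available = result.returncode == 0
except (OSError, subprocess.SubprocessError):
    warnings.warn("g++ not found, torch.compile disabled for CPU/XPU.")
```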
diff --git a/bitsandbytes/backends/npu.py b/bitsandbytes/backends/npu.py
index 5457563a5..8cdd6d10b 100644
--- a/bitsandbytes/backends/npu.py
+++ b/bitsandbytes/backends/npu.py
@@ -29,7 +29,6 @@ def assert_on_npu(tensors):


 class NPUBackend(Backend):
-
     def double_quant(
         self,
         A: torch.Tensor,
@@ -107,21 +106,21 @@ def quantize_4bit(
         torch.npu.set_device(A.device)
         if A.dtype in [torch.float32, torch.float16, torch.bfloat16]:
             data = [
-                -1.0,
-                -0.6961928009986877,
+                -1.0,
+                -0.6961928009986877,
                 -0.5250730514526367,
                 -0.39491748809814453,
                 -0.28444138169288635,
                 -0.18477343022823334,
-                -0.09105003625154495,
-                0.0,
-                0.07958029955625534,
-                0.16093020141124725,
-                0.24611230194568634,
-                0.33791524171829224,
-                0.44070982933044434,
-                0.5626170039176941,
-                0.7229568362236023,
+                -0.09105003625154495,
+                0.0,
+                0.07958029955625534,
+                0.16093020141124725,
+                0.24611230194568634,
+                0.33791524171829224,
+                0.44070982933044434,
+                0.5626170039176941,
+                0.7229568362236023,
                 1.0,
             ]
             data = torch.tensor(data, device="npu", dtype=torch.float32).view(1, -1)
@@ -132,10 +131,10 @@ def quantize_4bit(
             out = out.reshape(-1, 2)
             out = (out[:, 0] + out[:, 1] * 16).to(torch.uint8)
         else:
-            raise ValueError(f"Blockwise quantization only supports 16/32-bit floats, but got {A.dtype}")
+            raise ValueError(f"Blockwise quantization only supports 16/32-bit floats, but got {A.dtype}")
         assert_on_npu([A, absmax, out])
         torch.npu.set_device(prev_device)
-
+
         code = get_4bit_type(quant_type, device=A.device)
         state = QuantState(
             absmax=absmax,
@@ -164,7 +163,7 @@ def dequantize_4bit(
             raise ValueError(
                 f"The blockwise of {blocksize} is not supported. Supported values: {supported_blocksizes}"
             )
-
+
         if quant_state is None:
             assert absmax is not None and out is not None
             quant_state = QuantState(
@@ -192,7 +191,7 @@ def dequantize_4bit(
                 get_ptr(out),
                 ct.c_int(quant_state.blocksize),
                 ct.c_int(n),
-                torch.npu.current_stream(),
+                torch.npu.current_stream(),
             )
         elif out.dtype == torch.float16:
             lib.cdequantize_blockwise_fp16_nf4(
@@ -201,7 +200,7 @@ def dequantize_4bit(
                 get_ptr(out),
                 ct.c_int(quant_state.blocksize),
                 ct.c_int(n),
-                torch.npu.current_stream(),
+                torch.npu.current_stream(),
             )
         elif out.dtype == torch.bfloat16:
             # bf16: bf16 -> fp32 -> op -> fp32 -> bf16
@@ -213,7 +212,7 @@ def dequantize_4bit(
                 get_ptr(out),
                 ct.c_int(quant_state.blocksize),
                 ct.c_int(n),
-                torch.npu.current_stream()
+                torch.npu.current_stream(),
             )
             out = out.to(torch.bfloat16)
         else:
diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 6dc5fb63b..ec329cbb6 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -25,7 +25,7 @@


 from bitsandbytes.consts import DYNAMIC_LIBRARY_SUFFIX, PACKAGE_DIR
 from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs, get_rocm_gpu_arch
-from bitsandbytes.npu_specs import get_npu_specs
+from bitsandbytes.npu_specs import get_npu_specs

 logger = logging.getLogger(__name__)
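Review note on the `quantize_4bit` hunk above: the reindented float table is the NF4 codebook — 16 quantiles of a standard normal distribution rescaled to [-1, 1]. As a reading aid, here is a pure-PyTorch sketch of the blockwise scheme those lines feed: per-block absmax scaling, nearest-codebook-entry lookup, and the same two-nibbles-per-byte packing as `out[:, 0] + out[:, 1] * 16`. This is a reference illustration, not the NPU kernel; `nf4_quantize_ref` is a hypothetical name and the input length is assumed to be a multiple of `blocksize`:

```python
import torch

# The 16 NF4 codebook values from the quantize_4bit hunk above.
NF4_CODE = torch.tensor([
    -1.0, -0.6961928009986877, -0.5250730514526367, -0.39491748809814453,
    -0.28444138169288635, -0.18477343022823334, -0.09105003625154495, 0.0,
    0.07958029955625534, 0.16093020141124725, 0.24611230194568634,
    0.33791524171829224, 0.44070982933044434, 0.5626170039176941,
    0.7229568362236023, 1.0,
])

def nf4_quantize_ref(A: torch.Tensor, blocksize: int = 64):
    """Blockwise NF4 quantization, CPU reference (assumes numel % blocksize == 0)."""
    blocks = A.float().flatten().reshape(-1, blocksize)
    absmax = blocks.abs().amax(dim=1, keepdim=True)  # per-block scale
    scaled = blocks / absmax.clamp(min=1e-12)        # guard all-zero blocks; now in [-1, 1]
    # Nearest codebook index per element: (n_blocks, blocksize, 16) -> argmin over codes.
    idx = (scaled.unsqueeze(-1) - NF4_CODE).abs().argmin(dim=-1)
    # Pack two 4-bit indices per byte, matching out[:, 0] + out[:, 1] * 16.
    pairs = idx.flatten().reshape(-1, 2)
    packed = (pairs[:, 0] + pairs[:, 1] * 16).to(torch.uint8)
    return packed, absmax.flatten()

packed, absmax = nf4_quantize_ref(torch.randn(4096))  # 4096 values -> 2048 bytes
```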
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index bd0535346..0ae65dab2 100755
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -313,7 +313,7 @@ def cuda(self, device: Optional[Union[int, device, str]] = None, non_blocking: b

     def cpu(self, non_blocking: bool = False):
         return self.to(device="cpu", non_blocking=non_blocking)
-
+
     def npu(self, device: Optional[Union[int, device, str]] = None, non_blocking: bool = False):
         # `torch.Tensor.to()` is not supported by `torch_npu` (see this [issue](https://github.com/Ascend/pytorch/issues/16)).
         if isinstance(device, int):
diff --git a/setup.py b/setup.py
index eecd2e7ff..e8d3f547c 100644
--- a/setup.py
+++ b/setup.py
@@ -47,6 +47,7 @@ def get_version_and_write_to_file():
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()

+
 # Tested with wheel v0.29.0
 class BinaryDistribution(Distribution):
     def has_ext_modules(self):
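Review note on the `npu()` hunk above: the change only drops a stray blank line, but the comment it carries documents a real constraint — `torch_npu` does not support `torch.Tensor.to()` (see the linked Ascend issue), so the method must normalize its `device` argument itself before moving data. A minimal sketch of that normalization step; `normalize_npu_device` is a hypothetical helper, and mapping a bare `int` to `f"npu:{device}"` is an assumption about the method body this hunk truncates:

```python
from typing import Optional, Union

import torch

def normalize_npu_device(device: Optional[Union[int, torch.device, str]] = None) -> str:
    """Normalize the accepted device spellings to the 'npu[:N]' string form."""
    if device is None:
        return "npu"            # current default NPU device
    if isinstance(device, int):
        return f"npu:{device}"  # bare index -> explicit device string (assumed mapping)
    return str(device)          # torch.device or str pass through unchanged

assert normalize_npu_device(0) == "npu:0"
```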