diff --git a/vllm/model_executor/layers/parameters/lazy_compressed.py b/vllm/model_executor/layers/parameters/lazy_compressed.py index 1ef776170d53c..96e892a03d1fb 100644 --- a/vllm/model_executor/layers/parameters/lazy_compressed.py +++ b/vllm/model_executor/layers/parameters/lazy_compressed.py @@ -63,7 +63,7 @@ def compress(self) -> None: density = torch.count_nonzero( self.uncompressed_data).item() / numpy.prod(self.shape) - # only compress if we have sufficient sparsity (>=45%), currently + # only compress if we have sufficient sparsity (>=45%), currently # this applies globally across all formats including 2:4 if (1 - density) < 0.45: return diff --git a/vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py b/vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py index b283b42c7e0ee..1420ee97be0ce 100644 --- a/vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py +++ b/vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py @@ -71,4 +71,3 @@ def apply_weights( assert not w.compress_transposed output = F.linear(x, w.compressed_data.decompress(), bias) return output -