Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
Add bias support for sparse layers (#25)
Browse files: browse the repository at this point in the history
  • Loading branch information
mgoin authored Feb 16, 2024
1 parent 47280b4 commit ab469e5
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,24 +58,32 @@ def apply_weights(
assert not w.has_compressed_data
output = F.linear(x, w.uncompressed_data, bias)
elif self.storage_format_cls == SparseSemiStructuredStorageFormat:
assert bias is None
w_encap = w.compressed_data.encapsulated_torch_sparse_tensor
out_shape = (x.shape[:-1] + (w_encap.shape[0], ))
reshaped_x, valid_rows_range = pad_tensor_to_multiple(
x.reshape(-1, x.shape[-1]), 8)
if bias is None:
bias = torch.nn.Parameter(
torch.zeros(
(w_encap.shape[0], ),
dtype=reshaped_x.dtype,
device=reshaped_x.device,
))
output = F.linear(
reshaped_x, w_encap,
torch.nn.Parameter(torch.zeros((w_encap.shape[0], ))).to(
reshaped_x.dtype).to(reshaped_x.device)).contiguous()
output = extract_valid_rows(output, valid_rows_range)
return output.reshape(out_shape)
reshaped_x,
w_encap,
bias,
).contiguous()
output = extract_valid_rows(output,
valid_rows_range).reshape(out_shape)
elif self.storage_format_cls == SparseBEGemmStorageFormat:
assert bias is None
assert w.compress_transposed
out_shape = (x.shape[:-1] + (w.shape[0], ))
reshaped_x = x.reshape(-1, x.shape[-1])
y = be_ds_gemm(reshaped_x, w.compressed_data)
return y.reshape(out_shape)
output = be_ds_gemm(reshaped_x,
w.compressed_data).reshape(out_shape)
if bias is not None:
output = output + bias
else:
# Standard matrix multiply
# Uncompress to dense
Expand Down

0 comments on commit ab469e5

Please sign in to comment.