Skip to content

Commit

Permalink
change block_size to 16 for xpu, as `single_query_cached_kv_attention` API does not support 64
Browse files Browse the repository at this point in the history

Signed-off-by: Liu, Kaixuan <[email protected]>
  • Loading branch information
kaixuanliu committed Jan 21, 2025
1 parent 306f672 commit 850195e
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion optimum/exporters/ipex/cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
# Used in `generate` to keep tally of how many tokens the cache has seen

self._seen_tokens = torch.zeros([max_batch_size], dtype=torch.int32, device=device)
- default_block_size = 16 if device.type == "cpu" else 64
+ default_block_size = 16
self.block_size = int(os.environ.get("OI_PAGED_ATTN_BLOCK_SIZE", str(default_block_size)))
self.num_blocks = (max_cache_len // self.block_size + (max_cache_len % self.block_size != 0)) * max_batch_size
self.block_tables = -1 * torch.ones([self.num_blocks], dtype=torch.int32, device=device).reshape(
Expand Down

0 comments on commit 850195e

Please sign in to comment.