Skip to content

Commit

Permalink
Fix some out-of-memory (OOM) issues with the split attention and sub-quadratic attention implementations.
Browse files Browse the repository at this point in the history
  • Loading branch information
comfyanonymous committed Oct 26, 2023
1 parent 7fbb217 commit a373367
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
9 changes: 7 additions & 2 deletions comfy/ldm/modules/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,14 @@ def attention_split(q, k, v, heads, mask=None):

mem_free_total = model_management.get_free_memory(q.device)

if _ATTN_PRECISION =="fp32":
element_size = 4
else:
element_size = q.element_size()

gb = 1024 ** 3
tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size()
modifier = 3 if q.element_size() == 2 else 2.5
tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size
modifier = 3 if element_size == 2 else 2.5
mem_required = tensor_size * modifier
steps = 1

Expand Down
3 changes: 2 additions & 1 deletion comfy/ldm/modules/sub_quadratic_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def _summarize_chunk(
)
max_score, _ = torch.max(attn_weights, -1, keepdim=True)
max_score = max_score.detach()
torch.exp(attn_weights - max_score, out=attn_weights)
attn_weights -= max_score
torch.exp(attn_weights, out=attn_weights)
exp_weights = attn_weights.to(value.dtype)
exp_values = torch.bmm(exp_weights, value)
max_score = max_score.squeeze(-1)
Expand Down

0 comments on commit a373367

Please sign in to comment.