Benchmark >2048 token sequence prompts in batches
turboderp committed Jun 11, 2023
1 parent b65d774 · commit 896da5d
Showing 2 changed files with 13 additions and 2 deletions.
4 changes: 4 additions & 0 deletions model.py
@@ -705,6 +705,10 @@ def __init__(self, config):
                                  temp_zeros_float,
                                  temp_dq)
 
+        # Clear the cache
+
+        torch.cuda.empty_cache()
+
 
     def forward(self, input_ids, cache, last_id_only = True, preprocess_only = False):
 
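For context on the model.py change: PyTorch's caching allocator keeps freed blocks reserved for reuse, so the temporary buffers released at the end of __init__ (such as temp_dq above) would otherwise stay counted against the device until the process exits. Below is a minimal standalone sketch, not part of the commit, showing that torch.cuda.empty_cache() shrinks reserved memory without touching live tensors:

# Sketch only: demonstrates the allocator behavior empty_cache() addresses.
import torch

if torch.cuda.is_available():
    scratch = torch.empty(256 * 1024 * 1024, dtype = torch.uint8, device = "cuda")
    del scratch  # the freed block stays cached by PyTorch's allocator

    print(f"reserved before: {torch.cuda.memory_reserved() / 2**20:.0f} MiB")
    torch.cuda.empty_cache()  # return cached blocks to the driver
    print(f"reserved after:  {torch.cuda.memory_reserved() / 2**20:.0f} MiB")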
11 changes: 9 additions & 2 deletions test_benchmark_inference.py
@@ -33,7 +33,14 @@ def begin():
 def next_logits(input_ids, last_id_only = True):
     global model, cache
 
-    return model.forward(input_ids, cache, last_id_only)
+    n_logits = None
+    a = 0
+    while a < input_ids.shape[-1]:
+        b = min(input_ids.shape[-1], a + 2048)
+        n_logits = model.forward(input_ids[:, a:b], cache, last_id_only)
+        a = b
+
+    return n_logits
 
 
 def tokenize(text):
@@ -121,7 +128,7 @@ def mem(name, total = False):
 
 # Warming up apparently makes a huge difference
 
-for i in range(1, 4):
+for i in range(1, 3):
     print(f" -- Warmup pass {i}...")
     begin()
     logits = timer("Warmup", lambda: next_logits(ids))
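For context on the next_logits change: the prompt is now consumed in 2048-token slices, with the model's cache carrying attention state between forward calls, so prompts longer than a single 2048-token pass can still be benchmarked, and only the final slice's logits are returned. Below is a minimal standalone sketch of the same chunking pattern; the helper chunked_forward and its dummy forward callback are hypothetical stand-ins, not exllama API, for model.forward(input_ids[:, a:b], cache, last_id_only):

# Sketch only: chunked_forward is a hypothetical stand-in, not exllama API.
def chunked_forward(total_len, forward, chunk = 2048):
    logits = None
    a = 0
    while a < total_len:
        b = min(total_len, a + chunk)  # slice end, capped at the prompt length
        logits = forward(a, b)         # stands in for model.forward on input_ids[:, a:b]
        a = b
    return logits

slices = []
chunked_forward(5000, lambda a, b: slices.append((a, b)) or (a, b))
print(slices)  # [(0, 2048), (2048, 4096), (4096, 5000)]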
