
Commit

added unintended changes
sunghyuckhong committed Apr 29, 2024
1 parent cc97888 commit 30f99cc
Showing 2 changed files with 4 additions and 18 deletions.
language/gpt-j/quantization/autoscale/model_dict.py (1 change: 0 additions & 1 deletion)
@@ -7,7 +7,6 @@
 GPTJForCausalLM_dict = {
     transformers.models.gptj.modeling_gptj.GPTJForCausalLM : transformers.models.gptj.modeling_gptj,
     furiosa_llm_models.gptj.huggingface.GPTJForCausalLM : furiosa_llm_models.gptj.huggingface,
-    furiosa_llm_models.gptj.paged_attention_concat.GPTJForCausalLM : furiosa_llm_models.gptj.paged_attention_concat,
     furiosa_llm_models.gptj.huggingface_rope.GPTJForCausalLM: furiosa_llm_models.gptj.huggingface_rope,
     furiosa_llm_models.gptj.paged_attention_concat_rope.GPTJForCausalLM: furiosa_llm_models.gptj.paged_attention_concat_rope,
     furiosa_llm_models.gptj.preallocated_concat_rope.GPTJForCausalLM: furiosa_llm_models.gptj.preallocated_concat_rope,
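
The dictionary above maps each GPTJForCausalLM variant class to the module that defines it. As a minimal sketch of how such a class-to-module table might be consumed (get_model_module is a hypothetical helper, not part of this commit):

def get_model_module(model, model_dict):
    # Hypothetical helper, not in this repository: return the module that
    # defines the given model's class, using a class-to-module table shaped
    # like GPTJForCausalLM_dict above.
    for model_cls, module in model_dict.items():
        if type(model) is model_cls:
            return module
    raise KeyError(f"unsupported model class: {type(model)}")
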
Second changed file (4 additions & 17 deletions):
@@ -31,10 +31,6 @@ def update_input_metadata(updated_attention_mask: List[List[int]], block_indices
         active_key_block_indices.append([])
         active_value_block_indices.append([])

-        # last_valid_key_block_idx = None
-        # last_valid_value_block_idx = None
-        # last_valid_token_idx = None
-
         for block in split_blocks:
             # x x 1 => then block is full
             # 1 x x => block is not full
@@ -62,17 +58,6 @@ def update_input_metadata(updated_attention_mask: List[List[int]], block_indices
                 active_key_block_indices[batch_idx].append(new_key_block_idx)
                 active_value_block_indices[batch_idx].append(new_value_block_idx)

-                # last_valid_key_block_idx = new_key_block_idx
-                # last_valid_value_block_idx = new_value_block_idx
-                # last_valid_token_idx = last_idx
-
-                # self.valid_block_meta.append(
-                #     (
-                #         (last_valid_key_block_idx, last_valid_token_idx),
-                #         (last_valid_value_block_idx, last_valid_token_idx),
-                #     )
-                # )
-
         new_key_locations.append(torch.unsqueeze(torch.cat(new_key_location), 0))
         new_value_locations.append(torch.unsqueeze(torch.cat(new_value_location), 0))
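
The "# x x 1 => then block is full" comments above encode how a fixed-size chunk of the attention mask is classified: a block counts as full exactly when its last slot is attended (1), and as partially filled when the last slot is still 0. A minimal sketch of that test, assuming each mask row is a list of 0/1 values split into block_size chunks (these helper names are illustrative, not the repository's API):

def split_into_blocks(mask_row, block_size):
    # e.g. [1, 1, 1, 1, 0, 0] with block_size=3 -> [[1, 1, 1], [1, 0, 0]]
    return [mask_row[i:i + block_size]
            for i in range(0, len(mask_row), block_size)]

def block_is_full(block):
    # "x x 1" => full: the last slot of the block is attended.
    # "1 x x" => not full: the last slot is still empty.
    return block[-1] == 1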

@@ -100,7 +85,7 @@ def make_calib_dataloader_for_paged_attention(calib_dataset_path, batch_size, bu

     #There could be a bug associated with multi-batch calibration in mcp at the moment.
     assert batch_size == 1
-    # batch_size = 2
+
     data_object = Dataset(calib_dataset_path, batch_size)
     data_list = []
     block_indices, block_size, head, head_size = total_block_space[0][0].shape
@@ -162,7 +147,9 @@ def make_calib_dataloader_for_paged_attention(calib_dataset_path, batch_size, bu


     return DataLoader(data_list, batch_size)



-
+
+
+
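
Given the assert batch_size == 1 guard and the multi-batch calibration caveat above, callers are expected to build the calibration loader one sample per batch. A hedged usage sketch follows; every argument after batch_size is an assumption, since the signature is truncated after "bu" in the diff, and the tensor shapes are illustrative:

import torch

# Hypothetical preallocated KV-cache blocks: one (key, value) tensor pair per
# layer, each shaped (block_indices, block_size, head, head_size) to match the
# unpacking of total_block_space[0][0].shape in the diff above. Block count and
# block size are illustrative; GPT-J 6B has 28 layers, 16 heads, head size 256.
total_block_space = [
    (torch.zeros(8, 16, 16, 256), torch.zeros(8, 16, 16, 256))
    for _ in range(28)
]

# Assumes make_calib_dataloader_for_paged_attention is imported from the
# changed module (its path is not preserved on this page).
calib_dataloader = make_calib_dataloader_for_paged_attention(
    "calibration_dataset.json",  # hypothetical calibration data path
    1,                           # batch_size: multi-batch is asserted off
    2048,                        # hypothetical value for the truncated "bu..." parameter
    total_block_space,
)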
