Skip to content

Commit

Permalink
make code check pass
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jonathan Flynn committed Oct 19, 2024
1 parent 4fc5d23 commit 8b48846
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 26 deletions.
8 changes: 5 additions & 3 deletions src/transformers/generation/candidate_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,9 +459,11 @@ def get_candidates(self, input_ids: torch.LongTensor) -> Tuple[torch.LongTensor,
prompt_use_length = new_assistant_ids.shape[1]
prompt_use = self.prev_assistant_ids[:, -prompt_use_length:]

discrepancy_length, new_tokens_only, discrepancy_only = (
AssistedCandidateGeneratorDifferentTokenizers._get_tokens_diag(prompt_use, new_assistant_ids)
)
(
discrepancy_length,
new_tokens_only,
discrepancy_only,
) = AssistedCandidateGeneratorDifferentTokenizers._get_tokens_diag(prompt_use, new_assistant_ids)
assistant_input_ids = self.prev_assistant_ids

if new_tokens_only is not None:
Expand Down
8 changes: 5 additions & 3 deletions src/transformers/models/big_bird/modeling_big_bird.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,9 +919,11 @@ def bigbird_block_sparse_attention(
attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[
:, :, :, :to_block_size
] # 1st key block (global)
attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = (
second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size]
) # last three blocks (global + sliding)
attention_probs[
:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :
] = second_last_attn_weights[
:, :, :, to_block_size : 4 * to_block_size
] # last three blocks (global + sliding)
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
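Expand Down
8 changes: 5 additions & 3 deletions [file path not captured: a second file that also defines bigbird_block_sparse_attention]
Original file line number Diff line number Diff line change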
Expand Up @@ -718,9 +718,11 @@ def bigbird_block_sparse_attention(
attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[
:, :, :, :to_block_size
] # 1st key block (global)
attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = (
second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size]
) # last three blocks (global + sliding)
attention_probs[
:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :
] = second_last_attn_weights[
:, :, :, to_block_size : 4 * to_block_size
] # last three blocks (global + sliding)
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
Expand Down
23 changes: 13 additions & 10 deletions src/transformers/models/moshi/modeling_moshi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2216,16 +2216,19 @@ def generate(
# needs to prepare generation config, even though it'll be done again in `generate`
generation_config, kwargs = self._prepare_generation_config(kwargs.pop("generation_config", None), **kwargs)

input_ids, user_audio_codes, moshi_audio_codes, concat_unconditional_inputs = (
self._check_and_maybe_initalize_inputs(
input_ids=input_ids,
user_input_values=user_input_values,
user_audio_codes=user_audio_codes,
moshi_input_values=moshi_input_values,
moshi_audio_codes=moshi_audio_codes,
inputs_embeds=inputs_embeds,
concat_unconditional_inputs=concat_unconditional_inputs,
)
(
input_ids,
user_audio_codes,
moshi_audio_codes,
concat_unconditional_inputs,
) = self._check_and_maybe_initalize_inputs(
input_ids=input_ids,
user_input_values=user_input_values,
user_audio_codes=user_audio_codes,
moshi_input_values=moshi_input_values,
moshi_audio_codes=moshi_audio_codes,
inputs_embeds=inputs_embeds,
concat_unconditional_inputs=concat_unconditional_inputs,
)

inputs = inputs_embeds if input_ids is None else input_ids
Expand Down
6 changes: 4 additions & 2 deletions src/transformers/tokenization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,10 +1035,12 @@ def get_special_tokens_mask(
return [0] * ((len(token_ids_1) if token_ids_1 else 0) + len(token_ids_0))

@overload
def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str: ...
def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str:
...

@overload
def convert_ids_to_tokens(self, ids: List[int], skip_special_tokens: bool = False) -> List[str]: ...
def convert_ids_to_tokens(self, ids: List[int], skip_special_tokens: bool = False) -> List[str]:
...

def convert_ids_to_tokens(
self, ids: Union[int, List[int]], skip_special_tokens: bool = False
Expand Down
6 changes: 3 additions & 3 deletions tests/models/video_llava/test_modeling_video_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ def prepare_config_and_inputs_for_common(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], config.text_config.vocab_size - 1) + 1
attention_mask = input_ids.ne(1).to(torch_device)

input_ids[(input_ids == config.image_token_index) | (input_ids == config.video_token_index)] = (
self.pad_token_id
)
input_ids[
(input_ids == config.image_token_index) | (input_ids == config.video_token_index)
] = self.pad_token_id
input_ids[:, : self.num_image_tokens] = config.image_token_index
input_ids[:, self.num_image_tokens : self.num_video_tokens + self.num_image_tokens] = config.video_token_index
inputs_dict = {
Expand Down
4 changes: 2 additions & 2 deletions utils/check_copies.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,9 +757,9 @@ def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = No
else:
# not in the target --> add it
theoretical_code_blocks[f"_ignored_new_block_{ignored_new_block_index}"] = code
name_mappings_1[f"_ignored_new_block_{ignored_new_block_index}"] = (
name_mappings_1[
f"_ignored_new_block_{ignored_new_block_index}"
)
] = f"_ignored_new_block_{ignored_new_block_index}"

del observed_code_blocks[name]
observed_code_blocks[f"_ignored_new_block_{ignored_new_block_index}"] = code
Expand Down

0 comments on commit 8b48846

Please sign in to comment.