
Commit

clean up
dakinggg committed Dec 15, 2023
1 parent 68d6f00 commit 004a023
Showing 2 changed files with 4 additions and 5 deletions.
5 changes: 3 additions & 2 deletions llmfoundry/__init__.py
@@ -5,6 +5,8 @@

 try:
     import warnings
+    # bitsandbytes is a very noisy library. A lot of it is print statements that we can't easily suppress,
+    # but we can at least suppress a bunch of spurious warnings.
     warnings.filterwarnings('ignore',
                             category=UserWarning,
                             module='bitsandbytes')
@@ -13,13 +15,12 @@

     from llmfoundry.utils.logging_utils import SpecificWarningFilter

-    # Filter out Hugging Face warning
+    # Filter out Hugging Face warning for not using a pinned revision of the model
     hf_dynamic_modules_logger = logging.getLogger(
         'transformers.dynamic_module_utils')
     new_files_warning_filter = SpecificWarningFilter(
         'A new version of the following files was downloaded from')

-    # We will trim examples later in the collate_fn, so we want to silence this warning from Hugging Face
     hf_dynamic_modules_logger.addFilter(new_files_warning_filter)

     # Before importing any transformers models, we need to disable transformers flash attention if
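Note: the SpecificWarningFilter implementation itself is not part of this diff (it is imported from llmfoundry.utils.logging_utils). As a minimal sketch, assuming it is a standard logging.Filter keyed on a message prefix, the two suppression mechanisms in this file combine roughly like this (the class body is illustrative, not the actual llm-foundry code):

import logging
import warnings


class SpecificWarningFilter(logging.Filter):
    """Illustrative stand-in for llmfoundry.utils.logging_utils.SpecificWarningFilter."""

    def __init__(self, message_to_suppress: str) -> None:
        super().__init__()
        self.message_to_suppress = message_to_suppress

    def filter(self, record: logging.LogRecord) -> bool:
        # Returning False drops the record; everything else passes through.
        return not record.getMessage().startswith(self.message_to_suppress)


# Warnings raised through the warnings module (e.g. by bitsandbytes) are
# silenced per-module, as in the first hunk above.
warnings.filterwarnings('ignore', category=UserWarning, module='bitsandbytes')

# Warnings emitted through a logger (e.g. the Hugging Face dynamic modules
# logger) are silenced by attaching a filter to that logger, as in the second hunk.
hf_dynamic_modules_logger = logging.getLogger('transformers.dynamic_module_utils')
hf_dynamic_modules_logger.addFilter(
    SpecificWarningFilter('A new version of the following files was downloaded from'))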
4 changes: 1 addition & 3 deletions llmfoundry/models/hf/hf_causal_lm.py
@@ -108,11 +108,9 @@ def __init__(self, om_model_config: Union[DictConfig,
             trust_remote_code=trust_remote_code,
             use_auth_token=use_auth_token,
             attn_implementation=requested_attention_implementation,
-            use_cache=False,
+            use_cache=False,  # Necessary due to https://github.com/huggingface/transformers/issues/28056
         )

-        # config._flash_attn_2_enabled = use_flash_attention_2
-
         # This is not ideal, however Hugging Face's _autoset_attn_implementation function
         # forces you to load the model in fp16/bf16 if you want to use flash attention. Rather than loading
         # the model and then casting it back to fp32, we are monkeypatching their check.
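For context, the keyword arguments above are passed to AutoConfig.from_pretrained, and the trailing comment refers to a monkeypatch whose body is collapsed in this view. A hedged sketch of the pattern, with an illustrative model name and attention implementation (these values are not taken from this diff):

from transformers import AutoConfig, PreTrainedModel

# Illustrative values; in hf_causal_lm.py these come from om_model_config.
pretrained_model_name_or_path = 'mosaicml/mpt-7b'
requested_attention_implementation = 'flash_attention_2'

config = AutoConfig.from_pretrained(
    pretrained_model_name_or_path,
    trust_remote_code=True,
    attn_implementation=requested_attention_implementation,
    use_cache=False,  # Necessary due to https://github.com/huggingface/transformers/issues/28056
)


# One way to bypass the dtype check in _autoset_attn_implementation is to
# replace it with a classmethod that simply records the requested
# implementation. The actual patch in this file is collapsed above, so this
# is a sketch of the approach the comment describes, not the exact code.
def _autoset_attn_implementation_monkeypatch(cls, config, *args, **kwargs):
    config._attn_implementation = requested_attention_implementation
    return config


PreTrainedModel._autoset_attn_implementation = classmethod(
    _autoset_attn_implementation_monkeypatch)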
