From 73be9d33cdf3d9e6b5195d5d273e7d78292c7e5a Mon Sep 17 00:00:00 2001
From: Daniel King <43149077+dakinggg@users.noreply.github.com>
Date: Mon, 1 Jul 2024 12:58:42 -0700
Subject: [PATCH] update hf token env var name (#1321)

---
 README.md                                                | 2 +-
 llmfoundry/utils/model_download_utils.py                 | 2 +-
 mcli/mcli-llama2-finetune.yaml                           | 2 +-
 scripts/inference/hf_chat.py                             | 8 ++------
 scripts/inference/hf_generate.py                         | 6 ++----
 scripts/misc/download_model.py                           | 8 ++++++--
 tests/a_scripts/inference/test_convert_composer_to_hf.py | 4 ++--
 tests/models/hf/test_hf_config.py                        | 4 ++--
 8 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 7d39b7a829..0299e43710 100644
--- a/README.md
+++ b/README.md
@@ -264,7 +264,7 @@ Note: the `composer` command used above to train the model refers to the [Compos
 If you have a write-enabled [HuggingFace auth token](https://huggingface.co/docs/hub/security-tokens), you can optionally upload your model to the Hub! Just export your token like this:
 
 ```bash
-export HUGGING_FACE_HUB_TOKEN=your-auth-token
+export HF_TOKEN=your-auth-token
 ```
 
 and uncomment the line containing `--hf_repo_for_upload ...` in the above call to `inference/convert_composer_to_hf.py`.
diff --git a/llmfoundry/utils/model_download_utils.py b/llmfoundry/utils/model_download_utils.py
index c11a47929f..dde8240d8b 100644
--- a/llmfoundry/utils/model_download_utils.py
+++ b/llmfoundry/utils/model_download_utils.py
@@ -75,7 +75,7 @@ def download_from_hf_hub(
             available. Defaults to True.
         tokenizer_only (bool): If true, only download tokenizer files.
         token (str, optional): The HuggingFace API token. If not provided, the token will be read from the
-            `HUGGING_FACE_HUB_TOKEN` environment variable.
+            `HF_TOKEN` environment variable.
 
     Raises:
         RepositoryNotFoundError: If the model repo doesn't exist or the token is unauthorized.
diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
index f70cfd056d..443429ca0a 100644
--- a/mcli/mcli-llama2-finetune.yaml
+++ b/mcli/mcli-llama2-finetune.yaml
@@ -36,7 +36,7 @@ parameters:
     init_device: mixed
     pretrained_model_name_or_path: meta-llama/Llama-2-7b-hf
     pretrained: true
-    # Note: you must have set the HUGGING_FACE_HUB_TOKEN environment variable and have access to the llama2 models
+    # Note: you must have set the HF_TOKEN environment variable and have access to the llama2 models
     use_auth_token: true
     use_flash_attention_2: true
 
diff --git a/scripts/inference/hf_chat.py b/scripts/inference/hf_chat.py
index e992371c32..7fb3d2af46 100644
--- a/scripts/inference/hf_chat.py
+++ b/scripts/inference/hf_chat.py
@@ -364,9 +364,7 @@ def main(args: Namespace) -> None:
     except Exception as e:
         raise RuntimeError(
             'If you are having auth problems, try logging in via `huggingface-cli login` '
-            +
-            'or by setting the environment variable `export HUGGING_FACE_HUB_TOKEN=... '
-            +
+            + 'or by setting the environment variable `export HF_TOKEN=... ' +
             'using your access token from https://huggingface.co/settings/tokens.',
         ) from e
 
@@ -389,9 +387,7 @@ def main(args: Namespace) -> None:
         raise RuntimeError(
             'Unable to load HF model. ' +
             'If you are having auth problems, try logging in via `huggingface-cli login` '
-            +
-            'or by setting the environment variable `export HUGGING_FACE_HUB_TOKEN=... '
-            +
+            + 'or by setting the environment variable `export HF_TOKEN=... ' +
             'using your access token from https://huggingface.co/settings/tokens.',
         ) from e
 
diff --git a/scripts/inference/hf_generate.py b/scripts/inference/hf_generate.py
index eab46d7a69..b2e758b4ce 100644
--- a/scripts/inference/hf_generate.py
+++ b/scripts/inference/hf_generate.py
@@ -200,7 +200,7 @@ def main(args: Namespace) -> None:
     except Exception as e:
         raise RuntimeError(
             'If you are having auth problems, try logging in via `huggingface-cli login` ' +\
-            'or by setting the environment variable `export HUGGING_FACE_HUB_TOKEN=... ' +\
+            'or by setting the environment variable `export HF_TOKEN=... ' +\
             'using your access token from https://huggingface.co/settings/tokens.',
         ) from e
 
@@ -236,9 +236,7 @@ def main(args: Namespace) -> None:
         raise RuntimeError(
             'Unable to load HF model. ' +
             'If you are having auth problems, try logging in via `huggingface-cli login` '
-            +
-            'or by setting the environment variable `export HUGGING_FACE_HUB_TOKEN=... '
-            +
+            + 'or by setting the environment variable `export HF_TOKEN=... ' +
             'using your access token from https://huggingface.co/settings/tokens.',
         ) from e
 
diff --git a/scripts/misc/download_model.py b/scripts/misc/download_model.py
index 4e36c35e29..91b0c5a037 100644
--- a/scripts/misc/download_model.py
+++ b/scripts/misc/download_model.py
@@ -27,7 +27,8 @@
     download_from_oras,
 )
 
-HF_TOKEN_ENV_VAR = 'HUGGING_FACE_HUB_TOKEN'
+DEPRECATED_HF_TOKEN_ENV_VAR = 'HUGGING_FACE_HUB_TOKEN'
+HF_TOKEN_ENV_VAR = 'HF_TOKEN'
 
 logging.basicConfig(
     format=f'%(asctime)s: %(levelname)s: %(name)s: %(message)s',
@@ -42,7 +43,10 @@ def add_hf_parser_arguments(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(
         '--token',
         type=str,
-        default=os.getenv(HF_TOKEN_ENV_VAR),
+        default=os.getenv(
+            HF_TOKEN_ENV_VAR,
+            os.getenv(DEPRECATED_HF_TOKEN_ENV_VAR),
+        ),
     )
 
 
diff --git a/tests/a_scripts/inference/test_convert_composer_to_hf.py b/tests/a_scripts/inference/test_convert_composer_to_hf.py
index 2ea9470324..2ef458fece 100644
--- a/tests/a_scripts/inference/test_convert_composer_to_hf.py
+++ b/tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -533,7 +533,7 @@ def _get_model_and_tokenizer(
         tokenizer_name = 'EleutherAI/gpt-neo-125M'
     elif model == 'llama2':
         assert tie_word_embeddings is None
-        if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
+        if 'HF_TOKEN' not in os.environ:
             pytest.skip(
                 'The CI cluster does not have access to the Llama models, so skip this test.',
             )
@@ -985,7 +985,7 @@ def test_convert_and_generate(
         om_cfg['model']['config_overrides']['hidden_size'] = 36
     elif model == 'llama2':
         assert tie_word_embeddings is None
-        if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
+        if 'HF_TOKEN' not in os.environ:
             pytest.skip(
                 'The CI cluster does not have access to the Llama models, so skip this test.',
             )
diff --git a/tests/models/hf/test_hf_config.py b/tests/models/hf/test_hf_config.py
index 1ca384171d..d0ec544de8 100644
--- a/tests/models/hf/test_hf_config.py
+++ b/tests/models/hf/test_hf_config.py
@@ -172,7 +172,7 @@ def test_hf_config_override(
 
 
 @pytest.mark.skipif(
-    'HUGGING_FACE_HUB_TOKEN' not in os.environ,
+    'HF_TOKEN' not in os.environ,
     reason='CI does not have access to llama2',
 )
 def test_rope_scaling_override():
@@ -205,7 +205,7 @@ def test_rope_scaling_override():
 
 
 @pytest.mark.skipif(
-    'HUGGING_FACE_HUB_TOKEN' not in os.environ,
+    'HF_TOKEN' not in os.environ,
     reason='CI does not have access to Dbrx',
 )
 def test_nested_override():