Commit

Merge branch 'main' into compressed-tensors-quantizer
bfineran authored Aug 19, 2024
2 parents c48840d + 61d89c1 commit ab74d26
Showing 561 changed files with 29,451 additions and 5,246 deletions.
3 changes: 2 additions & 1 deletion .circleci/config.yml
@@ -142,6 +142,7 @@ jobs:
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: python utils/check_doc_toc.py
+      - run: python utils/check_docstrings.py --check_all

check_repository_consistency:
working_directory: ~/transformers
@@ -190,4 +191,4 @@ workflows:
- check_circleci_user
- check_code_quality
- check_repository_consistency
-      - fetch_all_tests
\ No newline at end of file
+      - fetch_all_tests
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/i18n.md
@@ -34,7 +34,7 @@ Some notes:

## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
-- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md)
+- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
- [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
2 changes: 1 addition & 1 deletion .github/workflows/add-model-like.yml
@@ -23,7 +23,7 @@ jobs:
sudo apt -y update && sudo apt install -y libsndfile1-dev
- name: Load cached virtual environment
-        uses: actions/cache@v2
+        uses: actions/cache@v4
id: cache
with:
path: ~/venv/
2 changes: 1 addition & 1 deletion .github/workflows/build-ci-docker-images.yml
@@ -74,4 +74,4 @@ jobs:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
-      slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
\ No newline at end of file
+      slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/check_tiny_models.yml
@@ -23,7 +23,7 @@ jobs:

- uses: actions/checkout@v4
- name: Set up Python 3.8
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
with:
# Semantic version range syntax or exact version of a Python version
python-version: '3.8'
2 changes: 1 addition & 1 deletion .github/workflows/release-conda.yml
@@ -19,7 +19,7 @@ jobs:

steps:
- name: Checkout repository
-        uses: actions/checkout@v1
+        uses: actions/checkout@v4

- name: Install miniconda
uses: conda-incubator/setup-miniconda@v2
2 changes: 1 addition & 1 deletion .github/workflows/self-pr-slow-ci.yml
@@ -4,7 +4,7 @@ on:
pull_request:
paths:
- "src/transformers/models/*/modeling_*.py"
- "tests/models/*/test_*.py"
- "tests/**/test_*.py"

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
2 changes: 1 addition & 1 deletion .github/workflows/stale.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v4

- name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
with:
python-version: 3.8

23 changes: 6 additions & 17 deletions .github/workflows/trufflehog.yml
@@ -10,20 +10,9 @@ jobs:
trufflehog:
runs-on: ubuntu-latest
steps:
-      - shell: bash
-        run: |
-          if [ "${{ github.event_name }}" == "push" ]; then
-            echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV
-            echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV
-          fi
-          if [ "${{ github.event_name }}" == "pull_request" ]; then
-            echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV
-            echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV
-          fi
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{env.branch}}
-          fetch-depth: ${{env.depth}}
-      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Secret Scanning
+        uses: trufflesecurity/trufflehog@main
1 change: 1 addition & 0 deletions Makefile
@@ -56,6 +56,7 @@ quality:
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
+	python utils/check_docstrings.py --check_all


# Format source code automatically and check if there are any problems left that need manual fixing
5 changes: 3 additions & 2 deletions docker/consistency.dockerfile
@@ -2,13 +2,14 @@ FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
ARG REF=main
-RUN apt-get update && apt-get install -y time git pkg-config make git-lfs
+RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
+RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
RUN git lfs install

RUN pip uninstall -y transformers
2 changes: 1 addition & 1 deletion docker/transformers-all-latest-gpu/Dockerfile
@@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).

-ARG PYTORCH='2.3.0'
+ARG PYTORCH='2.4.0'
# (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc.
@@ -42,7 +42,7 @@ RUN python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why tests fail.
-RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
+RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
2 changes: 1 addition & 1 deletion docker/transformers-pytorch-gpu/Dockerfile
@@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

# If set to nothing, will install the latest version
-ARG PYTORCH='2.3.0'
+ARG PYTORCH='2.4.0'
ARG TORCH_VISION=''
ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc.
2 changes: 1 addition & 1 deletion docs/TRANSLATING.md
@@ -54,4 +54,4 @@ The fields you should add are `local` (with the name of the file containing the
Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter.

-> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu and @MKhalusova.
+> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu.
18 changes: 16 additions & 2 deletions docs/source/en/_toctree.yml
@@ -99,6 +99,8 @@
sections:
- local: generation_strategies
title: Customize the generation strategy
+    - local: kv_cache
+      title: Best Practices for Generation with Cache
title: Generation
- isExpanded: false
sections:
@@ -118,7 +120,7 @@
- local: custom_models
title: Share a custom model
- local: chat_templating
-      title: Templates for chat models
+      title: Chat templates
- local: trainer
title: Trainer
- local: sagemaker
@@ -161,6 +163,8 @@
title: FBGEMM_FP8
- local: quantization/optimum
title: Optimum
+    - local: quantization/torchao
+      title: TorchAO
- local: quantization/contribute
title: Contribute new quantization method
title: Quantization Methods
@@ -368,6 +372,8 @@
title: ESM
- local: model_doc/falcon
title: Falcon
+    - local: model_doc/falcon_mamba
+      title: FalconMamba
- local: model_doc/fastspeech2_conformer
title: FastSpeech2Conformer
- local: model_doc/flan-t5
@@ -436,6 +442,8 @@
title: MADLAD-400
- local: model_doc/mamba
title: Mamba
+    - local: model_doc/mamba2
+      title: mamba2
- local: model_doc/marian
title: MarianMT
- local: model_doc/markuplm
@@ -466,6 +474,8 @@
title: MT5
- local: model_doc/mvp
title: MVP
+    - local: model_doc/nemotron
+      title: Nemotron
- local: model_doc/nezha
title: NEZHA
- local: model_doc/nllb
@@ -500,6 +510,8 @@
title: QDQBert
- local: model_doc/qwen2
title: Qwen2
+    - local: model_doc/qwen2_audio
+      title: Qwen2Audio
- local: model_doc/qwen2_moe
title: Qwen2MoE
- local: model_doc/rag
@@ -684,6 +696,8 @@
title: Bark
- local: model_doc/clap
title: CLAP
+    - local: model_doc/dac
+      title: dac
- local: model_doc/encodec
title: EnCodec
- local: model_doc/hiera
@@ -810,7 +824,7 @@
title: Llava
- local: model_doc/llava_next
title: LLaVA-NeXT
-    - local: model_doc/llava-next-video
+    - local: model_doc/llava_next_video
title: LLaVa-NeXT-Video
- local: model_doc/lxmert
title: LXMERT
59 changes: 56 additions & 3 deletions docs/source/en/agents.md
@@ -119,10 +119,12 @@ def llm_engine(messages, stop_sequences=["Task"]) -> str:
```

You could use any `llm_engine` method as long as:
-1. it follows the [messages format](./chat_templating.md) for its input (`List[Dict[str, str]]`) and returns a `str`
-2. it stops generating outputs at the sequences passed in the argument `stop`
+1. it follows the [messages format](./chat_templating.md) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
+2. it stops generating outputs at the sequences passed in the argument `stop_sequences`.
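
For instance, a minimal `llm_engine` could be built on `huggingface_hub.InferenceClient`, as in the sketch below (illustrative only: the model name and `max_tokens` value are placeholders, not requirements):

```py
from huggingface_hub import InferenceClient

client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")

def llm_engine(messages, stop_sequences=["Task"]) -> str:
    # `messages` is a list of chat-format dicts: [{"role": "user", "content": "..."}, ...]
    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
    # Return only the generated text; the agent parses this string
    return response.choices[0].message.content
```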

-You also need a `tools` argument which accepts a list of `Tools`. You can provide an empty list for `tools`, but use the default toolbox with the optional argument `add_base_tools=True`.
+Additionally, `llm_engine` can take a `grammar` argument. If you specify a `grammar` at agent initialization, it is passed along to calls to `llm_engine` to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance), forcing properly formatted agent outputs.
+
+You will also need a `tools` argument, which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by setting the optional argument `add_base_tools=True`.

Now you can create an agent, like [`CodeAgent`], and run it. For convenience, we also provide the [`HfEngine`] class that uses `huggingface_hub.InferenceClient` under the hood.
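
For example, creating and running an agent could look like the following sketch (the model and the task prompt are illustrative):

```py
from transformers import CodeAgent, HfEngine

# Use the built-in engine backed by the Hugging Face Inference API
llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")

# No custom tools, but include the default toolbox
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)

agent.run(
    "Could you translate this sentence from French, say it out loud and return the audio.",
    sentence="Où est la boulangerie la plus proche?",
)
```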

@@ -509,3 +511,54 @@ agent = ReactCodeAgent(tools=[search_tool])

agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?")
```

+## Gradio interface
+
+You can leverage `gradio.Chatbot` to display your agent's thoughts using `stream_to_gradio`. Here is an example:
+
+```py
+import gradio as gr
+from transformers import (
+    load_tool,
+    ReactCodeAgent,
+    HfEngine,
+    stream_to_gradio,
+)
+
+# Import tool from Hub
+image_generation_tool = load_tool("m-ric/text-to-image")
+
+llm_engine = HfEngine("meta-llama/Meta-Llama-3-70B-Instruct")
+
+# Initialize the agent with the image generation tool
+agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
+
+
+def interact_with_agent(task):
+    messages = []
+    messages.append(gr.ChatMessage(role="user", content=task))
+    yield messages
+    for msg in stream_to_gradio(agent, task):
+        messages.append(msg)
+        yield messages + [
+            gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
+        ]
+    yield messages
+
+
+with gr.Blocks() as demo:
+    text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
+    submit = gr.Button("Run illustrator agent!")
+    chatbot = gr.Chatbot(
+        label="Agent",
+        type="messages",
+        avatar_images=(
+            None,
+            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
+        ),
+    )
+    submit.click(interact_with_agent, [text_input], [chatbot])
+
+if __name__ == "__main__":
+    demo.launch()
+```
