diff --git a/.github/workflows/build_main_documentation.yml b/.github/workflows/build_main_documentation.yml index 20face917ab..d38274f320a 100644 --- a/.github/workflows/build_main_documentation.yml +++ b/.github/workflows/build_main_documentation.yml @@ -10,7 +10,7 @@ on: jobs: build_documentation: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 @@ -18,12 +18,6 @@ jobs: repository: 'huggingface/doc-builder' path: doc-builder - - uses: actions/checkout@v2 - with: - repository: 'huggingface/doc-build' - path: doc-build - token: ${{ secrets.HUGGINGFACE_PUSH }} - - uses: actions/checkout@v2 with: repository: 'huggingface/optimum' @@ -57,6 +51,7 @@ jobs: - name: Free disk space run: | df -h + sudo apt-get update sudo apt-get purge -y '^apache.*' sudo apt-get purge -y '^imagemagick.*' sudo apt-get purge -y '^dotnet.*' @@ -66,7 +61,7 @@ jobs: sudo apt-get purge -y '^mysql.*' sudo apt-get purge -y '^java.*' sudo apt-get purge -y '^openjdk.*' - sudo apt-get purge -y microsoft-edge-stable google-cloud-cli azure-cli google-chrome-stable firefox powershell mono-devel + sudo apt-get purge -y microsoft-edge-stable azure-cli google-chrome-stable firefox mono-devel df -h sudo apt-get autoremove -y >/dev/null 2>&1 sudo apt-get clean @@ -110,6 +105,8 @@ jobs: - name: Setup environment run: | + python -m venv venv-doc + source venv-doc/bin/activate pip uninstall -y doc-builder cd doc-builder git pull origin main @@ -135,6 +132,7 @@ jobs: - name: Make Furiosa documentation run: | + source venv-doc/bin/activate cd optimum-furiosa pip install . sudo apt install software-properties-common @@ -159,6 +157,7 @@ jobs: - name: Make TPU documentation run: | sudo docker system prune -a -f + source venv-doc/bin/activate cd optimum-tpu pip install -U pip pip install . -f https://storage.googleapis.com/libtpu-releases/index.html @@ -192,6 +191,7 @@ jobs: - name: Push to repositories run: | + source venv-doc/bin/activate cd optimum/optimum-doc-build sudo chmod -R ugo+rwx optimum doc-builder push optimum --doc_build_repo_id "hf-doc-build/doc-build" --token "${{ secrets.HF_DOC_BUILD_PUSH }}" --commit_msg "Updated with commit ${{ github.sha }} See: https://github.com/huggingface/optimum/commit/${{ github.sha }}" --n_retries 5 --upload_version_yml diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index e5f2dcb0d18..6eb09aff304 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -8,6 +8,7 @@ on: - "optimum/**.py" - "docs/**.mdx" - "docs/**.yml" + - ".github/workflows/build_pr_documentation.yml" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -15,7 +16,7 @@ concurrency: jobs: build_documentation: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 env: COMMIT_SHA: ${{ github.event.pull_request.head.sha }} PR_NUMBER: ${{ github.event.number }} @@ -60,6 +61,8 @@ jobs: - name: Setup environment run: | + python -m venv venv-doc + source venv-doc/bin/activate pip uninstall -y doc-builder cd doc-builder git pull origin main @@ -99,6 +102,7 @@ jobs: - name: Make TPU documentation run: | sudo docker system prune -a -f + source venv-doc/bin/activate cd optimum-tpu pip install -U pip pip install . -f https://storage.googleapis.com/libtpu-releases/index.html diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml index c429b706bff..861684cfa4d 100644 --- a/.github/workflows/check_code_quality.yml +++ b/.github/workflows/check_code_quality.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] + python-version: [3.9] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/dev_test_benckmark.yml b/.github/workflows/dev_test_benckmark.yml index 5f6fc825021..381197b129a 100644 --- a/.github/workflows/dev_test_benckmark.yml +++ b/.github/workflows/dev_test_benckmark.yml @@ -12,12 +12,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 - os: - - ubuntu-20.04 - runs-on: ${{ matrix.os }} + python-version: ['3.9', '3.11'] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} @@ -27,7 +23,7 @@ jobs: - name: Install dependencies run: | pip install wheel - pip install .[tests,onnxruntime,benchmark] + pip install .[tests,onnxruntime,benchmark] datasets pip install -U git+https://github.com/huggingface/evaluate pip install -U git+https://github.com/huggingface/diffusers pip install -U git+https://github.com/huggingface/transformers diff --git a/.github/workflows/dev_test_bettertransformer.yml b/.github/workflows/dev_test_bettertransformer.yml index e4c999ca6da..e75b5e3bf98 100644 --- a/.github/workflows/dev_test_bettertransformer.yml +++ b/.github/workflows/dev_test_bettertransformer.yml @@ -12,18 +12,16 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 os: - ubuntu-20.04 - macos-13 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} + - name: Setup Python uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: '3.9' - name: Install dependencies run: | pip install .[tests] diff --git a/.github/workflows/dev_test_dummy_inputs.yml b/.github/workflows/dev_test_dummy_inputs.yml index 49baa49c418..72a4763e432 100644 --- a/.github/workflows/dev_test_dummy_inputs.yml +++ b/.github/workflows/dev_test_dummy_inputs.yml @@ -12,9 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 + python-version: ['3.9', '3.11'] os: - ubuntu-20.04 - macos-13 diff --git a/.github/workflows/dev_test_exporters.yml b/.github/workflows/dev_test_exporters.yml index 5d967d125f5..b2dee3ed3a9 100644 --- a/.github/workflows/dev_test_exporters.yml +++ b/.github/workflows/dev_test_exporters.yml @@ -12,12 +12,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 - os: - - ubuntu-20.04 - runs-on: ${{ matrix.os }} + python-version: ['3.9', '3.11'] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/dev_test_fx.yml b/.github/workflows/dev_test_fx.yml index 0b8633282f7..a0c54c78365 100644 --- a/.github/workflows/dev_test_fx.yml +++ b/.github/workflows/dev_test_fx.yml @@ -12,9 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 + python-version: ['3.9', '3.11'] os: - ubuntu-20.04 - macos-13 diff --git a/.github/workflows/dev_test_onnx.yml b/.github/workflows/dev_test_onnx.yml index 48052cfded3..f7514e1c5e5 100644 --- a/.github/workflows/dev_test_onnx.yml +++ b/.github/workflows/dev_test_onnx.yml @@ -12,9 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 + python-version: ['3.9', '3.11'] os: - ubuntu-20.04 - macos-13 diff --git a/.github/workflows/dev_test_onnxruntime.yml b/.github/workflows/dev_test_onnxruntime.yml index 857028ab2db..c9104ebbd6c 100644 --- a/.github/workflows/dev_test_onnxruntime.yml +++ b/.github/workflows/dev_test_onnxruntime.yml @@ -12,9 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.8 - - 3.9 + python-version: ['3.9', '3.11'] os: - ubuntu-20.04 - windows-2019 diff --git a/.github/workflows/dev_test_optimum_common.yml b/.github/workflows/dev_test_optimum_common.yml index 807ed0b1dab..117db50437b 100644 --- a/.github/workflows/dev_test_optimum_common.yml +++ b/.github/workflows/dev_test_optimum_common.yml @@ -12,10 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - 3.7 - - 3.8 - - 3.9 + python-version: ['3.9', '3.11'] os: - ubuntu-20.04 - windows-2019 diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 00000000000..6dc3ff2bbd9 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,23 @@ +name: 'Close stale issues and PRs' +on: + schedule: + - cron: '30 1 * * *' + +permissions: + issues: write + pull-requests: write + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v8 + with: + stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.' + stale-pr-message: 'This PR has been marked as stale because it has been open for 90 days with no activity. This thread will be automatically closed in 30 days if no further activity occurs.' + exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization' + days-before-issue-stale: 30 + days-before-issue-close: 5 + days-before-pr-stale: 90 + days-before-pr-close: 30 + exempt-all-pr-assignees: true \ No newline at end of file diff --git a/.github/workflows/test_benckmark.yml b/.github/workflows/test_benckmark.yml index 7f7f2ace329..fe7df1a20cc 100644 --- a/.github/workflows/test_benckmark.yml +++ b/.github/workflows/test_benckmark.yml @@ -4,9 +4,9 @@ name: Benchmark suite / Python - Test on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -17,20 +17,20 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - pip install wheel - pip install .[tests,onnxruntime,benchmark] - - name: Test with unittest - run: | - python -m unittest discover --start-directory tests/benchmark --pattern 'test_*.py' + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install wheel + pip install .[tests,onnxruntime,benchmark] datasets + - name: Test with unittest + run: | + python -m unittest discover --start-directory tests/benchmark --pattern 'test_*.py' diff --git a/.github/workflows/test_bettertransformer.yml b/.github/workflows/test_bettertransformer.yml index 080d8272dfc..b023fa4bd1b 100644 --- a/.github/workflows/test_bettertransformer.yml +++ b/.github/workflows/test_bettertransformer.yml @@ -15,9 +15,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04, macos-13] - exclude: [{ python-version: 3.8, os: macos-13 }] + python-version: [3.9] + os: [ubuntu-20.04, macos-14] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/test_cli.yml b/.github/workflows/test_cli.yml index ecb19d23aa3..2efab40aab6 100644 --- a/.github/workflows/test_cli.yml +++ b/.github/workflows/test_cli.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9] os: [ubuntu-20.04, macos-13] runs-on: ${{ matrix.os }} @@ -34,7 +34,7 @@ jobs: run: | pip install --upgrade pip pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - pip install .[tests,exporters,exporters-tf] + pip install .[tests,exporters-tf] - name: Test with pytest run: | diff --git a/.github/workflows/test_export_onnx.yml b/.github/workflows/test_export_onnx.yml index 56ef674cb41..d1fd4a9723f 100644 --- a/.github/workflows/test_export_onnx.yml +++ b/.github/workflows/test_export_onnx.yml @@ -2,9 +2,9 @@ name: Exporters ONNX / Python - Test on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -15,27 +15,27 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: ['3.9'] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies for pytorch export - run: | - pip install .[tests,exporters] - - name: Test with unittest - working-directory: tests - run: | - pytest exporters/onnx/test_onnx_*.py -s -n auto -m "not tensorflow_test and not timm_test" --durations=0 - - name: Install dependencies for tensorflow export - run: | - pip install .[tests,exporters-tf] - - name: Test with unittest - working-directory: tests - run: | - pytest exporters/onnx/test_onnx_*.py -n auto -m "tensorflow_test" -s --durations=0 + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies for pytorch export + run: | + pip install .[tests,exporters] + - name: Test with unittest + working-directory: tests + run: | + pytest exporters/onnx/test_onnx_*.py -s -n auto -m "not tensorflow_test and not timm_test" --durations=0 + - name: Install dependencies for tensorflow export + run: | + pip install .[tests,exporters-tf] + - name: Test with unittest + working-directory: tests + run: | + pytest exporters/onnx/test_onnx_*.py -n auto -m "tensorflow_test" -s --durations=0 diff --git a/.github/workflows/test_export_onnx_cli.yml b/.github/workflows/test_export_onnx_cli.yml index 8fa4ebb045f..618a140c147 100644 --- a/.github/workflows/test_export_onnx_cli.yml +++ b/.github/workflows/test_export_onnx_cli.yml @@ -2,9 +2,9 @@ name: Exporters ONNX CLI / Python - Test on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -15,20 +15,20 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies for pytorch export - run: | - pip install .[tests,exporters] - - name: Test with unittest - working-directory: tests - run: | - pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0 + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies for pytorch export + run: | + pip install .[tests,exporters] + - name: Test with unittest + working-directory: tests + run: | + pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0 diff --git a/.github/workflows/test_export_onnx_cli_timm.yml b/.github/workflows/test_export_onnx_cli_timm.yml index 76a535fcebd..b92d5551ba1 100644 --- a/.github/workflows/test_export_onnx_cli_timm.yml +++ b/.github/workflows/test_export_onnx_cli_timm.yml @@ -14,20 +14,20 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies for pytorch export - run: | - pip install .[tests,exporters] - - name: Test with unittest - working-directory: tests - run: | - RUN_SLOW=1 pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -k "timm" -s --durations=0 + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies for pytorch export + run: | + pip install .[tests,exporters] + - name: Test with unittest + working-directory: tests + run: | + RUN_SLOW=1 pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -k "timm" -s --durations=0 diff --git a/.github/workflows/test_export_onnx_timm.yml b/.github/workflows/test_export_onnx_timm.yml index 339e3e93dec..c16d20fbc18 100644 --- a/.github/workflows/test_export_onnx_timm.yml +++ b/.github/workflows/test_export_onnx_timm.yml @@ -14,21 +14,20 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies for pytorch export - run: | - pip install .[tests,exporters] - - name: Test with unittest - working-directory: tests - run: | - RUN_SLOW=1 pytest exporters/onnx/ -s -n auto -k "timm" --durations=0 - + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies for pytorch export + run: | + pip install .[tests,exporters] + - name: Test with unittest + working-directory: tests + run: | + RUN_SLOW=1 pytest exporters/onnx/ -s -n auto -k "timm" --durations=0 diff --git a/.github/workflows/test_export_tflite.yml b/.github/workflows/test_export_tflite.yml index 362390b166d..225a28c1cba 100644 --- a/.github/workflows/test_export_tflite.yml +++ b/.github/workflows/test_export_tflite.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9', '3.11'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli.yml b/.github/workflows/test_export_tflite_cli.yml index e14e4cde325..cfca58cf9c1 100644 --- a/.github/workflows/test_export_tflite_cli.yml +++ b/.github/workflows/test_export_tflite_cli.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9', '3.11'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml b/.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml index 7e4a83b3b7b..9cebe8ac0f6 100644 --- a/.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml +++ b/.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_quantization_fp16.yml b/.github/workflows/test_export_tflite_cli_quantization_fp16.yml index 981dd005e52..ca35ad8b3eb 100644 --- a/.github/workflows/test_export_tflite_cli_quantization_fp16.yml +++ b/.github/workflows/test_export_tflite_cli_quantization_fp16.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_quantization_full_int8.yml b/.github/workflows/test_export_tflite_cli_quantization_full_int8.yml index 9064bfaf315..1531ffa5c9c 100644 --- a/.github/workflows/test_export_tflite_cli_quantization_full_int8.yml +++ b/.github/workflows/test_export_tflite_cli_quantization_full_int8.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml b/.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml index 824e8933a08..7274d09c0f8 100644 --- a/.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml +++ b/.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml b/.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml index e975997e379..6c8639ebfe0 100644 --- a/.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml +++ b/.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_export_tflite_cli_quantization_int8x16.yml b/.github/workflows/test_export_tflite_cli_quantization_int8x16.yml index ef59cff0b92..39902d0dd50 100644 --- a/.github/workflows/test_export_tflite_cli_quantization_int8x16.yml +++ b/.github/workflows/test_export_tflite_cli_quantization_int8x16.yml @@ -20,10 +20,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_exporters_common.yml b/.github/workflows/test_exporters_common.yml index 8e8c3360c1f..801e0bebc55 100644 --- a/.github/workflows/test_exporters_common.yml +++ b/.github/workflows/test_exporters_common.yml @@ -15,10 +15,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9', '3.11'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_exporters_slow.yml b/.github/workflows/test_exporters_slow.yml index b22fdd7fd2a..b5f142fc7dc 100644 --- a/.github/workflows/test_exporters_slow.yml +++ b/.github/workflows/test_exporters_slow.yml @@ -14,10 +14,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} diff --git a/.github/workflows/test_fx.yml b/.github/workflows/test_fx.yml index f0366cf0d1e..0a1890cc715 100644 --- a/.github/workflows/test_fx.yml +++ b/.github/workflows/test_fx.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: ['3.9'] os: [ubuntu-20.04, macos-13] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/test_fx_automatic_parallel.yml b/.github/workflows/test_fx_automatic_parallel.yml index d8af6e40caa..c5d82be38b3 100644 --- a/.github/workflows/test_fx_automatic_parallel.yml +++ b/.github/workflows/test_fx_automatic_parallel.yml @@ -24,7 +24,7 @@ jobs: config: - name: GPU-enabled Optimum Test Suite image: nvidia/cuda:12.4.1-devel-ubuntu22.04 - gpu_target: ["nvidia-multi-gpu-a10-runners"] + gpu_target: ["aws-g5-12xlarge-plus"] name: ${{ matrix.config.name }} runs-on: @@ -35,7 +35,6 @@ jobs: options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/ env: NCCL_DEBUG: INFO - HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} defaults: run: shell: bash diff --git a/.github/workflows/test_offline.yml b/.github/workflows/test_offline.yml index 90b0108e512..29b7b183bd7 100644 --- a/.github/workflows/test_offline.yml +++ b/.github/workflows/test_offline.yml @@ -15,10 +15,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04] + python-version: ['3.9'] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/test_onnx.yml b/.github/workflows/test_onnx.yml index 9aa8b307235..418a9e42c1a 100644 --- a/.github/workflows/test_onnx.yml +++ b/.github/workflows/test_onnx.yml @@ -15,8 +15,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04, macos-13] + python-version: ['3.9'] + os: [ubuntu-20.04, macos-14] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 389cc0f40ba..e46eb4e9c70 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -17,26 +17,28 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04, windows-2019, macos-13] + transformers-version: ["latest"] + os: [ubuntu-20.04, windows-2019, macos-15] + include: + - transformers-version: "4.36.*" + os: ubuntu-20.04 + - transformers-version: "4.45.*" + os: ubuntu-20.04 runs-on: ${{ matrix.os }} + steps: - name: Free Disk Space (Ubuntu) if: matrix.os == 'ubuntu-20.04' uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - swap-storage: false - large-packages: false - name: Checkout code uses: actions/checkout@v4 - - name: Setup Python ${{ matrix.python-version }} + - name: Setup Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: '3.9' - name: Install dependencies run: | @@ -44,14 +46,16 @@ jobs: pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install .[tests,onnxruntime] + - name: Install transformers ${{ matrix.transformers-version }} + if: ${{ matrix.transformers-version != 'latest' }} + run: pip install transformers==${{ matrix.transformers-version }} + - name: Test with pytest (in series) - working-directory: tests run: | - pytest onnxruntime -m "run_in_series" --durations=0 -vvvv -s + pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv -s - name: Test with pytest (in parallel) - env: - FXMARTYCLONE_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - working-directory: tests run: | - pytest onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto + pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto + env: + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} diff --git a/.github/workflows/test_onnxruntime_slow.yml b/.github/workflows/test_onnxruntime_slow.yml index ea08926b606..d64809d1143 100644 --- a/.github/workflows/test_onnxruntime_slow.yml +++ b/.github/workflows/test_onnxruntime_slow.yml @@ -15,8 +15,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] - os: [ubuntu-20.04, windows-2019, macos-13] + python-version: ['3.9'] + os: [ubuntu-20.04] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/test_optimum_common.yml b/.github/workflows/test_optimum_common.yml index ded149c9b69..9aab45e4b71 100644 --- a/.github/workflows/test_optimum_common.yml +++ b/.github/workflows/test_optimum_common.yml @@ -4,9 +4,9 @@ name: Optimum common / Python - Test on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -17,25 +17,24 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: ['3.9'] os: [ubuntu-20.04, windows-2019, macos-13] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install .[tests] - ls -l optimum/ - - name: Test with unittest - shell: bash - run: | - # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix as the staging tests cannot run in parallel. - export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == '3.8' && matrix.os == 'ubuntu-20.04' }} - pytest tests/test_*.py - + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[tests] + ls -l optimum/ + - name: Test with unittest + shell: bash + run: | + # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix as the staging tests cannot run in parallel. + export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == '3.9' && matrix.os == 'ubuntu-20.04' }} + pytest tests/test_*.py diff --git a/.github/workflows/test_utils.yml b/.github/workflows/test_utils.yml index 1ef33ced086..bbe00e62841 100644 --- a/.github/workflows/test_utils.yml +++ b/.github/workflows/test_utils.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, macos-13] - python-version: [3.8, 3.9] + python-version: ['3.9'] runs-on: ${{ matrix.os }} steps: @@ -37,4 +37,13 @@ jobs: - name: Test with pytest working-directory: tests run: | - python -m pytest -s -vvvv utils + pytest utils -s -n auto -m "not datasets_test" --durations=0 + + - name: Install datasets + run: | + pip install datasets + + - name: Tests needing datasets + working-directory: tests + run: | + pytest utils -s -n auto -m "datasets_test" --durations=0 \ No newline at end of file diff --git a/Makefile b/Makefile index e2c21263031..824ef3d0cf3 100644 --- a/Makefile +++ b/Makefile @@ -23,11 +23,11 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) # Run code quality checks style_check: black --check . - ruff . + ruff check . style: black . - ruff . --fix + ruff check . --fix # Run tests for the library test: diff --git a/docs/Dockerfile b/docs/Dockerfile index 29ea0f916ce..d76dc50c556 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,4 +1,4 @@ -FROM nikolaik/python-nodejs:python3.8-nodejs18 +FROM nikolaik/python-nodejs:python3.9-nodejs18 ARG commit_sha ARG clone_url diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 747e1396fb4..57005b85678 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -36,6 +36,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Data2VecVision - Deberta - Deberta-v2 +- Decision Transformer - Deit - Detr - DistilBert @@ -82,6 +83,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - PoolFormer - Qwen2(Qwen1.5) - RegNet +- RemBERT - ResNet - Roberta - Roformer diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 7eb79c33ed2..1b54570ea80 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -36,14 +36,14 @@ The packages below enable you to get the best of the 🤗 Hugging Face ecosystem
Intel
-

Optimize your model to speedup inference with OpenVINO and Neural Compressor

+

Optimize your model to speedup inference with OpenVINO , Neural Compressor and IPEX

AWS Trainium/Inferentia

Accelerate your training and inference workflows with AWS Trainium and AWS Inferentia

Google TPUs
+ >
Google TPUs

Accelerate your training and inference workflows with Google TPUs

-> [!TIP] -> Some packages provide hardware-agnostic features (e.g. INC interface in Optimum Intel). - - ## Open-source integrations 🤗 Optimum also supports a variety of open-source frameworks to make model optimization very easy. diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index c08b3f92e5c..27733574c80 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -25,6 +25,7 @@ If you'd like to use the accelerator-specific features of 🤗 Optimum, you can | [ONNX Runtime](https://huggingface.co/docs/optimum/onnxruntime/overview) | `pip install --upgrade --upgrade-strategy eager optimum[onnxruntime]` | | [Intel Neural Compressor](https://huggingface.co/docs/optimum/intel/index) | `pip install --upgrade --upgrade-strategy eager optimum[neural-compressor]` | | [OpenVINO](https://huggingface.co/docs/optimum/intel/index) | `pip install --upgrade --upgrade-strategy eager optimum[openvino]` | +| [IPEX](https://huggingface.co/docs/optimum/intel/index) | `pip install --upgrade --upgrade-strategy eager optimum[ipex]` | | [NVIDIA TensorRT-LLM](https://huggingface.co/docs/optimum/main/en/nvidia_overview) | `docker run -it --gpus all --ipc host huggingface/optimum-nvidia` | | [AMD Instinct GPUs and Ryzen AI NPU](https://huggingface.co/docs/optimum/amd/index) | `pip install --upgrade --upgrade-strategy eager optimum[amd]` | | [AWS Trainum & Inferentia](https://huggingface.co/docs/optimum-neuron/index) | `pip install --upgrade --upgrade-strategy eager optimum[neuronx]` | diff --git a/docs/source/onnxruntime/package_reference/modeling_ort.mdx b/docs/source/onnxruntime/package_reference/modeling_ort.mdx index 65b2b60195a..2c93ab3ac0d 100644 --- a/docs/source/onnxruntime/package_reference/modeling_ort.mdx +++ b/docs/source/onnxruntime/package_reference/modeling_ort.mdx @@ -119,6 +119,11 @@ The following ORT classes are available for the following custom tasks. ## Stable Diffusion +#### ORTDiffusionPipeline + +[[autodoc]] onnxruntime.ORTDiffusionPipeline + - __call__ + #### ORTStableDiffusionPipeline [[autodoc]] onnxruntime.ORTStableDiffusionPipeline diff --git a/docs/source/onnxruntime/usage_guides/models.mdx b/docs/source/onnxruntime/usage_guides/models.mdx index 131822e9568..27ac446096b 100644 --- a/docs/source/onnxruntime/usage_guides/models.mdx +++ b/docs/source/onnxruntime/usage_guides/models.mdx @@ -4,263 +4,128 @@ Optimum is a utility package for building and running inference with accelerated Optimum can be used to load optimized models from the [Hugging Face Hub](hf.co/models) and create pipelines to run accelerated inference without rewriting your APIs. -## Switching from Transformers to Optimum -The `optimum.onnxruntime.ORTModelForXXX` model classes are API compatible with Hugging Face Transformers models. This -means you can just replace your `AutoModelForXXX` class with the corresponding `ORTModelForXXX` class in `optimum.onnxruntime`. +## Loading -You do not need to adapt your code to get it to work with `ORTModelForXXX` classes: +### Transformers models -```diff -from transformers import AutoTokenizer, pipeline --from transformers import AutoModelForQuestionAnswering -+from optimum.onnxruntime import ORTModelForQuestionAnswering - --model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2") # PyTorch checkpoint -+model = ORTModelForQuestionAnswering.from_pretrained("optimum/roberta-base-squad2") # ONNX checkpoint -tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2") - -onnx_qa = pipeline("question-answering",model=model,tokenizer=tokenizer) - -question = "What's my name?" -context = "My name is Philipp and I live in Nuremberg." -pred = onnx_qa(question, context) -``` - -### Loading a vanilla Transformers model - -Because the model you want to work with might not be already converted to ONNX, [`~optimum.onnxruntime.ORTModel`] -includes a method to convert vanilla Transformers models to ONNX ones. Simply pass `export=True` to the -[`~optimum.onnxruntime.ORTModel.from_pretrained`] method, and your model will be loaded and converted to ONNX on-the-fly: - -```python ->>> from optimum.onnxruntime import ORTModelForSequenceClassification - ->>> # Load the model from the hub and export it to the ONNX format ->>> model = ORTModelForSequenceClassification.from_pretrained( -... "distilbert-base-uncased-finetuned-sst-2-english", export=True -... ) -``` - -### Pushing ONNX models to the Hugging Face Hub - -It is also possible, just as with regular [`~transformers.PreTrainedModel`]s, to push your `ORTModelForXXX` to the -[Hugging Face Model Hub](https://hf.co/models): - -```python ->>> from optimum.onnxruntime import ORTModelForSequenceClassification +Once your model was [exported to the ONNX format](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model), you can load it by replacing `AutoModelForXxx` with the corresponding `ORTModelForXxx` class. ->>> # Load the model from the hub and export it to the ONNX format ->>> model = ORTModelForSequenceClassification.from_pretrained( -... "distilbert-base-uncased-finetuned-sst-2-english", export=True -... ) +```diff + from transformers import AutoTokenizer, pipeline +- from transformers import AutoModelForCausalLM ++ from optimum.onnxruntime import ORTModelForCausalLM ->>> # Save the converted model ->>> model.save_pretrained("a_local_path_for_convert_onnx_model") +- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B) # PyTorch checkpoint ++ model = ORTModelForCausalLM.from_pretrained("onnx-community/Llama-3.2-1B", subfolder="onnx") # ONNX checkpoint + tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B") -# Push the onnx model to HF Hub ->>> model.push_to_hub( # doctest: +SKIP -... "a_local_path_for_convert_onnx_model", repository_id="my-onnx-repo", use_auth_token=True -... ) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + result = pipe("He never went out without a book under his arm") ``` -## Sequence-to-sequence models +More information for all the supported `ORTModelForXxx` in our [documentation](https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort) -Sequence-to-sequence (Seq2Seq) models can also be used when running inference with ONNX Runtime. When Seq2Seq models -are exported to the ONNX format, they are decomposed into three parts that are later combined during inference: -- The encoder part of the model -- The decoder part of the model + the language modeling head -- The same decoder part of the model + language modeling head but taking and using pre-computed key / values as inputs and -outputs. This makes inference faster. -Here is an example of how you can load a T5 model to the ONNX format and run inference for a translation task: +### Diffusers models +Once your model was [exported to the ONNX format](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model), you can load it by replacing `DiffusionPipeline` with the corresponding `ORTDiffusionPipeline` class. -```python ->>> from transformers import AutoTokenizer, pipeline ->>> from optimum.onnxruntime import ORTModelForSeq2SeqLM - -# Load the model from the hub and export it to the ONNX format ->>> model_name = "t5-small" ->>> model = ORTModelForSeq2SeqLM.from_pretrained(model_name, export=True) ->>> tokenizer = AutoTokenizer.from_pretrained(model_name) - -# Create a pipeline ->>> onnx_translation = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer) ->>> text = "He never went out without a book under his arm, and he often came back with two." ->>> result = onnx_translation(text) ->>> # [{'translation_text': "Il n'est jamais sorti sans un livre sous son bras, et il est souvent revenu avec deux."}] -``` -## Stable Diffusion - -Stable Diffusion models can also be used when running inference with ONNX Runtime. When Stable Diffusion models -are exported to the ONNX format, they are split into four components that are later combined during inference: -- The text encoder -- The U-NET -- The VAE encoder -- The VAE decoder - -Make sure you have 🤗 Diffusers installed. - -To install `diffusers`: -```bash -pip install diffusers +```diff +- from diffusers import DiffusionPipeline ++ from optimum.onnxruntime import ORTDiffusionPipeline + + model_id = "runwayml/stable-diffusion-v1-5" +- pipeline = DiffusionPipeline.from_pretrained(model_id) ++ pipeline = ORTDiffusionPipeline.from_pretrained(model_id, revision="onnx") + prompt = "sailing ship in storm by Leonardo da Vinci" + image = pipeline(prompt).images[0] ``` -### Text-to-Image -Here is an example of how you can load an ONNX Stable Diffusion model and run inference using ONNX Runtime: +### Sentence Transformers models -```python -from optimum.onnxruntime import ORTStableDiffusionPipeline - -model_id = "runwayml/stable-diffusion-v1-5" -pipeline = ORTStableDiffusionPipeline.from_pretrained(model_id, revision="onnx") -prompt = "sailing ship in storm by Leonardo da Vinci" -image = pipeline(prompt).images[0] -``` - -To load your PyTorch model and convert it to ONNX on-the-fly, you can set `export=True`. +Once your model was [exported to the ONNX format](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model), you can load it by replacing `AutoModel` with the corresponding `ORTModelForFeatureExtraction` class. -```python -pipeline = ORTStableDiffusionPipeline.from_pretrained(model_id, export=True) - -# Don't forget to save the ONNX model -save_directory = "a_local_path" -pipeline.save_pretrained(save_directory) +```diff + from transformers import AutoTokenizer +- from transformers import AutoModel ++ from optimum.onnxruntime import ORTModelForFeatureExtraction + + tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") +- model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") ++ model = ORTModelForFeatureExtraction.from_pretrained("optimum/all-MiniLM-L6-v2") + inputs = tokenizer("This is an example sentence", return_tensors="pt") + outputs = model(**inputs) ``` -
- -
- -### Image-to-Image +You can also load your ONNX model directly using the [`sentence_transformers.SentenceTransformer`](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#onnx) class, just make sure to have `sentence-transformers>=3.2` installed. If the model wasn't already converted to ONNX, it will be converted automatically on-the-fly. -```python -import requests -import torch -from PIL import Image -from io import BytesIO -from optimum.onnxruntime import ORTStableDiffusionImg2ImgPipeline - -model_id = "runwayml/stable-diffusion-v1-5" -pipeline = ORTStableDiffusionImg2ImgPipeline.from_pretrained(model_id, revision="onnx") - -url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg" - -response = requests.get(url) -init_image = Image.open(BytesIO(response.content)).convert("RGB") -init_image = init_image.resize((768, 512)) +```diff + from sentence_transformers import SentenceTransformer -prompt = "A fantasy landscape, trending on artstation" + model_id = "sentence-transformers/all-MiniLM-L6-v2" +- model = SentenceTransformer(model_id) ++ model = SentenceTransformer(model_id, backend="onnx") -image = pipeline(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images[0] -image.save("fantasy_landscape.png") + sentences = ["This is an example sentence", "Each sentence is converted"] + embeddings = model.encode(sentences) ``` -### Inpaint -```python -import PIL -import requests -import torch -from io import BytesIO -from optimum.onnxruntime import ORTStableDiffusionInpaintPipeline - -model_id = "runwayml/stable-diffusion-inpainting" -pipeline = ORTStableDiffusionInpaintPipeline.from_pretrained(model_id, revision="onnx") +### Timm models -def download_image(url): - response = requests.get(url) - return PIL.Image.open(BytesIO(response.content)).convert("RGB") +Once your model was [exported to the ONNX format](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model), you can load it by replacing the `create_model` with the corresponding `ORTModelForImageClassification` class. -img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" -mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" -init_image = download_image(img_url).resize((512, 512)) -mask_image = download_image(mask_url).resize((512, 512)) - -prompt = "Face of a yellow cat, high resolution, sitting on a park bench" -image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image).images[0] +```diff + import requests + from PIL import Image +- from timm import create_model + from timm.data import resolve_data_config, create_transform ++ from optimum.onnxruntime import ORTModelForImageClassification + +- model = create_model("timm/mobilenetv3_large_100.ra_in1k", pretrained=True) ++ model = ORTModelForImageClassification.from_pretrained("optimum/mobilenetv3_large_100.ra_in1k") + transform = create_transform(**resolve_data_config(model.config.pretrained_cfg, model=model)) + url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" + image = Image.open(requests.get(url, stream=True).raw) + inputs = transform(image).unsqueeze(0) + outputs = model(inputs) ``` -## Stable Diffusion XL -Before using `ORTStableDiffusionXLPipeline` make sure to have `diffusers` and `invisible_watermark` installed. You can install the libraries as follows: +## Converting your model to ONNX on-the-fly -```bash -pip install diffusers -pip install invisible-watermark>=0.2.0 -``` - -### Text-to-Image - -Here is an example of how you can load a SDXL ONNX model from [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) and run inference using ONNX Runtime : +In case your model wasn't already [converted to ONNX](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model), [`~optimum.onnxruntime.ORTModel`] includes a method to convert your model to ONNX on-the-fly. +Simply pass `export=True` to the [`~optimum.onnxruntime.ORTModel.from_pretrained`] method, and your model will be loaded and converted to ONNX on-the-fly: ```python -from optimum.onnxruntime import ORTStableDiffusionXLPipeline - -model_id = "stabilityai/stable-diffusion-xl-base-1.0" -base = ORTStableDiffusionXLPipeline.from_pretrained(model_id) -prompt = "sailing ship in storm by Leonardo da Vinci" -image = base(prompt).images[0] - -# Don't forget to save the ONNX model -save_directory = "sd_xl_base" -base.save_pretrained(save_directory) -``` - - -### Image-to-Image - -Here is an example of how you can load a PyTorch SDXL model, convert it to ONNX on-the-fly and run inference using ONNX Runtime for *image-to-image* : - -```python -from optimum.onnxruntime import ORTStableDiffusionXLImg2ImgPipeline -from diffusers.utils import load_image - -model_id = "stabilityai/stable-diffusion-xl-refiner-1.0" -pipeline = ORTStableDiffusionXLImg2ImgPipeline.from_pretrained(model_id, export=True) +>>> from optimum.onnxruntime import ORTModelForSequenceClassification -url = "https://huggingface.co/datasets/optimum/documentation-images/resolve/main/intel/openvino/sd_xl/castle_friedrich.png" -image = load_image(url).convert("RGB") -prompt = "medieval castle by Caspar David Friedrich" -image = pipeline(prompt, image=image).images[0] -image.save("medieval_castle.png") +>>> # Load the model from the hub and export it to the ONNX format +>>> model_id = "distilbert-base-uncased-finetuned-sst-2-english" +>>> model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True) ``` -### Refining the image output - -The image can be refined by making use of a model like [stabilityai/stable-diffusion-xl-refiner-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0). In this case, you only have to output the latents from the base model. +## Pushing your model to the Hub +You can also call `push_to_hub` directly on your model to upload it to the [Hub](https://hf.co/models). ```python -from optimum.onnxruntime import ORTStableDiffusionXLImg2ImgPipeline - -model_id = "stabilityai/stable-diffusion-xl-refiner-1.0" -refiner = ORTStableDiffusionXLImg2ImgPipeline.from_pretrained(model_id, export=True) - -image = base(prompt=prompt, output_type="latent").images[0] -image = refiner(prompt=prompt, image=image[None, :]).images[0] -image.save("sailing_ship.png") -``` - - - -## Latent Consistency Models - -### Text-to-Image +>>> from optimum.onnxruntime import ORTModelForSequenceClassification -Here is an example of how you can load a Latent Consistency Models (LCMs) from [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) and run inference using ONNX Runtime : +>>> # Load the model from the hub and export it to the ONNX format +>>> model_id = "distilbert-base-uncased-finetuned-sst-2-english" +>>> model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True) -```python -from optimum.onnxruntime import ORTLatentConsistencyModelPipeline +>>> # Save the converted model locally +>>> output_dir = "a_local_path_for_convert_onnx_model" +>>> model.save_pretrained(output_dir) -model_id = "SimianLuo/LCM_Dreamshaper_v7" -pipeline = ORTLatentConsistencyModelPipeline.from_pretrained(model_id, export=True) -prompt = "sailing ship in storm by Leonardo da Vinci" -images = pipeline(prompt, num_inference_steps=4, guidance_scale=8.0).images +# Push the onnx model to HF Hub +>>> model.push_to_hub(output_dir, repository_id="my-onnx-repo") # doctest: +SKIP ``` diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py index 9dfa57844d4..c8c91a04e4e 100644 --- a/optimum/bettertransformer/models/attention.py +++ b/optimum/bettertransformer/models/attention.py @@ -92,6 +92,71 @@ def gpt2_wrapped_scaled_dot_product( return sdpa_result, None +# Adapted from transformers.models.gptj.modeling_gptj.GPTJAttention._attn +def gptj_wrapped_scaled_dot_product( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + head_mask: Optional[torch.Tensor] = None, +): + raise_on_head_mask(head_mask) + batch_size = query.shape[0] + + mask_value = torch.finfo(value.dtype).min + mask_value = torch.full([], mask_value, dtype=value.dtype) + + # in gpt-neo-x and gpt-j the query and keys are always in fp32 + # thus we need to cast them to the value dtype + if self.downcast_qk: + query = query.to(value.dtype) + key = key.to(value.dtype) + + if batch_size == 1 and attention_mask is not None and attention_mask[0, 0, -1, -1] < -1: + raise ValueError("BetterTransformer does not support padding='max_length' with a batch size of 1.") + + dropout_p = self.dropout_prob_attn if self.training else 0.0 + if batch_size == 1 or self.training: + if query.shape[2] > 1: + sdpa_result = torch.nn.functional.scaled_dot_product_attention( + query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=True + ) + else: + sdpa_result = torch.nn.functional.scaled_dot_product_attention( + query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=False + ) + else: + query_length, key_length = query.size(-2), key.size(-2) + + # causal_mask is always [True, ..., True] otherwise, so executing this + # is unnecessary + if query_length > 1: + if not check_if_transformers_greater("4.44.99"): + causal_mask = self.bias[:, :, key_length - query_length : key_length, :key_length].to(torch.bool) + + causal_mask = torch.where(causal_mask, 0, mask_value) + + # torch.Tensor.expand does no memory copy + causal_mask = causal_mask.expand(batch_size, -1, -1, -1) + if attention_mask is not None: + attention_mask = causal_mask + attention_mask + + else: + attention_mask = attention_mask[:, :, :, : key.shape[-2]] + + sdpa_result = torch.nn.functional.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=dropout_p, is_causal=False + ) + + # in gpt-neo-x and gpt-j the query and keys are always in fp32 + # thus we need to cast them to the value dtype + if self.downcast_qk: + sdpa_result = sdpa_result.to(value.dtype) + + return sdpa_result, None + + # Adapted from transformers.models.bark.modeling_bark.BarkSelfAttention._attn def bark_wrapped_scaled_dot_product( self, @@ -195,7 +260,7 @@ def codegen_wrapped_scaled_dot_product( query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=True ) else: - # in this case, which is the later decoding steps, the `causal_mask`` in + # in this case, which is the later decoding steps, the `causal_mask` in # https://github.com/huggingface/transformers/blob/ae54e3c3b18bac0832ad62ea9b896dfd52a09850/src/transformers/models/gpt2/modeling_gpt2.py#L195 # is [True, ..., True] so actually not causal sdpa_result = torch.nn.functional.scaled_dot_product_attention( @@ -207,15 +272,20 @@ def codegen_wrapped_scaled_dot_product( # causal_mask is always [True, ..., True] otherwise, so executing this # is unnecessary if query_length > 1: - causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool) + if not check_if_transformers_greater("4.44.99"): + causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to( + torch.bool + ) - causal_mask = torch.where(causal_mask, 0, mask_value) + causal_mask = torch.where(causal_mask, 0, mask_value) - # torch.Tensor.expand does no memory copy - causal_mask = causal_mask.expand(batch_size, -1, -1, -1) + # torch.Tensor.expand does no memory copy + causal_mask = causal_mask.expand(batch_size, -1, -1, -1) - # we use torch.min to avoid having tensor(-inf) - attention_mask = torch.min(causal_mask, attention_mask) + # we use torch.min to avoid having tensor(-inf) + attention_mask = torch.min(causal_mask, attention_mask) + else: + attention_mask = attention_mask[:, :, :, : key.shape[-2]] sdpa_result = torch.nn.functional.scaled_dot_product_attention( query, key, value, attn_mask=attention_mask, dropout_p=dropout_p, is_causal=False @@ -317,137 +387,243 @@ def opt_forward( # Adapted from transformers.models.t5.modeling_t5.T5Attention.forward -def t5_forward( - self, - hidden_states, - mask=None, - key_value_states=None, - position_bias=None, - past_key_value=None, - layer_head_mask=None, - query_length=None, - use_cache=False, - output_attentions=False, - **kwargs, -): - raise_on_head_mask(layer_head_mask) +if check_if_transformers_greater("4.45.99"): - if output_attentions is True: - raise ValueError("output_attentions=True can not be supported with BetterTransformer.") - if len(self.pruned_heads) > 0: - raise ValueError(f"Setting `pruned_heads` is unsupported with BetterTransformer, found {self.pruned_heads}.") - batch_size, seq_length = hidden_states.shape[:2] - - real_seq_length = seq_length - - if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" - real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length - - key_length = real_seq_length if key_value_states is None else key_value_states.shape[1] - - def shape(states): - """projection""" - return states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2) - - def unshape(states): - """reshape""" - return states.transpose(1, 2).contiguous().view(batch_size, -1, self.inner_dim) - - def project(hidden_states, proj_layer, key_value_states, past_key_value): - """projects hidden states correctly to key/query states""" - if key_value_states is None: - # self-attn - # (batch_size, n_heads, seq_length, dim_per_head) - hidden_states = shape(proj_layer(hidden_states)) - elif past_key_value is None: - # cross-attn - # (batch_size, n_heads, seq_length, dim_per_head) - hidden_states = shape(proj_layer(key_value_states)) + def t5_forward( + self, + hidden_states, + mask=None, + key_value_states=None, + position_bias=None, + past_key_value=None, + layer_head_mask=None, + query_length=None, + use_cache=False, + output_attentions=False, + cache_position=None, + ): + """ + Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states). + """ + # Input is (batch_size, seq_length, dim) + # Mask is (batch_size, 1, 1, key_length) (non-causal encoder) or (batch_size, 1, seq_length, key_length) (causal decoder) + batch_size, seq_length = hidden_states.shape[:2] + + # if key_value_states are provided this layer is used as a cross-attention layer for the decoder + is_cross_attention = key_value_states is not None + + query_states = self.q(hidden_states) + query_states = query_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2) + + if past_key_value is not None: + is_updated = past_key_value.is_updated.get(self.layer_idx) + if is_cross_attention: + # after the first generated id, we can subsequently re-use all key/value_states from cache + curr_past_key_value = past_key_value.cross_attention_cache + else: + curr_past_key_value = past_key_value.self_attention_cache + + current_states = key_value_states if is_cross_attention else hidden_states + if is_cross_attention and past_key_value is not None and is_updated: + # reuse k,v, cross_attentions + key_states = curr_past_key_value.key_cache[self.layer_idx] + value_states = curr_past_key_value.value_cache[self.layer_idx] + else: + key_states = self.k(current_states) + value_states = self.v(current_states) + key_states = key_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2) + value_states = value_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2) + + if past_key_value is not None: + # save all key/value_states to cache to be re-used for fast auto-regressive generation + cache_position = cache_position if not is_cross_attention else None + key_states, value_states = curr_past_key_value.update( + key_states, value_states, self.layer_idx, {"cache_position": cache_position} + ) + # set flag that curr layer for cross-attn is already updated so we can re-use in subsequent calls + if is_cross_attention: + past_key_value.is_updated[self.layer_idx] = True + + if position_bias is None: + key_length = key_states.shape[-2] + # cache position is 0-indexed so we add 1 to get the real length of queries (aka with past) + real_seq_length = query_length if query_length is not None else cache_position[-1] + 1 + if not self.has_relative_attention_bias: + position_bias = torch.zeros( + (1, self.n_heads, seq_length, key_length), device=query_states.device, dtype=query_states.dtype + ) + if self.gradient_checkpointing and self.training: + position_bias.requires_grad = True + else: + position_bias = self.compute_bias( + real_seq_length, key_length, device=query_states.device, cache_position=cache_position + ) + position_bias = position_bias[:, :, -seq_length:, :] + + if mask is not None: + causal_mask = mask[:, :, :, : key_states.shape[-2]] + position_bias = position_bias + causal_mask + + if self.pruned_heads: + mask = torch.ones(position_bias.shape[1]) + mask[list(self.pruned_heads)] = 0 + position_bias_masked = position_bias[:, mask.bool()] + else: + position_bias_masked = position_bias + + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=position_bias_masked, + dropout_p=self.dropout if self.training else 0.0, + is_causal=False, + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.view(batch_size, -1, self.inner_dim) + attn_output = self.o(attn_output) + + outputs = (attn_output, past_key_value, position_bias) + + return outputs + +else: + + def t5_forward( + self, + hidden_states, + mask=None, + key_value_states=None, + position_bias=None, + past_key_value=None, + layer_head_mask=None, + query_length=None, + use_cache=False, + output_attentions=False, + **kwargs, + ): + raise_on_head_mask(layer_head_mask) + + if output_attentions is True: + raise ValueError("output_attentions=True can not be supported with BetterTransformer.") + if len(self.pruned_heads) > 0: + raise ValueError( + f"Setting `pruned_heads` is unsupported with BetterTransformer, found {self.pruned_heads}." + ) + + batch_size, seq_length = hidden_states.shape[:2] + + real_seq_length = seq_length if past_key_value is not None: + assert ( + len(past_key_value) == 2 + ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length + + key_length = real_seq_length if key_value_states is None else key_value_states.shape[1] + + def shape(states): + """projection""" + return states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2) + + def unshape(states): + """reshape""" + return states.transpose(1, 2).contiguous().view(batch_size, -1, self.inner_dim) + + def project(hidden_states, proj_layer, key_value_states, past_key_value): + """projects hidden states correctly to key/query states""" if key_value_states is None: # self-attn - # (batch_size, n_heads, key_length, dim_per_head) - hidden_states = torch.cat([past_key_value, hidden_states], dim=2) - elif past_key_value.shape[2] != key_value_states.shape[1]: - # checking that the `sequence_length` of the `past_key_value` is the same as - # the provided `key_value_states` to support prefix tuning + # (batch_size, n_heads, seq_length, dim_per_head) + hidden_states = shape(proj_layer(hidden_states)) + elif past_key_value is None: # cross-attn # (batch_size, n_heads, seq_length, dim_per_head) hidden_states = shape(proj_layer(key_value_states)) - else: - # cross-attn - hidden_states = past_key_value - return hidden_states - - # get query states - query_states = shape(self.q(hidden_states)) # (batch_size, n_heads, seq_length, dim_per_head) - # get key/value states - key_states = project( - hidden_states, - self.k, - key_value_states, - past_key_value[0] if past_key_value is not None else None, - ) - value_states = project( - hidden_states, - self.v, - key_value_states, - past_key_value[1] if past_key_value is not None else None, - ) + if past_key_value is not None: + if key_value_states is None: + # self-attn + # (batch_size, n_heads, key_length, dim_per_head) + hidden_states = torch.cat([past_key_value, hidden_states], dim=2) + elif past_key_value.shape[2] != key_value_states.shape[1]: + # checking that the `sequence_length` of the `past_key_value` is the same as + # the provided `key_value_states` to support prefix tuning + # cross-attn + # (batch_size, n_heads, seq_length, dim_per_head) + hidden_states = shape(proj_layer(key_value_states)) + else: + # cross-attn + hidden_states = past_key_value + return hidden_states + + # get query states + query_states = shape(self.q(hidden_states)) # (batch_size, n_heads, seq_length, dim_per_head) + + # get key/value states + key_states = project( + hidden_states, + self.k, + key_value_states, + past_key_value[0] if past_key_value is not None else None, + ) + value_states = project( + hidden_states, + self.v, + key_value_states, + past_key_value[1] if past_key_value is not None else None, + ) - dropout_p = self.dropout if self.training else 0.0 - query_states = self.scale * query_states - if position_bias is None and not self.has_relative_attention_bias: - if mask is None: - attn_output = torch.nn.functional.scaled_dot_product_attention( - query_states, key_states, value_states, attn_mask=None, dropout_p=dropout_p, is_causal=False - ) - elif mask is not None: + dropout_p = self.dropout if self.training else 0.0 + query_states = self.scale * query_states + if position_bias is None and not self.has_relative_attention_bias: attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=mask, dropout_p=dropout_p, is_causal=False ) - if position_bias is None: - if not self.has_relative_attention_bias: - position_bias = torch.zeros( - (1, self.n_heads, real_seq_length, key_length), - device=value_states.device, - dtype=value_states.dtype, - ) - if self.gradient_checkpointing and self.training: - position_bias.requires_grad = True + if position_bias is None: + if not self.has_relative_attention_bias: + position_bias = torch.zeros( + (1, self.n_heads, real_seq_length, key_length), + device=value_states.device, + dtype=value_states.dtype, + ) + if self.gradient_checkpointing and self.training: + position_bias.requires_grad = True + else: + position_bias = self.compute_bias(real_seq_length, key_length, device=value_states.device) + + # if key and values are already calculated + # we want only the last query position bias + if past_key_value is not None: + position_bias = position_bias[:, :, -hidden_states.size(1) :, :] + + if mask is not None: + position_bias = position_bias + mask # (batch_size, n_heads, seq_length, key_length) + + if self.has_relative_attention_bias: + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=position_bias, + dropout_p=dropout_p, + is_causal=False, + ) else: - position_bias = self.compute_bias(real_seq_length, key_length, device=value_states.device) - - # if key and values are already calculated - # we want only the last query position bias - if past_key_value is not None: - position_bias = position_bias[:, :, -hidden_states.size(1) :, :] - - if mask is not None: - position_bias = position_bias + mask # (batch_size, n_heads, seq_length, key_length) - - if self.has_relative_attention_bias: attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=position_bias, dropout_p=dropout_p, is_causal=False ) - else: - attn_output = torch.nn.functional.scaled_dot_product_attention( - query_states, key_states, value_states, attn_mask=position_bias, dropout_p=dropout_p, is_causal=False - ) - attn_output = unshape(attn_output) # (batch_size, seq_length, dim) - attn_output = self.o(attn_output) + attn_output = unshape(attn_output) # (batch_size, seq_length, dim) + attn_output = self.o(attn_output) - present_key_value_state = (key_states, value_states) if (self.is_decoder and use_cache) else None - outputs = (attn_output,) + (present_key_value_state,) + (position_bias,) + present_key_value_state = (key_states, value_states) if (self.is_decoder and use_cache) else None + outputs = (attn_output,) + (present_key_value_state,) + (position_bias,) - return outputs + return outputs # Adapted from transformers.models.bart.modeling_bart.BartAttention.forward diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py index b64b7f5a1eb..e8045e695c1 100644 --- a/optimum/bettertransformer/models/decoder_models.py +++ b/optimum/bettertransformer/models/decoder_models.py @@ -44,6 +44,7 @@ codegen_wrapped_scaled_dot_product, gpt2_wrapped_scaled_dot_product, gpt_neo_wrapped_scaled_dot_product, + gptj_wrapped_scaled_dot_product, opt_forward, t5_forward, ) @@ -82,7 +83,7 @@ def forward(self, *args, **kwargs): class GPTJAttentionLayerBetterTransformer(BetterTransformerBaseLayer, GPTJAttention, nn.Module): - _attn = gpt2_wrapped_scaled_dot_product + _attn = gptj_wrapped_scaled_dot_product def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): super().__init__(config) @@ -96,14 +97,22 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): "out_proj", "attn_dropout", "resid_dropout", - "bias", "scale_attn", - "masked_bias", ] # Attribute only for transformers>=4.28 if hasattr(layer, "embed_positions"): submodules.append("embed_positions") + # Attribute only for transformers<4.45 + if hasattr(layer, "bias"): + submodules.append("bias") + if hasattr(layer, "masked_bias"): + submodules.append("masked_bias") + + # Attribute only for transformers>=4.45 + if hasattr(layer, "layer_idx"): + submodules.append("layer_idx") + for attr in submodules: setattr(self, attr, getattr(layer, attr)) @@ -127,6 +136,11 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): self.module_mapping = None submodules = ["rotary_emb", "query_key_value", "dense", "bias", "masked_bias", "norm_factor"] + + # Attribute only for transformers>=4.45 + if hasattr(layer, "layer_idx"): + submodules.append("layer_idx") + for attr in submodules: setattr(self, attr, getattr(layer, attr)) @@ -155,6 +169,11 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): self.module_mapping = None submodules = ["attn_dropout", "resid_dropout", "k_proj", "v_proj", "q_proj", "out_proj", "bias", "masked_bias"] + + # Attribute only for transformers>=4.45 + if hasattr(layer, "layer_id"): + submodules.append("layer_id") + for attr in submodules: setattr(self, attr, getattr(layer, attr)) @@ -238,12 +257,20 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): super(BetterTransformerBaseLayer, self).__init__(config) self.module_mapping = None - submodules = ["attn_dropout", "resid_dropout", "qkv_proj", "out_proj", "causal_mask", "scale_attn"] + submodules = ["attn_dropout", "resid_dropout", "qkv_proj", "out_proj", "scale_attn"] # Attribute only for transformers>=4.28 if hasattr(layer, "embed_positions"): submodules.append("embed_positions") + # Attribute only for transformers<4.45 + if hasattr(layer, "causal_mask"): + submodules.append("causal_mask") + + # Attribute only for transformers>=4.45 + if hasattr(layer, "layer_idx"): + submodules.append("layer_idx") + for attr in submodules: setattr(self, attr, getattr(layer, attr)) @@ -300,9 +327,9 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"): setattr(self, "relative_attention_bias", layer.relative_attention_bias) self.original_layers_mapping["relative_attention_bias"] = "relative_attention_bias" - self.module_mapping = None - + self.layer_idx = getattr(layer, "layer_idx", None) self.is_decoder = layer.is_decoder + self.module_mapping = None def forward(self, *args, **kwargs): return t5_forward(self, *args, **kwargs) diff --git a/optimum/bettertransformer/transformation.py b/optimum/bettertransformer/transformation.py index 2105e199870..b138862752e 100644 --- a/optimum/bettertransformer/transformation.py +++ b/optimum/bettertransformer/transformation.py @@ -20,7 +20,13 @@ import torch from packaging.version import parse -from ..utils import check_if_pytorch_greater, is_accelerate_available, recurse_getattr, recurse_setattr +from ..utils import ( + check_if_pytorch_greater, + check_if_torch_greater, + is_accelerate_available, + recurse_getattr, + recurse_setattr, +) from .models import BetterTransformerManager @@ -206,18 +212,25 @@ def transform( The converted model if the conversion has been successful. """ + logger.warning( + "The class `optimum.bettertransformers.transformation.BetterTransformer` is deprecated and will be removed in a future release." + ) + hf_config = model.config if hf_config.model_type in ["falcon", "gpt_bigcode", "llama", "whisper"]: raise ValueError( - f"Transformers now supports natively BetterTransformer optimizations (torch.nn.functional.scaled_dot_product_attention) for the model type {hf_config.model_type}. As such, there is no need to use `model.to_bettertransformers()` or `BetterTransformer.transform(model)` from the Optimum library. Please upgrade to transformers>=4.36 and torch>=2.1.1 to use it. Details: https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-and-memory-efficient-attention-through-pytorchs-scaleddotproductattention." + f"Transformers now supports natively BetterTransformer optimizations (torch.nn.functional.scaled_dot_product_attention) for the model type {hf_config.model_type}. " + "As such, there is no need to use `model.to_bettertransformers()` or `BetterTransformer.transform(model)` from the Optimum library. " + "Please upgrade to transformers>=4.36 and torch>=2.1.1 to use it. " + "Details: https://huggingface.co/docs/transformers/perf_infer_gpu_one#pytorch-scaled-dot-product-attention." ) - # Check if we have to load the model using `accelerate` - if hasattr(model, "hf_device_map"): - load_accelerate = True - hf_device_map = model.hf_device_map - else: - load_accelerate = False + if hasattr(hf_config, "_attn_implementation") and hf_config._attn_implementation == "sdpa": + raise ValueError( + "This model already uses BetterTransformer optimizations from Transformers (torch.nn.functional.scaled_dot_product_attention). " + "As such, there is no need to use `model.to_bettertransformers()` or `BetterTransformer.transform(model)` from the Optimum library. " + "Details: https://huggingface.co/docs/transformers/perf_infer_gpu_one#pytorch-scaled-dot-product-attention." + ) if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True: raise Exception( @@ -237,11 +250,20 @@ def transform( f" Currently supported models are: {BetterTransformerManager.MODEL_MAPPING.keys()}." ) - if parse(torch.__version__) <= parse("1.14"): + if not check_if_torch_greater("2.0"): raise ValueError( f"BetterTransformer requires torch>=2.0 but {torch.__version__} is installed. Please upgrade PyTorch." ) + hf_config = model.config + + # Check if we have to load the model using `accelerate` + if hasattr(model, "hf_device_map"): + load_accelerate = True + hf_device_map = model.hf_device_map + else: + load_accelerate = False + if load_accelerate: # Remove the hooks from the original model to avoid weights being on `meta` device. remove_hook_from_module(model, recurse=True) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 703e98df3e2..6a2cc6834a6 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -43,7 +43,6 @@ from .base import OnnxConfig logger = logging.get_logger() -logger.setLevel(logging.INFO) def main_export( diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 8cd94194ffe..7e35691d54b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -319,6 +319,7 @@ def fix_dynamic_axes( input_shapes = {} dummy_inputs = self.generate_dummy_inputs(framework="np", **input_shapes) dummy_inputs = self.generate_dummy_inputs_for_validation(dummy_inputs, onnx_input_names=onnx_input_names) + dummy_inputs = self.rename_ambiguous_inputs(dummy_inputs) onnx_inputs = {} for name, value in dummy_inputs.items(): diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index 63a9067b90c..c12a9ac222a 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -26,6 +26,7 @@ import numpy as np import onnx +from transformers.generation import GenerationMixin from transformers.modeling_utils import get_parameter_dtype from transformers.utils import is_tf_available, is_torch_available @@ -34,6 +35,7 @@ DEFAULT_DUMMY_SHAPES, ONNX_WEIGHTS_NAME, TORCH_MINIMUM_VERSION, + check_if_transformers_greater, is_diffusers_available, is_torch_onnx_support_available, logging, @@ -529,6 +531,11 @@ def export_pytorch( logger.info(f"Using framework PyTorch: {torch.__version__}") FORCE_ONNX_EXTERNAL_DATA = os.getenv("FORCE_ONNX_EXTERNAL_DATA", "0") == "1" + model_kwargs = model_kwargs or {} + # num_logits_to_keep was added in transformers 4.45 and isn't added as inputs when exporting the model + if check_if_transformers_greater("4.44.99") and "num_logits_to_keep" in signature(model.forward).parameters.keys(): + model_kwargs["num_logits_to_keep"] = 0 + with torch.no_grad(): model.config.return_dict = True model = model.eval() @@ -999,7 +1006,6 @@ def onnx_export_from_model( >>> onnx_export_from_model(model, output="gpt2_onnx/") ``` """ - TasksManager.standardize_model_attributes(model) if hasattr(model.config, "export_model_type"): @@ -1120,6 +1126,22 @@ def onnx_export_from_model( if isinstance(atol, dict): atol = atol[task.replace("-with-past", "")] + if check_if_transformers_greater("4.44.99"): + misplaced_generation_parameters = model.config._get_non_default_generation_parameters() + if ( + isinstance(model, GenerationMixin) + and model.can_generate() + and len(misplaced_generation_parameters) > 0 + ): + logger.warning( + "Moving the following attributes in the config to the generation config: " + f"{misplaced_generation_parameters}. You are seeing this warning because you've set " + "generation parameters in the model config, as opposed to in the generation config.", + ) + for param_name, param_value in misplaced_generation_parameters.items(): + setattr(model.generation_config, param_name, param_value) + setattr(model.config, param_name, None) + # Saving the model config and preprocessor as this is needed sometimes. model.config.save_pretrained(output) generation_config = getattr(model, "generation_config", None) @@ -1161,6 +1183,10 @@ def onnx_export_from_model( if tokenizer_2 is not None: tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + tokenizer_3 = getattr(model, "tokenizer_3", None) + if tokenizer_3 is not None: + tokenizer_3.save_pretrained(output.joinpath("tokenizer_3")) + model.save_config(output) if float_dtype == "bf16": diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index d4b15b2968b..b39d19ec782 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Model specific ONNX configurations.""" + import random from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union @@ -26,8 +27,11 @@ BloomDummyPastKeyValuesGenerator, DummyAudioInputGenerator, DummyCodegenDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, + DummyFluxTransformerTextInputGenerator, + DummyFluxTransformerVisionInputGenerator, DummyInputGenerator, DummyIntGenerator, DummyPastKeyValuesGenerator, @@ -38,6 +42,9 @@ DummySpeechT5InputGenerator, DummyTextInputGenerator, DummyTimestepInputGenerator, + DummyTransformerTextInputGenerator, + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, DummyVisionEmbeddingsGenerator, DummyVisionEncoderDecoderPastKeyValuesGenerator, DummyVisionInputGenerator, @@ -53,6 +60,7 @@ NormalizedTextConfig, NormalizedTextConfigWithGQA, NormalizedVisionConfig, + check_if_diffusers_greater, check_if_transformers_greater, is_diffusers_available, logging, @@ -119,7 +127,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]: class AlbertOnnxConfig(BertOnnxConfig): - DEFAULT_ONNX_OPSET = 11 + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. class ConvBertOnnxConfig(BertOnnxConfig): @@ -154,9 +162,13 @@ class SplinterOnnxConfig(BertOnnxConfig): DEFAULT_ONNX_OPSET = 11 -class DistilBertOnnxConfig(BertOnnxConfig): +class RemBertOnnxConfig(BertOnnxConfig): DEFAULT_ONNX_OPSET = 11 + +class DistilBertOnnxConfig(BertOnnxConfig): + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for transformers>=4.46.0 + @property def inputs(self) -> Dict[str, Dict[int, str]]: if self.task == "multiple-choice": @@ -171,11 +183,11 @@ class MPNetOnnxConfig(DistilBertOnnxConfig): class RobertaOnnxConfig(DistilBertOnnxConfig): - pass + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. class CamembertOnnxConfig(DistilBertOnnxConfig): - pass + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. class FlaubertOnnxConfig(BertOnnxConfig): @@ -187,7 +199,7 @@ class IBertOnnxConfig(DistilBertOnnxConfig): class XLMRobertaOnnxConfig(DistilBertOnnxConfig): - pass + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. class DebertaOnnxConfig(BertOnnxConfig): @@ -256,8 +268,32 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): pass +class DecisionTransformerOnnxConfig(OnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,) + NORMALIZED_CONFIG_CLASS = NormalizedConfig + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + return { + "states": {0: "batch_size", 1: "sequence_length"}, + "actions": {0: "batch_size", 1: "sequence_length"}, + "timesteps": {0: "batch_size", 1: "sequence_length"}, + "returns_to_go": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "batch_size", 1: "sequence_length"}, + } + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "state_preds": {0: "batch_size", 1: "sequence_length"}, + "action_preds": {0: "batch_size", 1: "sequence_length"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + } + + class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): - DEFAULT_ONNX_OPSET = 13 + DEFAULT_ONNX_OPSET = 14 NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads") @@ -266,10 +302,18 @@ class GPTNeoXOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedTextConfig -class OPTOnnxConfig(TextDecoderOnnxConfig): - # OPT does not require position_ids input. - DEFAULT_ONNX_OPSET = 13 - NORMALIZED_CONFIG_CLASS = NormalizedTextConfig +# OPT does not take position_ids as input for transfomers < v4.46, needs it for transformers >= v4.46 +if check_if_transformers_greater("4.45.99"): + + class OPTOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): + DEFAULT_ONNX_OPSET = 14 # uses SDPA in Transformers, hence opset>=14. + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + +else: + + class OPTOnnxConfig(TextDecoderOnnxConfig): + DEFAULT_ONNX_OPSET = 14 # uses SDPA in Transformers, hence opset>=14. + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): @@ -287,7 +331,12 @@ class Qwen2OnnxConfig(LlamaOnnxConfig): class GemmaOnnxConfig(LlamaOnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator) DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator - pass + MIN_TRANSFORMERS_VERSION = version.parse("4.38.0") + + +class GraniteOnnxConfig(LlamaOnnxConfig): + MIN_TRANSFORMERS_VERSION = version.parse("4.45.0") + MIN_TORCH_VERSION = version.parse("2.5.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): @@ -304,6 +353,15 @@ class Phi3OnnxConfig(PhiOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedTextConfigWithGQA MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") + def __init__(self, *args, **kwargs): + # TODO : replace check_if_transformers_greater with is_transformers_available + if check_if_transformers_greater("4.46.0") and not check_if_transformers_greater("4.46.1"): + logger.error( + "Found transformers v4.46.0 while trying to exporting a Phi3 model, this specific version of transformers is not supported. " + "Please upgrade to v4.46.1 or higher, or downgrade your transformers version" + ) + super().__init__(*args, **kwargs) + class MistralOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): # This is because of the patching of torch.triu in AttentionMaskConverter, that exists from transformers>=4.35 @@ -326,6 +384,8 @@ def patch_model_for_export( class MPTOnnxConfig(TextDecoderOnnxConfig): # MPT does not require position_ids input. DEFAULT_ONNX_OPSET = 13 + # TODO: fix inference for transformers < v4.41 for beam_search > 1 + MIN_TRANSFORMERS_VERSION = version.parse("4.41.0") NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args( num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers" ) @@ -480,7 +540,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int class T5OnnxConfig(TextSeq2SeqOnnxConfig): - DEFAULT_ONNX_OPSET = 13 + DEFAULT_ONNX_OPSET = 14 # T5 uses aten::triu that requires opset>=14 DUMMY_INPUT_GENERATOR_CLASSES = TextSeq2SeqOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES[:-1] + ( T5DummySeq2SeqPastKeyValuesGenerator, ) @@ -564,6 +624,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int class M2M100OnnxConfig(TextSeq2SeqOnnxConfig): + DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for torch>=2.1.1. NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args( encoder_num_layers="encoder_layers", decoder_num_layers="decoder_layers", @@ -1014,22 +1075,13 @@ def outputs(self) -> Dict[str, Dict[int, str]]: "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, "pooler_output": {0: "batch_size"}, } + if self._normalized_config.output_hidden_states: for i in range(self._normalized_config.num_layers + 1): common_outputs[f"hidden_states.{i}"] = {0: "batch_size", 1: "sequence_length"} return common_outputs - def generate_dummy_inputs(self, framework: str = "pt", **kwargs): - dummy_inputs = super().generate_dummy_inputs(framework=framework, **kwargs) - - # TODO: fix should be by casting inputs during inference and not export - if framework == "pt": - import torch - - dummy_inputs["input_ids"] = dummy_inputs["input_ids"].to(dtype=torch.int32) - return dummy_inputs - def patch_model_for_export( self, model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], @@ -1039,7 +1091,7 @@ def patch_model_for_export( class UNetOnnxConfig(VisionOnnxConfig): - ATOL_FOR_VALIDATION = 1e-3 + ATOL_FOR_VALIDATION = 1e-4 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu # operator support, available since opset 14 DEFAULT_ONNX_OPSET = 14 @@ -1062,17 +1114,19 @@ class UNetOnnxConfig(VisionOnnxConfig): def inputs(self) -> Dict[str, Dict[int, str]]: common_inputs = { "sample": {0: "batch_size", 2: "height", 3: "width"}, - "timestep": {0: "steps"}, + "timestep": {}, # a scalar with no dimension "encoder_hidden_states": {0: "batch_size", 1: "sequence_length"}, } - # TODO : add text_image, image and image_embeds + # TODO : add addition_embed_type == text_image, image and image_embeds + # https://github.com/huggingface/diffusers/blob/9366c8f84bfe47099ff047272661786ebb54721d/src/diffusers/models/unets/unet_2d_condition.py#L671 if getattr(self._normalized_config, "addition_embed_type", None) == "text_time": common_inputs["text_embeds"] = {0: "batch_size"} common_inputs["time_ids"] = {0: "batch_size"} if getattr(self._normalized_config, "time_cond_proj_dim", None) is not None: common_inputs["timestep_cond"] = {0: "batch_size"} + return common_inputs @property @@ -1111,7 +1165,7 @@ def ordered_inputs(self, model) -> Dict[str, Dict[int, str]]: class VaeEncoderOnnxConfig(VisionOnnxConfig): - ATOL_FOR_VALIDATION = 1e-2 + ATOL_FOR_VALIDATION = 3e-4 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu # operator support, available since opset 14 DEFAULT_ONNX_OPSET = 14 @@ -1131,12 +1185,12 @@ def inputs(self) -> Dict[str, Dict[int, str]]: @property def outputs(self) -> Dict[str, Dict[int, str]]: return { - "latent_sample": {0: "batch_size", 2: "height_latent", 3: "width_latent"}, + "latent_parameters": {0: "batch_size", 2: "height_latent", 3: "width_latent"}, } class VaeDecoderOnnxConfig(VisionOnnxConfig): - ATOL_FOR_VALIDATION = 1e-3 + ATOL_FOR_VALIDATION = 1e-4 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu # operator support, available since opset 14 DEFAULT_ONNX_OPSET = 14 @@ -1159,6 +1213,101 @@ def outputs(self) -> Dict[str, Dict[int, str]]: } +class T5EncoderOnnxConfig(TextEncoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + ATOL_FOR_VALIDATION = 1e-4 + DEFAULT_ONNX_OPSET = 12 # int64 was supported since opset 12 + + @property + def inputs(self): + return { + "input_ids": {0: "batch_size", 1: "sequence_length"}, + } + + @property + def outputs(self): + return { + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + } + + +class SD3TransformerOnnxConfig(VisionOnnxConfig): + ATOL_FOR_VALIDATION = 1e-4 + # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu + # operator support, available since opset 14 + DEFAULT_ONNX_OPSET = 14 + + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, + DummyTransformerTextInputGenerator, + ) + + NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args( + image_size="sample_size", + num_channels="in_channels", + vocab_size="attention_head_dim", + hidden_size="joint_attention_dim", + projection_size="pooled_projection_dim", + allow_new=True, + ) + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + common_inputs = { + "hidden_states": {0: "batch_size", 2: "height", 3: "width"}, + "encoder_hidden_states": {0: "batch_size", 1: "sequence_length"}, + "pooled_projections": {0: "batch_size"}, + "timestep": {0: "step"}, + } + + return common_inputs + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "out_hidden_states": {0: "batch_size", 2: "height", 3: "width"}, + } + + @property + def torch_to_onnx_output_map(self) -> Dict[str, str]: + return { + "sample": "out_hidden_states", + } + + +class FluxTransformerOnnxConfig(SD3TransformerOnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTransformerTimestepInputGenerator, + DummyFluxTransformerVisionInputGenerator, + DummyFluxTransformerTextInputGenerator, + ) + + @property + def inputs(self): + common_inputs = super().inputs + common_inputs["hidden_states"] = {0: "batch_size", 1: "packed_height_width"} + common_inputs["txt_ids"] = ( + {0: "sequence_length"} if check_if_diffusers_greater("0.31.0") else {0: "batch_size", 1: "sequence_length"} + ) + common_inputs["img_ids"] = ( + {0: "packed_height_width"} + if check_if_diffusers_greater("0.31.0") + else {0: "batch_size", 1: "packed_height_width"} + ) + + if getattr(self._normalized_config, "guidance_embeds", False): + common_inputs["guidance"] = {0: "batch_size"} + + return common_inputs + + @property + def outputs(self): + return { + "out_hidden_states": {0: "batch_size", 1: "packed_height_width"}, + } + + class GroupViTOnnxConfig(CLIPOnnxConfig): pass @@ -2026,6 +2175,7 @@ class TrOCROnnxConfig(TextSeq2SeqOnnxConfig): class VisionEncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig ATOL_FOR_VALIDATION = 1e-3 + DEFAULT_ONNX_OPSET = 14 # uses SDPA in Transformers, hence opset>=14. DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator, DummyVisionEncoderDecoderPastKeyValuesGenerator) @@ -2155,8 +2305,21 @@ class Pix2StructOnnxConfig(OnnxSeq2SeqConfigWithPast): DummySeq2SeqPastKeyValuesGenerator, DummyPix2StructInputGenerator, ) - # Min operator needs to support int64, which is the case for opset>=12 - DEFAULT_ONNX_OPSET = 12 + + DEFAULT_ONNX_OPSET = 14 # use 'aten::triu' now which is opset 14 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # TODO : replace check_if_transformers_greater with is_transformers_available + if ( + check_if_transformers_greater("4.46.0") + and not check_if_transformers_greater("4.46.1") + and self._behavior is ConfigBehavior.DECODER + ): + logger.error( + "Found transformers v4.46.0 while trying to exporting a Pix2Struct model, this specific version of transformers is not supported. " + "Please upgrade to v4.46.1 or higher, or downgrade your transformers version" + ) @property def inputs(self): @@ -2309,3 +2472,5 @@ def overwrite_shape_and_generate_input( class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig + + DEFAULT_ONNX_OPSET = 14 # uses SDPA in Transformers, hence opset>=14. diff --git a/optimum/exporters/onnx/model_patcher.py b/optimum/exporters/onnx/model_patcher.py index 34ed5fcae46..2c0f9aeba67 100644 --- a/optimum/exporters/onnx/model_patcher.py +++ b/optimum/exporters/onnx/model_patcher.py @@ -34,11 +34,10 @@ if _transformers_version > version.parse("4.34.99"): - from transformers.modeling_attn_mask_utils import AttentionMaskConverter, _prepare_4d_causal_attention_mask + from transformers.modeling_attn_mask_utils import AttentionMaskConverter if _transformers_version >= version.parse("4.36"): from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask_for_sdpa else: - _prepare_4d_causal_attention_mask = None _prepare_4d_causal_attention_mask_for_sdpa = None AttentionMaskConverter = None @@ -169,7 +168,7 @@ def patched_forward(*args, **kwargs): filterd_outputs[name] = value elif isinstance(outputs, (list, tuple)): outputs_list = list(config.outputs.keys()) - dict(zip(outputs_list, outputs)) + filterd_outputs = dict(zip(outputs_list, outputs)) else: if len(config.outputs) > 1: num_outputs = len(config.outputs) diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index 675566ba23e..19e24f88743 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -27,7 +27,7 @@ is_diffusers_available, logging, ) -from ...utils.import_utils import _diffusers_version +from ...utils.import_utils import _diffusers_version, check_if_transformers_greater from ..utils import ( _get_submodels_and_export_configs, ) @@ -86,9 +86,14 @@ "phi", "phi3", "qwen2", + "granite", } +if check_if_transformers_greater("4.45.99"): + MODEL_TYPES_REQUIRING_POSITION_IDS.add("opt") + + def check_onnxruntime_requirements(minimum_version: version.Version): """ Checks that ONNX Runtime is installed and if version is recent enough. diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 97053040879..0a3758e97cf 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,6 +217,7 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", + "reinforcement-learning": "AutoModel", "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), "text-generation": "AutoModelForCausalLM", @@ -308,9 +309,9 @@ class TasksManager: "image-feature-extraction": "feature-extraction", # for backward compatibility and testing (where # model task and model type are still the same) - "lcm": "text-to-image", "stable-diffusion": "text-to-image", "stable-diffusion-xl": "text-to-image", + "latent-consistency": "text-to-image", } _CUSTOM_CLASSES = { @@ -335,7 +336,11 @@ class TasksManager: } _DIFFUSERS_SUPPORTED_MODEL_TYPE = { - "clip-text-model": supported_tasks_mapping( + "t5-encoder": supported_tasks_mapping( + "feature-extraction", + onnx="T5EncoderOnnxConfig", + ), + "clip-text": supported_tasks_mapping( "feature-extraction", onnx="CLIPTextOnnxConfig", ), @@ -343,7 +348,15 @@ class TasksManager: "feature-extraction", onnx="CLIPTextWithProjectionOnnxConfig", ), - "unet": supported_tasks_mapping( + "flux-transformer-2d": supported_tasks_mapping( + "semantic-segmentation", + onnx="FluxTransformerOnnxConfig", + ), + "sd3-transformer-2d": supported_tasks_mapping( + "semantic-segmentation", + onnx="SD3TransformerOnnxConfig", + ), + "unet-2d-condition": supported_tasks_mapping( "semantic-segmentation", onnx="UNetOnnxConfig", ), @@ -418,6 +431,15 @@ class TasksManager: onnx="BertOnnxConfig", tflite="BertTFLiteConfig", ), + "rembert": supported_tasks_mapping( + "fill-mask", + "feature-extraction", + "text-classification", + "multiple-choice", + "token-classification", + "question-answering", + onnx="RemBertOnnxConfig", + ), # For big-bird and bigbird-pegasus being unsupported, refer to model_configs.py # "big-bird": supported_tasks_mapping( # "feature-extraction", @@ -562,6 +584,11 @@ class TasksManager: onnx="DebertaV2OnnxConfig", tflite="DebertaV2TFLiteConfig", ), + "decision-transformer": supported_tasks_mapping( + "feature-extraction", + "reinforcement-learning", + onnx="DecisionTransformerOnnxConfig", + ), "deit": supported_tasks_mapping( "feature-extraction", "image-classification", @@ -915,6 +942,13 @@ class TasksManager: "text-classification", onnx="LlamaOnnxConfig", ), + "granite": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + onnx="GraniteOnnxConfig", + ), "pegasus": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", @@ -1170,12 +1204,17 @@ class TasksManager: "transformers": _SUPPORTED_MODEL_TYPE, } _UNSUPPORTED_CLI_MODEL_TYPE = { - "unet", + # diffusers model types + "clip-text", + "clip-text-with-projection", + "flux-transformer-2d", + "sd3-transformer-2d", + "t5-encoder", + "unet-2d-condition", "vae-encoder", "vae-decoder", - "clip-text-model", - "clip-text-with-projection", - "trocr", # supported through the vision-encoder-decoder model type + # redundant model types + "trocr", # same as vision-encoder-decoder } _SUPPORTED_CLI_MODEL_TYPE = ( set(_SUPPORTED_MODEL_TYPE.keys()) @@ -2061,6 +2100,9 @@ def get_model_from_task( if original_task == "automatic-speech-recognition" or task == "automatic-speech-recognition": if original_task == "auto" and config.architectures is not None: model_class_name = config.architectures[0] + elif original_task == "reinforcement-learning" or task == "reinforcement-learning": + if config.architectures is not None: + model_class_name = config.architectures[0] if library_name == "diffusers": config = DiffusionPipeline.load_config(model_name_or_path, **kwargs) @@ -2095,6 +2137,7 @@ def get_model_from_task( device=device, cache_folder=cache_folder, token=token, + revision=revision, trust_remote_code=trust_remote_code, ) else: diff --git a/optimum/exporters/tflite/__main__.py b/optimum/exporters/tflite/__main__.py index b3c90cb63f2..0c4c7b994fa 100644 --- a/optimum/exporters/tflite/__main__.py +++ b/optimum/exporters/tflite/__main__.py @@ -28,7 +28,6 @@ logger = logging.get_logger() -logger.setLevel(logging.INFO) def main(): diff --git a/optimum/exporters/utils.py b/optimum/exporters/utils.py index e2125736c4d..60de169de5e 100644 --- a/optimum/exporters/utils.py +++ b/optimum/exporters/utils.py @@ -15,7 +15,6 @@ """Utilities for model preparation to export.""" - import copy from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union @@ -44,17 +43,7 @@ "Please update diffusers by running `pip install --upgrade diffusers`" ) - from diffusers import ( - DiffusionPipeline, - LatentConsistencyModelImg2ImgPipeline, - LatentConsistencyModelPipeline, - StableDiffusionImg2ImgPipeline, - StableDiffusionInpaintPipeline, - StableDiffusionPipeline, - StableDiffusionXLImg2ImgPipeline, - StableDiffusionXLInpaintPipeline, - StableDiffusionXLPipeline, - ) + from diffusers import DiffusionPipeline from diffusers.models.attention_processor import ( Attention, AttnAddedKVProcessor, @@ -85,6 +74,20 @@ DECODER_MERGED_NAME = "decoder_model_merged" +_DIFFUSERS_CLASS_NAME_TO_SUBMODEL_TYPE = { + "CLIPTextModel": "clip-text", + "CLIPTextModelWithProjection": "clip-text-with-projection", + "FluxTransformer2DModel": "flux-transformer-2d", + "SD3Transformer2DModel": "sd3-transformer-2d", + "UNet2DConditionModel": "unet-2d-condition", + "T5EncoderModel": "t5-encoder", +} + + +def _get_diffusers_submodel_type(submodel): + return _DIFFUSERS_CLASS_NAME_TO_SUBMODEL_TYPE.get(submodel.__class__.__name__) + + def _get_submodels_for_export_diffusion( pipeline: "DiffusionPipeline", ) -> Dict[str, Union["PreTrainedModel", "ModelMixin"]]: @@ -92,69 +95,87 @@ def _get_submodels_for_export_diffusion( Returns the components of a Stable Diffusion model. """ - is_stable_diffusion = isinstance( - pipeline, (StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline) - ) - is_stable_diffusion_xl = isinstance( - pipeline, (StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline) - ) - is_latent_consistency_model = isinstance( - pipeline, (LatentConsistencyModelPipeline, LatentConsistencyModelImg2ImgPipeline) - ) - - if is_stable_diffusion_xl: - projection_dim = pipeline.text_encoder_2.config.projection_dim - elif is_stable_diffusion: - projection_dim = pipeline.text_encoder.config.projection_dim - elif is_latent_consistency_model: - projection_dim = pipeline.text_encoder.config.projection_dim - else: - raise ValueError( - f"The export of a DiffusionPipeline model with the class name {pipeline.__class__.__name__} is currently not supported in Optimum. " - "Please open an issue or submit a PR to add the support." - ) - models_for_export = {} + is_torch_greater_or_equal_than_2_1 = version.parse(torch.__version__) >= version.parse("2.1.0") + is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL") + is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3") + # Text encoder text_encoder = getattr(pipeline, "text_encoder", None) if text_encoder is not None: - if is_stable_diffusion_xl: + if is_sdxl or is_sd3: text_encoder.config.output_hidden_states = True + text_encoder.text_model.config.output_hidden_states = True + + text_encoder.config.export_model_type = _get_diffusers_submodel_type(text_encoder) models_for_export["text_encoder"] = text_encoder - # U-NET - # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 - is_torch_greater_or_equal_than_2_1 = version.parse(torch.__version__) >= version.parse("2.1.0") - if not is_torch_greater_or_equal_than_2_1: - pipeline.unet.set_attn_processor(AttnProcessor()) + # Text encoder 2 + text_encoder_2 = getattr(pipeline, "text_encoder_2", None) + if text_encoder_2 is not None: + if is_sdxl or is_sd3: + text_encoder_2.config.output_hidden_states = True + text_encoder_2.text_model.config.output_hidden_states = True + + text_encoder_2.config.export_model_type = _get_diffusers_submodel_type(text_encoder_2) + models_for_export["text_encoder_2"] = text_encoder_2 - pipeline.unet.config.text_encoder_projection_dim = projection_dim - # The U-NET time_ids inputs shapes depends on the value of `requires_aesthetics_score` - # https://github.com/huggingface/diffusers/blob/v0.18.2/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L571 - pipeline.unet.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) - models_for_export["unet"] = pipeline.unet + # Text encoder 3 + text_encoder_3 = getattr(pipeline, "text_encoder_3", None) + if text_encoder_3 is not None: + text_encoder_3.config.export_model_type = _get_diffusers_submodel_type(text_encoder_3) + models_for_export["text_encoder_3"] = text_encoder_3 - # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565 + # U-NET + unet = getattr(pipeline, "unet", None) + if unet is not None: + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 + if not is_torch_greater_or_equal_than_2_1: + unet.set_attn_processor(AttnProcessor()) + + # The U-NET time_ids inputs shapes depends on the value of `requires_aesthetics_score` + # https://github.com/huggingface/diffusers/blob/v0.18.2/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L571 + unet.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) + unet.config.time_cond_proj_dim = getattr(pipeline.unet.config, "time_cond_proj_dim", None) + unet.config.text_encoder_projection_dim = pipeline.text_encoder.config.projection_dim + unet.config.export_model_type = _get_diffusers_submodel_type(unet) + models_for_export["unet"] = unet + + # Transformer + transformer = getattr(pipeline, "transformer", None) + if transformer is not None: + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 + if not is_torch_greater_or_equal_than_2_1: + transformer.set_attn_processor(AttnProcessor()) + + transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) + transformer.config.time_cond_proj_dim = getattr(pipeline.transformer.config, "time_cond_proj_dim", None) + transformer.config.text_encoder_projection_dim = pipeline.text_encoder.config.projection_dim + transformer.config.export_model_type = _get_diffusers_submodel_type(transformer) + models_for_export["transformer"] = transformer + + # VAE Encoder vae_encoder = copy.deepcopy(pipeline.vae) + + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 if not is_torch_greater_or_equal_than_2_1: vae_encoder = override_diffusers_2_0_attn_processors(vae_encoder) - vae_encoder.forward = lambda sample: {"latent_sample": vae_encoder.encode(x=sample)["latent_dist"].sample()} + + # we return the distribution parameters to be able to recreate it in the decoder + vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters} models_for_export["vae_encoder"] = vae_encoder - # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600 + # VAE Decoder vae_decoder = copy.deepcopy(pipeline.vae) + + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 if not is_torch_greater_or_equal_than_2_1: vae_decoder = override_diffusers_2_0_attn_processors(vae_decoder) + vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample) models_for_export["vae_decoder"] = vae_decoder - text_encoder_2 = getattr(pipeline, "text_encoder_2", None) - if text_encoder_2 is not None: - text_encoder_2.config.output_hidden_states = True - text_encoder_2.text_model.config.output_hidden_states = True - models_for_export["text_encoder_2"] = text_encoder_2 - return models_for_export @@ -312,33 +333,59 @@ def get_diffusion_models_for_export( `Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `ExportConfig`]: A Dict containing the model and export configs for the different components of the model. """ + models_for_export = _get_submodels_for_export_diffusion(pipeline) # Text encoder if "text_encoder" in models_for_export: + text_encoder = models_for_export["text_encoder"] text_encoder_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.text_encoder, - exporter=exporter, - library_name="diffusers", - task="feature-extraction", + model=text_encoder, exporter=exporter, library_name="diffusers", task="feature-extraction" ) text_encoder_export_config = text_encoder_config_constructor( - pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype + text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype ) models_for_export["text_encoder"] = (models_for_export["text_encoder"], text_encoder_export_config) + # Text encoder 2 + if "text_encoder_2" in models_for_export: + text_encoder_2 = models_for_export["text_encoder_2"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=text_encoder_2, exporter=exporter, library_name="diffusers", task="feature-extraction" + ) + export_config = export_config_constructor(text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["text_encoder_2"] = (models_for_export["text_encoder_2"], export_config) + + # Text encoder 3 + if "text_encoder_3" in models_for_export: + text_encoder_3 = models_for_export["text_encoder_3"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=text_encoder_3, exporter=exporter, library_name="diffusers", task="feature-extraction" + ) + export_config = export_config_constructor(text_encoder_3.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["text_encoder_3"] = (models_for_export["text_encoder_3"], export_config) + # U-NET - export_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.unet, - exporter=exporter, - library_name="diffusers", - task="semantic-segmentation", - model_type="unet", - ) - unet_export_config = export_config_constructor(pipeline.unet.config, int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["unet"] = (models_for_export["unet"], unet_export_config) + if "unet" in models_for_export: + unet = models_for_export["unet"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=unet, exporter=exporter, library_name="diffusers", task="semantic-segmentation" + ) + unet_export_config = export_config_constructor(unet.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["unet"] = (models_for_export["unet"], unet_export_config) + + # Transformer + if "transformer" in models_for_export: + transformer = models_for_export["transformer"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=transformer, exporter=exporter, library_name="diffusers", task="semantic-segmentation" + ) + transformer_export_config = export_config_constructor( + transformer.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["transformer"] = (models_for_export["transformer"], transformer_export_config) - # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565 + # VAE Encoder vae_encoder = models_for_export["vae_encoder"] vae_config_constructor = TasksManager.get_exporter_config_constructor( model=vae_encoder, @@ -347,10 +394,12 @@ def get_diffusion_models_for_export( task="semantic-segmentation", model_type="vae-encoder", ) - vae_export_config = vae_config_constructor(vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["vae_encoder"] = (vae_encoder, vae_export_config) + vae_encoder_export_config = vae_config_constructor( + vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config) - # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600 + # VAE Decoder vae_decoder = models_for_export["vae_decoder"] vae_config_constructor = TasksManager.get_exporter_config_constructor( model=vae_decoder, @@ -359,21 +408,10 @@ def get_diffusion_models_for_export( task="semantic-segmentation", model_type="vae-decoder", ) - vae_export_config = vae_config_constructor(vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["vae_decoder"] = (vae_decoder, vae_export_config) - - if "text_encoder_2" in models_for_export: - export_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.text_encoder_2, - exporter=exporter, - library_name="diffusers", - task="feature-extraction", - model_type="clip-text-with-projection", - ) - export_config = export_config_constructor( - pipeline.text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype - ) - models_for_export["text_encoder_2"] = (models_for_export["text_encoder_2"], export_config) + vae_decoder_export_config = vae_config_constructor( + vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config) return models_for_export diff --git a/optimum/fx/parallelization/decomp.py b/optimum/fx/parallelization/decomp.py index 26258d451bf..5410818e929 100644 --- a/optimum/fx/parallelization/decomp.py +++ b/optimum/fx/parallelization/decomp.py @@ -197,7 +197,7 @@ def run(self, *args, **kwargs): def decompose_and_functionalize( graph_module: GraphModule, decomposition_table: Dict[torch._ops.OperatorBase, Callable] = core_aten_decompositions(), - leaf_function_targets: List[Callable] = [F.scaled_dot_product_attention], + leaf_function_targets: List[Callable] = [F.scaled_dot_product_attention, F.cross_entropy], ) -> Callable: """ API to decompose and functionalize a high-level graph module. diff --git a/optimum/fx/parallelization/op_registry/op_handlers.py b/optimum/fx/parallelization/op_registry/op_handlers.py index 56b8fc16bc0..4a9c55e3764 100644 --- a/optimum/fx/parallelization/op_registry/op_handlers.py +++ b/optimum/fx/parallelization/op_registry/op_handlers.py @@ -19,7 +19,7 @@ from torch.fx import Node from ..core import Config -from ..utils import is_activation, is_embedding, is_linear +from ..utils import is_activation, is_cross_entropy, is_cross_entropy_parallel_compatible, is_embedding, is_linear class Registry: @@ -334,7 +334,16 @@ def propagate(self) -> List[int]: ndim = arg.meta["val"].ndim slice_dim = (slice_dim + ndim) % ndim if slice_dim == axis: - # slice on the parallel axis is not allowed + # slice on the parallel axis is not allowed, except it's a nop + start, stop, step = 0, arg.meta["val"].shape[axis], 1 + if len(self.node.args) > 2: + start = self.node.args[2] + elif len(self.node.args) > 3: + stop = self.node.args[3] + elif len(self.node.args) > 4: + step = self.node.args[4] + if start == 0 and stop >= arg.meta["val"].shape[axis] and step == 1: + return [axis] return [] return [axis] @@ -404,12 +413,12 @@ def propagate(self) -> List[int]: if self.node.op in ["placeholder", "get_attr"]: return [None] elif self.node.op == "output": - for node in self.node.all_input_nodes: - # TODO: allow parallelized nodes in output, and append comm ops in graph tp all-gather - # parallelized output if intructed - if self.extract_axis(node) is not None: - return [] - return [None] + # does not care about if output is being parallelized right now, because if the output is loss, + # then it must be not parallelized as long as it comes from sharded cross entropy. + # TODO: append all-gather comm ops before all parallelized output nodes if instructed. + input_arg = self.node.all_input_nodes[0] + axis = self.extract_axis(input_arg) + return [axis] elif is_linear(self.node): input_arg = self.node.all_input_nodes[0] axis = self.extract_axis(input_arg) @@ -438,6 +447,16 @@ def propagate(self) -> List[int]: return [1, None] if self.config.enable_sequence_parallel else [None] else: return [] + elif is_cross_entropy(self.node): + logits = self.node.all_input_nodes[0] + axis = self.extract_axis(logits) + if axis is None or ( + is_cross_entropy_parallel_compatible(self.node) and axis == logits.meta["val"].ndim - 1 + ): + # for cross entropy, the input logits parallel axis can only be the last axis or None + return [None] + else: + return [] elif is_activation(self.node): return UnaryOpParallelAxisPropagateHandler(self.node, self.meta_key, self.config).propagate() diff --git a/optimum/fx/parallelization/parallel_layers/__init__.py b/optimum/fx/parallelization/parallel_layers/__init__.py index 9bfb13afdf6..474ae7f7eef 100644 --- a/optimum/fx/parallelization/parallel_layers/__init__.py +++ b/optimum/fx/parallelization/parallel_layers/__init__.py @@ -14,3 +14,4 @@ # limitations under the License. from .embedding import VocabParallelEmbedding from .linear import ColumnParallelLinear, RowParallelLinear +from .loss import VocabParallelCrossEntropyLoss, sharded_cross_entropy_wrapper_fn diff --git a/optimum/fx/parallelization/parallel_layers/loss.py b/optimum/fx/parallelization/parallel_layers/loss.py new file mode 100644 index 00000000000..0a11e33c08e --- /dev/null +++ b/optimum/fx/parallelization/parallel_layers/loss.py @@ -0,0 +1,163 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import wraps +from typing import Optional + +import torch +import torch.distributed as dist +import torch.nn as nn + +from ..core import ParallelExecutionCtx + + +# Adapted from https://github.com/huggingface/nanotron/blob/main/src/nanotron/parallel/tensor_parallel/functional.py +class _ShardedCrossEntropy(torch.autograd.Function): + @staticmethod + def forward( + ctx, + sharded_logits: torch.Tensor, # (batch_size, length, sharded_hidden_size) + target: torch.Tensor, # (batch_size, length) + group: dist.ProcessGroup, + ): + # Maximum value along last dimension across all GPUs. + logits_max = torch.max(sharded_logits, dim=-1)[0] + dist.all_reduce(logits_max, op=dist.ReduceOp.MAX, group=group) + # Subtract the maximum value. + sharded_logits = sharded_logits - logits_max.unsqueeze(dim=-1) + + # Get the shard's indices + sharded_hidden_size = sharded_logits.shape[-1] + rank = dist.get_rank(group) + start_index = rank * sharded_hidden_size + end_index = start_index + sharded_hidden_size + + # Create a mask of valid ids (1 means it needs to be masked). + target_mask = (target < start_index) | (target >= end_index) + masked_target = target.clone() - start_index + masked_target[target_mask] = 0 + + # Get predicted-logits = logits[target]. + # For Simplicity, we convert logits to a 2-D tensor with size + # [*, shard-size] and target to a 1-D tensor of size [*]. + logits_2d = sharded_logits.view(-1, sharded_hidden_size) + masked_target_1d = masked_target.view(-1) + arange_1d = torch.arange(start=0, end=logits_2d.shape[0], device=logits_2d.device) + predicted_logits_1d = logits_2d[arange_1d, masked_target_1d] + if predicted_logits_1d.is_contiguous(): + predicted_logits_1d = predicted_logits_1d.clone() + else: + predicted_logits_1d = predicted_logits_1d.contiguous() + predicted_logits = predicted_logits_1d.view_as(target) + predicted_logits[target_mask] = 0.0 + # All reduce is needed to get the chunks from other GPUs. + dist.all_reduce(predicted_logits, op=dist.ReduceOp.SUM, group=group) + + # Sum of exponential of logits along vocab dimension across all GPUs. + exp_logits = sharded_logits + torch.exp(sharded_logits, out=exp_logits) + sum_exp_logits = exp_logits.sum(dim=-1) + dist.all_reduce(sum_exp_logits, op=dist.ReduceOp.SUM, group=group) + + # Loss = log(sum(exp(logits))) - predicted-logit. + loss = torch.log(sum_exp_logits) - predicted_logits + + # Normalize and optionally smooth logits + exp_logits.div_(sum_exp_logits.unsqueeze(dim=-1)) + + # Store softmax, target-mask and masked-target for backward pass. + ctx.save_for_backward(exp_logits, target_mask, masked_target_1d) + + return loss + + @staticmethod + def backward(ctx, grad_output: torch.Tensor): + # Retrieve tensors from the forward path. + softmax, target_mask, masked_target_1d = ctx.saved_tensors + + # All the inputs have softmax as their gradient. + grad_input = softmax + # For simplicity, work with the 2D gradient. + sharded_hidden_size = softmax.size()[-1] + grad_2d = grad_input.view(-1, sharded_hidden_size) + + # Add the gradient from matching classes. + arange_1d = torch.arange(start=0, end=grad_2d.size()[0], device=grad_2d.device) + grad_2d[arange_1d, masked_target_1d] -= 1.0 - target_mask.view(-1).float() + + # Finally elementwise multiplication with the output gradients. + grad_input.mul_(grad_output.unsqueeze(dim=-1)) + + return grad_input, None, None + + +def sharded_cross_entropy(sharded_logits: torch.Tensor, target: torch.Tensor, process_group: dist.ProcessGroup): + return _ShardedCrossEntropy.apply(sharded_logits, target, process_group) + + +def sharded_cross_entropy_wrapper_fn(process_group: dist.ProcessGroup): + @wraps(sharded_cross_entropy) + def wrapper( + sharded_logits: torch.Tensor, + target: torch.Tensor, + weight: Optional[torch.Tensor] = None, + size_average: Optional[bool] = None, + ignore_index: int = -100, + reduce: Optional[bool] = None, + reduction: str = "mean", + label_smoothing: float = 0.0, + ): + if weight is not None or ignore_index != -100 or label_smoothing != 0.0: + raise ValueError( + "Does not support weighted mode, index ignoring and label smoothing in current parallel cross entropy implementation." + ) + loss: torch.Tensor = sharded_cross_entropy(sharded_logits, target, process_group) + + if size_average is not None or reduce is not None: + size_average = True if size_average is None else size_average + reduce = True if reduce is None else reduce + + if size_average and reduce: + reduction = "mean" + elif reduce: + reduction = "sum" + else: + reduction = "none" + + if reduction == "mean": + return loss.mean() + elif reduction == "sum": + return loss.sum() + return loss + + return wrapper + + +class VocabParallelCrossEntropyLoss(nn.Module): + """ + Simple parallel cross entropy implementation which does not support weighted mode and label smoothing yet. + """ + + def __init__(self, ctx: ParallelExecutionCtx, reduction: str = "mean") -> None: + super(VocabParallelCrossEntropyLoss, self).__init__() + self.process_group = ctx.tp_group + self.reduction = reduction + + def forward(self, sharded_logits: torch.Tensor, target: torch.Tensor): + loss: torch.Tensor = _ShardedCrossEntropy.apply(sharded_logits, target, self.process_group) + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + return loss diff --git a/optimum/fx/parallelization/passes.py b/optimum/fx/parallelization/passes.py index 14b652fff73..90155263281 100644 --- a/optimum/fx/parallelization/passes.py +++ b/optimum/fx/parallelization/passes.py @@ -26,8 +26,15 @@ from .decomp import decompose_and_functionalize from .distributed import scatter from .op_registry import REGISTRY, FallbackParallelAxisPropagateHandler -from .parallel_layers import ColumnParallelLinear, RowParallelLinear, VocabParallelEmbedding +from .parallel_layers import ( + ColumnParallelLinear, + RowParallelLinear, + VocabParallelCrossEntropyLoss, + VocabParallelEmbedding, + sharded_cross_entropy_wrapper_fn, +) from .utils import ( + is_cross_entropy, is_embedding, is_linear, is_shape_consumer, @@ -273,6 +280,11 @@ def run(self, graph_module: GraphModule, ctx: ParallelExecutionCtx, config: Conf info["sequence_parallel"] = False self.place_marker_per_node(node, info) + elif is_cross_entropy(node): + axis_before = ParallelAxisSolverPass.get_stored_field_info(node.args[0], "parallel_axis") + if axis_before is not None: + self.place_marker_per_node(node, {"axis": "vocab"}) + return graph_module @@ -343,6 +355,35 @@ def handle_embedding(node: Node, ctx: ParallelExecutionCtx) -> None: layer_cache[key] = new_mod setattr(parent_mod, field, new_mod) + @staticmethod + def handle_cross_entropy(node: Node, ctx: ParallelExecutionCtx) -> None: + axis = ParallelLayerAnnotatePass.get_stored_field_info(node, field="axis") + if axis is None: + return + + assert axis in {"vocab"}, "Only support parallelization on vocab dim for now." + if node.op == "call_module": + graph_module = node.graph.owning_module + prefix_and_field = node.target.rsplit(".", maxsplit=1) + if len(prefix_and_field) == 2: + parent_mod = graph_module.get_submodule(prefix_and_field[0]) + field = prefix_and_field[1] + else: + parent_mod = graph_module + field = node.target + + mod: nn.CrossEntropyLoss = graph_module.get_submodule(node.target) + key, layer_cache = node.target, ctx.parallel_layer_cache + if key in layer_cache: + new_mod = layer_cache[key] + else: + assert ctx.compile_times == 0, "illegal path for recompilation" + new_mod = VocabParallelCrossEntropyLoss(ctx, reduction=mod.reduction) + layer_cache[key] = new_mod + setattr(parent_mod, field, new_mod) + else: + node.target = sharded_cross_entropy_wrapper_fn(process_group=ctx.tp_group) + @staticmethod def handle_hard_coded_axis_param(node: Node, ctx: ParallelExecutionCtx) -> None: def extract_shape_from_node(node: Node) -> List[Any]: @@ -384,6 +425,8 @@ def run(self, graph_module: GraphModule, ctx: ParallelExecutionCtx, config: Conf self.handle_linear(node, ctx) elif is_embedding(node): self.handle_embedding(node, ctx) + elif is_cross_entropy(node): + self.handle_cross_entropy(node, ctx) # correct the attention head num in parallel setting elif is_shape_consumer(node): self.handle_hard_coded_axis_param(node, ctx) diff --git a/optimum/fx/parallelization/utils.py b/optimum/fx/parallelization/utils.py index b7b1ccd41c8..3074638737f 100644 --- a/optimum/fx/parallelization/utils.py +++ b/optimum/fx/parallelization/utils.py @@ -82,6 +82,40 @@ def is_shape_generator(node: Node) -> bool: return node.op == "call_method" and node.target == "size" +def is_cross_entropy(node: Node) -> bool: + if node.op == "call_function": + return node.target is F.cross_entropy + elif node.op == "call_module": + mod = node.graph.owning_module + return isinstance(mod.get_submodule(node.target), nn.CrossEntropyLoss) + return False + + +def is_cross_entropy_parallel_compatible(node: Node) -> bool: + """ + For now `VocabParallelCrossEntropyLoss` does not support weighted mode, index ignoring and label smoothing. + """ + if node.op == "call_function": + weight = node.kwargs.get("weight", None) + ignore_index = node.kwargs.get("ignore_index", -100) + label_smoothing = node.kwargs.get("label_smoothing", 0.0) + if len(node.args) > 2 and weight is None: + weight = node.args[2] + if len(node.args) > 4 and ignore_index == -100: + ignore_index = node.args[4] + if len(node.args) > 7 and label_smoothing == 0.0: + label_smoothing = node.args[7] + + return weight is None and ignore_index == -100 and label_smoothing == 0.0 + + elif node.op == "call_module": + mod: nn.CrossEntropyLoss = node.graph.owning_module.get_submodule(node.target) + weight, label_smoothing, ignore_index = mod.weight, mod.label_smoothing, mod.ignore_index + return weight is None and ignore_index == -100 and label_smoothing == 0.0 + + return False + + def stable_topological_sort(graph: Graph): def _args(n: torch.fx.Node) -> List[torch.fx.node.Argument]: args: List[torch.fx.node.Argument] = [] diff --git a/optimum/gptq/data.py b/optimum/gptq/data.py index b8734da478e..7e5fc0b43db 100644 --- a/optimum/gptq/data.py +++ b/optimum/gptq/data.py @@ -18,7 +18,12 @@ import numpy as np import torch -from datasets import load_dataset + +from optimum.utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available + + +if is_datasets_available(): + from datasets import load_dataset """ @@ -113,6 +118,9 @@ def pad_block(block, pads): def get_wikitext2(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_wikitext2")) + if split == "train": data = load_dataset("wikitext", "wikitext-2-raw-v1", split="train") elif split == "validation": @@ -132,6 +140,9 @@ def get_wikitext2(tokenizer: Any, seqlen: int, nsamples: int, split: str = "trai def get_c4(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_c4")) + if split == "train": data = load_dataset("allenai/c4", split="train", data_files={"train": "en/c4-train.00000-of-01024.json.gz"}) elif split == "validation": @@ -157,6 +168,9 @@ def get_c4(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): def get_c4_new(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_c4_new")) + if split == "train": data = load_dataset("allenai/c4", split="train", data_files={"train": "en/c4-train.00000-of-01024.json.gz"}) elif split == "validation": diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py index 902af87bbb0..849d8821ebf 100644 --- a/optimum/gptq/quantizer.py +++ b/optimum/gptq/quantizer.py @@ -88,7 +88,7 @@ def __init__( dataset (`Union[List[str], str, Any]`, defaults to `None`): The dataset used for quantization. You can provide your own dataset in a list of string or in a list of tokenized data (e.g. [{ "input_ids": [ 1, 100, 15, ... ],"attention_mask": [ 1, 1, 1, ... ]},...]) - or just use the original datasets used in GPTQ paper ['wikitext2','c4','c4-new','ptb','ptb-new']. + or just use the original datasets used in GPTQ paper ['wikitext2','c4','c4-new']. group_size (int, defaults to 128): The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization. damp_percent (`float`, defaults to `0.1`): @@ -546,7 +546,7 @@ def tmp(_, input, output): if self.bits == 4: # device not on gpu - if device == torch.device("cpu") or (has_device_map and any(d in devices for d in ["cpu", "disk"])): + if device.type != "cuda" or (has_device_map and any(d in devices for d in ["cpu", "disk", "hpu"])): if not self.disable_exllama: logger.warning( "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`" @@ -589,13 +589,14 @@ def post_init_model(self, model): The input model """ if self.bits == 4 and not self.disable_exllama: - if get_device(model) == torch.device("cpu") or ( - hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk"]) + if get_device(model).type != "cuda" or ( + hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk", "hpu"]) ): - raise ValueError( - "Found modules on cpu/disk. Using Exllama or Exllamav2 backend requires all the modules to be on GPU." - "You can deactivate exllama backend by setting `disable_exllama=True` in the quantization config object" - ) + if not self.disable_exllama: + logger.warning( + "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`" + ) + self.disable_exllama = True class StoreAttr(object): pass diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 5bab0622de4..48c738514ae 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -85,7 +85,6 @@ class PreTrainedModel(ABC): # noqa: F811 class OptimizedModel(PreTrainedModel): config_class = AutoConfig - load_tf_weights = None base_model_prefix = "optimized_model" config_name = CONFIG_NAME @@ -372,29 +371,44 @@ def from_pretrained( export = from_transformers if len(model_id.split("@")) == 2: + logger.warning( + f"Specifying the `revision` as @{model_id.split('@')[1]} is deprecated and will be removed in v1.23, please use the `revision` argument instead." + ) if revision is not None: logger.warning( f"The argument `revision` was set to {revision} but will be ignored for {model_id.split('@')[1]}" ) model_id, revision = model_id.split("@") - library_name = TasksManager.infer_library_from_model(model_id, subfolder, revision, cache_dir, token=token) + all_files, _ = TasksManager.get_model_files( + model_id, + subfolder=subfolder, + cache_dir=cache_dir, + revision=revision, + token=token, + ) + + config_folder = subfolder + if cls.config_name not in all_files: + logger.info( + f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." + ) + config_folder = "" + + library_name = TasksManager.infer_library_from_model( + model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token + ) if library_name == "timm": - config = PretrainedConfig.from_pretrained(model_id, subfolder, revision) + config = PretrainedConfig.from_pretrained( + model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token + ) if config is None: - if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME: - if CONFIG_NAME in os.listdir(os.path.join(model_id, subfolder)): + if os.path.isdir(os.path.join(model_id, config_folder)) and cls.config_name == CONFIG_NAME: + if CONFIG_NAME in os.listdir(os.path.join(model_id, config_folder)): config = AutoConfig.from_pretrained( - os.path.join(model_id, subfolder), trust_remote_code=trust_remote_code - ) - elif CONFIG_NAME in os.listdir(model_id): - config = AutoConfig.from_pretrained( - os.path.join(model_id, CONFIG_NAME), trust_remote_code=trust_remote_code - ) - logger.info( - f"config.json not found in the specified subfolder {subfolder}. Using the top level config.json." + os.path.join(model_id, config_folder), trust_remote_code=trust_remote_code ) else: raise OSError(f"config.json not found in {model_id} local folder") @@ -405,7 +419,7 @@ def from_pretrained( cache_dir=cache_dir, token=token, force_download=force_download, - subfolder=subfolder, + subfolder=config_folder, trust_remote_code=trust_remote_code, ) elif isinstance(config, (str, os.PathLike)): @@ -415,7 +429,7 @@ def from_pretrained( cache_dir=cache_dir, token=token, force_download=force_download, - subfolder=subfolder, + subfolder=config_folder, trust_remote_code=trust_remote_code, ) diff --git a/optimum/onnx/transformations_utils.py b/optimum/onnx/transformations_utils.py index 1f0765112e8..fe55a5a5770 100644 --- a/optimum/onnx/transformations_utils.py +++ b/optimum/onnx/transformations_utils.py @@ -29,7 +29,6 @@ logger = logging.get_logger() -logger.setLevel(logging.INFO) def _find_duplicate_initializers( diff --git a/optimum/onnx/utils.py b/optimum/onnx/utils.py index b52c4f4cdac..c014c1b3429 100644 --- a/optimum/onnx/utils.py +++ b/optimum/onnx/utils.py @@ -71,6 +71,22 @@ def _get_external_data_paths(src_paths: List[Path], dst_paths: List[Path]) -> Tu return src_paths, dst_paths +def _get_model_external_data_paths(model_path: Path) -> List[Path]: + """ + Gets external data paths from the model. + """ + + onnx_model = onnx.load(str(model_path), load_external_data=False) + model_tensors = _get_initializer_tensors(onnx_model) + # filter out tensors that are not external data + model_tensors_ext = [ + ExternalDataInfo(tensor).location + for tensor in model_tensors + if tensor.HasField("data_location") and tensor.data_location == onnx.TensorProto.EXTERNAL + ] + return [model_path.parent / tensor_name for tensor_name in model_tensors_ext] + + def check_model_uses_external_data(model: onnx.ModelProto) -> bool: """ Checks if the model uses external data. diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index f1d4f63a9ff..f3f1535fd45 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -44,6 +44,7 @@ "ORTModelForSemanticSegmentation", "ORTModelForSequenceClassification", "ORTModelForTokenClassification", + "ORTModelForImageToImage", ], "modeling_seq2seq": [ "ORTModelForSeq2SeqLM", @@ -73,21 +74,51 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: _import_structure[".utils.dummy_diffusers_objects"] = [ - "ORTStableDiffusionPipeline", + "ORTDiffusionPipeline", + "ORTPipelineForText2Image", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", + # flux + "ORTFluxPipeline", + # lcm + "ORTLatentConsistencyModelImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", + # sd3 + "ORTStableDiffusion3Img2ImgPipeline", + "ORTStableDiffusion3InpaintPipeline", + "ORTStableDiffusion3Pipeline", + # sd "ORTStableDiffusionImg2ImgPipeline", "ORTStableDiffusionInpaintPipeline", - "ORTStableDiffusionXLPipeline", + "ORTStableDiffusionPipeline", + # xl "ORTStableDiffusionXLImg2ImgPipeline", - "ORTLatentConsistencyModelPipeline", + "ORTStableDiffusionXLInpaintPipeline", + "ORTStableDiffusionXLPipeline", ] else: _import_structure["modeling_diffusion"] = [ - "ORTStableDiffusionPipeline", + "ORTDiffusionPipeline", + "ORTPipelineForText2Image", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", + # flux + "ORTFluxPipeline", + # lcm + "ORTLatentConsistencyModelImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", + # sd3 + "ORTStableDiffusion3Img2ImgPipeline", + "ORTStableDiffusion3InpaintPipeline", + "ORTStableDiffusion3Pipeline", + # sd "ORTStableDiffusionImg2ImgPipeline", "ORTStableDiffusionInpaintPipeline", - "ORTStableDiffusionXLPipeline", + "ORTStableDiffusionPipeline", + # xl "ORTStableDiffusionXLImg2ImgPipeline", - "ORTLatentConsistencyModelPipeline", + "ORTStableDiffusionXLInpaintPipeline", + "ORTStableDiffusionXLPipeline", ] @@ -104,6 +135,7 @@ ORTModelForCustomTasks, ORTModelForFeatureExtraction, ORTModelForImageClassification, + ORTModelForImageToImage, ORTModelForMaskedLM, ORTModelForMultipleChoice, ORTModelForQuestionAnswering, @@ -137,20 +169,54 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: from ..utils.dummy_diffusers_objects import ( + # generic entrypoint + ORTDiffusionPipeline, + # flux + ORTFluxPipeline, + # lcm + ORTLatentConsistencyModelImg2ImgPipeline, ORTLatentConsistencyModelPipeline, + # task-specific entrypoints + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, + # sd3 + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTStableDiffusion3Pipeline, + # sd ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, + # xl ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLInpaintPipeline, ORTStableDiffusionXLPipeline, ) else: from .modeling_diffusion import ( + # generic entrypoint + ORTDiffusionPipeline, + # flux + ORTFluxPipeline, + # lcm + ORTLatentConsistencyModelImg2ImgPipeline, ORTLatentConsistencyModelPipeline, + # task-specific entrypoints + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, + # sd3 + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTStableDiffusion3Pipeline, + # sd ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, + # xl ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLInpaintPipeline, ORTStableDiffusionXLPipeline, ) else: diff --git a/optimum/onnxruntime/base.py b/optimum/onnxruntime/base.py index d9877670ba8..845780cafad 100644 --- a/optimum/onnxruntime/base.py +++ b/optimum/onnxruntime/base.py @@ -41,17 +41,11 @@ class ORTModelPart: _prepare_onnx_inputs = ORTModel._prepare_onnx_inputs _prepare_onnx_outputs = ORTModel._prepare_onnx_outputs - def __init__( - self, - session: InferenceSession, - parent_model: "ORTModel", - ): + def __init__(self, session: InferenceSession, parent_model: "ORTModel"): self.session = session self.parent_model = parent_model - self.normalized_config = NormalizedConfigManager.get_normalized_config_class( - self.parent_model.config.model_type - )(self.parent_model.config) self.main_input_name = self.parent_model.main_input_name + self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} self.input_dtypes = {input_key.name: input_key.type for input_key in session.get_inputs()} @@ -77,6 +71,25 @@ def dtype(self): return None + def to(self, *args, device: Optional[Union[torch.device, str, int]] = None, dtype: Optional[torch.dtype] = None): + for arg in args: + if isinstance(arg, torch.device): + device = arg + elif isinstance(arg, torch.dtype): + dtype = arg + + if device is not None and device != self.device: + raise ValueError( + "Cannot change the device of a model part without changing the device of the parent model. " + "Please use the `to` method of the parent model to change the device." + ) + + if dtype is not None and dtype != self.dtype: + raise NotImplementedError( + f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " + f"Please export the model with the desired dtype." + ) + @abstractmethod def forward(self, *args, **kwargs): pass @@ -90,12 +103,18 @@ class ORTEncoder(ORTModelPart): Encoder part of the encoder-decoder model for ONNX Runtime inference. """ - def forward( - self, - input_ids: torch.LongTensor, - attention_mask: torch.LongTensor, - **kwargs, - ) -> BaseModelOutput: + def __init__(self, session: InferenceSession, parent_model: "ORTModel"): + super().__init__(session, parent_model) + + config = ( + self.parent_model.config.encoder + if hasattr(self.parent_model.config, "encoder") + else self.parent_model.config + ) + + self.normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config) + + def forward(self, input_ids: torch.LongTensor, attention_mask: torch.LongTensor, **kwargs) -> BaseModelOutput: use_torch = isinstance(input_ids, torch.Tensor) self.parent_model.raise_on_numpy_input_io_binding(use_torch) @@ -138,6 +157,14 @@ def __init__( ): super().__init__(session, parent_model) + config = ( + self.parent_model.config.decoder + if hasattr(self.parent_model.config, "decoder") + else self.parent_model.config + ) + + self.normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config) + # TODO: make this less hacky. self.key_value_input_names = [key for key in self.input_names if (".key" in key) or (".value" in key)] self.key_value_output_names = [key for key in self.output_names if (".key" in key) or (".value" in key)] @@ -153,11 +180,7 @@ def __init__( self.use_past_in_outputs = len(self.key_value_output_names) > 0 self.use_past_in_inputs = len(self.key_value_input_names) > 0 - self.use_fp16 = False - for inp in session.get_inputs(): - if "past_key_values" in inp.name and inp.type == "tensor(float16)": - self.use_fp16 = True - break + self.use_fp16 = self.dtype == torch.float16 # We may use ORTDecoderForSeq2Seq for vision-encoder-decoder models, where models as gpt2 # can be used but do not support KV caching for the cross-attention key/values, see: @@ -461,11 +484,3 @@ def prepare_inputs_for_merged( cache_position = cache_position.to(self.device) return use_cache_branch_tensor, past_key_values, cache_position - - -class ORTDecoder(ORTDecoderForSeq2Seq): - def __init__(self, *args, **kwargs): - logger.warning( - "The class `ORTDecoder` is deprecated and will be removed in optimum v1.15.0, please use `ORTDecoderForSeq2Seq` instead." - ) - super().__init__(*args, **kwargs) diff --git a/optimum/onnxruntime/configuration.py b/optimum/onnxruntime/configuration.py index 2e3d9f32d6a..adc1984795a 100644 --- a/optimum/onnxruntime/configuration.py +++ b/optimum/onnxruntime/configuration.py @@ -18,9 +18,8 @@ from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from datasets import Dataset from packaging.version import Version, parse from onnxruntime import __version__ as ort_version @@ -33,6 +32,10 @@ from ..utils import logging +if TYPE_CHECKING: + from datasets import Dataset + + logger = logging.get_logger(__name__) # This value is used to indicate ORT which axis it should use to quantize an operator "per-channel" @@ -117,7 +120,9 @@ def create_calibrator( class AutoCalibrationConfig: @staticmethod - def minmax(dataset: Dataset, moving_average: bool = False, averaging_constant: float = 0.01) -> CalibrationConfig: + def minmax( + dataset: "Dataset", moving_average: bool = False, averaging_constant: float = 0.01 + ) -> CalibrationConfig: """ Args: dataset (`Dataset`): @@ -151,7 +156,7 @@ def minmax(dataset: Dataset, moving_average: bool = False, averaging_constant: f @staticmethod def entropy( - dataset: Dataset, + dataset: "Dataset", num_bins: int = 128, num_quantized_bins: int = 128, ) -> CalibrationConfig: @@ -188,7 +193,7 @@ def entropy( ) @staticmethod - def percentiles(dataset: Dataset, num_bins: int = 2048, percentile: float = 99.999) -> CalibrationConfig: + def percentiles(dataset: "Dataset", num_bins: int = 2048, percentile: float = 99.999) -> CalibrationConfig: """ Args: dataset (`Dataset`): diff --git a/optimum/onnxruntime/model.py b/optimum/onnxruntime/model.py index caa662f3824..4182abc925f 100644 --- a/optimum/onnxruntime/model.py +++ b/optimum/onnxruntime/model.py @@ -14,10 +14,9 @@ import logging import os -from typing import Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union import numpy as np -from datasets import Dataset from transformers import EvalPrediction from transformers.trainer_pt_utils import nested_concat from transformers.trainer_utils import EvalLoopOutput @@ -25,6 +24,10 @@ from onnxruntime import InferenceSession +if TYPE_CHECKING: + from datasets import Dataset + + logger = logging.getLogger(__name__) @@ -59,7 +62,7 @@ def __init__( self.session = InferenceSession(str(model_path), providers=[execution_provider]) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - def evaluation_loop(self, dataset: Dataset): + def evaluation_loop(self, dataset: "Dataset"): """ Run evaluation and returns metrics and predictions. diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py index f6d4b7e20ab..8f1d062221a 100644 --- a/optimum/onnxruntime/modeling_decoder.py +++ b/optimum/onnxruntime/modeling_decoder.py @@ -14,7 +14,6 @@ """Classes handling causal-lm related architectures in ONNX Runtime.""" import logging -import warnings from pathlib import Path from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union @@ -149,6 +148,19 @@ def __init__( generation_config = GenerationConfig.from_model_config(config) self.generation_config = generation_config + + if check_if_transformers_greater("4.44.99"): + misplaced_generation_parameters = self.config._get_non_default_generation_parameters() + if len(misplaced_generation_parameters) > 0: + logger.warning( + "Moving the following attributes in the config to the generation config: " + f"{misplaced_generation_parameters}. You are seeing this warning because you've set " + "generation parameters in the model config, as opposed to in the generation config.", + ) + for param_name, param_value in misplaced_generation_parameters.items(): + setattr(self.generation_config, param_name, param_value) + setattr(self.config, param_name, None) + self.onnx_paths = [self.model_path] self.use_merged = "use_cache_branch" in self.input_names self.model_type = self.config.model_type @@ -328,7 +340,7 @@ def prepare_past_key_values( if self.model_type == "gemma": num_attention_heads = self.normalized_config.num_key_value_heads embed_size_per_head = self.normalized_config.head_dim - elif self.model_type in {"mistral", "llama", "qwen2"}: + elif self.model_type in {"mistral", "llama", "qwen2", "granite"}: num_attention_heads = self.normalized_config.num_key_value_heads else: num_attention_heads = self.normalized_config.num_attention_heads @@ -393,7 +405,6 @@ def _from_pretrained( cls, model_id: Union[str, Path], config: "PretrainedConfig", - use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, force_download: bool = False, @@ -410,15 +421,7 @@ def _from_pretrained( model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, **kwargs, ) -> "ORTModelForCausalLM": - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - + generation_config = kwargs.pop("generation_config", None) model_path = Path(model_id) # We do not implement the logic for use_cache=False, use_merged=True @@ -579,13 +582,30 @@ def _from_pretrained( init_cls = ORTFalconForCausalLM elif config.model_type == "mpt": init_cls = ORTMPTForCausalLM - elif config.model_type == "opt": + # if model was exported with position_ids it means the model was exported with transformers >= v4.46 + elif config.model_type == "opt" and "position_ids" not in input_dims: init_cls = ORTOPTForCausalLM elif config.model_type == "gpt_bigcode": init_cls = ORTGPTBigCodeForCausalLM else: init_cls = ORTModelForCausalLM + if generation_config is None: + try: + generation_config = GenerationConfig.from_pretrained( + model_id, + cache_dir=cache_dir, + force_download=force_download, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + ) + except OSError: + logger.info( + "Generation config file not found, using a generation config created from the model config." + ) + return init_cls( model=model, config=config, @@ -593,6 +613,7 @@ def _from_pretrained( model_save_dir=model_save_dir, preprocessors=preprocessors, use_cache=use_cache, + generation_config=generation_config, ) @classmethod @@ -600,7 +621,6 @@ def _from_transformers( cls, model_id: str, config: "PretrainedConfig", - use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, revision: str = "main", force_download: bool = True, @@ -616,15 +636,6 @@ def _from_transformers( use_io_binding: Optional[bool] = None, task: Optional[str] = None, ) -> "ORTModelForCausalLM": - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - file_name = ONNX_WEIGHTS_NAME if use_merged: @@ -655,8 +666,6 @@ def _from_transformers( force_download=force_download, trust_remote_code=trust_remote_code, ) - - config.save_pretrained(save_dir_path) maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder) return cls._from_pretrained( @@ -712,6 +721,10 @@ def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> for layer_past in past ) + def _save_pretrained(self, save_directory: Union[str, Path]): + super()._save_pretrained(save_directory) + self.generation_config.save_pretrained(save_directory) + class ORTGPTBigCodeForCausalLM(ORTModelForCausalLM): # Adapted from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM.prepare_inputs_for_generation @@ -827,7 +840,6 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwarg attention_mask = kwargs.get("attention_mask", None) use_cache = kwargs.get("use_cache", None) - return { "input_ids": input_ids, "past_key_values": past_key_values, diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 4bbfb2eda2a..66b08e1ef66 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -13,431 +13,382 @@ # limitations under the License. import importlib +import inspect import logging import os import shutil -import warnings from abc import abstractmethod +from collections import OrderedDict from pathlib import Path from tempfile import TemporaryDirectory from typing import Any, Dict, Optional, Union import numpy as np import torch -from diffusers import ( - DDIMScheduler, - LMSDiscreteScheduler, - PNDMScheduler, +from diffusers.configuration_utils import ConfigMixin +from diffusers.pipelines import ( + AutoPipelineForImage2Image, + AutoPipelineForInpainting, + AutoPipelineForText2Image, + LatentConsistencyModelImg2ImgPipeline, + LatentConsistencyModelPipeline, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLInpaintPipeline, + StableDiffusionXLPipeline, ) +from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.schedulers import SchedulerMixin from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available +from diffusers.utils.constants import CONFIG_NAME from huggingface_hub import snapshot_download from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE +from huggingface_hub.utils import validate_hf_hub_args from transformers import CLIPFeatureExtractor, CLIPTokenizer from transformers.file_utils import add_end_docstrings +from transformers.modeling_outputs import ModelOutput import onnxruntime as ort +from optimum.utils import check_if_diffusers_greater from ..exporters.onnx import main_export -from ..onnx.utils import _get_external_data_paths -from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor +from ..onnx.utils import _get_model_external_data_paths from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, + DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, + DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER, DIFFUSION_MODEL_UNET_SUBFOLDER, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ) +from .io_binding import TypeHelper from .modeling_ort import ONNX_MODEL_END_DOCSTRING, ORTModel from .utils import ( - _ORT_TO_NP_TYPE, ONNX_WEIGHTS_NAME, get_provider_for_device, + np_to_pt_generators, parse_device, validate_provider_availability, ) +if check_if_diffusers_greater("0.25.0"): + from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution +else: + from diffusers.models.vae import DiagonalGaussianDistribution # type: ignore + + logger = logging.getLogger(__name__) -class ORTStableDiffusionPipelineBase(ORTModel): - auto_model_class = StableDiffusionPipeline - main_input_name = "input_ids" - base_model_prefix = "onnx_model" +# TODO: support from_pipe() +# TODO: Instead of ORTModel, it makes sense to have a compositional ORTMixin +# TODO: instead of one bloated __init__, we should consider an __init__ per pipeline +class ORTDiffusionPipeline(ORTModel, DiffusionPipeline): config_name = "model_index.json" - sub_component_config_name = "config.json" + auto_model_class = DiffusionPipeline def __init__( self, + scheduler: "SchedulerMixin", vae_decoder_session: ort.InferenceSession, - text_encoder_session: ort.InferenceSession, - unet_session: ort.InferenceSession, - config: Dict[str, Any], - tokenizer: CLIPTokenizer, - scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - feature_extractor: Optional[CLIPFeatureExtractor] = None, + # optional pipeline models + unet_session: Optional[ort.InferenceSession] = None, + transformer_session: Optional[ort.InferenceSession] = None, vae_encoder_session: Optional[ort.InferenceSession] = None, + text_encoder_session: Optional[ort.InferenceSession] = None, text_encoder_2_session: Optional[ort.InferenceSession] = None, - tokenizer_2: Optional[CLIPTokenizer] = None, + text_encoder_3_session: Optional[ort.InferenceSession] = None, + # optional pipeline submodels + tokenizer: Optional["CLIPTokenizer"] = None, + tokenizer_2: Optional["CLIPTokenizer"] = None, + tokenizer_3: Optional["CLIPTokenizer"] = None, + feature_extractor: Optional["CLIPFeatureExtractor"] = None, + # stable diffusion xl specific arguments + force_zeros_for_empty_prompt: bool = True, + requires_aesthetics_score: bool = False, + add_watermarker: Optional[bool] = None, + # onnxruntime specific arguments use_io_binding: Optional[bool] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, + **kwargs, ): - """ - Args: - vae_decoder_session (`ort.InferenceSession`): - The ONNX Runtime inference session associated to the VAE decoder. - text_encoder_session (`ort.InferenceSession`): - The ONNX Runtime inference session associated to the text encoder. - unet_session (`ort.InferenceSession`): - The ONNX Runtime inference session associated to the U-NET. - config (`Dict[str, Any]`): - A config dictionary from which the model components will be instantiated. Make sure to only load - configuration files of compatible classes. - tokenizer (`CLIPTokenizer`): - Tokenizer of class - [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). - scheduler (`Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]`): - A scheduler to be used in combination with the U-NET component to denoise the encoded image latents. - feature_extractor (`Optional[CLIPFeatureExtractor]`, defaults to `None`): - A model extracting features from generated images to be used as inputs for the `safety_checker` - vae_encoder_session (`Optional[ort.InferenceSession]`, defaults to `None`): - The ONNX Runtime inference session associated to the VAE encoder. - use_io_binding (`Optional[bool]`, defaults to `None`): - Whether to use IOBinding during inference to avoid memory copy between the host and devices. Defaults to - `True` if the device is CUDA, otherwise defaults to `False`. - model_save_dir (`Optional[str]`, defaults to `None`): - The directory under which the model exported to ONNX was saved. - """ - self.shared_attributes_init( - vae_decoder_session, - use_io_binding=use_io_binding, - model_save_dir=model_save_dir, + self.unet = ORTModelUnet(unet_session, self) if unet_session is not None else None + self.transformer = ORTModelTransformer(transformer_session, self) if transformer_session is not None else None + self.text_encoder = ( + ORTModelTextEncoder(text_encoder_session, self) if text_encoder_session is not None else None ) - self._internal_dict = config - self.vae_decoder = ORTModelVaeDecoder(vae_decoder_session, self) - self.vae_decoder_model_path = Path(vae_decoder_session._model_path) - self.unet = ORTModelUnet(unet_session, self) - self.unet_model_path = Path(unet_session._model_path) - - if text_encoder_session is not None: - self.text_encoder_model_path = Path(text_encoder_session._model_path) - self.text_encoder = ORTModelTextEncoder(text_encoder_session, self) - else: - self.text_encoder_model_path = None - self.text_encoder = None - - if vae_encoder_session is not None: - self.vae_encoder_model_path = Path(vae_encoder_session._model_path) - self.vae_encoder = ORTModelVaeEncoder(vae_encoder_session, self) - else: - self.vae_encoder_model_path = None - self.vae_encoder = None + self.text_encoder_2 = ( + ORTModelTextEncoder(text_encoder_2_session, self) if text_encoder_2_session is not None else None + ) + self.text_encoder_3 = ( + ORTModelTextEncoder(text_encoder_3_session, self) if text_encoder_3_session is not None else None + ) + # We wrap the VAE Decoder & Encoder in a single object to simulate diffusers API + self.vae_encoder = ORTModelVaeEncoder(vae_encoder_session, self) if vae_encoder_session is not None else None + self.vae_decoder = ORTModelVaeDecoder(vae_decoder_session, self) if vae_decoder_session is not None else None + self.vae = ORTWrapperVae(self.vae_encoder, self.vae_decoder) - if text_encoder_2_session is not None: - self.text_encoder_2_model_path = Path(text_encoder_2_session._model_path) - self.text_encoder_2 = ORTModelTextEncoder(text_encoder_2_session, self) - else: - self.text_encoder_2_model_path = None - self.text_encoder_2 = None + # we allow passing these as torch models for now + self.image_encoder = kwargs.pop("image_encoder", None) # TODO: maybe implement ORTModelImageEncoder + self.safety_checker = kwargs.pop("safety_checker", None) # TODO: maybe implement ORTModelSafetyChecker + self.scheduler = scheduler self.tokenizer = tokenizer self.tokenizer_2 = tokenizer_2 - self.scheduler = scheduler + self.tokenizer_3 = tokenizer_3 self.feature_extractor = feature_extractor - self.safety_checker = None - - sub_models = { - DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER: self.text_encoder, - DIFFUSION_MODEL_UNET_SUBFOLDER: self.unet, - DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER: self.vae_decoder, - DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER: self.vae_encoder, - DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER: self.text_encoder_2, - } - - # Modify config to keep the resulting model compatible with diffusers pipelines - for name in sub_models.keys(): - self._internal_dict[name] = ( - ("diffusers", "OnnxRuntimeModel") if sub_models[name] is not None else (None, None) - ) - self._internal_dict.pop("vae", None) - - if "block_out_channels" in self.vae_decoder.config: - self.vae_scale_factor = 2 ** (len(self.vae_decoder.config["block_out_channels"]) - 1) - else: - self.vae_scale_factor = 8 - self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - - @staticmethod - def load_model( - vae_decoder_path: Union[str, Path], - text_encoder_path: Union[str, Path], - unet_path: Union[str, Path], - vae_encoder_path: Optional[Union[str, Path]] = None, - text_encoder_2_path: Optional[Union[str, Path]] = None, - provider: str = "CPUExecutionProvider", - session_options: Optional[ort.SessionOptions] = None, - provider_options: Optional[Dict] = None, - ): - """ - Creates three inference sessions for respectively the VAE decoder, the text encoder and the U-NET models. - The default provider is `CPUExecutionProvider` to match the default behaviour in PyTorch/TensorFlow/JAX. - - Args: - vae_decoder_path (`Union[str, Path]`): - The path to the VAE decoder ONNX model. - text_encoder_path (`Union[str, Path]`): - The path to the text encoder ONNX model. - unet_path (`Union[str, Path]`): - The path to the U-NET ONNX model. - vae_encoder_path (`Union[str, Path]`, defaults to `None`): - The path to the VAE encoder ONNX model. - text_encoder_2_path (`Union[str, Path]`, defaults to `None`): - The path to the second text decoder ONNX model. - provider (`str`, defaults to `"CPUExecutionProvider"`): - ONNX Runtime provider to use for loading the model. See https://onnxruntime.ai/docs/execution-providers/ - for possible providers. - session_options (`Optional[ort.SessionOptions]`, defaults to `None`): - ONNX Runtime session options to use for loading the model. Defaults to `None`. - provider_options (`Optional[Dict]`, defaults to `None`): - Provider option dictionary corresponding to the provider used. See available options - for each provider: https://onnxruntime.ai/docs/api/c/group___global.html . Defaults to `None`. - """ - vae_decoder = ORTModel.load_model(vae_decoder_path, provider, session_options, provider_options) - unet = ORTModel.load_model(unet_path, provider, session_options, provider_options) - - sessions = { - "vae_encoder": vae_encoder_path, - "text_encoder": text_encoder_path, - "text_encoder_2": text_encoder_2_path, + all_pipeline_init_args = { + "vae": self.vae, + "unet": self.unet, + "transformer": self.transformer, + "text_encoder": self.text_encoder, + "text_encoder_2": self.text_encoder_2, + "text_encoder_3": self.text_encoder_3, + "safety_checker": self.safety_checker, + "image_encoder": self.image_encoder, + "scheduler": self.scheduler, + "tokenizer": self.tokenizer, + "tokenizer_2": self.tokenizer_2, + "tokenizer_3": self.tokenizer_3, + "feature_extractor": self.feature_extractor, + "requires_aesthetics_score": requires_aesthetics_score, + "force_zeros_for_empty_prompt": force_zeros_for_empty_prompt, + "add_watermarker": add_watermarker, } - for key, value in sessions.items(): - if value is not None and value.is_file(): - sessions[key] = ORTModel.load_model(value, provider, session_options, provider_options) - else: - sessions[key] = None + diffusers_pipeline_args = {} + for key in inspect.signature(self.auto_model_class).parameters.keys(): + if key in all_pipeline_init_args: + diffusers_pipeline_args[key] = all_pipeline_init_args[key] + # inits diffusers pipeline specific attributes (registers modules and config) + self.auto_model_class.__init__(self, **diffusers_pipeline_args) - return vae_decoder, sessions["text_encoder"], unet, sessions["vae_encoder"], sessions["text_encoder_2"] + # inits ort specific attributes + self.shared_attributes_init( + model=unet_session if unet_session is not None else transformer_session, + use_io_binding=use_io_binding, + model_save_dir=model_save_dir, + **kwargs, + ) def _save_pretrained(self, save_directory: Union[str, Path]): save_directory = Path(save_directory) - src_to_dst_path = { - self.vae_decoder_model_path: save_directory / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / ONNX_WEIGHTS_NAME, - self.text_encoder_model_path: save_directory / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / ONNX_WEIGHTS_NAME, - self.unet_model_path: save_directory / DIFFUSION_MODEL_UNET_SUBFOLDER / ONNX_WEIGHTS_NAME, - } - sub_models_to_save = { - self.vae_encoder_model_path: DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, - self.text_encoder_2_model_path: DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, + models_to_save_paths = { + (self.unet, save_directory / DIFFUSION_MODEL_UNET_SUBFOLDER), + (self.transformer, save_directory / DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER), + (self.vae_decoder, save_directory / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER), + (self.vae_encoder, save_directory / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER), + (self.text_encoder, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER), + (self.text_encoder_2, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER), + (self.text_encoder_3, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER), } - for path, subfolder in sub_models_to_save.items(): - if path is not None: - src_to_dst_path[path] = save_directory / subfolder / ONNX_WEIGHTS_NAME - - # TODO: Modify _get_external_data_paths to give dictionnary - src_paths = list(src_to_dst_path.keys()) - dst_paths = list(src_to_dst_path.values()) - # Add external data paths in case of large models - src_paths, dst_paths = _get_external_data_paths(src_paths, dst_paths) - - for src_path, dst_path in zip(src_paths, dst_paths): - dst_path.parent.mkdir(parents=True, exist_ok=True) - shutil.copyfile(src_path, dst_path) - config_path = src_path.parent / self.sub_component_config_name - if config_path.is_file(): - shutil.copyfile(config_path, dst_path.parent / self.sub_component_config_name) + for model, save_path in models_to_save_paths: + if model is not None: + model_path = Path(model.session._model_path) + save_path.mkdir(parents=True, exist_ok=True) + # copy onnx model + shutil.copyfile(model_path, save_path / ONNX_WEIGHTS_NAME) + # copy external onnx data + external_data_paths = _get_model_external_data_paths(model_path) + for external_data_path in external_data_paths: + shutil.copyfile(external_data_path, save_path / external_data_path.name) + # copy model config + config_path = model_path.parent / CONFIG_NAME + if config_path.is_file(): + config_save_path = save_path / CONFIG_NAME + shutil.copyfile(config_path, config_save_path) self.scheduler.save_pretrained(save_directory / "scheduler") - if self.feature_extractor is not None: - self.feature_extractor.save_pretrained(save_directory / "feature_extractor") if self.tokenizer is not None: self.tokenizer.save_pretrained(save_directory / "tokenizer") if self.tokenizer_2 is not None: self.tokenizer_2.save_pretrained(save_directory / "tokenizer_2") + if self.tokenizer_3 is not None: + self.tokenizer_3.save_pretrained(save_directory / "tokenizer_3") + if self.feature_extractor is not None: + self.feature_extractor.save_pretrained(save_directory / "feature_extractor") @classmethod def _from_pretrained( cls, model_id: Union[str, Path], config: Dict[str, Any], - use_auth_token: Optional[Union[bool, str]] = None, - token: Optional[Union[bool, str]] = None, + subfolder: str = "", + force_download: bool = False, + local_files_only: bool = False, revision: Optional[str] = None, + trust_remote_code: bool = False, cache_dir: str = HUGGINGFACE_HUB_CACHE, - vae_decoder_file_name: str = ONNX_WEIGHTS_NAME, - text_encoder_file_name: str = ONNX_WEIGHTS_NAME, + token: Optional[Union[bool, str]] = None, unet_file_name: str = ONNX_WEIGHTS_NAME, + transformer_file_name: str = ONNX_WEIGHTS_NAME, + vae_decoder_file_name: str = ONNX_WEIGHTS_NAME, vae_encoder_file_name: str = ONNX_WEIGHTS_NAME, + text_encoder_file_name: str = ONNX_WEIGHTS_NAME, text_encoder_2_file_name: str = ONNX_WEIGHTS_NAME, - local_files_only: bool = False, + text_encoder_3_file_name: str = ONNX_WEIGHTS_NAME, + use_io_binding: Optional[bool] = None, provider: str = "CPUExecutionProvider", - session_options: Optional[ort.SessionOptions] = None, provider_options: Optional[Dict[str, Any]] = None, - use_io_binding: Optional[bool] = None, + session_options: Optional[ort.SessionOptions] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, **kwargs, ): - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, + if use_io_binding: + raise ValueError( + "IOBinding is not yet available for diffusion pipelines, please set `use_io_binding` to False." ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - - if provider == "TensorrtExecutionProvider": - raise ValueError("The provider `'TensorrtExecutionProvider'` is not supported") - model_id = str(model_id) - patterns = set(config.keys()) - sub_models_to_load = patterns.intersection({"feature_extractor", "tokenizer", "tokenizer_2", "scheduler"}) - - if not os.path.isdir(model_id): - patterns.update({"vae_encoder", "vae_decoder"}) - allow_patterns = {os.path.join(k, "*") for k in patterns if not k.startswith("_")} + if not os.path.isdir(str(model_id)): + all_components = {key for key in config.keys() if not key.startswith("_")} | {"vae_encoder", "vae_decoder"} + allow_patterns = {os.path.join(component, "*") for component in all_components} allow_patterns.update( { - vae_decoder_file_name, - text_encoder_file_name, unet_file_name, + transformer_file_name, + vae_decoder_file_name, vae_encoder_file_name, + text_encoder_file_name, text_encoder_2_file_name, + text_encoder_3_file_name, SCHEDULER_CONFIG_NAME, - CONFIG_NAME, cls.config_name, + CONFIG_NAME, } ) - # Downloads all repo's files matching the allowed patterns - model_id = snapshot_download( + model_save_folder = snapshot_download( model_id, cache_dir=cache_dir, + force_download=force_download, local_files_only=local_files_only, - token=token, revision=revision, + token=token, allow_patterns=allow_patterns, ignore_patterns=["*.msgpack", "*.safetensors", "*.bin", "*.xml"], ) - new_model_save_dir = Path(model_id) + else: + model_save_folder = str(model_id) - sub_models = {} - for name in sub_models_to_load: - library_name, library_classes = config[name] - if library_classes is not None: + model_save_path = Path(model_save_folder) + + if model_save_dir is None: + model_save_dir = model_save_path + + model_paths = { + "unet": model_save_path / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, + "transformer": model_save_path / DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER / transformer_file_name, + "vae_decoder": model_save_path / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name, + "vae_encoder": model_save_path / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, + "text_encoder": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name, + "text_encoder_2": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name, + "text_encoder_3": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER / text_encoder_3_file_name, + } + + sessions = {} + for model, path in model_paths.items(): + if kwargs.get(model, None) is not None: + # this allows passing a model directly to from_pretrained + sessions[f"{model}_session"] = kwargs.pop(model) + else: + sessions[f"{model}_session"] = ( + ORTModel.load_model(path, provider, session_options, provider_options) if path.is_file() else None + ) + + submodels = {} + for submodel in {"scheduler", "tokenizer", "tokenizer_2", "tokenizer_3", "feature_extractor"}: + if kwargs.get(submodel, None) is not None: + submodels[submodel] = kwargs.pop(submodel) + elif config.get(submodel, (None, None))[0] is not None: + library_name, library_classes = config.get(submodel) library = importlib.import_module(library_name) class_obj = getattr(library, library_classes) load_method = getattr(class_obj, "from_pretrained") # Check if the module is in a subdirectory - if (new_model_save_dir / name).is_dir(): - sub_models[name] = load_method(new_model_save_dir / name) + if (model_save_path / submodel).is_dir(): + submodels[submodel] = load_method(model_save_path / submodel) else: - sub_models[name] = load_method(new_model_save_dir) - - vae_decoder, text_encoder, unet, vae_encoder, text_encoder_2 = cls.load_model( - vae_decoder_path=new_model_save_dir / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name, - text_encoder_path=new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name, - unet_path=new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, - vae_encoder_path=new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, - text_encoder_2_path=new_model_save_dir - / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER - / text_encoder_2_file_name, - provider=provider, - session_options=session_options, - provider_options=provider_options, - ) + submodels[submodel] = load_method(model_save_path) - if model_save_dir is None: - model_save_dir = new_model_save_dir - - if use_io_binding: - raise ValueError( - "IOBinding is not yet available for stable diffusion model, please set `use_io_binding` to False." - ) + # same as DiffusionPipeline.from_pretraoned, if called directly, it loads the class in the config + if cls.__name__ == "ORTDiffusionPipeline": + class_name = config["_class_name"] + ort_pipeline_class = _get_ort_class(class_name) + else: + ort_pipeline_class = cls - return cls( - vae_decoder_session=vae_decoder, - text_encoder_session=text_encoder, - unet_session=unet, - config=config, - tokenizer=sub_models.get("tokenizer", None), - scheduler=sub_models.get("scheduler"), - feature_extractor=sub_models.get("feature_extractor", None), - tokenizer_2=sub_models.get("tokenizer_2", None), - vae_encoder_session=vae_encoder, - text_encoder_2_session=text_encoder_2, + ort_pipeline = ort_pipeline_class( + **sessions, + **submodels, use_io_binding=use_io_binding, model_save_dir=model_save_dir, + **kwargs, ) + # same as in DiffusionPipeline.from_pretrained, we save where the model was instantiated from + ort_pipeline.register_to_config(_name_or_path=config.get("_name_or_path", str(model_id))) + + return ort_pipeline + @classmethod - def _from_transformers( + def _export( cls, model_id: str, - config: Optional[str] = None, - use_auth_token: Optional[Union[bool, str]] = None, - token: Optional[Union[bool, str]] = None, - revision: str = "main", - force_download: bool = True, - cache_dir: str = HUGGINGFACE_HUB_CACHE, + config: Dict[str, Any], subfolder: str = "", + force_download: bool = False, local_files_only: bool = False, + revision: Optional[str] = None, trust_remote_code: bool = False, + cache_dir: str = HUGGINGFACE_HUB_CACHE, + token: Optional[Union[bool, str]] = None, + use_io_binding: Optional[bool] = None, provider: str = "CPUExecutionProvider", session_options: Optional[ort.SessionOptions] = None, provider_options: Optional[Dict[str, Any]] = None, - use_io_binding: Optional[bool] = None, task: Optional[str] = None, - ) -> "ORTStableDiffusionPipeline": - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - + **kwargs, + ) -> "ORTDiffusionPipeline": if task is None: task = cls._auto_model_to_task(cls.auto_model_class) - save_dir = TemporaryDirectory() - save_dir_path = Path(save_dir.name) + # we continue passing the model_save_dir from here on to avoid it being cleaned up + # might be better to use a persistent temporary directory such as the one implemented in + # https://gist.github.com/twolfson/2929dc1163b0a76d2c2b66d51f9bc808 + model_save_dir = TemporaryDirectory() + model_save_path = Path(model_save_dir.name) main_export( - model_name_or_path=model_id, - output=save_dir_path, - task=task, + model_id, + output=model_save_path, do_validation=False, no_post_process=True, - subfolder=subfolder, + token=token, revision=revision, cache_dir=cache_dir, - token=token, - local_files_only=local_files_only, + subfolder=subfolder, force_download=force_download, + local_files_only=local_files_only, trust_remote_code=trust_remote_code, + library_name="diffusers", + task=task, ) return cls._from_pretrained( - save_dir_path, + model_save_path, config=config, provider=provider, - session_options=session_options, provider_options=provider_options, + session_options=session_options, use_io_binding=use_io_binding, - model_save_dir=save_dir, + model_save_dir=model_save_dir, + **kwargs, ) def to(self, device: Union[torch.device, str, int]): @@ -455,19 +406,29 @@ def to(self, device: Union[torch.device, str, int]): device, provider_options = parse_device(device) provider = get_provider_for_device(device) - validate_provider_availability(provider) # raise error if the provider is not available + validate_provider_availability(provider) if device.type == "cuda" and self.providers[0] == "TensorrtExecutionProvider": return self self.vae_decoder.session.set_providers([provider], provider_options=[provider_options]) - self.text_encoder.session.set_providers([provider], provider_options=[provider_options]) - self.unet.session.set_providers([provider], provider_options=[provider_options]) + if self.unet is not None: + self.unet.session.set_providers([provider], provider_options=[provider_options]) + if self.transformer is not None: + self.transformer.session.set_providers([provider], provider_options=[provider_options]) if self.vae_encoder is not None: self.vae_encoder.session.set_providers([provider], provider_options=[provider_options]) - - self.providers = self.vae_decoder.session.get_providers() + if self.text_encoder is not None: + self.text_encoder.session.set_providers([provider], provider_options=[provider_options]) + if self.text_encoder_2 is not None: + self.text_encoder_2.session.set_providers([provider], provider_options=[provider_options]) + if self.text_encoder_3 is not None: + self.text_encoder_3.session.set_providers([provider], provider_options=[provider_options]) + + self.providers = ( + self.unet.session.get_providers() if self.unet is not None else self.transformer.session.get_providers() + ) self._device = device return self @@ -476,194 +437,723 @@ def to(self, device: Union[torch.device, str, int]): def _load_config(cls, config_name_or_path: Union[str, os.PathLike], **kwargs): return cls.load_config(config_name_or_path, **kwargs) - def _save_config(self, save_directory): - self.save_config(save_directory) + def _save_config(self, save_directory: Union[str, Path]): + model_dir = ( + self.model_save_dir + if not isinstance(self.model_save_dir, TemporaryDirectory) + else self.model_save_dir.name + ) + save_dir = Path(save_directory) + original_config = Path(model_dir) / self.config_name + if original_config.exists(): + if not save_dir.exists(): + save_dir.mkdir(parents=True) + shutil.copy(original_config, save_dir) + else: + self.save_config(save_directory) -# TODO : Use ORTModelPart once IOBinding support is added -class _ORTDiffusionModelPart: - """ - For multi-file ONNX models, represents a part of the model. - It has its own `onnxruntime.InferenceSession`, and can perform a forward pass. - """ + @property + def components(self) -> Dict[str, Any]: + components = { + "vae": self.vae, + "unet": self.unet, + "transformer": self.transformer, + "text_encoder": self.text_encoder, + "text_encoder_2": self.text_encoder_2, + "text_encoder_3": self.text_encoder_3, + "safety_checker": self.safety_checker, + "image_encoder": self.image_encoder, + } + components = {k: v for k, v in components.items() if v is not None} + return components + + def __call__(self, *args, **kwargs): + # we do this to keep numpy random states support for now + # TODO: deprecate and add warnings when a random state is passed + + args = list(args) + for i in range(len(args)): + args[i] = np_to_pt_generators(args[i], self.device) + + for k, v in kwargs.items(): + kwargs[k] = np_to_pt_generators(v, self.device) - CONFIG_NAME = "config.json" + return self.auto_model_class.__call__(self, *args, **kwargs) - def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): + +class ORTPipelinePart(ConfigMixin): + config_name: str = CONFIG_NAME + + def __init__(self, session: ort.InferenceSession, parent_pipeline: ORTDiffusionPipeline): self.session = session - self.parent_model = parent_model + self.parent_pipeline = parent_pipeline + self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} - config_path = Path(session._model_path).parent / self.CONFIG_NAME - self.config = self.parent_model._dict_from_json_file(config_path) if config_path.is_file() else {} - self.input_dtype = {inputs.name: _ORT_TO_NP_TYPE[inputs.type] for inputs in self.session.get_inputs()} + + self.input_dtypes = {input_key.name: input_key.type for input_key in self.session.get_inputs()} + self.output_dtypes = {output_key.name: output_key.type for output_key in self.session.get_outputs()} + + self.input_shapes = {input_key.name: input_key.shape for input_key in self.session.get_inputs()} + self.output_shapes = {output_key.name: output_key.shape for output_key in self.session.get_outputs()} + + config_file_path = Path(session._model_path).parent / self.config_name + if not config_file_path.is_file(): + # config is mandatory for the model part to be used for inference + raise ValueError(f"Configuration file for {self.__class__.__name__} not found at {config_file_path}") + config_dict = self._dict_from_json_file(config_file_path) + self.register_to_config(**config_dict) @property def device(self): - return self.parent_model.device + return self.parent_pipeline.device + + @property + def dtype(self): + for dtype in self.input_dtypes.values(): + torch_dtype = TypeHelper.ort_type_to_torch_type(dtype) + if torch_dtype.is_floating_point: + return torch_dtype + + for dtype in self.output_dtypes.values(): + torch_dtype = TypeHelper.ort_type_to_torch_type(dtype) + if torch_dtype.is_floating_point: + return torch_dtype + + return None + + def to(self, *args, device: Optional[Union[torch.device, str, int]] = None, dtype: Optional[torch.dtype] = None): + for arg in args: + if isinstance(arg, torch.device): + device = arg + elif isinstance(arg, (int, str)): + device = torch.device(arg) + elif isinstance(arg, torch.dtype): + dtype = arg + + if device is not None and device != self.device: + raise ValueError( + "Cannot change the device of a pipeline part without changing the device of the parent pipeline. " + "Please use the `to` method of the parent pipeline to change the device." + ) + + if dtype is not None and dtype != self.dtype: + raise NotImplementedError( + f"Cannot change the dtype of the pipeline from {self.dtype} to {dtype}. " + f"Please export the pipeline with the desired dtype." + ) + + def prepare_onnx_inputs(self, use_torch: bool, **inputs: Union[torch.Tensor, np.ndarray]) -> Dict[str, np.ndarray]: + onnx_inputs = {} + + # converts pytorch inputs into numpy inputs for onnx + for input_name in self.input_names.keys(): + onnx_inputs[input_name] = inputs.pop(input_name) + + if use_torch: + onnx_inputs[input_name] = onnx_inputs[input_name].numpy(force=True) + + if onnx_inputs[input_name].dtype != self.input_dtypes[input_name]: + onnx_inputs[input_name] = onnx_inputs[input_name].astype( + TypeHelper.ort_type_to_numpy_type(self.input_dtypes[input_name]) + ) + + return onnx_inputs + + def prepare_onnx_outputs( + self, use_torch: bool, *onnx_outputs: np.ndarray + ) -> Dict[str, Union[torch.Tensor, np.ndarray]]: + model_outputs = {} + + # converts onnxruntime outputs into tensor for standard outputs + for output_name, idx in self.output_names.items(): + model_outputs[output_name] = onnx_outputs[idx] + + if use_torch: + model_outputs[output_name] = torch.from_numpy(model_outputs[output_name]).to(self.device) + + return model_outputs @abstractmethod def forward(self, *args, **kwargs): - pass + raise NotImplementedError def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) -class ORTModelTextEncoder(_ORTDiffusionModelPart): - def forward(self, input_ids: np.ndarray): - onnx_inputs = { - "input_ids": input_ids, - } - outputs = self.session.run(None, onnx_inputs) - return outputs +class ORTModelUnet(ORTPipelinePart): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # can be missing from models exported long ago + if not hasattr(self.config, "time_cond_proj_dim"): + logger.warning( + "The `time_cond_proj_dim` attribute is missing from the UNet configuration. " + "Please re-export the model with newer version of optimum and diffusers." + ) + self.register_to_config(time_cond_proj_dim=None) -class ORTModelUnet(_ORTDiffusionModelPart): - def __init__(self, session: ort.InferenceSession, parent_model: ORTModel): - super().__init__(session, parent_model) + if len(self.input_shapes["timestep"]) > 0: + logger.warning( + "The exported unet onnx model expects a non scalar timestep input. " + "We will have to unsqueeze the timestep input at each iteration which might be inefficient. " + "Please re-export the pipeline with newer version of optimum and diffusers to avoid this warning." + ) def forward( self, - sample: np.ndarray, - timestep: np.ndarray, - encoder_hidden_states: np.ndarray, - text_embeds: Optional[np.ndarray] = None, - time_ids: Optional[np.ndarray] = None, - timestep_cond: Optional[np.ndarray] = None, + sample: Union[np.ndarray, torch.Tensor], + timestep: Union[np.ndarray, torch.Tensor], + encoder_hidden_states: Union[np.ndarray, torch.Tensor], + timestep_cond: Optional[Union[np.ndarray, torch.Tensor]] = None, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + added_cond_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = False, ): - onnx_inputs = { + use_torch = isinstance(sample, torch.Tensor) + + if len(self.input_shapes["timestep"]) > 0: + timestep = timestep.unsqueeze(0) + + model_inputs = { "sample": sample, "timestep": timestep, "encoder_hidden_states": encoder_hidden_states, + "timestep_cond": timestep_cond, + **(cross_attention_kwargs or {}), + **(added_cond_kwargs or {}), } - if text_embeds is not None: - onnx_inputs["text_embeds"] = text_embeds - if time_ids is not None: - onnx_inputs["time_ids"] = time_ids - if timestep_cond is not None: - onnx_inputs["timestep_cond"] = timestep_cond - outputs = self.session.run(None, onnx_inputs) - return outputs + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + if return_dict: + return model_outputs -class ORTModelVaeDecoder(_ORTDiffusionModelPart): - def forward(self, latent_sample: np.ndarray): - onnx_inputs = { - "latent_sample": latent_sample, - } - outputs = self.session.run(None, onnx_inputs) - return outputs + return ModelOutput(**model_outputs) -class ORTModelVaeEncoder(_ORTDiffusionModelPart): - def forward(self, sample: np.ndarray): - onnx_inputs = { - "sample": sample, +class ORTModelTransformer(ORTPipelinePart): + def forward( + self, + hidden_states: Union[np.ndarray, torch.Tensor], + encoder_hidden_states: Union[np.ndarray, torch.Tensor], + pooled_projections: Union[np.ndarray, torch.Tensor], + timestep: Union[np.ndarray, torch.Tensor], + guidance: Optional[Union[np.ndarray, torch.Tensor]] = None, + txt_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, + img_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, + joint_attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = False, + ): + use_torch = isinstance(hidden_states, torch.Tensor) + + model_inputs = { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_projections, + "timestep": timestep, + "guidance": guidance, + "txt_ids": txt_ids, + "img_ids": img_ids, + **(joint_attention_kwargs or {}), } - outputs = self.session.run(None, onnx_inputs) - return outputs + + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + + if return_dict: + return model_outputs + + return ModelOutput(**model_outputs) + + +class ORTModelTextEncoder(ORTPipelinePart): + def forward( + self, + input_ids: Union[np.ndarray, torch.Tensor], + attention_mask: Optional[Union[np.ndarray, torch.Tensor]] = None, + output_hidden_states: Optional[bool] = None, + return_dict: bool = False, + ): + use_torch = isinstance(input_ids, torch.Tensor) + + model_inputs = {"input_ids": input_ids} + + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + + if output_hidden_states: + model_outputs["hidden_states"] = [] + num_layers = self.num_hidden_layers if hasattr(self, "num_hidden_layers") else self.num_decoder_layers + for i in range(num_layers): + model_outputs["hidden_states"].append(model_outputs.pop(f"hidden_states.{i}")) + model_outputs["hidden_states"].append(model_outputs.get("last_hidden_state")) + else: + num_layers = self.num_hidden_layers if hasattr(self, "num_hidden_layers") else self.num_decoder_layers + for i in range(num_layers): + model_outputs.pop(f"hidden_states.{i}", None) + + if return_dict: + return model_outputs + + return ModelOutput(**model_outputs) + + +class ORTModelVaeEncoder(ORTPipelinePart): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # can be missing from models exported long ago + if not hasattr(self.config, "scaling_factor"): + logger.warning( + "The `scaling_factor` attribute is missing from the VAE encoder configuration. " + "Please re-export the model with newer version of optimum and diffusers to avoid this warning." + ) + self.register_to_config(scaling_factor=2 ** (len(self.config.block_out_channels) - 1)) + + def forward( + self, + sample: Union[np.ndarray, torch.Tensor], + generator: Optional[torch.Generator] = None, + return_dict: bool = False, + ): + use_torch = isinstance(sample, torch.Tensor) + + model_inputs = {"sample": sample} + + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + + if "latent_sample" in model_outputs: + model_outputs["latents"] = model_outputs.pop("latent_sample") + + if "latent_parameters" in model_outputs: + model_outputs["latent_dist"] = DiagonalGaussianDistribution( + parameters=model_outputs.pop("latent_parameters") + ) + + if return_dict: + return model_outputs + + return ModelOutput(**model_outputs) + + +class ORTModelVaeDecoder(ORTPipelinePart): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # can be missing from models exported long ago + if not hasattr(self.config, "scaling_factor"): + logger.warning( + "The `scaling_factor` attribute is missing from the VAE decoder configuration. " + "Please re-export the model with newer version of optimum and diffusers to avoid this warning." + ) + self.register_to_config(scaling_factor=2 ** (len(self.config.block_out_channels) - 1)) + + def forward( + self, + latent_sample: Union[np.ndarray, torch.Tensor], + generator: Optional[torch.Generator] = None, + return_dict: bool = False, + ): + use_torch = isinstance(latent_sample, torch.Tensor) + + model_inputs = {"latent_sample": latent_sample} + + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + + if "latent_sample" in model_outputs: + model_outputs["latents"] = model_outputs.pop("latent_sample") + + if return_dict: + return model_outputs + + return ModelOutput(**model_outputs) + + +class ORTWrapperVae(ORTPipelinePart): + def __init__(self, encoder: ORTModelVaeEncoder, decoder: ORTModelVaeDecoder): + self.decoder = decoder + self.encoder = encoder + + @property + def config(self): + return self.decoder.config + + @property + def dtype(self): + return self.decoder.dtype + + @property + def device(self): + return self.decoder.device + + def decode(self, *args, **kwargs): + return self.decoder(*args, **kwargs) + + def encode(self, *args, **kwargs): + return self.encoder(*args, **kwargs) + + def to(self, *args, **kwargs): + self.decoder.to(*args, **kwargs) + if self.encoder is not None: + self.encoder.to(*args, **kwargs) @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionPipeline(ORTStableDiffusionPipelineBase, StableDiffusionPipelineMixin): +class ORTStableDiffusionPipeline(ORTDiffusionPipeline, StableDiffusionPipeline): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). """ - __call__ = StableDiffusionPipelineMixin.__call__ + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = StableDiffusionPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDiffusionImg2ImgPipelineMixin): +class ORTStableDiffusionImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionImg2ImgPipeline): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). """ - __call__ = StableDiffusionImg2ImgPipelineMixin.__call__ + main_input_name = "image" + export_feature = "image-to-image" + auto_model_class = StableDiffusionImg2ImgPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDiffusionInpaintPipelineMixin): +class ORTStableDiffusionInpaintPipeline(ORTDiffusionPipeline, StableDiffusionInpaintPipeline): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). """ - __call__ = StableDiffusionInpaintPipelineMixin.__call__ + main_input_name = "prompt" + export_feature = "inpainting" + auto_model_class = StableDiffusionInpaintPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyPipelineMixin): +class ORTStableDiffusionXLPipeline(ORTDiffusionPipeline, StableDiffusionXLPipeline): """ - ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). """ - __call__ = LatentConsistencyPipelineMixin.__call__ + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = StableDiffusionXLPipeline + + def _get_add_time_ids( + self, + original_size, + crops_coords_top_left, + target_size, + dtype, + text_encoder_projection_dim=None, + ): + add_time_ids = list(original_size + crops_coords_top_left + target_size) + + add_time_ids = torch.tensor([add_time_ids], dtype=dtype) + return add_time_ids -class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionXLImg2ImgPipeline(ORTDiffusionPipeline, StableDiffusionXLImg2ImgPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). + """ + + main_input_name = "prompt" + export_feature = "image-to-image" auto_model_class = StableDiffusionXLImg2ImgPipeline - def __init__( + def _get_add_time_ids( self, - vae_decoder_session: ort.InferenceSession, - text_encoder_session: ort.InferenceSession, - unet_session: ort.InferenceSession, - config: Dict[str, Any], - tokenizer: CLIPTokenizer, - scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - feature_extractor: Optional[CLIPFeatureExtractor] = None, - vae_encoder_session: Optional[ort.InferenceSession] = None, - text_encoder_2_session: Optional[ort.InferenceSession] = None, - tokenizer_2: Optional[CLIPTokenizer] = None, - use_io_binding: Optional[bool] = None, - model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - add_watermarker: Optional[bool] = None, + original_size, + crops_coords_top_left, + target_size, + aesthetic_score, + negative_aesthetic_score, + negative_original_size, + negative_crops_coords_top_left, + negative_target_size, + dtype, + text_encoder_projection_dim=None, ): - super().__init__( - vae_decoder_session=vae_decoder_session, - text_encoder_session=text_encoder_session, - unet_session=unet_session, - config=config, - tokenizer=tokenizer, - scheduler=scheduler, - feature_extractor=feature_extractor, - vae_encoder_session=vae_encoder_session, - text_encoder_2_session=text_encoder_2_session, - tokenizer_2=tokenizer_2, - use_io_binding=use_io_binding, - model_save_dir=model_save_dir, - ) + if self.config.requires_aesthetics_score: + add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) + add_neg_time_ids = list( + negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) + ) + else: + add_time_ids = list(original_size + crops_coords_top_left + target_size) + add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) - add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available() + add_time_ids = torch.tensor([add_time_ids], dtype=dtype) + add_neg_time_ids = torch.tensor([add_neg_time_ids], dtype=dtype) + + return add_time_ids, add_neg_time_ids - if add_watermarker: - if not is_invisible_watermark_available(): - raise ImportError( - "`add_watermarker` requires invisible-watermark to be installed, which can be installed with `pip install invisible-watermark`." - ) - from ..pipelines.diffusers.watermark import StableDiffusionXLWatermarker +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionXLInpaintPipeline(ORTDiffusionPipeline, StableDiffusionXLInpaintPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLInpaintPipeline). + """ - self.watermark = StableDiffusionXLWatermarker() + main_input_name = "image" + export_feature = "inpainting" + auto_model_class = StableDiffusionXLInpaintPipeline + + def _get_add_time_ids( + self, + original_size, + crops_coords_top_left, + target_size, + aesthetic_score, + negative_aesthetic_score, + negative_original_size, + negative_crops_coords_top_left, + negative_target_size, + dtype, + text_encoder_projection_dim=None, + ): + if self.config.requires_aesthetics_score: + add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) + add_neg_time_ids = list( + negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) + ) else: - self.watermark = None + add_time_ids = list(original_size + crops_coords_top_left + target_size) + add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) + + add_time_ids = torch.tensor([add_time_ids], dtype=dtype) + add_neg_time_ids = torch.tensor([add_neg_time_ids], dtype=dtype) + + return add_time_ids, add_neg_time_ids @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffusionXLPipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTDiffusionPipeline, LatentConsistencyModelPipeline): """ - ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ - __call__ = StableDiffusionXLPipelineMixin.__call__ + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = LatentConsistencyModelPipeline @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, StableDiffusionXLImg2ImgPipelineMixin): +class ORTLatentConsistencyModelImg2ImgPipeline(ORTDiffusionPipeline, LatentConsistencyModelImg2ImgPipeline): """ - ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency_img2img#diffusers.LatentConsistencyModelImg2ImgPipeline). """ - __call__ = StableDiffusionXLImg2ImgPipelineMixin.__call__ + main_input_name = "image" + export_feature = "image-to-image" + auto_model_class = LatentConsistencyModelImg2ImgPipeline + + +class ORTUnavailablePipeline: + MIN_VERSION = None + + def __init__(self, *args, **kwargs): + raise NotImplementedError( + f"The pipeline {self.__class__.__name__} is not available in the current version of `diffusers`. " + f"Please upgrade `diffusers` to {self.MIN_VERSION} or later." + ) + + +if check_if_diffusers_greater("0.29.0"): + from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3Pipeline(ORTDiffusionPipeline, StableDiffusion3Pipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3Pipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusion3Pipeline). + """ + + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = StableDiffusion3Pipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3Img2ImgPipeline(ORTDiffusionPipeline, StableDiffusion3Img2ImgPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3Img2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusion3Img2ImgPipeline). + """ + + main_input_name = "image" + export_feature = "image-to-image" + auto_model_class = StableDiffusion3Img2ImgPipeline + +else: + + class ORTStableDiffusion3Pipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.29.0" + + class ORTStableDiffusion3Img2ImgPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.29.0" + + +if check_if_diffusers_greater("0.30.0"): + from diffusers import FluxPipeline, StableDiffusion3InpaintPipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3InpaintPipeline(ORTDiffusionPipeline, StableDiffusion3InpaintPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3InpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusion3InpaintPipeline). + """ + + main_input_name = "prompt" + export_feature = "inpainting" + auto_model_class = StableDiffusion3InpaintPipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTFluxPipeline(ORTDiffusionPipeline, FluxPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.FluxPipeline](https://huggingface.co/docs/diffusers/api/pipelines/flux/text2img#diffusers.FluxPipeline). + """ + + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = FluxPipeline + +else: + + class ORTStableDiffusion3InpaintPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.30.0" + + class ORTFluxPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.30.0" + + +SUPPORTED_ORT_PIPELINES = [ + ORTStableDiffusionPipeline, + ORTStableDiffusionImg2ImgPipeline, + ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionXLPipeline, + ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLInpaintPipeline, + ORTLatentConsistencyModelPipeline, + ORTLatentConsistencyModelImg2ImgPipeline, + ORTStableDiffusion3Pipeline, + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTFluxPipeline, +] + + +def _get_ort_class(pipeline_class_name: str, throw_error_if_not_exist: bool = True): + for ort_pipeline_class in SUPPORTED_ORT_PIPELINES: + if ( + ort_pipeline_class.__name__ == pipeline_class_name + or ort_pipeline_class.auto_model_class.__name__ == pipeline_class_name + ): + return ort_pipeline_class + + if throw_error_if_not_exist: + raise ValueError(f"ORTDiffusionPipeline can't find a pipeline linked to {pipeline_class_name}") + + +ORT_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("flux", ORTFluxPipeline), + ("latent-consistency", ORTLatentConsistencyModelPipeline), + ("stable-diffusion", ORTStableDiffusionPipeline), + ("stable-diffusion-3", ORTStableDiffusion3Pipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), + ] +) + +ORT_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("latent-consistency", ORTLatentConsistencyModelImg2ImgPipeline), + ("stable-diffusion", ORTStableDiffusionImg2ImgPipeline), + ("stable-diffusion-3", ORTStableDiffusion3Img2ImgPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLImg2ImgPipeline), + ] +) + +ORT_INPAINT_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", ORTStableDiffusionInpaintPipeline), + ("stable-diffusion-3", ORTStableDiffusion3InpaintPipeline), + ("stable-diffusion-xl", ORTStableDiffusionXLInpaintPipeline), + ] +) + +SUPPORTED_ORT_PIPELINES_MAPPINGS = [ + ORT_TEXT2IMAGE_PIPELINES_MAPPING, + ORT_IMAGE2IMAGE_PIPELINES_MAPPING, + ORT_INPAINT_PIPELINES_MAPPING, +] + + +def _get_task_ort_class(mapping, pipeline_class_name): + def _get_model_name(pipeline_class_name): + for ort_pipelines_mapping in SUPPORTED_ORT_PIPELINES_MAPPINGS: + for model_name, ort_pipeline_class in ort_pipelines_mapping.items(): + if ( + ort_pipeline_class.__name__ == pipeline_class_name + or ort_pipeline_class.auto_model_class.__name__ == pipeline_class_name + ): + return model_name + + model_name = _get_model_name(pipeline_class_name) + + if model_name is not None: + task_class = mapping.get(model_name, None) + if task_class is not None: + return task_class + + raise ValueError(f"ORTPipelineForTask can't find a pipeline linked to {pipeline_class_name} for {model_name}") + + +class ORTPipelineForTask(ConfigMixin): + config_name = "model_index.json" + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs) -> ORTDiffusionPipeline: + load_config_kwargs = { + "force_download": kwargs.get("force_download", False), + "resume_download": kwargs.get("resume_download", None), + "local_files_only": kwargs.get("local_files_only", False), + "cache_dir": kwargs.get("cache_dir", None), + "revision": kwargs.get("revision", None), + "proxies": kwargs.get("proxies", None), + "token": kwargs.get("token", None), + } + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + config = config[0] if isinstance(config, tuple) else config + class_name = config["_class_name"] + + ort_pipeline_class = _get_task_ort_class(cls.ort_pipelines_mapping, class_name) + + return ort_pipeline_class.from_pretrained(pretrained_model_or_path, **kwargs) + + +class ORTPipelineForText2Image(ORTPipelineForTask): + auto_model_class = AutoPipelineForText2Image + ort_pipelines_mapping = ORT_TEXT2IMAGE_PIPELINES_MAPPING + + +class ORTPipelineForImage2Image(ORTPipelineForTask): + auto_model_class = AutoPipelineForImage2Image + ort_pipelines_mapping = ORT_IMAGE2IMAGE_PIPELINES_MAPPING + + +class ORTPipelineForInpainting(ORTPipelineForTask): + auto_model_class = AutoPipelineForInpainting + ort_pipelines_mapping = ORT_INPAINT_PIPELINES_MAPPING diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 254b771e334..8e5a814b689 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -34,6 +34,7 @@ AutoModelForAudioXVector, AutoModelForCTC, AutoModelForImageClassification, + AutoModelForImageToImage, AutoModelForMaskedLM, AutoModelForMultipleChoice, AutoModelForQuestionAnswering, @@ -47,6 +48,7 @@ BaseModelOutput, CausalLMOutput, ImageClassifierOutput, + ImageSuperResolutionOutput, MaskedLMOutput, ModelOutput, MultipleChoiceModelOutput, @@ -508,13 +510,12 @@ def _from_pretrained( if file_name is None: if model_path.is_dir(): - onnx_files = list(model_path.glob("*.onnx")) + onnx_files = list((model_path / subfolder).glob("*.onnx")) else: repo_files, _ = TasksManager.get_model_files( model_id, revision=revision, cache_dir=cache_dir, token=token ) repo_files = map(Path, repo_files) - pattern = "*.onnx" if subfolder == "" else f"{subfolder}/*.onnx" onnx_files = [p for p in repo_files if p.match(pattern)] @@ -661,8 +662,6 @@ def _export( force_download=force_download, trust_remote_code=trust_remote_code, ) - - config.save_pretrained(save_dir_path) maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder) return cls._from_pretrained( @@ -932,13 +931,12 @@ def _prepare_onnx_inputs( self, use_torch: bool, **inputs: Union[torch.Tensor, np.ndarray] ) -> Dict[str, np.ndarray]: onnx_inputs = {} - # converts pytorch inputs into numpy inputs for onnx for input_name in self.input_names.keys(): onnx_inputs[input_name] = inputs.pop(input_name) if use_torch: - onnx_inputs[input_name] = onnx_inputs[input_name].cpu().detach().numpy() + onnx_inputs[input_name] = onnx_inputs[input_name].numpy(force=True) if onnx_inputs[input_name].dtype != self.input_dtypes[input_name]: onnx_inputs[input_name] = onnx_inputs[input_name].astype( @@ -983,10 +981,9 @@ def _cached_file( token = use_auth_token model_path = Path(model_path) - # locates a file in a local folder and repo, downloads and cache it if necessary. if model_path.is_dir(): - model_cache_path = model_path / file_name + model_cache_path = model_path / subfolder / file_name preprocessors = maybe_load_preprocessors(model_path.as_posix()) else: model_cache_path = hf_hub_download( @@ -1088,6 +1085,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -1169,7 +1169,6 @@ def _export( library_name="transformers", ) - config.save_pretrained(save_dir_path) maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder) return cls._from_pretrained( @@ -1244,6 +1243,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -1333,6 +1335,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -1440,6 +1445,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -1530,6 +1538,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -1613,6 +1624,9 @@ def forward( use_torch = isinstance(input_ids, torch.Tensor) self.raise_on_numpy_input_io_binding(use_torch) + if token_type_ids is None and "token_type_ids" in self.input_names: + token_type_ids = torch.zeros_like(input_ids) if use_torch else np.zeros_like(input_ids) + if self.device.type == "cuda" and self.use_io_binding: io_binding, output_shapes, output_buffers = self.prepare_io_binding( input_ids, @@ -2183,6 +2197,77 @@ def forward( return TokenClassifierOutput(logits=logits) +IMAGE_TO_IMAGE_EXAMPLE = r""" + Example of image-to-image (Super Resolution): + + ```python + >>> from transformers import {processor_class} + >>> from optimum.onnxruntime import {model_class} + >>> from PIL import Image + + >>> image = Image.open("path/to/image.jpg") + + >>> image_processor = {processor_class}.from_pretrained("{checkpoint}") + >>> model = {model_class}.from_pretrained("{checkpoint}") + + >>> inputs = image_processor(images=image, return_tensors="pt") + + >>> with torch.no_grad(): + ... logits = model(**inputs).logits + ``` +""" + + +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTModelForImageToImage(ORTModel): + """ + ONNX Model for image-to-image tasks. This class officially supports pix2pix, cyclegan, wav2vec2, wav2vec2-conformer. + """ + + auto_model_class = AutoModelForImageToImage + + @add_start_docstrings_to_model_forward( + ONNX_IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width") + + IMAGE_TO_IMAGE_EXAMPLE.format( + processor_class=_PROCESSOR_FOR_DOC, + model_class="ORTModelForImgageToImage", + checkpoint="caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr", + ) + ) + def forward( + self, + pixel_values: Union[torch.Tensor, np.ndarray], + **kwargs, + ): + use_torch = isinstance(pixel_values, torch.Tensor) + self.raise_on_numpy_input_io_binding(use_torch) + if self.device.type == "cuda" and self.use_io_binding: + input_shapes = pixel_values.shape + io_binding, output_shapes, output_buffers = self.prepare_io_binding( + pixel_values, + ordered_input_names=self._ordered_input_names, + known_output_shapes={ + "reconstruction": [ + input_shapes[0], + input_shapes[1], + input_shapes[2] * self.config.upscale, + input_shapes[3] * self.config.upscale, + ] + }, + ) + io_binding.synchronize_inputs() + self.model.run_with_iobinding(io_binding) + io_binding.synchronize_outputs() + reconstruction = output_buffers["reconstruction"].view(output_shapes["reconstruction"]) + else: + model_inputs = {"pixel_values": pixel_values} + onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.model.run(None, onnx_inputs) + model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) + reconstruction = model_outputs["reconstruction"] + return ImageSuperResolutionOutput(reconstruction=reconstruction) + + CUSTOM_TASKS_EXAMPLE = r""" Example of custom tasks(e.g. a sentence transformers taking `pooler_output` as output): diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py index 4ce3e4707ed..27e0dc01b4c 100644 --- a/optimum/onnxruntime/modeling_seq2seq.py +++ b/optimum/onnxruntime/modeling_seq2seq.py @@ -18,7 +18,6 @@ import logging import shutil -import warnings from abc import ABC, abstractmethod from pathlib import Path from tempfile import TemporaryDirectory @@ -46,7 +45,6 @@ from ..onnx.utils import _get_external_data_paths from ..utils import check_if_transformers_greater from ..utils.file_utils import validate_file_exists -from ..utils.normalized_config import NormalizedConfigManager from ..utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors from .base import ORTDecoderForSeq2Seq, ORTEncoder from .constants import ( @@ -69,17 +67,7 @@ if check_if_transformers_greater("4.25.0"): from transformers.generation import GenerationMixin else: - from transformers.generation_utils import GenerationMixin - - -# if check_if_transformers_greater("4.37.0"): -# # starting from transformers v4.37.0, the whisper generation loop is implemented in the `WhisperGenerationMixin` -# # and it implements many new features including short and long form generation, and starts with 2 init tokens -# from transformers.models.whisper.generation_whisper import WhisperGenerationMixin -# else: - -# class WhisperGenerationMixin(WhisperForConditionalGeneration, GenerationMixin): -# pass + from transformers.generation_utils import GenerationMixin # type: ignore if check_if_transformers_greater("4.43.0"): @@ -717,6 +705,18 @@ def show_deprecated_argument(arg_name): generation_config = GenerationConfig.from_model_config(config) self.generation_config = generation_config + if check_if_transformers_greater("4.44.99"): + misplaced_generation_parameters = self.config._get_non_default_generation_parameters() + if len(misplaced_generation_parameters) > 0: + logger.warning( + "Moving the following attributes in the config to the generation config: " + f"{misplaced_generation_parameters}. You are seeing this warning because you've set " + "generation parameters in the model config, as opposed to in the generation config.", + ) + for param_name, param_value in misplaced_generation_parameters.items(): + setattr(self.generation_config, param_name, param_value) + setattr(self.config, param_name, None) + @abstractmethod def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: pass @@ -791,7 +791,6 @@ def _from_pretrained( cls, model_id: Union[str, Path], config: "PretrainedConfig", - use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, force_download: bool = False, @@ -810,15 +809,7 @@ def _from_pretrained( model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, **kwargs, ): - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - + generation_config = kwargs.pop("generation_config", None) model_path = Path(model_id) # We do not implement the logic for use_cache=False, use_merged=True @@ -1007,19 +998,21 @@ def _from_pretrained( if model_save_dir is None: model_save_dir = new_model_save_dir - generation_config = None - try: - generation_config = GenerationConfig.from_pretrained( - model_id, - cache_dir=cache_dir, - force_download=force_download, - local_files_only=local_files_only, - token=token, - revision=revision, - subfolder=subfolder, - ) - except OSError: - logger.info("Generation config file not found, using a generation config created from the model config.") + if generation_config is None: + try: + generation_config = GenerationConfig.from_pretrained( + model_id, + cache_dir=cache_dir, + force_download=force_download, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + ) + except OSError: + logger.info( + "Generation config file not found, using a generation config created from the model config." + ) onnx_paths = [encoder_path] if use_merged is False: @@ -1046,7 +1039,6 @@ def _from_transformers( cls, model_id: str, config: "PretrainedConfig", - use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, revision: str = "main", force_download: bool = True, @@ -1062,15 +1054,6 @@ def _from_transformers( use_io_binding: Optional[bool] = None, task: Optional[str] = None, ) -> "ORTModelForConditionalGeneration": - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.") - token = use_auth_token - if use_cache is False and use_merged is True: raise ValueError( "The incompatible arguments use_cache=False, use_merged=True were passed to" @@ -1102,8 +1085,6 @@ def _from_transformers( force_download=force_download, trust_remote_code=trust_remote_code, ) - - config.save_pretrained(save_dir_path) maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder) return cls._from_pretrained( @@ -1165,49 +1146,6 @@ class ORTModelForSeq2SeqLM(ORTModelForConditionalGeneration, GenerationMixin): auto_model_class = AutoModelForSeq2SeqLM main_input_name = "input_ids" - def __init__( - self, - encoder_session: ort.InferenceSession, - decoder_session: ort.InferenceSession, - config: "PretrainedConfig", - onnx_paths: List[str], - decoder_with_past_session: Optional[ort.InferenceSession] = None, - use_cache: bool = True, - use_io_binding: Optional[bool] = None, - model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - preprocessors: Optional[List] = None, - generation_config: Optional[GenerationConfig] = None, - **kwargs, - ): - super().__init__( - encoder_session, - decoder_session, - config, - onnx_paths, - decoder_with_past_session, - use_cache, - use_io_binding, - model_save_dir, - preprocessors, - generation_config, - **kwargs, - ) - - # The normalized_config initialization in ORTModelPart is unfortunately wrong as the top level config is initialized. - if config.model_type == "encoder-decoder": - self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.encoder.model_type - )(config.encoder) - - self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - - if self.decoder_with_past is not None: - self.decoder_with_past.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoder(session, self) @@ -1521,20 +1459,6 @@ def __init__( **kwargs, ) - # The normalized_config initialization in ORTModelPart is unfortunately wrong as the top level config is initialized. - self.encoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.encoder.model_type - )(config.encoder) - - self.decoder.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - - if self.decoder_with_past is not None: - self.decoder_with_past.normalized_config = NormalizedConfigManager.get_normalized_config_class( - config.decoder.model_type - )(config.decoder) - def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder: return ORTEncoderForVisionEncoderDecoder(session, self) diff --git a/optimum/onnxruntime/optimization.py b/optimum/onnxruntime/optimization.py index 9e62a3f324c..fd6958bba7d 100644 --- a/optimum/onnxruntime/optimization.py +++ b/optimum/onnxruntime/optimization.py @@ -20,6 +20,7 @@ import onnx from onnx import load_model +from transformers import GenerationConfig from transformers.models.auto.configuration_auto import AutoConfig from onnxruntime.transformers.onnx_model_bert import BertOnnxModel @@ -152,10 +153,6 @@ def optimize( save_dir = Path(save_dir) save_dir.mkdir(parents=True, exist_ok=True) ORTConfigManager.check_optimization_supported_model(self.model_type, optimization_config) - - self.config.save_pretrained(save_dir) - maybe_save_preprocessors(self.onnx_model_path[0].parent, save_dir) - model_type = ORTConfigManager.get_model_ort_type(self.config.model_type) optimization_options = optimization_config.create_fusion_options(model_type) @@ -236,6 +233,13 @@ def optimize( # Save the model configuration self.config.save_pretrained(save_dir) ort_config.save_pretrained(save_dir) + maybe_save_preprocessors(self.onnx_model_path[0].parent, save_dir) + + try: + generation_config = GenerationConfig.from_pretrained(self.onnx_model_path[0].parent) + generation_config.save_pretrained(save_dir) + except Exception: + pass logger.info( f"Optimized model saved at: {save_dir} (external data format: " diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py index 056123f8d8e..054a2310a6b 100644 --- a/optimum/onnxruntime/quantization.py +++ b/optimum/onnxruntime/quantization.py @@ -21,7 +21,6 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import onnx -from datasets import Dataset, load_dataset from packaging.version import Version, parse from transformers import AutoConfig @@ -29,6 +28,7 @@ from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantizationMode, QuantType from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer from onnxruntime.quantization.qdq_quantizer import QDQQuantizer +from optimum.utils.import_utils import requires_backends from ..quantization_base import OptimumQuantizer from ..utils.save_utils import maybe_save_preprocessors @@ -40,6 +40,7 @@ if TYPE_CHECKING: + from datasets import Dataset from transformers import PretrainedConfig LOGGER = logging.getLogger(__name__) @@ -48,7 +49,7 @@ class ORTCalibrationDataReader(CalibrationDataReader): __slots__ = ["batch_size", "dataset", "_dataset_iter"] - def __init__(self, dataset: Dataset, batch_size: int = 1): + def __init__(self, dataset: "Dataset", batch_size: int = 1): if dataset is None: raise ValueError("Provided dataset is None.") @@ -100,7 +101,7 @@ def __init__(self, onnx_model_path: Path, config: Optional["PretrainedConfig"] = if self.config is None: try: self.config = AutoConfig.from_pretrained(self.onnx_model_path.parent) - except OSError: + except (OSError, ValueError): LOGGER.warning( f"Could not load the config for {self.onnx_model_path} automatically, this might make " "the quantized model harder to use because it will not be able to be loaded by an ORTModel without " @@ -134,6 +135,7 @@ def from_pretrained( model_or_path = Path(model_or_path) path = None + config = None if isinstance(model_or_path, ORTModelForConditionalGeneration): raise NotImplementedError(ort_quantizer_error_message) elif isinstance(model_or_path, Path) and file_name is None: @@ -147,17 +149,17 @@ def from_pretrained( file_name = onnx_files[0].name if isinstance(model_or_path, ORTModel): - if path is None: - path = Path(model_or_path.model._model_path) + path = Path(model_or_path.model._model_path) + config = model_or_path.config elif os.path.isdir(model_or_path): path = Path(model_or_path) / file_name else: raise ValueError(f"Unable to load model from {model_or_path}.") - return cls(path) + return cls(path, config=config) def fit( self, - dataset: Dataset, + dataset: "Dataset", calibration_config: CalibrationConfig, onnx_augmented_model_name: Union[str, Path] = "augmented_model.onnx", operators_to_quantize: Optional[List[str]] = None, @@ -211,7 +213,7 @@ def fit( def partial_fit( self, - dataset: Dataset, + dataset: "Dataset", calibration_config: CalibrationConfig, onnx_augmented_model_name: Union[str, Path] = "augmented_model.onnx", operators_to_quantize: Optional[List[str]] = None, @@ -427,7 +429,7 @@ def get_calibration_dataset( seed: int = 2016, use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, - ) -> Dataset: + ) -> "Dataset": """ Creates the calibration `datasets.Dataset` to use for the post-training static quantization calibration step. @@ -473,6 +475,10 @@ def get_calibration_dataset( "provided." ) + requires_backends(self, ["datasets"]) + + from datasets import load_dataset + calib_dataset = load_dataset( dataset_name, name=dataset_config_name, @@ -491,7 +497,7 @@ def get_calibration_dataset( return self.clean_calibration_dataset(processed_calib_dataset) - def clean_calibration_dataset(self, dataset: Dataset) -> Dataset: + def clean_calibration_dataset(self, dataset: "Dataset") -> "Dataset": model = onnx.load(self.onnx_model_path) model_inputs = {input.name for input in model.graph.input} ignored_columns = list(set(dataset.column_names) - model_inputs) diff --git a/optimum/onnxruntime/runs/calibrator.py b/optimum/onnxruntime/runs/calibrator.py index c493a943747..bfdcd64d92e 100644 --- a/optimum/onnxruntime/runs/calibrator.py +++ b/optimum/onnxruntime/runs/calibrator.py @@ -1,6 +1,4 @@ -from typing import Dict, List - -from datasets import Dataset +from typing import TYPE_CHECKING, Dict, List from ...runs_base import Calibrator from .. import ORTQuantizer @@ -9,10 +7,14 @@ from ..preprocessors.passes import ExcludeGeLUNodes, ExcludeLayerNormNodes, ExcludeNodeAfter, ExcludeNodeFollowedBy +if TYPE_CHECKING: + from datasets import Dataset + + class OnnxRuntimeCalibrator(Calibrator): def __init__( self, - calibration_dataset: Dataset, + calibration_dataset: "Dataset", quantizer: ORTQuantizer, model_path: str, qconfig: QuantizationConfig, diff --git a/optimum/onnxruntime/trainer.py b/optimum/onnxruntime/trainer.py index 86c333adb3f..66273cbcf96 100644 --- a/optimum/onnxruntime/trainer.py +++ b/optimum/onnxruntime/trainer.py @@ -103,14 +103,14 @@ from transformers.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_zero3_enabled if check_if_transformers_greater("4.39"): - from transformers.utils import is_torch_xla_available + from transformers.utils import is_torch_xla_available as is_torch_tpu_xla_available - if is_torch_xla_available(): + if is_torch_tpu_xla_available(): import torch_xla.core.xla_model as xm else: - from transformers.utils import is_torch_tpu_available + from transformers.utils import is_torch_tpu_available as is_torch_tpu_xla_available - if is_torch_tpu_available(check_device=False): + if is_torch_tpu_xla_available(check_device=False): import torch_xla.core.xla_model as xm if TYPE_CHECKING: @@ -735,7 +735,7 @@ def get_dataloader_sampler(dataloader): if ( args.logging_nan_inf_filter - and not is_torch_tpu_available() + and not is_torch_tpu_xla_available() and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step)) ): # if loss is nan or inf simply add the average of previous logged losses diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py index ad40af92b9d..9e92e0bd325 100644 --- a/optimum/onnxruntime/utils.py +++ b/optimum/onnxruntime/utils.py @@ -13,6 +13,7 @@ # limitations under the License. """Utility functions, classes and constants for ONNX Runtime.""" +import importlib import os import re from enum import Enum @@ -31,7 +32,6 @@ import onnxruntime as ort from ..exporters.onnx import OnnxConfig, OnnxConfigWithLoss -from ..utils.import_utils import _is_package_available if TYPE_CHECKING: @@ -91,9 +91,11 @@ def is_onnxruntime_training_available(): def is_cupy_available(): """ - Checks if onnxruntime-training is available. + Checks if CuPy is available. """ - return _is_package_available("cupy") + # Don't use _is_package_available as it doesn't work with CuPy installed + # with `cupy-cuda*` and `cupy-rocm-*` package name (prebuilt wheels). + return importlib.util.find_spec("cupy") is not None class ORTConfigManager: @@ -126,6 +128,7 @@ class ORTConfigManager: "gpt-neo": "gpt2", "gpt-neox": "gpt2", "gptj": "gpt2", + "granite": "gpt2", # longt5 with O4 results in segmentation fault "longt5": "bert", "llama": "gpt2", @@ -401,3 +404,18 @@ def evaluation_loop( metrics = {} return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) + + +def np_to_pt_generators(np_object, device): + if isinstance(np_object, np.random.RandomState): + return torch.Generator(device=device).manual_seed(int(np_object.get_state()[1][0])) + elif isinstance(np_object, np.random.Generator): + return torch.Generator(device=device).manual_seed(int(np_object.bit_generator.state[1][0])) + elif isinstance(np_object, list) and isinstance(np_object[0], (np.random.RandomState, np.random.Generator)): + return [np_to_pt_generators(a, device) for a in np_object] + elif isinstance(np_object, dict) and isinstance( + next(iter(np_object.values())), (np.random.RandomState, np.random.Generator) + ): + return {k: np_to_pt_generators(v, device) for k, v in np_object.items()} + else: + return np_object diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py deleted file mode 100644 index 41c85b5b6ac..00000000000 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import Callable, List, Optional, Union - -import numpy as np -import torch -from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput - -from .pipeline_stable_diffusion import StableDiffusionPipelineMixin - - -logger = logging.getLogger(__name__) - - -class LatentConsistencyPipelineMixin(StableDiffusionPipelineMixin): - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 - def __call__( - self, - prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 4, - original_inference_steps: int = None, - guidance_scale: float = 8.5, - num_images_per_prompt: int = 1, - generator: Optional[np.random.RandomState] = None, - latents: Optional[np.ndarray] = None, - prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - ): - r""" - Function invoked when calling the pipeline for generation. - - Args: - prompt (`Optional[Union[str, List[str]]]`, defaults to None): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 7.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - latents (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - guidance_rescale (`float`, defaults to 0.0): - Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are - Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of - [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). - Guidance rescale factor should fix overexposure when using zero terminal SNR. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - height = height or self.unet.config["sample_size"] * self.vae_scale_factor - width = width or self.unet.config["sample_size"] * self.vae_scale_factor - - # Don't need to get negative prompts due to LCM guided distillation - negative_prompt = None - negative_prompt_embeds = None - - # check inputs. Raise error if not correct - self.check_inputs( - prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds - ) - - # define call parameters - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - prompt_embeds = self._encode_prompt( - prompt, - num_images_per_prompt, - False, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - ) - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps, original_inference_steps=original_inference_steps) - timesteps = self.scheduler.timesteps - - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - self.unet.config["in_channels"], - height, - width, - prompt_embeds.dtype, - generator, - latents, - ) - - bs = batch_size * num_images_per_prompt - # get Guidance Scale Embedding - w = np.full(bs, guidance_scale - 1, dtype=prompt_embeds.dtype) - w_embedding = self.get_guidance_scale_embedding( - w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype - ) - - # Adapted from diffusers to extend it for other runtimes than ORT - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet( - sample=latents, - timestep=timestep, - encoder_hidden_states=prompt_embeds, - timestep_cond=w_embedding, - )[0] - - # compute the previous noisy sample x_t -> x_t-1 - latents, denoised = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False - ) - latents, denoised = latents.numpy(), denoised.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = denoised - has_nsfw_concept = None - else: - denoised /= self.vae_decoder.config["scaling_factor"] - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=denoised[i : i + 1])[0] for i in range(denoised.shape[0])] - ) - image, has_nsfw_concept = self.run_safety_checker(image) - - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - - if not return_dict: - return (image, has_nsfw_concept) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) - - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): - """ - See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 - - Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps - embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings - - Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` - """ - w = w * 1000 - half_dim = embedding_dim // 2 - emb = np.log(10000.0) / (half_dim - 1) - emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb) - emb = w[:, None] * emb[None, :] - emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1) - - if embedding_dim % 2 == 1: # zero pad - emb = np.pad(emb, [(0, 0), (0, 1)]) - - assert emb.shape == (w.shape[0], embedding_dim) - return emb diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion.py deleted file mode 100644 index 98bff0de44d..00000000000 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py +++ /dev/null @@ -1,419 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -import logging -from typing import Callable, List, Optional, Union - -import numpy as np -import torch -from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput - -from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg - - -logger = logging.getLogger(__name__) - - -class StableDiffusionPipelineMixin(DiffusionPipelineMixin): - # Copied from https://github.com/huggingface/diffusers/blob/v0.17.1/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L114 - def _encode_prompt( - self, - prompt: Union[str, List[str]], - num_images_per_prompt: int, - do_classifier_free_guidance: bool, - negative_prompt: Optional[Union[str, list]], - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - ): - r""" - Encodes the prompt into text encoder hidden states. - - Args: - prompt (`Union[str, List[str]]`): - prompt to be encoded - num_images_per_prompt (`int`): - number of images that should be generated per prompt - do_classifier_free_guidance (`bool`): - whether to use classifier free guidance or not - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored - if `guidance_scale` is less than `1`). - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if prompt_embeds is None: - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - truncation=True, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="np").input_ids - - if not np.array_equal(text_input_ids, untruncated_ids): - removed_text = self.tokenizer.batch_decode( - untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1] - ) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - - prompt_embeds = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] - - prompt_embeds = np.repeat(prompt_embeds, num_images_per_prompt, axis=0) - - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance and negative_prompt_embeds is None: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] * batch_size - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - - max_length = prompt_embeds.shape[1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - negative_prompt_embeds = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0] - - if do_classifier_free_guidance: - negative_prompt_embeds = np.repeat(negative_prompt_embeds, num_images_per_prompt, axis=0) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - prompt_embeds = np.concatenate([negative_prompt_embeds, prompt_embeds]) - - return prompt_embeds - - # Copied from https://github.com/huggingface/diffusers/blob/v0.17.1/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L217 - def check_inputs( - self, - prompt: Union[str, List[str]], - height: Optional[int], - width: Optional[int], - callback_steps: int, - negative_prompt: Optional[str] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - ): - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - if prompt is not None and prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" - " only forward one of the two." - ) - elif prompt is None and prompt_embeds is None: - raise ValueError( - "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." - ) - elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if negative_prompt is not None and negative_prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" - f" {negative_prompt_embeds}. Please make sure to only forward one of the two." - ) - - if prompt_embeds is not None and negative_prompt_embeds is not None: - if prompt_embeds.shape != negative_prompt_embeds.shape: - raise ValueError( - "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" - f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" - f" {negative_prompt_embeds.shape}." - ) - - # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents - def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, generator, latents=None): - shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor) - if isinstance(generator, list) and len(generator) != batch_size: - raise ValueError( - f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" - f" size of {batch_size}. Make sure the batch size matches the length of the generators." - ) - - if latents is None: - latents = generator.randn(*shape).astype(dtype) - elif latents.shape != shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * np.float64(self.scheduler.init_noise_sigma) - - return latents - - # Adapted from https://github.com/huggingface/diffusers/blob/v0.17.1/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L264 - def __call__( - self, - prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: int = 1, - eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, - latents: Optional[np.ndarray] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - guidance_rescale: float = 0.0, - ): - r""" - Function invoked when calling the pipeline for generation. - - Args: - prompt (`Optional[Union[str, List[str]]]`, defaults to None): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 7.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. If not defined, one has to pass - `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` - is less than `1`). - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - latents (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - guidance_rescale (`float`, defaults to 0.0): - Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are - Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of - [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). - Guidance rescale factor should fix overexposure when using zero terminal SNR. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - - # check inputs. Raise error if not correct - self.check_inputs( - prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds - ) - - # define call parameters - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - prompt_embeds = self._encode_prompt( - prompt, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - ) - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - timesteps = self.scheduler.timesteps - - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - self.unet.config.get("in_channels", 4), - height, - width, - prompt_embeds.dtype, - generator, - latents, - ) - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - # Adapted from diffusers to extend it for other runtimes than ORT - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.cpu().numpy() - - # predict the noise residual - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds) - noise_pred = noise_pred[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - if guidance_rescale > 0.0: - # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs - ) - latents = scheduler_output.prev_sample.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = latents - has_nsfw_concept = None - else: - latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] - ) - image, has_nsfw_concept = self.run_safety_checker(image) - - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - - if not return_dict: - return (image, has_nsfw_concept) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) - - def run_safety_checker(self, image: np.ndarray): - if self.safety_checker is None: - has_nsfw_concept = None - else: - feature_extractor_input = self.image_processor.numpy_to_pil(image) - safety_checker_input = self.feature_extractor( - feature_extractor_input, return_tensors="np" - ).pixel_values.astype(image.dtype) - images, has_nsfw_concept = [], [] - for i in range(image.shape[0]): - image_i, has_nsfw_concept_i = self.safety_checker( - clip_input=safety_checker_input[i : i + 1], images=image[i : i + 1] - ) - images.append(image_i) - has_nsfw_concept.append(has_nsfw_concept_i[0]) - image = np.concatenate(images) - - return image, has_nsfw_concept diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py deleted file mode 100644 index 81a6ffa1e04..00000000000 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_img2img.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -from typing import Callable, List, Optional, Union - -import numpy as np -import PIL -import torch -from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from diffusers.utils import deprecate - -from .pipeline_stable_diffusion import StableDiffusionPipelineMixin - - -class StableDiffusionImg2ImgPipelineMixin(StableDiffusionPipelineMixin): - # Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionImg2ImgPipeline.check_inputs - def check_inputs( - self, - prompt: Union[str, List[str]], - strength: float, - callback_steps: int, - negative_prompt: Optional[Union[str, List[str]]] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - ): - if strength < 0 or strength > 1: - raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - if prompt is not None and prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" - " only forward one of the two." - ) - elif prompt is None and prompt_embeds is None: - raise ValueError( - "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." - ) - elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if negative_prompt is not None and negative_prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" - f" {negative_prompt_embeds}. Please make sure to only forward one of the two." - ) - - if prompt_embeds is not None and negative_prompt_embeds is not None: - if prompt_embeds.shape != negative_prompt_embeds.shape: - raise ValueError( - "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" - f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" - f" {negative_prompt_embeds.shape}." - ) - - # Adapted from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionImg2ImgPipeline.__call__ - def __call__( - self, - prompt: Optional[Union[str, List[str]]] = None, - image: Union[np.ndarray, PIL.Image.Image] = None, - strength: float = 0.8, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: int = 1, - eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - ): - r""" - Function invoked when calling the pipeline for generation. - - - Args: - prompt (`Optional[Union[str, List[str]]]`, defaults to None): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - image (`Union[np.ndarray, PIL.Image.Image]`): - `Image`, or tensor representing an image batch which will be upscaled. - strength (`float`, defaults to 0.8): - Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image` - will be used as a starting point, adding more noise to it the larger the `strength`. The number of - denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will - be maximum and the denoising process will run for the full number of iterations specified in - `num_inference_steps`. A value of 1, therefore, essentially ignores `image`. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 7.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. If not defined, one has to pass - `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` - is less than `1`). - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - - # check inputs. Raise error if not correct - self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds) - - # define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - image = self.image_processor.preprocess(image) - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - prompt_embeds = self._encode_prompt( - prompt, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - ) - - latents_dtype = prompt_embeds.dtype - image = image.astype(latents_dtype) - # encode the init image into latents and scale the latents - init_latents = self.vae_encoder(sample=image)[0] - - scaling_factor = self.vae_decoder.config.get("scaling_factor", 0.18215) - init_latents = scaling_factor * init_latents - - if isinstance(prompt, str): - prompt = [prompt] - if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0: - # expand init_latents for batch_size - deprecation_message = ( - f"You have passed {len(prompt)} text prompts (`prompt`), but only {init_latents.shape[0]} initial" - " images (`image`). Initial images are now duplicating to match the number of text prompts. Note" - " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" - " your script to pass as many initial images as text prompts to suppress this warning." - ) - deprecate("len(prompt) != len(image)", "1.0.0", deprecation_message, standard_warn=False) - additional_image_per_prompt = len(prompt) // init_latents.shape[0] - init_latents = np.concatenate([init_latents] * additional_image_per_prompt * num_images_per_prompt, axis=0) - elif len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {len(prompt)} text prompts." - ) - else: - init_latents = np.concatenate([init_latents] * num_images_per_prompt, axis=0) - - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) - - timesteps = self.scheduler.timesteps.numpy()[-init_timestep] - timesteps = np.array([timesteps] * batch_size * num_images_per_prompt) - - # add noise to latents using the timesteps - noise = generator.randn(*init_latents.shape).astype(latents_dtype) - init_latents = self.scheduler.add_noise( - torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timesteps) - ) - init_latents = init_latents.numpy() - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - latents = init_latents - - t_start = max(num_inference_steps - init_timestep + offset, 0) - timesteps = self.scheduler.timesteps[t_start:].numpy() - - # Adapted from diffusers to extend it for other runtimes than ORT - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.cpu().numpy() - - # predict the noise residual - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)[ - 0 - ] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs - ) - latents = scheduler_output.prev_sample.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = latents - has_nsfw_concept = None - else: - latents /= scaling_factor - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] - ) - image, has_nsfw_concept = self.run_safety_checker(image) - - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - - if not return_dict: - return (image, has_nsfw_concept) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py deleted file mode 100644 index 19de793ccd0..00000000000 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py +++ /dev/null @@ -1,345 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -from typing import Callable, List, Optional, Union - -import numpy as np -import PIL -import torch -from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from diffusers.utils import PIL_INTERPOLATION - -from .pipeline_stable_diffusion import StableDiffusionPipelineMixin - - -def prepare_mask_and_masked_image(image, mask, latents_shape, vae_scale_factor): - image = np.array( - image.convert("RGB").resize((latents_shape[1] * vae_scale_factor, latents_shape[0] * vae_scale_factor)) - ) - image = image[None].transpose(0, 3, 1, 2) - image = image.astype(np.float32) / 127.5 - 1.0 - - image_mask = np.array( - mask.convert("L").resize((latents_shape[1] * vae_scale_factor, latents_shape[0] * vae_scale_factor)) - ) - masked_image = image * (image_mask < 127.5) - - mask = mask.resize((latents_shape[1], latents_shape[0]), PIL_INTERPOLATION["nearest"]) - mask = np.array(mask.convert("L")) - mask = mask.astype(np.float32) / 255.0 - mask = mask[None, None] - mask[mask < 0.5] = 0 - mask[mask >= 0.5] = 1 - - return mask, masked_image - - -class StableDiffusionInpaintPipelineMixin(StableDiffusionPipelineMixin): - # Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline.check_inputs - def check_inputs( - self, - prompt: Union[str, List[str]], - height: Optional[int], - width: Optional[int], - callback_steps: int, - negative_prompt: Optional[str] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - ): - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - if prompt is not None and prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" - " only forward one of the two." - ) - elif prompt is None and prompt_embeds is None: - raise ValueError( - "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." - ) - elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if negative_prompt is not None and negative_prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" - f" {negative_prompt_embeds}. Please make sure to only forward one of the two." - ) - - if prompt_embeds is not None and negative_prompt_embeds is not None: - if prompt_embeds.shape != negative_prompt_embeds.shape: - raise ValueError( - "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" - f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" - f" {negative_prompt_embeds.shape}." - ) - - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - image: PIL.Image.Image, - mask_image: PIL.Image.Image, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: int = 1, - eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, - latents: Optional[np.ndarray] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - ): - r""" - Function invoked when calling the pipeline for generation. - - Args: - prompt (`Union[str, List[str]]`): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - image (`PIL.Image.Image`): - `Image`, or tensor representing an image batch which will be upscaled. - mask_image (`PIL.Image.Image`): - `Image`, or tensor representing a masked image batch which will be upscaled. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 7.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. If not defined, one has to pass - `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` - is less than `1`). - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - latents (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - - # check inputs. Raise error if not correct - self.check_inputs( - prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds - ) - - # define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - timesteps = self.scheduler.timesteps - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - prompt_embeds = self._encode_prompt( - prompt, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - ) - - num_channels_latents = self.vae_decoder.config.get("latent_channels", 4) - num_channels_unet = self.unet.config.get("in_channels", 9) - latents_shape = ( - batch_size * num_images_per_prompt, - num_channels_latents, - height // self.vae_scale_factor, - width // self.vae_scale_factor, - ) - latents_dtype = prompt_embeds.dtype - if latents is None: - latents = generator.randn(*latents_shape).astype(latents_dtype) - else: - if latents.shape != latents_shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - - # prepare mask and masked_image - mask, masked_image = prepare_mask_and_masked_image( - image, mask_image, latents_shape[-2:], self.vae_scale_factor - ) - mask = mask.astype(latents.dtype) - masked_image = masked_image.astype(latents.dtype) - - masked_image_latents = self.vae_encoder(sample=masked_image)[0] - - scaling_factor = self.vae_decoder.config.get("scaling_factor", 0.18215) - masked_image_latents = scaling_factor * masked_image_latents - - # duplicate mask and masked_image_latents for each generation per prompt - mask = mask.repeat(batch_size * num_images_per_prompt, 0) - masked_image_latents = masked_image_latents.repeat(batch_size * num_images_per_prompt, 0) - - mask = np.concatenate([mask] * 2) if do_classifier_free_guidance else mask - masked_image_latents = ( - np.concatenate([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents - ) - - # check that sizes of mask, masked image and latents match - if num_channels_unet == 9: - # default case for runwayml/stable-diffusion-inpainting - num_channels_mask = mask.shape[1] - num_channels_masked_image = masked_image_latents.shape[1] - if num_channels_latents + num_channels_mask + num_channels_masked_image != num_channels_unet: - raise ValueError( - f"Incorrect configuration settings! The config of `pipeline.unet`: expects" - f" {num_channels_unet} but received `num_channels_latents`: {num_channels_latents} +" - f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}" - f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of" - " `pipeline.unet` or your `mask_image` or `image` input." - ) - elif num_channels_unet != 4: - raise ValueError( - f"The unet {self.unet.__class__} should have either 4 or 9 input channels, not {num_channels_unet}." - ) - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * np.float64(self.scheduler.init_noise_sigma) - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - # Adapted from diffusers to extend it for other runtimes than ORT - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - # concat latents, mask, masked_image_latnets in the channel dimension - latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.cpu().numpy() - if num_channels_unet == 9: - latent_model_input = np.concatenate([latent_model_input, mask, masked_image_latents], axis=1) - - # predict the noise residual - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)[ - 0 - ] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs - ) - latents = scheduler_output.prev_sample.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = latents - has_nsfw_concept = None - else: - latents /= scaling_factor - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] - ) - image, has_nsfw_concept = self.run_safety_checker(image) - - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - - if not return_dict: - return (image, has_nsfw_concept) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py deleted file mode 100644 index 2a5e7bf78b0..00000000000 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py +++ /dev/null @@ -1,496 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -import logging -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np -import torch -from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput - -from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg - - -logger = logging.getLogger(__name__) - - -class StableDiffusionXLPipelineMixin(DiffusionPipelineMixin): - # Adapted from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt - def _encode_prompt( - self, - prompt: Union[str, List[str]], - num_images_per_prompt: int, - do_classifier_free_guidance: bool, - negative_prompt: Optional[Union[str, list]], - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - pooled_prompt_embeds: Optional[np.ndarray] = None, - negative_pooled_prompt_embeds: Optional[np.ndarray] = None, - ): - r""" - Encodes the prompt into text encoder hidden states. - - Args: - prompt (`Union[str, List[str]]`): - prompt to be encoded - num_images_per_prompt (`int`): - number of images that should be generated per prompt - do_classifier_free_guidance (`bool`): - whether to use classifier free guidance or not - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored - if `guidance_scale` is less than `1`). - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - pooled_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. - If not provided, pooled text embeddings will be generated from `prompt` input argument. - negative_pooled_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt` - input argument. - """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - # Define tokenizers and text encoders - tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2] - text_encoders = ( - [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2] - ) - - if prompt_embeds is None: - prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - # get prompt text embeddings - text_inputs = tokenizer( - prompt, - padding="max_length", - max_length=tokenizer.model_max_length, - truncation=True, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="np").input_ids - - if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not np.array_equal( - text_input_ids, untruncated_ids - ): - removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {tokenizer.model_max_length} tokens: {removed_text}" - ) - - prompt_embeds = text_encoder( - input_ids=text_input_ids.astype(text_encoder.input_dtype.get("input_ids", np.int32)) - ) - pooled_prompt_embeds = prompt_embeds[0] - prompt_embeds = prompt_embeds[-2] - prompt_embeds = np.repeat(prompt_embeds, num_images_per_prompt, axis=0) - prompt_embeds_list.append(prompt_embeds) - - prompt_embeds = np.concatenate(prompt_embeds_list, axis=-1) - - # get unconditional embeddings for classifier free guidance - zero_out_negative_prompt = negative_prompt is None and self.config["force_zeros_for_empty_prompt"] - if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt: - negative_prompt_embeds = np.zeros_like(prompt_embeds) - negative_pooled_prompt_embeds = np.zeros_like(pooled_prompt_embeds) - elif do_classifier_free_guidance and negative_prompt_embeds is None: - negative_prompt = negative_prompt or "" - if prompt is not None and type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - - negative_prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - max_length = prompt_embeds.shape[1] - uncond_input = tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - negative_prompt_embeds = text_encoder( - input_ids=uncond_input.input_ids.astype(text_encoder.input_dtype.get("input_ids", np.int32)) - ) - negative_pooled_prompt_embeds = negative_prompt_embeds[0] - negative_prompt_embeds = negative_prompt_embeds[-2] - - # duplicate unconditional embeddings for each generation per prompt, using mps friendly method - negative_prompt_embeds = np.repeat(negative_prompt_embeds, num_images_per_prompt, axis=0) - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - negative_prompt_embeds_list.append(negative_prompt_embeds) - negative_prompt_embeds = np.concatenate(negative_prompt_embeds_list, axis=-1) - - pooled_prompt_embeds = np.repeat(pooled_prompt_embeds, num_images_per_prompt, axis=0) - negative_pooled_prompt_embeds = np.repeat(negative_pooled_prompt_embeds, num_images_per_prompt, axis=0) - - return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.check_inputs - def check_inputs( - self, - prompt: Union[str, List[str]], - height: Optional[int], - width: Optional[int], - callback_steps: int, - negative_prompt: Optional[str] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - pooled_prompt_embeds: Optional[np.ndarray] = None, - negative_pooled_prompt_embeds: Optional[np.ndarray] = None, - ): - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - if prompt is not None and prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" - " only forward one of the two." - ) - elif prompt is None and prompt_embeds is None: - raise ValueError( - "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." - ) - elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if negative_prompt is not None and negative_prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" - f" {negative_prompt_embeds}. Please make sure to only forward one of the two." - ) - - if prompt_embeds is not None and negative_prompt_embeds is not None: - if prompt_embeds.shape != negative_prompt_embeds.shape: - raise ValueError( - "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" - f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" - f" {negative_prompt_embeds.shape}." - ) - - if prompt_embeds is not None and pooled_prompt_embeds is None: - raise ValueError( - "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`." - ) - - if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None: - raise ValueError( - "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`." - ) - - # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents - def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, generator, latents=None): - shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor) - if isinstance(generator, list) and len(generator) != batch_size: - raise ValueError( - f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" - f" size of {batch_size}. Make sure the batch size matches the length of the generators." - ) - - if latents is None: - latents = generator.randn(*shape).astype(dtype) - elif latents.shape != shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * np.float64(self.scheduler.init_noise_sigma) - - return latents - - # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs - def prepare_extra_step_kwargs(self, generator, eta): - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - - extra_step_kwargs = {} - - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_eta: - extra_step_kwargs["eta"] = eta - - return extra_step_kwargs - - # Adapted from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.__call__ - def __call__( - self, - prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 5.0, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: int = 1, - eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, - latents: Optional[np.ndarray] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - pooled_prompt_embeds: Optional[np.ndarray] = None, - negative_pooled_prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - guidance_rescale: float = 0.0, - original_size: Optional[Tuple[int, int]] = None, - crops_coords_top_left: Tuple[int, int] = (0, 0), - target_size: Optional[Tuple[int, int]] = None, - ): - r""" - Function invoked when calling the pipeline for generation. - - Args: - prompt (`Optional[Union[str, List[str]]]`, defaults to None): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. If not defined, one has to pass - `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` - is less than `1`). - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - latents (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - guidance_rescale (`float`, defaults to 0.7): - Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are - Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of - [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). - Guidance rescale factor should fix overexposure when using zero terminal SNR. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - - # 0. Default height and width to unet - height = height or self.unet.config["sample_size"] * self.vae_scale_factor - width = width or self.unet.config["sample_size"] * self.vae_scale_factor - - original_size = original_size or (height, width) - target_size = target_size or (height, width) - - # 1. Check inputs. Raise error if not correct - self.check_inputs( - prompt, - height, - width, - callback_steps, - negative_prompt, - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) - - # 2. Define call parameters - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 3. Encode input prompt - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = self._encode_prompt( - prompt, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - - # 4. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps) - timesteps = self.scheduler.timesteps - - # 5. Prepare latent variables - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - self.unet.config.get("in_channels", 4), - height, - width, - prompt_embeds.dtype, - generator, - latents, - ) - - # 6. Prepare extra step kwargs - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. Prepare added time ids & embeddings - add_text_embeds = pooled_prompt_embeds - add_time_ids = (original_size + crops_coords_top_left + target_size,) - add_time_ids = np.array(add_time_ids, dtype=prompt_embeds.dtype) - - if do_classifier_free_guidance: - prompt_embeds = np.concatenate((negative_prompt_embeds, prompt_embeds), axis=0) - add_text_embeds = np.concatenate((negative_pooled_prompt_embeds, add_text_embeds), axis=0) - add_time_ids = np.concatenate((add_time_ids, add_time_ids), axis=0) - add_time_ids = np.repeat(add_time_ids, batch_size * num_images_per_prompt, axis=0) - - # Adapted from diffusers to extend it for other runtimes than ORT - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - # 8. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.cpu().numpy() - - # predict the noise residual - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet( - sample=latent_model_input, - timestep=timestep, - encoder_hidden_states=prompt_embeds, - text_embeds=add_text_embeds, - time_ids=add_time_ids, - ) - noise_pred = noise_pred[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - if guidance_rescale > 0.0: - # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs - ) - latents = scheduler_output.prev_sample.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = latents - else: - latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] - ) - # apply watermark if available - if self.watermark is not None: - image = self.watermark.apply_watermark(image) - image = self.image_processor.postprocess(image, output_type=output_type) - - if not return_dict: - return (image,) - - return StableDiffusionXLPipelineOutput(images=image) diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py deleted file mode 100644 index a07903a735e..00000000000 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py +++ /dev/null @@ -1,503 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -import logging -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np -import PIL -import torch -from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput - -from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg - - -logger = logging.getLogger(__name__) - - -class StableDiffusionXLImg2ImgPipelineMixin(DiffusionPipelineMixin): - # Adapted from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt - def _encode_prompt( - self, - prompt: Union[str, List[str]], - num_images_per_prompt: int, - do_classifier_free_guidance: bool, - negative_prompt: Optional[Union[str, list]], - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - pooled_prompt_embeds: Optional[np.ndarray] = None, - negative_pooled_prompt_embeds: Optional[np.ndarray] = None, - ): - r""" - Encodes the prompt into text encoder hidden states. - - Args: - prompt (`Union[str, List[str]]`): - prompt to be encoded - num_images_per_prompt (`int`): - number of images that should be generated per prompt - do_classifier_free_guidance (`bool`): - whether to use classifier free guidance or not - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored - if `guidance_scale` is less than `1`). - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - pooled_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. - If not provided, pooled text embeddings will be generated from `prompt` input argument. - negative_pooled_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt` - input argument. - """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - # Define tokenizers and text encoders - tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2] - text_encoders = ( - [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2] - ) - - if prompt_embeds is None: - prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - # get prompt text embeddings - text_inputs = tokenizer( - prompt, - padding="max_length", - max_length=tokenizer.model_max_length, - truncation=True, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="np").input_ids - - if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not np.array_equal( - text_input_ids, untruncated_ids - ): - removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {tokenizer.model_max_length} tokens: {removed_text}" - ) - - prompt_embeds = text_encoder( - input_ids=text_input_ids.astype(text_encoder.input_dtype.get("input_ids", np.int32)) - ) - pooled_prompt_embeds = prompt_embeds[0] - prompt_embeds = prompt_embeds[-2] - prompt_embeds = np.repeat(prompt_embeds, num_images_per_prompt, axis=0) - prompt_embeds_list.append(prompt_embeds) - - prompt_embeds = np.concatenate(prompt_embeds_list, axis=-1) - - # get unconditional embeddings for classifier free guidance - zero_out_negative_prompt = negative_prompt is None and self.config["force_zeros_for_empty_prompt"] - if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt: - negative_prompt_embeds = np.zeros_like(prompt_embeds) - negative_pooled_prompt_embeds = np.zeros_like(pooled_prompt_embeds) - elif do_classifier_free_guidance and negative_prompt_embeds is None: - negative_prompt = negative_prompt or "" - if prompt is not None and type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - - negative_prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - max_length = prompt_embeds.shape[1] - uncond_input = tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - - negative_prompt_embeds = text_encoder( - input_ids=uncond_input.input_ids.astype(text_encoder.input_dtype.get("input_ids", np.int32)) - ) - negative_pooled_prompt_embeds = negative_prompt_embeds[0] - negative_prompt_embeds = negative_prompt_embeds[-2] - # duplicate unconditional embeddings for each generation per prompt, using mps friendly method - negative_prompt_embeds = np.repeat(negative_prompt_embeds, num_images_per_prompt, axis=0) - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - negative_prompt_embeds_list.append(negative_prompt_embeds) - negative_prompt_embeds = np.concatenate(negative_prompt_embeds_list, axis=-1) - - pooled_prompt_embeds = np.repeat(pooled_prompt_embeds, num_images_per_prompt, axis=0) - negative_pooled_prompt_embeds = np.repeat(negative_pooled_prompt_embeds, num_images_per_prompt, axis=0) - - return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.check_inputs - def check_inputs( - self, - prompt: Union[str, List[str]], - strength: float, - callback_steps: int, - negative_prompt: Optional[str] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - ): - if strength < 0 or strength > 1: - raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - if prompt is not None and prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" - " only forward one of the two." - ) - elif prompt is None and prompt_embeds is None: - raise ValueError( - "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." - ) - elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if negative_prompt is not None and negative_prompt_embeds is not None: - raise ValueError( - f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" - f" {negative_prompt_embeds}. Please make sure to only forward one of the two." - ) - - if prompt_embeds is not None and negative_prompt_embeds is not None: - if prompt_embeds.shape != negative_prompt_embeds.shape: - raise ValueError( - "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" - f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" - f" {negative_prompt_embeds.shape}." - ) - - def get_timesteps(self, num_inference_steps, strength): - # get the original timestep using init_timestep - init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - t_start = max(num_inference_steps - init_timestep, 0) - timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :].numpy() - - return timesteps, num_inference_steps - t_start - - # Adapted from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents - def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, generator=None): - batch_size = batch_size * num_images_per_prompt - - if image.shape[1] == 4: - init_latents = image - else: - init_latents = self.vae_encoder(sample=image)[0] * self.vae_decoder.config.get("scaling_factor", 0.18215) - - if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: - # expand init_latents for batch_size - additional_image_per_prompt = batch_size // init_latents.shape[0] - init_latents = np.concatenate([init_latents] * additional_image_per_prompt, axis=0) - elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." - ) - else: - init_latents = np.concatenate([init_latents], axis=0) - - # add noise to latents using the timesteps - noise = generator.randn(*init_latents.shape).astype(dtype) - init_latents = self.scheduler.add_noise( - torch.from_numpy(init_latents), torch.from_numpy(noise), torch.from_numpy(timestep) - ) - return init_latents.numpy() - - def _get_add_time_ids( - self, original_size, crops_coords_top_left, target_size, aesthetic_score, negative_aesthetic_score, dtype - ): - if self.config.get("requires_aesthetics_score"): - add_time_ids = (original_size + crops_coords_top_left + (aesthetic_score,),) - add_neg_time_ids = (original_size + crops_coords_top_left + (negative_aesthetic_score,),) - else: - add_time_ids = (original_size + crops_coords_top_left + target_size,) - add_neg_time_ids = (original_size + crops_coords_top_left + target_size,) - - add_time_ids = np.array(add_time_ids, dtype=dtype) - add_neg_time_ids = np.array(add_neg_time_ids, dtype=dtype) - - return add_time_ids, add_neg_time_ids - - # Adapted from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.__call__ - def __call__( - self, - prompt: Optional[Union[str, List[str]]] = None, - image: Union[np.ndarray, PIL.Image.Image] = None, - strength: float = 0.3, - num_inference_steps: int = 50, - guidance_scale: float = 5.0, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: int = 1, - eta: float = 0.0, - generator: Optional[np.random.RandomState] = None, - latents: Optional[np.ndarray] = None, - prompt_embeds: Optional[np.ndarray] = None, - negative_prompt_embeds: Optional[np.ndarray] = None, - pooled_prompt_embeds: Optional[np.ndarray] = None, - negative_pooled_prompt_embeds: Optional[np.ndarray] = None, - output_type: str = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, np.ndarray], None]] = None, - callback_steps: int = 1, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - guidance_rescale: float = 0.0, - original_size: Optional[Tuple[int, int]] = None, - crops_coords_top_left: Tuple[int, int] = (0, 0), - target_size: Optional[Tuple[int, int]] = None, - aesthetic_score: float = 6.0, - negative_aesthetic_score: float = 2.5, - ): - r""" - Function invoked when calling the pipeline for generation. - - Args: - prompt (`Optional[Union[str, List[str]]]`, defaults to None): - The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. - instead. - image (`Union[np.ndarray, PIL.Image.Image]`): - `Image`, or tensor representing an image batch which will be upscaled. - strength (`float`, defaults to 0.8): - Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image` - will be used as a starting point, adding more noise to it the larger the `strength`. The number of - denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will - be maximum and the denoising process will run for the full number of iterations specified in - `num_inference_steps`. A value of 1, therefore, essentially ignores `image`. - num_inference_steps (`int`, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, defaults to 5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - negative_prompt (`Optional[Union[str, list]]`): - The prompt or prompts not to guide the image generation. If not defined, one has to pass - `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` - is less than `1`). - num_images_per_prompt (`int`, defaults to 1): - The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: - A np.random.RandomState to make generation deterministic. - latents (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. - negative_prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): - Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt - weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input - argument. - output_type (`str`, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] instead of a - plain tuple. - callback (Optional[Callable], defaults to `None`): - A function that will be called every `callback_steps` steps during inference. The function will be - called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. - callback_steps (`int`, defaults to 1): - The frequency at which the `callback` function will be called. If not specified, the callback will be - called at every step. - guidance_rescale (`float`, defaults to 0.7): - Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are - Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of - [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). - Guidance rescale factor should fix overexposure when using zero terminal SNR. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. - """ - # 0. Check inputs. Raise error if not correct - self.check_inputs(prompt, strength, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds) - - # 1. Define call parameters - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - if generator is None: - generator = np.random - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 2. Encode input prompt - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = self._encode_prompt( - prompt, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - - # 3. Preprocess image - image = self.image_processor.preprocess(image) - - # 4. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps) - - timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength) - latent_timestep = np.repeat(timesteps[:1], batch_size * num_images_per_prompt, axis=0) - timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) - - latents_dtype = prompt_embeds.dtype - image = image.astype(latents_dtype) - - # 5. Prepare latent variables - latents = self.prepare_latents( - image, latent_timestep, batch_size, num_images_per_prompt, latents_dtype, generator - ) - - # 6. Prepare extra step kwargs - extra_step_kwargs = {} - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_eta: - extra_step_kwargs["eta"] = eta - - height, width = latents.shape[-2:] - height = height * self.vae_scale_factor - width = width * self.vae_scale_factor - original_size = original_size or (height, width) - target_size = target_size or (height, width) - - # 8. Prepare added time ids & embeddings - add_text_embeds = pooled_prompt_embeds - add_time_ids, add_neg_time_ids = self._get_add_time_ids( - original_size, - crops_coords_top_left, - target_size, - aesthetic_score, - negative_aesthetic_score, - dtype=prompt_embeds.dtype, - ) - - if do_classifier_free_guidance: - prompt_embeds = np.concatenate((negative_prompt_embeds, prompt_embeds), axis=0) - add_text_embeds = np.concatenate((negative_pooled_prompt_embeds, add_text_embeds), axis=0) - add_time_ids = np.concatenate((add_time_ids, add_time_ids), axis=0) - add_time_ids = np.repeat(add_time_ids, batch_size * num_images_per_prompt, axis=0) - - # 8. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - for i, t in enumerate(self.progress_bar(timesteps)): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.cpu().numpy() - - # predict the noise residual - timestep = np.array([t], dtype=timestep_dtype) - noise_pred = self.unet( - sample=latent_model_input, - timestep=timestep, - encoder_hidden_states=prompt_embeds, - text_embeds=add_text_embeds, - time_ids=add_time_ids, - ) - noise_pred = noise_pred[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - if guidance_rescale > 0.0: - # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step( - torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs - ) - latents = scheduler_output.prev_sample.numpy() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if output_type == "latent": - image = latents - else: - latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) - # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 - image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] - ) - # apply watermark if available - if self.watermark is not None: - image = self.watermark.apply_watermark(image) - image = self.image_processor.postprocess(image, output_type=output_type) - - if not return_dict: - return (image,) - - return StableDiffusionXLPipelineOutput(images=image) diff --git a/optimum/pipelines/diffusers/pipeline_utils.py b/optimum/pipelines/diffusers/pipeline_utils.py deleted file mode 100644 index 869b91ffe59..00000000000 --- a/optimum/pipelines/diffusers/pipeline_utils.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import warnings -from typing import List, Optional, Union - -import numpy as np -import PIL -import torch -from diffusers import ConfigMixin -from diffusers.image_processor import VaeImageProcessor as DiffusersVaeImageProcessor -from diffusers.utils.pil_utils import PIL_INTERPOLATION -from PIL import Image -from tqdm.auto import tqdm - - -class DiffusionPipelineMixin(ConfigMixin): - # Copied from https://github.com/huggingface/diffusers/blob/v0.12.1/src/diffusers/pipelines/pipeline_utils.py#L812 - @staticmethod - def numpy_to_pil(images): - """ - Converts a numpy image or a batch of images to a PIL image. - """ - if images.ndim == 3: - images = images[None, ...] - images = (images * 255).round().astype("uint8") - if images.shape[-1] == 1: - # special case for grayscale (single channel) images - pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] - else: - pil_images = [Image.fromarray(image) for image in images] - - return pil_images - - # Copied from https://github.com/huggingface/diffusers/blob/v0.12.1/src/diffusers/pipelines/pipeline_utils.py#L827 - def progress_bar(self, iterable=None, total=None): - if not hasattr(self, "_progress_bar_config"): - self._progress_bar_config = {} - elif not isinstance(self._progress_bar_config, dict): - raise ValueError( - f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." - ) - - if iterable is not None: - return tqdm(iterable, **self._progress_bar_config) - elif total is not None: - return tqdm(total=total, **self._progress_bar_config) - else: - raise ValueError("Either `total` or `iterable` has to be defined.") - - -# Adapted from https://github.com/huggingface/diffusers/blob/v0.18.1/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L58 -def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): - """ - Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and - Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4 - """ - std_text = np.std(noise_pred_text, axis=tuple(range(1, noise_pred_text.ndim)), keepdims=True) - std_cfg = np.std(noise_cfg, axis=tuple(range(1, noise_cfg.ndim)), keepdims=True) - # rescale the results from guidance (fixes overexposure) - noise_pred_rescaled = noise_cfg * (std_text / std_cfg) - # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images - noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg - return noise_cfg - - -class VaeImageProcessor(DiffusersVaeImageProcessor): - # Adapted from diffusers.VaeImageProcessor.denormalize - @staticmethod - def denormalize(images: np.ndarray): - """ - Denormalize an image array to [0,1]. - """ - return np.clip(images / 2 + 0.5, 0, 1) - - # Adapted from diffusers.VaeImageProcessor.preprocess - def preprocess( - self, - image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], - height: Optional[int] = None, - width: Optional[int] = None, - ) -> np.ndarray: - """ - Preprocess the image input. Accepted formats are PIL images, NumPy arrays or PyTorch tensors. - """ - supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor) - - do_convert_grayscale = getattr(self.config, "do_convert_grayscale", False) - # Expand the missing dimension for 3-dimensional pytorch tensor or numpy array that represents grayscale image - if do_convert_grayscale and isinstance(image, (torch.Tensor, np.ndarray)) and image.ndim == 3: - if isinstance(image, torch.Tensor): - # if image is a pytorch tensor could have 2 possible shapes: - # 1. batch x height x width: we should insert the channel dimension at position 1 - # 2. channnel x height x width: we should insert batch dimension at position 0, - # however, since both channel and batch dimension has same size 1, it is same to insert at position 1 - # for simplicity, we insert a dimension of size 1 at position 1 for both cases - image = image.unsqueeze(1) - else: - # if it is a numpy array, it could have 2 possible shapes: - # 1. batch x height x width: insert channel dimension on last position - # 2. height x width x channel: insert batch dimension on first position - if image.shape[-1] == 1: - image = np.expand_dims(image, axis=0) - else: - image = np.expand_dims(image, axis=-1) - - if isinstance(image, supported_formats): - image = [image] - elif not (isinstance(image, list) and all(isinstance(i, supported_formats) for i in image)): - raise ValueError( - f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support {', '.join(supported_formats)}" - ) - - if isinstance(image[0], PIL.Image.Image): - if self.config.do_convert_rgb: - image = [self.convert_to_rgb(i) for i in image] - elif do_convert_grayscale: - image = [self.convert_to_grayscale(i) for i in image] - if self.config.do_resize: - height, width = self.get_height_width(image[0], height, width) - image = [self.resize(i, height, width) for i in image] - image = self.reshape(self.pil_to_numpy(image)) - else: - if isinstance(image[0], torch.Tensor): - image = [self.pt_to_numpy(elem) for elem in image] - image = np.concatenate(image, axis=0) if image[0].ndim == 4 else np.stack(image, axis=0) - else: - image = self.reshape(np.concatenate(image, axis=0) if image[0].ndim == 4 else np.stack(image, axis=0)) - - if do_convert_grayscale and image.ndim == 3: - image = np.expand_dims(image, 1) - - # don't need any preprocess if the image is latents - if image.shape[1] == 4: - return image - - if self.config.do_resize: - height, width = self.get_height_width(image, height, width) - image = self.resize(image, height, width) - - # expected range [0,1], normalize to [-1,1] - do_normalize = self.config.do_normalize - if image.min() < 0 and do_normalize: - warnings.warn( - "Passing `image` as torch tensor with value range in [-1,1] is deprecated. The expected value range for image tensor is [0,1] " - f"when passing as pytorch tensor or numpy Array. You passed `image` with value range [{image.min()},{image.max()}]", - FutureWarning, - ) - do_normalize = False - - if do_normalize: - image = self.normalize(image) - - if getattr(self.config, "do_binarize", False): - image = self.binarize(image) - - return image - - # Adapted from diffusers.VaeImageProcessor.postprocess - def postprocess( - self, - image: np.ndarray, - output_type: str = "pil", - do_denormalize: Optional[List[bool]] = None, - ): - if not isinstance(image, np.ndarray): - raise ValueError( - f"Input for postprocessing is in incorrect format: {type(image)}. We only support np array" - ) - if output_type not in ["latent", "np", "pil"]: - deprecation_message = ( - f"the output_type {output_type} is outdated and has been set to `np`. Please make sure to set it to one of these instead: " - "`pil`, `np`, `pt`, `latent`" - ) - warnings.warn(deprecation_message, FutureWarning) - output_type = "np" - - if output_type == "latent": - return image - - if do_denormalize is None: - do_denormalize = [self.config.do_normalize] * image.shape[0] - - image = np.stack( - [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])], axis=0 - ) - - image = image.transpose((0, 2, 3, 1)) - - if output_type == "pil": - image = self.numpy_to_pil(image) - - return image - - def get_height_width( - self, - image: [PIL.Image.Image, np.ndarray], - height: Optional[int] = None, - width: Optional[int] = None, - ): - """ - This function return the height and width that are downscaled to the next integer multiple of - `vae_scale_factor`. - - Args: - image(`PIL.Image.Image`, `np.ndarray`): - The image input, can be a PIL image, numpy array or pytorch tensor. if it is a numpy array, should have - shape `[batch, height, width]` or `[batch, height, width, channel]` if it is a pytorch tensor, should - have shape `[batch, channel, height, width]`. - height (`int`, *optional*, defaults to `None`): - The height in preprocessed image. If `None`, will use the height of `image` input. - width (`int`, *optional*`, defaults to `None`): - The width in preprocessed. If `None`, will use the width of the `image` input. - """ - height = height or (image.height if isinstance(image, PIL.Image.Image) else image.shape[-2]) - width = width or (image.width if isinstance(image, PIL.Image.Image) else image.shape[-1]) - # resize to integer multiple of vae_scale_factor - width, height = (x - x % self.config.vae_scale_factor for x in (width, height)) - return height, width - - # Adapted from diffusers.VaeImageProcessor.numpy_to_pt - @staticmethod - def numpy_to_pt(images: np.ndarray) -> torch.FloatTensor: - """ - Convert a NumPy image to a PyTorch tensor. - """ - if images.ndim == 3: - images = images[..., None] - - images = torch.from_numpy(images) - return images - - # Adapted from diffusers.VaeImageProcessor.pt_to_numpy - @staticmethod - def pt_to_numpy(images: torch.FloatTensor) -> np.ndarray: - """ - Convert a PyTorch tensor to a NumPy image. - """ - images = images.cpu().float().numpy() - return images - - @staticmethod - def reshape(images: np.ndarray) -> np.ndarray: - """ - Reshape inputs to expected shape. - """ - if images.ndim == 3: - images = images[..., None] - - return images.transpose(0, 3, 1, 2) - - # TODO : remove after diffusers v0.21.0 release - def resize( - self, - image: [PIL.Image.Image, np.ndarray, torch.Tensor], - height: Optional[int] = None, - width: Optional[int] = None, - ) -> [PIL.Image.Image, np.ndarray, torch.Tensor]: - """ - Resize image. - """ - if isinstance(image, PIL.Image.Image): - image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample]) - elif isinstance(image, torch.Tensor): - image = torch.nn.functional.interpolate(image, size=(height, width)) - elif isinstance(image, np.ndarray): - image = self.numpy_to_pt(image) - image = torch.nn.functional.interpolate(image, size=(height, width)) - image = self.pt_to_numpy(image) - return image diff --git a/optimum/pipelines/diffusers/watermark.py b/optimum/pipelines/diffusers/watermark.py deleted file mode 100644 index b3cd622edac..00000000000 --- a/optimum/pipelines/diffusers/watermark.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np -from imwatermark import WatermarkEncoder - - -WATERMARK_MESSAGE = 0b101100111110110010010000011110111011000110011110 -WATERMARK_BITS = [int(bit) for bit in bin(WATERMARK_MESSAGE)[2:]] - - -# Adapted from https://github.com/huggingface/diffusers/blob/v0.18.1/src/diffusers/pipelines/stable_diffusion_xl/watermark.py#L12 -class StableDiffusionXLWatermarker: - def __init__(self): - self.watermark = WATERMARK_BITS - self.encoder = WatermarkEncoder() - self.encoder.set_watermark("bits", self.watermark) - - def apply_watermark(self, images: np.array): - # can't encode images that are smaller than 256 - if images.shape[-1] < 256: - return images - - # cv2 doesn't support float16 - if images.dtype == np.float16: - images = images.astype(np.float32) - - images = (255 * (images / 2 + 0.5)).transpose((0, 2, 3, 1)) - - images = np.array([self.encoder.encode(image, "dwtDct") for image in images]).transpose((0, 3, 1, 2)) - - np.clip(2 * (images / 255 - 0.5), -1.0, 1.0, out=images) - - return images diff --git a/optimum/pipelines/pipelines_base.py b/optimum/pipelines/pipelines_base.py index a08ab8782a3..7690143f13f 100644 --- a/optimum/pipelines/pipelines_base.py +++ b/optimum/pipelines/pipelines_base.py @@ -24,6 +24,7 @@ FillMaskPipeline, ImageClassificationPipeline, ImageSegmentationPipeline, + ImageToImagePipeline, ImageToTextPipeline, Pipeline, PreTrainedTokenizer, @@ -55,6 +56,7 @@ ORTModelForCausalLM, ORTModelForFeatureExtraction, ORTModelForImageClassification, + ORTModelForImageToImage, ORTModelForMaskedLM, ORTModelForQuestionAnswering, ORTModelForSemanticSegmentation, @@ -157,6 +159,12 @@ "default": "superb/hubert-base-superb-ks", "type": "audio", }, + "image-to-image": { + "impl": ImageToImagePipeline, + "class": (ORTModelForImageToImage,), + "default": "caidas/swin2SR-classical-sr-x2-64", + "type": "image", + }, } else: ORT_SUPPORTED_TASKS = {} diff --git a/optimum/runs_base.py b/optimum/runs_base.py index 3a1d164c602..dadd445818f 100644 --- a/optimum/runs_base.py +++ b/optimum/runs_base.py @@ -2,13 +2,12 @@ import subprocess from contextlib import contextmanager from time import perf_counter_ns -from typing import Set +from typing import TYPE_CHECKING, Set import numpy as np import optuna import torch import transformers -from datasets import Dataset from tqdm import trange from . import version as optimum_version @@ -21,6 +20,9 @@ from .utils.runs import RunConfig, cpu_info_command +if TYPE_CHECKING: + from datasets import Dataset + os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -34,7 +36,7 @@ def get_autoclass_name(task): class Calibrator: def __init__( - self, calibration_dataset: Dataset, quantizer, model_path, qconfig, calibration_params, node_exclusion + self, calibration_dataset: "Dataset", quantizer, model_path, qconfig, calibration_params, node_exclusion ): self.calibration_dataset = calibration_dataset self.quantizer = quantizer diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py index 5d5044e63e1..2aa90253d08 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -16,7 +16,9 @@ from .constant import ( CONFIG_NAME, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, + DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, + DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER, DIFFUSION_MODEL_UNET_SUBFOLDER, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, @@ -29,9 +31,11 @@ TRANSFORMERS_MINIMUM_VERSION, check_if_diffusers_greater, check_if_pytorch_greater, + check_if_torch_greater, check_if_transformers_greater, is_accelerate_available, is_auto_gptq_available, + is_datasets_available, is_diffusers_available, is_onnx_available, is_onnxruntime_available, @@ -49,8 +53,11 @@ DummyAudioInputGenerator, DummyBboxInputGenerator, DummyCodegenDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, + DummyFluxTransformerTextInputGenerator, + DummyFluxTransformerVisionInputGenerator, DummyInputGenerator, DummyIntGenerator, DummyLabelsGenerator, @@ -62,6 +69,9 @@ DummySpeechT5InputGenerator, DummyTextInputGenerator, DummyTimestepInputGenerator, + DummyTransformerTextInputGenerator, + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, DummyVisionEmbeddingsGenerator, DummyVisionEncoderDecoderPastKeyValuesGenerator, DummyVisionInputGenerator, diff --git a/optimum/utils/constant.py b/optimum/utils/constant.py index 4497b5246d4..eb7a67e9ece 100644 --- a/optimum/utils/constant.py +++ b/optimum/utils/constant.py @@ -15,8 +15,10 @@ CONFIG_NAME = "config.json" DIFFUSION_MODEL_UNET_SUBFOLDER = "unet" -DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" +DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer" DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder" DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder" +DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2" +DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3" ONNX_WEIGHTS_NAME = "model.onnx" diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index f6914bbcd3a..ff8b587e19f 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -15,6 +15,50 @@ from .import_utils import DummyObject, requires_backends +class ORTDiffusionPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForText2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForImage2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForInpainting(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + class ORTStableDiffusionPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -70,6 +114,17 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) +class ORTStableDiffusionXLInpaintPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -79,3 +134,58 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) + + +class ORTLatentConsistencyModelImg2ImgPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTStableDiffusion3Pipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTStableDiffusion3Img2ImgPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTStableDiffusion3InpaintPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTFluxPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index 4a57fda79ce..405e3815b33 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -69,6 +69,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[ _auto_gptq_available = _is_package_available("auto_gptq") _timm_available = _is_package_available("timm") _sentence_transformers_available = _is_package_available("sentence_transformers") +_datasets_available = _is_package_available("datasets") torch_version = None if is_torch_available(): @@ -131,6 +132,10 @@ def is_sentence_transformers_available(): return _sentence_transformers_available +def is_datasets_available(): + return _datasets_available + + def is_auto_gptq_available(): if _auto_gptq_available: version_autogptq = version.parse(importlib_metadata.version("auto_gptq")) @@ -193,6 +198,22 @@ def check_if_diffusers_greater(target_version: str) -> bool: return version.parse(_diffusers_version) >= version.parse(target_version) +def check_if_torch_greater(target_version: str) -> bool: + """ + Checks whether the current install of torch is greater than or equal to the target version. + + Args: + target_version (str): version used as the reference for comparison. + + Returns: + bool: whether the check is True or not. + """ + if not is_torch_available(): + return False + + return torch_version >= version.parse(target_version) + + @contextmanager def require_numpy_strictly_lower(package_version: str, message: str): if not version.parse(np.__version__) < version.parse(package_version): @@ -214,6 +235,12 @@ def require_numpy_strictly_lower(package_version: str, message: str): -U transformers`. Please note that you may need to restart your runtime after installation. """ +DATASETS_IMPORT_ERROR = """ +{0} requires the datasets library but it was not found in your environment. You can install it with pip: +`pip install datasets`. Please note that you may need to restart your runtime after installation. +""" + + BACKENDS_MAPPING = OrderedDict( [ ("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)), @@ -229,6 +256,7 @@ def require_numpy_strictly_lower(package_version: str, message: str): "transformers_434", (lambda: check_if_transformers_greater("4.34"), "{0} " + TRANSFORMERS_IMPORT_ERROR.format("4.34")), ), + ("datasets", (is_datasets_available, DATASETS_IMPORT_ERROR)), ] ) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index dac14a38114..18a2a5a3fd1 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -22,7 +22,7 @@ import numpy as np from transformers.utils import is_tf_available, is_torch_available -from ..utils import check_if_transformers_greater +from ..utils import check_if_diffusers_greater, check_if_transformers_greater from .normalized_config import ( NormalizedConfig, NormalizedEncoderDecoderConfig, @@ -36,7 +36,7 @@ import torch if is_tf_available(): - import tensorflow as tf + import tensorflow as tf # type: ignore def check_framework_is_available(func): @@ -507,6 +507,43 @@ class DummyDecoderTextInputGenerator(DummyTextInputGenerator): ) +class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): + """ + Generates dummy decision transformer inputs. + """ + + SUPPORTED_INPUT_NAMES = ( + "states", + "actions", + "timesteps", + "returns_to_go", + "attention_mask", + ) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.act_dim = self.normalized_config.config.act_dim + self.state_dim = self.normalized_config.config.state_dim + self.max_ep_len = self.normalized_config.config.max_ep_len + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "states": + shape = [self.batch_size, self.sequence_length, self.state_dim] + elif input_name == "actions": + shape = [self.batch_size, self.sequence_length, self.act_dim] + elif input_name == "rewards": + shape = [self.batch_size, self.sequence_length, 1] + elif input_name == "returns_to_go": + shape = [self.batch_size, self.sequence_length, 1] + elif input_name == "attention_mask": + shape = [self.batch_size, self.sequence_length] + elif input_name == "timesteps": + shape = [self.batch_size, self.sequence_length] + return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype) + + return self.random_float_tensor(shape, min_value=-2.0, max_value=2.0, framework=framework, dtype=float_dtype) + + class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): SUPPORTED_INPUT_NAMES = ( "decoder_input_ids", @@ -860,23 +897,31 @@ def __init__( ): self.task = task self.vocab_size = normalized_config.vocab_size - self.text_encoder_projection_dim = normalized_config.text_encoder_projection_dim - self.time_ids = 5 if normalized_config.requires_aesthetics_score else 6 + self.text_encoder_projection_dim = getattr(normalized_config, "text_encoder_projection_dim", None) + self.time_ids = 5 if getattr(normalized_config, "requires_aesthetics_score", False) else 6 if random_batch_size_range: low, high = random_batch_size_range self.batch_size = random.randint(low, high) else: self.batch_size = batch_size - self.time_cond_proj_dim = normalized_config.config.time_cond_proj_dim + self.time_cond_proj_dim = getattr(normalized_config.config, "time_cond_proj_dim", None) def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): if input_name == "timestep": - shape = [self.batch_size] - return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype) + shape = [] # a scalar with no dimension (it can be int or float depending on the sd architecture) + return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) if input_name == "text_embeds": + if self.text_encoder_projection_dim is None: + raise ValueError( + "Unable to infer the value of `text_encoder_projection_dim` for generating `text_embeds`, please double check the config of your model." + ) dim = self.text_encoder_projection_dim elif input_name == "timestep_cond": + if self.time_cond_proj_dim is None: + raise ValueError( + "Unable to infer the value of `time_cond_proj_dim` for generating `timestep_cond`, please double check the config of your model." + ) dim = self.time_cond_proj_dim else: dim = self.time_ids @@ -1411,3 +1456,80 @@ def generate( float_dtype: str = "fp32", ): return self.random_int_tensor(shape=(1,), min_value=20, max_value=22, framework=framework, dtype=int_dtype) + + +class DummyTransformerTimestepInputGenerator(DummyTimestepInputGenerator): + SUPPORTED_INPUT_NAMES = ("timestep",) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "timestep": + shape = [self.batch_size] # With transformer diffusers, timestep is a 1D tensor + return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) + + return super().generate(input_name, framework, int_dtype, float_dtype) + + +class DummyTransformerVisionInputGenerator(DummyVisionInputGenerator): + SUPPORTED_INPUT_NAMES = ("hidden_states",) + + +class DummyTransformerTextInputGenerator(DummySeq2SeqDecoderTextInputGenerator): + SUPPORTED_INPUT_NAMES = ( + "encoder_hidden_states", + "pooled_projection", + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "encoder_hidden_states": + return super().generate(input_name, framework, int_dtype, float_dtype)[0] + + elif input_name == "pooled_projections": + return self.random_float_tensor( + [self.batch_size, self.normalized_config.projection_size], framework=framework, dtype=float_dtype + ) + + return super().generate(input_name, framework, int_dtype, float_dtype) + + +class DummyFluxTransformerVisionInputGenerator(DummyTransformerVisionInputGenerator): + SUPPORTED_INPUT_NAMES = ( + "hidden_states", + "img_ids", + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "hidden_states": + shape = [self.batch_size, (self.height // 2) * (self.width // 2), self.num_channels] + return self.random_float_tensor(shape, framework=framework, dtype=float_dtype) + elif input_name == "img_ids": + shape = ( + [(self.height // 2) * (self.width // 2), 3] + if check_if_diffusers_greater("0.31.0") + else [self.batch_size, (self.height // 2) * (self.width // 2), 3] + ) + return self.random_int_tensor(shape, max_value=1, framework=framework, dtype=int_dtype) + + return super().generate(input_name, framework, int_dtype, float_dtype) + + +class DummyFluxTransformerTextInputGenerator(DummyTransformerTextInputGenerator): + SUPPORTED_INPUT_NAMES = ( + "encoder_hidden_states", + "pooled_projections", + "guidance", + "txt_ids", + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "txt_ids": + shape = ( + [self.sequence_length, 3] + if check_if_diffusers_greater("0.31.0") + else [self.batch_size, self.sequence_length, 3] + ) + return self.random_int_tensor(shape, max_value=1, framework=framework, dtype=int_dtype) + elif input_name == "guidance": + shape = [self.batch_size] + return self.random_float_tensor(shape, min_value=0, max_value=1, framework=framework, dtype=float_dtype) + + return super().generate(input_name, framework, int_dtype, float_dtype) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 81207b76496..9ceed24c2dd 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -281,6 +281,7 @@ class NormalizedConfigManager: "xlm-roberta": NormalizedTextConfig, "yolos": NormalizedVisionConfig, "qwen2": NormalizedTextConfig, + "granite": NormalizedTextConfigWithGQA, } @classmethod diff --git a/optimum/utils/preprocessing/base.py b/optimum/utils/preprocessing/base.py index dc995ccc50b..7cfda13ba7d 100644 --- a/optimum/utils/preprocessing/base.py +++ b/optimum/utils/preprocessing/base.py @@ -20,15 +20,16 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union -from datasets import Dataset, DatasetDict -from datasets import load_dataset as datasets_load_dataset from transformers import PreTrainedTokenizerBase from transformers.image_processing_utils import BaseImageProcessor +from optimum.utils.import_utils import requires_backends + from .. import logging if TYPE_CHECKING: + from datasets import Dataset, DatasetDict from transformers import PretrainedConfig @@ -102,11 +103,14 @@ def create_dataset_processing_func( def prepare_dataset( self, - dataset: Union[DatasetDict, Dataset], + dataset: Union["DatasetDict", "Dataset"], data_keys: Dict[str, str], ref_keys: Optional[List[str]] = None, split: Optional[str] = None, - ) -> Union[DatasetDict, Dataset]: + ) -> Union["DatasetDict", "Dataset"]: + requires_backends(self, ["datasets"]) + from datasets import Dataset + if isinstance(dataset, Dataset) and split is not None: raise ValueError("A Dataset and a split name were provided, but splits are for DatasetDict.") elif split is not None: @@ -131,7 +135,12 @@ def load_dataset( num_samples: Optional[int] = None, shuffle: bool = False, **load_dataset_kwargs, - ) -> Union[DatasetDict, Dataset]: + ) -> Union["DatasetDict", "Dataset"]: + requires_backends(self, ["datasets"]) + + from datasets import Dataset, DatasetDict + from datasets import load_dataset as datasets_load_dataset + dataset = datasets_load_dataset(path, **load_dataset_kwargs) if isinstance(dataset, DatasetDict) and load_smallest_split: diff --git a/optimum/utils/testing_utils.py b/optimum/utils/testing_utils.py index 76fe9a05b13..88b1acdb780 100644 --- a/optimum/utils/testing_utils.py +++ b/optimum/utils/testing_utils.py @@ -28,6 +28,7 @@ from . import ( is_accelerate_available, is_auto_gptq_available, + is_datasets_available, is_diffusers_available, is_sentence_transformers_available, is_timm_available, @@ -146,6 +147,10 @@ def require_sentence_transformers(test_case): return unittest.skipUnless(is_sentence_transformers_available(), "test requires sentence-transformers")(test_case) +def require_datasets(test_case): + return unittest.skipUnless(is_datasets_available(), "test requires datasets")(test_case) + + def grid_parameters( parameters: Dict[str, Iterable[Any]], yield_dict: bool = False, diff --git a/optimum/version.py b/optimum/version.py index 4a8a7edab63..4fff28e5c97 100644 --- a/optimum/version.py +++ b/optimum/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.23.0.dev0" +__version__ = "1.24.0.dev0" diff --git a/pyproject.toml b/pyproject.toml index 99a0f1c85fa..17bcd90e066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ markers = [ "rocm_ep_test", "tensorflow_test", "timm_test", + "datasets_test", "run_in_series", "run_slow", "accelerate_test", diff --git a/setup.py b/setup.py index ac5db71a74b..6736085943a 100644 --- a/setup.py +++ b/setup.py @@ -13,14 +13,11 @@ REQUIRED_PKGS = [ - "coloredlogs", - "sympy", - "transformers[sentencepiece]>=4.29,<4.45.0", + "transformers>=4.29", "torch>=1.11", "packaging", - "numpy<2.0", # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569 + "numpy", "huggingface_hub>=0.8.0", - "datasets", ] # TODO: unpin pytest once https://github.com/huggingface/transformers/pull/29154 is merged & released @@ -37,9 +34,9 @@ "diffusers>=0.17.0", "torchaudio", "einops", - "invisible-watermark", "timm", "scikit-learn", + "sentencepiece", "rjieba", ] @@ -54,6 +51,7 @@ "datasets>=1.2.1", "evaluate", "protobuf>=3.20.1", + "transformers>=4.36,<4.47.0", ], "onnxruntime-gpu": [ "onnx", @@ -62,9 +60,20 @@ "evaluate", "protobuf>=3.20.1", "accelerate", # ORTTrainer requires it. + "transformers>=4.36,<4.47.0", + ], + "exporters": [ + "onnx", + "onnxruntime", + "timm", + "transformers>=4.36,<4.47.0", + ], + "exporters-gpu": [ + "onnx", + "onnxruntime-gpu", + "timm", + "transformers>=4.36,<4.47.0", ], - "exporters": ["onnx", "onnxruntime", "timm"], - "exporters-gpu": ["onnx", "onnxruntime-gpu", "timm"], "exporters-tf": [ "tensorflow>=2.4,<=2.12.1", "tf2onnx", @@ -74,7 +83,7 @@ "h5py", "numpy<1.24.0", "datasets<=2.16", - "transformers[sentencepiece]>=4.26,<4.38", + "transformers>=4.36,<4.38", ], "diffusers": ["diffusers"], "intel": "optimum-intel>=1.18.0", @@ -82,7 +91,7 @@ "nncf": "optimum-intel[nncf]>=1.18.0", "neural-compressor": "optimum-intel[neural-compressor]>=1.18.0", "ipex": "optimum-intel[ipex]>=1.18.0", - "habana": ["optimum-habana", "transformers>=4.43.0,<4.44.0"], + "habana": ["optimum-habana", "transformers>=4.45.0,<4.46.0"], "neuron": ["optimum-neuron[neuron]>=0.0.20", "transformers>=4.36.2,<4.42.0"], "neuronx": ["optimum-neuron[neuronx]>=0.0.20", "transformers>=4.36.2,<4.42.0"], "graphcore": "optimum-graphcore", @@ -111,9 +120,10 @@ "Intended Audience :: Education", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], keywords="transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, " @@ -125,7 +135,7 @@ packages=find_namespace_packages(include=["optimum*"]), install_requires=REQUIRED_PKGS, extras_require=EXTRAS_REQUIRE, - python_requires=">=3.7.0", + python_requires=">=3.9.0", include_package_data=True, zip_safe=False, entry_points={"console_scripts": ["optimum-cli=optimum.commands.optimum_cli:main"]}, diff --git a/tests/bettertransformer/test_audio.py b/tests/bettertransformer/test_audio.py index be01a92d447..caca91e27ca 100644 --- a/tests/bettertransformer/test_audio.py +++ b/tests/bettertransformer/test_audio.py @@ -35,7 +35,7 @@ class TestsWhisper(unittest.TestCase): def test_error_message(self): - model = AutoModel.from_pretrained("openai/whisper-tiny") + model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="eager") with self.assertRaises(ValueError) as cm: model = BetterTransformer.transform(model) @@ -82,15 +82,19 @@ def _test_fp16_inference( set_seed(0) if not use_to_operator: - hf_random_model = automodel_class.from_pretrained(model_id, torch_dtype=torch.float16).to(0) + hf_random_model = automodel_class.from_pretrained( + model_id, torch_dtype=torch.float16, attn_implementation="eager" + ).to(0) converted_model = BetterTransformer.transform(hf_random_model, keep_original_model=False) - hf_random_model = automodel_class.from_pretrained(model_id, torch_dtype=torch.float16).to(0) + hf_random_model = automodel_class.from_pretrained( + model_id, torch_dtype=torch.float16, attn_implementation="eager" + ).to(0) else: - hf_random_model = automodel_class.from_pretrained(model_id).to(0) + hf_random_model = automodel_class.from_pretrained(model_id, attn_implementation="eager").to(0) converted_model = BetterTransformer.transform(hf_random_model, keep_original_model=False) - hf_random_model = automodel_class.from_pretrained(model_id).to(0) + hf_random_model = automodel_class.from_pretrained(model_id, attn_implementation="eager").to(0) hf_random_model = hf_random_model.to(torch.float16) converted_model = converted_model.to(torch.float16) @@ -147,7 +151,7 @@ def test_generation(self, test_name: str, model_type: str, batch_size: int): model_id = MODELS_DICT[model_type] processor = AutoProcessor.from_pretrained(model_id) - model = AutoModel.from_pretrained(model_id) + model = AutoModel.from_pretrained(model_id, attn_implementation="eager") text = ["This is me and me"] if batch_size > 1: @@ -217,14 +221,14 @@ def test_logits(self, model_type: str): inputs = self.prepare_inputs_for_class(model_id, model_type) torch.manual_seed(0) - hf_random_model = AutoModel.from_pretrained(model_id).eval() + hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() random_config = hf_random_model.config torch.manual_seed(0) converted_model = BetterTransformer.transform(hf_random_model) torch.manual_seed(0) - hf_random_model = AutoModel.from_pretrained(model_id).eval() + hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() random_config = hf_random_model.config self.assertFalse( diff --git a/tests/bettertransformer/test_common.py b/tests/bettertransformer/test_common.py index 35b89d2ed2e..b8bc0a3b3d9 100644 --- a/tests/bettertransformer/test_common.py +++ b/tests/bettertransformer/test_common.py @@ -28,7 +28,7 @@ class BetterTransformerIntegrationTests(unittest.TestCase): def test_raise_error_on_double_transform_call(self): - model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-BertModel") + model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-BertModel", attn_implementation="eager") with self.assertRaises(Exception) as cm: bt_model = BetterTransformer.transform(model) @@ -59,7 +59,7 @@ def test_raise_on_save(self, model_type: str): ) for model_id in model_ids: with self.assertRaises(ValueError), tempfile.TemporaryDirectory() as tmpdirname: - hf_model = AutoModel.from_pretrained(model_id).eval() + hf_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() bt_model = BetterTransformer.transform(hf_model, keep_original_model=False) bt_model.save_pretrained(tmpdirname) @@ -73,7 +73,7 @@ def test_conversion(self, model_type: str): MODELS_DICT[model_type] if isinstance(MODELS_DICT[model_type], tuple) else (MODELS_DICT[model_type],) ) for model_id in model_ids: - hf_random_model = AutoModel.from_pretrained(model_id) + hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager") converted_model = BetterTransformer.transform(hf_random_model) self.assertTrue( @@ -99,7 +99,7 @@ def test_raise_save_pretrained_error(self, test_name: str, model_type: str, keep ) for model_id in model_ids: # get hf and bt model - hf_model = AutoModel.from_pretrained(model_id) + hf_model = AutoModel.from_pretrained(model_id, attn_implementation="eager") # get bt model and invert it bt_model = BetterTransformer.transform(hf_model, keep_original_model=keep_original_model) @@ -145,9 +145,11 @@ def test_raise_activation_fun(self, model_type: str): )() # random config class for the model to test hf_random_config.hidden_act = "silu" - hf_random_model = AutoModel.from_config(hf_random_config).eval() + hf_random_model = AutoModel.from_config(hf_random_config, attn_implementation="eager").eval() + with self.assertRaises(ValueError) as cm: _ = BetterTransformer.transform(hf_random_model, keep_original_model=True) + self.assertTrue("Activation function" in str(cm.exception)) def test_dict_class_consistency(self): diff --git a/tests/bettertransformer/test_decoder.py b/tests/bettertransformer/test_decoder.py index bab8f376fcc..e2bc6ddc2fb 100644 --- a/tests/bettertransformer/test_decoder.py +++ b/tests/bettertransformer/test_decoder.py @@ -131,7 +131,7 @@ def test_logits_with_cache(self, test_name: str, model_type: str, batch_size: in model_id = MODELS_DICT[model_type] - model = AutoModelForCausalLM.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="eager") normalized_config = NormalizedConfigManager.get_normalized_config_class(model.config.model_type)(model.config) @@ -167,7 +167,7 @@ def test_generation(self, test_name: str, model_type: str, batch_size: int, padd model_id = MODELS_DICT[model_type] tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="eager") if not hasattr(tokenizer, "pad_token") or tokenizer.pad_token is None: if tokenizer.eos_token != "": @@ -224,7 +224,9 @@ def test_invert_model_logits(self, test_name: str, model_type: str, keep_origina @require_torch_gpu @require_accelerate def test_accelerate_compatibility_cpu_gpu(self, keep_original_model=True, max_memory=None): - hf_model = AutoModelForCausalLM.from_pretrained("gpt2", device_map="auto", max_memory=max_memory).eval() + hf_model = AutoModelForCausalLM.from_pretrained( + "gpt2", device_map="auto", max_memory=max_memory, attn_implementation="eager" + ).eval() bt_model = BetterTransformer.transform( hf_model, keep_original_model=keep_original_model, max_memory=max_memory ) diff --git a/tests/bettertransformer/test_encoder.py b/tests/bettertransformer/test_encoder.py index 74aacaed58c..7dd42c43b05 100644 --- a/tests/bettertransformer/test_encoder.py +++ b/tests/bettertransformer/test_encoder.py @@ -181,7 +181,9 @@ def check_accelerate_compatibility_cpu_gpu(self, keep_original_model=True, max_m If this works for roberta, it should work for all other models too. """ - hf_model = AutoModel.from_pretrained("xlm-roberta-base", device_map="auto", max_memory=max_memory).eval() + hf_model = AutoModel.from_pretrained( + "xlm-roberta-base", device_map="auto", max_memory=max_memory, attn_implementation="eager" + ).eval() bt_model = BetterTransformer.transform( hf_model, keep_original_model=keep_original_model, max_memory=max_memory ) diff --git a/tests/bettertransformer/test_encoder_decoder.py b/tests/bettertransformer/test_encoder_decoder.py index 8d05923522a..5ce4d62b12c 100644 --- a/tests/bettertransformer/test_encoder_decoder.py +++ b/tests/bettertransformer/test_encoder_decoder.py @@ -153,7 +153,7 @@ def test_generation(self, test_name: str, model_type: str, batch_size: int, padd model_id = MODELS_DICT[model_type] tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForSeq2SeqLM.from_pretrained(model_id) + model = AutoModelForSeq2SeqLM.from_pretrained(model_id, attn_implementation="eager") if not hasattr(tokenizer, "pad_token") or tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token diff --git a/tests/bettertransformer/test_gpu.py b/tests/bettertransformer/test_gpu.py index b992b90d3c8..ada38e408fa 100644 --- a/tests/bettertransformer/test_gpu.py +++ b/tests/bettertransformer/test_gpu.py @@ -26,7 +26,9 @@ def timing_cuda(model, num_batches, input_ids, masks, decoder_input_ids): def benchmark(model_name: str, num_batches: int, batch_size: int, max_seqlen: int, is_half: bool): - hf_model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16 if is_half else None).eval() + hf_model = AutoModel.from_pretrained( + model_name, torch_dtype=torch.float16 if is_half else None, attn_implementation="eager" + ).eval() hf_model = hf_model.to("cuda:0") bt_model = BetterTransformer.transform(hf_model, keep_original_model=True) diff --git a/tests/bettertransformer/testing_utils.py b/tests/bettertransformer/testing_utils.py index e9e2edd9790..f79cbb34512 100644 --- a/tests/bettertransformer/testing_utils.py +++ b/tests/bettertransformer/testing_utils.py @@ -59,12 +59,12 @@ # "llama": "fxmarty/tiny-llama-fast-tokenizer", # "llama-gqa": "noamwies/llama-test-gqa-with-better-transformer", "m2m_100": "hf-internal-testing/tiny-random-nllb", - "marian": "fxmarty/tiny-marian", # the other tiny ones have a too small max_position_embeddings + "marian": "optimum-internal-testing/tiny-random-marian", # the other tiny ones have a too small max_position_embeddings "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel", "mbart": "hf-internal-testing/tiny-random-mbart", "opt": "hf-internal-testing/tiny-random-OPTModel", "pegasus": "hf-internal-testing/tiny-random-PegasusModel", - "prophetnet": "hirotasoshu/tiny-random-prophetnet", # the other tiny ones have a too small max_position_embeddings + "prophetnet": "optimum-internal-testing/tiny-random-prophetnet", # the other tiny ones have a too small max_position_embeddings "rembert": "hf-internal-testing/tiny-random-RemBertModel", "roberta": "hf-internal-testing/tiny-random-RobertaModel", "rocbert": "hf-internal-testing/tiny-random-RoCBertModel", @@ -136,10 +136,12 @@ def _test_fp16_inference( torch.manual_seed(0) if not use_to_operator: - hf_random_model = automodel_class.from_pretrained(model_id, torch_dtype=torch.float16).to(0) + hf_random_model = automodel_class.from_pretrained( + model_id, torch_dtype=torch.float16, attn_implementation="eager" + ).to(0) converted_model = BetterTransformer.transform(hf_random_model, keep_original_model=True) else: - hf_random_model = automodel_class.from_pretrained(model_id).to(0) + hf_random_model = automodel_class.from_pretrained(model_id, attn_implementation="eager").to(0) converted_model = BetterTransformer.transform(hf_random_model, keep_original_model=True) hf_random_model = hf_random_model.to(torch.float16) converted_model = converted_model.to(torch.float16) @@ -169,7 +171,7 @@ def _test_fp16_inference( def _test_logits_backward(self, model_id: str, model_type: str, **preprocessor_kwargs): inputs = self.prepare_inputs_for_class(model_id=model_id, model_type=model_type, **preprocessor_kwargs) - hf_random_model = AutoModel.from_pretrained(model_id).eval() + hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() random_config = hf_random_model.config # I could not obtain reproducible results with `torch.manual_seed` nor with @@ -309,7 +311,7 @@ def _test_train_decoder(self, model_id: str, model_type: str, **kwargs): """ inputs = self.prepare_inputs_for_class(model_id=model_id, model_type=model_type, **kwargs) - hf_random_model = AutoModel.from_pretrained(model_id).eval() + hf_random_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() bt_model = BetterTransformer.transform(hf_random_model, keep_original_model=True) bt_model.train() @@ -328,7 +330,7 @@ def _test_invert_modules(self, model_id, keep_original_model=False): r""" Test that the inverse converted model and hf model have the same modules """ - hf_model = AutoModel.from_pretrained(model_id) + hf_model = AutoModel.from_pretrained(model_id, attn_implementation="eager") hf_modules = list(hf_model.modules()) bt_model = BetterTransformer.transform(hf_model, keep_original_model=keep_original_model) @@ -349,7 +351,7 @@ def _test_invert_modules(self, model_id, keep_original_model=False): def _test_save_load_invertible(self, model_id, keep_original_model=True): with tempfile.TemporaryDirectory() as tmpdirname: - hf_model = AutoModel.from_pretrained(model_id).eval() + hf_model = AutoModel.from_pretrained(model_id, attn_implementation="eager").eval() hf_model_state_dict = copy.deepcopy(hf_model.state_dict()) bt_model = BetterTransformer.transform(hf_model, keep_original_model=keep_original_model) @@ -362,7 +364,7 @@ def _test_save_load_invertible(self, model_id, keep_original_model=True): # saving a normal transformers bark model fails because of shared tensors bt_model.save_pretrained(tmpdirname, safe_serialization=hf_model.config.model_type != "bark") - bt_model_from_load = AutoModel.from_pretrained(tmpdirname) + bt_model_from_load = AutoModel.from_pretrained(tmpdirname, attn_implementation="eager") self.assertEqual( set(bt_model.state_dict().keys()), @@ -397,7 +399,7 @@ def _test_invert_model_logits( """ inputs = self.prepare_inputs_for_class(model_id, model_type=model_type, **preprocessor_kwargs) - hf_model = AutoModel.from_pretrained(model_id) + hf_model = AutoModel.from_pretrained(model_id, attn_implementation="eager") hf_model = hf_model.eval() with torch.inference_mode(): diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index a55c7a124df..32156d9eebf 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -67,6 +67,7 @@ "data2vec-audio": "hf-internal-testing/tiny-random-Data2VecAudioModel", "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", + "decision-transformer": "edbeeching/decision-transformer-gym-hopper-medium", "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel", @@ -100,6 +101,7 @@ "gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJModel", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "ibert": "hf-internal-testing/tiny-random-IBertModel", "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel", @@ -136,6 +138,7 @@ "phi3": "Xenova/tiny-random-Phi3ForCausalLM", "pix2struct": "fxmarty/pix2struct-tiny-random", # "rembert": "google/rembert", + "rembert": "hf-internal-testing/tiny-random-RemBertModel", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "qwen2": "fxmarty/tiny-dummy-qwen2", "regnet": "hf-internal-testing/tiny-random-RegNetModel", @@ -255,7 +258,7 @@ "owlv2": "google/owlv2-base-patch16", "owlvit": "google/owlvit-base-patch32", "perceiver": "hf-internal-testing/tiny-random-PerceiverModel", # Not using deepmind/language-perceiver because it takes too much time for testing. - # "rembert": "google/rembert", + "rembert": "google/rembert", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "regnet": "facebook/regnet-y-040", "resnet": "microsoft/resnet-50", @@ -296,9 +299,11 @@ } PYTORCH_DIFFUSION_MODEL = { + "flux": "optimum-internal-testing/tiny-random-flux", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", + "stable-diffusion-3": "yujiepan/stable-diffusion-3-tiny-random", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", - "lcm": "echarlaix/tiny-random-latent-consistency", } PYTORCH_TIMM_MODEL = { diff --git a/tests/exporters/onnx/test_exporters_onnx_cli.py b/tests/exporters/onnx/test_exporters_onnx_cli.py index ed611ade04e..9ac7832aa7d 100644 --- a/tests/exporters/onnx/test_exporters_onnx_cli.py +++ b/tests/exporters/onnx/test_exporters_onnx_cli.py @@ -602,6 +602,15 @@ def test_diffusion(self): check=True, ) + @require_sentence_transformers + def test_sentence_transformers(self): + with TemporaryDirectory() as tmpdirname: + subprocess.run( + f"python3 -m optimum.exporters.onnx --model sentence-transformers-testing/stsb-bert-tiny-onnx --task feature-extraction {tmpdirname}", + shell=True, + check=True, + ) + def test_legacy(self): with TemporaryDirectory() as tmpdirname: subprocess.run( diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index d1471aa218a..88288547c95 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -43,7 +43,7 @@ from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED from optimum.exporters.onnx.model_configs import WhisperOnnxConfig from optimum.exporters.onnx.utils import get_speecht5_models_for_export -from optimum.utils import ONNX_WEIGHTS_NAME, DummyPastKeyValuesGenerator, NormalizedTextConfig +from optimum.utils import DummyPastKeyValuesGenerator, NormalizedTextConfig from optimum.utils.save_utils import maybe_load_preprocessors from optimum.utils.testing_utils import grid_parameters, require_diffusers @@ -292,27 +292,20 @@ def _onnx_export( gc.collect() - def _onnx_export_sd(self, model_type: str, model_name: str, device="cpu"): + def _onnx_export_diffusion_models(self, model_type: str, model_name: str, device="cpu"): pipeline = TasksManager.get_model_from_task(model_type, model_name, device=device) models_and_onnx_configs = get_diffusion_models_for_export(pipeline) - output_names = [os.path.join(name_dir, ONNX_WEIGHTS_NAME) for name_dir in models_and_onnx_configs] - model, _ = models_and_onnx_configs["vae_encoder"] - model.forward = lambda sample: {"latent_sample": model.encode(x=sample)["latent_dist"].parameters} with TemporaryDirectory() as tmpdirname: _, onnx_outputs = export_models( models_and_onnx_configs=models_and_onnx_configs, - opset=14, output_dir=Path(tmpdirname), - output_names=output_names, device=device, ) validate_models_outputs( models_and_onnx_configs=models_and_onnx_configs, onnx_named_outputs=onnx_outputs, output_dir=Path(tmpdirname), - atol=1e-3, - onnx_files_subpaths=output_names, use_subprocess=False, ) @@ -403,7 +396,7 @@ def test_tensorflow_export( @require_vision @require_diffusers def test_pytorch_export_for_diffusion_models(self, model_type, model_name): - self._onnx_export_sd(model_type, model_name) + self._onnx_export_diffusion_models(model_type, model_name) @parameterized.expand(PYTORCH_DIFFUSION_MODEL.items()) @require_torch @@ -414,7 +407,7 @@ def test_pytorch_export_for_diffusion_models(self, model_type, model_name): @pytest.mark.run_slow @pytest.mark.gpu_test def test_pytorch_export_for_diffusion_models_cuda(self, model_type, model_name): - self._onnx_export_sd(model_type, model_name, device="cuda") + self._onnx_export_diffusion_models(model_type, model_name, device="cuda") class CustomWhisperOnnxConfig(WhisperOnnxConfig): diff --git a/tests/fx/parallelization/test_tensor_parallel.py b/tests/fx/parallelization/test_tensor_parallel.py index 9626fccec3b..8a00393c4d7 100644 --- a/tests/fx/parallelization/test_tensor_parallel.py +++ b/tests/fx/parallelization/test_tensor_parallel.py @@ -36,6 +36,7 @@ "output_attentions": False, "output_hidden_states": False, "tie_word_embeddings": True, + "return_dict": True, } DUMMY_MODELS_TO_TEST = ( @@ -64,11 +65,10 @@ def prepare_dummy_inputs( seq_len: int = 10, device: Union[str, torch.device] = "cuda", ): - return { - "input_ids": torch.randint(low=1, high=model_config.vocab_size, size=(batch_size, seq_len), device=device), - "attention_mask": torch.ones((batch_size, seq_len), dtype=torch.int64, device=device), - "position_ids": torch.arange(0, seq_len, device=device).unsqueeze(0).expand(batch_size, -1), - } + input_ids = torch.randint(low=1, high=model_config.vocab_size, size=(batch_size, seq_len), device=device) + attention_mask = torch.ones((batch_size, seq_len), dtype=torch.int64, device=device) + labels = input_ids.clone() + return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels} def run_test_all_rank_results_match(rank: int, world_size: int, model_id: str, model_kwargs: Dict[str, Any]): @@ -82,8 +82,8 @@ def run_test_all_rank_results_match(rank: int, world_size: int, model_id: str, m model = parallelize_model(model_id, ctx, skip_load_weights=True, **model_kwargs) inputs = prepare_dummy_inputs(model.config) - logits = model(**inputs)[0] - tensors = gather_at_main_process(tensor=logits, group=tp_group, rank=rank, world_size=world_size) + loss = model(**inputs).loss + tensors = gather_at_main_process(tensor=loss, group=tp_group, rank=rank, world_size=world_size) # check results at main worker process if rank == 0: @@ -145,7 +145,7 @@ def run_test_parallel_results_matches_non_parallel( inputs = prepare_dummy_inputs(model.config) set_seed(SEED) - logits = model(**inputs)[0] + loss = model(**inputs).loss torch._dynamo.reset() del model @@ -154,9 +154,9 @@ def run_test_parallel_results_matches_non_parallel( set_seed(SEED) ctx = ParallelExecutionCtx(tp_group=tp_group, current_device=device) model = parallelize_model(model_id, ctx, skip_load_weights=True, **model_kwargs) - parallel_logits = model(**inputs)[0] + parallel_loss = model(**inputs).loss - torch.testing.assert_close(logits.cpu(), parallel_logits.cpu(), rtol=1e-4, atol=1e-4) + torch.testing.assert_close(loss.cpu(), parallel_loss.cpu(), rtol=1e-4, atol=1e-4) dist.barrier(tp_group) tearDown() diff --git a/tests/onnx/test_onnx_export_custom_module.py b/tests/onnx/test_onnx_export_custom_module.py index a144d5cd840..4398c14f01d 100644 --- a/tests/onnx/test_onnx_export_custom_module.py +++ b/tests/onnx/test_onnx_export_custom_module.py @@ -24,6 +24,8 @@ import torch from transformers.models.deberta import modeling_deberta + from optimum.utils import check_if_torch_greater + class StableDropoutTestCase(TestCase): """Tests export of StableDropout module.""" @@ -50,8 +52,8 @@ def test_training(self): training=training, ) - # Expected to fail with opset_version < 12 - with self.assertRaises(Exception): + if check_if_torch_greater("2.5"): + # Expected to pass with opset_version < 12 on torch >= 2.5 torch.onnx.export( sd, input, @@ -60,3 +62,14 @@ def test_training(self): do_constant_folding=do_constant_folding, training=training, ) + else: + # Expected to fail with opset_version < 12 on torch < 2.5 + with self.assertRaises(Exception): + torch.onnx.export( + sd, + input, + devnull, + opset_version=11, + do_constant_folding=do_constant_folding, + training=training, + ) diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py new file mode 100644 index 00000000000..07f90e8984e --- /dev/null +++ b/tests/onnxruntime/test_diffusion.py @@ -0,0 +1,813 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +import torch +from diffusers import ( + AutoPipelineForImage2Image, + AutoPipelineForInpainting, + AutoPipelineForText2Image, + DiffusionPipeline, +) +from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker +from diffusers.utils import load_image +from parameterized import parameterized +from transformers.testing_utils import require_torch_gpu +from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin + +from optimum.onnxruntime import ( + ORTDiffusionPipeline, + ORTPipelineForImage2Image, + ORTPipelineForInpainting, + ORTPipelineForText2Image, +) +from optimum.utils import check_if_transformers_greater +from optimum.utils.testing_utils import grid_parameters, require_diffusers + + +def get_generator(framework, seed): + if framework == "np": + return np.random.RandomState(seed) + elif framework == "pt": + return torch.Generator().manual_seed(seed) + else: + raise ValueError(f"Unknown framework: {framework}") + + +def _generate_prompts(batch_size=1): + inputs = { + "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + +def _generate_images(height=128, width=128, batch_size=1, channel=3, input_type="pil"): + if input_type == "pil": + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ).resize((width, height)) + elif input_type == "np": + image = np.random.rand(height, width, channel) + elif input_type == "pt": + image = torch.rand((channel, height, width)) + + return [image] * batch_size + + +class ORTPipelineForText2ImageTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3", "flux"] + + NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + CALLBACK_SUPPORTED_ARCHITECTURES += ["flux"] + + ORTMODEL_CLASS = ORTPipelineForText2Image + AUTOMODEL_CLASS = AutoPipelineForText2Image + + TASK = "text-to-image" + + def generate_inputs(self, height=128, width=128, batch_size=1): + inputs = _generate_prompts(batch_size=batch_size) + + inputs["height"] = height + inputs["width"] = width + + return inputs + + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + auto_pipeline = DiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = ORTDiffusionPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for batch_size in [1, 3]: + for height in [16, 32]: + for width in [16, 32]: + for num_images_per_prompt in [1, 3]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images + self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 128, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_callback(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, *args, **kwargs) -> None: + self.has_been_called = True + self.number_of_steps += 1 + return kwargs + + ort_callback = Callback() + auto_callback = Callback() + + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) + + self.assertTrue(ort_callback.has_been_called) + self.assertTrue(auto_callback.has_been_called) + self.assertEqual(auto_callback.number_of_steps, ort_callback.number_of_steps) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + height, width, batch_size = 128, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for output_type in ["pil", "np", "pt", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + elif output_type == "pt": + self.assertEqual(outputs.shape, (batch_size, 3, height, width)) + else: + expected_height = height // pipeline.vae_scale_factor + expected_width = width // pipeline.vae_scale_factor + + if model_arch == "flux": + channels = pipeline.transformer.config.in_channels + expected_shape = (batch_size, expected_height * expected_width, channels) + elif model_arch == "stable-diffusion-3": + out_channels = pipeline.transformer.config.out_channels + expected_shape = (batch_size, out_channels, expected_height, expected_width) + else: + out_channels = pipeline.unet.config.out_channels + expected_shape = (batch_size, out_channels, expected_height, expected_width) + + self.assertEqual(outputs.shape, expected_shape) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) + + @parameterized.expand(NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES) + def test_negative_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["negative_prompt"] = ["This is a negative prompt"] * batch_size + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + @parameterized.expand( + grid_parameters( + { + "model_arch": SUPPORTED_ARCHITECTURES, + "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], + } + ) + ) + @pytest.mark.rocm_ep_test + @pytest.mark.cuda_ep_test + @pytest.mark.trt_ep_test + @require_torch_gpu + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") + + outputs = pipeline(**inputs).images + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(["stable-diffusion", "latent-consistency"]) + @require_diffusers + def test_safety_checker(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker") + + pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], safety_checker=safety_checker) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained( + self.onnx_model_dirs[model_arch], safety_checker=safety_checker + ) + + self.assertIsInstance(pipeline.safety_checker, StableDiffusionSafetyChecker) + self.assertIsInstance(ort_pipeline.safety_checker, StableDiffusionSafetyChecker) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)) + diffusers_output = pipeline(**inputs, generator=get_generator("pt", SEED)) + + ort_nsfw_content_detected = ort_output.nsfw_content_detected + diffusers_nsfw_content_detected = diffusers_output.nsfw_content_detected + + self.assertTrue(ort_nsfw_content_detected is not None) + self.assertTrue(diffusers_nsfw_content_detected is not None) + self.assertEqual(ort_nsfw_content_detected, diffusers_nsfw_content_detected) + + ort_images = ort_output.images + diffusers_images = diffusers_output.images + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + +class ORTPipelineForImage2ImageTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + + AUTOMODEL_CLASS = AutoPipelineForImage2Image + ORTMODEL_CLASS = ORTPipelineForImage2Image + + TASK = "image-to-image" + + def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"): + inputs = _generate_prompts(batch_size=batch_size) + + inputs["image"] = _generate_images( + height=height, width=width, batch_size=batch_size, channel=channel, input_type=input_type + ) + + inputs["strength"] = 0.75 + + return inputs + + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(list(SUPPORTED_ARCHITECTURES)) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for batch_size in [1, 3]: + for height in [16, 32]: + for width in [16, 32]: + for num_images_per_prompt in [1, 3]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images + self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) + + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_callback(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["num_inference_steps"] = 3 + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, *args, **kwargs) -> None: + self.has_been_called = True + self.number_of_steps += 1 + return kwargs + + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_callback = Callback() + auto_callback = Callback() + + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) + + self.assertTrue(ort_callback.has_been_called) + self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + height, width, batch_size = 128, 64, 1 + + for input_type in ["pil", "np", "pt"]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + + for output_type in ["pil", "np", "pt", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + elif output_type == "pt": + self.assertEqual(outputs.shape, (batch_size, 3, height, width)) + else: + out_channels = ( + pipeline.unet.config.out_channels + if pipeline.unet is not None + else pipeline.transformer.config.out_channels + ) + self.assertEqual( + outputs.shape, + ( + batch_size, + out_channels, + height // pipeline.vae_scale_factor, + width // pipeline.vae_scale_factor, + ), + ) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 128, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) + + @parameterized.expand( + grid_parameters( + { + "model_arch": SUPPORTED_ARCHITECTURES, + "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], + } + ) + ) + @pytest.mark.rocm_ep_test + @pytest.mark.cuda_ep_test + @pytest.mark.trt_ep_test + @require_torch_gpu + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") + + outputs = pipeline(**inputs).images + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(["stable-diffusion", "latent-consistency"]) + @require_diffusers + def test_safety_checker(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker") + + pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], safety_checker=safety_checker) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained( + self.onnx_model_dirs[model_arch], safety_checker=safety_checker + ) + + self.assertIsInstance(pipeline.safety_checker, StableDiffusionSafetyChecker) + self.assertIsInstance(ort_pipeline.safety_checker, StableDiffusionSafetyChecker) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)) + diffusers_output = pipeline(**inputs, generator=get_generator("pt", SEED)) + + ort_nsfw_content_detected = ort_output.nsfw_content_detected + diffusers_nsfw_content_detected = diffusers_output.nsfw_content_detected + + self.assertTrue(ort_nsfw_content_detected is not None) + self.assertTrue(diffusers_nsfw_content_detected is not None) + self.assertEqual(ort_nsfw_content_detected, diffusers_nsfw_content_detected) + + ort_images = ort_output.images + diffusers_images = diffusers_output.images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + +class ORTPipelineForInpaintingTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + ] + + AUTOMODEL_CLASS = AutoPipelineForInpainting + ORTMODEL_CLASS = ORTPipelineForInpainting + + TASK = "inpainting" + + def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"): + inputs = _generate_prompts(batch_size=batch_size) + + inputs["image"] = _generate_images( + height=height, width=width, batch_size=batch_size, channel=channel, input_type=input_type + ) + inputs["mask_image"] = _generate_images( + height=height, width=width, batch_size=batch_size, channel=1, input_type=input_type + ) + + inputs["strength"] = 0.75 + inputs["height"] = height + inputs["width"] = width + + return inputs + + @require_diffusers + def test_load_vanilla_model_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn( + f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) + ) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_ort_pipeline_class_dispatch(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + self.assertEqual(ort_pipeline.auto_model_class, auto_pipeline.__class__) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_num_images_per_prompt(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for batch_size in [1, 3]: + for height in [16, 32]: + for width in [16, 32]: + for num_images_per_prompt in [1, 3]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images + self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) + + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_callback(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["num_inference_steps"] = 3 + + class Callback: + def __init__(self): + self.has_been_called = False + self.number_of_steps = 0 + + def __call__(self, *args, **kwargs) -> None: + self.has_been_called = True + self.number_of_steps += 1 + return kwargs + + ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + ort_callback = Callback() + auto_callback = Callback() + + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) + + self.assertTrue(ort_callback.has_been_called) + self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_shape(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + height, width, batch_size = 128, 64, 1 + + for input_type in ["pil", "np", "pt"]: + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + + for output_type in ["pil", "np", "pt", "latent"]: + inputs["output_type"] = output_type + outputs = pipeline(**inputs).images + if output_type == "pil": + self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) + elif output_type == "np": + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + elif output_type == "pt": + self.assertEqual(outputs.shape, (batch_size, 3, height, width)) + else: + out_channels = ( + pipeline.unet.config.out_channels + if pipeline.unet is not None + else pipeline.transformer.config.out_channels + ) + self.assertEqual( + outputs.shape, + ( + batch_size, + out_channels, + height // pipeline.vae_scale_factor, + width // pipeline.vae_scale_factor, + ), + ) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_compare_to_diffusers_pipeline(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_image_reproducibility(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 64, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + + for generator_framework in ["np", "pt"]: + ort_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_2 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) + ort_outputs_3 = pipeline(**inputs, generator=get_generator(generator_framework, SEED + 1)) + + self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) + np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) + + @parameterized.expand( + grid_parameters( + { + "model_arch": SUPPORTED_ARCHITECTURES, + "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], + } + ) + ) + @pytest.mark.rocm_ep_test + @pytest.mark.cuda_ep_test + @pytest.mark.trt_ep_test + @require_torch_gpu + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") + + outputs = pipeline(**inputs).images + self.assertIsInstance(outputs, np.ndarray) + self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + + @parameterized.expand(["stable-diffusion"]) + @require_diffusers + def test_safety_checker(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + + safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker") + + pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], safety_checker=safety_checker) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained( + self.onnx_model_dirs[model_arch], safety_checker=safety_checker + ) + + self.assertIsInstance(pipeline.safety_checker, StableDiffusionSafetyChecker) + self.assertIsInstance(ort_pipeline.safety_checker, StableDiffusionSafetyChecker) + + height, width, batch_size = 32, 64, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)) + diffusers_output = pipeline(**inputs, generator=get_generator("pt", SEED)) + + ort_nsfw_content_detected = ort_output.nsfw_content_detected + diffusers_nsfw_content_detected = diffusers_output.nsfw_content_detected + + self.assertTrue(ort_nsfw_content_detected is not None) + self.assertTrue(diffusers_nsfw_content_detected is not None) + self.assertEqual(ort_nsfw_content_detected, diffusers_nsfw_content_detected) + + ort_images = ort_output.images + diffusers_images = diffusers_output.images + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 4b44acb38ab..8f52ef45180 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -28,6 +28,7 @@ import requests import timm import torch +from huggingface_hub import HfApi from huggingface_hub.constants import default_cache_path from parameterized import parameterized from PIL import Image @@ -42,6 +43,7 @@ AutoModelForCausalLM, AutoModelForCTC, AutoModelForImageClassification, + AutoModelForImageToImage, AutoModelForMaskedLM, AutoModelForMultipleChoice, AutoModelForQuestionAnswering, @@ -52,12 +54,15 @@ AutoModelForTokenClassification, AutoModelForVision2Seq, AutoTokenizer, + GenerationConfig, MBartForConditionalGeneration, Pix2StructForConditionalGeneration, # Pix2Struct does not work with AutoModel PretrainedConfig, set_seed, ) +from transformers.modeling_outputs import ImageSuperResolutionOutput from transformers.modeling_utils import no_init_weights +from transformers.models.swin2sr.configuration_swin2sr import Swin2SRConfig from transformers.onnx.utils import get_preprocessor from transformers.testing_utils import get_gpu_count, require_torch_gpu, slow from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin @@ -79,6 +84,7 @@ ORTModelForCustomTasks, ORTModelForFeatureExtraction, ORTModelForImageClassification, + ORTModelForImageToImage, ORTModelForMaskedLM, ORTModelForMultipleChoice, ORTModelForPix2Struct, @@ -89,15 +95,8 @@ ORTModelForSpeechSeq2Seq, ORTModelForTokenClassification, ORTModelForVision2Seq, - ORTStableDiffusionPipeline, ) from optimum.onnxruntime.base import ORTDecoderForSeq2Seq, ORTEncoder -from optimum.onnxruntime.modeling_diffusion import ( - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, -) from optimum.onnxruntime.modeling_ort import ORTModel from optimum.pipelines import pipeline from optimum.utils import ( @@ -108,7 +107,24 @@ DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, logging, ) -from optimum.utils.testing_utils import grid_parameters, remove_directory, require_hf_token, require_ort_rocm +from optimum.utils.import_utils import check_if_transformers_greater, is_diffusers_available +from optimum.utils.testing_utils import ( + grid_parameters, + remove_directory, + require_diffusers, + require_hf_token, + require_ort_rocm, +) + + +if is_diffusers_available(): + from optimum.onnxruntime.modeling_diffusion import ( + ORTModelTextEncoder, + ORTModelUnet, + ORTModelVaeDecoder, + ORTModelVaeEncoder, + ORTStableDiffusionPipeline, + ) logger = logging.get_logger() @@ -127,14 +143,14 @@ class ORTModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.TEST_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" - self.LOCAL_MODEL_PATH = "assets/onnx" + self.LOCAL_MODEL_PATH = "tests/assets/onnx" self.ONNX_MODEL_ID = "philschmid/distilbert-onnx" self.TINY_ONNX_MODEL_ID = "fxmarty/resnet-tiny-beans" self.FAIL_ONNX_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" self.ONNX_SEQ2SEQ_MODEL_ID = "optimum/t5-small" self.LARGE_ONNX_SEQ2SEQ_MODEL_ID = "facebook/mbart-large-en-ro" self.TINY_ONNX_SEQ2SEQ_MODEL_ID = "fxmarty/sshleifer-tiny-mbart-onnx" - self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID = "hf-internal-testing/tiny-random-OnnxStableDiffusionPipeline" + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID = "optimum-internal-testing/tiny-stable-diffusion-onnx" def test_load_model_from_local_path(self): model = ORTModel.from_pretrained(self.LOCAL_MODEL_PATH) @@ -205,19 +221,21 @@ def test_load_seq2seq_model_from_empty_cache(self): with self.assertRaises(Exception): _ = ORTModelForSeq2SeqLM.from_pretrained(self.TINY_ONNX_SEQ2SEQ_MODEL_ID, local_files_only=True) + @require_diffusers def test_load_stable_diffusion_model_from_cache(self): _ = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) # caching - model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, local_files_only=True ) - self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) self.assertIsInstance(model.vae_decoder, ORTModelVaeDecoder) self.assertIsInstance(model.vae_encoder, ORTModelVaeEncoder) self.assertIsInstance(model.unet, ORTModelUnet) self.assertIsInstance(model.config, Dict) + model(prompt="This is a sanity test prompt", num_inference_steps=2) + + @require_diffusers def test_load_stable_diffusion_model_from_empty_cache(self): dirpath = os.path.join( default_cache_path, "models--" + self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID.replace("/", "--") @@ -300,6 +318,7 @@ def test_load_seq2seq_model_unknown_provider(self): with self.assertRaises(ValueError): ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="FooExecutionProvider") + @require_diffusers def test_load_stable_diffusion_model_from_hub(self): model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) self.assertIsInstance(model.text_encoder, ORTModelTextEncoder) @@ -308,6 +327,9 @@ def test_load_stable_diffusion_model_from_hub(self): self.assertIsInstance(model.unet, ORTModelUnet) self.assertIsInstance(model.config, Dict) + model(prompt="This is a sanity test prompt", num_inference_steps=2) + + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_load_stable_diffusion_model_cuda_provider(self): @@ -321,6 +343,9 @@ def test_load_stable_diffusion_model_cuda_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cuda:0")) + model(prompt="This is a sanity test prompt", num_inference_steps=2) + + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test @@ -335,6 +360,9 @@ def test_load_stable_diffusion_model_rocm_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cuda:0")) + model(prompt="This is a sanity test prompt", num_inference_steps=2) + + @require_diffusers def test_load_stable_diffusion_model_cpu_provider(self): model = ORTStableDiffusionPipeline.from_pretrained( self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID, provider="CPUExecutionProvider" @@ -346,6 +374,9 @@ def test_load_stable_diffusion_model_cpu_provider(self): self.assertListEqual(model.vae_encoder.session.get_providers(), model.providers) self.assertEqual(model.device, torch.device("cpu")) + model(prompt="This is a sanity test prompt", num_inference_steps=2) + + @require_diffusers def test_load_stable_diffusion_model_unknown_provider(self): with self.assertRaises(ValueError): ORTStableDiffusionPipeline.from_pretrained( @@ -478,6 +509,7 @@ def test_passing_session_options_seq2seq(self): self.assertEqual(model.encoder.session.get_session_options().intra_op_num_threads, 3) self.assertEqual(model.decoder.session.get_session_options().intra_op_num_threads, 3) + @require_diffusers def test_passing_session_options_stable_diffusion(self): options = onnxruntime.SessionOptions() options.intra_op_num_threads = 3 @@ -772,6 +804,7 @@ def test_seq2seq_model_on_rocm_ep_str(self): self.assertEqual(model.decoder_with_past.session.get_providers()[0], "ROCMExecutionProvider") self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_passing_provider_options_stable_diffusion(self): @@ -810,6 +843,7 @@ def test_passing_provider_options_stable_diffusion(self): model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "0" ) + @require_diffusers def test_stable_diffusion_model_on_cpu(self): model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") @@ -825,7 +859,7 @@ def test_stable_diffusion_model_on_cpu(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CPUExecutionProvider") self.assertListEqual(model.providers, ["CPUExecutionProvider"]) - # test string device input for to() + @require_diffusers def test_stable_diffusion_model_on_cpu_str(self): model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) cpu = torch.device("cpu") @@ -841,6 +875,7 @@ def test_stable_diffusion_model_on_cpu_str(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CPUExecutionProvider") self.assertListEqual(model.providers, ["CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu(self): @@ -858,6 +893,7 @@ def test_stable_diffusion_model_on_gpu(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CUDAExecutionProvider") self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test @@ -876,6 +912,7 @@ def test_stable_diffusion_model_on_rocm_ep(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "ROCMExecutionProvider") self.assertListEqual(model.providers, ["ROCMExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @unittest.skipIf(get_gpu_count() <= 1, "this test requires multi-gpu") def test_stable_diffusion_model_on_gpu_id(self): model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) @@ -899,7 +936,7 @@ def test_stable_diffusion_model_on_gpu_id(self): self.assertEqual(model.vae_decoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") self.assertEqual(model.vae_encoder.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1") - # test string device input for to() + @require_diffusers @require_torch_gpu @pytest.mark.cuda_ep_test def test_stable_diffusion_model_on_gpu_str(self): @@ -916,6 +953,7 @@ def test_stable_diffusion_model_on_gpu_str(self): self.assertEqual(model.vae_encoder.session.get_providers()[0], "CUDAExecutionProvider") self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"]) + @require_diffusers @require_torch_gpu @require_ort_rocm @pytest.mark.rocm_ep_test @@ -937,9 +975,13 @@ def test_load_model_from_hub_private(self): token = os.environ.get("HF_HUB_READ_TOKEN", None) if token is None: - self.skipTest("Test requires a token for fxmartyclone in the environment variable `HF_HUB_READ_TOKEN`.") + self.skipTest( + "Test requires a read access token for optimum-internal-testing in the environment variable `HF_HUB_READ_TOKEN`." + ) - model = ORTModelForCustomTasks.from_pretrained("optimum-internal-testing/tiny-random-phi-private", token=token) + model = ORTModelForCustomTasks.from_pretrained( + "optimum-internal-testing/tiny-random-phi-private", revision="onnx", token=token + ) self.assertIsInstance(model.model, onnxruntime.InferenceSession) self.assertIsInstance(model.config, PretrainedConfig) @@ -975,6 +1017,7 @@ def test_save_seq2seq_model_without_past(self): self.assertTrue(ONNX_DECODER_WITH_PAST_NAME not in folder_contents) self.assertTrue(CONFIG_NAME in folder_contents) + @require_diffusers def test_save_stable_diffusion_model(self): with tempfile.TemporaryDirectory() as tmpdirname: model = ORTStableDiffusionPipeline.from_pretrained(self.TINY_ONNX_STABLE_DIFFUSION_MODEL_ID) @@ -1050,6 +1093,7 @@ def test_save_load_seq2seq_model_with_external_data(self, use_cache: bool): os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") remove_directory(tmpdirname) + @require_diffusers def test_save_load_stable_diffusion_model_with_external_data(self): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["FORCE_ONNX_EXTERNAL_DATA"] = "1" # force exporting small model with external data @@ -1180,6 +1224,7 @@ def test_push_seq2seq_model_with_external_data_to_hub(self): ) os.environ.pop("FORCE_ONNX_EXTERNAL_DATA") + @require_diffusers @require_hf_token def test_push_stable_diffusion_model_with_external_data_to_hub(self): with tempfile.TemporaryDirectory() as tmpdirname: @@ -1224,6 +1269,20 @@ def test_trust_remote_code(self): torch.allclose(pt_logits, ort_logits, atol=1e-4), f" Maxdiff: {torch.abs(pt_logits - ort_logits).max()}" ) + @parameterized.expand(("", "onnx")) + def test_loading_with_config_not_from_subfolder(self, subfolder): + # config.json file in the root directory and not in the subfolder + model_id = "sentence-transformers-testing/stsb-bert-tiny-onnx" + # hub model + ORTModelForFeatureExtraction.from_pretrained(model_id, subfolder=subfolder, export=subfolder == "") + # local model + api = HfApi() + with tempfile.TemporaryDirectory() as tmpdirname: + local_dir = Path(tmpdirname) / "model" + api.snapshot_download(repo_id=model_id, local_dir=local_dir) + ORTModelForFeatureExtraction.from_pretrained(local_dir, subfolder=subfolder, export=subfolder == "") + remove_directory(tmpdirname) + class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin): SUPPORTED_ARCHITECTURES = [ @@ -1253,6 +1312,7 @@ class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm_qa", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1443,6 +1503,7 @@ class ORTModelForMaskedLMIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1623,6 +1684,7 @@ class ORTModelForSequenceClassificationIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1823,6 +1885,7 @@ class ORTModelForTokenClassificationIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -2134,6 +2197,18 @@ def test_compare_to_io_binding(self, model_arch): gc.collect() + def test_default_token_type_ids(self): + model_id = MODEL_NAMES["bert"] + model = ORTModelForFeatureExtraction.from_pretrained(model_id, export=True) + tokenizer = AutoTokenizer.from_pretrained(model_id) + tokens = tokenizer("this is a simple input", return_tensors="np") + self.assertTrue("token_type_ids" in model.input_names) + token_type_ids = tokens.pop("token_type_ids") + outs = model(token_type_ids=token_type_ids, **tokens) + outs_without_token_type_ids = model(**tokens) + self.assertTrue(np.allclose(outs.last_hidden_state, outs_without_token_type_ids.last_hidden_state)) + gc.collect() + class ORTModelForMultipleChoiceIntegrationTest(ORTModelTestMixin): # Multiple Choice tests are conducted on different models due to mismatch size in model's classifier @@ -2156,6 +2231,7 @@ class ORTModelForMultipleChoiceIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -2247,7 +2323,6 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "bloom", "codegen", "falcon", - "gemma", "gpt2", "gpt_bigcode", "gpt_neo", @@ -2255,11 +2330,22 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gptj", "llama", "mistral", - "mpt", - "phi3", - "qwen2", + "opt", ] + if check_if_transformers_greater("4.37"): + SUPPORTED_ARCHITECTURES.append("qwen2") + + if check_if_transformers_greater("4.38"): + SUPPORTED_ARCHITECTURES.append("gemma") + + # TODO: fix "mpt" for which inference fails for transformers < v4.41 + if check_if_transformers_greater("4.41"): + SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) + + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES.append("granite") + FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [False, True], @@ -2268,7 +2354,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): ORTMODEL_CLASS = ORTModelForCausalLM TASK = "text-generation" - GENERATION_LENGTH = 100 + GENERATION_LENGTH = 90 SPEEDUP_CACHE = 1.1 @parameterized.expand([(False,), (True,)]) @@ -2341,7 +2427,7 @@ def test_merge_from_onnx_and_save(self, model_arch): self.assertNotIn(ONNX_DECODER_WITH_PAST_NAME, folder_contents) self.assertNotIn(ONNX_WEIGHTS_NAME, folder_contents) - @parameterized.expand(grid_parameters({**FULL_GRID, "num_beams": [1, 3]})) + @parameterized.expand(grid_parameters({**FULL_GRID, "num_beams": [1, 4]})) def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int): use_io_binding = None if use_cache is False: @@ -2371,7 +2457,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach transformers_model = AutoModelForCausalLM.from_pretrained(model_id) transformers_model = transformers_model.eval() tokenizer = get_preprocessor(model_id) - tokens = tokenizer("This is a sample output", return_tensors="pt") + tokens = tokenizer("This is a sample input", return_tensors="pt") position_ids = None if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS: input_shape = tokens["input_ids"].shape @@ -2393,7 +2479,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach # Compare batched generation. tokenizer.pad_token_id = tokenizer.eos_token_id tokenizer.padding_side = "left" - tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) + tokens = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True) onnx_model.generation_config.eos_token_id = None transformers_model.generation_config.eos_token_id = None onnx_model.config.eos_token_id = None @@ -2404,25 +2490,39 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach # TODO: remove once https://github.com/huggingface/transformers/pull/26873 is released, falcon is broken in transformers new_tokens = 5 - onnx_outputs = onnx_model.generate( - **tokens, - num_beams=num_beams, - do_sample=False, - min_new_tokens=new_tokens, - max_new_tokens=new_tokens, - eos_token_id=None, - ) + gen_kwargs = { + "max_new_tokens": new_tokens, + "min_new_tokens": new_tokens, + "eos_token_id": None, + "num_beams": num_beams, + } - transformers_outputs = transformers_model.generate( - **tokens, - num_beams=num_beams, - do_sample=False, - min_new_tokens=new_tokens, - max_new_tokens=new_tokens, - eos_token_id=None, - ) + beam_search_gen_config = GenerationConfig(do_sample=False, **gen_kwargs) + + if use_cache and num_beams == 4: + beam_sample_gen_config = GenerationConfig(do_sample=True, **gen_kwargs) + group_beam_search_gen_config = GenerationConfig( + do_sample=False, num_beam_groups=2, diversity_penalty=0.0000001, **gen_kwargs + ) + gen_configs = ( + beam_search_gen_config, + beam_sample_gen_config, + group_beam_search_gen_config, + ) + else: + gen_configs = (beam_search_gen_config,) + + for gen_config in gen_configs: + set_seed(SEED) + with torch.no_grad(): + transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) + set_seed(SEED) + onnx_outputs = onnx_model.generate(**tokens, generation_config=gen_config) - self.assertTrue(torch.allclose(onnx_outputs, transformers_outputs)) + self.assertTrue( + torch.equal(onnx_outputs, transformers_outputs), + f"Failed with generation config : {gen_config}, transformers outputs {transformers_outputs}, ONNX model outputs {onnx_outputs}", + ) gc.collect() @@ -4510,14 +4610,14 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): ) self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) - self.assertEqual( - outputs_model_with_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) - self.assertEqual( - outputs_model_without_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) + + if model_arch == "whisper" and check_if_transformers_greater("4.43"): + gen_length = self.GENERATION_LENGTH + 2 + else: + gen_length = self.GENERATION_LENGTH + 1 + + self.assertEqual(outputs_model_with_pkv.shape[1], gen_length) + self.assertEqual(outputs_model_without_pkv.shape[1], gen_length) self.GENERATION_LENGTH = generation_length if os.environ.get("TEST_LEVEL", 0) == "1": @@ -4677,6 +4777,136 @@ def test_compare_generation_to_io_binding( gc.collect() +class ORTModelForImageToImageIntegrationTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = ["swin2sr"] + + ORTMODEL_CLASS = ORTModelForImageToImage + + TASK = "image-to-image" + + def _get_sample_image(self): + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + image = Image.open(requests.get(url, stream=True).raw) + return image + + def _get_preprocessors(self, model_id): + image_processor = AutoImageProcessor.from_pretrained(model_id) + + return image_processor + + def test_load_vanilla_transformers_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + _ = ORTModelForImageToImage.from_pretrained(MODEL_NAMES["bert"], export=True) + + self.assertIn("only supports the tasks", str(context.exception)) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + def test_compare_to_transformers(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + model_id = MODEL_NAMES[model_arch] + onnx_model = ORTModelForImageToImage.from_pretrained(self.onnx_model_dirs[model_arch]) + self.assertIsInstance(onnx_model.config, Swin2SRConfig) + set_seed(SEED) + + transformers_model = AutoModelForImageToImage.from_pretrained(model_id) + image_processor = self._get_preprocessors(model_id) + + data = self._get_sample_image() + features = image_processor(data, return_tensors="pt") + + with torch.no_grad(): + transformers_outputs = transformers_model(**features) + + onnx_outputs = onnx_model(**features) + self.assertIsInstance(onnx_outputs, ImageSuperResolutionOutput) + self.assertTrue("reconstruction" in onnx_outputs) + self.assertIsInstance(onnx_outputs.reconstruction, torch.Tensor) + self.assertTrue(torch.allclose(onnx_outputs.reconstruction, transformers_outputs.reconstruction, atol=1e-4)) + + gc.collect() + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + def test_generate_utils(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + model_id = MODEL_NAMES[model_arch] + onnx_model = ORTModelForImageToImage.from_pretrained(self.onnx_model_dirs[model_arch]) + image_processor = self._get_preprocessors(model_id) + + data = self._get_sample_image() + features = image_processor(data, return_tensors="pt") + + outputs = onnx_model(**features) + self.assertIsInstance(outputs, ImageSuperResolutionOutput) + + gc.collect() + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + def test_pipeline_image_to_image(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + model_id = MODEL_NAMES[model_arch] + onnx_model = ORTModelForImageToImage.from_pretrained(self.onnx_model_dirs[model_arch]) + image_processor = self._get_preprocessors(model_id) + pipe = pipeline( + "image-to-image", + model=onnx_model, + feature_extractor=image_processor, + ) + data = self._get_sample_image() + outputs = pipe(data) + self.assertEqual(pipe.device, onnx_model.device) + self.assertIsInstance(outputs, Image.Image) + + gc.collect() + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_torch_gpu + @pytest.mark.cuda_ep_test + def test_pipeline_on_gpu(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + model_id = MODEL_NAMES[model_arch] + onnx_model = ORTModelForImageToImage.from_pretrained(self.onnx_model_dirs[model_arch]) + image_processor = self._get_preprocessors(model_id) + pipe = pipeline( + "image-to-image", + model=onnx_model, + feature_extractor=image_processor, + device=0, + ) + + data = self._get_sample_image() + outputs = pipe(data) + + self.assertEqual(pipe.model.device.type.lower(), "cuda") + self.assertIsInstance(outputs, Image.Image) + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_torch_gpu + @require_ort_rocm + @pytest.mark.rocm_ep_test + def test_pipeline_on_rocm(self, model_arch: str): + model_args = {"test_name": model_arch, "model_arch": model_arch} + self._setup(model_args) + model_id = MODEL_NAMES[model_arch] + onnx_model = ORTModelForImageToImage.from_pretrained(self.onnx_model_dirs[model_arch]) + image_processor = self._get_preprocessors(model_id) + pipe = pipeline( + "image-to-image", + model=onnx_model, + feature_extractor=image_processor, + device=0, + ) + + data = self._get_sample_image() + outputs = pipe(data) + + self.assertEqual(pipe.model.device.type.lower(), "cuda") + self.assertIsInstance(outputs, Image.Image) + + class ORTModelForVision2SeqIntegrationTest(ORTModelTestMixin): SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"] @@ -4804,7 +5034,6 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach len(onnx_outputs["past_key_values"][0]), len(transformers_outputs["past_key_values"][0]) ) for i in range(len(onnx_outputs["past_key_values"])): - print(onnx_outputs["past_key_values"][i]) for ort_pkv, trfs_pkv in zip( onnx_outputs["past_key_values"][i], transformers_outputs["past_key_values"][i] ): @@ -5490,6 +5719,7 @@ class TestBothExportersORTModel(unittest.TestCase): ["automatic-speech-recognition", ORTModelForCTCIntegrationTest], ["audio-xvector", ORTModelForAudioXVectorIntegrationTest], ["audio-frame-classification", ORTModelForAudioFrameClassificationIntegrationTest], + ["image-to-image", ORTModelForImageToImageIntegrationTest], ] ) def test_find_untested_architectures(self, task: str, test_class): diff --git a/tests/onnxruntime/test_quantization.py b/tests/onnxruntime/test_quantization.py index b6f1ebb70f6..cf451590fbd 100644 --- a/tests/onnxruntime/test_quantization.py +++ b/tests/onnxruntime/test_quantization.py @@ -30,6 +30,7 @@ AutoQuantizationConfig, ORTConfig, ORTModelForCausalLM, + ORTModelForFeatureExtraction, ORTModelForSeq2SeqLM, ORTModelForSequenceClassification, ORTQuantizer, @@ -41,10 +42,10 @@ class ORTQuantizerTest(unittest.TestCase): LOAD_CONFIGURATION = { "local_asset": { - "model_or_path": "assets/onnx", + "model_or_path": "tests/assets/onnx", }, "local_asset_different_name": { - "model_or_path": "assets/onnx", + "model_or_path": "tests/assets/onnx", "file_name": "different_name.onnx", }, "ort_model_class": { @@ -52,6 +53,13 @@ class ORTQuantizerTest(unittest.TestCase): "optimum/distilbert-base-uncased-finetuned-sst-2-english" ) }, + "ort_model_with_onnx_model_in_subfolder": { + "model_or_path": ORTModelForFeatureExtraction.from_pretrained( + "sentence-transformers/all-MiniLM-L6-v2", + subfolder="onnx", + file_name="model.onnx", + ) + }, } @parameterized.expand(LOAD_CONFIGURATION.items()) diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py deleted file mode 100644 index 44cd22ffecc..00000000000 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ /dev/null @@ -1,562 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import random -import unittest -from typing import Dict - -import numpy as np -import PIL -import pytest -import torch -from diffusers import ( - OnnxStableDiffusionImg2ImgPipeline, - StableDiffusionPipeline, - StableDiffusionXLPipeline, -) -from diffusers.utils import load_image -from diffusers.utils.testing_utils import floats_tensor -from packaging.version import Version, parse -from parameterized import parameterized -from transformers.testing_utils import require_torch_gpu -from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin - -from optimum.onnxruntime import ( - ORTLatentConsistencyModelPipeline, - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, -) -from optimum.onnxruntime.modeling_diffusion import ( - ORTModelTextEncoder, - ORTModelUnet, - ORTModelVaeDecoder, - ORTModelVaeEncoder, -) -from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor -from optimum.utils.import_utils import _diffusers_version -from optimum.utils.testing_utils import grid_parameters, require_diffusers, require_ort_rocm - - -if parse(_diffusers_version) > Version("0.21.4"): - from diffusers import LatentConsistencyModelPipeline - - -def _generate_inputs(batch_size=1): - inputs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 3, - "guidance_scale": 7.5, - "output_type": "np", - } - return inputs - - -def _create_image(height=128, width=128, batch_size=1, channel=3, input_type="pil"): - if input_type == "pil": - image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/overture-creations-5sI6fQgYIuo.png" - ).resize((width, height)) - elif input_type == "np": - image = np.random.rand(height, width, channel) - elif input_type == "pt": - image = torch.rand((channel, height, width)) - - return [image] * batch_size - - -def to_np(image): - if isinstance(image[0], PIL.Image.Image): - return np.stack([np.array(i) for i in image], axis=0) - elif isinstance(image, torch.Tensor): - return image.cpu().numpy().transpose(0, 2, 3, 1) - return image - - -class ORTStableDiffusionPipelineBase(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionPipeline - TASK = "text-to-image" - - @require_diffusers - def test_load_vanilla_model_which_is_not_supported(self): - with self.assertRaises(Exception) as context: - _ = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) - - self.assertIn( - f"does not appear to have a file named {self.ORTMODEL_CLASS.config_name}", str(context.exception) - ) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_num_images_per_prompt(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - self.assertEqual(pipeline.vae_scale_factor, 2) - self.assertEqual(pipeline.vae_decoder.config["latent_channels"], 4) - self.assertEqual(pipeline.unet.config["in_channels"], 4) - - batch_size, height = 1, 32 - for width in [64, 32]: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - for num_images in [1, 3]: - outputs = pipeline(**inputs, num_images_per_prompt=num_images).images - self.assertEqual(outputs.shape, (batch_size * num_images, height, width, 3)) - - @parameterized.expand( - grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["CUDAExecutionProvider"]}) - ) - @require_torch_gpu - @pytest.mark.cuda_ep_test - @require_diffusers - def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - height, width, batch_size = 32, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - outputs = pipeline(**inputs).images - # Verify model devices - self.assertEqual(pipeline.device.type.lower(), "cuda") - # Verify model outptus - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - - @parameterized.expand( - grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "provider": ["ROCMExecutionProvider"]}) - ) - @require_torch_gpu - @require_ort_rocm - @pytest.mark.rocm_ep_test - @require_diffusers - def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - height, width, batch_size = 32, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - outputs = pipeline(**inputs).images - # Verify model devices - self.assertEqual(pipeline.device.type.lower(), "cuda") - # Verify model outptus - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_callback(self, model_arch: str): - def callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: - callback_fn.has_been_called = True - callback_fn.number_of_steps += 1 - - pipe = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - callback_fn.has_been_called = False - callback_fn.number_of_steps = 0 - inputs = self.generate_inputs(height=64, width=64) - pipe(**inputs, callback=callback_fn, callback_steps=1) - self.assertTrue(callback_fn.has_been_called) - self.assertEqual(callback_fn.number_of_steps, inputs["num_inference_steps"]) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_shape(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - height, width, batch_size = 128, 64, 1 - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - - if self.TASK == "image-to-image": - input_types = ["np", "pil", "pt"] - elif self.TASK == "text-to-image": - input_types = ["np"] - else: - input_types = ["pil"] - - for input_type in input_types: - if self.TASK == "image-to-image": - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) - else: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - for output_type in ["np", "pil", "latent"]: - inputs["output_type"] = output_type - outputs = pipeline(**inputs).images - if output_type == "pil": - self.assertEqual((len(outputs), outputs[0].height, outputs[0].width), (batch_size, height, width)) - elif output_type == "np": - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - else: - self.assertEqual( - outputs.shape, - (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), - ) - - def generate_inputs(self, height=128, width=128, batch_size=1): - inputs = _generate_inputs(batch_size=batch_size) - inputs["height"] = height - inputs["width"] = width - return inputs - - -class ORTStableDiffusionImg2ImgPipelineTest(ORTStableDiffusionPipelineBase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionImg2ImgPipeline - TASK = "image-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_diffusers_pipeline(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - height, width = 128, 128 - - inputs = self.generate_inputs(height=height, width=width) - inputs["prompt"] = "A painting of a squirrel eating a burger" - inputs["image"] = floats_tensor((1, 3, height, width), rng=random.Random(SEED)) - - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - ort_output = ort_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - - diffusers_onnx_pipeline = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.onnx_model_dirs[model_arch]) - diffusers_onnx_output = diffusers_onnx_pipeline(**inputs, generator=np.random.RandomState(SEED)).images - - self.assertTrue(np.allclose(ort_output, diffusers_onnx_output, atol=1e-1)) - - def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): - inputs = _generate_inputs(batch_size=batch_size) - inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) - inputs["strength"] = 0.75 - return inputs - - -class ORTStableDiffusionPipelineTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = StableDiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) - pipeline.safety_checker = None - batch_size, num_images_per_prompt, height, width = 1, 2, 64, 32 - - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": "sailing ship in storm by Leonardo da Vinci", - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_rescale": 0.1, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - self.assertIsInstance(ort_outputs, np.ndarray) - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_image_reproducibility(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - height, width = 64, 32 - np.random.seed(0) - ort_outputs_1 = pipeline(**inputs, height=height, width=width) - np.random.seed(0) - ort_outputs_2 = pipeline(**inputs, height=height, width=width) - ort_outputs_3 = pipeline(**inputs, height=height, width=width) - # Compare model outputs - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - def test_negative_prompt(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - inputs["height"], inputs["width"] = 64, 32 - negative_prompt = ["This is a negative prompt"] - np.random.seed(0) - image_slice_1 = pipeline(**inputs, negative_prompt=negative_prompt).images[0, -3:, -3:, -1] - prompt = inputs.pop("prompt") - embeds = [] - for p in [prompt, negative_prompt]: - text_inputs = pipeline.tokenizer( - p, - padding="max_length", - max_length=pipeline.tokenizer.model_max_length, - truncation=True, - return_tensors="np", - ) - text_inputs = text_inputs["input_ids"].astype(pipeline.text_encoder.input_dtype.get("input_ids", np.int32)) - embeds.append(pipeline.text_encoder(text_inputs)[0]) - - inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds - np.random.seed(0) - image_slice_2 = pipeline(**inputs).images[0, -3:, -3:, -1] - self.assertTrue(np.allclose(image_slice_1, image_slice_2, atol=1e-4)) - - -class ORTStableDiffusionXLPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion-xl", - ] - ORTMODEL_CLASS = ORTStableDiffusionXLPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.text_encoder_2, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = StableDiffusionXLPipeline.from_pretrained(MODEL_NAMES[model_arch]) - batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_rescale": 0.1, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - self.assertIsInstance(ort_outputs, np.ndarray) - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_image_reproducibility(self, model_arch: str): - pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - inputs = _generate_inputs() - height, width = 64, 32 - np.random.seed(0) - ort_outputs_1 = pipeline(**inputs, height=height, width=width) - np.random.seed(0) - ort_outputs_2 = pipeline(**inputs, height=height, width=width) - ort_outputs_3 = pipeline(**inputs, height=height, width=width) - self.assertTrue(np.array_equal(ort_outputs_1.images[0], ort_outputs_2.images[0])) - self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) - - -class ORTStableDiffusionInpaintPipelineTest(ORTStableDiffusionPipelineBase): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion", - ] - ORTMODEL_CLASS = ORTStableDiffusionInpaintPipeline - TASK = "inpainting" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_compare_diffusers_pipeline(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - diffusers_pipeline = self.ORTMODEL_CLASS.auto_model_class.from_pretrained(MODEL_NAMES[model_arch]) - height, width = 64, 64 - latents_shape = ( - 1, - ort_pipeline.vae_decoder.config["latent_channels"], - height // ort_pipeline.vae_scale_factor, - width // ort_pipeline.vae_scale_factor, - ) - inputs = self.generate_inputs(height=height, width=width) - - np_latents = np.random.rand(*latents_shape).astype(np.float32) - torch_latents = torch.from_numpy(np_latents) - - ort_outputs = ort_pipeline(**inputs, latents=np_latents).images - self.assertEqual(ort_outputs.shape, (1, height, width, 3)) - - diffusers_outputs = diffusers_pipeline(**inputs, latents=torch_latents).images - self.assertEqual(diffusers_outputs.shape, (1, height, width, 3)) - - self.assertTrue(np.allclose(ort_outputs, diffusers_outputs, atol=1e-4)) - - def generate_inputs(self, height=128, width=128, batch_size=1): - inputs = super(ORTStableDiffusionInpaintPipelineTest, self).generate_inputs(height, width) - inputs["image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] - inputs["mask_image"] = _create_image(height=height, width=width, batch_size=1, input_type="pil")[0] - return inputs - - -class ORTStableDiffusionXLImg2ImgPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "stable-diffusion-xl", - ] - ORTMODEL_CLASS = ORTStableDiffusionXLImg2ImgPipeline - TASK = "image-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - def test_inference(self, model_arch: str): - model_args = {"test_name": model_arch, "model_arch": model_arch} - self._setup(model_args) - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) - - height, width = 128, 128 - inputs = self.generate_inputs(height=height, width=width) - inputs["image"] = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/overture-creations-5sI6fQgYIuo.png" - ).resize((width, height)) - output = pipeline(**inputs, generator=np.random.RandomState(0)).images[0, -3:, -3:, -1] - expected_slice = np.array([0.6515, 0.5405, 0.4858, 0.5632, 0.5174, 0.5681, 0.4948, 0.4253, 0.5080]) - - self.assertTrue(np.allclose(output.flatten(), expected_slice, atol=1e-1)) - - def generate_inputs(self, height=128, width=128, batch_size=1, input_type="np"): - inputs = _generate_inputs(batch_size=batch_size) - inputs["image"] = _create_image(height=height, width=width, batch_size=batch_size, input_type=input_type) - inputs["strength"] = 0.75 - return inputs - - -class ImageProcessorTest(unittest.TestCase): - def test_vae_image_processor_pt(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pt = torch.stack(_create_image(height=8, width=8, batch_size=1, input_type="pt")) - input_np = to_np(input_pt) - - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_pt), output_type=output_type) - out_np = to_np(out) - in_np = (input_np * 255).round() if output_type == "pil" else input_np - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - def test_vae_image_processor_np(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_np = np.stack(_create_image(height=8, width=8, input_type="np")) - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_np), output_type=output_type) - out_np = to_np(out) - in_np = (input_np * 255).round() if output_type == "pil" else input_np - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - def test_vae_image_processor_pil(self): - image_processor = VaeImageProcessor(do_resize=False, do_normalize=True) - input_pil = _create_image(height=8, width=8, batch_size=1, input_type="pil") - - for output_type in ["np", "pil"]: - out = image_processor.postprocess(image_processor.preprocess(input_pil), output_type=output_type) - for i, o in zip(input_pil, out): - in_np = np.array(i) - out_np = to_np(out) if output_type == "pil" else (to_np(out) * 255).round() - self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) - - -class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = [ - "latent-consistency", - ] - ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline - TASK = "text-to-image" - - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @require_diffusers - @unittest.skipIf( - parse(_diffusers_version) <= Version("0.21.4"), - "not supported with this diffusers version, needs diffusers>=v0.22.0", - ) - def test_compare_to_diffusers(self, model_arch: str): - ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) - self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) - self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) - self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) - self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) - self.assertIsInstance(ort_pipeline.config, Dict) - - pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) - batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 - latents = ort_pipeline.prepare_latents( - batch_size * num_images_per_prompt, - ort_pipeline.unet.config["in_channels"], - height, - width, - dtype=np.float32, - generator=np.random.RandomState(0), - ) - - kwargs = { - "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, - "num_inference_steps": 1, - "num_images_per_prompt": num_images_per_prompt, - "height": height, - "width": width, - "guidance_scale": 8.5, - } - - for output_type in ["latent", "np"]: - ort_outputs = ort_pipeline(latents=latents, output_type=output_type, **kwargs).images - self.assertIsInstance(ort_outputs, np.ndarray) - with torch.no_grad(): - outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images - - # Compare model outputs - self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) - # Compare model devices - self.assertEqual(pipeline.device, ort_pipeline.device) diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index bb6935461d7..cccecd53817 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -98,23 +98,25 @@ }, "falcon": "fxmarty/really-tiny-falcon-testing", "flaubert": "hf-internal-testing/tiny-random-flaubert", + "flux": "optimum-internal-testing/tiny-random-flux", "gemma": "fxmarty/tiny-random-GemmaForCausalLM", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", - "levit": "hf-internal-testing/tiny-random-LevitModel", "latent-consistency": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", + "levit": "hf-internal-testing/tiny-random-LevitModel", "longt5": "hf-internal-testing/tiny-random-LongT5Model", - "llama": "fxmarty/tiny-llama-fast-tokenizer", + "llama": "optimum-internal-testing/tiny-random-llama", "m2m_100": "hf-internal-testing/tiny-random-m2m_100", - "marian": "sshleifer/tiny-marian-en-de", # hf-internal-testing ones are broken + "marian": "echarlaix/tiny-random-marian", "mbart": "hf-internal-testing/tiny-random-mbart", "mistral": "echarlaix/tiny-random-mistral", "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel", @@ -125,6 +127,7 @@ "mpt": "hf-internal-testing/tiny-random-MptForCausalLM", "mt5": "lewtun/tiny-random-mt5", "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel", + "opt": "hf-internal-testing/tiny-random-OPTModel", "pegasus": "hf-internal-testing/tiny-random-PegasusModel", "perceiver_text": "hf-internal-testing/tiny-random-language_perceiver", "perceiver_vision": "hf-internal-testing/tiny-random-vision_perceiver_conv", @@ -132,6 +135,7 @@ "pix2struct": "fxmarty/pix2struct-tiny-random", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "qwen2": "fxmarty/tiny-dummy-qwen2", + "rembert": "hf-internal-testing/tiny-random-RemBertModel", "resnet": "hf-internal-testing/tiny-random-resnet", "roberta": "hf-internal-testing/tiny-random-RobertaModel", "roformer": "hf-internal-testing/tiny-random-RoFormerModel", @@ -141,9 +145,11 @@ "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "speech_to_text": "hf-internal-testing/tiny-random-Speech2TextModel", "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", + "stable-diffusion-3": "optimum-internal-testing/tiny-random-stable-diffusion-3", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", "swin": "hf-internal-testing/tiny-random-SwinModel", "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", + "swin2sr": "hf-internal-testing/tiny-random-Swin2SRForImageSuperResolution", "t5": "hf-internal-testing/tiny-random-t5", "table-transformer": "hf-internal-testing/tiny-random-TableTransformerModel", "trocr": "microsoft/trocr-small-handwritten", @@ -151,7 +157,7 @@ "unispeech_sat": "hf-internal-testing/tiny-random-UnispeechSatModel", "vision-encoder-decoder": "hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2", "vit": "hf-internal-testing/tiny-random-vit", - "whisper": "openai/whisper-tiny.en", # hf-internal-testing ones are broken + "whisper": "optimum-internal-testing/tiny-random-whisper", "wav2vec2": "hf-internal-testing/tiny-random-Wav2Vec2Model", "wav2vec2-conformer": "hf-internal-testing/tiny-random-wav2vec2-conformer", "wavlm": "hf-internal-testing/tiny-random-WavlmModel", @@ -170,6 +176,11 @@ class ORTModelTestMixin(unittest.TestCase): "np": np.ndarray, } + TASK = None + + ORTMODEL_CLASS = None + AUTOMODEL_CLASS = None + @classmethod def setUpClass(cls): cls.onnx_model_dirs = {} diff --git a/tests/utils/test_task_processors.py b/tests/utils/test_task_processors.py index 16567048073..1a9f352a79f 100644 --- a/tests/utils/test_task_processors.py +++ b/tests/utils/test_task_processors.py @@ -19,16 +19,21 @@ from typing import TYPE_CHECKING, Any, Dict, Tuple, Union from unittest import TestCase -from datasets import DatasetDict +import pytest from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer +from optimum.utils.import_utils import is_datasets_available from optimum.utils.preprocessing import TaskProcessorsManager +from optimum.utils.testing_utils import require_datasets if TYPE_CHECKING: from transformers import PretrainedConfig, PreTrainedTokenizerBase from transformers.image_processing_utils import BaseImageProcessor +if is_datasets_available(): + from datasets import DatasetDict + TEXT_MODEL_NAME = "bert-base-uncased" CONFIG = AutoConfig.from_pretrained(TEXT_MODEL_NAME) @@ -122,6 +127,8 @@ def test_create_defaults_and_kwargs_from_preprocessor_kwargs_does_not_mutate_pre ) self.assertDictEqual(preprocessor_kwargs, clone) + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_unallowed_data_keys(self): task_processor = TaskProcessorsManager.get_task_processor_class_for_task(self.TASK_NAME)( self.CONFIG, self.PREPROCESSOR @@ -188,15 +195,23 @@ def _test_load_dataset( return dataset + @require_datasets + @pytest.mark.datasets_test def test_load_dataset(self): return self._test_load_dataset(False, False, False) + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_by_guessing_data_keys(self): return self._test_load_dataset(False, True, False) + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_and_only_keep_necessary_columns(self): return self._test_load_dataset(False, False, True) + @require_datasets + @pytest.mark.datasets_test def test_load_default_dataset(self): return self._test_load_dataset(True, False, False) @@ -207,6 +222,8 @@ class TextClassificationProcessorTest(TestCase, TaskProcessorTestBase): PREPROCESSOR = TOKENIZER WRONG_PREPROCESSOR = IMAGE_PROCESSOR + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_with_max_length(self): max_length = random.randint(4, 16) dataset = self._test_load_dataset(False, False, True, max_length=max_length) @@ -223,6 +240,8 @@ class TokenClassificationProcessorTest(TestCase, TaskProcessorTestBase): PREPROCESSOR = TOKENIZER WRONG_PREPROCESSOR = IMAGE_PROCESSOR + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_with_max_length(self): max_length = random.randint(4, 16) dataset = self._test_load_dataset(False, False, True, max_length=max_length) @@ -232,6 +251,8 @@ def test_load_dataset_with_max_length(self): input_ids = dataset[0]["input_ids"] self.assertEqual(len(input_ids), max_length) + @require_datasets + @pytest.mark.datasets_test def test_load_default_dataset(self): self.skipTest( "Skipping so as not to execute conll2003 remote code (test would require trust_remote_code=True)" @@ -244,6 +265,8 @@ class QuestionAnsweringProcessorTest(TestCase, TaskProcessorTestBase): PREPROCESSOR = TOKENIZER WRONG_PREPROCESSOR = IMAGE_PROCESSOR + @require_datasets + @pytest.mark.datasets_test def test_load_dataset_with_max_length(self): max_length = 384 dataset = self._test_load_dataset(False, False, True, max_length=max_length)