Skip to content

Commit

Permalink
Add support for ORT_DML backend for dx12 devices
Browse files Browse the repository at this point in the history
  • Loading branch information
WolframRhodium committed May 28, 2023
1 parent cf2bfbf commit d9e4111
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 39 deletions.
22 changes: 15 additions & 7 deletions .github/workflows/windows-ort.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,22 @@ jobs:
uses: actions/cache@v3
with:
path: vsort/protobuf/install
key: ${{ runner.os }}-vsort-protobuf-v3
key: ${{ runner.os }}-vsort-protobuf-v4

- name: Checkout protobuf
uses: actions/checkout@v3
if: steps.cache-protobuf.outputs.cache-hit != 'true'
with:
repository: protocolbuffers/protobuf
# follows protobuf in https://github.com/AmusementClub/onnxruntime/tree/master/cmake/external
# follows protobuf in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/onnxruntime_external_deps.cmake#L161
# if you change this, remember to bump the version of the cache key.
ref: a902b39270841beafc307dfa709610aa1cac2f06
ref: v3.21.12
fetch-depth: 1
path: vsort/protobuf

- name: Configure protobuf
if: steps.cache-protobuf.outputs.cache-hit != 'true'
run: cmake -S protobuf\cmake -B protobuf\build_rel -G Ninja -LA
run: cmake -S protobuf -B protobuf\build_rel -G Ninja -LA
-D CMAKE_BUILD_TYPE=Release
-D protobuf_BUILD_SHARED_LIBS=OFF -D protobuf_BUILD_TESTS=OFF

Expand All @@ -76,7 +76,7 @@ jobs:
uses: actions/cache@v3
with:
path: vsort/onnx/install
key: ${{ runner.os }}-vsort-onnx-v3
key: ${{ runner.os }}-vsort-onnx-v4

- name: Checkout onnx
if: steps.cache-onnx.outputs.cache-hit != 'true'
Expand All @@ -85,7 +85,7 @@ jobs:
repository: onnx/onnx
# follows onnx in https://github.com/AmusementClub/onnxruntime/tree/master/cmake/external
# if you change this, remember to bump the version of the cache key.
ref: 5a5f8a5935762397aa68429b5493084ff970f774
ref: a0d77f18516d2da7468a96b0de3b737266f23176
fetch-depth: 1
path: vsort/onnx

Expand Down Expand Up @@ -116,7 +116,7 @@ jobs:
- name: Download ONNX Runtime Precompilation
run: |
curl -s -o ortgpu.zip -LJO https://github.com/AmusementClub/onnxruntime/releases/latest/download/onnxruntime-gpu-win64.zip
curl -s -o ortgpu.zip -LJO https://github.com/AmusementClub/onnxruntime/releases/download/orttraining_rc2-5943-g73584f936-230528-0922/onnxruntime-gpu-win64.zip
unzip -q ortgpu.zip
- name: Cache CUDA
Expand All @@ -143,6 +143,7 @@ jobs:
-D ONNX_RUNTIME_LIB_DIRECTORY=onnxruntime-gpu\lib
-D ENABLE_CUDA=1
-D CUDAToolkit_ROOT="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
-D ENABLE_DML=1
-D CMAKE_CXX_STANDARD=20

- name: Build
Expand All @@ -157,6 +158,13 @@ jobs:
copy onnxruntime-gpu\bin\*.dll artifact\vsort\
copy onnxruntime-gpu\lib\*.dll artifact\vsort\
- name: Download DirectML Library
  # Fetches the Microsoft.AI.DirectML NuGet package, unpacks it with unzip,
  # and ships DirectML.dll next to the vsort plugin binaries.
  # follows DirectML in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/dml.cmake#L44
  # -f makes curl fail on an HTTP error instead of saving the error page as the
  # package; -S keeps the error message visible despite -s; -L follows redirects.
  run: |
    curl -fsSL -o directml.nupkg https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.12.0
    unzip -q directml.nupkg -d dml
    copy dml\bin\x64-win\DirectML.dll artifact\vsort\
- name: Upload
uses: actions/upload-artifact@v3
with:
Expand Down
40 changes: 20 additions & 20 deletions .github/workflows/windows-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ jobs:
- name: Compress scirpts.7z
run: |
cd scripts
7za a -t7z -bb3 -mx=3 ../scripts.${{ github.event.inputs.tag }}.7z .
7za a -t7z -bb3 -mx=9 ../scripts.${{ github.event.inputs.tag }}.7z .
- name: Upload scripts release
uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -113,7 +113,7 @@ jobs:
popd
ls -lR
du -sh
7za a -t7z -bb3 -mx=3 ../models.7z .
7za a -t7z -bb3 -mx=9 ../models.7z .
- name: Upload model release
uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -144,7 +144,7 @@ jobs:
popd
ls -lR
du -sh
7za a -t7z -bb3 -mx=3 ../ext-models.7z .
7za a -t7z -bb3 -mx=9 ../ext-models.7z .
- name: Upload external model release
uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -175,7 +175,7 @@ jobs:
popd
ls -lR
du -sh
7za a -t7z -bb3 -mx=3 ../contrib-models.7z .
7za a -t7z -bb3 -mx=9 ../contrib-models.7z .
- name: Upload contrib model release
uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -264,7 +264,7 @@ jobs:
cp scripts-release/*.py release-cpu/
cd release-cpu
ls -lR
7za a -t7z -bb3 -mx=3 ../vsmlrt-windows-x64-cpu.7z .
7za a -t7z -bb3 -mx=9 ../vsmlrt-windows-x64-cpu.7z .
- name: Upload CPU-only release
uses: actions/upload-artifact@v3
Expand All @@ -286,36 +286,36 @@ jobs:
generate_release_notes: false
prerelease: true

- name: Build non-CUDA GPU release
- name: Build generic GPU release
shell: bash
run: |
mkdir release-vk
cp -r models-release/models release-vk/
cp -r vsov-release/* release-vk/
cp -r vsort-release/* release-vk/
rm -f release-vk/vsort/onnxruntime_providers_*.dll
cp -r vsncnn-release/* release-vk/
cp scripts-release/*.py release-vk/
cd release-vk
mkdir release-generic-gpu
cp -r models-release/models release-generic-gpu/
cp -r vsov-release/* release-generic-gpu/
cp -r vsort-release/* release-generic-gpu/
rm -f release-generic-gpu/vsort/onnxruntime_providers_*.dll
cp -r vsncnn-release/* release-generic-gpu/
cp scripts-release/*.py release-generic-gpu/
cd release-generic-gpu
ls -lR
7za a -t7z -bb3 -mx=3 ../vsmlrt-windows-x64-vk.7z .
7za a -t7z -bb3 -mx=9 ../vsmlrt-windows-x64-generic-gpu.7z .
- name: Upload non-CUDA GPU release
uses: actions/upload-artifact@v3
if: false
with:
name: vsmlrt-vk-release
path: vsmlrt-windows-x64-vk.7z
name: vsmlrt-generic-gpu-release
path: vsmlrt-windows-x64-generic-gpu.7z
retention-days: 1

- name: Rename release asset for non-CUDA GPU release
run: mv vsmlrt-windows-x64-vk.7z vsmlrt-windows-x64-vk.${{ github.event.inputs.tag }}.7z
run: mv vsmlrt-windows-x64-generic-gpu.7z vsmlrt-windows-x64-generic-gpu.${{ github.event.inputs.tag }}.7z

- name: Release non-CUDA GPU
uses: softprops/action-gh-release@v1
with:
tag_name: ${{ github.event.inputs.tag }}
files: vsmlrt-windows-x64-vk.${{ github.event.inputs.tag }}.7z
files: vsmlrt-windows-x64-generic-gpu.${{ github.event.inputs.tag }}.7z
fail_on_unmatched_files: true
generate_release_notes: false
prerelease: true
Expand All @@ -339,7 +339,7 @@ jobs:
cp scripts-release/*.py release-cuda/
cd release-cuda
ls -lR
7za a -t7z -bb3 -mx=3 ../vsmlrt-windows-x64-cuda.7z .
7za a -t7z -bb3 -mx=9 ../vsmlrt-windows-x64-cuda.7z .
- name: Upload CUDA release
uses: actions/upload-artifact@v3
Expand Down
46 changes: 43 additions & 3 deletions scripts/vsmlrt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "3.15.23"
__version__ = "3.15.24"

__all__ = [
"Backend", "BackendV2",
Expand All @@ -12,7 +12,7 @@
]

import copy
from dataclasses import dataclass, field
from dataclasses import dataclass
import enum
import math
import os
Expand Down Expand Up @@ -172,14 +172,28 @@ class NCNN_VK:
# internal backend attributes
supports_onnx_serialization: bool = True

@dataclass(frozen=False)
class ORT_DML:
""" backend for directml (d3d12) devices

Configuration consumed by `_inference`: when the backend is an instance of
this class, `core.ort.Model` is called with `provider="DML"` and the
fields `device_id`, `num_streams`, `verbosity`, `fp16` and
`fp16_blacklist_ops` are forwarded as keyword arguments of the same name.
"""

# forwarded as `device_id`; presumably the index of the d3d12 device to use — TODO confirm
device_id: int = 0
# forwarded as `num_streams`; presumably the number of concurrent inference streams — TODO confirm
num_streams: int = 1
# forwarded as `verbosity`; the meaning of each level is defined by the plugin, not here
verbosity: int = 2
# forwarded as `fp16`; presumably requests half-precision execution — TODO confirm
fp16: bool = False
# forwarded as `fp16_blacklist_ops`; None is passed through unchanged
# NOTE(review): presumably op names to keep out of fp16 — confirm against the plugin
fp16_blacklist_ops: typing.Optional[typing.Sequence[str]] = None

# internal backend attributes
supports_onnx_serialization: bool = True


backendT = typing.Union[
Backend.OV_CPU,
Backend.ORT_CPU,
Backend.ORT_CUDA,
Backend.TRT,
Backend.OV_GPU,
Backend.NCNN_VK
Backend.NCNN_VK,
Backend.ORT_DML,
]


Expand Down Expand Up @@ -1399,6 +1413,18 @@ def _inference(
path_is_serialization=path_is_serialization,
fp16_blacklist_ops=backend.fp16_blacklist_ops
)
elif isinstance(backend, Backend.ORT_DML):
clip = core.ort.Model(
clips, network_path,
overlap=overlap, tilesize=tilesize,
provider="DML", builtin=False,
device_id=backend.device_id,
num_streams=backend.num_streams,
verbosity=backend.verbosity,
fp16=backend.fp16,
path_is_serialization=path_is_serialization,
fp16_blacklist_ops=backend.fp16_blacklist_ops
)
elif isinstance(backend, Backend.ORT_CUDA):
clip = core.ort.Model(
clips, network_path,
Expand Down Expand Up @@ -1701,6 +1727,20 @@ def OV_GPU(*,
**kwargs
)

@staticmethod
def ORT_DML(*, device_id: int = 0, num_streams: int = 1, fp16: bool = False,
            **kwargs) -> Backend.ORT_DML:
    """Build a ``Backend.ORT_DML`` configuration object.

    All arguments are keyword-only. ``device_id``, ``num_streams`` and
    ``fp16`` are spelled out here with their defaults; anything else in
    ``**kwargs`` is handed to the ``Backend.ORT_DML`` constructor unchanged.
    """
    named_options = dict(
        device_id=device_id,
        num_streams=num_streams,
        fp16=fp16,
    )
    return Backend.ORT_DML(**named_options, **kwargs)


def fmtc_resample(clip: vs.VideoNode, **kwargs) -> vs.VideoNode:
clip_org = clip
Expand Down
5 changes: 5 additions & 0 deletions vsort/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(ONNX_RUNTIME_API_DIRECTORY "" CACHE PATH "Path to ONNX API headers")
# User-supplied location of the ONNX Runtime libraries; empty unless set at
# configure time (the CI workflow passes -D ONNX_RUNTIME_LIB_DIRECTORY=...).
set(ONNX_RUNTIME_LIB_DIRECTORY "" CACHE PATH "Path to ONNX Runtime libraries")

# Optional backends. Both are cache BOOLs that default to OFF and are switched
# on from the command line, e.g. -D ENABLE_CUDA=1 -D ENABLE_DML=1.
set(ENABLE_CUDA OFF CACHE BOOL "Enable CUDA backend")
set(ENABLE_DML OFF CACHE BOOL "Enable DirectML backend")

# protobuf and ONNX are located through their CMake package config files;
# REQUIRED makes configuration stop if either one cannot be found.
find_package(protobuf REQUIRED CONFIG)
find_package(ONNX REQUIRED CONFIG)
Expand Down Expand Up @@ -52,6 +53,10 @@ if (ENABLE_CUDA)
endif()
endif()

# When ENABLE_DML is set, define the ENABLE_DML preprocessor macro for the
# sources of the vsort target. The definition is attached to the target
# (PRIVATE: only needed to build vsort itself) rather than to the whole
# directory, so it cannot leak into any other target declared in this file.
# Requires the vsort target to have been created earlier in this file.
if (ENABLE_DML)
    target_compile_definitions(vsort PRIVATE ENABLE_DML)
endif()

target_include_directories(vsort PUBLIC
"${PROJECT_BINARY_DIR}"
)
Expand Down
Loading

0 comments on commit d9e4111

Please sign in to comment.