LeNet model for heterogeneous testing #393

Merged: 34 commits, Oct 30, 2024

Commits
cbbba15
temp
wdjyd Jun 19, 2024
0858f29
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
f2fd572
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
b2c4c29
add json_encoder and json_decoder
wdjyd Jun 20, 2024
d09dd73
add json_encoder and json_decoder
wdjyd Jun 20, 2024
4e77924
add gpu.container_module
wdjyd Jul 31, 2024
43b3624
[frontend] Add GPU MLIR lowering path with ReLU operation support
wdjyd Aug 12, 2024
2d4eef1
delete env.sh
wdjyd Aug 12, 2024
78f6bca
delete env.sh
wdjyd Aug 12, 2024
abce382
[BuddyTest] Add Test Model E2E example.
wdjyd Aug 16, 2024
3d00fe6
[BuddyTest] Add README.
wdjyd Aug 16, 2024
ae794aa
[BuddyTest] Add README.
wdjyd Aug 16, 2024
b57103c
[frontend] Add GPU MLIR lowering path with Conv2d operation support
wdjyd Aug 30, 2024
0adf1df
[frontend] Add GPU MLIR lowering path with MaxPool2d operation support
wdjyd Sep 2, 2024
f636341
[frontend] Fix Permute Op
wdjyd Sep 3, 2024
72cdc82
[frontend] Fix implementation error in permute and conv_2d operation
wdjyd Sep 10, 2024
cf703c7
[frontend] Add LeNet example for E2E execution in GPU device
wdjyd Sep 18, 2024
9a88cb2
[frontend] Add the custom subgraph partitioning interface
wdjyd Sep 21, 2024
2f91175
[frontend] Fix error in graph partitioning interface
wdjyd Sep 22, 2024
20be444
Merge remote-tracking branch 'origin/fix/maxpool2d_simplify' into wafer
wdjyd Sep 26, 2024
3e88a45
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
29745ef
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
7544589
standby
WuXintong123 Sep 29, 2024
00c9485
Merge branch 'buddy-compiler:main' into LeNet-GPU
WuXintong123 Oct 14, 2024
59e4cd8
Merge remote-tracking branch 'wdjyd/wafer' into LeNet-GPU
WuXintong123 Oct 15, 2024
82b92f8
The GPU OP-enabled version
WuXintong123 Oct 15, 2024
0c473dc
Merge branch 'buddy-compiler:main' into LeNet-GPU
WuXintong123 Oct 27, 2024
a8569cc
Separate for heterogeneous demo
WuXintong123 Oct 29, 2024
53d69d6
CPU, GPU, Custom
WuXintong123 Oct 29, 2024
2488faf
temp
WuXintong123 Oct 29, 2024
321927c
Pass the test
WuXintong123 Oct 30, 2024
5d5a844
correct
WuXintong123 Oct 30, 2024
69c4262
temp
WuXintong123 Oct 30, 2024
bf7ca39
final
WuXintong123 Oct 30, 2024
3 changes: 3 additions & 0 deletions examples/BuddyLeNet/.gitignore
@@ -3,8 +3,11 @@ log.ll
log.s
data
*.data
*.json
*.dot
__pycache__
*.pth
lenet.mlir
forward.mlir
subgraph0.mlir
subgraph1.mlir
58 changes: 53 additions & 5 deletions examples/BuddyLeNet/CMakeLists.txt
@@ -1,7 +1,7 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/buddy-lenet-import.py
COMMENT "Generating forward.mlir, subgraph0.mlir and parameter files"
COMMENT "Generating forward.mlir, subgraph0.mlir, subgraph1.mlir and parameter files"
)

add_custom_command(
@@ -50,13 +50,61 @@ add_custom_command(
COMMENT "Building subgraph0.o"
VERBATIM)

add_library(LENET STATIC subgraph0.o forward.o)
set(ONE_SHOT_BUFFERIZE_OPTION "bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map")
set(LOWER_TO_NVVM_OPTION "cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=fatbin")
add_custom_command(
OUTPUT subgraph1.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" |
${LLVM_TOOLS_BINARY_DIR}/mlir-opt
-one-shot-bufferize=${ONE_SHOT_BUFFERIZE_OPTION}
-buffer-deallocation
-convert-linalg-to-parallel-loops
-canonicalize
-gpu-map-parallel-loops
-convert-parallel-loops-to-gpu
-gpu-kernel-outlining
-canonicalize
-cse |
${BUDDY_BINARY_DIR}/buddy-opt -convert-memcpy-to-gpu -gpu-async-region -canonicalize |
${LLVM_TOOLS_BINARY_DIR}/mlir-opt -llvm-request-c-wrappers --test-lower-to-nvvm=${LOWER_TO_NVVM_OPTION} |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llvm-as |
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
COMMENT "Building subgraph1.o"
VERBATIM)

add_library(LENET STATIC subgraph0.o subgraph1.o forward.o)

SET_TARGET_PROPERTIES(LENET PROPERTIES LINKER_LANGUAGE C)

add_executable(buddy-lenet-run buddy-lenet-main.cpp)
target_link_directories(buddy-lenet-run PRIVATE ${LLVM_LIBRARY_DIR})

set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${PNG_LIBRARIES})

set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils mlir_async_runtime mlir_runner_utils mlir_cuda_runtime BuddyLibDIP ${PNG_LIBRARIES})
target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
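
Note on the new build rule: subgraph1.mlir (the GPU partition) is lowered in two hops — TOSA to named Linalg ops inside each function, then bufferization, conversion to parallel loops, mapping onto GPU blocks and threads, kernel outlining, and finally NVVM translation into an object file with an embedded fatbin (cubin-chip=sm_80 targets Ampere-class GPUs). Both subgraph objects are linked into the static LENET library, and the runner now also needs mlir_async_runtime, mlir_runner_utils, and mlir_cuda_runtime. To debug a single stage outside CMake, the same pipe can be replayed step by step; the sketch below is a minimal reproduction, assuming the LLVM and buddy tools are on PATH and subgraph1.mlir sits in the working directory:

```python
# Minimal sketch: replay the subgraph1.o pipeline outside CMake for
# debugging. Assumes mlir-opt, buddy-opt, mlir-translate, llvm-as, and
# llc are on PATH and subgraph1.mlir is in the working directory; every
# flag is copied verbatim from the CMake rule above.
import subprocess

STAGES = [
    ["mlir-opt", "subgraph1.mlir", "-pass-pipeline",
     "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg,"
     " tosa-to-tensor, tosa-to-arith))"],
    ["mlir-opt",
     "-one-shot-bufferize=bufferize-function-boundaries=1"
     " function-boundary-type-conversion=identity-layout-map",
     "-buffer-deallocation", "-convert-linalg-to-parallel-loops",
     "-canonicalize", "-gpu-map-parallel-loops",
     "-convert-parallel-loops-to-gpu", "-gpu-kernel-outlining",
     "-canonicalize", "-cse"],
    ["buddy-opt", "-convert-memcpy-to-gpu", "-gpu-async-region",
     "-canonicalize"],
    ["mlir-opt", "-llvm-request-c-wrappers",
     "--test-lower-to-nvvm=cubin-chip=sm_80 cubin-features=+ptx71"
     " cubin-format=fatbin"],
    ["mlir-translate", "-mlir-to-llvmir"],
    ["llvm-as"],
    ["llc", "-filetype=obj", "-relocation-model=pic", "-O0",
     "-o", "subgraph1.o"],
]

data = None
for cmd in STAGES:
    # Each stage consumes the previous stage's stdout, like the shell
    # pipe; the first stage reads subgraph1.mlir directly from disk.
    data = subprocess.run(cmd, input=data, capture_output=True,
                          check=True).stdout
```

Dumping `data` between stages shows exactly where a lowering breaks.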
22 changes: 16 additions & 6 deletions examples/BuddyLeNet/buddy-lenet-import.py
@@ -27,8 +27,14 @@

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops import tosa
from buddy.compiler.graph.transform import (
simply_fuse,
gpu_fuse,
custom_partition,
)
from buddy.compiler.graph.type import DeviceType
from buddy.compiler.ops import tosa, gpu
from buddy.compiler.graph.json_decoder import json_to_graph
from model import LeNet

# Retrieve the LeNet model path from environment variables.
@@ -56,13 +62,17 @@
assert len(graphs) == 1
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graphs[0].fuse_ops(pattern_list)
driver = GraphDriver(graphs[0])
driver.subgraphs[0].lower_to_top_level_ir()
pattern_list = [custom_partition]
graph.fuse_ops(pattern_list)
path_prefix = os.path.dirname(os.path.abspath(__file__))
driver = GraphDriver(graph)
driver.subgraphs[0].lower_to_top_level_ir()
with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
print(driver.subgraphs[0]._imported_module, file=module_file)
# Add heterogeneous hardware partition
driver.subgraphs[1].lower_to_top_level_ir()
with open(os.path.join(path_prefix, "subgraph1.mlir"), "w") as module_file:
print(driver.subgraphs[1]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(driver.construct_main_graph(True), file=module_file)

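Note on the importer change: replacing simply_fuse with custom_partition makes GraphDriver produce one subgraph per device — subgraph0.mlir stays on the CPU/TOSA path while subgraph1.mlir is routed through the GPU pipeline above, and forward.mlir stitches the two together. The implementation of custom_partition is not part of this diff; purely as an illustration of the shape such a transform could take, here is a hypothetical device-tagging pass (only DeviceType comes from this PR — graph.body, node.device, and the op class names are assumptions):

```python
# Hypothetical sketch of a device-tagging partition pass. The real
# custom_partition lives in buddy.compiler.graph.transform and is not
# shown in this diff; every name below except DeviceType is illustrative.
from buddy.compiler.graph.type import DeviceType

GPU_FRIENDLY = {"Conv2dOp", "MaxPool2dOp", "ReluOp"}  # assumed op names

def tag_devices(graph):
    """Tag each op with a device so GraphDriver can split the graph into
    per-device subgraphs (subgraph0 -> CPU, subgraph1 -> GPU)."""
    for node in graph.body:            # 'body' is an assumed attribute
        if type(node).__name__ in GPU_FRIENDLY:
            node.device = DeviceType.GPU   # 'device' is assumed as well
        else:
            node.device = DeviceType.CPU
```

The json_to_graph import above suggests a partition can also be described declaratively in a *.json file, which matches the new .gitignore entries.
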
17 changes: 17 additions & 0 deletions examples/BuddyLeNet/makefile
@@ -20,6 +20,22 @@ MLIR_ASYNC_RUNTIME := ${LLVM_BUILD_DIR}/lib/libmlir_async_runtime.dylib
MTRIPLE := x86_64-apple-darwin
endif

buddy-gpu-matmul-lower:
@${BUDDY_OPT} subgraph0.mlir \
-transform-preload-library="transform-library-paths=transform.mlir" \
-transform-interpreter="entry-point=codegen" \
-o log.mlir

buddy-gpu-matmul:
@${BUDDY_OPT} subgraph0.mlir -transform-preload-library="transform-library-paths=transform.mlir" -transform-interpreter="entry-point=codegen" | \
${BUDDY_OPT} --pass-pipeline='builtin.module(func.func(nvgpu-optimize-shared-memory))' | \
${BUDDY_OPT} -arith-expand -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -linalg-bufferize -convert-linalg-to-affine-loops -affine-loop-fusion -affine-parallelize -lower-affine -canonicalize -func-bufferize -arith-bufferize -tensor-bufferize -buffer-deallocation -finalizing-bufferize -canonicalize | \
${BUDDY_OPT} -gpu-launch-sink-index-computations -canonicalize -legalize-shmem-outlining -canonicalize | \
${BUDDY_OPT} -convert-memcpy-to-gpu -gpu-async-region -canonicalize | \
${BUDDY_OPT} -convert-scf-to-cf -memref-expand -finalize-memref-to-llvm -convert-arith-to-llvm --convert-vector-to-llvm -convert-gpu-to-nvvm='has-redux=1' | \
${BUDDY_OPT} -llvm-request-c-wrappers -canonicalize -cse -sccp | \
${MLIR_OPT} --test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=fatbin" -o matmul-cubin.mlir

buddy-lenet-lower:
@${BUDDY_OPT} ./fake-lenet.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \
@@ -124,3 +140,4 @@ buddy-lenet-opt-run:
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

2 changes: 2 additions & 0 deletions frontend/Python/frontend.py
@@ -45,6 +45,7 @@
from .graph import Graph, TensorDType, TensorMeta
from .graph.operation import *
from .graph.transform import maxpool2d_simplify
from .graph.type import *


class DynamoCompiler:
@@ -284,6 +285,7 @@ def _compiler(_gm: torch.fx.GraphModule, _inputs: List[torch.Tensor]):
fake_params,
self._ops_registry,
self._func_name,
DeviceType.CPU,
self._verbose
)
for gm_node in _gm.graph.nodes:
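
Note on the frontend change: every Graph built by DynamoCompiler now carries an explicit device tag, defaulting to DeviceType.CPU, which partition transforms can override before GraphDriver splits the graph. The enum itself is defined in frontend/Python/graph/type.py and is not shown in this diff; a minimal sketch of what it plausibly looks like, assuming the usual CPU/GPU pair:

```python
# Hypothetical sketch of the DeviceType enum implied by this diff; the
# actual definition in frontend/Python/graph/type.py may differ.
from enum import Enum

class DeviceType(Enum):
    CPU = "cpu"
    GPU = "gpu"
```

Defaulting the Dynamo-built graph to CPU keeps the existing single-device examples working unchanged; only graphs that pass through a partitioning transform ever see another device tag.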