From e5866f4f360659dbc93d6a97bac0fd2a12672189 Mon Sep 17 00:00:00 2001 From: Joseph Melber Date: Tue, 30 Apr 2024 14:42:52 -0600 Subject: [PATCH] [Initial] Enable more XRT buffers (#1440) Co-authored-by: Maksim Levental --- python/compiler/aiecc/main.py | 2 +- test/aie2xclbin/buffers_xclbin.mlir | 113 ++++++++++++++++++++++++++++ test/aiecc/buffers_xclbin.mlir | 111 +++++++++++++++++++++++++++ tools/aie2xclbin/XCLBinGen.cpp | 23 +++++- 4 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 test/aie2xclbin/buffers_xclbin.mlir create mode 100644 test/aiecc/buffers_xclbin.mlir diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py index 0e048cefed..6be53e9b22 100644 --- a/python/compiler/aiecc/main.py +++ b/python/compiler/aiecc/main.py @@ -581,7 +581,7 @@ async def process_xclbin_gen(self, has_cores): self.prepend_tmp("aie_partition.json"), ) - buffer_arg_names = ["in", "tmp", "out"] + buffer_arg_names = [f"bo{i}" for i in range(6)] await write_file_async( json.dumps( emit_design_kernel_json( diff --git a/test/aie2xclbin/buffers_xclbin.mlir b/test/aie2xclbin/buffers_xclbin.mlir new file mode 100644 index 0000000000..6f12d6c9f0 --- /dev/null +++ b/test/aie2xclbin/buffers_xclbin.mlir @@ -0,0 +1,113 @@ +//===- buffers_xclbin.mlir --------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: peano + +// RUN: aie2xclbin -v --host-target=aarch64-linux-gnu --peano=%PEANO_INSTALL_DIR %s --xclbin-name=test.xclbin + +// RUN: FileCheck %s --input-file=buffers_xclbin.mlir.prj/kernels.json + +// CHECK: { +// CHECK: "ps-kernels": { +// CHECK: "kernels": [ +// CHECK: { +// CHECK: "name": "MLIR_AIE", +// CHECK: "type": "dpu", +// CHECK: "extended-data": { +// CHECK: "subtype": "DPU", +// CHECK: "functional": "1", +// CHECK: "dpu_kernel_id": "0x901" +// CHECK: }, +// CHECK: "arguments": [ +// CHECK: { +// CHECK: "name": "instr", +// CHECK: "memory-connection": "SRAM", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x00" +// CHECK: }, +// CHECK: { +// CHECK: "name": "ninstr", +// CHECK: "address-qualifier": "SCALAR", +// CHECK: "type": "uint64_t", +// CHECK: "offset": "0x08" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo0", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x10" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo1", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x18" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo2", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x20" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo3", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x28" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo4", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x30" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo5", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x38" +// CHECK: } +// CHECK: ], +// CHECK: "instances": [ +// CHECK: { +// CHECK: "name": "MLIRAIE" +// CHECK: } +// CHECK: ] +// CHECK: } +// CHECK: ] +// CHECK: } +// CHECK: } + +module { + aie.device(npu) { + %02 = aie.tile(0, 2) + %12 = aie.tile(1, 2) + %22 = aie.tile(2, 2) + func.func @sequence(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>, %arg3: memref<1024xi32>, %arg4: memref<1024xi32>, %arg5: memref<1024xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 0 : i64, metadata = @in0} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 1 : i64, metadata = @out0} : memref<1024xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in1} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg3[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out1} : memref<1024xi32> + aiex.npu.sync {channel = 1 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd(0, 0, %arg4[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in2} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg5[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out2} : memref<1024xi32> + aiex.npu.sync {channel = 0 : i32, column = 2 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + return + } + } +} \ No newline at end of file diff --git a/test/aiecc/buffers_xclbin.mlir b/test/aiecc/buffers_xclbin.mlir new file mode 100644 index 0000000000..9eb1ae7005 --- /dev/null +++ b/test/aiecc/buffers_xclbin.mlir @@ -0,0 +1,111 @@ +//===- buffers_xclbin.mlir --------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. +// +//===----------------------------------------------------------------------===// + +// REQUIRES: chess +// RUN: %PYTHON aiecc.py --xchesscc --no-link -nv --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %s +// RUN: FileCheck %s --input-file=buffers_xclbin.mlir.prj/kernels.json + +// CHECK: { +// CHECK: "ps-kernels": { +// CHECK: "kernels": [ +// CHECK: { +// CHECK: "name": "MLIR_AIE", +// CHECK: "type": "dpu", +// CHECK: "extended-data": { +// CHECK: "subtype": "DPU", +// CHECK: "functional": "1", +// CHECK: "dpu_kernel_id": "0x901" +// CHECK: }, +// CHECK: "arguments": [ +// CHECK: { +// CHECK: "name": "instr", +// CHECK: "memory-connection": "SRAM", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x00" +// CHECK: }, +// CHECK: { +// CHECK: "name": "ninstr", +// CHECK: "address-qualifier": "SCALAR", +// CHECK: "type": "uint64_t", +// CHECK: "offset": "0x08" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo0", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x10" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo1", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x18" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo2", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x20" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo3", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x28" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo4", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x30" +// CHECK: }, +// CHECK: { +// CHECK: "name": "bo5", +// CHECK: "memory-connection": "HOST", +// CHECK: "address-qualifier": "GLOBAL", +// CHECK: "type": "char *", +// CHECK: "offset": "0x38" +// CHECK: } +// CHECK: ], +// CHECK: "instances": [ +// CHECK: { +// CHECK: "name": "MLIRAIE" +// CHECK: } +// CHECK: ] +// CHECK: } +// CHECK: ] +// CHECK: } +// CHECK: } + +module { + aie.device(npu) { + %02 = aie.tile(0, 2) + %12 = aie.tile(1, 2) + %22 = aie.tile(2, 2) + func.func @sequence(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>, %arg3: memref<1024xi32>, %arg4: memref<1024xi32>, %arg5: memref<1024xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 0 : i64, metadata = @in0} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 1 : i64, metadata = @out0} : memref<1024xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in1} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg3[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out1} : memref<1024xi32> + aiex.npu.sync {channel = 1 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd(0, 0, %arg4[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in2} : memref<1024xi32> + aiex.npu.dma_memcpy_nd(0, 0, %arg5[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out2} : memref<1024xi32> + aiex.npu.sync {channel = 0 : i32, column = 2 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + return + } + } +} \ No newline at end of file diff --git a/tools/aie2xclbin/XCLBinGen.cpp b/tools/aie2xclbin/XCLBinGen.cpp index f9a46e7be7..3165f93506 100644 --- a/tools/aie2xclbin/XCLBinGen.cpp +++ b/tools/aie2xclbin/XCLBinGen.cpp @@ -381,21 +381,36 @@ static json::Object makeKernelJSON(std::string name, std::string id, {"address-qualifier", "SCALAR"}, {"type", "uint64_t"}, {"offset", "0x08"}}, - json::Object{{"name", "in"}, + json::Object{{"name", "bo0"}, {"memory-connection", "HOST"}, {"address-qualifier", "GLOBAL"}, {"type", "char *"}, {"offset", "0x10"}}, - json::Object{{"name", "tmp"}, + json::Object{{"name", "bo1"}, {"memory-connection", "HOST"}, {"address-qualifier", "GLOBAL"}, {"type", "char *"}, {"offset", "0x18"}}, - json::Object{{"name", "out"}, + json::Object{{"name", "bo2"}, {"memory-connection", "HOST"}, {"address-qualifier", "GLOBAL"}, {"type", "char *"}, - {"offset", "0x20"}}}}, + {"offset", "0x20"}}, + json::Object{{"name", "bo3"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "char *"}, + {"offset", "0x28"}}, + json::Object{{"name", "bo4"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "char *"}, + {"offset", "0x30"}}, + json::Object{{"name", "bo5"}, + {"memory-connection", "HOST"}, + {"address-qualifier", "GLOBAL"}, + {"type", "char *"}, + {"offset", "0x38"}}}}, {"instances", json::Array{json::Object{{"name", instance}}}}}; }