-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Using objectFifo link to access the shared memory between compute tiles (#1814)
- Loading branch information
1 parent
232bff6
commit 3c91dcf
Showing
12 changed files
with
923 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
##===- Makefile -----------------------------------------------------------===## | ||
# | ||
# This file licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# Copyright (C) 2024, Advanced Micro Devices, Inc. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
|
||
# --- | ||
|
||
# The following environment variables that point to the Xilinx runtime (XRT)
# should be set up by an environment setup script already.
XILINX_XRT?=/opt/xilinx/xrt
XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../)

# ---

# Directory containing this Makefile; sources are resolved relative to it so
# that the build also works when make is invoked from another directory.
srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

XILINX_XRT_INCLUDE?=${XILINX_XRT}/include
XILINX_XRT_LIB?=${XILINX_XRT}/lib

CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include
XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB}
XRT_LIBS=-lxrt_coreutil
CXX=g++-13 -ggdb

#mlir_target?=build/aie.mlir
xclbin_target?=build/final.xclbin
insts_target?=build/insts.txt
host_target?=build/test

.PHONY: all
all: ${xclbin_target} ${host_target}

# Generate the MLIR design by running the Python description.
build/aie.mlir: ${srcdir}/aie2.py
	mkdir -p ${@D}
	python3 $< > $@

# Compile the AIE core kernel.
build/kernel.o: ${srcdir}/kernel.cc
	mkdir -p ${@D}
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F}

# aiecc.py runs inside the build directory, so the instruction file name is
# stripped of its build/ prefix and the MLIR input is addressed as ../build/...
${xclbin_target}: build/aie.mlir build/kernel.o
	mkdir -p ${@D}
	cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
		--dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%}

# Host executable. No -DM/-DN flags are passed: neither variable is set in this
# Makefile and test.cpp does not read such macros, so they only produced empty
# macro definitions on the compiler command line.
${host_target}: ${srcdir}/test.cpp ${xclbin_target}
	mkdir -p ${@D}
	${CXX} ${XRT_FLAGS} -o $@ $< ${XRT_LIBS}

.PHONY: run
run: ${host_target}
	./${host_target}

xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh
.PHONY: sign
sign: ${xclbin_target}
	${xclbin_sign} -dev Phoenix -xclbin $<

.PHONY: clean
clean:
	rm -rf build
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 AMD Inc. | ||
|
||
# REQUIRES: ryzen_ai, valid_xchess_license | ||
# | ||
# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o | ||
# RUN: %python %S/aie2.py > ./aie2.mlir | ||
# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir | ||
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags | ||
# RUN: %run_on_npu ./test.exe | FileCheck %s | ||
# CHECK: PASS! | ||
import numpy as np | ||
|
||
from aie.dialects.aie import * | ||
from aie.dialects.aiex import * | ||
from aie.helpers.dialects.ext.scf import _for as range_ | ||
from aie.extras.context import mlir_mod_ctx | ||
|
||
N = 50  # number of int32 elements transferred into the array (input DMA size)
O = 250  # number of int32 elements transferred out of the array (output DMA size)
n_rows = 5  # the input is split into n_rows objects of N // n_rows elements
n_repeats = O // N  # how many times each input object is copied to the output
dev = AIEDevice.npu1_1col
col = 0


def nested_loops():
    """Build the design and print its MLIR module to stdout.

    One compute tile acquires each input object once and, while holding it,
    produces ``n_repeats`` output objects from it through the external
    ``passthrough_10_i32`` kernel.
    """
    with mlir_mod_ctx() as ctx:

        @device(dev)
        def device_body():
            tensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]]

            # Tile declarations
            ShimTile = tile(col, 0)
            ComputeTile = tile(col, 2)

            # AIE-array data movement with object fifos
            of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty)
            of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty)

            # AIE Core Function declarations
            passthrough_10_i32 = external_func(
                "passthrough_10_i32", inputs=[tensor_ty, tensor_ty]
            )

            # Set up compute tiles
            @core(ComputeTile, "kernel.o")
            def core_body():
                # Outer loop: one iteration per input object.
                for _ in range_(n_rows):
                    elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
                    # Inner loop: the same input object is reused for every
                    # output object before it is released.
                    for _ in range_(n_repeats):
                        elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
                        passthrough_10_i32(elemIn, elemOut)
                        of_out.release(ObjectFifoPort.Produce, 1)
                    of_in.release(ObjectFifoPort.Consume, 1)

            # To/from AIE-array data movement
            @runtime_sequence(tensor_ty, tensor_ty)
            def sequence(A, C):
                npu_dma_memcpy_nd(
                    metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True
                )
                npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O])
                dma_wait(of_in, of_out)

        print(ctx.module)


nested_loops()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2024 AMD Inc. | ||
|
||
#include <aie_api/aie.hpp> | ||
|
||
// Copies the first N elements of `in` to `out`, converting each element from
// T_in to T_out by assignment. The two buffers must not overlap (__restrict).
template <typename T_in, typename T_out, unsigned long N>
void passthrough(const T_in *__restrict in, T_out *__restrict out) {
  // The index has the same type as N, so the loop condition no longer mixes a
  // signed index with an unsigned bound.
  for (unsigned long i = 0; i < N; i++) {
    out[i] = in[i];
  }
}

extern "C" {

// C-linkage entry point: copies 10 ints from `in` to `out`.
void passthrough_10_i32(const int *__restrict in, int *__restrict out) {
  passthrough<int, int, 10>(in, out);
}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2024 AMD Inc. | ||
|
||
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"
|
||
// Default artifact locations and kernel name; each can be overridden with -D
// on the compiler command line.
#ifndef XCLBIN
#define XCLBIN "build/final.xclbin"
#endif

#ifndef INSTS_TXT
#define INSTS_TXT "build/insts.txt"
#endif

#ifndef KERNEL_NAME
#define KERNEL_NAME "MLIR_AIE"
#endif

#define INPUT_SIZE (50 * sizeof(int))   // in bytes
#define OUTPUT_SIZE (250 * sizeof(int)) // in bytes
#define WIDTH_SIZE (10 * sizeof(int))   // in bytes
#define WIDTH 10
// Row counts are fully parenthesized so they expand correctly inside larger
// expressions (for example `x / INPUT_ROWS`).
#define INPUT_ROWS (INPUT_SIZE / WIDTH_SIZE)
#define OUTPUT_ROWS (OUTPUT_SIZE / WIDTH_SIZE)
|
||
// Reads an instruction file containing one hexadecimal 32-bit word per line.
//
// instr_path: path of the text file to read.
// Returns the parsed words in file order.
// Throws std::runtime_error if the file cannot be opened or if any line does
// not start with a hexadecimal number.
std::vector<uint32_t> load_instr_sequence(std::string instr_path) {
  std::ifstream instr_file(instr_path);
  // Report a missing or unreadable file explicitly instead of returning an
  // empty sequence.
  if (!instr_file.is_open()) {
    throw std::runtime_error("Unable to open instruction file: " + instr_path +
                             "\n");
  }
  std::string line;
  std::vector<uint32_t> instr_v;
  while (std::getline(instr_file, line)) {
    std::istringstream iss(line);
    uint32_t a;
    if (!(iss >> std::hex >> a)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(a);
  }
  return instr_v;
}
|
||
int main(int argc, const char *argv[]) { | ||
|
||
std::vector<uint32_t> instr_v = load_instr_sequence(INSTS_TXT); | ||
assert(instr_v.size() > 0); | ||
|
||
// Get a device handle | ||
unsigned int device_index = 0; | ||
xrt::device device = xrt::device(device_index); | ||
|
||
// Load the xclbin | ||
xrt::xclbin xclbin = xrt::xclbin(XCLBIN); | ||
|
||
// Get the kernel from the xclbin | ||
std::vector<xrt::xclbin::kernel> xkernels = xclbin.get_kernels(); | ||
xrt::xclbin::kernel xkernel = *std::find_if( | ||
xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { | ||
return k.get_name().rfind(KERNEL_NAME, 0) == 0; | ||
}); | ||
std::string kernel_name = xkernel.get_name(); | ||
assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); | ||
|
||
device.register_xclbin(xclbin); | ||
|
||
// get a hardware context | ||
xrt::hw_context context(device, xclbin.get_uuid()); | ||
|
||
// get a kernel handle | ||
auto kernel = xrt::kernel(context, kernel_name); | ||
|
||
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), | ||
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); | ||
auto bo_input = | ||
xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); | ||
auto bo_output = | ||
xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); | ||
|
||
int *buf_input = bo_input.map<int *>(); | ||
std::cout << std::endl << std::endl << "Input: " << std::endl; | ||
for (int i = 0; i < INPUT_ROWS; i++) { | ||
std::cout << "row " << i << " : "; | ||
for (int j = 0; j < WIDTH; j++) { | ||
buf_input[i * WIDTH + j] = i; | ||
std::cout << buf_input[i * WIDTH + j] << " "; | ||
} | ||
std::cout << std::endl << std::endl; | ||
} | ||
int *buf_output = bo_output.map<int *>(); | ||
memset(buf_output, 0, OUTPUT_SIZE); | ||
|
||
// Instruction buffer for DMA configuration | ||
void *buf_instr = bo_instr.map<void *>(); | ||
memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); | ||
|
||
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
unsigned int opcode = 3; | ||
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); | ||
ert_cmd_state r = run.wait(); | ||
if (r != ERT_CMD_STATE_COMPLETED) { | ||
std::cout << "Kernel did not complete. Returned status: " << r << "\n"; | ||
return 1; | ||
} | ||
|
||
bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | ||
|
||
bool pass = true; | ||
std::cout << std::endl << "Output: " << std::endl; | ||
int expected_output = 0; | ||
int five_repetitions = 0; | ||
for (int i = 0; i < OUTPUT_ROWS; i++) { | ||
std::cout << "row " << i << std::endl; | ||
if (five_repetitions == 5) { | ||
expected_output++; | ||
five_repetitions = 0; | ||
} | ||
for (int j = 0; j < WIDTH; j++) { | ||
std::cout << "expected: " << expected_output << ", "; | ||
std::cout << "got: " << buf_output[i * WIDTH + j] << std::endl; | ||
pass &= buf_output[i * WIDTH + j] == expected_output; | ||
} | ||
std::cout << std::endl << std::endl; | ||
five_repetitions++; | ||
} | ||
std::cout << std::endl << std::endl; | ||
std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; | ||
|
||
return 0; | ||
} |
Oops, something went wrong.