-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ensure aie partition width is sufficiently large (#1997)
Co-authored-by: AndraBisca <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
6d5becc
commit 4d613f9
Showing
3 changed files
with
253 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# device_width/aie2.py -*- Python -*- | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates | ||
# | ||
# REQUIRES: ryzen_ai, valid_xchess_license | ||
# | ||
# RUN: %python %S/aie2.py > ./aie2.mlir | ||
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags | ||
# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir | ||
# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | ||
|
||
import numpy as np | ||
import sys | ||
|
||
from aie.dialects.aie import * | ||
from aie.dialects.aiex import * | ||
from aie.extras.context import mlir_mod_ctx | ||
|
||
# Transfer geometry: N int32 elements are moved in total, and each
# object-fifo buffer is declared with line_size int32 elements.
N = 4096
line_size = 1024
# Target device for the generated design.
dev = AIEDevice.npu1_2col


def my_passthrough():
    """Build the passthrough design and print the resulting MLIR module.

    The design declares two tiles, connects them with an "in" and an "out"
    object fifo that are linked together, and emits a runtime sequence that
    issues one DMA transfer into the "in" fifo and one out of the "out" fifo.
    """
    with mlir_mod_ctx() as ctx:

        @device(dev)
        def device_body():
            # Array types for the host-visible buffers and the fifo elements.
            host_buffer_ty = np.ndarray[(N,), np.dtype[np.int32]]
            fifo_elem_ty = np.ndarray[(line_size,), np.dtype[np.int32]]

            # Tile declarations
            shim = tile(1, 0)
            compute = tile(1, 2)

            # AIE-array data movement with object fifos: shim -> compute on
            # "in", compute -> shim on "out", with the two fifos linked.
            fifo_in = object_fifo("in", shim, compute, 2, fifo_elem_ty)
            fifo_out = object_fifo("out", compute, shim, 2, fifo_elem_ty)
            object_fifo_link(fifo_in, fifo_out)

            # To/from AIE-array data movement.
            # B is part of the sequence signature but is not referenced here.
            @runtime_sequence(host_buffer_ty, host_buffer_ty, host_buffer_ty)
            def sequence(A, B, C):
                npu_dma_memcpy_nd(
                    metadata=fifo_in,
                    bd_id=1,
                    mem=A,
                    sizes=[1, 1, 1, N],
                    issue_token=True,
                )
                npu_dma_memcpy_nd(
                    metadata=fifo_out,
                    bd_id=0,
                    mem=C,
                    sizes=[1, 1, 1, N],
                )
                dma_wait(fifo_in, fifo_out)

        print(ctx.module)


my_passthrough()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
//===- test.cpp -------------------------------------------000---*- C++ -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <algorithm>
#include <boost/program_options.hpp>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
|
||
#include "xrt/xrt_bo.h" | ||
#include "xrt/xrt_device.h" | ||
#include "xrt/xrt_kernel.h" | ||
|
||
namespace po = boost::program_options; | ||
|
||
/// Verify that the option `name` was supplied and names a readable file.
///
/// @param vm_in Parsed command-line options.
/// @param name  Option key whose value is interpreted as a file path.
/// @throws std::runtime_error if the option is absent from `vm_in`, or if an
///         input stream cannot be opened on the path it holds.
void check_arg_file_exists(po::variables_map &vm_in, std::string name) {
  // Guard: the option must have been given at all.
  if (vm_in.count(name) == 0)
    throw std::runtime_error("Error: no " + name + " file was provided\n");

  // Probe the path by opening it for reading; the stream is closed again as
  // soon as the if-statement ends.
  const std::string path = vm_in[name].as<std::string>();
  if (std::ifstream probe(path); !probe)
    throw std::runtime_error("The " + name + " file " + path +
                             " does not exist.\n");
}
|
||
/// Read an instruction sequence from a text file.
///
/// Each line of the file is parsed as one hexadecimal 32-bit word; the words
/// are returned in file order.
///
/// @param instr_path Path of the instruction text file.
/// @return The parsed instruction words.
/// @throws std::runtime_error if the file cannot be opened, or if any line
///         does not start with a parsable hexadecimal value.
std::vector<uint32_t> load_instr_sequence(std::string instr_path) {
  std::ifstream instr_file(instr_path);
  // Fail loudly instead of silently returning an empty instruction stream
  // when the file is missing or unreadable.
  if (!instr_file) {
    throw std::runtime_error("Unable to open instruction file: " + instr_path +
                             "\n");
  }

  std::string line;
  std::vector<uint32_t> instr_v;
  while (std::getline(instr_file, line)) {
    std::istringstream iss(line);
    uint32_t a;
    if (!(iss >> std::hex >> a)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(a);
  }
  return instr_v;
}
|
||
int main(int argc, const char *argv[]) { | ||
// Program arguments parsing | ||
po::options_description desc("Allowed options"); | ||
desc.add_options()("help,h", "produce help message")( | ||
"xclbin,x", po::value<std::string>()->required(), | ||
"the input xclbin path")( | ||
"kernel,k", po::value<std::string>()->required(), | ||
"the kernel name in the XCLBIN (for instance PP_PRE_FD)")( | ||
"verbosity,v", po::value<int>()->default_value(0), | ||
"the verbosity of the output")( | ||
"instr,i", po::value<std::string>()->required(), | ||
"path of file containing userspace instructions to be sent to the LX6")( | ||
"length,l", po::value<int>()->default_value(4096), | ||
"the length of the transfer in int32_t"); | ||
po::variables_map vm; | ||
|
||
try { | ||
po::store(po::parse_command_line(argc, argv, desc), vm); | ||
po::notify(vm); | ||
|
||
if (vm.count("help")) { | ||
std::cout << desc << std::endl; | ||
return 1; | ||
} | ||
} catch (const std::exception &ex) { | ||
std::cerr << ex.what() << "\n\n"; | ||
std::cerr << "Usage:\n" << desc << std::endl; | ||
return 1; | ||
} | ||
|
||
check_arg_file_exists(vm, "xclbin"); | ||
check_arg_file_exists(vm, "instr"); | ||
|
||
std::vector<uint32_t> instr_v = | ||
load_instr_sequence(vm["instr"].as<std::string>()); | ||
|
||
int verbosity = vm["verbosity"].as<int>(); | ||
if (verbosity >= 1) | ||
std::cout << "Sequence instr count: " << instr_v.size() << std::endl; | ||
|
||
int N = vm["length"].as<int>(); | ||
if ((N % 1024)) { | ||
std::cerr << "Length must be a multiple of 1024." << std::endl; | ||
return 1; | ||
} | ||
|
||
// Start the XRT test code | ||
// Get a device handle | ||
unsigned int device_index = 0; | ||
auto device = xrt::device(device_index); | ||
|
||
// Load the xclbin | ||
if (verbosity >= 1) | ||
std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() | ||
<< std::endl; | ||
auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>()); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>() | ||
<< std::endl; | ||
std::string Node = vm["kernel"].as<std::string>(); | ||
|
||
// Get the kernel from the xclbin | ||
auto xkernels = xclbin.get_kernels(); | ||
auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), | ||
[Node](xrt::xclbin::kernel &k) { | ||
auto name = k.get_name(); | ||
std::cout << "Name: " << name << std::endl; | ||
return name.rfind(Node, 0) == 0; | ||
}); | ||
auto kernelName = xkernel.get_name(); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>() | ||
<< "\n"; | ||
|
||
device.register_xclbin(xclbin); | ||
|
||
// get a hardware context | ||
if (verbosity >= 1) | ||
std::cout << "Getting hardware context." << std::endl; | ||
xrt::hw_context context(device, xclbin.get_uuid()); | ||
|
||
// get a kernel handle | ||
if (verbosity >= 1) | ||
std::cout << "Getting handle to kernel:" << kernelName << std::endl; | ||
auto kernel = xrt::kernel(context, kernelName); | ||
|
||
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), | ||
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); | ||
auto bo_inA = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY, | ||
kernel.group_id(3)); | ||
auto bo_inB = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY, | ||
kernel.group_id(4)); | ||
auto bo_out = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY, | ||
kernel.group_id(5)); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Writing data into buffer objects." << std::endl; | ||
|
||
int32_t *bufInA = bo_inA.map<int32_t *>(); | ||
std::vector<uint32_t> srcVecA; | ||
for (int i = 0; i < N; i++) | ||
srcVecA.push_back(i + 1); | ||
memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t))); | ||
|
||
void *bufInstr = bo_instr.map<void *>(); | ||
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); | ||
|
||
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Running Kernel." << std::endl; | ||
unsigned int opcode = 3; | ||
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out); | ||
run.wait(); | ||
|
||
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | ||
|
||
uint32_t *bufOut = bo_out.map<uint32_t *>(); | ||
|
||
int errors = 0; | ||
|
||
for (uint32_t i = 0; i < N; i++) { | ||
uint32_t ref = (i + 1); | ||
if (*(bufOut + i) != ref) { | ||
errors++; | ||
} | ||
} | ||
|
||
if (!errors) { | ||
std::cout << std::endl << "PASS!" << std::endl << std::endl; | ||
return 0; | ||
} else { | ||
std::cout << std::endl | ||
<< errors << " mismatches." << std::endl | ||
<< std::endl; | ||
std::cout << std::endl << "fail." << std::endl << std::endl; | ||
return 1; | ||
} | ||
} |