-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Linear transfer without transformation but with repeat (#1882)
- Loading branch information
Showing
6 changed files
with
225 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 AMD Inc. | ||
|
||
# REQUIRES: ryzen_ai, valid_xchess_license | ||
# | ||
# RUN: %python %S/aie2.py > ./aie2.mlir | ||
# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir | ||
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags | ||
# RUN: %run_on_npu ./test.exe | FileCheck %s | ||
# CHECK: PASS! | ||
|
||
import numpy as np | ||
from aie.extras.context import mlir_mod_ctx | ||
|
||
from aie.dialects.aie import * | ||
from aie.dialects.aiex import * | ||
from aie.helpers.dialects.ext.scf import _for as range_ | ||
|
||
# Element type moved through the design (test.cpp uses int16_t for DTYPE).
dtype = np.int16
# Number of elements in the input buffer A (A_LEN in test.cpp).
a_len = 2048
# Number of times A is sent through the core (REPEAT_COUNT in test.cpp).
repeat_count = 3
# The output buffer C holds repeat_count back-to-back copies of A.
c_len = repeat_count * a_len
|
||
|
||
def design():
    """Build the AIE design and print its MLIR module to stdout.

    The design places a shim tile at (0, 0) and a compute tile at (0, 2),
    connected by two depth-2 object FIFOs that both carry ``a_len``-element
    objects. The core copies one object from ``fifo_a`` to ``fifo_c``
    ``repeat_count`` times per outer iteration. The runtime sequence feeds
    buffer A into ``fifo_a`` with an outermost dimension of size
    ``repeat_count`` and stride 0, and drains ``fifo_c`` into buffer C as one
    linear ``c_len``-element transfer.
    """
    with mlir_mod_ctx() as ctx:

        @device(AIEDevice.npu1_4col)
        def device_body():
            a_ty = np.ndarray[(a_len,), np.dtype[dtype]]
            c_ty = np.ndarray[(c_len,), np.dtype[dtype]]

            shim_tile = tile(0, 0)
            compute_tile = tile(0, 2)

            # Both FIFOs move a_ty-sized objects; only the host-side C buffer
            # has the larger c_ty shape.
            fifo_a = object_fifo("fifo_a", shim_tile, compute_tile, 2, a_ty)
            fifo_c = object_fifo("fifo_c", compute_tile, shim_tile, 2, a_ty)

            # Core: element-wise copy of each incoming object to the output.
            @core(compute_tile)
            def core_body():
                for _ in range_(0, 0xFFFFFFFF):
                    for _ in range_(repeat_count):
                        dst = fifo_c.acquire(ObjectFifoPort.Produce, 1)
                        src = fifo_a.acquire(ObjectFifoPort.Consume, 1)
                        for idx in range_(a_len):
                            dst[idx] = src[idx]
                        fifo_a.release(ObjectFifoPort.Consume, 1)
                        fifo_c.release(ObjectFifoPort.Produce, 1)

            # To/from AIE-array data movement. The second argument (_B) is
            # part of the sequence signature but is not used by any transfer.
            @runtime_sequence(a_ty, a_ty, c_ty)
            def sequence(A, _B, C):
                # Outermost size repeat_count with stride 0: presumably this
                # re-sends the same a_len elements of A repeat_count times.
                npu_dma_memcpy_nd(
                    metadata=fifo_a,
                    bd_id=1,
                    mem=A,
                    sizes=[repeat_count, 1, 1, a_len],
                    strides=[0, 0, 0, 1],
                )
                # Single linear write of all c_len output elements into C.
                npu_dma_memcpy_nd(
                    metadata=fifo_c,
                    bd_id=0,
                    mem=C,
                    sizes=[1, 1, 1, c_len],
                    strides=[0, 0, 0, 1],
                )
                dma_wait(fifo_c)

        print(ctx.module)


design()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2024 AMD Inc. | ||
|
||
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"

#include "test_utils.h"
|
||
// Artifact names; each can be overridden at build time with -D<NAME>=...
#ifndef XCLBIN
#define XCLBIN "final.xclbin"
#endif

#ifndef INSTS_TXT
#define INSTS_TXT "insts.txt"
#endif

#ifndef KERNEL_NAME
#define KERNEL_NAME "MLIR_AIE"
#endif

// Element types of the host buffers. Typed aliases replace the former
// object-like macros; the names are unchanged so existing uses still compile.
using DTYPE = int16_t;
using A_DATATYPE = DTYPE;
using C_DATATYPE = DTYPE;

// Buffer lengths in elements: C holds REPEAT_COUNT copies of A.
constexpr int A_LEN = 2048;
constexpr int REPEAT_COUNT = 3;
constexpr int C_LEN = A_LEN * REPEAT_COUNT;

// Buffer sizes in bytes (type is that of sizeof, as with the former macros).
constexpr auto A_SIZE = A_LEN * sizeof(A_DATATYPE);
constexpr auto B_SIZE = A_SIZE;
constexpr auto C_SIZE = C_LEN * sizeof(C_DATATYPE);
|
||
int main(int argc, const char *argv[]) { | ||
|
||
std::vector<uint32_t> instr_v = test_utils::load_instr_sequence(INSTS_TXT); | ||
assert(instr_v.size() > 0); | ||
|
||
// Get a device handle | ||
unsigned int device_index = 0; | ||
xrt::device device = xrt::device(device_index); | ||
|
||
// Load the xclbin | ||
xrt::xclbin xclbin = xrt::xclbin(XCLBIN); | ||
|
||
// Get the kernel from the xclbin | ||
std::vector<xrt::xclbin::kernel> xkernels = xclbin.get_kernels(); | ||
xrt::xclbin::kernel xkernel = *std::find_if( | ||
xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { | ||
return k.get_name().rfind(KERNEL_NAME, 0) == 0; | ||
}); | ||
std::string kernel_name = xkernel.get_name(); | ||
assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); | ||
|
||
device.register_xclbin(xclbin); | ||
|
||
// get a hardware context | ||
xrt::hw_context context(device, xclbin.get_uuid()); | ||
|
||
// get a kernel handle | ||
auto kernel = xrt::kernel(context, kernel_name); | ||
|
||
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), | ||
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); | ||
auto bo_a = | ||
xrt::bo(device, A_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); | ||
auto bo_b = | ||
xrt::bo(device, B_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); | ||
auto bo_c = | ||
xrt::bo(device, C_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); | ||
|
||
A_DATATYPE *buf_a = bo_a.map<A_DATATYPE *>(); | ||
for (int i = 0; i < A_SIZE / sizeof(buf_a[0]); i++) { | ||
buf_a[i] = 2 * i; // even | ||
} | ||
C_DATATYPE *buf_c = bo_c.map<C_DATATYPE *>(); | ||
memset(buf_c, 0, C_SIZE); | ||
|
||
// Instruction buffer for DMA configuration | ||
void *bufInstr = bo_instr.map<void *>(); | ||
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); | ||
|
||
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_a.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_b.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo_c.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
unsigned int opcode = 3; | ||
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_a, bo_b, bo_c); | ||
ert_cmd_state r = run.wait(); | ||
if (r != ERT_CMD_STATE_COMPLETED) { | ||
std::cout << "Kernel did not complete. Returned status: " << r << "\n"; | ||
return 1; | ||
} | ||
|
||
bo_c.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | ||
|
||
int errors = 0; | ||
for (int i = 0; i < C_SIZE / sizeof(buf_c[0]); i++) { | ||
std::cout << std::setw(4) << (long)buf_c[i] << " "; | ||
if (buf_c[i] != buf_a[i % A_LEN]) { | ||
errors += 1; | ||
} | ||
} | ||
std::cout << std::endl; | ||
|
||
if (errors == 0) { | ||
std::cout << "PASS!" << std::endl; | ||
} else { | ||
std::cout << "FAIL." << std::endl; | ||
} | ||
|
||
return 0; | ||
} |