Skip to content

Commit

Permalink
VCK5000 IRON support (#1150)
Browse files Browse the repository at this point in the history
Co-authored-by: Jeff Fifield <[email protected]>
Co-authored-by: Joseph Melber <[email protected]>
  • Loading branch information
3 people authored Apr 4, 2024
1 parent df42b66 commit ae36a44
Show file tree
Hide file tree
Showing 204 changed files with 8,367 additions and 1,017 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ find_package(Vitis 2023.2 COMPONENTS AIE AIE2)
configure_file(./utils/vitisVariables.config.in
${CMAKE_BINARY_DIR}/utils/vitisVariables.config @ONLY)
find_package(XRT)
find_package(hsa-runtime64)

# Set up default Vitis Sysroot as sysroot when testing on aarch64
list(FIND AIE_RUNTIME_TARGETS "aarch64" indexAarch64)
Expand Down
1 change: 1 addition & 0 deletions include/aie-c/Translation.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ MLIR_CAPI_EXPORTED MlirStringRef aieTranslateAIEVecToCpp(MlirOperation op,
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateModuleToLLVMIR(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToIPU(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToXAIEV2(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToHSA(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToBCF(MlirOperation op, int col,
int row);
MLIR_CAPI_EXPORTED MlirStringRef aieLLVMLink(MlirStringRef *modules,
Expand Down
4 changes: 3 additions & 1 deletion include/aie/Targets/AIETargets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ namespace AIE {

mlir::LogicalResult AIETranslateToXAIEV2(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult AIETranslateToHSA(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult AIEFlowsToJSON(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult ADFGenerateCPPGraph(mlir::ModuleOp module,
Expand Down Expand Up @@ -70,4 +72,4 @@ mlir::LogicalResult translateAIEVecToCpp(mlir::Operation *op, bool aieml,
} // namespace aievec
} // namespace xilinx

#endif
#endif
11 changes: 11 additions & 0 deletions lib/CAPI/Translation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@ MlirStringRef aieTranslateToXAIEV2(MlirOperation moduleOp) {
return mlirStringRefCreate(cStr, xaie.size());
}

/// C API entry point: runs the HSA translation on the module wrapped by
/// `moduleOp` and returns the generated text.
///
/// On success the returned MlirStringRef points at a malloc'ed buffer holding
/// the generated text (not NUL-terminated; the length is carried in the ref).
/// On translation failure a {nullptr, 0} reference is returned.
MlirStringRef aieTranslateToHSA(MlirOperation moduleOp) {
  std::string generated;
  llvm::raw_string_ostream stream(generated);

  ModuleOp topLevel = llvm::cast<ModuleOp>(unwrap(moduleOp));
  const bool translationFailed = failed(AIETranslateToHSA(topLevel, stream));
  if (translationFailed)
    return mlirStringRefCreate(nullptr, 0);

  // Copy the text out of the local std::string into a heap buffer that
  // outlives this function.
  const size_t length = generated.size();
  char *buffer = static_cast<char *>(malloc(length));
  generated.copy(buffer, length);
  return mlirStringRefCreate(buffer, length);
}

MlirStringRef aieTranslateToBCF(MlirOperation moduleOp, int col, int row) {
std::string bcf;
llvm::raw_string_ostream os(bcf);
Expand Down
1 change: 0 additions & 1 deletion lib/Targets/AIETargetCDODirect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ struct AIEControl {
// macro. In future, the same macro will be expanded to allocate
// more memory from the user application for resource management.
XAie_InstDeclare(_devInst, &configPtr);

devInst = _devInst;
// TODO(max): what is the "partition"?
TRY_XAIE_API_FATAL_ERROR(XAie_SetupPartitionConfig, &devInst,
Expand Down
224 changes: 224 additions & 0 deletions lib/Targets/AIETargetHSA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
//===- AIETargetHSA.cpp -----------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2021 Xilinx Inc.
// (c) Copyright 2021-2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//
#include "AIETargetShared.h"

#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
#include "aie/Targets/AIETargets.h"

#include "mlir/Dialect/Func/IR/FuncOps.h" // Eddie added to get the IPU func ops
#include "mlir/IR/Attributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Module.h"

using namespace mlir;
using namespace xilinx;
using namespace xilinx::AIE;
using namespace xilinx::AIEX;

namespace xilinx::AIE {

// Preamble written verbatim at the very start of the generated C++ output by
// AIETranslateToHSA. It defines the __mlir_aie_verbose(x) macro, which expands
// to `x` unless MLIR_AIE_QUIET is defined, in which case it expands to nothing.
const char *hsa_cpp_file_header = R"code(
// This file was auto-generated by aiecc.py --aie-generate-hsa
#ifndef MLIR_AIE_QUIET
#define __mlir_aie_verbose(x) x
#else
#define __mlir_aie_verbose(x)
#endif
)code";

/// Finds the ShimDMAAllocationOp inside `dev` that uses the symbol named
/// `sym_name`.
///
/// \param dev       Device op whose symbol table and body are searched.
/// \param sym_name  Name of the symbol to look up in `dev`.
/// \returns the first user of the symbol that is a ShimDMAAllocationOp, or
///          std::nullopt when the symbol is not found in `dev`, when its uses
///          cannot be computed, or when no user is a ShimDMAAllocationOp.
std::optional<AIE::ShimDMAAllocationOp>
getAllocOpForSymbol(AIE::DeviceOp dev, StringRef sym_name) {
  auto sym = dev.lookupSymbol(sym_name);
  if (!sym)
    return std::nullopt;

  // getSymbolUses returns an optional range; it is empty when the uses could
  // not be determined, so it must be checked before being dereferenced.
  auto uses = SymbolTable::getSymbolUses(sym, dev);
  if (!uses)
    return std::nullopt;

  for (auto use : *uses)
    if (auto infoOp = dyn_cast<AIE::ShimDMAAllocationOp>(use.getUser()))
      return infoOp;

  return std::nullopt;
}

/// Emits C++ host code that issues the data movement described by the
/// "sequence" function of the first DeviceOp in `module` through an HSA queue.
///
/// The output always starts with `hsa_cpp_file_header`. When the device
/// contains a func.func named "sequence", a function
///   void invoke_data_movement(hsa_queue_t *q, hsa_agent_t *a, void *buf<N>...)
/// follows, with one `void *buf<N>` parameter and one dispatch packet per
/// IpuDmaMemcpyNdOp in that function; N is the index of the sequence argument
/// the op uses as its memref.
///
/// \param module  Module to translate.
/// \param output  Stream that receives the generated text.
/// \returns success when the text was generated, or when there is no
///          "sequence" function (only the header is written in that case);
///          failure, after emitting a diagnostic, when the module has no
///          DeviceOp, when a memcpy's memref is not an argument of the sequence
///          function, or when no shim_dma_allocation op can be found for a
///          memcpy. Text written before a failure is not retracted.
mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) {

  DenseMap<TileID, Operation *> tiles;
  DenseMap<Operation *, SmallVector<BufferOp, 4>> buffers;

  if (module.getOps<DeviceOp>().empty())
    return module.emitOpError("expected AIE.device operation at toplevel");
  DeviceOp targetOp = *(module.getOps<DeviceOp>().begin());

  // Putting the standard header
  output << hsa_cpp_file_header;

  // Getting the sequence function op which contains the instructions. If the
  // device holds no functions at all the loop simply finds nothing.
  mlir::func::FuncOp funcOp = nullptr;
  for (auto op : targetOp.getOps<mlir::func::FuncOp>()) {
    if (op.getName() == "sequence")
      funcOp = op;
  }

  // If no funcOp then just return; the header alone is the output.
  if (!funcOp)
    return success();

  // NOTE(review): the collected tiles/buffers are not read anywhere below;
  // the calls are kept as they were in case the helpers matter to callers.
  collectTiles(targetOp, tiles);
  collectBuffers(targetOp, buffers);

  // Maps a memref value to the position of the matching argument in the
  // sequence function's entry block, or -1 if it is not one of the arguments.
  // That position is what names the `buf<N>` parameter in the generated code.
  Block &entryBB = funcOp.getBody().front();
  auto getBufferArgIndex = [&entryBB](Value memref) -> int {
    for (int i = 0, e = entryBB.getNumArguments(); i < e; i++) {
      if (entryBB.getArgument(i) == memref)
        return i;
    }
    return -1;
  };

  // Generate dynamic data movement
  output << "void invoke_data_movement(hsa_queue_t *q, hsa_agent_t *a";

  // Looping over every Memcpy operation so we take the correct number of
  // buffers
  int num_ops = 0;
  for (auto op : funcOp.getOps<IpuDmaMemcpyNdOp>()) {
    int arg_idx = getBufferArgIndex(op.getMemref());
    if (arg_idx < 0) {
      // Without this check the generated parameter would be named "buf-1",
      // which is not a valid C++ identifier.
      op.emitOpError(
          "memref operand is not an argument of the sequence function");
      return failure();
    }
    num_ops++;

    output << ", void *buf" << arg_idx;
  }

  output << ") {\n";

  output << "\tuint64_t wr_idx = 0;\n";
  output << "\tuint64_t packet_id = 0;\n";

  int op_count = 0;
  for (auto op : funcOp.getOps<IpuDmaMemcpyNdOp>()) {
    // funcOp is a direct child of targetOp, so targetOp is the enclosing
    // device; no per-iteration parent lookup is needed.
    auto infoOp = getAllocOpForSymbol(targetOp, op.getMetadata());
    if (!infoOp) {
      op.emitOpError("couldn't find shim_dma_allocation op");
      return failure();
    }

    auto channelDir = infoOp->getChannelDir();
    uint32_t ChannelId = infoOp->getChannelIndex();
    bool isMM2S = channelDir == AIE::DMAChannelDir::MM2S;
    int col = infoOp->getCol();

    // The op's strides/sizes/offsets are reversed here, so index 0 below is
    // the last entry of each list as written on the op.
    // NOTE(review): .value() assumes every entry is a compile-time constant;
    // presumably the op verifier guarantees this - confirm.
    llvm::SmallVector<int64_t, 3> strides = llvm::map_to_vector(
        llvm::reverse(op.getMixedStrides()),
        [](OpFoldResult s) { return getConstantIntValue(s).value(); });
    llvm::SmallVector<int64_t, 4> sizes = llvm::map_to_vector(
        llvm::reverse(op.getMixedSizes()),
        [](OpFoldResult s) { return getConstantIntValue(s).value(); });
    llvm::SmallVector<int64_t, 4> offsets = llvm::map_to_vector(
        llvm::reverse(op.getMixedOffsets()),
        [](OpFoldResult s) { return getConstantIntValue(s).value(); });

    // buffer_offset: byte offset of the transfer's start within the buffer,
    // accumulated dimension by dimension using the memref's shape.
    size_t stride = 1;
    size_t offset = 0;
    MemRefType my_memref = op.getMemref().getType();
    auto shape = my_memref.getShape();
    size_t R = shape.size();
    size_t el_bit_width = my_memref.getElementTypeBitWidth();
    assert(el_bit_width % 8 == 0 &&
           "Expected Memref element bitwidth to be multiple of 8.");
    size_t S = el_bit_width / 8;
    for (size_t i = 0; i < R; i++) {
      offset += offsets[i] * stride * S;
      stride *= shape[R - i - 1];
    }

    // Getting the ID of the buffer that we are using. The signature loop
    // above already rejected ops whose memref is not a sequence argument.
    int arg_idx = getBufferArgIndex(op.getMemref());

    // Writing the packet information to perform the DMA
    // NOTE(review): lengths and strides are scaled by a literal 4 rather than
    // by the element size S computed above - confirm whether element types
    // other than 32-bit are meant to be supported.
    output << "\thsa_agent_dispatch_packet_t pkt" << op_count << " ;\n";
    output << "\twr_idx  = hsa_queue_add_write_index_relaxed(q, 1);\n";
    output << "\tpacket_id  = wr_idx % q->size;\n";
    output << "\tmlir_aie_packet_nd_memcpy(&pkt" << op_count
           << ", 0 /* herd_id */, " << col << " /* col */, " << isMM2S
           << " /* dir */, " << ChannelId
           << "/* channel */, 4 /* Burst length */, 2 /* Memory space */, "
              "(uint64_t)buf"
           << arg_idx << " + " << offset << " /* Address */, " << sizes[0] * 4
           << " /* 1d_length */, " << (strides[0] ? sizes[1] : 1)
           << " /* 2d_length */, " << (strides[0] ? strides[0] * 4 : 0)
           << " /* 2d_stride */, " << (strides[1] ? sizes[2] : 1)
           << " /* 3d_length */, " << (strides[1] ? strides[1] * 4 : 0)
           << " /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */);\n";

    bool last_op = op_count == (num_ops - 1);
    // Only ring the doorbell on the last packet
    if (last_op) {
      output
          << "\tmlir_aie_queue_dispatch_and_wait(a, q, packet_id, wr_idx, &pkt"
          << op_count << ", false);\n\n";
    } else {
      output << "\thsa_amd_signal_create_on_agent(1, 0, nullptr, a, 0, &pkt"
             << op_count << ".completion_signal);\n";
      output << "\tmlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, "
                "packet_id, &pkt"
             << op_count << ");\n\n";
    }

    op_count++;
  }

  // Waiting to make sure each DMA is complete
  for (int i = 0; i < op_count; i++) {
    output << "\twhile (hsa_signal_wait_scacquire(pkt" << i
           << ".completion_signal,\n";
    output << "\tHSA_SIGNAL_CONDITION_EQ, 0, 0x80000,\n";
    output << "\tHSA_WAIT_STATE_ACTIVE) != 0);\n";
  }

  // Destroying every signal that we created
  for (int i = 0; i < op_count; i++) {
    output << "\thsa_signal_destroy(pkt" << i << ".completion_signal);\n";
  }

  output << "}\n";

  return success();
}
} // namespace xilinx::AIE
6 changes: 6 additions & 0 deletions lib/Targets/AIETargets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@ void registerAIETranslations() {
return AIETranslateToXAIEV2(module, output);
},
registerDialects);
TranslateFromMLIRRegistration registrationHSA(
"aie-generate-hsa", "Generate hsa data movement configuration",
[](ModuleOp module, raw_ostream &output) {
return AIETranslateToHSA(module, output);
},
registerDialects);
TranslateFromMLIRRegistration registrationXJSON(
"aie-flows-to-json", "Translate AIE flows to JSON", AIEFlowsToJSON,
registerDialects);
Expand Down
1 change: 1 addition & 0 deletions lib/Targets/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_mlir_library(AIETargets
AIETargetIPU.cpp
AIETargetLdScript.cpp
AIETargetXAIEV2.cpp
AIETargetHSA.cpp
AIETargetShared.cpp
AIETargetSimulationFiles.cpp
ADFGenerateCppGraph.cpp
Expand Down
Loading

0 comments on commit ae36a44

Please sign in to comment.