diff --git a/lib/Targets/AIETargetAirbin.cpp b/lib/Targets/AIETargetAirbin.cpp index 6a00765709..f6a51f152d 100644 --- a/lib/Targets/AIETargetAirbin.cpp +++ b/lib/Targets/AIETargetAirbin.cpp @@ -85,49 +85,54 @@ static constexpr auto ME_SS_SLAVE_SLOT_BASE = 0x3F200u; static constexpr auto ME_DMA_BD_COUNT = 16; static constexpr auto ME_DMA_BD_SIZE = 0x20; -struct me_reg_dma_bd { - uint32_t addr_a; - uint32_t addr_b; - uint32_t x_2d{0xff0000u | 0x001u}; // wrap at 256, increment by 1 - uint32_t y_2d{0xff000000u | 0xff0000u | - 0x100u}; // wrap at 256, increment by 256 every 256 streams +struct MERegDMABD { + uint32_t addrA; + uint32_t addrB; + uint32_t x2d{0xff0000u | 0x001u}; // wrap at 256, increment by 1 + uint32_t y2d{0xff000000u | 0xff0000u | + 0x100u}; // wrap at 256, increment by 256 every 256 streams uint32_t packet; uint32_t interleave; uint32_t control; uint32_t padding; }; -static_assert(sizeof(me_reg_dma_bd) == ME_DMA_BD_SIZE, +static_assert(sizeof(MERegDMABD) == ME_DMA_BD_SIZE, "Size of me_reg_dma_bd is incorrect"); -typedef me_reg_dma_bd dma_bd_reg_block[ME_DMA_BD_COUNT]; -static const me_reg_dma_bd * - dma_bd_regs(reinterpret_cast(ME_DMA_BD_BASE)); +using DMABDRegBlock = MERegDMABD[ME_DMA_BD_COUNT]; +static const MERegDMABD * + DMABdRegs(reinterpret_cast(ME_DMA_BD_BASE)); -static_assert(sizeof(dma_bd_reg_block) == - (ME_DMA_BD_COUNT * sizeof(struct me_reg_dma_bd)), +static_assert(sizeof(DMABDRegBlock) == (ME_DMA_BD_COUNT * sizeof(MERegDMABD)), "Size of dma_bd_reg_block is incorrect"); -auto reg_dma_addr_a_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].addr_a); +auto regDMAAddrABD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].addrA); }; -auto reg_dma_addr_b_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].addr_b); + +auto regDMAAddrBBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].addrB); }; -auto reg_dma_2d_x_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].x_2d); + +auto regDMA2DXBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].x2d); }; -auto reg_dma_2d_y_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].y_2d); + +auto regDMA2DYBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].y2d); }; -auto reg_dma_pkt_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].packet); + +auto regDMAPktBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].packet); }; -auto reg_dma_int_state_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].interleave); + +auto regDMAIntStateBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].interleave); }; -auto reg_dma_ctrl_bd = [](auto _idx) { - return reinterpret_cast(&dma_bd_regs[_idx].control); + +auto regDMACtrlBD = [](auto idx) { + return reinterpret_cast(&DMABdRegs[idx].control); }; /* @@ -136,23 +141,24 @@ auto reg_dma_ctrl_bd = [](auto _idx) { static constexpr auto DMA_S2MM_CHANNEL_COUNT = 2u; static constexpr auto REG_DMA_S2MM_BLOCK_SIZE = 0x08; -struct reg_dma_s2mm { +struct RegDMAS2MM { uint32_t ctrl; uint32_t queue; }; -static_assert(sizeof(struct reg_dma_s2mm) == REG_DMA_S2MM_BLOCK_SIZE, +static_assert(sizeof(RegDMAS2MM) == REG_DMA_S2MM_BLOCK_SIZE, "Size of reg_dma_s2mm is incorrect"); -typedef reg_dma_s2mm dma_s2mm_reg_block[DMA_S2MM_CHANNEL_COUNT]; -static const reg_dma_s2mm * - dma_s2mm_regs(reinterpret_cast(ME_DMA_S2MM_BASE)); +using DMAS2MMRegBlock = RegDMAS2MM[DMA_S2MM_CHANNEL_COUNT]; +static const RegDMAS2MM * + DMAS2MMRegs(reinterpret_cast(ME_DMA_S2MM_BASE)); -auto reg_dma_s2mm_ctrl = [](auto _ch) { - return reinterpret_cast(&dma_s2mm_regs[_ch].ctrl); +auto regDMAS2MMCtrl = [](auto ch) { + return reinterpret_cast(&DMAS2MMRegs[ch].ctrl); }; -auto reg_dma_s2mm_queue = [](auto _ch) { - return reinterpret_cast(&dma_s2mm_regs[_ch].queue); + +auto regDMAS2MMQueue = [](auto ch) { + return reinterpret_cast(&DMAS2MMRegs[ch].queue); }; /* @@ -161,23 +167,24 @@ auto reg_dma_s2mm_queue = [](auto _ch) { static constexpr auto DMA_MM2S_CHANNEL_COUNT = 2u; static constexpr auto REG_DMA_MM2S_BLOCK_SIZE = 0x08; -struct reg_dma_mm2s { +struct RegDMAMM2S { uint32_t ctrl; uint32_t queue; }; -static_assert(sizeof(struct reg_dma_mm2s) == REG_DMA_MM2S_BLOCK_SIZE, +static_assert(sizeof(RegDMAMM2S) == REG_DMA_MM2S_BLOCK_SIZE, "Size of reg_dma_mm2s is incorrect"); -typedef reg_dma_mm2s dma_mm2s_reg_block[DMA_MM2S_CHANNEL_COUNT]; -static const reg_dma_mm2s * - dma_mm2s_regs(reinterpret_cast(ME_DMA_MM2S_BASE)); +using DMAMM2SRegBlock = RegDMAMM2S[DMA_MM2S_CHANNEL_COUNT]; +static const RegDMAMM2S * + DMAMM2SRegs(reinterpret_cast(ME_DMA_MM2S_BASE)); -auto reg_dma_mm2s_ctrl = [](auto _ch) { - return reinterpret_cast(&dma_mm2s_regs[_ch].ctrl); +auto regDMAMM2SCtrl = [](auto ch) { + return reinterpret_cast(&DMAMM2SRegs[ch].ctrl); }; -auto reg_dma_mm2s_queue = [](auto _ch) { - return reinterpret_cast(&dma_mm2s_regs[_ch].queue); + +auto regDMAMM2SQueue = [](auto ch) { + return reinterpret_cast(&DMAMM2SRegs[ch].queue); }; /* @@ -188,42 +195,41 @@ static constexpr auto ME_SS_SLAVE_CFG_COUNT = 27; static constexpr auto ME_SS_SLAVE_SLOT_COUNT = 108; static constexpr auto SS_SLOT_NUM_PORTS = 4u; -typedef uint32_t me_ss_master_block[ME_SS_MASTER_COUNT]; -static const me_ss_master_block * - me_ss_master(reinterpret_cast(ME_SS_MASTER_BASE)); +using MESSMasterBlock = uint32_t[ME_SS_MASTER_COUNT]; +static const MESSMasterBlock * + MESSMaster(reinterpret_cast(ME_SS_MASTER_BASE)); -static_assert(sizeof(me_ss_master_block) == +static_assert(sizeof(MESSMasterBlock) == (ME_SS_MASTER_COUNT * sizeof(uint32_t)), "Size of me_ss_master_block is incorrect"); -auto reg_me_ss_master = [](auto _idx) { - return reinterpret_cast(&me_ss_master[_idx]); +auto regMESSMaster = [](auto idx) { + return reinterpret_cast(&MESSMaster[idx]); }; -typedef uint32_t me_ss_slave_cfg_block[ME_SS_SLAVE_CFG_COUNT]; -static const me_ss_slave_cfg_block *me_ss_slave_cfg( - reinterpret_cast(ME_SS_SLAVE_CFG_BASE)); +using MESSSlaveCfgBlock = uint32_t[ME_SS_SLAVE_CFG_COUNT]; +static const MESSSlaveCfgBlock * + MESSSlaveCfg(reinterpret_cast(ME_SS_SLAVE_CFG_BASE)); -static_assert(sizeof(me_ss_slave_cfg_block) == +static_assert(sizeof(MESSSlaveCfgBlock) == (ME_SS_SLAVE_CFG_COUNT * sizeof(uint32_t)), "Size of me_ss_slave_cfg_block is incorrect"); -auto reg_me_ss_slave_cfg = [](auto _idx) { - return reinterpret_cast(&me_ss_slave_cfg[_idx]); +auto regMESSSlaveCfg = [](auto idx) { + return reinterpret_cast(&MESSSlaveCfg[idx]); }; -typedef uint32_t me_ss_slave_slot_block[ME_SS_SLAVE_SLOT_COUNT] - [SS_SLOT_NUM_PORTS]; -static const me_ss_slave_slot_block *me_ss_slave_slot( - reinterpret_cast(ME_SS_SLAVE_SLOT_BASE)); +using MESSSlaveSlotBlock = uint32_t[ME_SS_SLAVE_SLOT_COUNT][SS_SLOT_NUM_PORTS]; +static const MESSSlaveSlotBlock *MESSSlaveSlot( + reinterpret_cast(ME_SS_SLAVE_SLOT_BASE)); -static_assert(sizeof(me_ss_slave_slot_block) == +static_assert(sizeof(MESSSlaveSlotBlock) == (ME_SS_SLAVE_SLOT_COUNT * SS_SLOT_NUM_PORTS * sizeof(uint32_t)), "Size of me_ss_slave_slot_block is incorrect"); -auto reg_me_ss_slave_slot = [](auto _port, auto _slot) { - return reinterpret_cast(&me_ss_slave_slot[_slot][_port]); +auto regMESSSlaveSlot = [](auto port, auto slot) { + return reinterpret_cast(&MESSSlaveSlot[slot][port]); }; // ME data memory @@ -247,18 +253,18 @@ static constexpr auto SHIM_SS_SLAVE_SLOT_BASE = 0x3F200u; static constexpr auto SHIM_DMA_BD_COUNT = 16; static constexpr auto REG_SHIM_DMA_BD_SIZE = 0x14; -struct shim_dma_bd { - uint32_t addr_low; +struct ShimDMABD { + uint32_t addrLow; uint32_t len; uint32_t control; - uint32_t axi_cfg; + uint32_t axiCfg; uint32_t packet; }; -static_assert(sizeof(struct shim_dma_bd) == REG_SHIM_DMA_BD_SIZE, +static_assert(sizeof(struct ShimDMABD) == REG_SHIM_DMA_BD_SIZE, "Size of shim_dma_bd is incorrect"); -typedef shim_dma_bd shim_dma_bd_block[SHIM_DMA_BD_COUNT]; +using ShimDMABDBlock = ShimDMABD[SHIM_DMA_BD_COUNT]; /* Mux/demux @@ -272,14 +278,14 @@ static constexpr auto SHIM_SS_MASTER_COUNT = 23; static constexpr auto SHIM_SS_SLAVE_CFG_COUNT = 24; static constexpr auto SHIM_SS_SLAVE_SLOT_COUNT = 96; -typedef uint32_t shim_ss_master_block[SHIM_SS_MASTER_COUNT]; -typedef uint32_t shim_ss_slave_cfg_block[SHIM_SS_SLAVE_CFG_COUNT]; -typedef uint32_t shim_ss_slave_slot_block[SHIM_SS_SLAVE_SLOT_COUNT]; +using ShimSSMasterBlock = uint32_t[SHIM_SS_MASTER_COUNT]; +using ShimSSSlaveCfgBlock = uint32_t[SHIM_SS_SLAVE_CFG_COUNT]; +using ShimSSSlaveSlotBlock = uint32_t[SHIM_SS_SLAVE_SLOT_COUNT]; // section names -static uint8_t sec_name_offset[SEC_IDX_MAX]; +static uint8_t secNameOffset[SEC_IDX_MAX]; -static const char *sec_name_str[SEC_IDX_MAX] = { +static const char *secNameStr[SEC_IDX_MAX] = { "null", ".ssmast", ".ssslve", ".sspckt", ".sdma.bd", ".shmmux", ".sdma.ctl", ".prgm.mem", ".tdma.bd", ".tdma.ctl", "deprecated", ".data.mem"}; @@ -291,7 +297,7 @@ static size_t stridx; All recorded writes are time/order invariant. This allows sorting to compact the airbin. */ -static std::map mem_writes; +static std::map memWrites; /* * Tile address format: @@ -303,8 +309,8 @@ static std::map mem_writes; */ class TileAddress { public: - TileAddress(uint8_t column, uint8_t row, uint64_t array_offset = 0x000u) - : array_offset{array_offset}, column{column}, row{row} {} + TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset = 0x000u) + : arrayOffset{arrayOffset}, column{column}, row{row} {} // SFINAE is used here to choose the copy constructor for `TileAddress`, // and this constructor for all other classes. @@ -314,11 +320,10 @@ class TileAddress { : TileAddress{static_cast(op.colIndex()), static_cast(op.rowIndex())} {} - uint64_t fullAddress(uint64_t register_offset) const { - return (array_offset << TILE_ADDR_ARR_SHIFT) | + uint64_t fullAddress(uint64_t registerOffset) const { + return (arrayOffset << TILE_ADDR_ARR_SHIFT) | (static_cast(column) << TILE_ADDR_COL_SHIFT) | - (static_cast(row) << TILE_ADDR_ROW_SHIFT) | - register_offset; + (static_cast(row) << TILE_ADDR_ROW_SHIFT) | registerOffset; } bool isShim() const { return row == 0; } @@ -329,10 +334,10 @@ class TileAddress { uint8_t col() const { return column; } - void clearRange(uint32_t range_start, uint32_t length); + void clearRange(uint32_t rangeStart, uint32_t length); private: - uint64_t array_offset : 34; + uint64_t arrayOffset : 34; uint8_t column : TILE_ADDR_COL_WIDTH; uint8_t row : TILE_ADDR_ROW_WIDTH; }; @@ -347,22 +352,22 @@ class Address { operator uint64_t() const { return tile.fullAddress(offset); } TileAddress destTile() const { return tile; } - uint32_t get_offset() const { return offset; } + uint32_t getOffset() const { return offset; } private: TileAddress tile; uint64_t offset : TILE_ADDR_OFF_WIDTH; }; -typedef std::pair Write; +using Write = std::pair; class Section { public: Section(uint64_t addr) : address(addr){}; - uint64_t get_addr() const { return address; } - size_t get_length() const { return data.size() * sizeof(uint32_t); } - void add_data(uint32_t value) { data.push_back(value); } - const uint32_t *get_data() const { return data.data(); } + uint64_t getAddr() const { return address; } + size_t getLength() const { return data.size() * sizeof(uint32_t); } + void addData(uint32_t value) { data.push_back(value); } + const uint32_t *getData() const { return data.data(); } private: uint64_t address; // start address of this section @@ -370,22 +375,23 @@ class Section { }; // This template can be instantiated to represent a bitfield in a register. -template class Field final { +template +class Field final { public: - static_assert(high_bit >= low_bit, + static_assert(highBit >= lowBit, "The high bit should be higher than the low bit"); - static_assert(high_bit < sizeof(uint32_t) * 8u, + static_assert(highBit < sizeof(uint32_t) * 8u, "The field must live in a 32-bit register"); - static constexpr auto num_bits_used = (high_bit - low_bit) + 1u; - static constexpr auto unshifted_mask = (1u << num_bits_used) - 1u; - static_assert((low_bit != high_bit) ^ (unshifted_mask == 1), + static constexpr auto numBitsUsed = (highBit - lowBit) + 1u; + static constexpr auto unshiftedMask = (1u << numBitsUsed) - 1u; + static_assert((lowBit != highBit) ^ (unshiftedMask == 1), "1 is a valid mask iff the field is 1 bit wide"); - static constexpr auto shifted_mask = unshifted_mask << low_bit; + static constexpr auto shiftedMask = unshiftedMask << lowBit; [[nodiscard]] constexpr uint32_t operator()(uint32_t value) const { - return (value << low_bit) & shifted_mask; + return (value << lowBit) & shiftedMask; } }; @@ -398,9 +404,9 @@ static void write32(Address addr, uint32_t value) { llvm::Twine("address of destination tile <= 0 : ") + std::to_string(addr.destTile().col())); - auto ret = mem_writes.emplace(addr, value); + auto ret = memWrites.emplace(addr, value); if (!ret.second) - (ret.first)->second = value; + ret.first->second = value; } /* @@ -409,8 +415,8 @@ static void write32(Address addr, uint32_t value) { If the address is found return the value, otherwise 0 */ static uint32_t read32(Address addr) { - auto ret = mem_writes.find(addr); - if (ret != mem_writes.end()) + auto ret = memWrites.find(addr); + if (ret != memWrites.end()) return ret->second; return 0; @@ -446,34 +452,36 @@ static void loadElf(TileAddress tile, const std::string &filename) { LLVM_DEBUG(llvm::dbgs() << "Reading ELF file " << filename << " for tile " << tile << '\n'); - int elf_fd = open(filename.c_str(), O_RDONLY); - if (elf_fd < 0) + int elfFd = open(filename.c_str(), O_RDONLY); + if (elfFd < 0) llvm::report_fatal_error(llvm::Twine("Can't open elf file ") + filename); elf_version(EV_CURRENT); - Elf *inelf = elf_begin(elf_fd, ELF_C_READ, nullptr); + Elf *inElf = elf_begin(elfFd, ELF_C_READ, nullptr); // check the characteristics GElf_Ehdr *ehdr; - GElf_Ehdr ehdr_mem; - ehdr = gelf_getehdr(inelf, &ehdr_mem); + GElf_Ehdr ehdrMem; + ehdr = gelf_getehdr(inElf, &ehdrMem); if (!ehdr) llvm::report_fatal_error(llvm::Twine("cannot get ELF header: ") + elf_errmsg(-1)); // Read data as 32-bit little endian - assert(ehdr->e_ident[EI_CLASS] == ELFCLASS32); - assert(ehdr->e_ident[EI_DATA] == ELFDATA2LSB); + assert(ehdr->e_ident[EI_CLASS] == ELFCLASS32 && + "(ehdr->e_ident[EI_CLASS] != ELFCLASS32"); + assert(ehdr->e_ident[EI_DATA] == ELFDATA2LSB && + "ehdr->e_ident[EI_DATA] != ELFDATA2LSB"); size_t phnum; - if (elf_getphdrnum(inelf, &phnum) != 0) + if (elf_getphdrnum(inElf, &phnum) != 0) llvm::report_fatal_error(llvm::Twine("cannot get program header count: ") + elf_errmsg(-1)); // iterate through all program headers for (unsigned int ndx = 0; ndx < phnum; ndx++) { - GElf_Phdr phdr_mem; - GElf_Phdr *phdr = gelf_getphdr(inelf, ndx, &phdr_mem); + GElf_Phdr phdrMem; + GElf_Phdr *phdr = gelf_getphdr(inElf, ndx, &phdrMem); if (!phdr) llvm::report_fatal_error(llvm::Twine("cannot get program header entry ") + std::to_string(ndx) + ": " + elf_errmsg(-1)); @@ -495,48 +503,46 @@ static void loadElf(TileAddress tile, const std::string &filename) { // read data one word at a time and write it to the output list // TODO since we know these are data and not registers, we could likely // bypass the output list and write a section directly into the AIRBIN - size_t elfsize; + size_t elfSize; uint32_t offset; - char *raw = elf_rawfile(inelf, &elfsize); + char *raw = elf_rawfile(inElf, &elfSize); for (offset = phdr->p_offset; offset < phdr->p_offset + phdr->p_filesz; offset += 4) { - Address dest_addr{tile, dest}; - uint32_t data = *(uint32_t *)(raw + offset); - write32(dest_addr, data); + Address destAddr{tile, dest}; + uint32_t data = *reinterpret_cast(raw + offset); + write32(destAddr, data); dest += 4; } } - elf_end(inelf); - close(elf_fd); + elf_end(inElf); + close(elfFd); } /* The SHIM row is always 0. SHIM resets are handled by the runtime. */ -static void config_shim_tile(TileOp &tileOp) { +static void configShimTile(TileOp &tileOp) { assert(tileOp.isShimTile() && "The tile must be a Shim to generate Shim Config"); TileAddress tileAddress{tileOp}; if (tileOp.isShimNOCTile()) { - tileAddress.clearRange(SHIM_DMA_BD_BASE, sizeof(shim_dma_bd_block)); + tileAddress.clearRange(SHIM_DMA_BD_BASE, sizeof(ShimDMABDBlock)); } - tileAddress.clearRange(SHIM_SS_MASTER_BASE, sizeof(shim_ss_master_block)); - tileAddress.clearRange(SHIM_SS_SLAVE_CFG_BASE, - sizeof(shim_ss_slave_cfg_block)); - tileAddress.clearRange(SHIM_SS_SLAVE_SLOT_BASE, - sizeof(shim_ss_slave_slot_block)); + tileAddress.clearRange(SHIM_SS_MASTER_BASE, sizeof(ShimSSMasterBlock)); + tileAddress.clearRange(SHIM_SS_SLAVE_CFG_BASE, sizeof(ShimSSSlaveCfgBlock)); + tileAddress.clearRange(SHIM_SS_SLAVE_SLOT_BASE, sizeof(ShimSSSlaveSlotBlock)); } /* Generate the config for an ME tile */ -static void config_ME_tile(TileOp tileOp, const std::string &coreFilesDir) { +static void configMETile(TileOp tileOp, const std::string &coreFilesDir) { TileAddress tileAddress{tileOp}; // Reset configuration @@ -545,14 +551,14 @@ static void config_ME_tile(TileOp tileOp, const std::string &coreFilesDir) { tileAddress.clearRange(ME_DATA_MEM_BASE, DATA_MEM_SIZE); // TileDMA - tileAddress.clearRange(ME_DMA_BD_BASE, sizeof(dma_bd_reg_block)); - tileAddress.clearRange(ME_DMA_S2MM_BASE, sizeof(dma_s2mm_reg_block)); - tileAddress.clearRange(ME_DMA_MM2S_BASE, sizeof(dma_mm2s_reg_block)); + tileAddress.clearRange(ME_DMA_BD_BASE, sizeof(DMABDRegBlock)); + tileAddress.clearRange(ME_DMA_S2MM_BASE, sizeof(DMAS2MMRegBlock)); + tileAddress.clearRange(ME_DMA_MM2S_BASE, sizeof(DMAMM2SRegBlock)); // Stream Switches - tileAddress.clearRange(ME_SS_MASTER_BASE, sizeof(me_ss_master_block)); - tileAddress.clearRange(ME_SS_SLAVE_CFG_BASE, sizeof(me_ss_slave_cfg_block)); - tileAddress.clearRange(ME_SS_SLAVE_SLOT_BASE, sizeof(me_ss_slave_slot_block)); + tileAddress.clearRange(ME_SS_MASTER_BASE, sizeof(MESSMasterBlock)); + tileAddress.clearRange(ME_SS_SLAVE_CFG_BASE, sizeof(MESSSlaveCfgBlock)); + tileAddress.clearRange(ME_SS_SLAVE_SLOT_BASE, sizeof(MESSSlaveSlotBlock)); // NOTE: Here is usually where locking is done. // However, the runtime will handle that when loading the airbin. @@ -570,34 +576,34 @@ static void config_ME_tile(TileOp tileOp, const std::string &coreFilesDir) { } struct BDInfo { - bool foundBdPacket = false; + bool foundBDPacket = false; int packetType = 0; int packetID = 0; - bool foundBd = false; + bool foundBD = false; int lenA = 0; int lenB = 0; unsigned bytesA = 0; unsigned bytesB = 0; int offsetA = 0; int offsetB = 0; - uint64_t BaseAddrA = 0; - uint64_t BaseAddrB = 0; + uint64_t baseAddrA = 0; + uint64_t baseAddrB = 0; bool hasA = false; bool hasB = false; std::string bufA = "0"; std::string bufB = "0"; - uint32_t AbMode = disable; - uint32_t FifoMode = disable; // FIXME: when to enable FIFO mode? + uint32_t abMode = disable; + uint32_t fifoMode = disable; // FIXME: when to enable FIFO mode? }; static BDInfo getBDInfo(Block &block) { BDInfo bdInfo; for (auto op : block.getOps()) { - bdInfo.foundBd = true; + bdInfo.foundBD = true; auto bufferType = op.getBuffer().getType().cast<::mlir::MemRefType>(); if (op.isA()) { - bdInfo.BaseAddrA = op.getBufferOp().address(); + bdInfo.baseAddrA = op.getBufferOp().address(); bdInfo.lenA = op.getLenValue(); bdInfo.bytesA = bufferType.getElementTypeBitWidth() / 8u; bdInfo.offsetA = op.getOffsetValue(); @@ -606,7 +612,7 @@ static BDInfo getBDInfo(Block &block) { } if (op.isB()) { - bdInfo.BaseAddrB = op.getBufferOp().address(); + bdInfo.baseAddrB = op.getBufferOp().address(); bdInfo.lenB = op.getLenValue(); bdInfo.bytesB = bufferType.getElementTypeBitWidth() / 8u; bdInfo.offsetB = op.getOffsetValue(); @@ -617,7 +623,7 @@ static BDInfo getBDInfo(Block &block) { return bdInfo; } -static void configure_dmas(DeviceOp &targetOp) { +static void configureDMAs(DeviceOp &targetOp) { Field<1> dmaChannelReset; Field<0> dmaChannelEnable; @@ -626,14 +632,14 @@ static void configure_dmas(DeviceOp &targetOp) { LLVM_DEBUG(llvm::dbgs() << "DMA: tile=" << memOp.getTile()); // Clear the CTRL and QUEUE registers for the DMA channels. for (auto chNum = 0u; chNum < DMA_S2MM_CHANNEL_COUNT; ++chNum) { - write32({tile, reg_dma_s2mm_ctrl(chNum)}, + write32({tile, regDMAS2MMCtrl(chNum)}, dmaChannelReset(disable) | dmaChannelEnable(disable)); - write32({tile, reg_dma_s2mm_queue(chNum)}, 0); + write32({tile, regDMAS2MMQueue(chNum)}, 0); } for (auto chNum = 0u; chNum < DMA_MM2S_CHANNEL_COUNT; ++chNum) { - write32({tile, reg_dma_mm2s_ctrl(chNum)}, + write32({tile, regDMAMM2SCtrl(chNum)}, dmaChannelReset(disable) | dmaChannelEnable(disable)); - write32({tile, reg_dma_mm2s_queue(chNum)}, 0); + write32({tile, regDMAMM2SQueue(chNum)}, 0); } DenseMap blockMap; @@ -653,7 +659,7 @@ static void configure_dmas(DeviceOp &targetOp) { auto bdInfo = getBDInfo(block); if (bdInfo.hasA and bdInfo.hasB) { - bdInfo.AbMode = enable; + bdInfo.abMode = enable; if (bdInfo.lenA != bdInfo.lenB) llvm::errs() << "ABmode must have matching lengths.\n"; if (bdInfo.bytesA != bdInfo.bytesB) @@ -685,14 +691,14 @@ static void configure_dmas(DeviceOp &targetOp) { "lock invariants not satisfied"); for (auto op : block.getOps()) { - bdInfo.foundBdPacket = true; + bdInfo.foundBDPacket = true; bdInfo.packetType = op.getPacketType(); bdInfo.packetID = op.getPacketID(); } auto bdNum = blockMap[&block]; - me_reg_dma_bd bdData; - if (bdInfo.foundBd) { + MERegDMABD bdData; + if (bdInfo.foundBD) { Field<25, 22> bdAddressLockID; Field<21> bdAddressReleaseEnable; Field<20> bdAddressReleaseValue; @@ -702,47 +708,47 @@ static void configure_dmas(DeviceOp &targetOp) { Field<16> bdAddressAcquireValueEnable; if (bdInfo.hasA) { - bdData.addr_a = bdAddressLockID(lockID.value()) | - bdAddressReleaseEnable(relEnable) | - bdAddressAcquireEnable(acqEnable); + bdData.addrA = bdAddressLockID(lockID.value()) | + bdAddressReleaseEnable(relEnable) | + bdAddressAcquireEnable(acqEnable); if (relValue != 0xFFu) - bdData.addr_a |= bdAddressReleaseValueEnable(true) | - bdAddressReleaseValue(relValue); + bdData.addrA |= bdAddressReleaseValueEnable(true) | + bdAddressReleaseValue(relValue); if (acqValue != 0xFFu) - bdData.addr_a |= bdAddressAcquireValueEnable(true) | - bdAddressAcquireValue(acqValue); + bdData.addrA |= bdAddressAcquireValueEnable(true) | + bdAddressAcquireValue(acqValue); } if (bdInfo.hasB) llvm::report_fatal_error("bdInfo.hasB not supported"); - auto addr_a = bdInfo.BaseAddrA + bdInfo.offsetA; - auto addr_b = bdInfo.BaseAddrB + bdInfo.offsetB; + auto addr_a = bdInfo.baseAddrA + bdInfo.offsetA; + auto addr_b = bdInfo.baseAddrB + bdInfo.offsetB; Field<12, 0> bdAddressBase, bdControlLength; Field<30> bdControlABMode; Field<28> bdControlFifo; - bdData.addr_a |= bdAddressBase(addr_a >> 2u); - bdData.addr_b |= bdAddressBase(addr_b >> 2u); + bdData.addrA |= bdAddressBase(addr_a >> 2u); + bdData.addrB |= bdAddressBase(addr_b >> 2u); bdData.control |= bdControlLength(bdInfo.lenA - 1) | - bdControlFifo(bdInfo.FifoMode) | - bdControlABMode(bdInfo.AbMode); + bdControlFifo(bdInfo.fifoMode) | + bdControlABMode(bdInfo.abMode); if (block.getNumSuccessors() > 0) { // should have only one successor block assert(block.getNumSuccessors() == 1 && "block.getNumSuccessors() != 1"); auto *nextBlock = block.getSuccessors()[0]; - auto nextBdNum = blockMap[nextBlock]; + auto nextBDNum = blockMap[nextBlock]; Field<16, 13> bdControlNextBD; Field<17> bdControlEnableNextBD; - bdData.control |= bdControlEnableNextBD(nextBdNum != 0xFFu) | - bdControlNextBD(nextBdNum); + bdData.control |= bdControlEnableNextBD(nextBDNum != 0xFFu) | + bdControlNextBD(nextBDNum); } - if (bdInfo.foundBdPacket) { + if (bdInfo.foundBDPacket) { Field<14, 12> bdPacketType; Field<4, 0> bdPacketID; Field<27> bdControlEnablePacket; @@ -755,15 +761,15 @@ static void configure_dmas(DeviceOp &targetOp) { Field<31> bdControlValid; assert(bdNum < ME_DMA_BD_COUNT && "bdNum >= ME_DMA_BD_COUNT"); - uint64_t bdOffset = reg_dma_addr_a_bd(bdNum); - - write32({tile, bdOffset}, bdData.addr_a); - write32({tile, reg_dma_addr_b_bd(bdNum)}, bdData.addr_b); - write32({tile, reg_dma_2d_x_bd(bdNum)}, bdData.x_2d); - write32({tile, reg_dma_2d_y_bd(bdNum)}, bdData.y_2d); - write32({tile, reg_dma_pkt_bd(bdNum)}, bdData.packet); - write32({tile, reg_dma_int_state_bd(bdNum)}, bdData.interleave); - write32({tile, reg_dma_ctrl_bd(bdNum)}, + uint64_t bdOffset = regDMAAddrABD(bdNum); + + write32({tile, bdOffset}, bdData.addrA); + write32({tile, regDMAAddrBBD(bdNum)}, bdData.addrB); + write32({tile, regDMA2DXBD(bdNum)}, bdData.x2d); + write32({tile, regDMA2DYBD(bdNum)}, bdData.y2d); + write32({tile, regDMAPktBD(bdNum)}, bdData.packet); + write32({tile, regDMAIntStateBD(bdNum)}, bdData.interleave); + write32({tile, regDMACtrlBD(bdNum)}, bdData.control | bdControlValid(true)); } } @@ -776,14 +782,14 @@ static void configure_dmas(DeviceOp &targetOp) { uint32_t chNum = op.getChannelIndex(); if (op.getChannelDir() == DMAChannelDir::MM2S) { - write32(Address{tile, reg_dma_mm2s_queue(chNum)}, + write32(Address{tile, regDMAMM2SQueue(chNum)}, dmaChannelQueueStartBd(bdNum)); - write32({tile, reg_dma_mm2s_ctrl(chNum)}, + write32({tile, regDMAMM2SCtrl(chNum)}, dmaChannelEnable(enable) | dmaChannelReset(disable)); } else { - write32(Address{tile, reg_dma_s2mm_queue(chNum)}, + write32(Address{tile, regDMAS2MMQueue(chNum)}, dmaChannelQueueStartBd(bdNum)); - write32({tile, reg_dma_s2mm_ctrl(chNum)}, + write32({tile, regDMAS2MMCtrl(chNum)}, dmaChannelEnable(enable) | dmaChannelReset(disable)); } } @@ -861,7 +867,7 @@ static uint8_t computeMasterPort(WireBundle bundle, int index, bool isShim) { } } -static void configure_switchboxes(DeviceOp &targetOp) { +static void configureSwitchBoxes(DeviceOp &targetOp) { for (auto switchboxOp : targetOp.getOps()) { Region &r = switchboxOp.getConnections(); Block &b = r.front(); @@ -870,10 +876,10 @@ static void configure_switchboxes(DeviceOp &targetOp) { b.getOps().empty(); // NOTE: may not be needed - std::set switchbox_set; + std::set switchboxSet; if (isa(switchboxOp.getTile().getDefiningOp())) { if (!isEmpty) - switchbox_set.emplace(switchboxOp); + switchboxSet.emplace(switchboxOp); } else if (AIEX::SelectOp sel = dyn_cast( switchboxOp.getTile().getDefiningOp())) // TODO: Use XAIEV1 target and translate into write32s @@ -882,8 +888,8 @@ static void configure_switchboxes(DeviceOp &targetOp) { constexpr Field<31> streamEnable; constexpr Field<30> streamPacketEnable; for (auto connectOp : b.getOps()) { - for (auto tile : switchbox_set) { - auto slave_port = + for (auto tile : switchboxSet) { + auto slavePort = computeSlavePort(connectOp.getSourceBundle(), connectOp.sourceIndex(), tile.isShim()); auto master_port = computeMasterPort( @@ -894,19 +900,19 @@ static void configure_switchboxes(DeviceOp &targetOp) { // Configure master side { - Address address{tile, reg_me_ss_master(master_port)}; + Address address{tile, regMESSMaster(master_port)}; // TODO: `Field::extract(uint32_t)`? - auto drop_header = (slave_port & 0x80u) >> 7u; + auto drop_header = (slavePort & 0x80u) >> 7u; auto value = streamEnable(true) | streamPacketEnable(false) | streamMasterDropHeader(drop_header) | - streamMasterConfig(slave_port); + streamMasterConfig(slavePort); assert(value < UINT32_MAX); write32(address, value); } // Configure slave side { - Address address{tile, reg_me_ss_slave_cfg(slave_port)}; + Address address{tile, regMESSSlaveCfg(slavePort)}; write32(address, streamEnable(true) | streamPacketEnable(false)); } @@ -927,7 +933,7 @@ static void configure_switchboxes(DeviceOp &targetOp) { auto config = streamMasterDropHeader(dropHeader) | (mask << STREAM_SWITCH_MSEL_SHIFT) | (arbiter << STREAM_SWITCH_ARB_SHIFT); - Address dest{tile, reg_me_ss_master(master_port)}; + Address dest{tile, regMESSMaster(master_port)}; write32(dest, streamEnable(enable) | streamPacketEnable(enable) | streamMasterDropHeader(dropHeader) | streamMasterConfig(config)); @@ -943,11 +949,11 @@ static void configure_switchboxes(DeviceOp &targetOp) { int arbiter = amselOp.arbiterIndex(); int msel = amselOp.getMselValue(); - for (auto tile : switchbox_set) { + for (auto tile : switchboxSet) { auto slavePort = computeSlavePort(connectOp.getSourceBundle(), connectOp.sourceIndex(), tile.isShim()); - write32({tile, reg_me_ss_slave_cfg(slavePort)}, + write32({tile, regMESSSlaveCfg(slavePort)}, streamEnable(enable) | streamPacketEnable(enable)); Field<28, 24> streamSlotId; @@ -960,7 +966,7 @@ static void configure_switchboxes(DeviceOp &targetOp) { streamSlotMask(slotOp.maskInt()) | streamSlotEnable(enable) | streamSlotMSel(msel) | streamSlotArbit(arbiter); - write32({tile, reg_me_ss_slave_slot(slavePort, slot)}, config); + write32({tile, regMESSSlaveSlot(slavePort, slot)}, config); slot++; } } @@ -1074,7 +1080,7 @@ static void configure_switchboxes(DeviceOp &targetOp) { Used to look up register/region name */ -static uint8_t sec_addr2index(uint64_t in) { +static uint8_t secAddr2Index(uint64_t in) { switch (in & ((1 << TILE_ADDR_OFF_WIDTH) - 1)) { case 0: return SEC_IDX_DATA_MEM; @@ -1102,22 +1108,22 @@ static uint8_t sec_addr2index(uint64_t in) { /* Group the writes into contiguous sections */ -static void group_sections(std::vector
§ions) { - uint64_t last_addr = 0; +static void groupSections(std::vector
§ions) { + uint64_t lastAddr = 0; Section *section = nullptr; - for (auto write : mem_writes) { - if (write.first != last_addr + 4) { + for (auto write : memWrites) { + if (write.first != lastAddr + 4) { if (section) sections.push_back(section); section = new Section(write.first); LLVM_DEBUG(llvm::dbgs() << "Starting new section @ " << llvm::format("0x%lx (last=0x%lx)\n", - write.first, last_addr)); + write.first, lastAddr)); } assert(section && "section is null"); - section->add_data(write.second); - last_addr = write.first; + section->addData(write.second); + lastAddr = write.first; } sections.push_back(section); @@ -1127,7 +1133,7 @@ static void group_sections(std::vector
§ions) { Add a string to the section header string table and return the offset of the start of the string */ -static size_t add_string(Elf_Scn *scn, const char *str) { +static size_t addString(Elf_Scn *scn, const char *str) { size_t lastidx = stridx; size_t size = strlen(str) + 1; @@ -1142,8 +1148,8 @@ static size_t add_string(Elf_Scn *scn, const char *str) { return lastidx; } -Elf_Data *section_add_data(Elf_Scn *scn, const Section *section) { - size_t size = section->get_length(); +Elf_Data *sectionAddData(Elf_Scn *scn, const Section *section) { + size_t size = section->getLength(); if (TEST_AIRBIN) size = 4; auto *buf = static_cast(malloc(size)); @@ -1158,7 +1164,7 @@ Elf_Data *section_add_data(Elf_Scn *scn, const Section *section) { data->d_version = EV_CURRENT; // fill the data - memcpy(buf, section->get_data(), size); + memcpy(buf, section->getData(), size); return data; } @@ -1170,14 +1176,14 @@ mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, TEST_AIRBIN = testAirBin; - int tmp_elf_fd; - Elf *outelf; - GElf_Ehdr ehdr_mem; + int tmpElfFD; + Elf *outElf; + GElf_Ehdr ehdrMem; GElf_Ehdr *ehdr; GElf_Shdr *shdr; - GElf_Shdr shdr_mem; + GElf_Shdr shdrMem; char empty_str[] = ""; - char strtab_name[] = ".shstrtab"; + char strTabName[] = ".shstrtab"; std::vector
sections; DenseMap, Operation *> tiles; @@ -1198,28 +1204,28 @@ mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, for (auto tileOp : targetOp.getOps()) { LLVM_DEBUG(llvm::dbgs() << "CC: tile=" << tileOp.getTileID()); if (tileOp.isShimTile()) - config_shim_tile(tileOp); + configShimTile(tileOp); else - config_ME_tile(tileOp, coreFilesDir); + configMETile(tileOp, coreFilesDir); } - configure_switchboxes(targetOp); - configure_dmas(targetOp); - group_sections(sections); + configureSwitchBoxes(targetOp); + configureDMAs(targetOp); + groupSections(sections); LLVM_DEBUG(llvm::dbgs() << llvm::format("mem_writes: %lu in %lu sections\n", - mem_writes.size(), sections.size())); + memWrites.size(), sections.size())); elf_version(EV_CURRENT); - tmp_elf_fd = + tmpElfFD = open(outputFilename.c_str(), O_RDWR | O_CREAT | O_TRUNC, DEFFILEMODE); - outelf = elf_begin(tmp_elf_fd, ELF_C_WRITE, nullptr); + outElf = elf_begin(tmpElfFD, ELF_C_WRITE, nullptr); - if (!gelf_newehdr(outelf, ELFCLASS64)) + if (!gelf_newehdr(outElf, ELFCLASS64)) llvm::report_fatal_error(llvm::Twine("Error creating ELF64 header: ") + elf_errmsg(-1)); - ehdr = gelf_getehdr(outelf, &ehdr_mem); + ehdr = gelf_getehdr(outElf, &ehdrMem); if (!ehdr) llvm::report_fatal_error(llvm::Twine("cannot get ELF header: ") + elf_errmsg(-1)); @@ -1230,20 +1236,20 @@ mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, ehdr->e_type = ET_NONE; ehdr->e_machine = EM_AMDAIR; ehdr->e_version = EV_CURRENT; - if (gelf_update_ehdr(outelf, ehdr) == 0) + if (gelf_update_ehdr(outElf, ehdr) == 0) llvm::report_fatal_error(llvm::Twine("cannot update ELF header: ") + elf_errmsg(-1)); // Create new section for the 'section header string table' - Elf_Scn *shstrtab_scn = elf_newscn(outelf); - if (!shstrtab_scn) + Elf_Scn *shStrTabScn = elf_newscn(outElf); + if (!shStrTabScn) llvm::report_fatal_error( llvm::Twine("cannot create new shstrtab section: ") + elf_errmsg(-1)); // the first entry in the string table must be a NULL string - add_string(shstrtab_scn, empty_str); + addString(shStrTabScn, empty_str); - shdr = gelf_getshdr(shstrtab_scn, &shdr_mem); + shdr = gelf_getshdr(shStrTabScn, &shdrMem); if (!shdr) llvm::report_fatal_error( llvm::Twine("cannot get header for sh_strings section: ") + @@ -1256,70 +1262,65 @@ mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, shdr->sh_info = SHN_UNDEF; shdr->sh_addralign = 1; shdr->sh_entsize = 0; - shdr->sh_name = add_string(shstrtab_scn, strtab_name); + shdr->sh_name = addString(shStrTabScn, strTabName); // add all the AIRBIN-specific section names up front and index them - for (uint8_t sec_idx = SEC_IDX_SSMAST; sec_idx < SEC_IDX_MAX; sec_idx++) - sec_name_offset[sec_idx] = add_string(shstrtab_scn, sec_name_str[sec_idx]); - sec_name_offset[SEC_IDX_NULL] = 0; + for (uint8_t secIdx = SEC_IDX_SSMAST; secIdx < SEC_IDX_MAX; secIdx++) + secNameOffset[secIdx] = addString(shStrTabScn, secNameStr[secIdx]); + secNameOffset[SEC_IDX_NULL] = 0; // We have to store the section strtab index in the ELF header so sections // have actual names. - int ndx = elf_ndxscn(shstrtab_scn); + int ndx = elf_ndxscn(shStrTabScn); ehdr->e_shstrndx = ndx; - if (!gelf_update_ehdr(outelf, ehdr)) + if (!gelf_update_ehdr(outElf, ehdr)) llvm::report_fatal_error(llvm::Twine("cannot update ELF header: ") + elf_errmsg(-1)); // Finished new shstrtab section, update the header. - if (!gelf_update_shdr(shstrtab_scn, shdr)) + if (!gelf_update_shdr(shStrTabScn, shdr)) llvm::report_fatal_error( llvm::Twine("cannot update new shstrtab section header: ") + elf_errmsg(-1)); // output the rest of the sections for (const Section *section : sections) { - uint64_t addr = section->get_addr(); - Elf_Scn *scn = elf_newscn(outelf); + uint64_t addr = section->getAddr(); + Elf_Scn *scn = elf_newscn(outElf); if (!scn) llvm::report_fatal_error(llvm::Twine("cannot create new ") + - sec_name_str[sec_addr2index(addr)] + + secNameStr[secAddr2Index(addr)] + "section: " + elf_errmsg(-1)); - shdr = gelf_getshdr(scn, &shdr_mem); + shdr = gelf_getshdr(scn, &shdrMem); if (!shdr) llvm::report_fatal_error(llvm::Twine("cannot get header for ") + - sec_name_str[sec_addr2index(addr)] + + secNameStr[secAddr2Index(addr)] + "section: " + elf_errmsg(-1)); - Elf_Data *data = section_add_data(scn, section); + Elf_Data *data = sectionAddData(scn, section); shdr->sh_type = SHT_PROGBITS; shdr->sh_flags = SHF_ALLOC; - shdr->sh_addr = section->get_addr(); + shdr->sh_addr = section->getAddr(); shdr->sh_link = SHN_UNDEF; shdr->sh_info = SHN_UNDEF; shdr->sh_addralign = 1; shdr->sh_entsize = 0; shdr->sh_size = data->d_size; - shdr->sh_name = sec_name_offset[sec_addr2index(addr)]; + shdr->sh_name = secNameOffset[secAddr2Index(addr)]; if (!gelf_update_shdr(scn, shdr)) llvm::report_fatal_error(llvm::Twine("cannot update section header: ") + elf_errmsg(-1)); } - // Write everything to disk. - if (elf_update(outelf, ELF_C_WRITE) < 0) + if (elf_update(outElf, ELF_C_WRITE) < 0) llvm::report_fatal_error(llvm::Twine("failure in elf_update: ") + elf_errmsg(-1)); - - // close the elf object - elf_end(outelf); - - // copy the file to the compiler's output stream - close(tmp_elf_fd); + elf_end(outElf); + close(tmpElfFD); return success(); } diff --git a/lib/Targets/AIETargets.h b/lib/Targets/AIETargets.h index 7ff87e48b3..82335ff06e 100644 --- a/lib/Targets/AIETargets.h +++ b/lib/Targets/AIETargets.h @@ -13,10 +13,12 @@ namespace xilinx::AIE { mlir::LogicalResult AIETranslateToXAIEV2(mlir::ModuleOp module, llvm::raw_ostream &output); +#ifdef AIE_ENABLE_AIRBIN mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, const std::string &outputFilename, const std::string &coreFilesDir, bool testAirBin = false); +#endif mlir::LogicalResult AIEFlowsToJSON(mlir::ModuleOp module, llvm::raw_ostream &output); mlir::LogicalResult ADFGenerateCPPGraph(mlir::ModuleOp module,